osc/rdma: use local base for local process when possible
This commit fixes a crash that occurs when using btl/vader as an RDMA btl. This btl supports CPU atomics but does not support using the btl for self communication, so we must use the local memory optimizations in osc/rdma for the local process.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
c87a3df0c9
Коммит
304a6a52d4
@@ -892,7 +892,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
|
||||
/* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
|
||||
* the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
|
||||
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
|
||||
if (origin_extent <= 8 && 1 == origin_count) {
|
||||
if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) {
|
||||
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
|
||||
if (NULL == result_addr) {
|
||||
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address,
|
||||
|
@@ -759,9 +759,14 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
ex_peer->size = temp[i].size;
|
||||
}
|
||||
|
||||
if (module->use_cpu_atomics && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
if (module->use_cpu_atomics && (MPI_WIN_FLAVOR_ALLOCATE == module->flavor || peer_rank == my_rank)) {
|
||||
/* base is local and cpu atomics are available */
|
||||
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
ex_peer->super.base = (uintptr_t) module->segment_base + offset;
|
||||
} else {
|
||||
ex_peer->super.base = (uintptr_t) *base;
|
||||
}
|
||||
|
||||
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
|
||||
offset += temp[i].size;
|
||||
} else {
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user