1
1

osc/rdma: use local base for local process when possible

This commit fixes a crash that occurs when using btl/vader as an RDMA
btl. That btl supports CPU atomics but does not support self
communication, so osc/rdma must use its local memory optimizations
when the target is the local process.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2018-07-12 10:50:20 -06:00 коммит произвёл Nathan Hjelm
родитель c87a3df0c9
Коммит 304a6a52d4
2 изменённых файла: 8 добавлений и 3 удаления

Просмотреть файл

@ -892,7 +892,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
/* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
* the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
if (origin_extent <= 8 && 1 == origin_count) {
if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) {
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
if (NULL == result_addr) {
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address,

Просмотреть файл

@ -759,9 +759,14 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
ex_peer->size = temp[i].size;
}
if (module->use_cpu_atomics && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
if (module->use_cpu_atomics && (MPI_WIN_FLAVOR_ALLOCATE == module->flavor || peer_rank == my_rank)) {
/* base is local and cpu atomics are available */
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
ex_peer->super.base = (uintptr_t) module->segment_base + offset;
} else {
ex_peer->super.base = (uintptr_t) *base;
}
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
offset += temp[i].size;
} else {