1
1

OSC rdma: make sure accumulating in shared memory is safe

Signed-off-by: Joseph Schuchart <schuchart@hlrs.de>
(cherry picked from commit c67e229193)
Этот коммит содержится в:
Joseph Schuchart 2019-05-10 14:32:27 +02:00
родитель a42977f1c2
Коммит 900f0fa21f
3 изменённых файла: 16 добавлений и 3 удаления

Просмотреть файл

@@ -145,6 +145,9 @@ struct ompi_osc_rdma_module_t {
bool acc_use_amo;
/** whether the group is located on a single node */
bool single_node;
/** flavor of this window */
int flavor;

Просмотреть файл

@@ -889,10 +889,19 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
}
/* accumulate in (shared) memory if there is only a single node
* OR if we have an exclusive lock
* OR if other processes won't try to use the network either */
bool use_shared_mem = module->single_node ||
(ompi_osc_rdma_peer_local_base (peer) &&
(ompi_osc_rdma_peer_is_exclusive (peer) ||
!module->acc_single_intrinsic));
/* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
* the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) {
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock.
* avoid using the NIC if the operation can be done directly in shared memory. */
if (origin_extent <= 8 && 1 == origin_count && !use_shared_mem) {
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
if (NULL == result_addr) {
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address,

Просмотреть файл

@@ -543,7 +543,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
local_size = ompi_comm_size (shared_comm);
/* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
module->single_node = local_size == global_size;
module->use_cpu_atomics = module->single_node || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
if (1 == local_size) {
/* no point using a shared segment if there are no other processes on this node */