OSC rdma: make sure accumulating in shared memory is safe
Signed-off-by: Joseph Schuchart <schuchart@hlrs.de>
(cherry picked from commit c67e229193)
Этот коммит содержится в:
родитель
a42977f1c2
Коммит
900f0fa21f
@@ -145,6 +145,9 @@ struct ompi_osc_rdma_module_t {
|
||||
|
||||
bool acc_use_amo;
|
||||
|
||||
/** whether the group is located on a single node */
|
||||
bool single_node;
|
||||
|
||||
/** flavor of this window */
|
||||
int flavor;
|
||||
|
||||
|
@@ -889,10 +889,19 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
|
||||
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
|
||||
}
|
||||
|
||||
/* accumulate in (shared) memory if there is only a single node
|
||||
* OR if we have an exclusive lock
|
||||
* OR if other processes won't try to use the network either */
|
||||
bool use_shared_mem = module->single_node ||
|
||||
(ompi_osc_rdma_peer_local_base (peer) &&
|
||||
(ompi_osc_rdma_peer_is_exclusive (peer) ||
|
||||
!module->acc_single_intrinsic));
|
||||
|
||||
/* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
|
||||
* the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
|
||||
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
|
||||
if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) {
|
||||
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock.
|
||||
* avoid using the NIC if the operation can be done directly in shared memory. */
|
||||
if (origin_extent <= 8 && 1 == origin_count && !use_shared_mem) {
|
||||
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
|
||||
if (NULL == result_addr) {
|
||||
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address,
|
||||
|
@@ -543,7 +543,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
local_size = ompi_comm_size (shared_comm);
|
||||
|
||||
/* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
|
||||
module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
|
||||
module->single_node = local_size == global_size;
|
||||
module->use_cpu_atomics = module->single_node || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
|
||||
|
||||
if (1 == local_size) {
|
||||
/* no point using a shared segment if there are no other processes on this node */
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user