diff --git a/oshmem/mca/scoll/basic/scoll_basic.h b/oshmem/mca/scoll/basic/scoll_basic.h index d22d766708..d48f7e3947 100644 --- a/oshmem/mca/scoll/basic/scoll_basic.h +++ b/oshmem/mca/scoll/basic/scoll_basic.h @@ -19,6 +19,17 @@ BEGIN_C_DECLS +/* These functions (BARRIER_FUNC, BCAST_FUNC) may be called from any basic algorithm. + * In the case of shmem, the implementation of broadcast doesn't require + * each process to know the message size (only the root needs to know it). + * This differs from other implementations, so it may cause problems if + * BCAST_FUNC is a callback to another implementation (e.g., fca, hcoll). + * So we replace the callback (group->g_scoll.scoll_[func]) + * with the corresponding basic function. */ + +#define BARRIER_FUNC mca_scoll_basic_barrier +#define BCAST_FUNC mca_scoll_basic_broadcast + /* Globally exported variables */ OSHMEM_MODULE_DECLSPEC extern mca_scoll_base_component_1_0_0_t diff --git a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c index 9a229879d9..2c8caa5f52 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c +++ b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c @@ -151,9 +151,9 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, /* Wait for operation completion to set needed size */ if (rc == OSHMEM_SUCCESS) { SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe); - rc = group->g_scoll.scoll_barrier(group, - (pSync + 1), - SCOLL_DEFAULT_ALG); + rc = BARRIER_FUNC(group, + (pSync + 1), + SCOLL_DEFAULT_ALG); } return rc; diff --git a/oshmem/mca/scoll/basic/scoll_basic_collect.c b/oshmem/mca/scoll/basic/scoll_basic_collect.c index f5787449be..aa81facae5 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_collect.c +++ b/oshmem/mca/scoll/basic/scoll_basic_collect.c @@ -187,13 +187,13 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] Broadcast from the root #%d", group->my_pe, PE_root); - rc = 
group->g_scoll.scoll_broadcast(group, - PE_root, - target, - target, - group->proc_count * nlong, - (pSync + 1), - SCOLL_DEFAULT_ALG); + rc = BCAST_FUNC(group, + PE_root, + target, + target, + group->proc_count * nlong, + (pSync + 1), + SCOLL_DEFAULT_ALG); } SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]); @@ -297,13 +297,13 @@ static int _algorithm_f_tournament(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] Broadcast from the root #%d", group->my_pe, PE_root); - rc = group->g_scoll.scoll_broadcast(group, - PE_root, - target, - target, - group->proc_count * nlong, - (pSync + 1), - SCOLL_DEFAULT_ALG); + rc = BCAST_FUNC(group, + PE_root, + target, + target, + group->proc_count * nlong, + (pSync + 1), + SCOLL_DEFAULT_ALG); } SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]); @@ -612,13 +612,14 @@ static int _algorithm_central_collector(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] Broadcast from the root #%d", group->my_pe, PE_root); - rc = group->g_scoll.scoll_broadcast(group, - PE_root, - target, - target, - offset, - (pSync + 1), - SCOLL_DEFAULT_ALG); + + rc = BCAST_FUNC(group, + PE_root, + target, + target, + offset, + (pSync + 1), + SCOLL_DEFAULT_ALG); } return rc; diff --git a/oshmem/mca/scoll/basic/scoll_basic_reduce.c b/oshmem/mca/scoll/basic/scoll_basic_reduce.c index dc82540c5a..a7849f7ba0 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_reduce.c +++ b/oshmem/mca/scoll/basic/scoll_basic_reduce.c @@ -231,13 +231,13 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] Broadcast from the root #%d", group->my_pe, PE_root); - rc = group->g_scoll.scoll_broadcast(group, - PE_root, - target, - target, - nlong, - (pSync + 1), - SCOLL_DEFAULT_ALG); + rc = BCAST_FUNC(group, + PE_root, + target, + target, + nlong, + (pSync + 1), + SCOLL_DEFAULT_ALG); } return rc; @@ -349,13 +349,13 @@ static int _algorithm_tournament(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] 
Broadcast from the root #%d", group->my_pe, PE_root); - rc = group->g_scoll.scoll_broadcast(group, - PE_root, - target, - target, - nlong, - (pSync + 1), - SCOLL_DEFAULT_ALG); + rc = BCAST_FUNC(group, + PE_root, + target, + target, + nlong, + (pSync + 1), + SCOLL_DEFAULT_ALG); } free(target_cur); @@ -628,13 +628,13 @@ static int _algorithm_linear(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] Broadcast from the root #%d", group->my_pe, root_pe); - rc = group->g_scoll.scoll_broadcast(group, - root_pe, - target, - target, - nlong, - (pSync + 1), - SCOLL_DEFAULT_ALG); + rc = BCAST_FUNC(group, + root_pe, + target, + target, + nlong, + (pSync + 1), + SCOLL_DEFAULT_ALG); } /* All done */ @@ -796,13 +796,13 @@ static int _algorithm_log(struct oshmem_group_t *group, SCOLL_VERBOSE(14, "[#%d] Broadcast from the root #%d", rank, root_pe); - rc = group->g_scoll.scoll_broadcast(group, - root_pe, - target, - target, - nlong, - (pSync + 1), - SCOLL_DEFAULT_ALG); + rc = BCAST_FUNC(group, + root_pe, + target, + target, + nlong, + (pSync + 1), + SCOLL_DEFAULT_ALG); } /* All done */ diff --git a/oshmem/mca/spml/yoda/spml_yoda.c b/oshmem/mca/spml/yoda/spml_yoda.c index 3a610f6a50..5a7239045f 100644 --- a/oshmem/mca/spml/yoda/spml_yoda.c +++ b/oshmem/mca/spml/yoda/spml_yoda.c @@ -418,7 +418,7 @@ mca_spml_mkey_t *mca_spml_yoda_register(void* addr, SPML_VERBOSE(5, "rank %d btl %s address 0x%p len %llu shmid 0x%X|0x%X", - oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type), + oshmem_proc_local_proc->proc_name.vpid, btl_type2str(ybtl->btl_type), mkeys[i].va_base, (unsigned long long)size, MEMHEAP_SHM_GET_TYPE(shmid), MEMHEAP_SHM_GET_ID(shmid)); } OBJ_DESTRUCT(&convertor); @@ -917,6 +917,15 @@ int mca_spml_yoda_enable(bool enable) mca_spml_yoda.enabled = true; + /* The following line resolves the issue with BTL tcp and SPML yoda. 
In this case the + * atomic_basic_lock(root_rank) function may behave like a DoS attack on root_rank, since + * all the processes will do shmem_int_get from root_rank. These calls would go through + * bml active messaging and will trigger replays in libevent on root rank. If the flag + * OPAL_EVLOOP_ONCE is not set then libevent will continuously progress the constantly + * incoming events, thus causing root_rank to get stuck in the libevent loop. + */ + opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK | OPAL_EVLOOP_ONCE); + #if OSHMEM_WAIT_COMPLETION_DEBUG == 1 condition_dbg_init(); #endif