OSHMEM: various fixes
1. Fix in the oshmem scoll component: basic algorithms should call the basic collectives, since their implementation is incompatible with others (fca, hcoll). 2. Set the OPAL_EVLOOP_ONCE flag ON for libevent in the case of yoda spml; otherwise there is a possible deadlock in the atomic_basic_lock call. Fixed by Val, Igor; reviewed by Miked. cmr=v1.7.5:reviewer=ompi-rm1.7 This commit was SVN r30762.
Этот коммит содержится в:
родитель
69aba904ed
Коммит
982149d8c8
@ -19,6 +19,17 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* These functions (BARRIER_FUNC, BCAST_FUNC) may be called from any basic algorithm.
|
||||
* In case of shmem, the implementation of broadcast doesn't require
|
||||
* each process to know the message size (only the root needs to know it).
|
||||
* It differs from other implementations, so it may cause problems if
|
||||
* BCAST_FUNC is a callback to another implementation (e.g., fca, hcoll).
|
||||
* So we replace a callback (group->g_scoll.scoll_[func])
|
||||
* with a corresponding basic function. */
|
||||
|
||||
#define BARRIER_FUNC mca_scoll_basic_barrier
|
||||
#define BCAST_FUNC mca_scoll_basic_broadcast
|
||||
|
||||
/* Globally exported variables */
|
||||
|
||||
OSHMEM_MODULE_DECLSPEC extern mca_scoll_base_component_1_0_0_t
|
||||
|
@ -151,7 +151,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
|
||||
/* Wait for operation completion to set needed size */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe);
|
||||
rc = group->g_scoll.scoll_barrier(group,
|
||||
rc = BARRIER_FUNC(group,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
@ -187,7 +187,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group,
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
rc = BCAST_FUNC(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
@ -297,7 +297,7 @@ static int _algorithm_f_tournament(struct oshmem_group_t *group,
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
rc = BCAST_FUNC(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
@ -612,7 +612,8 @@ static int _algorithm_central_collector(struct oshmem_group_t *group,
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
|
||||
rc = BCAST_FUNC(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
|
@ -231,7 +231,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
rc = BCAST_FUNC(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
@ -349,7 +349,7 @@ static int _algorithm_tournament(struct oshmem_group_t *group,
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
rc = BCAST_FUNC(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
@ -628,7 +628,7 @@ static int _algorithm_linear(struct oshmem_group_t *group,
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, root_pe);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
rc = BCAST_FUNC(group,
|
||||
root_pe,
|
||||
target,
|
||||
target,
|
||||
@ -796,7 +796,7 @@ static int _algorithm_log(struct oshmem_group_t *group,
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
rank, root_pe);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
rc = BCAST_FUNC(group,
|
||||
root_pe,
|
||||
target,
|
||||
target,
|
||||
|
@ -917,6 +917,15 @@ int mca_spml_yoda_enable(bool enable)
|
||||
|
||||
mca_spml_yoda.enabled = true;
|
||||
|
||||
/* The following line resolves the issue with BTL tcp and SPML yoda. In this case the
|
||||
* atomic_basic_lock(root_rank) function may behave as a DoS attack on root_rank, since
|
||||
* all the processes will do shmem_int_get from root_rank. These calls would go through
|
||||
* bml active messaging and will trigger replays in libevent on root rank. If the flag
|
||||
* OPAL_EVLOOP_ONCE is not set then libevent will continuously progress constantly
|
||||
* incoming events thus causing root_rank to get stuck in the libevent loop.
|
||||
*/
|
||||
opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK | OPAL_EVLOOP_ONCE);
|
||||
|
||||
#if OSHMEM_WAIT_COMPLETION_DEBUG == 1
|
||||
condition_dbg_init();
|
||||
#endif
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user