Merge pull request #6922 from hoopoepg/topic/fixed-hand-on-shmem-finalize-v4.0
SPML/UCX: fixed hang in SHMEM_FINALIZE - v4.0
Этот коммит содержится в:
Коммит
be67734fdf
@@ -186,8 +186,11 @@ static void opal_common_ucx_wait_all_requests(void **reqs, int count, ucp_worker
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs, size_t count,
|
||||
size_t my_rank, size_t max_disconnect, ucp_worker_h worker) {
|
||||
OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs,
|
||||
size_t count, size_t my_rank,
|
||||
size_t max_disconnect,
|
||||
ucp_worker_h worker)
|
||||
{
|
||||
size_t num_reqs;
|
||||
size_t max_reqs;
|
||||
void *dreq, **dreqs;
|
||||
|
@@ -127,18 +127,16 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn = NULL;
|
||||
}
|
||||
|
||||
ret = opal_common_ucx_del_procs(del_procs, nprocs, oshmem_my_proc_id(),
|
||||
mca_spml_ucx.num_disconnect,
|
||||
mca_spml_ucx_ctx_default.ucp_worker);
|
||||
|
||||
ret = opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(),
|
||||
mca_spml_ucx.num_disconnect,
|
||||
mca_spml_ucx_ctx_default.ucp_worker);
|
||||
/* No need to barrier here - barrier is called in _shmem_finalize */
|
||||
free(del_procs);
|
||||
free(mca_spml_ucx.remote_addrs_tbl);
|
||||
free(mca_spml_ucx_ctx_default.ucp_peers);
|
||||
|
||||
mca_spml_ucx_ctx_default.ucp_peers = NULL;
|
||||
|
||||
opal_common_ucx_mca_proc_added();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -326,6 +324,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
free(wk_roffs);
|
||||
|
||||
SPML_UCX_VERBOSE(50, "*** ADDED PROCS ***");
|
||||
|
||||
opal_common_ucx_mca_proc_added();
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
error2:
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user