OSHMEM: finalization fixes
Fixes mxm endpoint destruction and a hang during SHMEM finalization. Add a barrier between spml del procs and finalization. Not having it caused hangs because the ikrit spml cannot properly disconnect if its peer has already finalized. Refs trac:3763 This commit was SVN r30572. The following Trac tickets were found above: Ticket 3763 --> https://svn.open-mpi.org/trac/ompi/ticket/3763
Этот коммит содержится в:
родитель
081b679881
Коммит
27a763c86c
@ -343,16 +343,14 @@ int mca_spml_ikrit_del_procs(oshmem_proc_t** procs, size_t nprocs)
|
|||||||
size_t i;
|
size_t i;
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
|
|
||||||
if (mca_spml_ikrit.mxm_ep) {
|
|
||||||
mxm_ep_destroy(mca_spml_ikrit.mxm_ep);
|
|
||||||
mca_spml_ikrit.mxm_ep = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (NULL != (item = opal_list_remove_first(&mca_spml_ikrit.active_peers))) {
|
while (NULL != (item = opal_list_remove_first(&mca_spml_ikrit.active_peers))) {
|
||||||
};
|
};
|
||||||
OBJ_DESTRUCT(&mca_spml_ikrit.active_peers);
|
OBJ_DESTRUCT(&mca_spml_ikrit.active_peers);
|
||||||
|
|
||||||
for (i = 0; i < nprocs; i++) {
|
for (i = 0; i < nprocs; i++) {
|
||||||
|
if (mca_spml_ikrit.mxm_peers[i]->mxm_conn) {
|
||||||
|
mxm_ep_disconnect(mca_spml_ikrit.mxm_peers[i]->mxm_conn);
|
||||||
|
}
|
||||||
destroy_ptl_idx(i);
|
destroy_ptl_idx(i);
|
||||||
if (mca_spml_ikrit.mxm_peers[i]) {
|
if (mca_spml_ikrit.mxm_peers[i]) {
|
||||||
OBJ_RELEASE(mca_spml_ikrit.mxm_peers[i]);
|
OBJ_RELEASE(mca_spml_ikrit.mxm_peers[i]);
|
||||||
|
@ -130,6 +130,8 @@ static int _shmem_finalize(void)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
oshmem_shmem_barrier();
|
||||||
|
|
||||||
/* free spml resource */
|
/* free spml resource */
|
||||||
if (OSHMEM_SUCCESS != (ret = mca_spml_base_finalize())) {
|
if (OSHMEM_SUCCESS != (ret = mca_spml_base_finalize())) {
|
||||||
return ret;
|
return ret;
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user