1
1
Fixes mxm endpoint destruction and a hang during
SHMEM finalization.

Add a barrier between spml del procs and finalization.
Without it, hangs occurred because the ikrit spml cannot properly
disconnect if its peer has already finalized.

Refs trac:3763

This commit was SVN r30572.

The following Trac tickets were found above:
  Ticket 3763 --> https://svn.open-mpi.org/trac/ompi/ticket/3763
Этот коммит содержится в:
Mike Dubman 2014-02-06 08:40:43 +00:00
родитель 081b679881
Коммит 27a763c86c
2 изменённых файлов: 5 добавлений и 5 удалений

Просмотреть файл

@ -343,16 +343,14 @@ int mca_spml_ikrit_del_procs(oshmem_proc_t** procs, size_t nprocs)
size_t i; size_t i;
opal_list_item_t *item; opal_list_item_t *item;
if (mca_spml_ikrit.mxm_ep) {
mxm_ep_destroy(mca_spml_ikrit.mxm_ep);
mca_spml_ikrit.mxm_ep = 0;
}
while (NULL != (item = opal_list_remove_first(&mca_spml_ikrit.active_peers))) { while (NULL != (item = opal_list_remove_first(&mca_spml_ikrit.active_peers))) {
}; };
OBJ_DESTRUCT(&mca_spml_ikrit.active_peers); OBJ_DESTRUCT(&mca_spml_ikrit.active_peers);
for (i = 0; i < nprocs; i++) { for (i = 0; i < nprocs; i++) {
if (mca_spml_ikrit.mxm_peers[i]->mxm_conn) {
mxm_ep_disconnect(mca_spml_ikrit.mxm_peers[i]->mxm_conn);
}
destroy_ptl_idx(i); destroy_ptl_idx(i);
if (mca_spml_ikrit.mxm_peers[i]) { if (mca_spml_ikrit.mxm_peers[i]) {
OBJ_RELEASE(mca_spml_ikrit.mxm_peers[i]); OBJ_RELEASE(mca_spml_ikrit.mxm_peers[i]);

Просмотреть файл

@ -130,6 +130,8 @@ static int _shmem_finalize(void)
return ret; return ret;
} }
oshmem_shmem_barrier();
/* free spml resource */ /* free spml resource */
if (OSHMEM_SUCCESS != (ret = mca_spml_base_finalize())) { if (OSHMEM_SUCCESS != (ret = mca_spml_base_finalize())) {
return ret; return ret;