From 27a763c86c91c07af007d72321086e62e284aed3 Mon Sep 17 00:00:00 2001 From: Mike Dubman Date: Thu, 6 Feb 2014 08:40:43 +0000 Subject: [PATCH] OSHMEM: finalization fixes Fixes mxm endpoint destruction and a hang during SHMEM finalization. Add a barrier between spml del procs and finalization. Not having it caused hangs because the ikrit spml cannot properly disconnect if its peer has already finalized. Refs trac:3763 This commit was SVN r30572. The following Trac tickets were found above: Ticket 3763 --> https://svn.open-mpi.org/trac/ompi/ticket/3763 --- oshmem/mca/spml/ikrit/spml_ikrit.c | 8 +++----- oshmem/runtime/oshmem_shmem_finalize.c | 2 ++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.c b/oshmem/mca/spml/ikrit/spml_ikrit.c index cb57a40071..2c14a83df0 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit.c @@ -343,16 +343,14 @@ int mca_spml_ikrit_del_procs(oshmem_proc_t** procs, size_t nprocs) size_t i; opal_list_item_t *item; - if (mca_spml_ikrit.mxm_ep) { - mxm_ep_destroy(mca_spml_ikrit.mxm_ep); - mca_spml_ikrit.mxm_ep = 0; - } - while (NULL != (item = opal_list_remove_first(&mca_spml_ikrit.active_peers))) { }; OBJ_DESTRUCT(&mca_spml_ikrit.active_peers); for (i = 0; i < nprocs; i++) { + if (mca_spml_ikrit.mxm_peers[i]->mxm_conn) { + mxm_ep_disconnect(mca_spml_ikrit.mxm_peers[i]->mxm_conn); + } destroy_ptl_idx(i); if (mca_spml_ikrit.mxm_peers[i]) { OBJ_RELEASE(mca_spml_ikrit.mxm_peers[i]); diff --git a/oshmem/runtime/oshmem_shmem_finalize.c b/oshmem/runtime/oshmem_shmem_finalize.c index d5428498a7..92ea42f874 100644 --- a/oshmem/runtime/oshmem_shmem_finalize.c +++ b/oshmem/runtime/oshmem_shmem_finalize.c @@ -130,6 +130,8 @@ static int _shmem_finalize(void) return ret; } + oshmem_shmem_barrier(); + /* free spml resource */ if (OSHMEM_SUCCESS != (ret = mca_spml_base_finalize())) { return ret;