From 97605a4002baf4bc556604890d7255f58544c086 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 14 May 2014 16:14:00 +0000 Subject: [PATCH] btl/scif: fix hang at shutdown. scif_close is not causing scif_poll in the listening thread to return as expected. To ensure the thread exits, attempt to make a local connection to wake up the thread before calling pthread_join. cmr=v1.8.2:reviewer=ggouaillardet This commit was SVN r31756. --- ompi/mca/btl/scif/btl_scif.h | 2 ++ ompi/mca/btl/scif/btl_scif_add_procs.c | 2 +- ompi/mca/btl/scif/btl_scif_component.c | 1 + ompi/mca/btl/scif/btl_scif_module.c | 4 ++++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ompi/mca/btl/scif/btl_scif.h b/ompi/mca/btl/scif/btl_scif.h index b5e39c4ffc..6922a1a53b 100644 --- a/ompi/mca/btl/scif/btl_scif.h +++ b/ompi/mca/btl/scif/btl_scif.h @@ -90,6 +90,8 @@ typedef struct mca_btl_scif_module_t { ompi_free_list_t eager_frags; pthread_t listen_thread; + + bool exiting; } mca_btl_scif_module_t; typedef struct mca_btl_scif_component_t { diff --git a/ompi/mca/btl/scif/btl_scif_add_procs.c b/ompi/mca/btl/scif/btl_scif_add_procs.c index b95802365c..4c35a29278 100644 --- a/ompi/mca/btl/scif/btl_scif_add_procs.c +++ b/ompi/mca/btl/scif/btl_scif_add_procs.c @@ -117,7 +117,7 @@ static void *mca_btl_scif_connect_accept (void *arg) rc = scif_poll (&pollepd, 1, -1); if (1 == rc) { - if (SCIF_POLLIN != pollepd.revents) { + if (SCIF_POLLIN != pollepd.revents || mca_btl_scif_module.exiting) { break; } diff --git a/ompi/mca/btl/scif/btl_scif_component.c b/ompi/mca/btl/scif/btl_scif_component.c index 4998aae7a9..559f822554 100644 --- a/ompi/mca/btl/scif/btl_scif_component.c +++ b/ompi/mca/btl/scif/btl_scif_component.c @@ -261,6 +261,7 @@ static mca_btl_base_module_t **mca_btl_scif_component_init (int *num_btl_modules } base_modules[0] = &mca_btl_scif_module.super; + mca_btl_scif_module.exiting = false; rc = mca_btl_scif_modex_send (); if (OMPI_SUCCESS != rc) { diff --git 
a/ompi/mca/btl/scif/btl_scif_module.c b/ompi/mca/btl/scif/btl_scif_module.c index a52d593dd0..3f9188e0a9 100644 --- a/ompi/mca/btl/scif/btl_scif_module.c +++ b/ompi/mca/btl/scif/btl_scif_module.c @@ -111,6 +111,8 @@ mca_btl_scif_module_finalize (struct mca_btl_base_module_t *btl) OBJ_DESTRUCT(&mca_btl_scif_module.dma_frags); OBJ_DESTRUCT(&mca_btl_scif_module.eager_frags); + mca_btl_scif_module.exiting = true; + /* close all open connections and release endpoints */ if (NULL != scif_module->endpoints) { for (i = 0 ; i < scif_module->endpoint_count ; ++i) { @@ -125,6 +127,8 @@ mca_btl_scif_module_finalize (struct mca_btl_base_module_t *btl) /* close the listening endpoint */ if (-1 != mca_btl_scif_module.scif_fd) { + /* wake up the scif thread */ + scif_connect (mca_btl_scif_module.scif_fd, &mca_btl_scif_module.port_id); scif_close (mca_btl_scif_module.scif_fd); pthread_join(mca_btl_scif_module.listen_thread, NULL); }