From 3e6e1046a3fca0f8e630231facb43f7be79b5c60 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 16 May 2013 15:04:37 +0000 Subject: [PATCH] fix a correctness issue by returning an error if waitall fails and invoking the mpi error handler cmr:v1.7.2:reviewer=jsquyres This commit was SVN r28533. --- ompi/mca/dpm/base/base.h | 4 ++-- ompi/mca/dpm/base/dpm_base_common_fns.c | 8 ++++---- ompi/mca/dpm/base/dpm_base_null_fns.c | 4 ++-- ompi/mca/dpm/dpm.h | 2 +- ompi/mca/dpm/orte/dpm_orte.c | 6 +++--- ompi/mpi/c/comm_disconnect.c | 8 ++++++-- 6 files changed, 18 insertions(+), 14 deletions(-) diff --git a/ompi/mca/dpm/base/base.h b/ompi/mca/dpm/base/base.h index 46de9da28f..e89ed73ec8 100644 --- a/ompi/mca/dpm/base/base.h +++ b/ompi/mca/dpm/base/base.h @@ -52,13 +52,13 @@ OMPI_DECLSPEC char* ompi_dpm_base_dyn_init (void); OMPI_DECLSPEC int ompi_dpm_base_dyn_finalize (void); OMPI_DECLSPEC void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm); OMPI_DECLSPEC ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( ompi_communicator_t *comm); -OMPI_DECLSPEC void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs); +OMPI_DECLSPEC int ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs); /* NULL component functions */ int ompi_dpm_base_null_connect_accept (ompi_communicator_t *comm, int root, char *port_string, bool send_first, ompi_communicator_t **newcomm); -void ompi_dpm_base_null_disconnect(ompi_communicator_t *comm); +int ompi_dpm_base_null_disconnect(ompi_communicator_t *comm); int ompi_dpm_base_null_spawn(int count, char **array_of_commands, char ***array_of_argv, int *array_of_maxprocs, diff --git a/ompi/mca/dpm/base/dpm_base_common_fns.c b/ompi/mca/dpm/base/dpm_base_common_fns.c index f5501bb835..a215b9bb1d 100644 --- a/ompi/mca/dpm/base/dpm_base_common_fns.c +++ b/ompi/mca/dpm/base/dpm_base_common_fns.c @@ -177,7 +177,7 @@ ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( 
ompi_communicator_ * - call waitall on the overall request array * - free the objects */ -void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs) +int ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs) { ompi_request_t **reqs=NULL; @@ -189,7 +189,7 @@ void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj * for (i=0; i<count; i++) { if (NULL == objs[i]) { printf("Error in comm_disconnect_waitall\n"); - return; + return OMPI_ERROR; } totalcount += objs[i]->size; @@ -198,7 +198,7 @@ void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj * reqs = (ompi_request_t **) malloc (2*totalcount*sizeof(ompi_request_t *)); if ( NULL == reqs ) { printf("ompi_comm_disconnect_waitall: error allocating memory\n"); - return; + return OMPI_ERROR; } /* generate a single, large array of pending requests */ @@ -221,7 +221,7 @@ void ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj * free (reqs); - return; + return ret; } /**********************************************************************/ diff --git a/ompi/mca/dpm/base/dpm_base_null_fns.c b/ompi/mca/dpm/base/dpm_base_null_fns.c index 34e9330c93..816794dcb4 100644 --- a/ompi/mca/dpm/base/dpm_base_null_fns.c +++ b/ompi/mca/dpm/base/dpm_base_null_fns.c @@ -37,9 +37,9 @@ int ompi_dpm_base_null_connect_accept (ompi_communicator_t *comm, int root, return OMPI_ERR_NOT_SUPPORTED; } -void ompi_dpm_base_null_disconnect(ompi_communicator_t *comm) +int ompi_dpm_base_null_disconnect(ompi_communicator_t *comm) { - return; + return OMPI_SUCCESS; } int ompi_dpm_base_null_spawn(int count, char **array_of_commands, diff --git a/ompi/mca/dpm/dpm.h b/ompi/mca/dpm/dpm.h index 934f172682..4e990bc2da 100644 --- a/ompi/mca/dpm/dpm.h +++ b/ompi/mca/dpm/dpm.h @@ -51,7 +51,7 @@ typedef int (*ompi_dpm_base_module_connect_accept_fn_t)(ompi_communicator_t *com * Executes internally a disconnect on all dynamic communicators * in case the user did not disconnect them.
*/ -typedef void (*ompi_dpm_base_module_disconnect_fn_t)(ompi_communicator_t *comm); +typedef int (*ompi_dpm_base_module_disconnect_fn_t)(ompi_communicator_t *comm); /* * Dynamically spawn processes diff --git a/ompi/mca/dpm/orte/dpm_orte.c b/ompi/mca/dpm/orte/dpm_orte.c index a54a98cb2c..45bc22019e 100644 --- a/ompi/mca/dpm/orte/dpm_orte.c +++ b/ompi/mca/dpm/orte/dpm_orte.c @@ -72,7 +72,7 @@ static int init(void); static int connect_accept ( ompi_communicator_t *comm, int root, char *port_string, bool send_first, ompi_communicator_t **newcomm ); -static void disconnect(ompi_communicator_t *comm); +static int disconnect(ompi_communicator_t *comm); static int spawn(int count, char **array_of_commands, char ***array_of_argv, int *array_of_maxprocs, @@ -646,12 +646,12 @@ static int connect_accept ( ompi_communicator_t *comm, int root, return rc; } -static void disconnect(ompi_communicator_t *comm) +static int disconnect(ompi_communicator_t *comm) { ompi_dpm_base_disconnect_obj *dobj; dobj = ompi_dpm_base_disconnect_init (comm); - ompi_dpm_base_disconnect_waitall(1, &dobj); + return ompi_dpm_base_disconnect_waitall(1, &dobj); } diff --git a/ompi/mpi/c/comm_disconnect.c b/ompi/mpi/c/comm_disconnect.c index f58d2d46b7..a39e9aa610 100644 --- a/ompi/mpi/c/comm_disconnect.c +++ b/ompi/mpi/c/comm_disconnect.c @@ -41,6 +41,8 @@ static const char FUNC_NAME[] = "MPI_Comm_disconnect"; int MPI_Comm_disconnect(MPI_Comm *comm) { + int ret = MPI_SUCCESS; + MEMCHECKER( memchecker_comm(*comm); ); @@ -60,7 +62,9 @@ int MPI_Comm_disconnect(MPI_Comm *comm) OPAL_CR_ENTER_LIBRARY(); if ( OMPI_COMM_IS_DYNAMIC(*comm)) { - ompi_dpm.disconnect (*comm); + if (OMPI_SUCCESS != ompi_dpm.disconnect (*comm)) { + ret = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); + } } else { (*comm)->c_coll.coll_barrier(*comm, (*comm)->c_coll.coll_barrier_module); @@ -69,5 +73,5 @@ int MPI_Comm_disconnect(MPI_Comm *comm) ompi_comm_free(comm); OPAL_CR_EXIT_LIBRARY(); - return MPI_SUCCESS; + return 
ret; }