diff --git a/config/ompi_check_bproc.m4 b/config/ompi_check_bproc.m4 index d6603721d1..1d6a19aca9 100644 --- a/config/ompi_check_bproc.m4 +++ b/config/ompi_check_bproc.m4 @@ -52,7 +52,7 @@ AC_DEFUN([OMPI_CHECK_BPROC],[ LIBS="$ompi_check_bproc_save_LIBS" AS_IF([test "$ompi_check_bproc_happy" != "no"], - [AS_IF([test ! -z "$with_bproc"], + [AS_IF([test ! -z "$with_bproc" -a "$with_bproc" != "yes"], [$1_CPPFLAGS="$$1_CPPFLAGS -I$with_bproc/include" $1_LDFLAGS="$$1_LDFLAGS -L$with_bproc/lib"]) $1_LIBS="$$1_LIBS -lbproc" diff --git a/orte/mca/pls/bproc/pls_bproc.c b/orte/mca/pls/bproc/pls_bproc.c index 885e887ab6..c81b041031 100644 --- a/orte/mca/pls/bproc/pls_bproc.c +++ b/orte/mca/pls/bproc/pls_bproc.c @@ -266,7 +266,6 @@ static void orte_pls_bproc_waitpid_cb(pid_t wpid, int status, void *data) { free(proc); } OBJ_DESTRUCT(&ack); - OBJ_RELEASE(mca_pls_bproc_component.daemon_names); while(0 < mca_pls_bproc_component.num_daemons) { opal_condition_wait(&mca_pls_bproc_component.condition, &mca_pls_bproc_component.lock); @@ -501,6 +500,11 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid, rc = ORTE_ERROR; goto cleanup; } + if(0 < mca_pls_bproc_component.debug) { + opal_output(0, "PLS_BPROC DEBUG: %d daemons launched. First pid: %d\n", + rc, *pids); + } + for(i = 0; i < num_daemons; i++) { if(0 >= pids[i]) { opal_output(0, "pls_bproc: failed to launch all daemons. " @@ -543,11 +547,6 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid, } mca_pls_bproc_component.num_daemons += num_daemons; - if(0 < mca_pls_bproc_component.debug) { - opal_output(0, "PLS_BPROC DEBUG: %d daemons launched. First pid: %d\n", - rc, *pids); - } - /* wait for communication back */ for(i = 0; i < num_daemons; i++) { rc = mca_oob_recv_packed(MCA_OOB_NAME_ANY, &ack, MCA_OOB_TAG_BPROC); @@ -561,6 +560,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid, opal_output(0, "pls_bproc: daemon exited unexpectedly\n"); rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); + orte_pls_bproc_terminate_job(daemon_jobid); goto cleanup; } } @@ -689,13 +689,6 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) { goto cleanup; } - /* init the list to hold the daemon names */ - rc = orte_pointer_array_init(&mca_pls_bproc_component.daemon_names, 8, 200000, 8); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - /* get the cellid */ rc = orte_ns_base_get_cellid(&cellid, orte_process_info.my_name); if(ORTE_SUCCESS != rc) { diff --git a/orte/mca/pls/bproc/pls_bproc_component.c b/orte/mca/pls/bproc/pls_bproc_component.c index e6b8f2e02a..938fd8f2cd 100644 --- a/orte/mca/pls/bproc/pls_bproc_component.c +++ b/orte/mca/pls/bproc/pls_bproc_component.c @@ -17,6 +17,7 @@ */ #include "orte_config.h" +#include "orte/mca/errmgr/errmgr.h" #include "opal/mca/mca.h" #include "opal/mca/base/mca_base_param.h" #include "pls_bproc.h" @@ -64,6 +65,7 @@ static char* orte_pls_bproc_param_register_string(const char* param_name, } int orte_pls_bproc_component_open(void) { + int rc; /* init parameters */ mca_pls_bproc_component.debug = orte_pls_bproc_param_register_int("debug", 0); mca_pls_bproc_component.priority = @@ -78,12 +80,20 @@ int orte_pls_bproc_component_open(void) { mca_pls_bproc_component.done_launching = false; OBJ_CONSTRUCT(&mca_pls_bproc_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_pls_bproc_component.condition, opal_condition_t); + /* init the list to hold the daemon names */ + rc = orte_pointer_array_init(&mca_pls_bproc_component.daemon_names, 8, 200000, +8); + if(ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } + return ORTE_SUCCESS; } int orte_pls_bproc_component_close(void) { OBJ_DESTRUCT(&mca_pls_bproc_component.lock); OBJ_DESTRUCT(&mca_pls_bproc_component.condition); + OBJ_RELEASE(mca_pls_bproc_component.daemon_names); return ORTE_SUCCESS; }