From 30f23ac67a6278cc898042d4780549e473e60f5a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 24 Nov 2017 11:18:47 -0800 Subject: [PATCH 1/5] Save one more file descriptor per process by not opening one for stddiag if PMIx (version > 1.x) is active since all diagnostic messages will instead flow thru the PMIx connection. Unfortunately, PMIx v1 does not support this feature, but we can remove the stddiag support once PMIx v1 slides out of the support window Signed-off-by: Ralph Castain --- opal/mca/pmix/cray/pmix_cray.c | 1 + opal/mca/pmix/ext1x/pmix1x.c | 3 +- opal/mca/pmix/ext2x/ext2x.c | 1 + opal/mca/pmix/flux/pmix_flux.c | 1 + opal/mca/pmix/isolated/pmix_isolated.c | 1 + opal/mca/pmix/pmix.h | 1 + opal/mca/pmix/pmix3x/pmix3x.c | 1 + opal/mca/pmix/s1/pmix_s1.c | 1 + opal/mca/pmix/s2/pmix_s2.c | 1 + orte/mca/iof/base/iof_base_setup.c | 65 +++++++++++++-------- orte/mca/iof/hnp/iof_hnp.c | 16 +++-- orte/mca/iof/orted/iof_orted.c | 12 ++-- orte/mca/odls/default/odls_default_module.c | 19 +++--- 13 files changed, 82 insertions(+), 41 deletions(-) diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index bf5ad0e030..181af7c1e0 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -83,6 +83,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv); #endif const opal_pmix_base_module_t opal_pmix_cray_module = { + .name = "cray", .init = cray_init, .finalize = cray_fini, .initialized = cray_initialized, diff --git a/opal/mca/pmix/ext1x/pmix1x.c b/opal/mca/pmix/ext1x/pmix1x.c index c255962d15..578178408a 100644 --- a/opal/mca/pmix/ext1x/pmix1x.c +++ b/opal/mca/pmix/ext1x/pmix1x.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* Copyright (c) 2014-2015 Mellanox Technologies, Inc. @@ -50,6 +50,7 @@ static const char *pmix1_get_nspace(opal_jobid_t jobid); static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_ext1x_module = { + .name = "pmix1", /* client APIs */ .init = pmix1_client_init, .finalize = pmix1_client_finalize, diff --git a/opal/mca/pmix/ext2x/ext2x.c b/opal/mca/pmix/ext2x/ext2x.c index 39d6ed61ae..968d110a10 100644 --- a/opal/mca/pmix/ext2x/ext2x.c +++ b/opal/mca/pmix/ext2x/ext2x.c @@ -72,6 +72,7 @@ static void ext2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); const opal_pmix_base_module_t opal_pmix_ext2x_module = { + .name = "pmix2", /* client APIs */ .init = ext2x_client_init, .finalize = ext2x_client_finalize, diff --git a/opal/mca/pmix/flux/pmix_flux.c b/opal/mca/pmix/flux/pmix_flux.c index 187108bcc7..5e333f0620 100644 --- a/opal/mca/pmix/flux/pmix_flux.c +++ b/opal/mca/pmix/flux/pmix_flux.c @@ -59,6 +59,7 @@ static const char *flux_get_nspace(opal_jobid_t jobid); static void flux_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_flux_module = { + .name = "flux", .init = flux_init, .finalize = flux_fini, .initialized = flux_initialized, diff --git a/opal/mca/pmix/isolated/pmix_isolated.c b/opal/mca/pmix/isolated/pmix_isolated.c index a13ec13783..3b803fc81f 100644 --- a/opal/mca/pmix/isolated/pmix_isolated.c +++ b/opal/mca/pmix/isolated/pmix_isolated.c @@ -83,6 +83,7 @@ static const char *isolated_get_nspace(opal_jobid_t jobid); static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_isolated_module = { + .name = "isolated", .init = isolated_init, .finalize = isolated_fini, .initialized = isolated_initialized, diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index 53e04571ab..b768d7f68d 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -871,6 +871,7 @@ typedef int 
(*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor, * the standard public API data structure */ typedef struct { + char *name; /* client APIs */ opal_pmix_base_module_init_fn_t init; opal_pmix_base_module_fini_fn_t finalize; diff --git a/opal/mca/pmix/pmix3x/pmix3x.c b/opal/mca/pmix/pmix3x/pmix3x.c index 5499d18d0a..b6191e4138 100644 --- a/opal/mca/pmix/pmix3x/pmix3x.c +++ b/opal/mca/pmix/pmix3x/pmix3x.c @@ -72,6 +72,7 @@ static void pmix3x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); const opal_pmix_base_module_t opal_pmix_pmix3x_module = { + .name = "pmix3", /* client APIs */ .init = pmix3x_client_init, .finalize = pmix3x_client_finalize, diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c index 7be7703882..d397163b30 100644 --- a/opal/mca/pmix/s1/pmix_s1.c +++ b/opal/mca/pmix/s1/pmix_s1.c @@ -59,6 +59,7 @@ static const char *s1_get_nspace(opal_jobid_t jobid); static void s1_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_s1_module = { + .name = "s1", .init = s1_init, .finalize = s1_fini, .initialized = s1_initialized, diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index b98905f4e1..ab9a16e5dc 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ b/opal/mca/pmix/s2/pmix_s2.c @@ -62,6 +62,7 @@ static const char *s2_get_nspace(opal_jobid_t jobid); static void s2_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_s2_module = { + .name = "s2", .init = s2_init, .finalize = s2_fini, .initialized = s2_initialized, diff --git a/orte/mca/iof/base/iof_base_setup.c b/orte/mca/iof/base/iof_base_setup.c index 1a4288782f..24574c35e5 100644 --- a/orte/mca/iof/base/iof_base_setup.c +++ b/orte/mca/iof/base/iof_base_setup.c @@ -64,6 +64,7 @@ #include "opal/util/os_dirpath.h" #include "opal/util/output.h" #include "opal/util/argv.h" +#include "opal/mca/pmix/pmix.h" #include "orte/mca/errmgr/errmgr.h" #include 
"orte/util/name_fns.h" @@ -119,9 +120,11 @@ orte_iof_base_setup_prefork(orte_iof_base_io_conf_t *opts) return ORTE_ERR_SYS_LIMITS_PIPES; } } - if (pipe(opts->p_internal) < 0) { - ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); - return ORTE_ERR_SYS_LIMITS_PIPES; + if (0 == strncmp(opal_pmix.name, "pmix1", 5)) { + if (pipe(opts->p_internal) < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); + return ORTE_ERR_SYS_LIMITS_PIPES; + } } return ORTE_SUCCESS; @@ -141,7 +144,9 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) if( !orte_iof_base.redirect_app_stderr_to_stdout ) { close(opts->p_stderr[0]); } - close(opts->p_internal[0]); + if (0 == strncmp(opal_pmix.name, "pmix1", 5)) { + close(opts->p_internal[0]); + } if (opts->usepty) { /* disable echo */ @@ -163,19 +168,27 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) return ORTE_ERR_PIPE_SETUP_FAILURE; } ret = dup2(opts->p_stdout[1], fileno(stdout)); - if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + if (ret < 0) { + return ORTE_ERR_PIPE_SETUP_FAILURE; + } if( orte_iof_base.redirect_app_stderr_to_stdout ) { ret = dup2(opts->p_stdout[1], fileno(stderr)); - if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + if (ret < 0) { + return ORTE_ERR_PIPE_SETUP_FAILURE; + } } close(opts->p_stdout[1]); } else { if(opts->p_stdout[1] != fileno(stdout)) { ret = dup2(opts->p_stdout[1], fileno(stdout)); - if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + if (ret < 0) { + return ORTE_ERR_PIPE_SETUP_FAILURE; + } if( orte_iof_base.redirect_app_stderr_to_stdout ) { ret = dup2(opts->p_stdout[1], fileno(stderr)); - if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; + if (ret < 0) { + return ORTE_ERR_PIPE_SETUP_FAILURE; + } } close(opts->p_stdout[1]); } @@ -183,7 +196,9 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) if (opts->connect_stdin) { if(opts->p_stdin[0] != fileno(stdin)) { ret = dup2(opts->p_stdin[0], fileno(stdin)); - if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE; 
+ if (ret < 0) { + return ORTE_ERR_PIPE_SETUP_FAILURE; + } close(opts->p_stdin[0]); } } else { @@ -205,17 +220,19 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) } } - if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) { - /* Set an environment variable that the new child process can use - to get the fd of the pipe connected to the INTERNAL IOF tag. */ - asprintf(&str, "%d", opts->p_internal[1]); - if (NULL != str) { - opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env); - free(str); + if (0 == strncmp(opal_pmix.name, "pmix1", 5)) { + if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) { + /* Set an environment variable that the new child process can use + to get the fd of the pipe connected to the INTERNAL IOF tag. */ + asprintf(&str, "%d", opts->p_internal[1]); + if (NULL != str) { + opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env); + free(str); + } + } + else if( orte_map_stddiag_to_stdout ) { + opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env); } - } - else if( orte_map_stddiag_to_stdout ) { - opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env); } return ORTE_SUCCESS; @@ -253,10 +270,12 @@ orte_iof_base_setup_parent(const orte_process_name_t* name, } } - ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]); - if(ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; + if (0 == strncmp(opal_pmix.name, "pmix1", 5)) { + ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]); + if(ORTE_SUCCESS != ret) { + ORTE_ERROR_LOG(ret); + return ret; + } } return ORTE_SUCCESS; diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index f8c6c1791c..8954369358 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -42,6 +42,7 @@ #endif #include "opal/mca/event/event.h" +#include "opal/mca/pmix/pmix.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" @@ -186,7 +187,7 @@ static int hnp_push(const 
orte_process_name_t* dst_name, orte_iof_tag_t src_tag, } else if (src_tag & ORTE_IOF_STDERR) { ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR, orte_iof_hnp_read_local_handler, false); - } else if (src_tag & ORTE_IOF_STDDIAG) { + } else if (0 == strncmp(opal_pmix.name, "pmix1", 5) && src_tag & ORTE_IOF_STDDIAG) { ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG, orte_iof_hnp_read_local_handler, false); } @@ -201,7 +202,8 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, * because one of the readevents fires -prior- to all of them having * been defined! */ - if (NULL != proct->revstdout && NULL != proct->revstddiag && + if (NULL != proct->revstdout && + (0 != strncmp(opal_pmix.name, "pmix1", 5) || NULL != proct->revstddiag) && (orte_iof_base.redirect_app_stderr_to_stdout || NULL != proct->revstderr)) { if (proct->copy) { /* see if there are any wildcard subscribers out there that @@ -220,7 +222,9 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, if (!orte_iof_base.redirect_app_stderr_to_stdout) { ORTE_IOF_READ_ACTIVATE(proct->revstderr); } - ORTE_IOF_READ_ACTIVATE(proct->revstddiag); + if (NULL != proct->revstddiag) { + ORTE_IOF_READ_ACTIVATE(proct->revstddiag); + } } return ORTE_SUCCESS; } @@ -586,9 +590,9 @@ static void stdin_write_handler(int fd, short event, void *cbdata) } } goto check; -re_enter: + re_enter: ORTE_IOF_SINK_ACTIVATE(wev); -check: + check: if (NULL != mca_iof_hnp_component.stdinev && !orte_abnormal_term_ordered && !mca_iof_hnp_component.stdinev->active) { @@ -612,7 +616,7 @@ check: } } return; -finish: + finish: OBJ_RELEASE(wev); sink->wev = NULL; return; diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index 9bd5a312bd..261b7d3c8f 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -42,6 +42,7 @@ #endif #include "opal/util/os_dirpath.h" +#include "opal/mca/pmix/pmix.h" #include 
"orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" @@ -164,7 +165,7 @@ static int orted_push(const orte_process_name_t* dst_name, proct->name.vpid = dst_name->vpid; opal_list_append(&mca_iof_orted_component.procs, &proct->super); -SETUP: + SETUP: /* get the local jobdata for this proc */ if (NULL == (jobdat = orte_get_job_data_object(proct->name.jobid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); @@ -177,7 +178,7 @@ SETUP: } else if (src_tag & ORTE_IOF_STDERR) { ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR, orte_iof_orted_read_handler, false); - } else if (src_tag & ORTE_IOF_STDDIAG) { + } else if (0 == strncmp(opal_pmix.name, "pmix1", 5) && src_tag & ORTE_IOF_STDDIAG) { ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG, orte_iof_orted_read_handler, false); } @@ -192,13 +193,16 @@ SETUP: * because one of the readevents fires -prior- to all of them having * been defined! */ - if (NULL != proct->revstdout && NULL != proct->revstddiag && + if (NULL != proct->revstdout && + (0 != strncmp(opal_pmix.name, "pmix1", 5) || NULL != proct->revstddiag) && (orte_iof_base.redirect_app_stderr_to_stdout || NULL != proct->revstderr)) { ORTE_IOF_READ_ACTIVATE(proct->revstdout); if (!orte_iof_base.redirect_app_stderr_to_stdout) { ORTE_IOF_READ_ACTIVATE(proct->revstderr); } - ORTE_IOF_READ_ACTIVATE(proct->revstddiag); + if (NULL != proct->revstddiag) { + ORTE_IOF_READ_ACTIVATE(proct->revstddiag); + } } return ORTE_SUCCESS; } diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 4723708f8e..9822add360 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -112,6 +112,7 @@ #include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" +#include "opal/mca/pmix/pmix.h" #include "opal/class/opal_pointer_array.h" #include "opal/util/opal_environ.h" #include "opal/util/show_help.h" @@ -312,7 +313,7 @@ static int 
close_open_file_descriptors(int write_fd, closedir(dir); return ORTE_ERR_TYPE_MISMATCH; } - if (fd >=3 && fd != opts.p_internal[1] && fd != write_fd) { + if (fd >=3 && (0 != strncmp(opal_pmix.name, "pmix", 4) && fd != opts.p_internal[1]) && fd != write_fd) { close(fd); } } @@ -374,11 +375,13 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) } close(fdnull); } - fdnull = open("/dev/null", O_RDONLY, 0); - if (fdnull > cd->opts.p_internal[1]) { - dup2(fdnull, cd->opts.p_internal[1]); + if (0 != strncmp(opal_pmix.name, "pmix", 4)) { + fdnull = open("/dev/null", O_RDONLY, 0); + if (fdnull > cd->opts.p_internal[1]) { + dup2(fdnull, cd->opts.p_internal[1]); + } + close(fdnull); } - close(fdnull); } /* close all open file descriptors w/ exception of stdin/stdout/stderr, @@ -387,7 +390,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) { // close *all* file descriptors -- slow for(fd=3; fd<fdmax; fd++) { - if (fd != cd->opts.p_internal[1] && fd != write_fd) { + if ((0 != strncmp(opal_pmix.name, "pmix", 4) && fd != cd->opts.p_internal[1]) && fd != write_fd) { close(fd); } } @@ -456,7 +459,9 @@ static int do_parent(orte_odls_spawn_caddy_t *cd, int read_fd) if( !orte_iof_base.redirect_app_stderr_to_stdout ) { close(cd->opts.p_stderr[1]); } - close(cd->opts.p_internal[1]); + if (0 != strncmp(opal_pmix.name, "pmix", 4)) { + close(cd->opts.p_internal[1]); + } /* Block reading a message from the pipe */ while (1) { From 3906aaf41aad8337a0681e5029d776c6138bb8c1 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 25 Nov 2017 11:48:19 -0800 Subject: [PATCH 2/5] Silence warnings Signed-off-by: Ralph Castain --- config/opal_check_pmi.m4 | 9 ++- .../monitoring/common_monitoring_coll.c | 9 ++- ompi/mca/osc/rdma/osc_rdma_component.c | 9 +-- ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 4 +- .../treematch/treematch/tm_kpartitioning.h | 6 +- opal/mca/pmix/cray/pmix_cray.c | 1 - opal/mca/pmix/ext1x/pmix1x.c | 1 -
opal/mca/pmix/ext2x/ext2x.c | 1 - opal/mca/pmix/flux/pmix_flux.c | 1 - opal/mca/pmix/isolated/pmix_isolated.c | 1 - opal/mca/pmix/pmix.h | 1 - opal/mca/pmix/pmix3x/pmix3x.c | 1 - opal/mca/pmix/s1/pmix_s1.c | 1 - opal/mca/pmix/s2/pmix_s2.c | 1 - opal/util/net.c | 2 + orte/mca/iof/base/base.h | 2 + orte/mca/iof/base/iof_base_frame.c | 4 ++ orte/mca/iof/base/iof_base_setup.c | 58 +++++++++---------- orte/mca/iof/base/iof_base_setup.h | 2 + orte/mca/iof/hnp/iof_hnp.c | 17 +++++- orte/mca/iof/hnp/iof_hnp_read.c | 8 ++- orte/mca/iof/orted/iof_orted.c | 17 +++++- orte/mca/iof/orted/iof_orted_read.c | 8 ++- orte/mca/odls/default/odls_default_module.c | 36 +++++++----- 24 files changed, 122 insertions(+), 78 deletions(-) diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index 2fa331fbd2..bae5be31b5 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -245,6 +245,7 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AC_MSG_CHECKING([if user requested external PMIx support($with_pmix)]) opal_prun_happy=no + opal_external_have_pmix1=0 AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"], [AC_MSG_RESULT([no]) opal_external_pmix_happy=no @@ -299,7 +300,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ AS_IF([test "x`ls $pmix_ext_install_dir/include/pmix_version.h 2> /dev/null`" = "x"], [AC_MSG_RESULT([version file not found - assuming v1.1.4]) opal_external_pmix_version_found=1 - opal_external_pmix_version=114], + opal_external_pmix_version=114 + opal_external_have_pmix1=1], [AC_MSG_RESULT([version file found]) opal_external_pmix_version_found=0]) @@ -342,7 +344,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=1x - opal_external_pmix_version_found=1], + opal_external_pmix_version_found=1 + opal_external_have_pmix1=1], [AC_MSG_RESULT([not found])])]) AS_IF([test "x$opal_external_pmix_version" = "x"], @@ -361,6 +364,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[ opal_external_pmix_LIBS=-lpmix 
opal_external_pmix_happy=yes]) + AC_DEFINE_UNQUOTED([OPAL_PMIX_V1],[$opal_external_have_pmix1], + [Whether the external PMIx library is v1]) AM_CONDITIONAL([OPAL_WANT_PRUN], [test "$opal_prun_happy" = "yes"]) OPAL_VAR_SCOPE_POP ]) diff --git a/ompi/mca/common/monitoring/common_monitoring_coll.c b/ompi/mca/common/monitoring/common_monitoring_coll.c index 9383eb415d..e37d5bb6c3 100644 --- a/ompi/mca/common/monitoring/common_monitoring_coll.c +++ b/ompi/mca/common/monitoring/common_monitoring_coll.c @@ -54,7 +54,6 @@ int mca_common_monitoring_coll_cache_name(ompi_communicator_t*comm) static inline void mca_common_monitoring_coll_cache(mca_monitoring_coll_data_t*data) { - int world_rank; if( -1 == data->world_rank ) { /* Get current process world_rank */ mca_common_monitoring_get_world_rank(ompi_comm_rank(data->p_comm), data->p_comm, @@ -95,7 +94,7 @@ mca_monitoring_coll_data_t*mca_common_monitoring_coll_new( ompi_communicator_t*c } data->p_comm = comm; - + /* Allocate hashtable */ if( NULL == comm_data ) { comm_data = OBJ_NEW(opal_hash_table_t); @@ -105,7 +104,7 @@ mca_monitoring_coll_data_t*mca_common_monitoring_coll_new( ompi_communicator_t*c } opal_hash_table_init(comm_data, 2048); } - + /* Insert in hashtable */ uint64_t key = *((uint64_t*)&comm); if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(comm_data, key, (void*)data) ) { @@ -127,7 +126,7 @@ void mca_common_monitoring_coll_release(mca_monitoring_coll_data_t*data) return; } #endif /* OPAL_ENABLE_DEBUG */ - + /* not flushed yet */ data->is_released = 1; mca_common_monitoring_coll_cache(data); @@ -141,7 +140,7 @@ static void mca_common_monitoring_coll_cond_release(mca_monitoring_coll_data_t*d return; } #endif /* OPAL_ENABLE_DEBUG */ - + if( data->is_released ) { /* if the communicator is already released */ opal_hash_table_remove_value_uint64(comm_data, *((uint64_t*)&data->p_comm)); data->p_comm = NULL; diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 
d867eba0eb..d88689e2f4 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -65,10 +65,10 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); - +#if 0 // stale code? static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct opal_info_t *info); static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct opal_info_t **info_used); - +#endif static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl); static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *key, char *value); @@ -1250,12 +1250,13 @@ static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *ke } /* enforce collectiveness... */ module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); -/* +/* * Accept any value */ return module->no_locks ? "true" : "false"; } +#if 0 // stale code? 
static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct opal_info_t *info) { ompi_osc_rdma_module_t *module = GET_MODULE(win); @@ -1302,5 +1303,5 @@ static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct opal_info_t ** return OMPI_SUCCESS; } - +#endif OBJ_CLASS_INSTANCE(ompi_osc_rdma_aggregation_t, opal_list_item_t, NULL, NULL); diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index ab7ffdd77f..4ed76e12a8 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -133,7 +133,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ** and then mapping it to memory ** For sharedfp we also want to put the file backed shared memory into the tmp directory */ - filename_basename = basename(filename); + filename_basename = basename((char*)filename); /* format is "%s/%s_cid-%d.sm", see below */ sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4; sm_filename = (char*) malloc( sizeof(char) * sm_filename_length); @@ -191,7 +191,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, #if defined(HAVE_SEM_OPEN) -#if defined (__APPLE__) +#if defined (__APPLE__) sm_data->sem_name = (char*) malloc( sizeof(char) * 32); snprintf(sm_data->sem_name,31,"OMPIO_%s",filename_basename); #else diff --git a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.h b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.h index 09c2227c06..aa9eee619d 100644 --- a/ompi/mca/topo/treematch/treematch/tm_kpartitioning.h +++ b/ompi/mca/topo/treematch/treematch/tm_kpartitioning.h @@ -1,9 +1,11 @@ typedef struct _com_mat_t{ - double **comm; + double **comm; int n; /*comm is of size n by n the other element are zeroes*/ - + } com_mat_t; int *kpartition(int, com_mat_t*, int, int *, int); tm_tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int 
N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed); + +#define HAVE_LIBSCOTCH 0 // missing configure setup? diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 181af7c1e0..bf5ad0e030 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -83,7 +83,6 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv); #endif const opal_pmix_base_module_t opal_pmix_cray_module = { - .name = "cray", .init = cray_init, .finalize = cray_fini, .initialized = cray_initialized, diff --git a/opal/mca/pmix/ext1x/pmix1x.c b/opal/mca/pmix/ext1x/pmix1x.c index 578178408a..fbc6025da7 100644 --- a/opal/mca/pmix/ext1x/pmix1x.c +++ b/opal/mca/pmix/ext1x/pmix1x.c @@ -50,7 +50,6 @@ static const char *pmix1_get_nspace(opal_jobid_t jobid); static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_ext1x_module = { - .name = "pmix1", /* client APIs */ .init = pmix1_client_init, .finalize = pmix1_client_finalize, diff --git a/opal/mca/pmix/ext2x/ext2x.c b/opal/mca/pmix/ext2x/ext2x.c index 968d110a10..39d6ed61ae 100644 --- a/opal/mca/pmix/ext2x/ext2x.c +++ b/opal/mca/pmix/ext2x/ext2x.c @@ -72,7 +72,6 @@ static void ext2x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); const opal_pmix_base_module_t opal_pmix_ext2x_module = { - .name = "pmix2", /* client APIs */ .init = ext2x_client_init, .finalize = ext2x_client_finalize, diff --git a/opal/mca/pmix/flux/pmix_flux.c b/opal/mca/pmix/flux/pmix_flux.c index 5e333f0620..187108bcc7 100644 --- a/opal/mca/pmix/flux/pmix_flux.c +++ b/opal/mca/pmix/flux/pmix_flux.c @@ -59,7 +59,6 @@ static const char *flux_get_nspace(opal_jobid_t jobid); static void flux_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_flux_module = { - .name = "flux", .init = flux_init, .finalize = flux_fini, .initialized = flux_initialized, diff --git 
a/opal/mca/pmix/isolated/pmix_isolated.c b/opal/mca/pmix/isolated/pmix_isolated.c index 3b803fc81f..a13ec13783 100644 --- a/opal/mca/pmix/isolated/pmix_isolated.c +++ b/opal/mca/pmix/isolated/pmix_isolated.c @@ -83,7 +83,6 @@ static const char *isolated_get_nspace(opal_jobid_t jobid); static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_isolated_module = { - .name = "isolated", .init = isolated_init, .finalize = isolated_fini, .initialized = isolated_initialized, diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index b768d7f68d..53e04571ab 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -871,7 +871,6 @@ typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor, * the standard public API data structure */ typedef struct { - char *name; /* client APIs */ opal_pmix_base_module_init_fn_t init; opal_pmix_base_module_fini_fn_t finalize; diff --git a/opal/mca/pmix/pmix3x/pmix3x.c b/opal/mca/pmix/pmix3x/pmix3x.c index b6191e4138..5499d18d0a 100644 --- a/opal/mca/pmix/pmix3x/pmix3x.c +++ b/opal/mca/pmix/pmix3x/pmix3x.c @@ -72,7 +72,6 @@ static void pmix3x_log(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); const opal_pmix_base_module_t opal_pmix_pmix3x_module = { - .name = "pmix3", /* client APIs */ .init = pmix3x_client_init, .finalize = pmix3x_client_finalize, diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c index d397163b30..7be7703882 100644 --- a/opal/mca/pmix/s1/pmix_s1.c +++ b/opal/mca/pmix/s1/pmix_s1.c @@ -59,7 +59,6 @@ static const char *s1_get_nspace(opal_jobid_t jobid); static void s1_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_s1_module = { - .name = "s1", .init = s1_init, .finalize = s1_fini, .initialized = s1_initialized, diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index ab9a16e5dc..b98905f4e1 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ 
b/opal/mca/pmix/s2/pmix_s2.c @@ -62,7 +62,6 @@ static const char *s2_get_nspace(opal_jobid_t jobid); static void s2_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_s2_module = { - .name = "s2", .init = s2_init, .finalize = s2_fini, .initialized = s2_initialized, diff --git a/opal/util/net.c b/opal/util/net.c index 06178e0c89..77fcf25ff4 100644 --- a/opal/util/net.c +++ b/opal/util/net.c @@ -361,7 +361,9 @@ opal_net_addr_isipv4public(const struct sockaddr *addr) bool opal_net_addr_isipv6linklocal(const struct sockaddr *addr) { +#if OPAL_ENABLE_IPV6 struct sockaddr_in6 if_addr; +#endif switch (addr->sa_family) { #if OPAL_ENABLE_IPV6 diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index 2e767c18d3..8f80c066ac 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -126,7 +126,9 @@ typedef struct { orte_iof_sink_t *stdinev; orte_iof_read_event_t *revstdout; orte_iof_read_event_t *revstderr; +#if OPAL_PMIX_V1 orte_iof_read_event_t *revstddiag; +#endif opal_list_t *subscribers; bool copy; } orte_iof_proc_t; diff --git a/orte/mca/iof/base/iof_base_frame.c b/orte/mca/iof/base/iof_base_frame.c index 6c9d0b379c..5f850945fb 100644 --- a/orte/mca/iof/base/iof_base_frame.c +++ b/orte/mca/iof/base/iof_base_frame.c @@ -211,7 +211,9 @@ static void orte_iof_base_proc_construct(orte_iof_proc_t* ptr) ptr->stdinev = NULL; ptr->revstdout = NULL; ptr->revstderr = NULL; +#if OPAL_PMIX_V1 ptr->revstddiag = NULL; +#endif ptr->subscribers = NULL; ptr->copy = true; } @@ -226,9 +228,11 @@ static void orte_iof_base_proc_destruct(orte_iof_proc_t* ptr) if (NULL != ptr->revstderr) { OBJ_RELEASE(ptr->revstderr); } +#if OPAL_PMIX_V1 if (NULL != ptr->revstddiag) { OBJ_RELEASE(ptr->revstddiag); } +#endif if (NULL != ptr->subscribers) { OPAL_LIST_RELEASE(ptr->subscribers); } diff --git a/orte/mca/iof/base/iof_base_setup.c b/orte/mca/iof/base/iof_base_setup.c index 24574c35e5..01fda216c9 100644 --- 
a/orte/mca/iof/base/iof_base_setup.c +++ b/orte/mca/iof/base/iof_base_setup.c @@ -64,7 +64,6 @@ #include "opal/util/os_dirpath.h" #include "opal/util/output.h" #include "opal/util/argv.h" -#include "opal/mca/pmix/pmix.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" @@ -120,13 +119,12 @@ orte_iof_base_setup_prefork(orte_iof_base_io_conf_t *opts) return ORTE_ERR_SYS_LIMITS_PIPES; } } - if (0 == strncmp(opal_pmix.name, "pmix1", 5)) { - if (pipe(opts->p_internal) < 0) { - ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); - return ORTE_ERR_SYS_LIMITS_PIPES; - } +#if OPAL_PMIX_V1 + if (pipe(opts->p_internal) < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES); + return ORTE_ERR_SYS_LIMITS_PIPES; } - +#endif return ORTE_SUCCESS; } @@ -135,7 +133,9 @@ int orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) { int ret; +#if OPAL_PMIX_V1 char *str; +#endif if (opts->connect_stdin) { close(opts->p_stdin[1]); @@ -144,9 +144,9 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) if( !orte_iof_base.redirect_app_stderr_to_stdout ) { close(opts->p_stderr[0]); } - if (0 == strncmp(opal_pmix.name, "pmix1", 5)) { - close(opts->p_internal[0]); - } +#if OPAL_PMIX_V1 + close(opts->p_internal[0]); +#endif if (opts->usepty) { /* disable echo */ @@ -220,20 +220,19 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env) } } - if (0 == strncmp(opal_pmix.name, "pmix1", 5)) { - if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) { - /* Set an environment variable that the new child process can use - to get the fd of the pipe connected to the INTERNAL IOF tag. 
*/ - asprintf(&str, "%d", opts->p_internal[1]); - if (NULL != str) { - opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env); - free(str); - } - } - else if( orte_map_stddiag_to_stdout ) { - opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env); +#if OPAL_PMIX_V1 + if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) { + /* Set an environment variable that the new child process can use + to get the fd of the pipe connected to the INTERNAL IOF tag. */ + asprintf(&str, "%d", opts->p_internal[1]); + if (NULL != str) { + opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env); + free(str); } + } else if( orte_map_stddiag_to_stdout ) { + opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env); } +#endif return ORTE_SUCCESS; } @@ -270,13 +269,13 @@ orte_iof_base_setup_parent(const orte_process_name_t* name, } } - if (0 == strncmp(opal_pmix.name, "pmix1", 5)) { - ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]); - if(ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } +#if OPAL_PMIX_V1 + ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]); + if(ORTE_SUCCESS != ret) { + ORTE_ERROR_LOG(ret); + return ret; } +#endif return ORTE_SUCCESS; } @@ -372,12 +371,13 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name, orte_iof_base_write_handler); } } - +#if OPAL_PMIX_V1 if (NULL != proct->revstddiag && NULL == proct->revstddiag->sink) { /* always tie the sink for stddiag to stderr */ OBJ_RETAIN(proct->revstderr->sink); proct->revstddiag->sink = proct->revstderr->sink; } +#endif } return ORTE_SUCCESS; diff --git a/orte/mca/iof/base/iof_base_setup.h b/orte/mca/iof/base/iof_base_setup.h index ea9c4951ca..bb6621e527 100644 --- a/orte/mca/iof/base/iof_base_setup.h +++ b/orte/mca/iof/base/iof_base_setup.h @@ -35,7 +35,9 @@ struct orte_iof_base_io_conf_t { int p_stdin[2]; int p_stdout[2]; int p_stderr[2]; +#if OPAL_PMIX_V1 int p_internal[2]; +#endif }; typedef struct orte_iof_base_io_conf_t 
orte_iof_base_io_conf_t; diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index 8954369358..2778111053 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -42,7 +42,6 @@ #endif #include "opal/mca/event/event.h" -#include "opal/mca/pmix/pmix.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" @@ -187,9 +186,11 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, } else if (src_tag & ORTE_IOF_STDERR) { ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR, orte_iof_hnp_read_local_handler, false); - } else if (0 == strncmp(opal_pmix.name, "pmix1", 5) && src_tag & ORTE_IOF_STDDIAG) { +#if OPAL_PMIX_V1 + } else if (src_tag & ORTE_IOF_STDDIAG) { ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG, orte_iof_hnp_read_local_handler, false); +#endif } /* setup any requested output files */ if (ORTE_SUCCESS != (rc = orte_iof_base_setup_output_files(dst_name, jdata, proct))) { @@ -203,7 +204,9 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, * been defined! 
*/ if (NULL != proct->revstdout && - (0 != strncmp(opal_pmix.name, "pmix1", 5) || NULL != proct->revstddiag) && +#if OPAL_PMIX_V1 + NULL != proct->revstddiag && +#endif (orte_iof_base.redirect_app_stderr_to_stdout || NULL != proct->revstderr)) { if (proct->copy) { /* see if there are any wildcard subscribers out there that @@ -222,9 +225,11 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, if (!orte_iof_base.redirect_app_stderr_to_stdout) { ORTE_IOF_READ_ACTIVATE(proct->revstderr); } +#if OPAL_PMIX_V1 if (NULL != proct->revstddiag) { ORTE_IOF_READ_ACTIVATE(proct->revstddiag); } +#endif } return ORTE_SUCCESS; } @@ -408,6 +413,7 @@ static int hnp_close(const orte_process_name_t* peer, } ++cnt; } +#if OPAL_PMIX_V1 if (ORTE_IOF_STDDIAG & source_tag) { if (NULL != proct->revstddiag) { orte_iof_base_static_dump_output(proct->revstddiag); @@ -415,6 +421,9 @@ static int hnp_close(const orte_process_name_t* peer, } ++cnt; } +#else + ++cnt; +#endif /* if we closed them all, then remove this proc */ if (4 == cnt) { opal_list_remove_item(&mca_iof_hnp_component.procs, &proct->super); @@ -491,9 +500,11 @@ static int finalize(void) if (NULL != proct->revstderr) { orte_iof_base_static_dump_output(proct->revstderr); } +#if OPAL_PMIX_V1 if (NULL != proct->revstddiag) { orte_iof_base_static_dump_output(proct->revstddiag); } +#endif OBJ_RELEASE(proct); } OBJ_DESTRUCT(&mca_iof_hnp_component.procs); diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index 9f6b1e4968..1e30ac5636 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -267,14 +267,18 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) } else if (rev->tag & ORTE_IOF_STDERR) { orte_iof_base_static_dump_output(proct->revstderr); OBJ_RELEASE(proct->revstderr); +#if OPAL_PMIX_V1 } else if (rev->tag & ORTE_IOF_STDDIAG) { orte_iof_base_static_dump_output(proct->revstddiag); OBJ_RELEASE(proct->revstddiag); +#endif } 
/* check to see if they are all done */ if (NULL == proct->revstdout && - NULL == proct->revstderr && - NULL == proct->revstddiag) { +#if OPAL_PMIX_V1 + NULL == proct->revstddiag && +#endif + NULL == proct->revstderr) { /* this proc's iof is complete */ ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE); } diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index 261b7d3c8f..7aa1d98437 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -42,7 +42,6 @@ #endif #include "opal/util/os_dirpath.h" -#include "opal/mca/pmix/pmix.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" @@ -178,9 +177,11 @@ static int orted_push(const orte_process_name_t* dst_name, } else if (src_tag & ORTE_IOF_STDERR) { ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR, orte_iof_orted_read_handler, false); - } else if (0 == strncmp(opal_pmix.name, "pmix1", 5) && src_tag & ORTE_IOF_STDDIAG) { +#if OPAL_PMIX_V1 + } else if (src_tag & ORTE_IOF_STDDIAG) { ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG, orte_iof_orted_read_handler, false); +#endif } /* setup any requested output files */ if (ORTE_SUCCESS != (rc = orte_iof_base_setup_output_files(dst_name, jobdat, proct))) { @@ -194,15 +195,19 @@ static int orted_push(const orte_process_name_t* dst_name, * been defined! 
*/ if (NULL != proct->revstdout && - (0 != strncmp(opal_pmix.name, "pmix1", 5) || NULL != proct->revstddiag) && +#if OPAL_PMIX_V1 + NULL != proct->revstddiag && +#endif (orte_iof_base.redirect_app_stderr_to_stdout || NULL != proct->revstderr)) { ORTE_IOF_READ_ACTIVATE(proct->revstdout); if (!orte_iof_base.redirect_app_stderr_to_stdout) { ORTE_IOF_READ_ACTIVATE(proct->revstderr); } +#if OPAL_PMIX_V1 if (NULL != proct->revstddiag) { ORTE_IOF_READ_ACTIVATE(proct->revstddiag); } +#endif } return ORTE_SUCCESS; } @@ -301,6 +306,7 @@ static int orted_close(const orte_process_name_t* peer, } ++cnt; } +#if OPAL_PMIX_V1 if (ORTE_IOF_STDDIAG & source_tag) { if (NULL != proct->revstddiag) { orte_iof_base_static_dump_output(proct->revstddiag); @@ -308,6 +314,9 @@ static int orted_close(const orte_process_name_t* peer, } ++cnt; } +#else + ++cnt; +#endif /* if we closed them all, then remove this proc */ if (4 == cnt) { opal_list_remove_item(&mca_iof_orted_component.procs, &proct->super); @@ -346,9 +355,11 @@ static int finalize(void) if (NULL != proct->revstderr) { orte_iof_base_static_dump_output(proct->revstderr); } +#if OPAL_PMIX_V1 if (NULL != proct->revstddiag) { orte_iof_base_static_dump_output(proct->revstddiag); } +#endif OBJ_RELEASE(proct); } OBJ_DESTRUCT(&mca_iof_orted_component.procs); diff --git a/orte/mca/iof/orted/iof_orted_read.c b/orte/mca/iof/orted/iof_orted_read.c index c7fcedd0b5..d1e07898bb 100644 --- a/orte/mca/iof/orted/iof_orted_read.c +++ b/orte/mca/iof/orted/iof_orted_read.c @@ -164,16 +164,20 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) orte_iof_base_static_dump_output(proct->revstderr); OBJ_RELEASE(proct->revstderr); } +#if OPAL_PMIX_V1 } else if (rev->tag & ORTE_IOF_STDDIAG) { if( NULL != proct->revstddiag ) { orte_iof_base_static_dump_output(proct->revstddiag); OBJ_RELEASE(proct->revstddiag); } +#endif } /* check to see if they are all done */ if (NULL == proct->revstdout && - NULL == proct->revstderr && - NULL == 
proct->revstddiag) { +#if OPAL_PMIX_V1 + NULL == proct->revstddiag && +#endif + NULL == proct->revstderr) { /* this proc's iof is complete */ ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE); } diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 9822add360..b9c6f665ce 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -112,7 +112,6 @@ #include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" -#include "opal/mca/pmix/pmix.h" #include "opal/class/opal_pointer_array.h" #include "opal/util/opal_environ.h" #include "opal/util/show_help.h" @@ -294,9 +293,8 @@ static void send_error_show_help(int fd, int exit_status, exit(exit_status); } -/* close all open file descriptors w/ exception of stdin/stdout/stderr, - the pipe used for the IOF INTERNAL messages, and the pipe up to - the parent. */ +/* close all open file descriptors w/ exception of stdin/stdout/stderr + and the pipe up to the parent. 
*/ static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opts) { DIR *dir = opendir("/proc/self/fd"); @@ -313,7 +311,11 @@ static int close_open_file_descriptors(int write_fd, closedir(dir); return ORTE_ERR_TYPE_MISMATCH; } - if (fd >=3 && (0 != strncmp(opal_pmix.name, "pmix", 4) && fd != opts.p_internal[1]) && fd != write_fd) { + if (fd >=3 && +#if OPAL_PMIX_V1 + fd != opts.p_internal[1] && +#endif + fd != write_fd) { close(fd); } } @@ -375,13 +377,13 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) } close(fdnull); } - if (0 != strncmp(opal_pmix.name, "pmix", 4)) { - fdnull = open("/dev/null", O_RDONLY, 0); - if (fdnull > cd->opts.p_internal[1]) { - dup2(fdnull, cd->opts.p_internal[1]); - } - close(fdnull); +#if OPAL_PMIX_V1 + fdnull = open("/dev/null", O_RDONLY, 0); + if (fdnull > cd->opts.p_internal[1]) { + dup2(fdnull, cd->opts.p_internal[1]); } + close(fdnull); +#endif } /* close all open file descriptors w/ exception of stdin/stdout/stderr, @@ -390,7 +392,11 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) { // close *all* file descriptors -- slow for(fd=3; fd<fdmax; fd++) { - if ((0 != strncmp(opal_pmix.name, "pmix", 4) && fd != cd->opts.p_internal[1]) && fd != write_fd) { + if ( +#if OPAL_PMIX_V1 + fd != cd->opts.p_internal[1] && +#endif + fd != write_fd) { close(fd); } } @@ -459,9 +465,9 @@ static int do_parent(orte_odls_spawn_caddy_t *cd, int read_fd) if( !orte_iof_base.redirect_app_stderr_to_stdout ) { close(cd->opts.p_stderr[1]); } - if (0 != strncmp(opal_pmix.name, "pmix", 4)) { - close(cd->opts.p_internal[1]); - } +#if OPAL_PMIX_V1 + close(cd->opts.p_internal[1]); +#endif /* Block reading a message from the pipe */ while (1) { From e3c308dfc85571a4405dd9dd0a078933b8146b9a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 25 Nov 2017 19:51:07 -0800 Subject: [PATCH 3/5] Update the odls/alps component Signed-off-by: Ralph Castain --- orte/mca/odls/alps/odls_alps_module.c | 10 +++++++++- 1 file
changed, 9 insertions(+), 1 deletion(-) diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index 2990d50f45..d99bd177f5 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -333,7 +333,11 @@ static int close_open_file_descriptors(int write_fd, orte_iof_base_io_conf_t opt (fd == alps_app_filedes[0]) || (fd == alps_app_filedes[1])) continue; - if (fd >=3 && fd != opts.p_internal[1] && fd != write_fd) { + if (fd >=3 && +#if OPAL_PMIX_V1 + fd != opts.p_internal[1] && +#endif + fd != write_fd) { close(fd); } } @@ -386,11 +390,13 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd) } close(fdnull); } +#if OPAL_PMIX_V1 fdnull = open("/dev/null", O_RDONLY, 0); if (fdnull > cd->opts.p_internal[1]) { dup2(fdnull, cd->opts.p_internal[1]); } close(fdnull); +#endif } if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) { @@ -475,7 +481,9 @@ static int do_parent(orte_odls_spawn_caddy_t *cd, int read_fd) if( !orte_iof_base.redirect_app_stderr_to_stdout ) { close(cd->opts.p_stderr[1]); } +#if OPAL_PMIX_V1 close(cd->opts.p_internal[1]); +#endif /* Block reading a message from the pipe */ while (1) { From a25a7bcba7d0f5bfa9fd3cb67625a0cbfa609470 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 26 Nov 2017 08:18:34 -0800 Subject: [PATCH 4/5] Handle the case where stdout and stderr get merged into a file Signed-off-by: Ralph Castain --- orte/mca/iof/hnp/iof_hnp.c | 5 +++-- orte/mca/iof/orted/iof_orted.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index 2778111053..e5592d3145 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -389,7 +389,7 @@ static int hnp_close(const orte_process_name_t* peer, { orte_iof_proc_t* proct; orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL; - int cnt = 0; + int cnt; OPAL_LIST_FOREACH(proct, &mca_iof_hnp_component.procs, orte_iof_proc_t) 
{ if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, peer)) { @@ -399,7 +399,8 @@ static int hnp_close(const orte_process_name_t* peer, } ++cnt; } - if (ORTE_IOF_STDOUT & source_tag) { + if ((ORTE_IOF_STDOUT & source_tag) || + (ORTE_IOF_STDMERGE & source_tag)) { if (NULL != proct->revstdout) { orte_iof_base_static_dump_output(proct->revstdout); OBJ_RELEASE(proct->revstdout); diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index 7aa1d98437..b27b2bf712 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -292,7 +292,8 @@ static int orted_close(const orte_process_name_t* peer, } ++cnt; } - if (ORTE_IOF_STDOUT & source_tag) { + if ((ORTE_IOF_STDOUT & source_tag) || + (ORTE_IOF_STDMERGE & source_tag)) { if (NULL != proct->revstdout) { orte_iof_base_static_dump_output(proct->revstdout); OBJ_RELEASE(proct->revstdout); From 1de0421e484c873095b36acccbd62a6f86d6abe8 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 26 Nov 2017 10:39:53 -0800 Subject: [PATCH 5/5] Provide a more robust way of checking for proct completion Signed-off-by: Ralph Castain --- orte/mca/iof/hnp/iof_hnp.c | 18 ++++++++++-------- orte/mca/iof/orted/iof_orted.c | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index e5592d3145..2d4bdf3607 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -389,7 +389,6 @@ static int hnp_close(const orte_process_name_t* peer, { orte_iof_proc_t* proct; orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL; - int cnt; OPAL_LIST_FOREACH(proct, &mca_iof_hnp_component.procs, orte_iof_proc_t) { if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, peer)) { @@ -397,7 +396,7 @@ static int hnp_close(const orte_process_name_t* peer, if (NULL != proct->stdinev) { OBJ_RELEASE(proct->stdinev); } - ++cnt; + proct->stdinev = NULL; } if ((ORTE_IOF_STDOUT & source_tag) || 
(ORTE_IOF_STDMERGE & source_tag)) { @@ -405,14 +404,14 @@ static int hnp_close(const orte_process_name_t* peer, orte_iof_base_static_dump_output(proct->revstdout); OBJ_RELEASE(proct->revstdout); } - ++cnt; + proct->revstdout = NULL; } if (ORTE_IOF_STDERR & source_tag) { if (NULL != proct->revstderr) { orte_iof_base_static_dump_output(proct->revstderr); OBJ_RELEASE(proct->revstderr); } - ++cnt; + proct->revstderr = NULL; } #if OPAL_PMIX_V1 if (ORTE_IOF_STDDIAG & source_tag) { @@ -420,13 +419,16 @@ static int hnp_close(const orte_process_name_t* peer, orte_iof_base_static_dump_output(proct->revstddiag); OBJ_RELEASE(proct->revstddiag); } - ++cnt; + proct->revstddiag = NULL; } -#else - ++cnt; #endif /* if we closed them all, then remove this proc */ - if (4 == cnt) { + if (NULL == proct->stdinev && + NULL == proct->revstdout && +#if OPAL_PMIX_V1 + NULL == proct->revstddiag && +#endif + NULL == proct->revstderr) { opal_list_remove_item(&mca_iof_hnp_component.procs, &proct->super); OBJ_RELEASE(proct); } diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index b27b2bf712..e06c3ce9ed 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -282,7 +282,6 @@ static int orted_close(const orte_process_name_t* peer, { orte_iof_proc_t* proct; orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL; - int cnt = 0; OPAL_LIST_FOREACH(proct, &mca_iof_orted_component.procs, orte_iof_proc_t) { if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, peer)) { @@ -290,7 +289,7 @@ static int orted_close(const orte_process_name_t* peer, if (NULL != proct->stdinev) { OBJ_RELEASE(proct->stdinev); } - ++cnt; + proct->stdinev = NULL; } if ((ORTE_IOF_STDOUT & source_tag) || (ORTE_IOF_STDMERGE & source_tag)) { @@ -298,14 +297,14 @@ static int orted_close(const orte_process_name_t* peer, orte_iof_base_static_dump_output(proct->revstdout); OBJ_RELEASE(proct->revstdout); } - ++cnt; + proct->revstdout = NULL; } if (ORTE_IOF_STDERR & source_tag) { 
if (NULL != proct->revstderr) { orte_iof_base_static_dump_output(proct->revstderr); OBJ_RELEASE(proct->revstderr); } - ++cnt; + proct->revstderr = NULL; } #if OPAL_PMIX_V1 if (ORTE_IOF_STDDIAG & source_tag) { @@ -313,13 +312,16 @@ static int orted_close(const orte_process_name_t* peer, orte_iof_base_static_dump_output(proct->revstddiag); OBJ_RELEASE(proct->revstddiag); } - ++cnt; + proct->revstddiag = NULL; } -#else - ++cnt; #endif /* if we closed them all, then remove this proc */ - if (4 == cnt) { + if (NULL == proct->stdinev && + NULL == proct->revstdout && +#if OPAL_PMIX_V1 + NULL == proct->revstddiag && +#endif + NULL == proct->revstderr) { opal_list_remove_item(&mca_iof_orted_component.procs, &proct->super); OBJ_RELEASE(proct); }