1
1

Save one more file descriptor per process by not opening one for stddiag

if PMIx (version > 1.x) is active, since all diagnostic messages will instead flow through
the PMIx connection. Unfortunately, PMIx v1 does not support this
feature, but we can remove the stddiag support once PMIx v1 slides out
of the support window.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-11-24 11:18:47 -08:00
родитель e88767866e
Коммит 30f23ac67a
13 изменённых файлов: 82 добавлений и 41 удалений

Просмотреть файл

@ -83,6 +83,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv);
#endif
const opal_pmix_base_module_t opal_pmix_cray_module = {
.name = "cray",
.init = cray_init,
.finalize = cray_fini,
.initialized = cray_initialized,

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
@ -50,6 +50,7 @@ static const char *pmix1_get_nspace(opal_jobid_t jobid);
static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_ext1x_module = {
.name = "pmix1",
/* client APIs */
.init = pmix1_client_init,
.finalize = pmix1_client_finalize,

Просмотреть файл

@ -72,6 +72,7 @@ static void ext2x_log(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
const opal_pmix_base_module_t opal_pmix_ext2x_module = {
.name = "pmix2",
/* client APIs */
.init = ext2x_client_init,
.finalize = ext2x_client_finalize,

Просмотреть файл

@ -59,6 +59,7 @@ static const char *flux_get_nspace(opal_jobid_t jobid);
static void flux_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_flux_module = {
.name = "flux",
.init = flux_init,
.finalize = flux_fini,
.initialized = flux_initialized,

Просмотреть файл

@ -83,6 +83,7 @@ static const char *isolated_get_nspace(opal_jobid_t jobid);
static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_isolated_module = {
.name = "isolated",
.init = isolated_init,
.finalize = isolated_fini,
.initialized = isolated_initialized,

Просмотреть файл

@ -871,6 +871,7 @@ typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor,
* the standard public API data structure
*/
typedef struct {
char *name;
/* client APIs */
opal_pmix_base_module_init_fn_t init;
opal_pmix_base_module_fini_fn_t finalize;

Просмотреть файл

@ -72,6 +72,7 @@ static void pmix3x_log(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
const opal_pmix_base_module_t opal_pmix_pmix3x_module = {
.name = "pmix3",
/* client APIs */
.init = pmix3x_client_init,
.finalize = pmix3x_client_finalize,

Просмотреть файл

@ -59,6 +59,7 @@ static const char *s1_get_nspace(opal_jobid_t jobid);
static void s1_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_s1_module = {
.name = "s1",
.init = s1_init,
.finalize = s1_fini,
.initialized = s1_initialized,

Просмотреть файл

@ -62,6 +62,7 @@ static const char *s2_get_nspace(opal_jobid_t jobid);
static void s2_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_s2_module = {
.name = "s2",
.init = s2_init,
.finalize = s2_fini,
.initialized = s2_initialized,

Просмотреть файл

@ -64,6 +64,7 @@
#include "opal/util/os_dirpath.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
@ -119,9 +120,11 @@ orte_iof_base_setup_prefork(orte_iof_base_io_conf_t *opts)
return ORTE_ERR_SYS_LIMITS_PIPES;
}
}
if (pipe(opts->p_internal) < 0) {
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
return ORTE_ERR_SYS_LIMITS_PIPES;
if (0 == strncmp(opal_pmix.name, "pmix1", 5)) {
if (pipe(opts->p_internal) < 0) {
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
return ORTE_ERR_SYS_LIMITS_PIPES;
}
}
return ORTE_SUCCESS;
@ -141,7 +144,9 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
if( !orte_iof_base.redirect_app_stderr_to_stdout ) {
close(opts->p_stderr[0]);
}
close(opts->p_internal[0]);
if (0 == strncmp(opal_pmix.name, "pmix1", 5)) {
close(opts->p_internal[0]);
}
if (opts->usepty) {
/* disable echo */
@ -163,19 +168,27 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
return ORTE_ERR_PIPE_SETUP_FAILURE;
}
ret = dup2(opts->p_stdout[1], fileno(stdout));
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
if (ret < 0) {
return ORTE_ERR_PIPE_SETUP_FAILURE;
}
if( orte_iof_base.redirect_app_stderr_to_stdout ) {
ret = dup2(opts->p_stdout[1], fileno(stderr));
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
if (ret < 0) {
return ORTE_ERR_PIPE_SETUP_FAILURE;
}
}
close(opts->p_stdout[1]);
} else {
if(opts->p_stdout[1] != fileno(stdout)) {
ret = dup2(opts->p_stdout[1], fileno(stdout));
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
if (ret < 0) {
return ORTE_ERR_PIPE_SETUP_FAILURE;
}
if( orte_iof_base.redirect_app_stderr_to_stdout ) {
ret = dup2(opts->p_stdout[1], fileno(stderr));
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
if (ret < 0) {
return ORTE_ERR_PIPE_SETUP_FAILURE;
}
}
close(opts->p_stdout[1]);
}
@ -183,7 +196,9 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
if (opts->connect_stdin) {
if(opts->p_stdin[0] != fileno(stdin)) {
ret = dup2(opts->p_stdin[0], fileno(stdin));
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
if (ret < 0) {
return ORTE_ERR_PIPE_SETUP_FAILURE;
}
close(opts->p_stdin[0]);
}
} else {
@ -205,17 +220,19 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
}
}
if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) {
/* Set an environment variable that the new child process can use
to get the fd of the pipe connected to the INTERNAL IOF tag. */
asprintf(&str, "%d", opts->p_internal[1]);
if (NULL != str) {
opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env);
free(str);
if (0 == strncmp(opal_pmix.name, "pmix1", 5)) {
if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) {
/* Set an environment variable that the new child process can use
to get the fd of the pipe connected to the INTERNAL IOF tag. */
asprintf(&str, "%d", opts->p_internal[1]);
if (NULL != str) {
opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env);
free(str);
}
}
else if( orte_map_stddiag_to_stdout ) {
opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env);
}
}
else if( orte_map_stddiag_to_stdout ) {
opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env);
}
return ORTE_SUCCESS;
@ -253,10 +270,12 @@ orte_iof_base_setup_parent(const orte_process_name_t* name,
}
}
ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]);
if(ORTE_SUCCESS != ret) {
ORTE_ERROR_LOG(ret);
return ret;
if (0 == strncmp(opal_pmix.name, "pmix1", 5)) {
ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]);
if(ORTE_SUCCESS != ret) {
ORTE_ERROR_LOG(ret);
return ret;
}
}
return ORTE_SUCCESS;

Просмотреть файл

@ -42,6 +42,7 @@
#endif
#include "opal/mca/event/event.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
@ -186,7 +187,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
} else if (src_tag & ORTE_IOF_STDERR) {
ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR,
orte_iof_hnp_read_local_handler, false);
} else if (src_tag & ORTE_IOF_STDDIAG) {
} else if (0 == strncmp(opal_pmix.name, "pmix1", 5) && src_tag & ORTE_IOF_STDDIAG) {
ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG,
orte_iof_hnp_read_local_handler, false);
}
@ -201,7 +202,8 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
* because one of the readevents fires -prior- to all of them having
* been defined!
*/
if (NULL != proct->revstdout && NULL != proct->revstddiag &&
if (NULL != proct->revstdout &&
(0 != strncmp(opal_pmix.name, "pmix1", 5) || NULL != proct->revstddiag) &&
(orte_iof_base.redirect_app_stderr_to_stdout || NULL != proct->revstderr)) {
if (proct->copy) {
/* see if there are any wildcard subscribers out there that
@ -220,7 +222,9 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
if (!orte_iof_base.redirect_app_stderr_to_stdout) {
ORTE_IOF_READ_ACTIVATE(proct->revstderr);
}
ORTE_IOF_READ_ACTIVATE(proct->revstddiag);
if (NULL != proct->revstddiag) {
ORTE_IOF_READ_ACTIVATE(proct->revstddiag);
}
}
return ORTE_SUCCESS;
}
@ -586,9 +590,9 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
}
}
goto check;
re_enter:
re_enter:
ORTE_IOF_SINK_ACTIVATE(wev);
check:
check:
if (NULL != mca_iof_hnp_component.stdinev &&
!orte_abnormal_term_ordered &&
!mca_iof_hnp_component.stdinev->active) {
@ -612,7 +616,7 @@ check:
}
}
return;
finish:
finish:
OBJ_RELEASE(wev);
sink->wev = NULL;
return;

Просмотреть файл

@ -42,6 +42,7 @@
#endif
#include "opal/util/os_dirpath.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
@ -164,7 +165,7 @@ static int orted_push(const orte_process_name_t* dst_name,
proct->name.vpid = dst_name->vpid;
opal_list_append(&mca_iof_orted_component.procs, &proct->super);
SETUP:
SETUP:
/* get the local jobdata for this proc */
if (NULL == (jobdat = orte_get_job_data_object(proct->name.jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
@ -177,7 +178,7 @@ SETUP:
} else if (src_tag & ORTE_IOF_STDERR) {
ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR,
orte_iof_orted_read_handler, false);
} else if (src_tag & ORTE_IOF_STDDIAG) {
} else if (0 == strncmp(opal_pmix.name, "pmix1", 5) && src_tag & ORTE_IOF_STDDIAG) {
ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG,
orte_iof_orted_read_handler, false);
}
@ -192,13 +193,16 @@ SETUP:
* because one of the readevents fires -prior- to all of them having
* been defined!
*/
if (NULL != proct->revstdout && NULL != proct->revstddiag &&
if (NULL != proct->revstdout &&
(0 != strncmp(opal_pmix.name, "pmix1", 5) || NULL != proct->revstddiag) &&
(orte_iof_base.redirect_app_stderr_to_stdout || NULL != proct->revstderr)) {
ORTE_IOF_READ_ACTIVATE(proct->revstdout);
if (!orte_iof_base.redirect_app_stderr_to_stdout) {
ORTE_IOF_READ_ACTIVATE(proct->revstderr);
}
ORTE_IOF_READ_ACTIVATE(proct->revstddiag);
if (NULL != proct->revstddiag) {
ORTE_IOF_READ_ACTIVATE(proct->revstddiag);
}
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -112,6 +112,7 @@
#include "opal/mca/hwloc/hwloc-internal.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/util/opal_environ.h"
#include "opal/util/show_help.h"
@ -312,7 +313,7 @@ static int close_open_file_descriptors(int write_fd,
closedir(dir);
return ORTE_ERR_TYPE_MISMATCH;
}
if (fd >=3 && fd != opts.p_internal[1] && fd != write_fd) {
if (fd >=3 && (0 != strncmp(opal_pmix.name, "pmix", 4) && fd != opts.p_internal[1]) && fd != write_fd) {
close(fd);
}
}
@ -374,11 +375,13 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
}
close(fdnull);
}
fdnull = open("/dev/null", O_RDONLY, 0);
if (fdnull > cd->opts.p_internal[1]) {
dup2(fdnull, cd->opts.p_internal[1]);
if (0 != strncmp(opal_pmix.name, "pmix", 4)) {
fdnull = open("/dev/null", O_RDONLY, 0);
if (fdnull > cd->opts.p_internal[1]) {
dup2(fdnull, cd->opts.p_internal[1]);
}
close(fdnull);
}
close(fdnull);
}
/* close all open file descriptors w/ exception of stdin/stdout/stderr,
@ -387,7 +390,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) {
// close *all* file descriptors -- slow
for(fd=3; fd<fdmax; fd++) {
if (fd != cd->opts.p_internal[1] && fd != write_fd) {
if ((0 != strncmp(opal_pmix.name, "pmix", 4) && fd != cd->opts.p_internal[1]) && fd != write_fd) {
close(fd);
}
}
@ -456,7 +459,9 @@ static int do_parent(orte_odls_spawn_caddy_t *cd, int read_fd)
if( !orte_iof_base.redirect_app_stderr_to_stdout ) {
close(cd->opts.p_stderr[1]);
}
close(cd->opts.p_internal[1]);
if (0 != strncmp(opal_pmix.name, "pmix", 4)) {
close(cd->opts.p_internal[1]);
}
/* Block reading a message from the pipe */
while (1) {