Save one more file descriptor per process by not opening one for stddiag
if PMIx (version > 1.x) is active, since all diagnostic messages will instead flow through the PMIx connection. Unfortunately, PMIx v1 does not support this feature, but we can remove the stddiag support once PMIx v1 slides out of the support window. Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
e88767866e
Коммит
30f23ac67a
@ -83,6 +83,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv);
|
||||
#endif
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_cray_module = {
|
||||
.name = "cray",
|
||||
.init = cray_init,
|
||||
.finalize = cray_fini,
|
||||
.initialized = cray_initialized,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
|
||||
@ -50,6 +50,7 @@ static const char *pmix1_get_nspace(opal_jobid_t jobid);
|
||||
static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_ext1x_module = {
|
||||
.name = "pmix1",
|
||||
/* client APIs */
|
||||
.init = pmix1_client_init,
|
||||
.finalize = pmix1_client_finalize,
|
||||
|
@ -72,6 +72,7 @@ static void ext2x_log(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_ext2x_module = {
|
||||
.name = "pmix2",
|
||||
/* client APIs */
|
||||
.init = ext2x_client_init,
|
||||
.finalize = ext2x_client_finalize,
|
||||
|
@ -59,6 +59,7 @@ static const char *flux_get_nspace(opal_jobid_t jobid);
|
||||
static void flux_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_flux_module = {
|
||||
.name = "flux",
|
||||
.init = flux_init,
|
||||
.finalize = flux_fini,
|
||||
.initialized = flux_initialized,
|
||||
|
@ -83,6 +83,7 @@ static const char *isolated_get_nspace(opal_jobid_t jobid);
|
||||
static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_isolated_module = {
|
||||
.name = "isolated",
|
||||
.init = isolated_init,
|
||||
.finalize = isolated_fini,
|
||||
.initialized = isolated_initialized,
|
||||
|
@ -871,6 +871,7 @@ typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor,
|
||||
* the standard public API data structure
|
||||
*/
|
||||
typedef struct {
|
||||
char *name;
|
||||
/* client APIs */
|
||||
opal_pmix_base_module_init_fn_t init;
|
||||
opal_pmix_base_module_fini_fn_t finalize;
|
||||
|
@ -72,6 +72,7 @@ static void pmix3x_log(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_pmix3x_module = {
|
||||
.name = "pmix3",
|
||||
/* client APIs */
|
||||
.init = pmix3x_client_init,
|
||||
.finalize = pmix3x_client_finalize,
|
||||
|
@ -59,6 +59,7 @@ static const char *s1_get_nspace(opal_jobid_t jobid);
|
||||
static void s1_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_s1_module = {
|
||||
.name = "s1",
|
||||
.init = s1_init,
|
||||
.finalize = s1_fini,
|
||||
.initialized = s1_initialized,
|
||||
|
@ -62,6 +62,7 @@ static const char *s2_get_nspace(opal_jobid_t jobid);
|
||||
static void s2_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_s2_module = {
|
||||
.name = "s2",
|
||||
.init = s2_init,
|
||||
.finalize = s2_fini,
|
||||
.initialized = s2_initialized,
|
||||
|
@ -64,6 +64,7 @@
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
@ -119,9 +120,11 @@ orte_iof_base_setup_prefork(orte_iof_base_io_conf_t *opts)
|
||||
return ORTE_ERR_SYS_LIMITS_PIPES;
|
||||
}
|
||||
}
|
||||
if (pipe(opts->p_internal) < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
||||
return ORTE_ERR_SYS_LIMITS_PIPES;
|
||||
if (0 == strncmp(opal_pmix.name, "pmix1", 5)) {
|
||||
if (pipe(opts->p_internal) < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
||||
return ORTE_ERR_SYS_LIMITS_PIPES;
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
@ -141,7 +144,9 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
|
||||
if( !orte_iof_base.redirect_app_stderr_to_stdout ) {
|
||||
close(opts->p_stderr[0]);
|
||||
}
|
||||
close(opts->p_internal[0]);
|
||||
if (0 == strncmp(opal_pmix.name, "pmix1", 5)) {
|
||||
close(opts->p_internal[0]);
|
||||
}
|
||||
|
||||
if (opts->usepty) {
|
||||
/* disable echo */
|
||||
@ -163,19 +168,27 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
|
||||
return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
}
|
||||
ret = dup2(opts->p_stdout[1], fileno(stdout));
|
||||
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
if (ret < 0) {
|
||||
return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
}
|
||||
if( orte_iof_base.redirect_app_stderr_to_stdout ) {
|
||||
ret = dup2(opts->p_stdout[1], fileno(stderr));
|
||||
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
if (ret < 0) {
|
||||
return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
}
|
||||
}
|
||||
close(opts->p_stdout[1]);
|
||||
} else {
|
||||
if(opts->p_stdout[1] != fileno(stdout)) {
|
||||
ret = dup2(opts->p_stdout[1], fileno(stdout));
|
||||
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
if (ret < 0) {
|
||||
return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
}
|
||||
if( orte_iof_base.redirect_app_stderr_to_stdout ) {
|
||||
ret = dup2(opts->p_stdout[1], fileno(stderr));
|
||||
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
if (ret < 0) {
|
||||
return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
}
|
||||
}
|
||||
close(opts->p_stdout[1]);
|
||||
}
|
||||
@ -183,7 +196,9 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
|
||||
if (opts->connect_stdin) {
|
||||
if(opts->p_stdin[0] != fileno(stdin)) {
|
||||
ret = dup2(opts->p_stdin[0], fileno(stdin));
|
||||
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
if (ret < 0) {
|
||||
return ORTE_ERR_PIPE_SETUP_FAILURE;
|
||||
}
|
||||
close(opts->p_stdin[0]);
|
||||
}
|
||||
} else {
|
||||
@ -205,17 +220,19 @@ orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
|
||||
}
|
||||
}
|
||||
|
||||
if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) {
|
||||
/* Set an environment variable that the new child process can use
|
||||
to get the fd of the pipe connected to the INTERNAL IOF tag. */
|
||||
asprintf(&str, "%d", opts->p_internal[1]);
|
||||
if (NULL != str) {
|
||||
opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env);
|
||||
free(str);
|
||||
if (0 == strncmp(opal_pmix.name, "pmix1", 5)) {
|
||||
if (!orte_map_stddiag_to_stderr && !orte_map_stddiag_to_stdout ) {
|
||||
/* Set an environment variable that the new child process can use
|
||||
to get the fd of the pipe connected to the INTERNAL IOF tag. */
|
||||
asprintf(&str, "%d", opts->p_internal[1]);
|
||||
if (NULL != str) {
|
||||
opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env);
|
||||
free(str);
|
||||
}
|
||||
}
|
||||
else if( orte_map_stddiag_to_stdout ) {
|
||||
opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env);
|
||||
}
|
||||
}
|
||||
else if( orte_map_stddiag_to_stdout ) {
|
||||
opal_setenv("OPAL_OUTPUT_INTERNAL_TO_STDOUT", "1", true, env);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
@ -253,10 +270,12 @@ orte_iof_base_setup_parent(const orte_process_name_t* name,
|
||||
}
|
||||
}
|
||||
|
||||
ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]);
|
||||
if(ORTE_SUCCESS != ret) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
return ret;
|
||||
if (0 == strncmp(opal_pmix.name, "pmix1", 5)) {
|
||||
ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]);
|
||||
if(ORTE_SUCCESS != ret) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -42,6 +42,7 @@
|
||||
#endif
|
||||
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
@ -186,7 +187,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
|
||||
} else if (src_tag & ORTE_IOF_STDERR) {
|
||||
ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR,
|
||||
orte_iof_hnp_read_local_handler, false);
|
||||
} else if (src_tag & ORTE_IOF_STDDIAG) {
|
||||
} else if (0 == strncmp(opal_pmix.name, "pmix1", 5) && src_tag & ORTE_IOF_STDDIAG) {
|
||||
ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG,
|
||||
orte_iof_hnp_read_local_handler, false);
|
||||
}
|
||||
@ -201,7 +202,8 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
|
||||
* because one of the readevents fires -prior- to all of them having
|
||||
* been defined!
|
||||
*/
|
||||
if (NULL != proct->revstdout && NULL != proct->revstddiag &&
|
||||
if (NULL != proct->revstdout &&
|
||||
(0 != strncmp(opal_pmix.name, "pmix1", 5) || NULL != proct->revstddiag) &&
|
||||
(orte_iof_base.redirect_app_stderr_to_stdout || NULL != proct->revstderr)) {
|
||||
if (proct->copy) {
|
||||
/* see if there are any wildcard subscribers out there that
|
||||
@ -220,7 +222,9 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
|
||||
if (!orte_iof_base.redirect_app_stderr_to_stdout) {
|
||||
ORTE_IOF_READ_ACTIVATE(proct->revstderr);
|
||||
}
|
||||
ORTE_IOF_READ_ACTIVATE(proct->revstddiag);
|
||||
if (NULL != proct->revstddiag) {
|
||||
ORTE_IOF_READ_ACTIVATE(proct->revstddiag);
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -586,9 +590,9 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
}
|
||||
}
|
||||
goto check;
|
||||
re_enter:
|
||||
re_enter:
|
||||
ORTE_IOF_SINK_ACTIVATE(wev);
|
||||
check:
|
||||
check:
|
||||
if (NULL != mca_iof_hnp_component.stdinev &&
|
||||
!orte_abnormal_term_ordered &&
|
||||
!mca_iof_hnp_component.stdinev->active) {
|
||||
@ -612,7 +616,7 @@ check:
|
||||
}
|
||||
}
|
||||
return;
|
||||
finish:
|
||||
finish:
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
return;
|
||||
|
@ -42,6 +42,7 @@
|
||||
#endif
|
||||
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
@ -164,7 +165,7 @@ static int orted_push(const orte_process_name_t* dst_name,
|
||||
proct->name.vpid = dst_name->vpid;
|
||||
opal_list_append(&mca_iof_orted_component.procs, &proct->super);
|
||||
|
||||
SETUP:
|
||||
SETUP:
|
||||
/* get the local jobdata for this proc */
|
||||
if (NULL == (jobdat = orte_get_job_data_object(proct->name.jobid))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
@ -177,7 +178,7 @@ SETUP:
|
||||
} else if (src_tag & ORTE_IOF_STDERR) {
|
||||
ORTE_IOF_READ_EVENT(&proct->revstderr, proct, fd, ORTE_IOF_STDERR,
|
||||
orte_iof_orted_read_handler, false);
|
||||
} else if (src_tag & ORTE_IOF_STDDIAG) {
|
||||
} else if (0 == strncmp(opal_pmix.name, "pmix1", 5) && src_tag & ORTE_IOF_STDDIAG) {
|
||||
ORTE_IOF_READ_EVENT(&proct->revstddiag, proct, fd, ORTE_IOF_STDDIAG,
|
||||
orte_iof_orted_read_handler, false);
|
||||
}
|
||||
@ -192,13 +193,16 @@ SETUP:
|
||||
* because one of the readevents fires -prior- to all of them having
|
||||
* been defined!
|
||||
*/
|
||||
if (NULL != proct->revstdout && NULL != proct->revstddiag &&
|
||||
if (NULL != proct->revstdout &&
|
||||
(0 != strncmp(opal_pmix.name, "pmix1", 5) || NULL != proct->revstddiag) &&
|
||||
(orte_iof_base.redirect_app_stderr_to_stdout || NULL != proct->revstderr)) {
|
||||
ORTE_IOF_READ_ACTIVATE(proct->revstdout);
|
||||
if (!orte_iof_base.redirect_app_stderr_to_stdout) {
|
||||
ORTE_IOF_READ_ACTIVATE(proct->revstderr);
|
||||
}
|
||||
ORTE_IOF_READ_ACTIVATE(proct->revstddiag);
|
||||
if (NULL != proct->revstddiag) {
|
||||
ORTE_IOF_READ_ACTIVATE(proct->revstddiag);
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -112,6 +112,7 @@
|
||||
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/util/show_help.h"
|
||||
@ -312,7 +313,7 @@ static int close_open_file_descriptors(int write_fd,
|
||||
closedir(dir);
|
||||
return ORTE_ERR_TYPE_MISMATCH;
|
||||
}
|
||||
if (fd >=3 && fd != opts.p_internal[1] && fd != write_fd) {
|
||||
if (fd >=3 && (0 != strncmp(opal_pmix.name, "pmix", 4) && fd != opts.p_internal[1]) && fd != write_fd) {
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
@ -374,11 +375,13 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
|
||||
}
|
||||
close(fdnull);
|
||||
}
|
||||
fdnull = open("/dev/null", O_RDONLY, 0);
|
||||
if (fdnull > cd->opts.p_internal[1]) {
|
||||
dup2(fdnull, cd->opts.p_internal[1]);
|
||||
if (0 != strncmp(opal_pmix.name, "pmix", 4)) {
|
||||
fdnull = open("/dev/null", O_RDONLY, 0);
|
||||
if (fdnull > cd->opts.p_internal[1]) {
|
||||
dup2(fdnull, cd->opts.p_internal[1]);
|
||||
}
|
||||
close(fdnull);
|
||||
}
|
||||
close(fdnull);
|
||||
}
|
||||
|
||||
/* close all open file descriptors w/ exception of stdin/stdout/stderr,
|
||||
@ -387,7 +390,7 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
|
||||
if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) {
|
||||
// close *all* file descriptors -- slow
|
||||
for(fd=3; fd<fdmax; fd++) {
|
||||
if (fd != cd->opts.p_internal[1] && fd != write_fd) {
|
||||
if ((0 != strncmp(opal_pmix.name, "pmix", 4) && fd != cd->opts.p_internal[1]) && fd != write_fd) {
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
@ -456,7 +459,9 @@ static int do_parent(orte_odls_spawn_caddy_t *cd, int read_fd)
|
||||
if( !orte_iof_base.redirect_app_stderr_to_stdout ) {
|
||||
close(cd->opts.p_stderr[1]);
|
||||
}
|
||||
close(cd->opts.p_internal[1]);
|
||||
if (0 != strncmp(opal_pmix.name, "pmix", 4)) {
|
||||
close(cd->opts.p_internal[1]);
|
||||
}
|
||||
|
||||
/* Block reading a message from the pipe */
|
||||
while (1) {
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user