ORTE-level MCA params are defined in several places. Ompi_info cannot call orte_init due to an issue with the memory allocator, thus making it impossible for ompi_info to display all of the ORTE-level MCA params.
With them all consolidated into one function, ompi_info can call that function and register the desired variables. This also requires, however, that ompi_info call orte_output_init to avoid generating tons of error messages, so make that adjustment too. Fixes ticket #1314. In addition, orte_output has a race condition whereby calls to orte_output/verbose can occur before either the RML has been set up or the HNP has been defined. The latter occurs during the initialization of the orte_process_info structure. In both cases, there is no way orte_output can send the output to the HNP; hence, the message must simply be output locally. Fixes ticket #1315. This commit was SVN r18524.
Этот коммит содержится в:
родитель
879a9fe45c
Коммит
828ae26d90
@ -114,6 +114,7 @@
|
|||||||
#endif
|
#endif
|
||||||
#include "orte/mca/filem/filem.h"
|
#include "orte/mca/filem/filem.h"
|
||||||
#include "orte/mca/filem/base/base.h"
|
#include "orte/mca/filem/base/base.h"
|
||||||
|
#include "orte/util/output.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace ompi_info;
|
using namespace ompi_info;
|
||||||
@ -189,6 +190,9 @@ void ompi_info::open_components()
|
|||||||
|
|
||||||
orte_register_params();
|
orte_register_params();
|
||||||
|
|
||||||
|
// Initialize the orte_output system
|
||||||
|
orte_output_init();
|
||||||
|
|
||||||
// Register the MPI layer's MCA parameters
|
// Register the MPI layer's MCA parameters
|
||||||
|
|
||||||
ompi_mpi_register_params();
|
ompi_mpi_register_params();
|
||||||
|
@ -221,7 +221,6 @@ int main(int argc, char *argv[])
|
|||||||
ompi_info::mca_types.push_back("rml");
|
ompi_info::mca_types.push_back("rml");
|
||||||
ompi_info::mca_types.push_back("routed");
|
ompi_info::mca_types.push_back("routed");
|
||||||
ompi_info::mca_types.push_back("plm");
|
ompi_info::mca_types.push_back("plm");
|
||||||
ompi_info::mca_types.push_back("sds");
|
|
||||||
#if OPAL_ENABLE_FT == 1
|
#if OPAL_ENABLE_FT == 1
|
||||||
ompi_info::mca_types.push_back("snapc");
|
ompi_info::mca_types.push_back("snapc");
|
||||||
#endif
|
#endif
|
||||||
|
@ -47,4 +47,6 @@ libopen_rte_la_SOURCES += \
|
|||||||
runtime/orte_wakeup.c \
|
runtime/orte_wakeup.c \
|
||||||
runtime/orte_locks.c \
|
runtime/orte_locks.c \
|
||||||
runtime/orte_cr.c \
|
runtime/orte_cr.c \
|
||||||
runtime/orte_data_server.c
|
runtime/orte_data_server.c \
|
||||||
|
runtime/orte_mca_params.c
|
||||||
|
|
||||||
|
@ -52,6 +52,10 @@ bool orte_do_not_launch = false;
|
|||||||
bool orted_spin_flag = false;
|
bool orted_spin_flag = false;
|
||||||
bool orte_static_ports = false;
|
bool orte_static_ports = false;
|
||||||
bool orte_keep_fqdn_hostnames = false;
|
bool orte_keep_fqdn_hostnames = false;
|
||||||
|
bool orte_help_want_aggregate = true;
|
||||||
|
bool orte_help_show_recursions;
|
||||||
|
bool orte_params_set = false;
|
||||||
|
int orte_debug_verbosity;
|
||||||
|
|
||||||
int32_t orte_contiguous_nodes;
|
int32_t orte_contiguous_nodes;
|
||||||
int orte_debug_output = -1;
|
int orte_debug_output = -1;
|
||||||
@ -81,45 +85,13 @@ opal_pointer_array_t *orte_node_pool;
|
|||||||
bool orte_initialized = false;
|
bool orte_initialized = false;
|
||||||
bool orte_finalizing = false;
|
bool orte_finalizing = false;
|
||||||
|
|
||||||
/* whether we have registered params or not */
|
int orte_dt_init(void)
|
||||||
static bool params_set = false;
|
|
||||||
|
|
||||||
int orte_register_params(void)
|
|
||||||
{
|
{
|
||||||
int value;
|
int rc;
|
||||||
int orte_debug_verbosity;
|
opal_data_type_t tmp;
|
||||||
|
|
||||||
if (params_set) {
|
|
||||||
return ORTE_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* set default output */
|
/* set default output */
|
||||||
orte_debug_output = orte_output_open(NULL, "ORTE", "DEBUG", NULL);
|
orte_debug_output = orte_output_open(NULL, "ORTE", "DEBUG", NULL);
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "debug",
|
|
||||||
"Top-level ORTE debug switch (default verbosity: 1)",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orte_debug_flag = OPAL_INT_TO_BOOL(value);
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "debug_verbose",
|
|
||||||
"Verbosity level for ORTE debug messages (default: 1)",
|
|
||||||
false, false, -1, &orte_debug_verbosity);
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "debug_daemons",
|
|
||||||
"Whether to debug the ORTE daemons or not",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orte_debug_daemons_flag = OPAL_INT_TO_BOOL(value);
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "debug_daemons_file",
|
|
||||||
"Whether want stdout/stderr of daemons to go to a file or not",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orte_debug_daemons_file_flag = OPAL_INT_TO_BOOL(value);
|
|
||||||
/* If --debug-daemons-file was specified, that also implies
|
|
||||||
--debug-daemons */
|
|
||||||
if (orte_debug_daemons_file_flag) {
|
|
||||||
orte_debug_daemons_flag = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* open up the verbose output for ORTE debugging */
|
/* open up the verbose output for ORTE debugging */
|
||||||
if (orte_debug_flag || 0 < orte_debug_verbosity ||
|
if (orte_debug_flag || 0 < orte_debug_verbosity ||
|
||||||
(orte_debug_daemons_flag && (orte_process_info.daemon || orte_process_info.hnp))) {
|
(orte_debug_daemons_flag && (orte_process_info.daemon || orte_process_info.hnp))) {
|
||||||
@ -129,72 +101,7 @@ int orte_register_params(void)
|
|||||||
orte_output_set_verbosity(orte_debug_output, 1);
|
orte_output_set_verbosity(orte_debug_output, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "do_not_launch",
|
|
||||||
"Perform all necessary operations to prepare to launch the application, but do not actually launch it",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orte_do_not_launch = OPAL_INT_TO_BOOL(value);
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orted", "spin",
|
|
||||||
"Have any orteds spin until we can connect a debugger to them",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orted_spin_flag = OPAL_INT_TO_BOOL(value);
|
|
||||||
|
|
||||||
/* check for timing requests */
|
|
||||||
mca_base_param_reg_int_name("orte", "timing",
|
|
||||||
"Request that critical timing loops be measured",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orte_timing = OPAL_INT_TO_BOOL(value);
|
|
||||||
|
|
||||||
/* User-level debugger info string */
|
|
||||||
|
|
||||||
mca_base_param_reg_string_name("orte", "base_user_debugger",
|
|
||||||
"Sequence of user-level debuggers to search for in orterun",
|
|
||||||
false, false, "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@", NULL);
|
|
||||||
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "abort_timeout",
|
|
||||||
"Max time to wait [in secs] before aborting an ORTE operation (default: 1sec)",
|
|
||||||
false, false, 1, &value);
|
|
||||||
orte_max_timeout = 1000000.0 * value; /* convert to usec */
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "timeout_step",
|
|
||||||
"Time to wait [in usecs/proc] before aborting an ORTE operation (default: 100 usec/proc)",
|
|
||||||
false, false, 100, &orte_timeout_usec_per_proc);
|
|
||||||
|
|
||||||
/* default hostfile */
|
|
||||||
mca_base_param_reg_string_name("orte", "default_hostfile",
|
|
||||||
"Name of the default hostfile (relative or absolute path)",
|
|
||||||
false, false, NULL, &orte_default_hostfile);
|
|
||||||
|
|
||||||
|
|
||||||
/* whether or not to keep FQDN hostnames */
|
|
||||||
mca_base_param_reg_int_name("orte", "keep_fqdn_hostnames",
|
|
||||||
"Whether or not to keep FQDN hostnames [default: no]",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value);
|
|
||||||
|
|
||||||
/* whether or not static ports exist */
|
|
||||||
mca_base_param_reg_int_name("orte", "static_ports",
|
|
||||||
"Whether or not static ports are in use [default: no]",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orte_static_ports = OPAL_INT_TO_BOOL(value);
|
|
||||||
|
|
||||||
/* whether or not contiguous nodenames are in use */
|
|
||||||
mca_base_param_reg_int_name("orte", "contiguous_nodes",
|
|
||||||
"Number of nodes after which contiguous nodenames will be used [default: INT_MAX]",
|
|
||||||
false, false, INT32_MAX, &orte_contiguous_nodes);
|
|
||||||
|
|
||||||
/* All done */
|
|
||||||
params_set = true;
|
|
||||||
return ORTE_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
int orte_dt_init(void)
|
|
||||||
{
|
|
||||||
int rc;
|
|
||||||
opal_data_type_t tmp;
|
|
||||||
|
|
||||||
/** register the base system types with the DSS */
|
/** register the base system types with the DSS */
|
||||||
tmp = ORTE_STD_CNTR;
|
tmp = ORTE_STD_CNTR;
|
||||||
if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_std_cntr,
|
if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_std_cntr,
|
||||||
|
@ -319,6 +319,10 @@ ORTE_DECLSPEC extern bool orte_static_ports;
|
|||||||
ORTE_DECLSPEC extern int32_t orte_contiguous_nodes;
|
ORTE_DECLSPEC extern int32_t orte_contiguous_nodes;
|
||||||
ORTE_DECLSPEC extern int orte_debug_output;
|
ORTE_DECLSPEC extern int orte_debug_output;
|
||||||
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
|
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
|
||||||
|
ORTE_DECLSPEC extern bool orte_help_want_aggregate;
|
||||||
|
ORTE_DECLSPEC extern bool orte_help_show_recursions;
|
||||||
|
ORTE_DECLSPEC extern bool orte_params_set;
|
||||||
|
ORTE_DECLSPEC extern int orte_debug_verbosity;
|
||||||
|
|
||||||
ORTE_DECLSPEC extern char **orte_launch_environ;
|
ORTE_DECLSPEC extern char **orte_launch_environ;
|
||||||
ORTE_DECLSPEC extern opal_pointer_array_t orte_daemonmap;
|
ORTE_DECLSPEC extern opal_pointer_array_t orte_daemonmap;
|
||||||
|
@ -69,6 +69,15 @@ int orte_init(char flags)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ensure we know the tool setting for when we finalize */
|
||||||
|
if ((flags & ORTE_TOOL) || (flags & ORTE_TOOL_WITH_NAME)) {
|
||||||
|
orte_process_info.tool = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (orte_process_info.hnp) {
|
||||||
|
orte_process_info.daemon = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* setup the orte_output system */
|
/* setup the orte_output system */
|
||||||
if (ORTE_SUCCESS != (ret = orte_output_init())) {
|
if (ORTE_SUCCESS != (ret = orte_output_init())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -91,29 +100,23 @@ int orte_init(char flags)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ensure the process info structure is instantiated and initialized */
|
|
||||||
if (ORTE_SUCCESS != (ret = orte_proc_info())) {
|
|
||||||
error = "orte_proc_info";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ensure we know the tool setting for when we finalize */
|
|
||||||
if ((flags & ORTE_TOOL) || (flags & ORTE_TOOL_WITH_NAME)) {
|
|
||||||
orte_process_info.tool = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize the ORTE data type support */
|
/* Initialize the ORTE data type support */
|
||||||
if (ORTE_SUCCESS != (ret = orte_dt_init())) {
|
if (ORTE_SUCCESS != (ret = orte_dt_init())) {
|
||||||
error = "orte_dt_init";
|
error = "orte_dt_init";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Ensure the rest of the process info structure is initialized */
|
||||||
|
if (ORTE_SUCCESS != (ret = orte_proc_info())) {
|
||||||
|
error = "orte_proc_info";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
/* if I'm the HNP, make sure that the daemon flag is NOT set so that
|
/* if I'm the HNP, make sure that the daemon flag is NOT set so that
|
||||||
* components unique to non-HNP orteds can be selected and init
|
* components unique to non-HNP orteds can be selected and init
|
||||||
* my basic storage elements
|
* my basic storage elements
|
||||||
*/
|
*/
|
||||||
if (orte_process_info.hnp) {
|
if (orte_process_info.hnp) {
|
||||||
orte_process_info.daemon = false;
|
|
||||||
if (ORTE_SUCCESS != (ret = orte_hnp_globals_init())) {
|
if (ORTE_SUCCESS != (ret = orte_hnp_globals_init())) {
|
||||||
error = "orte_hnp_globals_init";
|
error = "orte_hnp_globals_init";
|
||||||
goto error;
|
goto error;
|
||||||
|
139
orte/runtime/orte_mca_params.c
Обычный файл
139
orte/runtime/orte_mca_params.c
Обычный файл
@ -0,0 +1,139 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||||
|
* University Research and Technology
|
||||||
|
* Corporation. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||||
|
* of Tennessee Research Foundation. All rights
|
||||||
|
* reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "orte_config.h"
|
||||||
|
#include "orte/constants.h"
|
||||||
|
#include "orte/types.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_SYS_TIME_H
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
|
|
||||||
|
#include "orte/util/proc_info.h"
|
||||||
|
#include "orte/util/output.h"
|
||||||
|
|
||||||
|
#include "orte/runtime/runtime.h"
|
||||||
|
#include "orte/runtime/orte_globals.h"
|
||||||
|
|
||||||
|
int orte_register_params(void)
|
||||||
|
{
|
||||||
|
int value;
|
||||||
|
|
||||||
|
if (orte_params_set) {
|
||||||
|
return ORTE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "debug",
|
||||||
|
"Top-level ORTE debug switch (default verbosity: 1)",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orte_debug_flag = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "debug_verbose",
|
||||||
|
"Verbosity level for ORTE debug messages (default: 1)",
|
||||||
|
false, false, -1, &orte_debug_verbosity);
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "debug_daemons",
|
||||||
|
"Whether to debug the ORTE daemons or not",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orte_debug_daemons_flag = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "debug_daemons_file",
|
||||||
|
"Whether want stdout/stderr of daemons to go to a file or not",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orte_debug_daemons_file_flag = OPAL_INT_TO_BOOL(value);
|
||||||
|
/* If --debug-daemons-file was specified, that also implies
|
||||||
|
--debug-daemons */
|
||||||
|
if (orte_debug_daemons_file_flag) {
|
||||||
|
orte_debug_daemons_flag = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "do_not_launch",
|
||||||
|
"Perform all necessary operations to prepare to launch the application, but do not actually launch it",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orte_do_not_launch = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "daemon_spin",
|
||||||
|
"Have any orteds spin until we can connect a debugger to them",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orted_spin_flag = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
/* check for timing requests */
|
||||||
|
mca_base_param_reg_int_name("orte", "timing",
|
||||||
|
"Request that critical timing loops be measured",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orte_timing = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
/* User-level debugger info string */
|
||||||
|
|
||||||
|
mca_base_param_reg_string_name("orte", "base_user_debugger",
|
||||||
|
"Sequence of user-level debuggers to search for in orterun",
|
||||||
|
false, false, "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@", NULL);
|
||||||
|
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "abort_timeout",
|
||||||
|
"Max time to wait [in secs] before aborting an ORTE operation (default: 1sec)",
|
||||||
|
false, false, 1, &value);
|
||||||
|
orte_max_timeout = 1000000.0 * value; /* convert to usec */
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "timeout_step",
|
||||||
|
"Time to wait [in usecs/proc] before aborting an ORTE operation (default: 100 usec/proc)",
|
||||||
|
false, false, 100, &orte_timeout_usec_per_proc);
|
||||||
|
|
||||||
|
/* default hostfile */
|
||||||
|
mca_base_param_reg_string_name("orte", "default_hostfile",
|
||||||
|
"Name of the default hostfile (relative or absolute path)",
|
||||||
|
false, false, NULL, &orte_default_hostfile);
|
||||||
|
|
||||||
|
|
||||||
|
/* whether or not to keep FQDN hostnames */
|
||||||
|
mca_base_param_reg_int_name("orte", "keep_fqdn_hostnames",
|
||||||
|
"Whether or not to keep FQDN hostnames [default: no]",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
/* whether or not contiguous nodenames are in use */
|
||||||
|
mca_base_param_reg_int_name("orte", "contiguous_nodes",
|
||||||
|
"Number of nodes after which contiguous nodename encoding will automatically be used [default: INT_MAX]",
|
||||||
|
false, false, INT32_MAX, &orte_contiguous_nodes);
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "base_help_aggregate",
|
||||||
|
"If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually. This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.",
|
||||||
|
false, false,
|
||||||
|
(int) orte_help_want_aggregate, &value);
|
||||||
|
orte_help_want_aggregate = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "base_show_output_recursions",
|
||||||
|
"If orte_base_show_output_recursion is true, recursive calls to orte_output will be reported to stderr",
|
||||||
|
false, false,
|
||||||
|
(int) false, &value);
|
||||||
|
orte_help_show_recursions = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
/* some params that are accessed elsewhere, but simply registered here so they will
|
||||||
|
* be visible to ompi_info
|
||||||
|
*/
|
||||||
|
mca_base_param_reg_string_name("orte", "tmpdir_base",
|
||||||
|
"Base of the session directory tree",
|
||||||
|
false, false, NULL, &(orte_process_info.tmpdir_base));
|
||||||
|
|
||||||
|
/* All done */
|
||||||
|
orte_params_set = true;
|
||||||
|
return ORTE_SUCCESS;
|
||||||
|
}
|
@ -80,31 +80,6 @@ int orte_output_get_verbosity(int output_id)
|
|||||||
return opal_output_get_verbosity(output_id);
|
return opal_output_get_verbosity(output_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Whether we aggregate show_help() messages or not */
|
|
||||||
static bool want_aggregate = true;
|
|
||||||
|
|
||||||
/* Whether to report recursions or not */
|
|
||||||
static bool show_recursions;
|
|
||||||
|
|
||||||
static void register_mca(void)
|
|
||||||
{
|
|
||||||
int tmp;
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "base_help_aggregate",
|
|
||||||
"If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually. This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.",
|
|
||||||
false, false,
|
|
||||||
(int) want_aggregate, &tmp);
|
|
||||||
want_aggregate = OPAL_INT_TO_BOOL(tmp);
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "base_show_output_recursions",
|
|
||||||
"If orte_base_show_output_recursion is true, recursive calls to orte_output will be reported to stderr",
|
|
||||||
false, false,
|
|
||||||
(int) false, &tmp);
|
|
||||||
show_recursions = OPAL_INT_TO_BOOL(tmp);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/************************************************************************/
|
/************************************************************************/
|
||||||
|
|
||||||
@ -118,7 +93,7 @@ int orte_output_init(void)
|
|||||||
{
|
{
|
||||||
stderr_stream = opal_output_open(NULL);
|
stderr_stream = opal_output_open(NULL);
|
||||||
regiester_mca();
|
regiester_mca();
|
||||||
if (0 == ORTE_PROC_MY_NAME->vpid && want_aggregate) {
|
if (0 == ORTE_PROC_MY_NAME->vpid && orte_help_want_aggregate) {
|
||||||
orte_output(stderr_stream, "WARNING: orte_base_help_aggregate was set to true, but this system does not support help message aggregation");
|
orte_output(stderr_stream, "WARNING: orte_base_help_aggregate was set to true, but this system does not support help message aggregation");
|
||||||
}
|
}
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
@ -429,7 +404,7 @@ static void output_vverbose(int verbose_level, int output_id,
|
|||||||
HNP).
|
HNP).
|
||||||
*/
|
*/
|
||||||
if (am_inside) {
|
if (am_inside) {
|
||||||
if (show_recursions) {
|
if (orte_help_show_recursions) { /* gate the recursion report on the show-recursions flag, not on help aggregation */
|
||||||
opal_output(0, "%s orte_output recursion detected!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
opal_output(0, "%s orte_output recursion detected!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||||
}
|
}
|
||||||
opal_output(output_id, filtered);
|
opal_output(output_id, filtered);
|
||||||
@ -474,9 +449,12 @@ static void output_vverbose(int verbose_level, int output_id,
|
|||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
filtered, output_id));
|
filtered, output_id));
|
||||||
|
|
||||||
/* If RML is not yet setup, then just output this locally.
|
/* If RML is not yet setup, or we haven't yet defined the HNP,
|
||||||
What else can we do? */
|
* then just output this locally.
|
||||||
if (NULL == orte_rml.send_buffer) {
|
* What else can we do?
|
||||||
|
*/
|
||||||
|
if (NULL == orte_rml.send_buffer ||
|
||||||
|
ORTE_PROC_MY_HNP->vpid == ORTE_VPID_INVALID) {
|
||||||
opal_output(0, filtered);
|
opal_output(0, filtered);
|
||||||
} else {
|
} else {
|
||||||
/* setup a buffer to send to the HNP */
|
/* setup a buffer to send to the HNP */
|
||||||
@ -541,7 +519,7 @@ static int show_help(const char *filename, const char *topic,
|
|||||||
|
|
||||||
/* If we're aggregating, check for duplicates. Otherwise, don't
|
/* If we're aggregating, check for duplicates. Otherwise, don't
|
||||||
track duplicates at all and always display the message. */
|
track duplicates at all and always display the message. */
|
||||||
if (orte_output_ready && want_aggregate) {
|
if (orte_output_ready && orte_help_want_aggregate) {
|
||||||
rc = get_tli(filename, topic, &tli);
|
rc = get_tli(filename, topic, &tli);
|
||||||
} else {
|
} else {
|
||||||
rc = ORTE_ERR_NOT_FOUND;
|
rc = ORTE_ERR_NOT_FOUND;
|
||||||
@ -598,7 +576,7 @@ static int show_help(const char *filename, const char *topic,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* If we're aggregating, add this process name to the list */
|
/* If we're aggregating, add this process name to the list */
|
||||||
if (orte_output_ready && want_aggregate) {
|
if (orte_output_ready && orte_help_want_aggregate) {
|
||||||
pnli = OBJ_NEW(process_name_list_item_t);
|
pnli = OBJ_NEW(process_name_list_item_t);
|
||||||
if (NULL == pnli) {
|
if (NULL == pnli) {
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
@ -711,8 +689,6 @@ int orte_output_init(void)
|
|||||||
{
|
{
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_debug_output, "orte_output init"));
|
OPAL_OUTPUT_VERBOSE((5, orte_debug_output, "orte_output init"));
|
||||||
|
|
||||||
register_mca();
|
|
||||||
|
|
||||||
/* define the default stream that has everything off */
|
/* define the default stream that has everything off */
|
||||||
OBJ_CONSTRUCT(&orte_output_default, opal_output_stream_t);
|
OBJ_CONSTRUCT(&orte_output_default, opal_output_stream_t);
|
||||||
|
|
||||||
@ -1031,11 +1007,17 @@ int orte_show_help(const char *filename, const char *topic,
|
|||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (orte_process_info.hnp) {
|
/* if we are the HNP, or the RML has not yet been setup,
|
||||||
|
* or we don't yet know our HNP, then all we can do
|
||||||
|
* is process this locally
|
||||||
|
*/
|
||||||
|
if (orte_process_info.hnp ||
|
||||||
|
NULL == orte_rml.send_buffer ||
|
||||||
|
ORTE_PROC_MY_HNP->vpid == ORTE_VPID_INVALID) {
|
||||||
rc = show_help(filename, topic, output, ORTE_PROC_MY_NAME);
|
rc = show_help(filename, topic, output, ORTE_PROC_MY_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if we are not the HNP, then we must relay the output message to
|
/* otherwise, we relay the output message to
|
||||||
* the HNP for processing
|
* the HNP for processing
|
||||||
*/
|
*/
|
||||||
else {
|
else {
|
||||||
|
@ -42,7 +42,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
|
|||||||
/* .my_name = */ {ORTE_JOBID_INVALID, ORTE_VPID_INVALID},
|
/* .my_name = */ {ORTE_JOBID_INVALID, ORTE_VPID_INVALID},
|
||||||
/* .my_daemon = */ {ORTE_JOBID_INVALID, ORTE_VPID_INVALID},
|
/* .my_daemon = */ {ORTE_JOBID_INVALID, ORTE_VPID_INVALID},
|
||||||
/* .my_daemon_uri = */ NULL,
|
/* .my_daemon_uri = */ NULL,
|
||||||
/* .my_hnp = */ {0, 0},
|
/* .my_hnp = */ {ORTE_JOBID_INVALID, ORTE_VPID_INVALID},
|
||||||
/* .my_hnp_uri = */ NULL,
|
/* .my_hnp_uri = */ NULL,
|
||||||
/* .hnp_pid = */ 0,
|
/* .hnp_pid = */ 0,
|
||||||
/* ,app_num = */ -1,
|
/* ,app_num = */ -1,
|
||||||
@ -134,10 +134,6 @@ int orte_proc_info(void)
|
|||||||
true, false, -1, &tmp);
|
true, false, -1, &tmp);
|
||||||
orte_process_info.universe_size = tmp;
|
orte_process_info.universe_size = tmp;
|
||||||
|
|
||||||
mca_base_param_reg_string_name("orte", "tmpdir_base",
|
|
||||||
"Base of the session directory tree",
|
|
||||||
false, false, NULL, &(orte_process_info.tmpdir_base));
|
|
||||||
|
|
||||||
/* get the process id */
|
/* get the process id */
|
||||||
orte_process_info.pid = getpid();
|
orte_process_info.pid = getpid();
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user