diff --git a/ompi/tools/ompi_info/components.cc b/ompi/tools/ompi_info/components.cc index feb8f5a008..59053c54b6 100644 --- a/ompi/tools/ompi_info/components.cc +++ b/ompi/tools/ompi_info/components.cc @@ -114,6 +114,7 @@ #endif #include "orte/mca/filem/filem.h" #include "orte/mca/filem/base/base.h" +#include "orte/util/output.h" using namespace std; using namespace ompi_info; @@ -189,6 +190,9 @@ void ompi_info::open_components() orte_register_params(); + // Initialize the orte_output system + orte_output_init(); + // Register the MPI layer's MCA parameters ompi_mpi_register_params(); diff --git a/ompi/tools/ompi_info/ompi_info.cc b/ompi/tools/ompi_info/ompi_info.cc index 673881295a..9efca57024 100644 --- a/ompi/tools/ompi_info/ompi_info.cc +++ b/ompi/tools/ompi_info/ompi_info.cc @@ -221,7 +221,6 @@ int main(int argc, char *argv[]) ompi_info::mca_types.push_back("rml"); ompi_info::mca_types.push_back("routed"); ompi_info::mca_types.push_back("plm"); - ompi_info::mca_types.push_back("sds"); #if OPAL_ENABLE_FT == 1 ompi_info::mca_types.push_back("snapc"); #endif diff --git a/orte/runtime/Makefile.am b/orte/runtime/Makefile.am index bfab3b4db3..f4177d0e69 100644 --- a/orte/runtime/Makefile.am +++ b/orte/runtime/Makefile.am @@ -47,4 +47,6 @@ libopen_rte_la_SOURCES += \ runtime/orte_wakeup.c \ runtime/orte_locks.c \ runtime/orte_cr.c \ - runtime/orte_data_server.c + runtime/orte_data_server.c \ + runtime/orte_mca_params.c + diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index ff8620d814..ed5615a0c6 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -52,6 +52,10 @@ bool orte_do_not_launch = false; bool orted_spin_flag = false; bool orte_static_ports = false; bool orte_keep_fqdn_hostnames = false; +bool orte_help_want_aggregate = true; +bool orte_help_show_recursions; +bool orte_params_set = false; +int orte_debug_verbosity; int32_t orte_contiguous_nodes; int orte_debug_output = -1; @@ -81,45 +85,13 @@ 
opal_pointer_array_t *orte_node_pool; bool orte_initialized = false; bool orte_finalizing = false; -/* whether we have registered params or not */ -static bool params_set = false; - -int orte_register_params(void) +int orte_dt_init(void) { - int value; - int orte_debug_verbosity; - - if (params_set) { - return ORTE_SUCCESS; - } - + int rc; + opal_data_type_t tmp; + /* set default output */ orte_debug_output = orte_output_open(NULL, "ORTE", "DEBUG", NULL); - - mca_base_param_reg_int_name("orte", "debug", - "Top-level ORTE debug switch (default verbosity: 1)", - false, false, (int)false, &value); - orte_debug_flag = OPAL_INT_TO_BOOL(value); - - mca_base_param_reg_int_name("orte", "debug_verbose", - "Verbosity level for ORTE debug messages (default: 1)", - false, false, -1, &orte_debug_verbosity); - - mca_base_param_reg_int_name("orte", "debug_daemons", - "Whether to debug the ORTE daemons or not", - false, false, (int)false, &value); - orte_debug_daemons_flag = OPAL_INT_TO_BOOL(value); - - mca_base_param_reg_int_name("orte", "debug_daemons_file", - "Whether want stdout/stderr of daemons to go to a file or not", - false, false, (int)false, &value); - orte_debug_daemons_file_flag = OPAL_INT_TO_BOOL(value); - /* If --debug-daemons-file was specified, that also implies - --debug-daemons */ - if (orte_debug_daemons_file_flag) { - orte_debug_daemons_flag = true; - } - /* open up the verbose output for ORTE debugging */ if (orte_debug_flag || 0 < orte_debug_verbosity || (orte_debug_daemons_flag && (orte_process_info.daemon || orte_process_info.hnp))) { @@ -129,72 +101,7 @@ int orte_register_params(void) orte_output_set_verbosity(orte_debug_output, 1); } } - - mca_base_param_reg_int_name("orte", "do_not_launch", - "Perform all necessary operations to prepare to launch the application, but do not actually launch it", - false, false, (int)false, &value); - orte_do_not_launch = OPAL_INT_TO_BOOL(value); - - mca_base_param_reg_int_name("orted", "spin", - "Have any orteds spin 
until we can connect a debugger to them", - false, false, (int)false, &value); - orted_spin_flag = OPAL_INT_TO_BOOL(value); - - /* check for timing requests */ - mca_base_param_reg_int_name("orte", "timing", - "Request that critical timing loops be measured", - false, false, (int)false, &value); - orte_timing = OPAL_INT_TO_BOOL(value); - - /* User-level debugger info string */ - - mca_base_param_reg_string_name("orte", "base_user_debugger", - "Sequence of user-level debuggers to search for in orterun", - false, false, "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@", NULL); - - - mca_base_param_reg_int_name("orte", "abort_timeout", - "Max time to wait [in secs] before aborting an ORTE operation (default: 1sec)", - false, false, 1, &value); - orte_max_timeout = 1000000.0 * value; /* convert to usec */ - - mca_base_param_reg_int_name("orte", "timeout_step", - "Time to wait [in usecs/proc] before aborting an ORTE operation (default: 100 usec/proc)", - false, false, 100, &orte_timeout_usec_per_proc); - - /* default hostfile */ - mca_base_param_reg_string_name("orte", "default_hostfile", - "Name of the default hostfile (relative or absolute path)", - false, false, NULL, &orte_default_hostfile); - - - /* whether or not to keep FQDN hostnames */ - mca_base_param_reg_int_name("orte", "keep_fqdn_hostnames", - "Whether or not to keep FQDN hostnames [default: no]", - false, false, (int)false, &value); - orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value); - - /* whether or not static ports exist */ - mca_base_param_reg_int_name("orte", "static_ports", - "Whether or not static ports are in use [default: no]", - false, false, (int)false, &value); - orte_static_ports = OPAL_INT_TO_BOOL(value); - - /* whether or not contiguous nodenames are in use */ - mca_base_param_reg_int_name("orte", "contiguous_nodes", - "Number of nodes after which contiguous nodenames will be used [default: INT_MAX]", - 
false, false, INT32_MAX, &orte_contiguous_nodes); - - /* All done */ - params_set = true; - return ORTE_SUCCESS; -} - -int orte_dt_init(void) -{ - int rc; - opal_data_type_t tmp; - + /** register the base system types with the DSS */ tmp = ORTE_STD_CNTR; if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_std_cntr, diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 59273023fa..3e8e1e112d 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -319,6 +319,10 @@ ORTE_DECLSPEC extern bool orte_static_ports; ORTE_DECLSPEC extern int32_t orte_contiguous_nodes; ORTE_DECLSPEC extern int orte_debug_output; ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames; +ORTE_DECLSPEC extern bool orte_help_want_aggregate; +ORTE_DECLSPEC extern bool orte_help_show_recursions; +ORTE_DECLSPEC extern bool orte_params_set; +ORTE_DECLSPEC extern int orte_debug_verbosity; ORTE_DECLSPEC extern char **orte_launch_environ; ORTE_DECLSPEC extern opal_pointer_array_t orte_daemonmap; diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index a73f9e0e8b..a020260f31 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -69,6 +69,15 @@ int orte_init(char flags) return ret; } + /* ensure we know the tool setting for when we finalize */ + if ((flags & ORTE_TOOL) || (flags & ORTE_TOOL_WITH_NAME)) { + orte_process_info.tool = true; + } + + if (orte_process_info.hnp) { + orte_process_info.daemon = false; + } + /* setup the orte_output system */ if (ORTE_SUCCESS != (ret = orte_output_init())) { ORTE_ERROR_LOG(ret); @@ -91,29 +100,23 @@ int orte_init(char flags) goto error; } - /* Ensure the process info structure is instantiated and initialized */ - if (ORTE_SUCCESS != (ret = orte_proc_info())) { - error = "orte_proc_info"; - goto error; - } - - /* ensure we know the tool setting for when we finalize */ - if ((flags & ORTE_TOOL) || (flags & ORTE_TOOL_WITH_NAME)) { - orte_process_info.tool = true; - } - /* Initialize the 
ORTE data type support */ if (ORTE_SUCCESS != (ret = orte_dt_init())) { error = "orte_dt_init"; goto error; } + /* Ensure the rest of the process info structure is initialized */ + if (ORTE_SUCCESS != (ret = orte_proc_info())) { + error = "orte_proc_info"; + goto error; + } + /* if I'm the HNP, make sure that the daemon flag is NOT set so that * components unique to non-HNP orteds can be selected and init * my basic storage elements */ if (orte_process_info.hnp) { - orte_process_info.daemon = false; if (ORTE_SUCCESS != (ret = orte_hnp_globals_init())) { error = "orte_hnp_globals_init"; goto error; diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c new file mode 100644 index 0000000000..f8d81d8e71 --- /dev/null +++ b/orte/runtime/orte_mca_params.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2008 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif + +#include "opal/mca/base/mca_base_param.h" + +#include "orte/util/proc_info.h" +#include "orte/util/output.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_globals.h" + +int orte_register_params(void) +{ + int value; + + if (orte_params_set) { + return ORTE_SUCCESS; + } + + mca_base_param_reg_int_name("orte", "debug", + "Top-level ORTE debug switch (default verbosity: 1)", + false, false, (int)false, &value); + orte_debug_flag = OPAL_INT_TO_BOOL(value); + + mca_base_param_reg_int_name("orte", "debug_verbose", + "Verbosity level for ORTE debug messages (default: 1)", + false, false, -1, &orte_debug_verbosity); + + mca_base_param_reg_int_name("orte", "debug_daemons", + "Whether to debug the ORTE daemons or not", + false, false, (int)false, &value); + orte_debug_daemons_flag = OPAL_INT_TO_BOOL(value); + + mca_base_param_reg_int_name("orte", "debug_daemons_file", + "Whether want stdout/stderr of daemons to go to a file or not", + false, false, (int)false, &value); + orte_debug_daemons_file_flag = OPAL_INT_TO_BOOL(value); + /* If --debug-daemons-file was specified, that also implies + --debug-daemons */ + if (orte_debug_daemons_file_flag) { + orte_debug_daemons_flag = true; + } + + mca_base_param_reg_int_name("orte", "do_not_launch", + "Perform all necessary operations to prepare to launch the application, but do not actually launch it", + false, false, (int)false, &value); + orte_do_not_launch = OPAL_INT_TO_BOOL(value); + + mca_base_param_reg_int_name("orte", "daemon_spin", + "Have any orteds spin until we can connect a debugger to them", + false, false, (int)false, &value); + orted_spin_flag = OPAL_INT_TO_BOOL(value); + + /* check for timing requests */ + mca_base_param_reg_int_name("orte", "timing", + "Request that critical timing loops be 
measured", + false, false, (int)false, &value); + orte_timing = OPAL_INT_TO_BOOL(value); + + /* User-level debugger info string */ + + mca_base_param_reg_string_name("orte", "base_user_debugger", + "Sequence of user-level debuggers to search for in orterun", + false, false, "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@", NULL); + + + mca_base_param_reg_int_name("orte", "abort_timeout", + "Max time to wait [in secs] before aborting an ORTE operation (default: 1sec)", + false, false, 1, &value); + orte_max_timeout = 1000000.0 * value; /* convert to usec */ + + mca_base_param_reg_int_name("orte", "timeout_step", + "Time to wait [in usecs/proc] before aborting an ORTE operation (default: 100 usec/proc)", + false, false, 100, &orte_timeout_usec_per_proc); + + /* default hostfile */ + mca_base_param_reg_string_name("orte", "default_hostfile", + "Name of the default hostfile (relative or absolute path)", + false, false, NULL, &orte_default_hostfile); + + + /* whether or not to keep FQDN hostnames */ + mca_base_param_reg_int_name("orte", "keep_fqdn_hostnames", + "Whether or not to keep FQDN hostnames [default: no]", + false, false, (int)false, &value); + orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value); + + /* whether or not contiguous nodenames are in use */ + mca_base_param_reg_int_name("orte", "contiguous_nodes", + "Number of nodes after which contiguous nodename encoding will automatically be used [default: INT_MAX]", + false, false, INT32_MAX, &orte_contiguous_nodes); + + mca_base_param_reg_int_name("orte", "base_help_aggregate", + "If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually. 
This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.", + false, false, + (int) orte_help_want_aggregate, &value); + orte_help_want_aggregate = OPAL_INT_TO_BOOL(value); + + mca_base_param_reg_int_name("orte", "base_show_output_recursions", + "If orte_base_show_output_recursion is true, recursive calls to orte_output will be reported to stderr", + false, false, + (int) false, &value); + orte_help_show_recursions = OPAL_INT_TO_BOOL(value); + + /* some params that are accessed elsewhere, but simply registered here so they will + * be visible to ompi_info + */ + mca_base_param_reg_string_name("orte", "tmpdir_base", + "Base of the session directory tree", + false, false, NULL, &(orte_process_info.tmpdir_base)); + + /* All done */ + orte_params_set = true; + return ORTE_SUCCESS; +} diff --git a/orte/util/output.c b/orte/util/output.c index 8b780cf511..ccdac22dd6 100644 --- a/orte/util/output.c +++ b/orte/util/output.c @@ -80,31 +80,6 @@ int orte_output_get_verbosity(int output_id) return opal_output_get_verbosity(output_id); } -/* Whether we aggregate show_help() messages or not */ -static bool want_aggregate = true; - -/* Whether to report recursions or not */ -static bool show_recursions; - -static void register_mca(void) -{ - int tmp; - - mca_base_param_reg_int_name("orte", "base_help_aggregate", - "If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually. 
This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.", - false, false, - (int) want_aggregate, &tmp); - want_aggregate = OPAL_INT_TO_BOOL(tmp); - - mca_base_param_reg_int_name("orte", "base_show_output_recursions", - "If orte_base_show_output_recursion is true, recursive calls to orte_output will be reported to stderr", - false, false, - (int) false, &tmp); - show_recursions = OPAL_INT_TO_BOOL(tmp); - -} - - /************************************************************************/ @@ -118,7 +93,7 @@ int orte_output_init(void) { stderr_stream = opal_output_open(NULL); regiester_mca(); - if (0 == ORTE_PROC_MY_NAME->vpid && want_aggregate) { + if (0 == ORTE_PROC_MY_NAME->vpid && orte_help_want_aggregate) { orte_output(stderr_stream, "WARNING: orte_base_help_aggregate was set to true, but this system does not support help message aggregation"); } return ORTE_SUCCESS; @@ -429,7 +404,7 @@ static void output_vverbose(int verbose_level, int output_id, HNP). */ if (am_inside) { - if (show_recursions) { + if (orte_help_show_recursions) { opal_output(0, "%s orte_output recursion detected!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } opal_output(output_id, filtered); @@ -474,9 +449,12 @@ static void output_vverbose(int verbose_level, int output_id, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), filtered, output_id)); - /* If RML is not yet setup, then just output this locally. - What else can we do? */ - if (NULL == orte_rml.send_buffer) { + /* If RML is not yet setup, or we haven't yet defined the HNP, + * then just output this locally. + * What else can we do? 
+ */ + if (NULL == orte_rml.send_buffer || + ORTE_PROC_MY_HNP->vpid == ORTE_VPID_INVALID) { opal_output(0, filtered); } else { /* setup a buffer to send to the HNP */ @@ -541,7 +519,7 @@ static int show_help(const char *filename, const char *topic, /* If we're aggregating, check for duplicates. Otherwise, don't track duplicates at all and always display the message. */ - if (orte_output_ready && want_aggregate) { + if (orte_output_ready && orte_help_want_aggregate) { rc = get_tli(filename, topic, &tli); } else { rc = ORTE_ERR_NOT_FOUND; @@ -598,7 +576,7 @@ static int show_help(const char *filename, const char *topic, } /* If we're aggregating, add this process name to the list */ - if (orte_output_ready && want_aggregate) { + if (orte_output_ready && orte_help_want_aggregate) { pnli = OBJ_NEW(process_name_list_item_t); if (NULL == pnli) { rc = ORTE_ERR_OUT_OF_RESOURCE; @@ -711,8 +689,6 @@ int orte_output_init(void) { OPAL_OUTPUT_VERBOSE((5, orte_debug_output, "orte_output init")); - register_mca(); - /* define the default stream that has everything off */ OBJ_CONSTRUCT(&orte_output_default, opal_output_stream_t); @@ -1031,11 +1007,17 @@ int orte_show_help(const char *filename, const char *topic, return ORTE_SUCCESS; } - if (orte_process_info.hnp) { + /* if we are the HNP, or the RML has not yet been setup, + * or we don't yet know our HNP, then all we can do + * is process this locally + */ + if (orte_process_info.hnp || + NULL == orte_rml.send_buffer || + ORTE_PROC_MY_HNP->vpid == ORTE_VPID_INVALID) { rc = show_help(filename, topic, output, ORTE_PROC_MY_NAME); } - /* if we are not the HNP, then we must relay the output message to + /* otherwise, we relay the output message to * the HNP for processing */ else { diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index 212a398a37..f7d776c279 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -42,7 +42,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = { /* .my_name = */ 
{ORTE_JOBID_INVALID, ORTE_VPID_INVALID}, /* .my_daemon = */ {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}, /* .my_daemon_uri = */ NULL, - /* .my_hnp = */ {0, 0}, + /* .my_hnp = */ {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}, /* .my_hnp_uri = */ NULL, /* .hnp_pid = */ 0, /* ,app_num = */ -1, @@ -134,10 +134,6 @@ int orte_proc_info(void) true, false, -1, &tmp); orte_process_info.universe_size = tmp; - mca_base_param_reg_string_name("orte", "tmpdir_base", - "Base of the session directory tree", - false, false, NULL, &(orte_process_info.tmpdir_base)); - /* get the process id */ orte_process_info.pid = getpid();