From 4be24521aa2b89c75109ed92ae270f6b016a1d2a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 4 May 2009 11:07:40 +0000 Subject: [PATCH] Modify the orte_process_info structure to handle a broader range of process types by replacing the individual booleans with a 32-bit bitmap. Use a set of #define's to define the individual bits, and a set of matching macros to test for them. Update the orte code base to use the macros instead of the booleans. Minor mod to the ompi layer to use the new #define's - just one-line name replacements. This commit was SVN r21144. --- ompi/runtime/ompi_mpi_init.c | 5 +- ompi/tools/ompi-server/ompi-server.c | 3 +- ompi/tools/ompi_info/components.cc | 5 +- .../errmgr/default/errmgr_default_component.c | 2 +- orte/mca/ess/alps/ess_alps_module.c | 12 +- orte/mca/ess/base/ess_base_std_app.c | 2 +- orte/mca/ess/base/ess_base_std_orted.c | 2 +- orte/mca/ess/base/ess_base_std_tool.c | 2 +- orte/mca/ess/bproc/ess_bproc_module.c | 12 +- orte/mca/ess/cnos/ess_cnos_module.c | 4 +- orte/mca/ess/env/ess_env_module.c | 18 +- orte/mca/ess/ess.h | 4 +- orte/mca/ess/hnp/ess_hnp_component.c | 2 +- orte/mca/ess/hnp/ess_hnp_module.c | 6 +- orte/mca/ess/lsf/ess_lsf_module.c | 13 +- .../portals_utcp/ess_portals_utcp_module.c | 4 +- .../ess/singleton/ess_singleton_component.c | 6 +- orte/mca/ess/singleton/ess_singleton_module.c | 6 +- orte/mca/ess/slave/ess_slave_module.c | 10 +- orte/mca/ess/slurm/ess_slurm_module.c | 12 +- orte/mca/ess/slurmd/ess_slurmd_component.c | 2 +- orte/mca/ess/slurmd/ess_slurmd_module.c | 4 +- orte/mca/ess/tool/ess_tool_component.c | 2 +- orte/mca/ess/tool/ess_tool_module.c | 4 +- orte/mca/filem/base/filem_base_fns.c | 3 +- orte/mca/filem/base/filem_base_receive.c | 4 +- orte/mca/grpcomm/bad/grpcomm_bad_module.c | 8 +- orte/mca/grpcomm/basic/grpcomm_basic_module.c | 16 +- orte/mca/grpcomm/hier/grpcomm_hier_module.c | 2 +- orte/mca/iof/base/iof_base_close.c | 2 +- orte/mca/iof/base/iof_base_open.c | 2 +- 
orte/mca/iof/hnp/iof_hnp_component.c | 2 +- orte/mca/iof/orted/iof_orted_component.c | 2 +- orte/mca/iof/tool/iof_tool_component.c | 2 +- orte/mca/odls/base/odls_base_default_fns.c | 12 +- orte/mca/odls/base/odls_base_state.c | 5 +- orte/mca/oob/tcp/oob_tcp.c | 12 +- orte/mca/oob/tcp/oob_tcp_msg.c | 2 +- orte/mca/plm/base/plm_base_close.c | 2 +- orte/mca/plm/base/plm_base_launch_support.c | 7 +- orte/mca/plm/base/plm_base_orted_cmds.c | 3 +- orte/mca/plm/base/plm_base_receive.c | 2 +- orte/mca/plm/base/plm_base_select.c | 2 +- orte/mca/plm/ccp/plm_ccp_component.c | 2 +- orte/mca/plm/rsh/plm_rsh_module.c | 5 +- orte/mca/ras/alps/ras_alps_component.c | 2 +- orte/mca/ras/ccp/ras_ccp_component.c | 2 +- orte/mca/rml/base/rml_base_contact.c | 3 +- orte/mca/routed/base/routed_base_receive.c | 4 +- orte/mca/routed/binomial/routed_binomial.c | 38 ++--- orte/mca/routed/direct/routed_direct.c | 161 +++++++++++++++++- orte/mca/routed/linear/routed_linear.c | 40 ++--- orte/mca/routed/radix/routed_radix.c | 38 ++--- orte/mca/snapc/full/snapc_full_global.c | 9 +- orte/orted/orted_comm.c | 20 +-- orte/orted/orted_main.c | 29 ++-- orte/runtime/orte_globals.c | 5 +- orte/runtime/orte_init.c | 14 +- orte/runtime/orte_mca_params.c | 2 +- orte/runtime/runtime.h | 8 +- orte/test/system/binom.c | 3 +- orte/test/system/orte_loop_child.c | 3 +- orte/test/system/orte_tool.c | 3 +- orte/test/system/radix.c | 3 +- orte/test/system/sigusr_trap.c | 3 +- orte/tools/orte-checkpoint/orte-checkpoint.c | 3 +- orte/tools/orte-clean/orte-clean.c | 4 +- orte/tools/orte-iof/orte-iof.c | 3 +- orte/tools/orte-ps/orte-ps.c | 3 +- orte/tools/orte-restart/orte-restart.c | 3 +- orte/tools/orte-top/orte-top.c | 3 +- orte/tools/orterun/orterun.c | 8 +- orte/util/name_fns.h | 2 +- orte/util/nidmap.c | 4 +- orte/util/proc_info.c | 10 +- orte/util/proc_info.h | 25 ++- orte/util/show_help.c | 5 +- 77 files changed, 437 insertions(+), 265 deletions(-) diff --git a/ompi/runtime/ompi_mpi_init.c 
b/ompi/runtime/ompi_mpi_init.c index d67c702bb4..7b08a22c4e 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -336,9 +336,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) gettimeofday(&ompistart, NULL); } - /* Setup ORTE - note that we are not a tool */ - orte_process_info.mpi_proc = true; - if (ORTE_SUCCESS != (ret = orte_init(ORTE_NON_TOOL))) { + /* Setup ORTE - note that we are an MPI process */ + if (ORTE_SUCCESS != (ret = orte_init(ORTE_PROC_MPI))) { error = "ompi_mpi_init: orte_init failed"; goto error; } diff --git a/ompi/tools/ompi-server/ompi-server.c b/ompi/tools/ompi-server/ompi-server.c index 1a8c5e1bc8..8d04961547 100644 --- a/ompi/tools/ompi-server/ompi-server.c +++ b/ompi/tools/ompi-server/ompi-server.c @@ -54,6 +54,7 @@ #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" @@ -187,7 +188,7 @@ int main(int argc, char *argv[]) * so that we only open up the communications infrastructure. No * session directories will be created. 
*/ - if (ORTE_SUCCESS != (ret = orte_init(ORTE_TOOL))) { + if (ORTE_SUCCESS != (ret = orte_init(ORTE_PROC_TOOL))) { fprintf(stderr, "ompi-server: failed to initialize -- aborting\n"); exit(1); } diff --git a/ompi/tools/ompi_info/components.cc b/ompi/tools/ompi_info/components.cc index 9ca57d3aa8..21b711748c 100644 --- a/ompi/tools/ompi_info/components.cc +++ b/ompi/tools/ompi_info/components.cc @@ -89,6 +89,7 @@ #include "orte/mca/notifier/notifier.h" #include "orte/mca/notifier/base/base.h" #include "orte/util/show_help.h" +#include "orte/util/proc_info.h" #if !ORTE_DISABLE_FULL_SUPPORT #include "orte/mca/iof/iof.h" #include "orte/mca/iof/base/base.h" @@ -269,9 +270,9 @@ void ompi_info::open_components() component_map["installdirs"] = &opal_installdirs_components; // ORTE frameworks - // Set orte_process_info.hnp to true to force all frameworks to + // Set orte_process_info.proc_type to HNP to force all frameworks to // open components - orte_process_info.hnp = true; + orte_process_info.proc_type = ORTE_PROC_HNP; if (ORTE_SUCCESS != orte_errmgr_base_open()) { goto error; diff --git a/orte/mca/errmgr/default/errmgr_default_component.c b/orte/mca/errmgr/default/errmgr_default_component.c index 1e0bc44ca3..168ee70707 100644 --- a/orte/mca/errmgr/default/errmgr_default_component.c +++ b/orte/mca/errmgr/default/errmgr_default_component.c @@ -92,7 +92,7 @@ int orte_errmgr_default_component_close(void) int orte_errmgr_default_component_query(mca_base_module_t **module, int *priority) { /* If we are not an HNP, then don't pick us! */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { /* don't take me! 
*/ *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c index 4efc5a2d1f..4202cbfefe 100644 --- a/orte/mca/ess/alps/ess_alps_module.c +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -37,7 +37,7 @@ static int alps_set_name(void); -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static uint8_t proc_get_locality(orte_process_name_t *proc); static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); @@ -66,7 +66,7 @@ orte_ess_base_module_t orte_ess_alps_module = { }; -static int rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; @@ -83,13 +83,13 @@ static int rte_init(char flags) /* if I am a daemon, complete my setup using the * default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); @@ -131,11 +131,11 @@ static int rte_finalize(void) int ret; /* if I am a daemon, finalize using the default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { ORTE_ERROR_LOG(ret); } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 7187da988d..56f5cbf841 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -164,7 +164,7 @@ int orte_ess_base_app_setup(void) error = "orte_snapc_base_open"; goto error; } - if 
(ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, !ORTE_PROC_IS_DAEMON))) { ORTE_ERROR_LOG(ret); error = "orte_snapc_base_select"; goto error; diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 5eafd2dd6b..927e2994de 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -243,7 +243,7 @@ int orte_ess_base_orted_setup(void) goto error; } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, !ORTE_PROC_IS_DAEMON))) { ORTE_ERROR_LOG(ret); error = "orte_snapc_base_select"; goto error; diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index f3e45d369c..1f62f39c46 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -136,7 +136,7 @@ int orte_ess_base_tool_setup(void) error = "orte_snapc_base_open"; goto error; } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, !ORTE_PROC_IS_DAEMON))) { ORTE_ERROR_LOG(ret); error = "orte_snapc_base_select"; goto error; diff --git a/orte/mca/ess/bproc/ess_bproc_module.c b/orte/mca/ess/bproc/ess_bproc_module.c index 07aa2b9edf..4e8d72dfdd 100644 --- a/orte/mca/ess/bproc/ess_bproc_module.c +++ b/orte/mca/ess/bproc/ess_bproc_module.c @@ -36,7 +36,7 @@ static int bproc_set_name(void); -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static bool proc_is_local(orte_process_name_t *proc); static char* proc_get_hostname(orte_process_name_t *proc); @@ -62,7 +62,7 @@ static opal_pointer_array_t nidmap; static orte_pmap_t *pmap = NULL; static orte_vpid_t nprocs; 
-static int rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; @@ -79,14 +79,14 @@ static int rte_init(char flags) /* if I am a daemon, complete my setup using the * default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); @@ -134,11 +134,11 @@ static int rte_finalize(void) int32_t i; /* if I am a daemon, finalize using the default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { ORTE_ERROR_LOG(ret); } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/ess/cnos/ess_cnos_module.c b/orte/mca/ess/cnos/ess_cnos_module.c index d9d57a1b79..e86ed1f169 100644 --- a/orte/mca/ess/cnos/ess_cnos_module.c +++ b/orte/mca/ess/cnos/ess_cnos_module.c @@ -36,7 +36,7 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/cnos/ess_cnos.h" -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static void rte_abort(int status, bool report) __opal_attribute_noreturn__; static uint8_t proc_get_locality(orte_process_name_t *proc); @@ -64,7 +64,7 @@ orte_ess_base_module_t orte_ess_cnos_module = { static cnos_nidpid_map_t *map; -static int rte_init(char flags) +static int rte_init(void) { int rc; int nprocs; diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index 482198772d..c2a0fef277 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ 
b/orte/mca/ess/env/ess_env_module.c @@ -73,7 +73,7 @@ static int env_set_name(void); -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static uint8_t proc_get_locality(orte_process_name_t *proc); static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); @@ -110,7 +110,7 @@ orte_ess_base_module_t orte_ess_env_module = { #endif }; -static int rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; @@ -127,14 +127,14 @@ static int rte_init(char flags) /* if I am a daemon, complete my setup using the * default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); @@ -177,11 +177,11 @@ static int rte_finalize(void) int ret; /* if I am a daemon, finalize using the default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { ORTE_ERROR_LOG(ret); } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { ORTE_ERROR_LOG(ret); @@ -413,6 +413,7 @@ static int env_set_name(void) static int rte_ft_event(int state) { int ret, exit_status = ORTE_SUCCESS; + orte_proc_type_t svtype; /******** Checkpoint Prep ********/ if(OPAL_CRS_CHECKPOINT == state) { @@ -496,12 +497,13 @@ static int rte_ft_event(int state) * Restart the routed framework * JJH: Lie to the finalize function so it does not try to contact the daemon. 
*/ - orte_process_info.tool = true; + svtype = orte_process_info.proc_type; + orte_process_info.proc_type = ORTE_PROC_TOOL; if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) { exit_status = ret; goto cleanup; } - orte_process_info.tool = false; + orte_process_info.proc_type = svtype; if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) { exit_status = ret; goto cleanup; diff --git a/orte/mca/ess/ess.h b/orte/mca/ess/ess.h index f76d2b277e..e8fc43c1d0 100644 --- a/orte/mca/ess/ess.h +++ b/orte/mca/ess/ess.h @@ -29,6 +29,8 @@ #include "opal/mca/mca.h" +#include "orte/util/proc_info.h" + BEGIN_C_DECLS /* @@ -38,7 +40,7 @@ BEGIN_C_DECLS /* * Initialize the RTE for this environment */ -typedef int (*orte_ess_base_module_init_fn_t)(char flags); +typedef int (*orte_ess_base_module_init_fn_t)(void); /* * Finalize the RTE for this environment diff --git a/orte/mca/ess/hnp/ess_hnp_component.c b/orte/mca/ess/hnp/ess_hnp_component.c index 06faaf7b0f..69bc673493 100644 --- a/orte/mca/ess/hnp/ess_hnp_component.c +++ b/orte/mca/ess/hnp/ess_hnp_component.c @@ -73,7 +73,7 @@ int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority) /* we are the hnp module - we need to be selected * IFF we are designated as the hnp */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { *priority = 100; *module = (mca_base_module_t *)&orte_ess_hnp_module; return ORTE_SUCCESS; diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 49169c7b4e..b4b10f00ef 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -73,7 +73,7 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/hnp/ess_hnp.h" -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static void rte_abort(int status, bool report) __opal_attribute_noreturn__; static uint8_t proc_get_locality(orte_process_name_t *proc); @@ -104,7 +104,7 @@ orte_ess_base_module_t orte_ess_hnp_module = { }; -static int 
rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; @@ -430,7 +430,7 @@ static int rte_init(char flags) goto error; } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) { + if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, !ORTE_PROC_IS_DAEMON))) { ORTE_ERROR_LOG(ret); error = "orte_snapc_base_select"; goto error; diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c index dd7f98d359..bbc4ae5c2f 100644 --- a/orte/mca/ess/lsf/ess_lsf_module.c +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -36,6 +36,7 @@ #include "orte/util/show_help.h" #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_globals.h" #include "opal/mca/base/mca_base_param.h" #include "orte/mca/errmgr/errmgr.h" @@ -47,7 +48,7 @@ static int lsf_set_name(void); -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static uint8_t proc_get_locality(orte_process_name_t *proc); static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); @@ -76,7 +77,7 @@ orte_ess_base_module_t orte_ess_lsf_module = { }; -static int rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; @@ -94,13 +95,13 @@ static int rte_init(char flags) /* if I am a daemon, complete my setup using the * default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); @@ -143,11 +144,11 @@ static int rte_finalize(void) int ret; /* if I am a daemon, finalize using the default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if 
(ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { ORTE_ERROR_LOG(ret); } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c b/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c index 7ad277d3fc..43c617c924 100644 --- a/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c +++ b/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c @@ -35,7 +35,7 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/portals_utcp/ess_portals_utcp.h" -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static void rte_abort(int status, bool report) __opal_attribute_noreturn__; static uint8_t proc_get_locality(orte_process_name_t *proc); @@ -63,7 +63,7 @@ orte_ess_base_module_t orte_ess_portals_utcp_module = { static char **nidmap=NULL; -static int rte_init(char flags) +static int rte_init(void) { int rc; orte_vpid_t vpid; diff --git a/orte/mca/ess/singleton/ess_singleton_component.c b/orte/mca/ess/singleton/ess_singleton_component.c index a2b514b5a0..35d026a3e7 100644 --- a/orte/mca/ess/singleton/ess_singleton_component.c +++ b/orte/mca/ess/singleton/ess_singleton_component.c @@ -73,9 +73,9 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority /* if we are an HNP, daemon, or tool, then we * are definitely not a singleton! 
*/ - if (orte_process_info.hnp || - orte_process_info.daemon || - orte_process_info.tool) { + if (ORTE_PROC_IS_HNP || + ORTE_PROC_IS_DAEMON || + ORTE_PROC_IS_TOOL) { *module = NULL; return ORTE_ERROR; } diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 3375f022ed..b407c58b90 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -65,7 +65,7 @@ static void set_handler_default(int sig) #endif /* !defined(__WINDOWS__) */ } -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static uint8_t proc_get_locality(orte_process_name_t *proc); static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); @@ -93,7 +93,7 @@ orte_ess_base_module_t orte_ess_singleton_module = { NULL /* ft_event */ }; -static int rte_init(char flags) +static int rte_init(void) { int rc; @@ -373,7 +373,7 @@ static int fork_hnp(void) orte_process_info.my_hnp_uri = strdup(orted_uri); /* indicate we are a singleton so orte_init knows what to do */ - orte_process_info.singleton = true; + orte_process_info.proc_type = ORTE_PROC_SINGLETON; /* all done - report success */ free(orted_uri); return ORTE_SUCCESS; diff --git a/orte/mca/ess/slave/ess_slave_module.c b/orte/mca/ess/slave/ess_slave_module.c index 95a0086764..146538b638 100644 --- a/orte/mca/ess/slave/ess_slave_module.c +++ b/orte/mca/ess/slave/ess_slave_module.c @@ -73,7 +73,7 @@ static int slave_set_name(void); -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static uint8_t proc_get_locality(orte_process_name_t *proc); static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); @@ -110,7 +110,7 @@ orte_ess_base_module_t orte_ess_slave_module = { #endif }; -static int rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; @@ -309,6 +309,7 @@ static int slave_set_name(void) static int rte_ft_event(int state) { int ret, 
exit_status = ORTE_SUCCESS; + orte_proc_type_t svtype; /******** Checkpoint Prep ********/ if(OPAL_CRS_CHECKPOINT == state) { @@ -392,12 +393,13 @@ static int rte_ft_event(int state) * Restart the routed framework * JJH: Lie to the finalize function so it does not try to contact the daemon. */ - orte_process_info.tool = true; + svtype = orte_process_info.proc_type; + orte_process_info.proc_type = ORTE_PROC_TOOL; if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) { exit_status = ret; goto cleanup; } - orte_process_info.tool = false; + orte_process_info.proc_type = svtype; if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) { exit_status = ret; goto cleanup; diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 5178bb9eb0..da9b9da641 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -70,7 +70,7 @@ static char *get_slurm_nodename(int nodeid); static int slurm_set_name(void); static int build_daemon_nidmap(void); -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static uint8_t proc_get_locality(orte_process_name_t *proc); static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); @@ -99,7 +99,7 @@ orte_ess_base_module_t orte_ess_slurm_module = { }; -static int rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; @@ -116,7 +116,7 @@ static int rte_init(char flags) /* if I am a daemon, complete my setup using the * default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; @@ -148,7 +148,7 @@ static int rte_init(char flags) } return ORTE_SUCCESS; } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { ORTE_ERROR_LOG(ret); @@ -191,11 
+191,11 @@ static int rte_finalize(void) int ret; /* if I am a daemon, finalize using the default procedure */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { ORTE_ERROR_LOG(ret); } - } else if (orte_process_info.tool) { + } else if (ORTE_PROC_IS_TOOL) { /* otherwise, if I am a tool proc, use that procedure */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/ess/slurmd/ess_slurmd_component.c b/orte/mca/ess/slurmd/ess_slurmd_component.c index 7f5f26b45e..eaf4f5b5b7 100644 --- a/orte/mca/ess/slurmd/ess_slurmd_component.c +++ b/orte/mca/ess/slurmd/ess_slurmd_component.c @@ -74,7 +74,7 @@ int orte_ess_slurmd_component_query(mca_base_module_t **module, int *priority) * by mpirun but are in a slurm world */ - if (orte_process_info.mpi_proc && + if (ORTE_PROC_IS_MPI && NULL != getenv("SLURM_JOBID") && NULL != getenv("SLURM_STEPID") && NULL == orte_process_info.my_hnp_uri) { diff --git a/orte/mca/ess/slurmd/ess_slurmd_module.c b/orte/mca/ess/slurmd/ess_slurmd_module.c index a87b064288..93a8cb2c12 100644 --- a/orte/mca/ess/slurmd/ess_slurmd_module.c +++ b/orte/mca/ess/slurmd/ess_slurmd_module.c @@ -56,7 +56,7 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/slurmd/ess_slurmd.h" -static int rte_init(char flags); +static int rte_init(void); static int rte_finalize(void); static uint8_t proc_get_locality(orte_process_name_t *proc); static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); @@ -89,7 +89,7 @@ static bool app_init_complete; /**** MODULE FUNCTIONS ****/ -static int rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; diff --git a/orte/mca/ess/tool/ess_tool_component.c b/orte/mca/ess/tool/ess_tool_component.c index eba95ced64..e747389d57 100644 --- a/orte/mca/ess/tool/ess_tool_component.c +++ b/orte/mca/ess/tool/ess_tool_component.c @@ -74,7 +74,7 @@ int 
orte_ess_tool_component_query(mca_base_module_t **module, int *priority) * precedence. This would happen, for example, * if the tool is a distributed set of processes */ - if (orte_process_info.tool) { + if (ORTE_PROC_IS_TOOL || ORTE_PROC_IS_TOOL_WNAME) { *priority = 10; *module = (mca_base_module_t *)&orte_ess_tool_module; return ORTE_SUCCESS; diff --git a/orte/mca/ess/tool/ess_tool_module.c b/orte/mca/ess/tool/ess_tool_module.c index c6bf55390f..377b3f1ff8 100644 --- a/orte/mca/ess/tool/ess_tool_module.c +++ b/orte/mca/ess/tool/ess_tool_module.c @@ -41,7 +41,7 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/tool/ess_tool.h" -static int rte_init(char flags); +static int rte_init(void); static void rte_abort(int status, bool report) __opal_attribute_noreturn__; static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); @@ -63,7 +63,7 @@ orte_ess_base_module_t orte_ess_tool_module = { }; -static int rte_init(char flags) +static int rte_init(void) { int ret; char *error = NULL; diff --git a/orte/mca/filem/base/filem_base_fns.c b/orte/mca/filem/base/filem_base_fns.c index 12567a4cb7..7ec57f8199 100644 --- a/orte/mca/filem/base/filem_base_fns.c +++ b/orte/mca/filem/base/filem_base_fns.c @@ -38,6 +38,7 @@ #include "orte/mca/rml/rml_types.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/runtime/orte_globals.h" +#include "orte/util/proc_info.h" #include "orte/mca/filem/filem.h" #include "orte/mca/filem/base/base.h" @@ -231,7 +232,7 @@ int orte_filem_base_get_proc_node_name(orte_process_name_t *proc, char **machine /* set default answer */ *machine_name = NULL; - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { /* if I am the HNP, then all the data structures are local to me - no * need to send messages around to get the info */ diff --git a/orte/mca/filem/base/filem_base_receive.c b/orte/mca/filem/base/filem_base_receive.c index 368cf415f0..f7df6449c3 100644 --- a/orte/mca/filem/base/filem_base_receive.c +++ 
b/orte/mca/filem/base/filem_base_receive.c @@ -71,7 +71,7 @@ int orte_filem_base_comm_start(void) int rc; /* Only active in HNP and daemons */ - if( !orte_process_info.hnp && !orte_process_info.daemon ) { + if( !ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON ) { return ORTE_SUCCESS; } if ( recv_issued ) { @@ -101,7 +101,7 @@ int orte_filem_base_comm_stop(void) int rc; /* Only active in HNP and daemons */ - if( !orte_process_info.hnp && !orte_process_info.daemon ) { + if( !ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON ) { return ORTE_SUCCESS; } if ( recv_issued ) { diff --git a/orte/mca/grpcomm/bad/grpcomm_bad_module.c b/orte/mca/grpcomm/bad/grpcomm_bad_module.c index 2cd176ae83..d435850fe8 100644 --- a/orte/mca/grpcomm/bad/grpcomm_bad_module.c +++ b/orte/mca/grpcomm/bad/grpcomm_bad_module.c @@ -86,7 +86,7 @@ static int init(void) /* if we are a daemon or the hnp, we need to post a * recv to catch any collective operations */ - if (orte_process_info.daemon || orte_process_info.hnp) { + if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON_COLLECTIVE, ORTE_RML_NON_PERSISTENT, @@ -109,7 +109,7 @@ static void finalize(void) /* if we are a daemon or the hnp, we need to cancel the * recv we posted */ - if (orte_process_info.daemon || orte_process_info.hnp) { + if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON_COLLECTIVE); } } @@ -201,7 +201,7 @@ static int xcast(orte_jobid_t job, * fire right away, but that's okay * The macro makes a copy of the buffer, so it's okay to release it here */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor); } else { /* otherwise, send it to the HNP for relay */ @@ -540,7 +540,7 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data) if (jobdat->num_collected == jobdat->num_participating) { 
/* if I am the HNP, go process the results */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { goto hnp_process; } diff --git a/orte/mca/grpcomm/basic/grpcomm_basic_module.c b/orte/mca/grpcomm/basic/grpcomm_basic_module.c index ea5065fe38..d6c0749892 100644 --- a/orte/mca/grpcomm/basic/grpcomm_basic_module.c +++ b/orte/mca/grpcomm/basic/grpcomm_basic_module.c @@ -102,7 +102,7 @@ static int init(void) ORTE_ERROR_LOG(rc); } - if (opal_profile && orte_process_info.mpi_proc) { + if (opal_profile && ORTE_PROC_IS_MPI) { /* if I am an MPI application proc, then create a buffer * to pack all my attributes in */ profile_buf = OBJ_NEW(opal_buffer_t); @@ -112,7 +112,7 @@ static int init(void) } } - if (orte_process_info.hnp && recv_on) { + if (ORTE_PROC_IS_HNP && recv_on) { /* open the profile file for writing */ if (NULL == opal_profile_file) { /* no file specified - we will just ignore any incoming data */ @@ -138,7 +138,7 @@ static int init(void) /* if we are a daemon or the hnp, we need to post a * recv to catch any collective operations */ - if (orte_process_info.daemon || orte_process_info.hnp) { + if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON_COLLECTIVE, ORTE_RML_NON_PERSISTENT, @@ -161,7 +161,7 @@ static void finalize(void) orte_grpcomm_base_modex_finalize(); - if (opal_profile && orte_process_info.mpi_proc) { + if (opal_profile && ORTE_PROC_IS_MPI) { /* if I am an MPI proc, send my buffer to the collector */ boptr = &bo; opal_dss.unload(profile_buf, (void**)&boptr->bytes, &boptr->size); @@ -175,7 +175,7 @@ static void finalize(void) OBJ_DESTRUCT(&profile); } - if (orte_process_info.hnp && recv_on) { + if (ORTE_PROC_IS_HNP && recv_on) { /* if we are profiling and I am the HNP, then stop the * profiling receive */ @@ -189,7 +189,7 @@ static void finalize(void) /* if we are a daemon or the hnp, we need to cancel the * recv we posted */ - if (orte_process_info.daemon || 
orte_process_info.hnp) { + if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON_COLLECTIVE); } } @@ -281,7 +281,7 @@ static int xcast(orte_jobid_t job, * fire right away, but that's okay * The macro makes a copy of the buffer, so it's okay to release it here */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor); } else { /* otherwise, send it to the HNP for relay */ @@ -928,7 +928,7 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data) if (jobdat->num_collected == jobdat->num_participating) { /* if I am the HNP, go process the results */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { goto hnp_process; } diff --git a/orte/mca/grpcomm/hier/grpcomm_hier_module.c b/orte/mca/grpcomm/hier/grpcomm_hier_module.c index 2166215007..54c7b27984 100644 --- a/orte/mca/grpcomm/hier/grpcomm_hier_module.c +++ b/orte/mca/grpcomm/hier/grpcomm_hier_module.c @@ -206,7 +206,7 @@ static int xcast(orte_jobid_t job, * fire right away, but that's okay * The macro makes a copy of the buffer, so it's okay to release it here */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor); } else { /* otherwise, send it to the HNP for relay */ diff --git a/orte/mca/iof/base/iof_base_close.c b/orte/mca/iof/base/iof_base_close.c index ef7f617889..a5168887c8 100644 --- a/orte/mca/iof/base/iof_base_close.c +++ b/orte/mca/iof/base/iof_base_close.c @@ -47,7 +47,7 @@ int orte_iof_base_close(void) OBJ_DESTRUCT(&orte_iof_base.iof_components_opened); OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock); - if (!orte_process_info.daemon) { + if (!ORTE_PROC_IS_DAEMON) { /* check if anything is still trying to be written out */ wev = orte_iof_base.iof_write_stdout->wev; if (!opal_list_is_empty(&wev->outputs)) { diff --git 
a/orte/mca/iof/base/iof_base_open.c b/orte/mca/iof/base/iof_base_open.c index 2e0726916b..a1ddea3ac0 100644 --- a/orte/mca/iof/base/iof_base_open.c +++ b/orte/mca/iof/base/iof_base_open.c @@ -195,7 +195,7 @@ int orte_iof_base_open(void) } /* daemons do not need to do this as they do not write out stdout/err */ - if (!orte_process_info.daemon) { + if (!ORTE_PROC_IS_DAEMON) { /* setup the stdout event */ ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stdout, ORTE_PROC_MY_NAME, 1, ORTE_IOF_STDOUT, orte_iof_base_write_handler, NULL); diff --git a/orte/mca/iof/hnp/iof_hnp_component.c b/orte/mca/iof/hnp/iof_hnp_component.c index 7408ab56d1..311efc8548 100644 --- a/orte/mca/iof/hnp/iof_hnp_component.c +++ b/orte/mca/iof/hnp/iof_hnp_component.c @@ -131,7 +131,7 @@ static int orte_iof_hnp_query(mca_base_module_t **module, int *priority) *priority = -1; /* if we are not the HNP, then don't use this module */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { return ORTE_ERROR; } diff --git a/orte/mca/iof/orted/iof_orted_component.c b/orte/mca/iof/orted/iof_orted_component.c index 2a2d6c54e2..507a21bfb4 100644 --- a/orte/mca/iof/orted/iof_orted_component.c +++ b/orte/mca/iof/orted/iof_orted_component.c @@ -114,7 +114,7 @@ static int orte_iof_orted_query(mca_base_module_t **module, int *priority) *priority = -1; /* if we are not a daemon, then don't use this module */ - if (!orte_process_info.daemon) { + if (!ORTE_PROC_IS_DAEMON) { return ORTE_ERROR; } diff --git a/orte/mca/iof/tool/iof_tool_component.c b/orte/mca/iof/tool/iof_tool_component.c index 57f6d2a37a..15b061175f 100644 --- a/orte/mca/iof/tool/iof_tool_component.c +++ b/orte/mca/iof/tool/iof_tool_component.c @@ -105,7 +105,7 @@ static int orte_iof_tool_query(mca_base_module_t **module, int *priority) *priority = -1; /* if we are not a tool, then don't use this module */ - if (!orte_process_info.tool) { + if (!ORTE_PROC_IS_TOOL) { return ORTE_ERROR; } diff --git a/orte/mca/odls/base/odls_base_default_fns.c 
b/orte/mca/odls/base/odls_base_default_fns.c index 07e234064c..f82f735e0d 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -659,7 +659,7 @@ REPORT_ERROR: /* if we are the HNP, then we would rather not send this to ourselves - * instead, we queue it up for local processing */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert, ORTE_RML_TAG_APP_LAUNCH_CALLBACK, orte_plm_base_app_report_launch); @@ -1416,7 +1416,7 @@ CLEANUP: /* if we are the HNP, then we would rather not send this to ourselves - * instead, we queue it up for local processing */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert, ORTE_RML_TAG_APP_LAUNCH_CALLBACK, orte_plm_base_app_report_launch); @@ -1814,7 +1814,7 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc, /* if we are the HNP, then we would rather not send this to ourselves - * instead, we queue it up for local processing */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buffer, ORTE_RML_TAG_INIT_ROUTES, orte_routed_base_process_msg); @@ -1920,7 +1920,7 @@ static void check_proc_complete(orte_odls_child_t *child) /* if we are the HNP, then we would rather not send this to ourselves - * instead, we queue it up for local processing */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert, ORTE_RML_TAG_PLM, orte_plm_base_receive_process_msg); @@ -1989,7 +1989,7 @@ static void check_proc_complete(orte_odls_child_t *child) /* if we are the HNP, then we would rather not send this to ourselves - * instead, we queue it up for local processing */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert, ORTE_RML_TAG_PLM, orte_plm_base_receive_process_msg); @@ -2483,7 +2483,7 @@ RECORD: /* if we are the HNP, then we would rather not 
send this to ourselves - * instead, we queue it up for local processing */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert, ORTE_RML_TAG_PLM, orte_plm_base_receive_process_msg); diff --git a/orte/mca/odls/base/odls_base_state.c b/orte/mca/odls/base/odls_base_state.c index 4c8dd3502d..2a57efc16f 100644 --- a/orte/mca/odls/base/odls_base_state.c +++ b/orte/mca/odls/base/odls_base_state.c @@ -37,6 +37,7 @@ #include "opal/util/basename.h" #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/filem/filem.h" @@ -72,7 +73,7 @@ int orte_odls_base_preload_files_app_context(orte_app_context_t* app_context) /* Define the process set */ p_set = OBJ_NEW(orte_filem_base_process_set_t); - if( orte_process_info.hnp ) { + if( ORTE_PROC_IS_HNP ) { /* if I am the HNP, then use me as the source */ p_set->source.jobid = ORTE_PROC_MY_NAME->jobid; p_set->source.vpid = ORTE_PROC_MY_NAME->vpid; @@ -224,7 +225,7 @@ static int orte_odls_base_preload_append_files(orte_app_context_t* context, } /* If this is the HNP, then source = sink, so use the same path for each local and remote */ - if( orte_process_info.hnp ) { + if( ORTE_PROC_IS_HNP ) { free(remote_targets[i]); remote_targets[i] = strdup(local_ref); } diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index cec3940d3b..77f425cd30 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -596,7 +596,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t port in the range. 
Otherwise, tcp_port_min will be 0, which means "pick any port" */ if (AF_INET == af_family) { - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (NULL != mca_oob_tcp_component.tcp4_static_ports) { /* if static ports were provided, the daemon takes the * first entry in the list @@ -613,7 +613,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t opal_argv_append_nosize(&ports, "0"); orte_static_ports = false; } - } else if (orte_process_info.mpi_proc) { + } else if (ORTE_PROC_IS_MPI) { if (NULL != mca_oob_tcp_component.tcp4_static_ports) { /* if static ports were provided, an mpi proc takes its * node_local_rank entry in the list IF it has that info @@ -652,7 +652,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t #if OPAL_WANT_IPV6 if (AF_INET6 == af_family) { - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { if (NULL != mca_oob_tcp_component.tcp6_static_ports) { /* if static ports were provided, the daemon takes the * first entry in the list @@ -669,7 +669,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t opal_argv_append_nosize(&ports, "0"); orte_static_ports = false; } - } else if (orte_process_info.mpi_proc) { + } else if (ORTE_PROC_IS_MPI) { if (NULL != mca_oob_tcp_component.tcp6_static_ports) { /* if static ports were provided, an mpi proc takes its * node_local_rank entry in the list IF it has that info @@ -1480,10 +1480,10 @@ int mca_oob_tcp_init(void) jobid = ORTE_PROC_MY_NAME->jobid; /* Fix up the listen type. This is the first call into the OOB in - which the orte_process_info.hnp field is reliably set. The + which the ORTE_PROC_IS_HNP flag is reliably set. The listen_mode should only be listen_thread for the HNP -- all others should use the traditional event library.
*/ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { mca_oob_tcp_component.tcp_listen_type = OOB_TCP_EVENT; } diff --git a/orte/mca/oob/tcp/oob_tcp_msg.c b/orte/mca/oob/tcp/oob_tcp_msg.c index 784dd3eb4a..968b534510 100644 --- a/orte/mca/oob/tcp/oob_tcp_msg.c +++ b/orte/mca/oob/tcp/oob_tcp_msg.c @@ -479,7 +479,7 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee * another job family - procs dont' need to do this because * they always route through their daemons anyway */ - if (!orte_process_info.mpi_proc) { + if (!ORTE_PROC_IS_MPI) { if ((ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) && (0 != ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid))) { diff --git a/orte/mca/plm/base/plm_base_close.c b/orte/mca/plm/base/plm_base_close.c index e13f450bd5..204a58633c 100644 --- a/orte/mca/plm/base/plm_base_close.c +++ b/orte/mca/plm/base/plm_base_close.c @@ -39,7 +39,7 @@ int orte_plm_base_finalize(void) orte_plm.finalize(); /* if we are the HNP, then stop our receive */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) { ORTE_ERROR_LOG(rc); return rc; diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 685947e7b5..441aab5695 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -52,6 +52,7 @@ #include "orte/util/name_fns.h" #include "orte/util/nidmap.h" +#include "orte/util/proc_info.h" #include "orte/mca/plm/base/plm_private.h" #include "orte/mca/plm/base/base.h" @@ -1014,7 +1015,7 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, } /* pass the total number of daemons that will be in the system */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); num_procs = jdata->num_procs; } else { @@ -1027,7 +1028,7 @@ int 
orte_plm_base_orted_append_basic_args(int *argc, char ***argv, free(param); /* pass the uri of the hnp */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { rml_uri = orte_rml.get_contact_info(); } else { rml_uri = orte_process_info.my_hnp_uri; @@ -1041,7 +1042,7 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, * being sure to "purge" any that would cause problems * on backend nodes */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { cnt = opal_argv_count(orted_cmd_line); for (i=0; i < cnt; i+=3) { /* if the specified option is more than one word, we don't diff --git a/orte/mca/plm/base/plm_base_orted_cmds.c b/orte/mca/plm/base/plm_base_orted_cmds.c index 640309131a..4b154de921 100644 --- a/orte/mca/plm/base/plm_base_orted_cmds.c +++ b/orte/mca/plm/base/plm_base_orted_cmds.c @@ -36,6 +36,7 @@ #include "orte/mca/rml/rml_types.h" #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_wait.h" #include "orte/orted/orted.h" @@ -279,7 +280,7 @@ int orte_plm_base_orted_kill_local_procs(orte_jobid_t job) * fire right away, but that's okay * The macro makes a copy of the buffer, so it's okay to release it here */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &cmd, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor); } diff --git a/orte/mca/plm/base/plm_base_receive.c b/orte/mca/plm/base/plm_base_receive.c index a3c64da070..e762c1a274 100644 --- a/orte/mca/plm/base/plm_base_receive.c +++ b/orte/mca/plm/base/plm_base_receive.c @@ -316,7 +316,7 @@ CLEANUP: OBJ_DESTRUCT(&answer); /* see if an error occurred - if so, wakeup the HNP so we can exit */ - if (orte_process_info.hnp && ORTE_SUCCESS != rc) { + if (ORTE_PROC_IS_HNP && ORTE_SUCCESS != rc) { orte_trigger_event(&orte_exit); } } diff --git a/orte/mca/plm/base/plm_base_select.c b/orte/mca/plm/base/plm_base_select.c index 6a85ea4275..c99562a824 100644 --- 
a/orte/mca/plm/base/plm_base_select.c +++ b/orte/mca/plm/base/plm_base_select.c @@ -52,7 +52,7 @@ int orte_plm_base_select(void) * If we didn't find one, and we are a daemon, then default to retaining the proxy. * Otherwise, if we didn't find one to select, that is unacceptable. */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { /* don't record a selected component or flag selected * so we finalize correctly - just leave the plm alone * as it defaults to pointing at the proxy diff --git a/orte/mca/plm/ccp/plm_ccp_component.c b/orte/mca/plm/ccp/plm_ccp_component.c index 5e6e5fb142..a51999c657 100644 --- a/orte/mca/plm/ccp/plm_ccp_component.c +++ b/orte/mca/plm/ccp/plm_ccp_component.c @@ -143,7 +143,7 @@ static int orte_plm_ccp_component_query(mca_base_module_t **module, int *priorit } /* if we are NOT an HNP, then don't select us */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { pCluster->Release(); *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index 7b6c2e887c..433896e6b3 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -73,6 +73,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" #include "orte/util/nidmap.h" +#include "orte/util/proc_info.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" @@ -294,7 +295,7 @@ static void orte_plm_rsh_wait_daemon(pid_t pid, int status, void* cbdata) /* if we are not the HNP, send a message to the HNP alerting it * to the failure */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { opal_buffer_t buf; orte_vpid_t *vpid=(orte_vpid_t*)cbdata; OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, @@ -664,7 +665,7 @@ static int setup_launch(int *argcptr, char ***argvptr, * by enclosing them in quotes. 
Check for any multi-word * mca params passed to mpirun and include them */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { int cnt, i; cnt = opal_argv_count(orted_cmd_line); for (i=0; i < cnt; i+=3) { diff --git a/orte/mca/ras/alps/ras_alps_component.c b/orte/mca/ras/alps/ras_alps_component.c index c0654d2393..cbcf6032a2 100644 --- a/orte/mca/ras/alps/ras_alps_component.c +++ b/orte/mca/ras/alps/ras_alps_component.c @@ -86,7 +86,7 @@ static int ras_alps_open(void) static int orte_ras_alps_component_query(mca_base_module_t **module, int *priority) { /* if we are not an HNP, then we must not be selected */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { *module = NULL; return ORTE_ERROR; } diff --git a/orte/mca/ras/ccp/ras_ccp_component.c b/orte/mca/ras/ccp/ras_ccp_component.c index 1c7148f51c..311a8d1c89 100644 --- a/orte/mca/ras/ccp/ras_ccp_component.c +++ b/orte/mca/ras/ccp/ras_ccp_component.c @@ -104,7 +104,7 @@ static int orte_ras_ccp_component_query(mca_base_module_t **module, int *priorit } /* if we are NOT an HNP, then don't select us */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { pCluster->Release(); *module = NULL; return ORTE_ERROR; diff --git a/orte/mca/rml/base/rml_base_contact.c b/orte/mca/rml/base/rml_base_contact.c index f43d654057..21f6fb4d88 100644 --- a/orte/mca/rml/base/rml_base_contact.c +++ b/orte/mca/rml/base/rml_base_contact.c @@ -28,6 +28,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/routed/routed.h" #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" @@ -131,7 +132,7 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data) * in our process_info struct so we can correctly route any messages */ if (ORTE_PROC_MY_NAME->jobid == name.jobid && - orte_process_info.daemon && + ORTE_PROC_IS_DAEMON && orte_process_info.num_procs < num_procs) { orte_process_info.num_procs = num_procs; /* if we changed it, then we 
better update the routed diff --git a/orte/mca/routed/base/routed_base_receive.c b/orte/mca/routed/base/routed_base_receive.c index 0ffee0bbf6..a0aa4f2a2a 100644 --- a/orte/mca/routed/base/routed_base_receive.c +++ b/orte/mca/routed/base/routed_base_receive.c @@ -58,7 +58,7 @@ int orte_routed_base_comm_start(void) { int rc; - if (recv_issued || !orte_process_info.hnp) { + if (recv_issued || !ORTE_PROC_IS_HNP) { return ORTE_SUCCESS; } @@ -84,7 +84,7 @@ int orte_routed_base_comm_stop(void) { int rc; - if (!recv_issued || !orte_process_info.hnp) { + if (!recv_issued || !ORTE_PROC_IS_HNP) { return ORTE_SUCCESS; } diff --git a/orte/mca/routed/binomial/routed_binomial.c b/orte/mca/routed/binomial/routed_binomial.c index 146845b241..cc442896e6 100644 --- a/orte/mca/routed/binomial/routed_binomial.c +++ b/orte/mca/routed/binomial/routed_binomial.c @@ -109,9 +109,9 @@ static int finalize(void) /* if I am an application process, indicate that I am * truly finalizing prior to departure */ - if (!orte_process_info.hnp && - !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && + !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) { ORTE_ERROR_LOG(rc); return rc; @@ -119,7 +119,7 @@ static int finalize(void) } /* if I am the HNP, I need to stop the comm recv */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { orte_routed_base_comm_stop(); } @@ -153,8 +153,8 @@ static int delete_route(orte_process_name_t *proc) /* if I am an application process, I don't have any routes * so there is nothing for me to do */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -174,7 +174,7 @@ static int delete_route(orte_process_name_t *proc) * in my routing table and thus have nothing to do * here, just return */ - if (orte_process_info.daemon) { + if 
(ORTE_PROC_IS_DAEMON) { return ORTE_SUCCESS; } @@ -221,8 +221,8 @@ static int update_route(orte_process_name_t *target, /* if I am an application process, we don't update the route since * we automatically route everything through the local daemon */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -249,7 +249,7 @@ static int update_route(orte_process_name_t *target, * anything to this job family via my HNP - so nothing to do * here, just return */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { return ORTE_SUCCESS; } @@ -315,8 +315,8 @@ static orte_process_name_t get_route(orte_process_name_t *target) } /* if I am an application process, always route via my local daemon */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { ret = ORTE_PROC_MY_DAEMON; goto found; } @@ -334,7 +334,7 @@ static orte_process_name_t get_route(orte_process_name_t *target) /* IF THIS IS FOR A DIFFERENT JOB FAMILY... 
*/ if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { /* if I am a daemon, route this via the HNP */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { ret = ORTE_PROC_MY_HNP; goto found; } @@ -495,7 +495,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) int rc; /* if I am a tool, then I stand alone - there is nothing to do */ - if (orte_process_info.tool) { + if (ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -503,7 +503,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) * from the data sent to me for launch and update the routing tables to * point at the daemon for each proc */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, "%s routed_binomial: init routes for daemon job %s\n\thnp_uri %s", @@ -558,7 +558,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) } - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, "%s routed_binomial: init routes for HNP job %s", @@ -851,7 +851,7 @@ static int update_routing_tree(void) /* if I am anything other than a daemon or the HNP, this * is a meaningless command as I am not allowed to route */ - if (!orte_process_info.daemon && !orte_process_info.hnp) { + if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { return ORTE_ERR_NOT_SUPPORTED; } @@ -894,7 +894,7 @@ static orte_vpid_t get_routing_tree(opal_list_t *children) /* if I am anything other than a daemon or the HNP, this * is a meaningless command as I am not allowed to route */ - if (!orte_process_info.daemon && !orte_process_info.hnp) { + if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { return ORTE_VPID_INVALID; } @@ -925,7 +925,7 @@ static int get_wireup_info(opal_buffer_t *buf) * is a meaningless command as I cannot get * the requested info */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { return ORTE_ERR_NOT_SUPPORTED; } diff --git 
a/orte/mca/routed/direct/routed_direct.c b/orte/mca/routed/direct/routed_direct.c index d3b20745e7..df83fe2c06 100644 --- a/orte/mca/routed/direct/routed_direct.c +++ b/orte/mca/routed/direct/routed_direct.c @@ -16,7 +16,9 @@ #include "opal/util/output.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/base/rml_contact.h" @@ -78,6 +80,20 @@ static int init(void) static int finalize(void) { + int rc; + + /* if I am the HNP, I need to stop the comm recv */ + if (ORTE_PROC_IS_HNP) { + orte_routed_base_comm_stop(); + } + + if (ORTE_PROC_IS_MPI && NULL != orte_process_info.my_daemon_uri) { + /* if a daemon launched me, register that I am leaving */ + if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) { + ORTE_ERROR_LOG(rc); + } + } + /* destruct the global condition and lock */ OBJ_DESTRUCT(&cond); OBJ_DESTRUCT(&lock); @@ -138,10 +154,128 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) { int rc; - /* if ndat=NULL, then we are being called during orte_init. In this - * case, there is nothing to do + /* if I am a tool, then I stand alone - there is nothing to do */ + if (ORTE_PROC_IS_TOOL) { + return ORTE_SUCCESS; + } + + /* if I am a daemon or HNP, then I have to extract the routing info for this job + * from the data sent to me for launch and update the routing tables to + * point at the daemon for each proc */ + if (ORTE_PROC_IS_DAEMON) { + + OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, + "%s direct: init routes for daemon job %s\n\thnp_uri %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job), + (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri)); + + if (NULL == ndat) { + /* indicates this is being called during orte_init. 
+ * Get the HNP's name for possible later use + */ + if (NULL == orte_process_info.my_hnp_uri) { + /* fatal error */ + ORTE_ERROR_LOG(ORTE_ERR_FATAL); + return ORTE_ERR_FATAL; + } + /* set the contact info into the hash table */ + if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_hnp_uri))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + /* extract the hnp name and store it */ + if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, + ORTE_PROC_MY_HNP, NULL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* daemons will send their contact info back to the HNP as + * part of the message confirming they are ready to go. HNPs + * load their contact info during orte_init + */ + } else { + /* ndat != NULL means we are getting an update of RML info + * for the daemons - so update our contact info and routes + */ + if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) { + ORTE_ERROR_LOG(rc); + } + return rc; + } + + OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, + "%s routed_direct: completed init routes", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + return ORTE_SUCCESS; + } + + + if (ORTE_PROC_IS_HNP) { + + OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, + "%s routed_direct: init routes for HNP job %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(job))); + + if (NULL == ndat) { + /* if ndat is NULL, then this is being called during init, so just + * make myself available to catch any reported contact info + */ + if (ORTE_SUCCESS != (rc = orte_routed_base_comm_start())) { + ORTE_ERROR_LOG(rc); + return rc; + } + } else { + /* if this is for my own jobid, then I am getting an update of RML info + * for the daemons - so update our contact info and routes + */ + if (ORTE_PROC_MY_NAME->jobid == job) { + if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + } + + return ORTE_SUCCESS; + } + + /*** MUST BE A PROC ***/ + + /* if ndat=NULL, then
we are being called during orte_init */ if (NULL == ndat) { + if (NULL != orte_process_info.my_daemon_uri) { + /* we are being launched by a daemon, so we need to + * register a sync with it to get our nidmap back + */ + /* Set the contact info in the RML - this won't actually establish + * the connection, but just tells the RML how to reach the daemon + * if/when we attempt to send to it + */ + if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + /* extract the daemon's name so we can update the routing table */ + if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri, + ORTE_PROC_MY_DAEMON, NULL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* register ourselves - this sends a message to the daemon (warming up that connection) + * and sends our contact info to the HNP when all local procs have reported + */ + if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(true))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* no answer is expected or coming */ + } return ORTE_SUCCESS; } @@ -183,20 +317,35 @@ static int set_lifeline(orte_process_name_t *proc) static int update_routing_tree(void) { - /* this is a meaningless command for a direct as I am not allowed to route */ - return ORTE_ERR_NOT_SUPPORTED; + /* nothing to do here */ + return ORTE_SUCCESS; } static orte_vpid_t get_routing_tree(opal_list_t *children) { - /* this is a meaningless command for a direct as I am not allowed to route */ + orte_vpid_t i; + orte_routed_tree_t *nm; + + if (!ORTE_PROC_IS_HNP) { + /* if I am not the HNP, there is nothing to do */ + return ORTE_VPID_INVALID; + } + + /* if I am the HNP, then I need to construct a list containing all + * daemons so I can relay messages to them + */ + for (i=0; i < orte_process_info.num_procs; i++) { + nm = OBJ_NEW(orte_routed_tree_t); + nm->vpid = i; + opal_list_append(children, &nm->super); + } return ORTE_VPID_INVALID; } static int
get_wireup_info(opal_buffer_t *buf) { /* this is a meaningless command for a direct as I am not allowed to route */ - return ORTE_ERR_NOT_SUPPORTED; + return ORTE_SUCCESS; } diff --git a/orte/mca/routed/linear/routed_linear.c b/orte/mca/routed/linear/routed_linear.c index a5821ac8d1..77438728b1 100644 --- a/orte/mca/routed/linear/routed_linear.c +++ b/orte/mca/routed/linear/routed_linear.c @@ -100,9 +100,9 @@ static int finalize(void) /* if I am an application process, indicate that I am * truly finalizing prior to departure */ - if (!orte_process_info.hnp && - !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && + !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) { ORTE_ERROR_LOG(rc); return rc; @@ -110,7 +110,7 @@ static int finalize(void) } /* if I am the HNP, I need to stop the comm recv */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { orte_routed_base_comm_stop(); } @@ -137,8 +137,8 @@ static int delete_route(orte_process_name_t *proc) /* if I am an application process, I don't have any routes * so there is nothing for me to do */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -158,7 +158,7 @@ static int delete_route(orte_process_name_t *proc) * in my routing table and thus have nothing to do * here, just return */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { return ORTE_SUCCESS; } @@ -205,8 +205,8 @@ static int update_route(orte_process_name_t *target, /* if I am an application process, we don't update the route since * we automatically route everything through the local daemon */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -233,7 +233,7 @@ static int 
update_route(orte_process_name_t *target, * anything to this job family via my HNP - so nothing to do * here, just return */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { return ORTE_SUCCESS; } @@ -293,8 +293,8 @@ static orte_process_name_t get_route(orte_process_name_t *target) } /* if I am an application process, always route via my local daemon */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { ret = ORTE_PROC_MY_DAEMON; goto found; } @@ -312,7 +312,7 @@ static orte_process_name_t get_route(orte_process_name_t *target) /* IF THIS IS FOR A DIFFERENT JOB FAMILY... */ if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { /* if I am a daemon, route this via the HNP */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { ret = ORTE_PROC_MY_HNP; goto found; } @@ -490,7 +490,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) int rc; /* if I am a tool, then I stand alone - there is nothing to do */ - if (orte_process_info.tool) { + if (ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -498,7 +498,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) * from the data sent to me for launch and update the routing tables to * point at the daemon for each proc */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, "%s routed_linear: init routes for daemon job %s\n\thnp_uri %s", @@ -553,7 +553,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) } - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, "%s routed_linear: init routes for HNP job %s", @@ -781,7 +781,7 @@ static int update_routing_tree(void) /* if I am anything other than a daemon or the HNP, this * is a meaningless command as I am not allowed to route */ - if (!orte_process_info.daemon && 
!orte_process_info.hnp) { + if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { return ORTE_ERR_NOT_SUPPORTED; } @@ -797,7 +797,7 @@ static orte_vpid_t get_routing_tree(opal_list_t *children) /* if I am anything other than a daemon or the HNP, this * is a meaningless command as I am not allowed to route */ - if (!orte_process_info.daemon && !orte_process_info.hnp) { + if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { return ORTE_VPID_INVALID; } @@ -818,7 +818,7 @@ static orte_vpid_t get_routing_tree(opal_list_t *children) opal_list_append(children, &nm->super); } - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { /* the parent of the HNP is invalid */ return ORTE_VPID_INVALID; } @@ -836,7 +836,7 @@ static int get_wireup_info(opal_buffer_t *buf) * is a meaningless command as I cannot get * the requested info */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { return ORTE_ERR_NOT_SUPPORTED; } diff --git a/orte/mca/routed/radix/routed_radix.c b/orte/mca/routed/radix/routed_radix.c index bcfaac8319..943a7270be 100644 --- a/orte/mca/routed/radix/routed_radix.c +++ b/orte/mca/routed/radix/routed_radix.c @@ -109,9 +109,9 @@ static int finalize(void) /* if I am an application process, indicate that I am * truly finalizing prior to departure */ - if (!orte_process_info.hnp && - !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && + !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) { ORTE_ERROR_LOG(rc); return rc; @@ -119,7 +119,7 @@ static int finalize(void) } /* if I am the HNP, I need to stop the comm recv */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { orte_routed_base_comm_stop(); } @@ -153,8 +153,8 @@ static int delete_route(orte_process_name_t *proc) /* if I am an application process, I don't have any routes * so there is nothing for me to do */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if 
(!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -174,7 +174,7 @@ static int delete_route(orte_process_name_t *proc) * in my routing table and thus have nothing to do * here, just return */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { return ORTE_SUCCESS; } @@ -221,8 +221,8 @@ static int update_route(orte_process_name_t *target, /* if I am an application process, we don't update the route since * we automatically route everything through the local daemon */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -249,7 +249,7 @@ static int update_route(orte_process_name_t *target, * anything to this job family via my HNP - so nothing to do * here, just return */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { return ORTE_SUCCESS; } @@ -317,8 +317,8 @@ static orte_process_name_t get_route(orte_process_name_t *target) } /* if I am an application process, always route via my local daemon */ - if (!orte_process_info.hnp && !orte_process_info.daemon && - !orte_process_info.tool) { + if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && + !ORTE_PROC_IS_TOOL) { ret = ORTE_PROC_MY_DAEMON; goto found; } @@ -336,7 +336,7 @@ static orte_process_name_t get_route(orte_process_name_t *target) /* IF THIS IS FOR A DIFFERENT JOB FAMILY... 
*/ if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { /* if I am a daemon, route this via the HNP */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { ret = ORTE_PROC_MY_HNP; goto found; } @@ -521,7 +521,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) int rc; /* if I am a tool, then I stand alone - there is nothing to do */ - if (orte_process_info.tool) { + if (ORTE_PROC_IS_TOOL) { return ORTE_SUCCESS; } @@ -529,7 +529,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) * from the data sent to me for launch and update the routing tables to * point at the daemon for each proc */ - if (orte_process_info.daemon) { + if (ORTE_PROC_IS_DAEMON) { OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, "%s routed_radix: init routes for daemon job %s\n\thnp_uri %s", @@ -584,7 +584,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) } - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, "%s routed_radix: init routes for HNP job %s", @@ -861,7 +861,7 @@ static int update_routing_tree(void) /* if I am anything other than a daemon or the HNP, this * is a meaningless command as I am not allowed to route */ - if (!orte_process_info.daemon && !orte_process_info.hnp) { + if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { return ORTE_ERR_NOT_SUPPORTED; } @@ -924,7 +924,7 @@ static orte_vpid_t get_routing_tree(opal_list_t *children) /* if I am anything other than a daemon or the HNP, this * is a meaningless command as I am not allowed to route */ - if (!orte_process_info.daemon && !orte_process_info.hnp) { + if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { return ORTE_VPID_INVALID; } @@ -954,7 +954,7 @@ static int get_wireup_info(opal_buffer_t *buf) * is a meaningless command as I cannot get * the requested info */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { return ORTE_ERR_NOT_SUPPORTED; } diff --git 
a/orte/mca/snapc/full/snapc_full_global.c b/orte/mca/snapc/full/snapc_full_global.c index 2236a639d3..b574fa1140 100644 --- a/orte/mca/snapc/full/snapc_full_global.c +++ b/orte/mca/snapc/full/snapc_full_global.c @@ -32,6 +32,7 @@ #include "opal/mca/crs/base/base.h" #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_globals.h" #include "opal/dss/dss.h" #include "orte/mca/rml/rml.h" @@ -342,7 +343,7 @@ static int snapc_full_global_start_listener(void) { int ret, exit_status = ORTE_SUCCESS; - if (snapc_orted_recv_issued && orte_process_info.hnp) { + if (snapc_orted_recv_issued && ORTE_PROC_IS_HNP) { return ORTE_SUCCESS; } @@ -372,7 +373,7 @@ static int snapc_full_global_stop_listener(void) { int ret, exit_status = ORTE_SUCCESS; - if (!snapc_orted_recv_issued && orte_process_info.hnp) { + if (!snapc_orted_recv_issued && ORTE_PROC_IS_HNP) { return ORTE_SUCCESS; } @@ -396,7 +397,7 @@ static int snapc_full_global_start_cmdline_listener(void) { int ret, exit_status = ORTE_SUCCESS; - if (snapc_cmdline_recv_issued && orte_process_info.hnp) { + if (snapc_cmdline_recv_issued && ORTE_PROC_IS_HNP) { return ORTE_SUCCESS; } @@ -426,7 +427,7 @@ static int snapc_full_global_stop_cmdline_listener(void) { int ret, exit_status = ORTE_SUCCESS; - if (!snapc_cmdline_recv_issued && orte_process_info.hnp) { + if (!snapc_cmdline_recv_issued && ORTE_PROC_IS_HNP) { return ORTE_SUCCESS; } diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 6b2f93bbfa..e0a7015a4e 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -181,7 +181,7 @@ void orte_daemon_cmd_processor(int fd, short event, void *data) orte_daemon_cmd_flag_t command; /* check to see if we are in a progress recursion */ - if (orte_process_info.daemon && 1 < (ret = opal_progress_recursion_depth())) { + if (ORTE_PROC_IS_DAEMON && 1 < (ret = opal_progress_recursion_depth())) { /* if we are in a recursion, we want to repost the message event * so the progress engine 
can work its way back up to the top * of the stack. Given that this could happen multiple times, @@ -221,7 +221,7 @@ void orte_daemon_cmd_processor(int fd, short event, void *data) wait_time = 1; num_recursions = 0; - if (orte_timing && orte_process_info.hnp) { + if (orte_timing && ORTE_PROC_IS_HNP) { /* if we are doing timing, and we are the HNP, then the message doesn't come * through the RML recv, so we have to pickup the recv time here */ @@ -590,7 +590,7 @@ static int process_commands(orte_process_name_t* sender, /* if we are the HNP, kill our local procs and * flag we are exited - but don't yet exit */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { orte_job_t *daemons; orte_proc_t **procs; /* if we are the HNP, ensure our local procs are terminated */ @@ -648,7 +648,7 @@ static int process_commands(orte_process_name_t* sender, /* if we are the HNP, kill our local procs and * flag we are exited - but don't yet exit */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { orte_job_t *daemons; orte_proc_t **procs; /* if we are the HNP, ensure our local procs are terminated */ @@ -694,7 +694,7 @@ static int process_commands(orte_process_name_t* sender, answer = OBJ_NEW(opal_buffer_t); job = ORTE_JOBID_INVALID; /* can only process this if we are the HNP */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { /* unpack the job data */ n = 1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &jdata, &n, ORTE_JOB))) { @@ -763,7 +763,7 @@ static int process_commands(orte_process_name_t* sender, /* if we are not the HNP, we can do nothing - report * back 0 procs so the tool won't hang */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { orte_std_cntr_t zero=0; answer = OBJ_NEW(opal_buffer_t); @@ -846,7 +846,7 @@ static int process_commands(orte_process_name_t* sender, /* if we are not the HNP, we can do nothing - report * back 0 nodes so the tool won't hang */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { orte_std_cntr_t zero=0; 
answer = OBJ_NEW(opal_buffer_t); @@ -927,7 +927,7 @@ static int process_commands(orte_process_name_t* sender, /* if we are not the HNP, we can do nothing - report * back 0 procs so the tool won't hang */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { orte_std_cntr_t zero=0; answer = OBJ_NEW(opal_buffer_t); @@ -1062,7 +1062,7 @@ SEND_ANSWER: * the requestor. We need to convert that to our own job family */ proc.jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid, proc.jobid); - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { return_addr = sender; /* if the request is for a wildcard vpid, then it goes to every * daemon. For scalability, we should probably xcast this some @@ -1185,7 +1185,7 @@ SEND_ANSWER: /* send the answer back to requester - callback * function will release buffer */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { /* if I am the HNP, I need to also provide the number of * replies the caller should recv and the sample time */ diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 2eb3f4a47a..806846693c 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -308,18 +308,6 @@ int orte_daemon(int argc, char *argv[]) if (1000 < i) i=0; } - /* Okay, now on to serious business! */ - - if (orted_globals.hnp) { - /* we are to be the hnp, so set that flag */ - orte_process_info.hnp = true; - orte_process_info.daemon = false; - } else { - /* set ourselves to be just a daemon */ - orte_process_info.hnp = false; - orte_process_info.daemon = true; - } - #if OPAL_ENABLE_FT == 1 /* Mark as a tool program */ tmp_env_var = mca_base_param_env_var("opal_cr_is_tool"); @@ -335,9 +323,16 @@ int orte_daemon(int argc, char *argv[]) * up incorrect infrastructure that only a singleton would * require. 
*/ - if (ORTE_SUCCESS != (ret = orte_init(ORTE_NON_TOOL))) { - ORTE_ERROR_LOG(ret); - return ret; + if (orted_globals.hnp) { + if (ORTE_SUCCESS != (ret = orte_init(ORTE_PROC_HNP))) { + ORTE_ERROR_LOG(ret); + return ret; + } + } else { + if (ORTE_SUCCESS != (ret = orte_init(ORTE_PROC_DAEMON))) { + ORTE_ERROR_LOG(ret); + return ret; + } } if ((int)ORTE_VPID_INVALID != orted_globals.fail) { @@ -397,7 +392,7 @@ int orte_daemon(int argc, char *argv[]) ORTE_PROC_MY_DAEMON->vpid = ORTE_PROC_MY_NAME->vpid; /* if I am also the hnp, then update that contact info field too */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { orte_process_info.my_hnp_uri = orte_rml.get_contact_info(); ORTE_PROC_MY_HNP->jobid = ORTE_PROC_MY_NAME->jobid; ORTE_PROC_MY_HNP->vpid = ORTE_PROC_MY_NAME->vpid; @@ -595,7 +590,7 @@ int orte_daemon(int argc, char *argv[]) * is if we are launched by a singleton to provide support * for it */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { /* send the information to the orted report-back point - this function * will process the data, but also counts the number of * orteds that reported back so the launch procedure can continue. 
diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index b046c62ac9..c7f038f85d 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -33,6 +33,7 @@ #include "opal/dss/dss.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/util/proc_info.h" #include "orte/runtime/runtime.h" #include "orte/runtime/runtime_internals.h" @@ -142,7 +143,7 @@ int orte_dt_init(void) /* open up the verbose output for ORTE debugging */ if (orte_debug_flag || 0 < orte_debug_verbosity || - (orte_debug_daemons_flag && (orte_process_info.daemon || orte_process_info.hnp))) { + (orte_debug_daemons_flag && (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP))) { if (0 < orte_debug_verbosity) { opal_output_set_verbosity(orte_debug_output, orte_debug_verbosity); } else { @@ -410,7 +411,7 @@ orte_job_t* orte_get_job_data_object(orte_jobid_t job) int32_t ljob; /* if I am not an HNP, I cannot provide this object */ - if (!orte_process_info.hnp) { + if (!ORTE_PROC_IS_HNP) { return NULL; } diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index 416c49187b..5c61bf7bbb 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -65,7 +65,7 @@ orte_process_name_t orte_name_invalid = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}; #endif const char orte_version_string[] = ORTE_IDENT_STRING; -int orte_init(char flags) +int orte_init(orte_proc_type_t flags) { int ret; char *error = NULL; @@ -80,10 +80,8 @@ int orte_init(char flags) return ret; } - /* ensure we know the tool setting for when we finalize */ - if ((flags & ORTE_TOOL) || (flags & ORTE_TOOL_WITH_NAME)) { - orte_process_info.tool = true; - } + /* ensure we know the type of proc for when we finalize */ + orte_process_info.proc_type = flags; /* setup the locks */ if (ORTE_SUCCESS != (ret = orte_locks_init())) { @@ -91,10 +89,6 @@ int orte_init(char flags) goto error; } - if (orte_process_info.hnp) { - orte_process_info.daemon = false; - } - /* Register all MCA Params */ if (ORTE_SUCCESS != (ret 
= orte_register_params())) { error = "orte_register_params"; @@ -130,7 +124,7 @@ int orte_init(char flags) } /* initialize the RTE for this environment */ - if (ORTE_SUCCESS != (ret = orte_ess.init(flags))) { + if (ORTE_SUCCESS != (ret = orte_ess.init())) { ORTE_ERROR_LOG(ret); error = "orte_ess_set_name"; goto error; diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 852fa61c36..688ef2402b 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -156,7 +156,7 @@ int orte_register_params(void) orte_timing = true; } - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { char *tmp; mca_base_param_reg_string_name("orte", "timing_file", "Name of the file where timing data is to be written (relative or absolute path)", diff --git a/orte/runtime/runtime.h b/orte/runtime/runtime.h index de42403afb..c3c02ae115 100644 --- a/orte/runtime/runtime.h +++ b/orte/runtime/runtime.h @@ -31,6 +31,7 @@ #include #endif +#include "orte/util/proc_info.h" BEGIN_C_DECLS @@ -45,11 +46,6 @@ ORTE_DECLSPEC extern bool orte_finalizing; ORTE_DECLSPEC extern int orte_debug_output; ORTE_DECLSPEC extern bool orte_debug_flag; -/* some convenience definitions for code clarity */ -#define ORTE_NON_TOOL 0x00 -#define ORTE_TOOL 0x01 -#define ORTE_TOOL_WITH_NAME 0x02 - /** * Initialize the Open Run Time Environment * @@ -61,7 +57,7 @@ ORTE_DECLSPEC extern bool orte_debug_flag; * * @param tool Whether we are ORTE tool or not */ -ORTE_DECLSPEC int orte_init(char flags); +ORTE_DECLSPEC int orte_init(orte_proc_type_t flags); /** * Initialize parameters for ORTE. 
diff --git a/orte/test/system/binom.c b/orte/test/system/binom.c index 27fe5d02c6..a8aab771a4 100644 --- a/orte/test/system/binom.c +++ b/orte/test/system/binom.c @@ -16,6 +16,7 @@ #include "opal/class/opal_list.h" #include "opal/class/opal_bitmap.h" +#include "orte/util/proc_info.h" #include "orte/runtime/runtime.h" typedef struct { @@ -111,7 +112,7 @@ int main(int argc, char* argv[]) exit(1); } - orte_init(ORTE_TOOL); + orte_init(ORTE_PROC_TOOL); num_procs = atoi(argv[1]); diff --git a/orte/test/system/orte_loop_child.c b/orte/test/system/orte_loop_child.c index 2961bc8137..57479543de 100644 --- a/orte/test/system/orte_loop_child.c +++ b/orte/test/system/orte_loop_child.c @@ -2,13 +2,14 @@ #include #include +#include "orte/util/proc_info.h" #include "orte/runtime/runtime.h" int main( int argc, char **argv ) { int rc; - if (ORTE_SUCCESS != (rc = orte_init(ORTE_TOOL))) { + if (ORTE_SUCCESS != (rc = orte_init(ORTE_PROC_TOOL))) { fprintf(stderr, "couldn't init orte - error code %d\n", rc); return rc; } diff --git a/orte/test/system/orte_tool.c b/orte/test/system/orte_tool.c index aec4baff8f..76ba5f1896 100644 --- a/orte/test/system/orte_tool.c +++ b/orte/test/system/orte_tool.c @@ -14,6 +14,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/util/comm/comm.h" #include "orte/util/hnp_contact.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/runtime.h" @@ -27,7 +28,7 @@ int main(int argc, char* argv[]) orte_app_context_t *app; char cwd[OMPI_PATH_MAX]; - if (0 > (rc = orte_init(ORTE_TOOL))) { + if (0 > (rc = orte_init(ORTE_PROC_TOOL))) { fprintf(stderr, "orte_tool: couldn't init orte\n"); return rc; } diff --git a/orte/test/system/radix.c b/orte/test/system/radix.c index e3e509e80e..642fb809c7 100644 --- a/orte/test/system/radix.c +++ b/orte/test/system/radix.c @@ -15,6 +15,7 @@ #include "opal/class/opal_list.h" #include "opal/class/opal_bitmap.h" +#include "orte/util/proc_info.h" #include 
"orte/mca/routed/base/base.h" #include "orte/runtime/runtime.h" @@ -86,7 +87,7 @@ main(int argc, char **argv) exit(1); } - orte_init(ORTE_TOOL); + orte_init(ORTE_PROC_TOOL); Radix = atoi(argv[1]); NProcs = atoi(argv[2]); diff --git a/orte/test/system/sigusr_trap.c b/orte/test/system/sigusr_trap.c index aec9f0e6f8..2a575ded95 100644 --- a/orte/test/system/sigusr_trap.c +++ b/orte/test/system/sigusr_trap.c @@ -10,6 +10,7 @@ #include #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/runtime.h" @@ -45,7 +46,7 @@ int main(int argc, char* argv[]) int i; double pi; - orte_init(ORTE_TOOL_WITH_NAME); + orte_init(ORTE_PROC_TOOL_WNAME); if (signal(SIGUSR1, sigusr_handler) == SIG_IGN) { fprintf(stderr, "Could not setup signal trap for SIGUSR1\n"); diff --git a/orte/tools/orte-checkpoint/orte-checkpoint.c b/orte/tools/orte-checkpoint/orte-checkpoint.c index 156e3bb48b..49598363cf 100644 --- a/orte/tools/orte-checkpoint/orte-checkpoint.c +++ b/orte/tools/orte-checkpoint/orte-checkpoint.c @@ -69,6 +69,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/proc_info.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/errmgr/errmgr.h" @@ -486,7 +487,7 @@ static int ckpt_init(int argc, char *argv[]) { * We need all of OPAL and the TOOLS portion of ORTE - this * sets us up so we can talk to any HNP over the wire ***************************/ - if (ORTE_SUCCESS != (ret = orte_init(ORTE_TOOL))) { + if (ORTE_SUCCESS != (ret = orte_init(ORTE_PROC_TOOL))) { exit_status = ret; goto cleanup; } diff --git a/orte/tools/orte-clean/orte-clean.c b/orte/tools/orte-clean/orte-clean.c index 19ca944dc7..4ea9c57e97 100644 --- a/orte/tools/orte-clean/orte-clean.c +++ b/orte/tools/orte-clean/orte-clean.c @@ -157,7 +157,7 @@ main(int argc, char *argv[]) #endif tmp_env_var = NULL; /* Silence compiler warning */ - if 
(ORTE_SUCCESS != (ret = orte_init(ORTE_TOOL_WITH_NAME))) { + if (ORTE_SUCCESS != (ret = orte_init(ORTE_PROC_TOOL_WNAME))) { return ret; } @@ -403,7 +403,7 @@ void kill_procs(void) { } /* if we are a singleton, check the hnp_pid as well */ - if (orte_process_info.singleton) { + if (ORTE_PROC_IS_SINGLETON) { if (procpid != orte_process_info.hnp_pid) { (void)kill(procpid, SIGKILL); } diff --git a/orte/tools/orte-iof/orte-iof.c b/orte/tools/orte-iof/orte-iof.c index bf5add9501..80cf6f4bb8 100644 --- a/orte/tools/orte-iof/orte-iof.c +++ b/orte/tools/orte-iof/orte-iof.c @@ -70,6 +70,7 @@ #include "orte/util/hnp_contact.h" #include "orte/util/show_help.h" #include "orte/util/parse_options.h" +#include "orte/util/proc_info.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/iof/iof.h" #if OPAL_ENABLE_FT == 1 @@ -211,7 +212,7 @@ main(int argc, char *argv[]) /*************************** * We need all of OPAL and the TOOL portion of ORTE ***************************/ - if (ORTE_SUCCESS != orte_init(ORTE_TOOL)) { + if (ORTE_SUCCESS != orte_init(ORTE_PROC_TOOL)) { orte_finalize(); return 1; } diff --git a/orte/tools/orte-ps/orte-ps.c b/orte/tools/orte-ps/orte-ps.c index 3ebb267818..dc59a9f38a 100644 --- a/orte/tools/orte-ps/orte-ps.c +++ b/orte/tools/orte-ps/orte-ps.c @@ -67,6 +67,7 @@ #include "orte/util/hnp_contact.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/proc_info.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/comm/comm.h" #include "orte/mca/ras/ras_types.h" @@ -375,7 +376,7 @@ static int orte_ps_init(int argc, char *argv[]) { /*************************** * We need all of OPAL and the TOOL portion of ORTE ***************************/ - ret = orte_init(ORTE_TOOL); + ret = orte_init(ORTE_PROC_TOOL); return ret; } diff --git a/orte/tools/orte-restart/orte-restart.c b/orte/tools/orte-restart/orte-restart.c index 1fe118c3d1..b846727503 100644 --- a/orte/tools/orte-restart/orte-restart.c +++ 
b/orte/tools/orte-restart/orte-restart.c @@ -66,6 +66,7 @@ #include "orte/mca/snapc/base/base.h" #include "orte/mca/filem/base/base.h" #include "orte/util/show_help.h" +#include "orte/util/proc_info.h" /****************** * Local Functions @@ -277,7 +278,7 @@ static int initialize(int argc, char *argv[]) { /* * Setup any ORTE stuff we might need */ - if (OPAL_SUCCESS != (ret = orte_init(ORTE_TOOL))) { + if (OPAL_SUCCESS != (ret = orte_init(ORTE_PROC_TOOL))) { exit_status = ret; goto cleanup; } diff --git a/orte/tools/orte-top/orte-top.c b/orte/tools/orte-top/orte-top.c index c954a453c5..8d7e75f778 100644 --- a/orte/tools/orte-top/orte-top.c +++ b/orte/tools/orte-top/orte-top.c @@ -52,6 +52,7 @@ #include "orte/util/hnp_contact.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/rml/base/rml_contact.h" @@ -256,7 +257,7 @@ main(int argc, char *argv[]) /*************************** * We need all of OPAL and the TOOL portion of ORTE ***************************/ - if (ORTE_SUCCESS != orte_init(ORTE_TOOL)) { + if (ORTE_SUCCESS != orte_init(ORTE_PROC_TOOL)) { orte_finalize(); return 1; } diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 815ba294e8..efb330bb74 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -440,8 +440,10 @@ int orterun(int argc, char *argv[]) OBJ_CONSTRUCT(&orte_exit, orte_trigger_event_t); OBJ_CONSTRUCT(&orteds_exit, orte_trigger_event_t); - /* flag that I am the HNP */ - orte_process_info.hnp = true; + /* flag that I am the HNP - needs to be done prior to + * registering params + */ + orte_process_info.proc_type = ORTE_PROC_HNP; /* Setup MCA params */ orte_register_params(); @@ -504,7 +506,7 @@ int orterun(int argc, char *argv[]) * up incorrect infrastructure that only a singleton would * require */ - if (ORTE_SUCCESS != (rc = orte_init(ORTE_NON_TOOL))) { + if (ORTE_SUCCESS != (rc = 
orte_init(ORTE_PROC_HNP))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/util/name_fns.h b/orte/util/name_fns.h index 3db46e3980..9de0075c5d 100644 --- a/orte/util/name_fns.h +++ b/orte/util/name_fns.h @@ -90,7 +90,7 @@ ORTE_DECLSPEC char* orte_util_print_local_jobid(const orte_jobid_t job); ( ((local) & 0xffff0000) | ((job) & 0x0000ffff) ) /* a macro for identifying that a proc is a daemon */ -#define ORTE_PROC_IS_DAEMON(n) \ +#define ORTE_PROC_NAME_IS_DAEMON(n) \ !((n) & 0x0000ffff) /* List of names for general use */ diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 35d5f9c76f..d162a48f5f 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -689,7 +689,7 @@ process_daemons: free(vpids); /* if we are a daemon or the HNP, update our num_procs */ - if (orte_process_info.hnp || orte_process_info.daemon) { + if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { orte_process_info.num_procs = num_daemons; } @@ -1106,7 +1106,7 @@ orte_nid_t* orte_util_lookup_nid(orte_process_name_t *proc) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); - if (ORTE_PROC_IS_DAEMON(proc->jobid)) { + if (ORTE_PROC_NAME_IS_DAEMON(proc->jobid)) { /* looking for a daemon */ return find_daemon_node(proc); } diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index 8c233c4167..54cfda307c 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -50,11 +50,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = { /* .nodename = */ NULL, /* .arch = */ 0, /* .pid = */ 0, - /* .singleton = */ false, - /* .daemon = */ false, - /* .hnp = */ false, - /* .tool = */ false, - /* .mpi_proc = */ false, + /* .proc_type = */ ORTE_PROC_TYPE_NONE, /* .sync_buf = */ NULL, /* .my_port = */ 0, /* .tmpdir_base = */ NULL, @@ -207,9 +203,7 @@ int orte_proc_info_finalize(void) orte_process_info.my_daemon_uri = NULL; } - orte_process_info.hnp = false; - orte_process_info.singleton = false; - orte_process_info.daemon = false; + orte_process_info.proc_type = ORTE_PROC_TYPE_NONE; 
OBJ_RELEASE(orte_process_info.sync_buf); orte_process_info.sync_buf = NULL; diff --git a/orte/util/proc_info.h b/orte/util/proc_info.h index 6a996c9efa..bd5622705f 100644 --- a/orte/util/proc_info.h +++ b/orte/util/proc_info.h @@ -43,6 +43,25 @@ BEGIN_C_DECLS #define ORTE_MAX_HOSTNAME_SIZE 512 +typedef uint32_t orte_proc_type_t; +#define ORTE_PROC_TYPE_NONE 0x0000 +#define ORTE_PROC_SINGLETON 0x0001 +#define ORTE_PROC_DAEMON 0x0002 +#define ORTE_PROC_HNP 0x0004 +#define ORTE_PROC_TOOL 0x0008 +#define ORTE_PROC_TOOL_WNAME 0x0010 +#define ORTE_PROC_MPI 0x0020 +#define ORTE_PROC_CM 0x0040 + +#define ORTE_PROC_IS_SINGLETON (ORTE_PROC_SINGLETON & orte_process_info.proc_type) +#define ORTE_PROC_IS_DAEMON (ORTE_PROC_DAEMON & orte_process_info.proc_type) +#define ORTE_PROC_IS_HNP (ORTE_PROC_HNP & orte_process_info.proc_type) +#define ORTE_PROC_IS_TOOL (ORTE_PROC_TOOL & orte_process_info.proc_type) +#define ORTE_PROC_IS_TOOL_WNAME (ORTE_PROC_TOOL_WNAME & orte_process_info.proc_type) +#define ORTE_PROC_IS_MPI (ORTE_PROC_MPI & orte_process_info.proc_type) +#define ORTE_PROC_IS_CM (ORTE_PROC_CM & orte_process_info.proc_type) + + /** * Process information structure * @@ -65,11 +84,7 @@ struct orte_proc_info_t { char *nodename; /**< string name for this node */ uint32_t arch; /**< arch for this node */ pid_t pid; /**< Local process ID for this process */ - bool singleton; /**< I am a singleton */ - bool daemon; /**< Indicate whether or not I am a daemon */ - bool hnp; /**< Indicate whether or not I am the HNP (orterun) */ - bool tool; /**< I am a tool or not */ - bool mpi_proc; /**< I am an MPI process */ + orte_proc_type_t proc_type; /**< Type of process */ opal_buffer_t *sync_buf; /**< buffer to store sync response */ uint16_t my_port; /**< TCP port for out-of-band comm */ /* The session directory has the form diff --git a/orte/util/show_help.c b/orte/util/show_help.c index 0bbad29041..a7ca7e7679 100644 --- a/orte/util/show_help.c +++ b/orte/util/show_help.c @@ -32,6 +32,7 @@ 
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" #include "orte/util/name_fns.h" +#include "orte/util/proc_info.h" #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" @@ -378,7 +379,7 @@ void orte_show_help_finalize(void) ready = false; /* Shutdown show_help, showing final messages */ - if (orte_process_info.hnp) { + if (ORTE_PROC_IS_HNP) { show_accumulated_duplicates(0, 0, NULL); OBJ_DESTRUCT(&abd_tuples); if (show_help_timer_set) { @@ -427,7 +428,7 @@ int orte_show_help(const char *filename, const char *topic, * or we don't yet know our HNP, then all we can do * is process this locally */ - if (orte_process_info.hnp || + if (ORTE_PROC_IS_HNP || NULL == orte_rml.send_buffer || ORTE_PROC_MY_HNP->vpid == ORTE_VPID_INVALID) { rc = show_help(filename, topic, output, ORTE_PROC_MY_NAME);