From 2121e9c01b57c438ee58a97ef435e1d94afb649e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 2 Oct 2013 19:03:46 +0000 Subject: [PATCH] Fix an issue regarding use of PMI when running processes and tools that don't need or want to use it. We build PMI support based on configuration settings and library availability. However, tools such as mpirun don't need it, and definitely shouldn't be using it. Ditto for procs launched by mpirun. We used to have a way of dealing with this - we had the PMI component check to see if the process was the HNP or was launched by an HNP. Sadly, moving the OPAL db framework removed that ability as OPAL has no notion of HNPs or proc type. So add a boolean flag to the db_base_select API that allows us to restrict selection to "local" components. This gives the PMI component the ability to reject itself as required. We then need to pass that param into the ess_base_std_app call so it can pass it all down. This commit was SVN r29341. --- opal/mca/db/base/base.h | 3 +- opal/mca/db/base/db_base_select.c | 5 +- opal/mca/db/db.h | 6 +- opal/mca/db/hash/db_hash_component.c | 7 ++- opal/mca/db/pmi/db_pmi_component.c | 25 ++++---- opal/mca/db/print/db_print_component.c | 7 ++- opal/mca/db/sqlite/db_sqlite_component.c | 7 ++- orte/mca/ess/base/base.h | 3 +- orte/mca/ess/base/ess_base_std_app.c | 4 +- orte/mca/ess/base/ess_base_std_orted.c | 3 +- orte/mca/ess/env/ess_env_module.c | 13 +--- orte/mca/ess/hnp/ess_hnp_module.c | 59 ++++++++++--------- orte/mca/ess/lsf/ess_lsf_module.c | 3 +- orte/mca/ess/pmi/ess_pmi_module.c | 6 +- orte/mca/ess/singleton/ess_singleton_module.c | 2 +- orte/mca/ess/slurm/ess_slurm_module.c | 1 + orte/mca/ess/tm/ess_tm_module.c | 20 +------ 17 files changed, 87 insertions(+), 87 deletions(-) diff --git a/opal/mca/db/base/base.h b/opal/mca/db/base/base.h index 1c1f6f5fea..249dcd5ae2 100644 --- a/opal/mca/db/base/base.h +++ b/opal/mca/db/base/base.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,7 +31,7 @@ OPAL_DECLSPEC extern mca_base_framework_t opal_db_base_framework; /** * Select a db module */ -OPAL_DECLSPEC int opal_db_base_select(void); +OPAL_DECLSPEC int opal_db_base_select(bool restrict_local); typedef struct { opal_list_item_t super; diff --git a/opal/mca/db/base/db_base_select.c b/opal/mca/db/base/db_base_select.c index a973edda62..396fa17ab4 100644 --- a/opal/mca/db/base/db_base_select.c +++ b/opal/mca/db/base/db_base_select.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -22,7 +23,7 @@ static bool selected = false; int -opal_db_base_select(void) +opal_db_base_select(bool restrict_local) { mca_base_component_list_item_t *cli = NULL; opal_db_base_component_t *component = NULL; @@ -57,7 +58,7 @@ opal_db_base_select(void) opal_output_verbose(5, opal_db_base_framework.framework_output, "mca:db:select: Querying component [%s]", component->base_version.mca_component_name); - rc = component->query(&module, &store, &fetch); + rc = component->query(&module, &store, &fetch, restrict_local); /* If no module was returned, then skip component */ if (OPAL_SUCCESS != rc || NULL == module) { diff --git a/opal/mca/db/db.h b/opal/mca/db/db.h index 70ab34b30f..300a21ffcb 100644 --- a/opal/mca/db/db.h +++ b/opal/mca/db/db.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -176,7 +177,8 @@ typedef struct opal_db_base_module_1_0_0_t opal_db_base_module_t; */ typedef int (*opal_db_component_query_fn_t)(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority); + int *fetch_priority, + bool restrict_local); /* * the standard component data structure */ diff --git a/opal/mca/db/hash/db_hash_component.c b/opal/mca/db/hash/db_hash_component.c index e5e530f6c1..838f34b46b 100644 --- a/opal/mca/db/hash/db_hash_component.c +++ b/opal/mca/db/hash/db_hash_component.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +27,8 @@ static int db_hash_component_open(void); static int db_hash_component_query(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority); + int *fetch_priority, + bool restrict_local); static int db_hash_component_close(void); static int db_hash_component_register(void); @@ -75,7 +77,8 @@ static int db_hash_component_open(void) static int db_hash_component_query(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority) + int *fetch_priority, + bool restrict_local) { /* we are the default - the ESS modules will set the db selection * envar if they need someone else diff --git a/opal/mca/db/pmi/db_pmi_component.c b/opal/mca/db/pmi/db_pmi_component.c index 78999d646b..39503194e3 100644 --- a/opal/mca/db/pmi/db_pmi_component.c +++ b/opal/mca/db/pmi/db_pmi_component.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +22,8 @@ static int db_pmi_component_open(void); static int db_pmi_component_query(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority); + int *fetch_priority, + bool restrict_local); static int db_pmi_component_close(void); static int db_pmi_component_register(void); @@ -67,16 +69,19 @@ static int db_pmi_component_open(void) static int db_pmi_component_query(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority) + int *fetch_priority, + bool restrict_local) { - /* only use PMI if available - the ESS pmi module - * will force our selection if we are direct-launched - */ - if (mca_common_pmi_init()) { - *store_priority = my_store_priority; - *fetch_priority = my_fetch_priority; - *module = &opal_db_pmi_module; - return OPAL_SUCCESS; + if (!restrict_local) { + /* only use PMI if available - the ESS pmi module + * will force our selection if we are direct-launched + */ + if (mca_common_pmi_init()) { + *store_priority = my_store_priority; + *fetch_priority = my_fetch_priority; + *module = &opal_db_pmi_module; + return OPAL_SUCCESS; + } } *store_priority = 0; diff --git a/opal/mca/db/print/db_print_component.c b/opal/mca/db/print/db_print_component.c index 8cdcf5dff9..eade0267e5 100644 --- a/opal/mca/db/print/db_print_component.c +++ b/opal/mca/db/print/db_print_component.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,7 +30,8 @@ static int print_component_open(void); static int print_component_close(void); static int print_component_query(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority); + int *fetch_priority, + bool restrict_local); static int print_component_register(void); /* @@ -69,7 +71,8 @@ static int print_component_open(void) /* this component is NEVER used for store or fetch */ static int print_component_query(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority) + int *fetch_priority, + bool restrict_local) { if (NULL == mca_db_print_component.filename) { *store_priority = 0; diff --git a/opal/mca/db/sqlite/db_sqlite_component.c b/opal/mca/db/sqlite/db_sqlite_component.c index 45d736283d..10a17d07e0 100644 --- a/opal/mca/db/sqlite/db_sqlite_component.c +++ b/opal/mca/db/sqlite/db_sqlite_component.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +35,8 @@ static int sqlite_component_open(void); static int sqlite_component_close(void); static int sqlite_component_query(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority); + int *fetch_priority, + bool restrict_local); static int sqlite_component_register(void); /* @@ -74,7 +76,8 @@ static int sqlite_component_open(void) /* this component is NEVER used for store or fetch */ static int sqlite_component_query(opal_db_base_module_t **module, int *store_priority, - int *fetch_priority) + int *fetch_priority, + bool restrict_local) { struct stat buf; diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index eebd101e46..2794180e47 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -12,6 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -59,7 +60,7 @@ ORTE_DECLSPEC int orte_ess_env_get(void); ORTE_DECLSPEC int orte_ess_base_std_prolog(void); -ORTE_DECLSPEC int orte_ess_base_app_setup(void); +ORTE_DECLSPEC int orte_ess_base_app_setup(bool db_restrict_local); ORTE_DECLSPEC int orte_ess_base_app_finalize(void); ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report); diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 83c40ad66c..de712649f6 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -66,7 +66,7 @@ #include "orte/mca/ess/base/base.h" -int orte_ess_base_app_setup(void) +int orte_ess_base_app_setup(bool db_restrict_local) { int ret; char *error = NULL; @@ -162,7 +162,7 @@ int orte_ess_base_app_setup(void) error = "opal_db_base_open"; goto error; } - if (ORTE_SUCCESS != (ret = opal_db_base_select())) { + if (ORTE_SUCCESS != (ret = opal_db_base_select(db_restrict_local))) { ORTE_ERROR_LOG(ret); error = "orte_db_base_select"; goto error; diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index d98807ff0a..3f228fcc79 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -281,7 +281,8 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_db_base_open"; goto error; } - if (ORTE_SUCCESS != (ret = opal_db_base_select())) { + /* always restrict daemons to local database components */ + if (ORTE_SUCCESS != (ret = opal_db_base_select(true))) { ORTE_ERROR_LOG(ret); error = "orte_db_base_select"; goto error; diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index c6029ceaa9..28f03b4b2d 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ 
b/orte/mca/ess/env/ess_env_module.c @@ -142,16 +142,8 @@ static int rte_init(void) } - /* otherwise, I must be an application process - ensure - * that we do NOT load the PMI database component or else - * we could wind up wasting a lot of time in startup if - * we are a proc launched by mpirun in an environment that - * has PMI as well - */ - putenv("OMPI_MCA_db=^pmi"); - /* use the default procedure to finish my setup */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { + if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(true))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; @@ -211,9 +203,6 @@ static int rte_finalize(void) /* deconstruct the nidmap and jobmap arrays */ orte_util_nidmap_finalize(); - /* cleanup the env */ - unsetenv("OMPI_MCA_db"); - return ORTE_SUCCESS; } diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index aba7db4ced..9991814abc 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -329,9 +329,9 @@ static int rte_init(void) /* setup the global job and node arrays */ orte_job_data = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, - 1, - ORTE_GLOBAL_ARRAY_MAX_SIZE, - 1))) { + 1, + ORTE_GLOBAL_ARRAY_MAX_SIZE, + 1))) { ORTE_ERROR_LOG(ret); error = "setup job array"; goto error; @@ -339,18 +339,18 @@ static int rte_init(void) orte_node_pool = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE, - ORTE_GLOBAL_ARRAY_MAX_SIZE, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { + ORTE_GLOBAL_ARRAY_BLOCK_SIZE, + ORTE_GLOBAL_ARRAY_MAX_SIZE, + ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { ORTE_ERROR_LOG(ret); error = "setup node array"; goto error; } orte_node_topologies = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies, - ORTE_GLOBAL_ARRAY_BLOCK_SIZE, - ORTE_GLOBAL_ARRAY_MAX_SIZE, - 
ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { + ORTE_GLOBAL_ARRAY_BLOCK_SIZE, + ORTE_GLOBAL_ARRAY_MAX_SIZE, + ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) { ORTE_ERROR_LOG(ret); error = "setup node topologies array"; goto error; @@ -441,7 +441,7 @@ static int rte_init(void) error = "orte_db_base_open"; goto error; } - if (ORTE_SUCCESS != (ret = opal_db_base_select())) { + if (ORTE_SUCCESS != (ret = opal_db_base_select(true))) { ORTE_ERROR_LOG(ret); error = "orte_db_base_select"; goto error; @@ -555,8 +555,8 @@ static int rte_init(void) } /* Once the session directory location has been established, set - the opal_output hnp file location to be in the - proc-specific session directory. */ + the opal_output hnp file location to be in the + proc-specific session directory. */ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); @@ -695,28 +695,28 @@ static int rte_init(void) } /* We actually do *not* want an HNP to voluntarily yield() the - processor more than necessary. Orterun already blocks when - it is doing nothing, so it doesn't use any more CPU cycles than - it should; but when it *is* doing something, we do not want it - to be unnecessarily delayed because it voluntarily yielded the - processor in the middle of its work. + processor more than necessary. Orterun already blocks when + it is doing nothing, so it doesn't use any more CPU cycles than + it should; but when it *is* doing something, we do not want it + to be unnecessarily delayed because it voluntarily yielded the + processor in the middle of its work. - For example: when a message arrives at orterun, we want the - OS to wake us up in a timely fashion (which most OS's - seem good about doing) and then we want orterun to process - the message as fast as possible. If orterun yields and lets - aggressive MPI applications get the processor back, it may be a - long time before the OS schedules orterun to run again - (particularly if there is no IO event to wake it up). 
Hence, - routed OOB messages (for example) may be significantly delayed - before being delivered to MPI processes, which can be - problematic in some scenarios (e.g., COMM_SPAWN, BTL's that - require OOB messages for wireup, etc.). */ + For example: when a message arrives at orterun, we want the + OS to wake us up in a timely fashion (which most OS's + seem good about doing) and then we want orterun to process + the message as fast as possible. If orterun yields and lets + aggressive MPI applications get the processor back, it may be a + long time before the OS schedules orterun to run again + (particularly if there is no IO event to wake it up). Hence, + routed OOB messages (for example) may be significantly delayed + before being delivered to MPI processes, which can be + problematic in some scenarios (e.g., COMM_SPAWN, BTL's that + require OOB messages for wireup, etc.). */ opal_progress_set_yield_when_idle(false); return ORTE_SUCCESS; -error: + error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", @@ -794,6 +794,7 @@ static int rte_finalize(void) fclose(orte_xml_fp); } } + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c index f8ba7acb57..3df20efe86 100644 --- a/orte/mca/ess/lsf/ess_lsf_module.c +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -113,7 +114,7 @@ static int rte_init(void) /* otherwise, I must be an application process - use * the default procedure to finish my setup */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { + if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 3a5611a322..32a585c19e 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -247,7 +247,7 @@ static int rte_init(void) putenv("OMPI_MCA_routed=direct"); /* now use the default procedure to finish my setup */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { + if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; @@ -277,12 +277,14 @@ static int rte_init(void) } n += procs; } + } else { + procs = 0; } } } free(pmapping); - if ((procs > 0) && (procs < orte_process_info.num_procs)) { + if (0 < procs) { ranks = (int*)malloc(procs * sizeof(int)); for (i=0; i < procs; i++) { ranks[i] = n + i; diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 3b2699816f..5d6989045e 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -214,7 +214,7 @@ static int rte_init(void) */ /* use the std app init to complete the procedure */ - if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup())) { + if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 40d351cb97..d280b7ac0c 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. 
* All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/orte/mca/ess/tm/ess_tm_module.c b/orte/mca/ess/tm/ess_tm_module.c index 36bd6c6085..a6fe6f2eb7 100644 --- a/orte/mca/ess/tm/ess_tm_module.c +++ b/orte/mca/ess/tm/ess_tm_module.c @@ -112,23 +112,9 @@ static int rte_init(void) } - /* otherwise, I must be an application process - use - * the default procedure to finish my setup - */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { - ORTE_ERROR_LOG(ret); - error = "orte_ess_base_app_setup"; - goto error; - } - - /* setup the nidmap arrays */ - if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) { - ORTE_ERROR_LOG(ret); - error = "orte_util_nidmap_init"; - goto error; - } - - return ORTE_SUCCESS; + /* no other options are supported! */ + error = "ess_error"; + ret = ORTE_ERROR; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {