From 1107f9099e74697bea9bedebfdac46a6252c9e24 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 1 Jun 2014 04:28:17 +0000 Subject: [PATCH] Per the RFC issued here: http://www.open-mpi.org/community/lists/devel/2014/05/14827.php Refactor PMI support This commit was SVN r31907. --- ompi/mca/pubsub/pmi/pubsub_pmi.c | 56 +- ompi/mca/pubsub/pmi/pubsub_pmi_component.c | 5 +- ompi/mca/rte/pmi/rte_pmi_coll.c | 30 +- ompi/mca/rte/pmi/rte_pmi_component.c | 64 +- ompi/mca/rte/pmi/rte_pmi_db.c | 75 +-- ompi/mca/rte/pmi/rte_pmi_error.c | 9 +- ompi/mca/rte/pmi/rte_pmi_name.c | 5 +- opal/mca/common/pmi/Makefile.am | 6 + opal/mca/common/pmi/common_pmi.c | 593 ++++++++++++++++-- opal/mca/common/pmi/common_pmi.h | 50 +- opal/mca/common/pmi/help-common-pmi.txt | 24 + .../mca/common}/pmi/pmi2_pmap_parser.c | 10 +- opal/mca/common/pmi/pmi2_pmap_parser.h | 18 + opal/mca/dstore/pmi/dstore_pmi.c | 74 +-- opal/mca/dstore/pmi/dstore_pmi_component.c | 60 +- opal/runtime/opal_params.c | 13 + opal/runtime/opal_params.h | 1 + orte/mca/ess/pmi/ess_pmi_component.c | 3 +- orte/mca/ess/pmi/ess_pmi_module.c | 119 +--- orte/mca/grpcomm/pmi/Makefile.am | 4 - orte/mca/grpcomm/pmi/grpcomm_pmi_component.c | 3 +- orte/mca/grpcomm/pmi/grpcomm_pmi_module.c | 74 +-- orte/runtime/orte_globals.c | 1 + orte/runtime/orte_globals.h | 3 + 24 files changed, 779 insertions(+), 521 deletions(-) create mode 100644 opal/mca/common/pmi/help-common-pmi.txt rename {orte/mca/grpcomm => opal/mca/common}/pmi/pmi2_pmap_parser.c (97%) create mode 100644 opal/mca/common/pmi/pmi2_pmap_parser.h diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi.c b/ompi/mca/pubsub/pmi/pubsub_pmi.c index f0f08fe143..bea2c0a11d 100644 --- a/ompi/mca/pubsub/pmi/pubsub_pmi.c +++ b/ompi/mca/pubsub/pmi/pubsub_pmi.c @@ -12,10 +12,6 @@ #include "ompi_config.h" #include "ompi/constants.h" -#include -#if WANT_PMI2_SUPPORT -#include -#endif #include "opal/mca/common/pmi/common_pmi.h" @@ -37,40 +33,21 @@ static int init(void) */ static int publish ( const 
char *service_name, ompi_info_t *info, const char *port_name ) { - int rc; - -#if WANT_PMI2_SUPPORT - if (PMI_SUCCESS != (rc = PMI2_Nameserv_publish(service_name, NULL, port_name))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_publish"); - return OMPI_ERROR; - } -#else - if (PMI_SUCCESS != (rc = PMI_Publish_name(service_name, port_name))) { - OPAL_PMI_ERROR(rc, "PMI_Publish_name"); - return OMPI_ERROR; - } -#endif - return OMPI_SUCCESS; + return mca_common_pmi_publish(service_name,port_name); } static char* lookup ( const char *service_name, ompi_info_t *info ) { char *port=NULL; - int rc; - -#if WANT_PMI2_SUPPORT - port = (char*)malloc(1024*sizeof(char)); /* arbitrary size */ - if (PMI_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, 1024))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_lookup"); - free(port); - return NULL; + int rc = mca_common_pmi_lookup(service_name, &port); + /* in error case port will be set to NULL + * this is what our callers expect to see + * In future maybe som error handling need? + */ + if( rc != OPAL_SUCCESS ){ + // improove error processing + return port; // NULL ? 
} -#else - if (PMI_SUCCESS != (rc = PMI_Lookup_name(service_name, port))) { - OPAL_PMI_ERROR(rc, "PMI_Lookup_name"); - return NULL; - } -#endif return port; } @@ -78,20 +55,7 @@ static char* lookup ( const char *service_name, ompi_info_t *info ) * delete the entry */ static int unpublish ( const char *service_name, ompi_info_t *info ) { - int rc; - -#if WANT_PMI2_SUPPORT - if (PMI_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish"); - return OMPI_ERROR; - } -#else - if (PMI_SUCCESS != (rc = PMI_Unpublish_name(service_name))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish"); - return OMPI_ERROR; - } -#endif - return OMPI_SUCCESS;; + return mca_common_pmi_unpublish( service_name ); } diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi_component.c b/ompi/mca/pubsub/pmi/pubsub_pmi_component.c index 698ba1f9e6..1b2a9a08e1 100644 --- a/ompi/mca/pubsub/pmi/pubsub_pmi_component.c +++ b/ompi/mca/pubsub/pmi/pubsub_pmi_component.c @@ -12,6 +12,7 @@ #include "ompi_config.h" +#include "opal/runtime/opal_params.h" #include "opal/mca/common/pmi/common_pmi.h" #include "ompi/constants.h" @@ -73,7 +74,9 @@ static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority) /* if we are indirectly launched via orted, the * selection will have been turned "off" for us */ - if (mca_common_pmi_init ()) { + int rc = mca_common_pmi_init (opal_pmi_version); + + if ( OPAL_SUCCESS == rc ) { *priority = my_priority; *module = (mca_base_module_t *)&ompi_pubsub_pmi_module; return OMPI_SUCCESS; diff --git a/ompi/mca/rte/pmi/rte_pmi_coll.c b/ompi/mca/rte/pmi/rte_pmi_coll.c index 1932da2bf6..48845381ed 100644 --- a/ompi/mca/rte/pmi/rte_pmi_coll.c +++ b/ompi/mca/rte/pmi/rte_pmi_coll.c @@ -12,10 +12,7 @@ #include "ompi_config.h" #include -#include -#if WANT_PMI2_SUPPORT -#include -#endif +#include "opal/mca/common/pmi/common_pmi.h" #include "opal/threads/tsd.h" #include "ompi/constants.h" @@ -35,30 +32,19 @@ 
coll_construct(ompi_rte_collective_t *coll) OBJ_CLASS_INSTANCE(ompi_rte_collective_t, opal_object_t, coll_construct, NULL); -int -ompi_rte_modex(ompi_rte_collective_t *coll) +int ompi_rte_modex(ompi_rte_collective_t *coll) { int len, ret; char *kvs; - ret = PMI_KVS_Get_name_length_max(&len); - if (PMI_SUCCESS != ret) return OMPI_ERROR; - + len = mca_common_pmi_kvslen(); kvs = malloc(len); if (NULL == kvs) { return OMPI_ERR_OUT_OF_RESOURCE; } - ret = PMI_KVS_Get_my_name(kvs, len); - if (PMI_SUCCESS != ret) return OMPI_ERROR; - - ret = PMI_KVS_Commit(kvs); - if (PMI_SUCCESS != ret) return OMPI_ERROR; - - ret = PMI_Barrier(); - if (PMI_SUCCESS != ret) return OMPI_ERROR; - - return OMPI_SUCCESS; + mca_common_pmi_kvsname(kvs, len); + return mca_common_pmi_commit(kvs); } @@ -67,10 +53,10 @@ ompi_rte_barrier(ompi_rte_collective_t *coll) { int ret; - ret = PMI_Barrier(); - if (PMI_SUCCESS != ret) return OMPI_ERROR; + ret = mca_common_pmi_barrier(); + if (OPAL_SUCCESS != ret) + return OMPI_ERROR; coll->active = false; - return OMPI_SUCCESS; } diff --git a/ompi/mca/rte/pmi/rte_pmi_component.c b/ompi/mca/rte/pmi/rte_pmi_component.c index ddb1d99e0b..5a5c7b7c3c 100644 --- a/ompi/mca/rte/pmi/rte_pmi_component.c +++ b/ompi/mca/rte/pmi/rte_pmi_component.c @@ -14,10 +14,8 @@ #include #include #include -#include -#if WANT_PMI2_SUPPORT -#include -#endif +#include "opal/runtime/opal_params.h" +#include "opal/mca/common/pmi/common_pmi.h" #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" @@ -70,30 +68,14 @@ ompi_rte_init(int *argc, char ***argv) char *node_info; hwloc_obj_t root; hwloc_cpuset_t boundset, rootset; - char *tmp_str; + char *tmp_str, *error; -#if WANT_PMI2_SUPPORT - { - int spawned, appnum; - - if (PMI2_Initialized ()) return OMPI_SUCCESS; - if (PMI_SUCCESS != PMI2_Init(&spawned, &size, &rank, &appnum)) { - return OMPI_ERROR; - } + // Initialize PMI + int rc = mca_common_pmi_init (opal_pmi_version); + + if ( OPAL_SUCCESS != rc ) { + return rc; } -#else - 
{ - PMI_BOOL initialized; - - if (PMI_SUCCESS != PMI_Initialized(&initialized)) { - return OMPI_ERROR; - } - - if (PMI_TRUE != initialized && PMI_SUCCESS != PMI_Init(&initialized)) { - return OMPI_ERROR; - } - } -#endif /* be kind, set line buffering */ setvbuf(stdout, NULL, _IONBF, 0); @@ -103,19 +85,27 @@ ompi_rte_init(int *argc, char ***argv) return ret; } - PMI_Get_appnum(&tmp); + // Setup job name + tmp = mca_common_pmi_appnum(); ompi_rte_my_process_name.jobid = tmp; - PMI_Get_rank(&rank); - ompi_rte_my_process_name.vpid = rank; - ompi_process_info.app_num = ompi_rte_my_process_name.jobid; ompi_process_info.pid = getpid(); - PMI_Get_size(&size); + + // Setup rank information + rank = mca_common_pmi_rank(); + ompi_rte_my_process_name.vpid = rank; + + // Setup process groups size + size = mca_common_pmi_size(); ompi_process_info.num_procs = size; - PMI_Get_clique_size(&tmp); - node_ranks = malloc(tmp * sizeof(int)); - if (NULL == node_ranks) return OMPI_ERROR; - PMI_Get_clique_ranks(node_ranks, tmp); + + + rc = mca_common_pmi_local_info(rank, &node_ranks, &tmp, &error); + if( OPAL_SUCCESS != rc ){ + // FIX ME: maybe we somehow should use error message to + // help user understand the reason of failure? + return rc; + } ompi_process_info.num_local_peers = tmp; for (i = 0 ; i < ompi_process_info.num_local_peers ; ++i) { if (rank == node_ranks[i]) { @@ -164,8 +154,7 @@ ompi_rte_init(int *argc, char ***argv) if (OMPI_SUCCESS != ret) return ret; /* Fill in things the attributes want to know... 
*/ - ret = PMI_Get_universe_size(&tmp); - if (OMPI_SUCCESS != ret) return OMPI_ERROR; + tmp = mca_common_pmi_universe(); asprintf(&tmp_str, "%d", tmp); setenv("OMPI_UNIVERSE_SIZE", tmp_str, 1); free(tmp_str); @@ -195,6 +184,7 @@ ompi_rte_finalize(void) { ompi_rte_pmi_db_fini(); ompi_rte_pmi_name_fini(); + mca_common_pmi_finalize(); opal_finalize(); return OMPI_SUCCESS; } diff --git a/ompi/mca/rte/pmi/rte_pmi_db.c b/ompi/mca/rte/pmi/rte_pmi_db.c index f181438345..0eedd8ecb3 100644 --- a/ompi/mca/rte/pmi/rte_pmi_db.c +++ b/ompi/mca/rte/pmi/rte_pmi_db.c @@ -12,10 +12,7 @@ #include "ompi_config.h" #include -#include -#if WANT_PMI2_SUPPORT -#include -#endif +#include "opal/mca/common/pmi/common_pmi.h" #include "opal/util/argv.h" #include "opal/util/output.h" @@ -66,22 +63,12 @@ OBJ_CLASS_INSTANCE(local_data_t, */ static int kvs_put(const char *key, const char *value) { -#if WANT_PMI2_SUPPORT - return PMI2_KVS_Put(key, value); -#else - return PMI_KVS_Put(pmi_kvs_name, key, value); -#endif + return mca_common_pmi_put(pmi_kvs_name, key, value); } static int kvs_get(const char *key, char *value, int valuelen) { -#if WANT_PMI2_SUPPORT - int len; - - return PMI2_KVS_Get(pmi_kvs_name, PMI2_ID_NULL, key, value, valuelen, &len); -#else - return PMI_KVS_Get(pmi_kvs_name, key, value, valuelen); -#endif + return mca_common_pmi_get(pmi_kvs_name, key, value, valuelen); } @@ -89,45 +76,19 @@ static int setup_pmi(void) { int max_length, rc; -#if WANT_PMI2_SUPPORT - pmi_vallen_max = PMI2_MAX_VALLEN; -#else - rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max); - if (PMI_SUCCESS != rc) { - return OMPI_ERROR; - } -#endif + pmi_vallen_max = mca_common_pmi_vallen(); + max_length = mca_common_pmi_kvslen(); + pmi_keylen_max = mca_common_pmi_keylen(); -#if WANT_PMI2_SUPPORT - /* TODO -- is this ok */ - max_length = 1024; -#else - if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) { - return OMPI_ERROR; - } -#endif pmi_kvs_name = (char*)malloc(max_length); if (NULL == 
pmi_kvs_name) { return OMPI_ERR_OUT_OF_RESOURCE; } -#if WANT_PMI2_SUPPORT - rc = PMI2_Job_GetId(pmi_kvs_name, max_length); -#else - rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length); -#endif - if (PMI_SUCCESS != rc) { - return OMPI_ERROR; + rc = mca_common_pmi_kvsname(pmi_kvs_name, max_length); + if( OPAL_SUCCESS != rc ){ + return rc; } - -#if WANT_PMI2_SUPPORT - pmi_keylen_max = PMI2_MAX_KEYLEN; -#else - if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max))) { - return OMPI_ERROR; - } -#endif - return OMPI_SUCCESS; } @@ -259,7 +220,7 @@ static char* fetch_string(const char *key) tmp_val = (char*)malloc(pmi_vallen_max * sizeof(char)); /* the first section of the string has the original key, so fetch it */ - if (PMI_SUCCESS != kvs_get(key, tmp_val, pmi_vallen_max)) { + if (OPAL_SUCCESS != kvs_get(key, tmp_val, pmi_vallen_max)) { OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); free(tmp_val); return NULL; @@ -285,7 +246,7 @@ static char* fetch_string(const char *key) /* create the key */ asprintf(&tmpkey, "%s:%d", key, i); /* fetch it */ - if (PMI_SUCCESS != kvs_get(tmpkey, tmp_val, pmi_vallen_max)) { + if (OPAL_SUCCESS != kvs_get(tmpkey, tmp_val, pmi_vallen_max)) { OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); free(tmp_val); free(tmpkey); @@ -441,11 +402,11 @@ ompi_rte_db_store(const ompi_process_name_t *proc, OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), pmikey, pmidata); - if (PMI_SUCCESS != (rc = kvs_put(pmikey, pmidata))) { + if ( OPAL_SUCCESS != (rc = kvs_put(pmikey, pmidata))) { free(pmidata); free(pmikey); opal_argv_free(strdata); - return OMPI_ERROR; + return rc; } free(pmidata); /* for each remaining segment, augment the key with the index */ @@ -456,10 +417,10 @@ ompi_rte_db_store(const ompi_process_name_t *proc, OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), pmikey, strdata[i]); - if (PMI_SUCCESS != (rc = kvs_put(tmpkey, strdata[i]))) { + if (OPAL_SUCCESS != (rc = kvs_put(tmpkey, strdata[i]))) { free(pmikey); opal_argv_free(strdata); - return OMPI_ERROR; + return rc; } free(tmpkey); 
} @@ -518,8 +479,8 @@ ompi_rte_db_store(const ompi_process_name_t *proc, pmikey, pmidata); rc = kvs_put(pmikey, pmidata); - if (PMI_SUCCESS != rc) { - return OMPI_ERROR; + if (OPAL_SUCCESS != rc) { + return rc; } free(pmidata); free(pmikey); @@ -629,7 +590,7 @@ ompi_rte_db_fetch(const struct ompi_proc_t *pptr, OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM); return OMPI_ERR_BAD_PARAM; } - if (PMI_SUCCESS != kvs_get(pmikey, tmp_val, pmi_vallen_max)) { + if (OPAL_SUCCESS != kvs_get(pmikey, tmp_val, pmi_vallen_max)) { OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); free(pmikey); return OMPI_ERR_NOT_FOUND; diff --git a/ompi/mca/rte/pmi/rte_pmi_error.c b/ompi/mca/rte/pmi/rte_pmi_error.c index 837729dbb5..5687fb0b05 100644 --- a/ompi/mca/rte/pmi/rte_pmi_error.c +++ b/ompi/mca/rte/pmi/rte_pmi_error.c @@ -13,10 +13,7 @@ #include #include -#include -#if WANT_PMI2_SUPPORT -#include -#endif +#include "opal/mca/common/pmi/common_pmi.h" #include "opal/util/error.h" #include "opal/util/output.h" @@ -40,14 +37,14 @@ ompi_rte_abort(int error_code, char *fmt, ...) 
va_end(ap); - PMI_Abort(error_code, msg); + mca_common_pmi_abort(error_code, msg); } int ompi_rte_abort_peers(ompi_process_name_t *procs, size_t nprocs, int status) { - PMI_Abort(status, ""); + mca_common_pmi_abort(status, "N/A"); return OMPI_SUCCESS; } diff --git a/ompi/mca/rte/pmi/rte_pmi_name.c b/ompi/mca/rte/pmi/rte_pmi_name.c index 66967002ac..0f0ba73ee7 100644 --- a/ompi/mca/rte/pmi/rte_pmi_name.c +++ b/ompi/mca/rte/pmi/rte_pmi_name.c @@ -12,10 +12,7 @@ #include "ompi_config.h" #include -#include -#if WANT_PMI2_SUPPORT -#include -#endif +#include "opal/mca/common/pmi/common_pmi.h" #include "opal/dss/dss.h" #include "opal/threads/tsd.h" diff --git a/opal/mca/common/pmi/Makefile.am b/opal/mca/common/pmi/Makefile.am index 798f09d2fc..c2b4d650f0 100644 --- a/opal/mca/common/pmi/Makefile.am +++ b/opal/mca/common/pmi/Makefile.am @@ -24,6 +24,8 @@ AM_CPPFLAGS = $(common_pmi_CPPFLAGS) +dist_opaldata_DATA = help-common-pmi.txt + # control whether building an installed library or a convenience # (noinst) library if MCA_BUILD_opal_common_pmi_DSO @@ -37,5 +39,9 @@ endif lib_LTLIBRARIES = $(component_install) noinst_LTLIBRARIES = $(component_noinst) libmca_common_pmi_la_SOURCES = common_pmi.h common_pmi.c +if WANT_PMI2_SUPPORT +libmca_common_pmi_la_SOURCES += pmi2_pmap_parser.c +endif + libmca_common_pmi_la_LDFLAGS = $(common_pmi_LDFLAGS) -version-info $(libmca_opal_common_pmi_so_version) libmca_common_pmi_la_LIBADD = $(common_pmi_LIBS) diff --git a/opal/mca/common/pmi/common_pmi.c b/opal/mca/common/pmi/common_pmi.c index ae2c494861..a75541577f 100644 --- a/opal/mca/common/pmi/common_pmi.c +++ b/opal/mca/common/pmi/common_pmi.c @@ -18,6 +18,9 @@ #include "opal/types.h" #include "opal/util/output.h" +#include "opal/util/show_help.h" + +#include "pmi2_pmap_parser.h" #include #include @@ -27,52 +30,214 @@ #include "common_pmi.h" +// usage accounting static int mca_common_pmi_init_count = 0; -static int mca_common_pmi_init_size = 0; -static int mca_common_pmi_init_rank = 0; 
-bool mca_common_pmi_init (void) { - if (0 < mca_common_pmi_init_count++) { - return true; - } +// per-launch selection between PMI versions +static int mca_common_pmi_version = 0; + +// PMI constant values: +static int pmi_kvslen_max = 0; +static int pmi_keylen_max = 0; +static int pmi_vallen_max = 0; + +// Job environment description +static int pmi_size = 0; +static int pmi_rank = 0; +static int pmi_appnum = 0; +static int pmi_usize = 0; +static char *pmi_kvs_name = NULL; + #if WANT_PMI2_SUPPORT - { - int spawned, size, rank, appnum; +static int mca_initialize_pmi_v2(void) +{ + int spawned, size, rank, appnum; + int rc, ret = OPAL_ERROR; - opal_output(0, "INIT PMI"); - - /* if we can't startup PMI, we can't be used */ - if (PMI2_Initialized ()) { - return true; - } - - if (PMI_SUCCESS == PMI2_Init(&spawned, &size, &rank, &appnum)) { - mca_common_pmi_init_size = size; - mca_common_pmi_init_rank = rank; - mca_common_pmi_init_count--; - return true; - } else { - return false; - } + /* deal with a Slurm bug by first checking if we were + * even launched by a PMI server before attempting + * to use PMI */ + if (NULL == getenv("PMI_FD")) { + return OPAL_ERROR; } -#else - { - PMI_BOOL initialized; - if (PMI_SUCCESS != PMI_Initialized(&initialized)) { - mca_common_pmi_init_count--; - return false; + /* if we can't startup PMI, we can't be used */ + if ( PMI2_Initialized () ) { + return OPAL_SUCCESS; + } + size = -1; + rank = -1; + appnum = -1; + if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) { + opal_show_help("help-common-pmi.txt", "pmi2-init-failed", true, rc); + return OPAL_ERROR; + } + if( size < 0 || rank < 0 ){ + opal_output(0, "SIZE %d RANK %d", size, rank); + opal_show_help("help-common-pmi.txt", "pmi2-init-returned-bad-values", true); + goto err_exit; + } + + + pmi_size = size; + pmi_rank = rank; + pmi_appnum = appnum; + + pmi_vallen_max = PMI2_MAX_VALLEN; + pmi_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility? 
+ pmi_keylen_max = PMI2_MAX_KEYLEN; + + + char buf[16]; + int found; + + rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); + if( PMI2_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); + goto err_exit; + } + pmi_usize = atoi(buf); + + pmi_kvs_name = (char*)malloc(pmi_kvslen_max); + if( pmi_kvs_name == NULL ){ + PMI2_Finalize(); + ret = OPAL_ERR_OUT_OF_RESOURCE; + goto err_exit; + } + rc = PMI2_Job_GetId(pmi_kvs_name, pmi_kvslen_max); + if( PMI2_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); + goto err_exit; + } + + return OPAL_SUCCESS; +err_exit: + PMI2_Finalize(); + return ret; +} +#endif + +static int mca_initialize_pmi_v1(void) +{ + PMI_BOOL initialized; + int spawned; + int rc, ret = OPAL_ERROR; + + /* deal with a Slurm bug by first checking if we were + * even launched by a PMI server before attempting + * to use PMI */ + if (NULL == getenv("PMI_FD")) { + return OPAL_ERROR; + } + + if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) { + OPAL_PMI_ERROR(rc, "PMI_Initialized"); + return OPAL_ERROR; + } + + if( PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned)) ) { + OPAL_PMI_ERROR(rc, "PMI_Init"); + return OPAL_ERROR; + } + + // Initialize space demands + rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max); + if( PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max"); + goto err_exit; + } + + rc = PMI_KVS_Get_name_length_max(&pmi_kvslen_max); + if (PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max"); + goto err_exit; + } + + rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max); + if( PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max"); + goto err_exit; + } + + // Initialize job environment information + rc = PMI_Get_rank(&pmi_rank); + if( PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_Get_rank"); + return OPAL_ERROR; + } + rc = PMI_Get_universe_size(&pmi_usize); + if( PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); + goto 
err_exit; + } + + rc = PMI_Get_size(&pmi_size); + if( PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_Get_size"); + goto err_exit; + } + + rc = PMI_Get_appnum(&pmi_appnum); + if( PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI_Get_appnum"); + goto err_exit; + } + + pmi_kvs_name = (char*)malloc(pmi_kvslen_max); + if( pmi_kvs_name == NULL ){ + ret = OPAL_ERR_OUT_OF_RESOURCE; + goto err_exit; + } + + rc = PMI_KVS_Get_my_name(pmi_kvs_name,pmi_kvslen_max); + if( PMI_SUCCESS != rc ) { + OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); + goto err_exit; + } + + return OPAL_SUCCESS; + +err_exit: + PMI_Finalize(); + return ret; +} + + +int mca_common_pmi_init (int preferred_version) { + int rc = OPAL_SUCCESS; + if (0 < mca_common_pmi_init_count++) { + return rc; + } + + // Decide what version of PMI we want +#if WANT_PMI2_SUPPORT + { + bool auto_select = !(preferred_version >= 1 && preferred_version <= 2); + if( auto_select ){ + // choose PMIv2 + mca_common_pmi_version = 2; + }else{ + mca_common_pmi_version = preferred_version; } - if (PMI_TRUE != initialized && PMI_SUCCESS != PMI_Init(&initialized)) { - mca_common_pmi_init_count--; - return false; + if( mca_common_pmi_version == 2 ){ + rc = mca_initialize_pmi_v2(); + if( !auto_select || rc == OPAL_SUCCESS ){ + // If we want exactly PMIv2 or we succeed + if( rc != OPAL_SUCCESS ){ + mca_common_pmi_init_count--; + } + return rc; + } } } #endif - - return true; + mca_common_pmi_version = 1; + if( OPAL_SUCCESS != (rc = mca_initialize_pmi_v1()) ){ + mca_common_pmi_init_count--; + } + return rc; } void mca_common_pmi_finalize (void) { @@ -82,10 +247,14 @@ void mca_common_pmi_finalize (void) { if (0 == --mca_common_pmi_init_count) { #if WANT_PMI2_SUPPORT - PMI2_Finalize (); -#else - PMI_Finalize (); + if( mca_common_pmi_version == 2){ + PMI2_Finalize (); + } + else #endif + { + PMI_Finalize (); + } } } @@ -120,33 +289,339 @@ char* opal_errmgr_base_pmi_error(int pmi_err) } -bool mca_common_pmi_rank(int *rank) { - -#if !WANT_PMI2_SUPPORT - { - 
int ret; - if (PMI_SUCCESS != (ret = PMI_Get_rank(&mca_common_pmi_init_rank))) { - OPAL_PMI_ERROR(ret, "PMI_Get_rank"); - return false; - } - } -#endif - *rank = mca_common_pmi_init_rank; - return true; +int mca_common_pmi_rank() +{ + return pmi_rank; } -bool mca_common_pmi_size(int *size) { +int mca_common_pmi_size() +{ + return pmi_size; +} -#if !WANT_PMI2_SUPPORT +int mca_common_pmi_appnum() +{ + return pmi_appnum; +} + + +int mca_common_pmi_universe() +{ + return pmi_usize; +} + +int mca_common_pmi_kvslen() { + return pmi_kvslen_max; +} + +int mca_common_pmi_keylen() +{ + return pmi_keylen_max; +} + +int mca_common_pmi_vallen() +{ + return pmi_vallen_max; +} + +int mca_common_pmi_kvsname(char *buf, int len) +{ + int i; + if( (unsigned)len < strnlen(pmi_kvs_name,pmi_kvslen_max) ){ + return OPAL_ERR_BAD_PARAM; + } + for(i = 0; pmi_kvs_name[i]; i++){ + buf[i] = pmi_kvs_name[i]; + } + buf[i] = '\0'; + return OPAL_SUCCESS; +} + +int mca_common_pmi_id(char **pmi_id_ret, char **error){ + char *pmi_id = NULL; + int rc; + + // Default values + *pmi_id_ret = pmi_id; + *error = NULL; + +#if WANT_PMI2_SUPPORT + if( mca_common_pmi_version == 2 ){ + // TODO: add proper error handling + pmi_id = (char*)malloc(PMI2_MAX_VALLEN); + if( pmi_id == NULL ){ + *error = "mca_common_pmi_id: could not get memory for PMIv2 ID"; + return OPAL_ERR_OUT_OF_RESOURCE; + } + strncpy(pmi_id, pmi_kvs_name, pmi_kvslen_max); + } + else +#endif { - int ret; - if (PMI_SUCCESS != (ret = PMI_Get_universe_size(&mca_common_pmi_init_size))) { - OPAL_PMI_ERROR(ret, "PMI_Get_universe_size"); - return false; + int pmi_maxlen; + /* get our PMI id length */ + if (PMI_SUCCESS != (rc = PMI_Get_id_length_max(&pmi_maxlen))) { + *error = "PMI_Get_id_length_max"; + return OPAL_ERROR; + } + // TODO: add proper error handling + pmi_id = (char*)malloc(pmi_maxlen); + if( pmi_id == NULL ){ + *error = "mca_common_pmi_id: could not get memory for PMIv1 ID"; + return OPAL_ERR_OUT_OF_RESOURCE; + } + /* Get domain id */ + if 
(PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(pmi_id, pmi_maxlen))) { + free(pmi_id); + *error = "PMI_Get_kvs_domain_id"; + return OPAL_ERROR; } } -#endif - *size = mca_common_pmi_init_size; - return true; + + *pmi_id_ret = pmi_id; + return OPAL_SUCCESS; } + +int mca_common_pmi_local_info(int vpid, int **ranks_ret, + int *procs_ret, char **error) +{ + int *ranks; + int procs = -1; + int rc; + +#if WANT_PMI2_SUPPORT + if(mca_common_pmi_version == 2){ + + { + char *pmapping = (char*)malloc(PMI2_MAX_VALLEN); + if( pmapping == NULL ){ + *error = "mca_common_pmi_local_info: could not get memory for PMIv2 process mapping"; + return OPAL_ERR_OUT_OF_RESOURCE; + } + int found; + int my_node; + + rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found); + if( !found || PMI2_SUCCESS != rc ) { + /* can't check PMI2_SUCCESS as some folks (i.e., Cray) don't define it */ + OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr"); + *error = "mca_common_pmi_local_info: could not get PMI_process_mapping"; + return OPAL_ERROR; + } + + ranks = mca_common_pmi2_parse_pmap(pmapping, vpid, &my_node, &procs); + if (NULL == ranks) { + *error = "mca_common_pmi_local_info: could not get memory for PMIv2 local ranks"; + return OPAL_ERR_OUT_OF_RESOURCE; + } + + free(pmapping); + } + + } + else +#endif + { + /* get our local proc info to find our local rank */ + if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&procs))) { + OPAL_PMI_ERROR(rc, "PMI_Get_clique_size"); + *error = "mca_common_pmi_local_info: could not get PMI clique size"; + return OPAL_ERROR; + } + /* now get the specific ranks */ + ranks = (int*)calloc(procs, sizeof(int)); + if (NULL == ranks) { + *error = "mca_common_pmi_local_info: could not get memory for local ranks"; + return OPAL_ERR_OUT_OF_RESOURCE; + } + if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(ranks, procs))) { + OPAL_PMI_ERROR(rc, "PMI_Get_clique_ranks"); + *error = "mca_common_pmi_local_info: could not get clique ranks"; + return OPAL_ERROR; + } + } + + 
*ranks_ret = ranks; + *procs_ret = procs; + return OPAL_SUCCESS; +} + +void mca_common_pmi_abort(int status, char *msg) +{ +#if WANT_PMI2_SUPPORT + if( mca_common_pmi_version == 2){ + PMI2_Abort(status, msg); + } + else +#endif + { + PMI_Abort(status, msg); + } +} + +int rc; + +int mca_common_pmi_publish(const char *service_name, const char *port_name) +{ +#if WANT_PMI2_SUPPORT + if( mca_common_pmi_version == 2){ + if (PMI2_SUCCESS != (rc = PMI2_Nameserv_publish(service_name, NULL, port_name))) { + OPAL_PMI_ERROR(rc, "PMI2_Nameserv_publish"); + return OPAL_ERROR; + } + } + else +#endif + { + if (PMI_SUCCESS != (rc = PMI_Publish_name(service_name, port_name))) { + OPAL_PMI_ERROR(rc, "PMI_Publish_name"); + return OPAL_ERROR; + } + } + return OPAL_SUCCESS; +} + +int mca_common_pmi_lookup(const char *service_name, char **port_ret) +{ + // FIXME: + // 1. Why don't we malloc memory for the port for PMI v1? + // 2. Maybe error handling is needed in pbusub? + // 3. Is it legal to call OPAL_PMI_ERROR for PMIv2 rc? + + char *port = NULL; + *port_ret = port; + int rc; + +#if WANT_PMI2_SUPPORT + if( mca_common_pmi_version == 2 ){ + port = (char*)malloc(1024*sizeof(char)); /* arbitrary size */ + if( port == NULL ){ + return OPAL_ERR_OUT_OF_RESOURCE; + } + if (PMI_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, 1024))) { + OPAL_PMI_ERROR(rc, "PMI2_Nameserv_lookup"); + free(port); + return OPAL_ERROR; + } + } + else +#endif + { + // Allocate mem for port here? Otherwise we won't get success! 
+ // SLURM PMIv1 doesn't implement this function + + if (PMI_SUCCESS != (rc = PMI_Lookup_name(service_name, port))) { + OPAL_PMI_ERROR(rc, "PMI_Lookup_name"); + return OPAL_ERROR; + } + } + + *port_ret = port; + return OPAL_SUCCESS; +} + +int mca_common_pmi_unpublish ( const char *service_name ) +{ + int rc; + +#if WANT_PMI2_SUPPORT + if( mca_common_pmi_version == 2 ){ + if (PMI2_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) { + OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish"); + return OPAL_ERROR; + } + } + else +#endif + { + if (PMI_SUCCESS != (rc = PMI_Unpublish_name(service_name))) { + OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish"); + return OPAL_ERROR; + } + } + return OPAL_SUCCESS;; +} + +int mca_common_pmi_barrier() +{ +#if WANT_PMI2_SUPPORT + if( mca_common_pmi_version == 2 ){ + /* PMI2 doesn't provide a barrier, so use the Fence function here */ + if (PMI2_SUCCESS != (rc = PMI2_KVS_Fence())) { + // FIX ME: OPAL_PMI2_ERROR(rc, "PMI2_KVS_Fence"); + return OPAL_ERROR; + } + } + else +#endif + { + /* use the PMI barrier function */ + if (PMI_SUCCESS != (rc = PMI_Barrier())) { + OPAL_PMI_ERROR(rc, "PMI_Barrier"); + return OPAL_ERROR; + } + } + return OPAL_SUCCESS; +} + +int mca_common_pmi_put(const char *kvs_name, + const char *key, const char *value) +{ + int rc; +#if WANT_PMI2_SUPPORT + if( mca_common_pmi_version == 2 ){ + if( PMI2_SUCCESS != PMI2_KVS_Put(key, value) ){ + // FIXME: OPAL_PMI2_ERROR(rc, "PMI2_KVS_Put"); + return OPAL_ERROR; + } + } + else +#endif + { + rc = PMI_KVS_Put(kvs_name, key, value); + if( PMI_SUCCESS != rc ){ + OPAL_PMI_ERROR(rc, "PMI_KVS_Put"); + return OPAL_ERROR; + } + } + return OPAL_SUCCESS; +} + +int mca_common_pmi_get(const char *kvs_name, const char *key, + char *value, int valuelen) +{ + int rc; +#if WANT_PMI2_SUPPORT + if( mca_common_pmi_version == 2 ){ + int len; + rc = PMI2_KVS_Get(kvs_name, PMI2_ID_NULL, key, value, valuelen, &len); + if( PMI2_SUCCESS != rc ){ + // OPAL_PMI2_ERROR(rc, "PMI_KVS_Put"); + return 
OPAL_ERROR; + } + } + else +#endif + { + rc = PMI_KVS_Get(kvs_name, key, value, valuelen); + if( PMI_SUCCESS != rc ){ + OPAL_PMI_ERROR(rc, "PMI_KVS_Put"); + return OPAL_ERROR; + } + } + return OPAL_SUCCESS; +} + +int mca_common_pmi_commit(char *kvs_name) +{ + if( mca_common_pmi_version == 1 ){ + + if (PMI_SUCCESS != (rc = PMI_KVS_Commit(kvs_name))) { + OPAL_PMI_ERROR(rc, "PMI_KVS_Commit"); + return OPAL_ERROR; + } + } + return mca_common_pmi_barrier(); +} + diff --git a/opal/mca/common/pmi/common_pmi.h b/opal/mca/common/pmi/common_pmi.h index 09b338d0f1..175388c37e 100644 --- a/opal/mca/common/pmi/common_pmi.h +++ b/opal/mca/common/pmi/common_pmi.h @@ -13,6 +13,14 @@ * $HEADER$ */ +#ifndef COMMON_PMI_H +#define COMMON_PMI_H + +#include +#if WANT_PMI2_SUPPORT +#include +#endif + #if !defined(OPAL_MCA_COMMON_PMI) #define OPAL_MCA_COMMON_PMI @@ -26,7 +34,7 @@ * @retval true PMI successfully initialized * @retval false PMI could not be initialized */ -bool mca_common_pmi_init (void); +int mca_common_pmi_init (int preferred_version); /** * mca_common_pmi_finalize: @@ -38,13 +46,41 @@ void mca_common_pmi_finalize (void); #define OPAL_PMI_ERROR(pmi_err, pmi_func) \ do { \ - opal_output(0, "[%s:%d:%s] %s: %s\n", \ - __FILE__, __LINE__, __func__, \ - pmi_func, opal_errmgr_base_pmi_error(pmi_err)); \ + opal_output(0, "%s [%s:%d:%s]: %s\n", \ + pmi_func, __FILE__, __LINE__, __func__, \ + opal_errmgr_base_pmi_error(pmi_err)); \ } while(0); + OPAL_DECLSPEC char* opal_errmgr_base_pmi_error(int pmi_err); -#endif +int mca_common_pmi_rank(void); +int mca_common_pmi_size(void); +int mca_common_pmi_appnum(void); +int mca_common_pmi_universe(void); +int mca_common_pmi_kvsname(char *buf, int len); -bool mca_common_pmi_rank(int *rank); -bool mca_common_pmi_size(int *size); +int mca_common_pmi_kvslen(void); +int mca_common_pmi_keylen(void); +int mca_common_pmi_vallen(void); + +int mca_common_pmi_id(char **pmi_id_ret, char **error); +int mca_common_pmi_local_info(int vpid, int 
**ranks_ret, + int *procs_ret, char **error); +void mca_common_pmi_abort(int status, char *msg); + +// Publish-subscribe operations +int mca_common_pmi_publish(const char *service_name, const char *port_name); +int mca_common_pmi_lookup(const char *service_name, char **port_ret); +int mca_common_pmi_unpublish ( const char *service_name ); + +// KVS put/get +int mca_common_pmi_put(const char *kvs_name, + const char *key, const char *value); + +int mca_common_pmi_get(const char *kvs_name, const char *key, + char *value, int valuelen); +int mca_common_pmi_commit(char *kvs_name); +int mca_common_pmi_barrier(void); + +#endif +#endif diff --git a/opal/mca/common/pmi/help-common-pmi.txt b/opal/mca/common/pmi/help-common-pmi.txt new file mode 100644 index 0000000000..acd7efde19 --- /dev/null +++ b/opal/mca/common/pmi/help-common-pmi.txt @@ -0,0 +1,24 @@ +# -*- text -*- +# +# Copyright (c) 2014 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +[pmi2-init-failed] +PMI2 failed to initialize, returning an error code of %d. +We cannot use PMI2 at this time, and your job will +likely abort. +# +[pmi2-init-returned-bad-values] +PMI2 initialized but returned bad values for size and rank. +This is symptomatic of either a failure to use the +"--mpi=pmi2" flag in SLURM, or a borked PMI2 installation. +If running under SLURM, try adding "-mpi=pmi2" to your +srun command line. If that doesn't work, or if you are +not running under SLURM, try removing or renaming the +pmi2.h header file so PMI2 support will not automatically +be built, reconfigure and build OMPI, and then try again +with only PMI1 support enabled. 
diff --git a/orte/mca/grpcomm/pmi/pmi2_pmap_parser.c b/opal/mca/common/pmi/pmi2_pmap_parser.c similarity index 97% rename from orte/mca/grpcomm/pmi/pmi2_pmap_parser.c rename to opal/mca/common/pmi/pmi2_pmap_parser.c index 8c307a6a73..4f0d43f045 100644 --- a/orte/mca/grpcomm/pmi/pmi2_pmap_parser.c +++ b/opal/mca/common/pmi/pmi2_pmap_parser.c @@ -10,14 +10,10 @@ * $HEADER$ * */ + +#include "pmi2_pmap_parser.h" #ifdef STANDALONE_TEST #define WANT_PMI2_SUPPORT 1 -#else -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include "grpcomm_pmi.h" #endif /** @@ -134,7 +130,7 @@ static int *find_lrs(char *map, int my_node, int *nlrs) * @return array that contains ranks local to my_rank or NULL * on failure. Array must be freed by the caller. */ -int *orte_grpcomm_pmi2_parse_pmap(char *pmap, int my_rank, +int *mca_common_pmi2_parse_pmap(char *pmap, int my_rank, int *node, int *nlrs) { char *p; diff --git a/opal/mca/common/pmi/pmi2_pmap_parser.h b/opal/mca/common/pmi/pmi2_pmap_parser.h new file mode 100644 index 0000000000..ad8f012213 --- /dev/null +++ b/opal/mca/common/pmi/pmi2_pmap_parser.h @@ -0,0 +1,18 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * + * Copyright (c) 2013 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef PMI2_PMAP_PARSER_H +#define PMI2_PMAP_PARSER_H + +int *mca_common_pmi2_parse_pmap(char *pmap, int my_rank, + int *node, int *nlrs); +#endif diff --git a/opal/mca/dstore/pmi/dstore_pmi.c b/opal/mca/dstore/pmi/dstore_pmi.c index a5fe909ae1..11fb609560 100644 --- a/opal/mca/dstore/pmi/dstore_pmi.c +++ b/opal/mca/dstore/pmi/dstore_pmi.c @@ -15,10 +15,8 @@ #include #include -#include -#if WANT_PMI2_SUPPORT -#include -#endif + +#include "opal/mca/common/pmi/common_pmi.h" #include @@ -30,7 +28,6 @@ #include "opal/util/output.h" #include "opal/util/show_help.h" -#include "opal/mca/common/pmi/common_pmi.h" #include "opal/mca/dstore/base/base.h" #include "dstore_pmi.h" @@ -73,26 +70,16 @@ static char* setup_key(mca_dstore_pmi_module_t *mod, * PMI functions, we define a set of wrappers for those * common functions we will use */ -static int kvs_put(mca_dstore_pmi_module_t *mod, +static inline int kvs_put(mca_dstore_pmi_module_t *mod, const char *key, const char *value) { -#if WANT_PMI2_SUPPORT - return PMI2_KVS_Put(key, value); -#else - return PMI_KVS_Put(mod->pmi_kvs_name, key, value); -#endif + return mca_common_pmi_put(mod->pmi_kvs_name, key, value); } -static int kvs_get(mca_dstore_pmi_module_t *mod, +static inline int kvs_get(mca_dstore_pmi_module_t *mod, const char *key, char *value, int valuelen) { -#if WANT_PMI2_SUPPORT - int len; - - return PMI2_KVS_Get(mod->pmi_kvs_name, PMI2_ID_NULL, key, value, valuelen, &len); -#else - return PMI_KVS_Get(mod->pmi_kvs_name, key, value, valuelen); -#endif + return mca_common_pmi_get(mod->pmi_kvs_name, key, value, valuelen); } static void finalize(struct opal_dstore_base_module_t *imod) @@ -164,9 +151,7 @@ static int pmi_commit_packed(mca_dstore_pmi_module_t *mod, rc = kvs_put(mod, pmikey, tmp); free(pmikey); - if (PMI_SUCCESS != rc) { - OPAL_PMI_ERROR(rc, "PMI_KVS_Put"); - rc = OPAL_ERROR; + if (OPAL_SUCCESS != rc) { break; } @@ -265,7 +250,7 @@ 
static int pmi_get_packed(mca_dstore_pmi_module_t *mod, pmi_tmp = calloc (mod->pmi_vallen_max, 1); if (NULL == pmi_tmp) { - return OPAL_ERR_OUT_OF_RESOURCE; + return OPAL_ERR_OUT_OF_RESOURCE; } /* read all of the packed data from this proc */ @@ -275,7 +260,7 @@ static int pmi_get_packed(mca_dstore_pmi_module_t *mod, sprintf (tmp_key, "key%d", remote_key); if (NULL == (pmikey = setup_key(mod, proc, tmp_key))) { - rc = OPAL_ERR_OUT_OF_RESOURCE; + rc = OPAL_ERR_OUT_OF_RESOURCE; OPAL_ERROR_LOG(rc); return rc; } @@ -284,21 +269,21 @@ static int pmi_get_packed(mca_dstore_pmi_module_t *mod, "GETTING KEY %s", pmikey)); rc = kvs_get(mod, pmikey, pmi_tmp, mod->pmi_vallen_max); - free (pmikey); - if (PMI_SUCCESS != rc) { - break; + free (pmikey); + if (OPAL_SUCCESS != rc) { + break; } - size = strlen (pmi_tmp); + size = strlen (pmi_tmp); - if (NULL == tmp_encoded) { - tmp_encoded = malloc (size + 1); - } else { - tmp_encoded = realloc (tmp_encoded, bytes_read + size + 1); - } + if (NULL == tmp_encoded) { + tmp_encoded = malloc (size + 1); + } else { + tmp_encoded = realloc (tmp_encoded, bytes_read + size + 1); + } - strcpy (tmp_encoded + bytes_read, pmi_tmp); - bytes_read += size; + strcpy (tmp_encoded + bytes_read, pmi_tmp); + bytes_read += size; /* is the string terminator present? 
*/ if ('-' == tmp_encoded[bytes_read-1]) { @@ -316,7 +301,7 @@ static int pmi_get_packed(mca_dstore_pmi_module_t *mod, *packed_data = (char *) pmi_decode (tmp_encoded, len); free (tmp_encoded); if (NULL == *packed_data) { - return OPAL_ERR_OUT_OF_RESOURCE; + return OPAL_ERR_OUT_OF_RESOURCE; } } @@ -497,20 +482,11 @@ static void commit(struct opal_dstore_base_module_t *imod, /* commit the packed data to PMI */ pmi_commit_packed(mod, id); -#if WANT_PMI2_SUPPORT - PMI2_KVS_Fence(); -#else - { - int rc; - - if (PMI_SUCCESS != (rc = PMI_KVS_Commit(mod->pmi_kvs_name))) { - OPAL_PMI_ERROR(rc, "PMI_KVS_Commit"); - return; - } - /* Barrier here to ensure all other procs have committed */ - PMI_Barrier(); + int rc = mca_common_pmi_commit(mod->pmi_kvs_name); + if( OPAL_SUCCESS != rc ){ + // TODO: What we do here? failure exit? + } -#endif } static int fetch(struct opal_dstore_base_module_t *imod, diff --git a/opal/mca/dstore/pmi/dstore_pmi_component.c b/opal/mca/dstore/pmi/dstore_pmi_component.c index 665f09926e..c908cdca2f 100644 --- a/opal/mca/dstore/pmi/dstore_pmi_component.c +++ b/opal/mca/dstore/pmi/dstore_pmi_component.c @@ -11,17 +11,13 @@ #include "opal_config.h" #include "opal/constants.h" -#include -#if WANT_PMI2_SUPPORT -#include -#endif +#include "opal/mca/common/pmi/common_pmi.h" #include "opal/mca/base/base.h" -#include "opal/mca/common/pmi/common_pmi.h" - #include "opal/mca/dstore/dstore.h" #include "opal/mca/dstore/base/base.h" +#include "opal/runtime/opal_params.h" #include "dstore_pmi.h" static int dstore_pmi_component_register(void); @@ -85,7 +81,8 @@ static bool component_avail(void) * will force our selection if we are direct-launched, * and the orted will turn us "off" if indirectly launched */ - if (mca_common_pmi_init() && OPAL_SUCCESS == setup_pmi()) { + int rc = mca_common_pmi_init(opal_pmi_version); + if ( OPAL_SUCCESS == rc && OPAL_SUCCESS == setup_pmi()) { return true; } /* if not, then we are not available */ @@ -130,56 +127,23 @@ static int 
setup_pmi(void) { int max_length, rc; -#if WANT_PMI2_SUPPORT - pmi_vallen_max = PMI2_MAX_VALLEN; - max_length = PMI2_MAX_VALLEN; -#else - rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max); - if (PMI_SUCCESS != rc) { - OPAL_OUTPUT_VERBOSE((1, opal_dstore_base_framework.framework_output, - "dstore:pmi:pmi_setup failed %s with error %s", - "PMI_Get_value_length_max", - opal_errmgr_base_pmi_error(rc))); - return OPAL_ERROR; - } - - if (PMI_SUCCESS != (rc = PMI_KVS_Get_name_length_max(&max_length))) { - OPAL_OUTPUT_VERBOSE((1, opal_dstore_base_framework.framework_output, - "dstore:pmi:pmi_setup failed %s with error %s", - "PMI_KVS_Get_name_length_max", - opal_errmgr_base_pmi_error(rc))); - return OPAL_ERROR; - } -#endif + pmi_vallen_max = mca_common_pmi_vallen(); + max_length = mca_common_pmi_kvslen(); pmi_kvs_name = (char*)malloc(max_length); if (NULL == pmi_kvs_name) { return OPAL_ERR_OUT_OF_RESOURCE; } -#if WANT_PMI2_SUPPORT - rc = PMI2_Job_GetId(pmi_kvs_name, max_length); -#else - rc = PMI_KVS_Get_my_name(pmi_kvs_name,max_length); -#endif - if (PMI_SUCCESS != rc) { - OPAL_OUTPUT_VERBOSE((1, opal_dstore_base_framework.framework_output, - "dstore:pmi:pmi_setup failed %s with error %s on maxlength %d", - "PMI_KVS_Get_my_name", - opal_errmgr_base_pmi_error(rc), max_length)); - return OPAL_ERROR; - } - -#if WANT_PMI2_SUPPORT - pmi_keylen_max = PMI2_MAX_KEYLEN; -#else - if (PMI_SUCCESS != (rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max))) { + rc = mca_common_pmi_kvsname(pmi_kvs_name, max_length); + if( OPAL_SUCCESS != rc ){ OPAL_OUTPUT_VERBOSE((1, opal_dstore_base_framework.framework_output, "dstore:pmi:pmi_setup failed %s with error %s", - "PMI_KVS_Get_key_length_max", + "mca_common_pmi_jobname", opal_errmgr_base_pmi_error(rc))); - return OPAL_ERROR; + return rc; } -#endif + + pmi_keylen_max = mca_common_pmi_keylen(); return OPAL_SUCCESS; } diff --git a/opal/runtime/opal_params.c b/opal/runtime/opal_params.c index bddd6ce416..916b5efb26 100644 --- 
a/opal/runtime/opal_params.c +++ b/opal/runtime/opal_params.c @@ -45,6 +45,7 @@ char *opal_signal_string = NULL; char *opal_net_private_ipv4 = NULL; char *opal_set_max_sys_limits = NULL; +int opal_pmi_version = 0; #if OPAL_ENABLE_FT_CR == 1 bool opal_base_distill_checkpoint_ready = false; @@ -179,6 +180,18 @@ int opal_register_params(void) return ret; } + opal_pmi_version = 0; +#if WANT_PMI2_SUPPORT + (void) mca_base_var_register ("opal", "opal", NULL, "pmi_version", + "Set preferred PMI version: 0 => auto detect, 1 = PMIv1, 2 = PMIv2", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, + &opal_pmi_version); +#else + opal_pmi_version = 1; +#endif + + /* The ddt engine has a few parameters */ ret = opal_datatype_register_params(); if (OPAL_SUCCESS != ret) { diff --git a/opal/runtime/opal_params.h b/opal/runtime/opal_params.h index 77c992b6a7..f06e173934 100644 --- a/opal/runtime/opal_params.h +++ b/opal/runtime/opal_params.h @@ -29,6 +29,7 @@ extern char *opal_signal_string; extern char *opal_net_private_ipv4; extern char *opal_set_max_sys_limits; +extern int opal_pmi_version; #if OPAL_ENABLE_DEBUG extern bool opal_progress_debug; diff --git a/orte/mca/ess/pmi/ess_pmi_component.c b/orte/mca/ess/pmi/ess_pmi_component.c index 5f8036ee1b..f165c31ba4 100644 --- a/orte/mca/ess/pmi/ess_pmi_component.c +++ b/orte/mca/ess/pmi/ess_pmi_component.c @@ -18,6 +18,7 @@ #include "orte_config.h" #include "orte/constants.h" +#include "opal/runtime/opal_params.h" #include "opal/mca/common/pmi/common_pmi.h" #include "orte/util/proc_info.h" @@ -65,7 +66,7 @@ static int pmi_component_open(void) static int pmi_component_query(mca_base_module_t **module, int *priority) { /* we are available anywhere PMI is available, but not for HNP itself */ - if (!ORTE_PROC_IS_HNP && mca_common_pmi_init()) { + if (!ORTE_PROC_IS_HNP && OPAL_SUCCESS == mca_common_pmi_init(opal_pmi_version)) { /* if PMI is available, use it */ *priority = 35; *module = (mca_base_module_t
*)&orte_ess_pmi_module; diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 38d5983767..c42e459e92 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -38,11 +38,6 @@ #include #endif -#include -#if WANT_PMI2_SUPPORT -#include -#endif - #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/util/argv.h" @@ -134,17 +129,11 @@ static int rte_init(void) } ORTE_PROC_MY_NAME->jobid = jobid; /* get our rank from PMI */ - if (!mca_common_pmi_rank(&i)) { - error = "could not get PMI rank"; - goto error; - } + i = mca_common_pmi_rank(); ORTE_PROC_MY_NAME->vpid = i + 1; /* compensate for orterun */ /* get the number of procs from PMI */ - if (!mca_common_pmi_size(&i)) { - error = "could not get PMI universe size"; - goto error; - } + i = mca_common_pmi_universe(); orte_process_info.num_procs = i + 1; /* compensate for orterun */ /* complete setup */ @@ -158,31 +147,10 @@ static int rte_init(void) } /* we are a direct-launched MPI process */ - -#if WANT_PMI2_SUPPORT - /* Get domain id */ - pmi_id = (char*)malloc(PMI2_MAX_VALLEN); - if (PMI_SUCCESS != (ret = PMI2_Job_GetId(pmi_id, PMI2_MAX_VALLEN))) { - error = "PMI2_Job_GetId failed"; + if( OPAL_SUCCESS != (ret = mca_common_pmi_id(&pmi_id, &error)) ){ goto error; } -#else - { - int pmi_maxlen; - /* get our PMI id length */ - if (PMI_SUCCESS != (ret = PMI_Get_id_length_max(&pmi_maxlen))) { - error = "PMI_Get_id_length_max"; - goto error; - } - pmi_id = (char*)malloc(pmi_maxlen); - if (PMI_SUCCESS != (ret = PMI_Get_kvs_domain_id(pmi_id, pmi_maxlen))) { - free(pmi_id); - error = "PMI_Get_kvs_domain_id"; - goto error; - } - } -#endif /* PMI is very nice to us - the domain id is an integer followed * by a '.', followed by essentially a stepid. The first integer * defines an overall job number. 
The second integer is the number of @@ -204,17 +172,11 @@ static int rte_init(void) ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(jobfam << 16, stepid); /* get our rank */ - if (!mca_common_pmi_rank(&i)) { - error = "could not get PMI rank"; - goto error; - } + i = mca_common_pmi_rank(); ORTE_PROC_MY_NAME->vpid = i; - /* get the number of procs from PMI */ - if (!mca_common_pmi_size(&i)) { - error = "could not get PMI universe size"; - goto error; - } + // FIX ME: What do we need here - size or universe? + i = mca_common_pmi_universe(); orte_process_info.num_procs = i; /* push into the environ for pickup in MPI layer for * MPI-3 required info key @@ -267,69 +229,10 @@ static int rte_init(void) goto error; } -#if WANT_PMI2_SUPPORT - { - /* get our local proc info to find our local rank */ - char *pmapping = (char*)malloc(PMI2_MAX_VALLEN); - int found, sid, nodes, k; - orte_vpid_t n; - char *p; - ret = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found); - if (!found || PMI_SUCCESS != ret) { /* can't check PMI2_SUCCESS as some folks (i.e., Cray) don't define it */ - error = "could not get PMI_process_mapping (PMI2_Info_GetJobAttr() failed)"; - goto error; - } - - i = 0; n = 0; procs = 0; - if (NULL != (p = strstr(pmapping, "(vector"))) { - while (NULL != (p = strstr(p+1, ",("))) { - if (3 == sscanf(p, ",(%d,%d,%d)", &sid, &nodes, &procs)) { - for (k = 0; k < nodes; k++) { - if ((ORTE_PROC_MY_NAME->vpid >= n) && - (ORTE_PROC_MY_NAME->vpid < (n + procs))) { - break; - } - n += procs; - } - } else { - procs = 0; - } - } - } - free(pmapping); - - if (0 < procs) { - ranks = (int*)malloc(procs * sizeof(int)); - for (i=0; i < procs; i++) { - ranks[i] = n + i; - } - } - - if (NULL == ranks) { - error = "could not get PMI_process_mapping"; - goto error; - } - } -#else - /* get our local proc info to find our local rank */ - if (PMI_SUCCESS != (ret = PMI_Get_clique_size(&procs))) { - OPAL_PMI_ERROR(ret, "PMI_Get_clique_size"); - error = 
"could not get PMI clique size"; + ret = mca_common_pmi_local_info(ORTE_PROC_MY_NAME->vpid, &ranks, &procs, &error); + if( OPAL_SUCCESS != ret ){ goto error; } - /* now get the specific ranks */ - ranks = (int*)calloc(procs, sizeof(int)); - if (NULL == ranks) { - error = "could not get memory for local ranks"; - ret = ORTE_ERR_OUT_OF_RESOURCE; - goto error; - } - if (PMI_SUCCESS != (ret = PMI_Get_clique_ranks(ranks, procs))) { - OPAL_PMI_ERROR(ret, "PMI_Get_clique_ranks"); - error = "could not get clique ranks"; - goto error; - } -#endif /* store the number of local peers - remember, we want the number * of peers that share the node WITH ME, so we have to subtract * ourselves from that number @@ -533,11 +436,7 @@ static void rte_abort(int status, bool report) /* PMI doesn't like NULL messages, but our interface * doesn't provide one - so rig one up here */ -#if WANT_PMI2_SUPPORT - PMI2_Abort(status, "N/A"); -#else - PMI_Abort(status, "N/A"); -#endif + mca_common_pmi_abort(status, "N/A"); /* - Clean out the global structures * (not really necessary, but good practice) */ diff --git a/orte/mca/grpcomm/pmi/Makefile.am b/orte/mca/grpcomm/pmi/Makefile.am index f98c305662..3e7a57ecab 100644 --- a/orte/mca/grpcomm/pmi/Makefile.am +++ b/orte/mca/grpcomm/pmi/Makefile.am @@ -17,10 +17,6 @@ sources = \ grpcomm_pmi_module.c \ grpcomm_pmi_component.c -if WANT_PMI2_SUPPORT -sources += pmi2_pmap_parser.c -endif - # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). 
diff --git a/orte/mca/grpcomm/pmi/grpcomm_pmi_component.c b/orte/mca/grpcomm/pmi/grpcomm_pmi_component.c index d26673202c..0289a3d30e 100644 --- a/orte/mca/grpcomm/pmi/grpcomm_pmi_component.c +++ b/orte/mca/grpcomm/pmi/grpcomm_pmi_component.c @@ -14,6 +14,7 @@ #include "orte/constants.h" #include "opal/mca/mca.h" +#include "opal/runtime/opal_params.h" #include "opal/mca/common/pmi/common_pmi.h" #include "orte/util/proc_info.h" @@ -80,7 +81,7 @@ int orte_grpcomm_pmi_component_query(mca_base_module_t **module, int *priority) * selection will have been turned "off" for us */ if (ORTE_PROC_IS_APP && - mca_common_pmi_init()) { + OPAL_SUCCESS == mca_common_pmi_init(opal_pmi_version)) { /* if PMI is available, make it available for use by MPI procs */ *priority = my_priority; *module = (mca_base_module_t *)&orte_grpcomm_pmi_module; diff --git a/orte/mca/grpcomm/pmi/grpcomm_pmi_module.c b/orte/mca/grpcomm/pmi/grpcomm_pmi_module.c index ffe33084b5..74617826a7 100644 --- a/orte/mca/grpcomm/pmi/grpcomm_pmi_module.c +++ b/orte/mca/grpcomm/pmi/grpcomm_pmi_module.c @@ -18,13 +18,10 @@ #include "orte/types.h" #include -#include -#if WANT_PMI2_SUPPORT -#include -#endif #include "opal/dss/dss.h" #include "opal/mca/hwloc/base/base.h" +#include "opal/runtime/opal_params.h" #include "opal/mca/common/pmi/common_pmi.h" #include "opal/mca/dstore/dstore.h" @@ -62,7 +59,7 @@ orte_grpcomm_base_module_t orte_grpcomm_pmi_module = { */ static int init(void) { - return ORTE_SUCCESS; + return mca_common_pmi_init(opal_pmi_version); } /** @@ -70,6 +67,7 @@ static int init(void) */ static void finalize(void) { + mca_common_pmi_finalize(); return; } @@ -107,19 +105,9 @@ static int pmi_barrier(orte_grpcomm_collective_t *coll) return ORTE_SUCCESS; } -#if WANT_PMI2_SUPPORT - /* PMI2 doesn't provide a barrier, so use the Fence function here */ - if (PMI_SUCCESS != (rc = PMI2_KVS_Fence())) { - OPAL_PMI_ERROR(rc, "PMI2_KVS_Fence"); - return ORTE_ERROR; + if( OPAL_SUCCESS != (rc = mca_common_pmi_barrier()) 
){ + return rc; } -#else - /* use the PMI barrier function */ - if (PMI_SUCCESS != (rc = PMI_Barrier())) { - OPAL_PMI_ERROR(rc, "PMI_Barrier"); - return ORTE_ERROR; - } -#endif OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output, "%s grpcomm:pmi barrier complete", @@ -152,58 +140,20 @@ static int modex(orte_grpcomm_collective_t *coll) int rc, i; opal_list_t myvals; opal_value_t *kv, kvn; + char *error; OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, "%s grpcomm:pmi: modex entered", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* discover the local ranks */ -#if WANT_PMI2_SUPPORT - { - char *pmapping = (char*)malloc(PMI2_MAX_VALLEN); - int found; - int my_node; - - rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found); - if (!found || PMI_SUCCESS != rc) { /* can't check PMI2_SUCCESS as some folks (i.e., Cray) don't define it */ - opal_output(0, "%s could not get PMI_process_mapping (PMI2_Info_GetJobAttr() failed)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return ORTE_ERROR; - } - - local_ranks = orte_grpcomm_pmi2_parse_pmap(pmapping, ORTE_PROC_MY_NAME->vpid, &my_node, &local_rank_count); - if (NULL == local_ranks) { - opal_output(0, "%s could not get PMI_process_mapping", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return ORTE_ERROR; - } - - OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, - "%s: pmapping: %s my_node=%d lr_count=%d\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), pmapping, my_node, local_rank_count)); - - free(pmapping); + rc = mca_common_pmi_local_info(ORTE_PROC_MY_NAME->vpid, &local_ranks, + &local_rank_count, &error); + if( OPAL_SUCCESS != rc){ + opal_output(0, "%s could not get PMI_process_mapping: %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), error); + return ORTE_ERROR; } -#else - rc = PMI_Get_clique_size (&local_rank_count); - if (PMI_SUCCESS != rc) { - ORTE_ERROR_LOG(ORTE_ERROR); - return ORTE_ERROR; - } - - local_ranks = calloc (local_rank_count, sizeof (int)); - if (NULL 
== local_ranks) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - rc = PMI_Get_clique_ranks (local_ranks, local_rank_count); - if (PMI_SUCCESS != rc) { - ORTE_ERROR_LOG(ORTE_ERROR); - return ORTE_ERROR; - } -#endif - /* our RTE data was constructed and pushed in the ESS pmi component */ diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index de3d5c8fdf..40681d5496 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -102,6 +102,7 @@ char *orte_set_slots = NULL; bool orte_display_allocation; bool orte_display_devel_allocation; bool orte_soft_locations = false; +int orted_pmi_version = 0; /* launch agents */ char *orte_launch_agent = NULL; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index c7b0d11dcd..64d32e0561 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -703,6 +703,9 @@ ORTE_DECLSPEC extern char *orte_rankfile; ORTE_DECLSPEC extern int orte_num_allocated_nodes; ORTE_DECLSPEC extern char *orte_node_regex; +/* PMI version control */ +ORTE_DECLSPEC extern int orted_pmi_version; + /* tool communication controls */ ORTE_DECLSPEC extern bool orte_report_events; ORTE_DECLSPEC extern char *orte_report_events_uri;