From f298f294e1c7eb4f627518071f0636f84bf6b63f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 26 Oct 2016 15:48:56 -0700 Subject: [PATCH] Update PMIx to latest master tarball. Ensure we set the HNP name for orted's so that PMIx_Lookup can find the server Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix3x/pmix/VERSION | 4 +-- opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c | 26 ++++++++--------- opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c | 24 ++++++++-------- .../pmix/pmix3x/pmix/src/client/pmix_client.c | 25 ++++++----------- .../pmix3x/pmix/src/client/pmix_client_get.c | 27 +++++++++++++++++- .../pmix/src/mca/psec/base/psec_base_frame.c | 23 +++++++++------ .../pmix3x/pmix/src/runtime/pmix_finalize.c | 10 ++++++- .../pmix/src/runtime/pmix_progress_threads.c | 28 ++++++++++++++++++- .../pmix/src/runtime/pmix_progress_threads.h | 16 +++++++++-- orte/mca/ess/base/ess_base_std_orted.c | 5 ++++ 10 files changed, 129 insertions(+), 59 deletions(-) diff --git a/opal/mca/pmix/pmix3x/pmix/VERSION b/opal/mca/pmix/pmix3x/pmix/VERSION index e3e2545b72..f4b3d05011 100644 --- a/opal/mca/pmix/pmix3x/pmix/VERSION +++ b/opal/mca/pmix/pmix3x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git967f3e3 +repo_rev=git38811ec # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Oct 20, 2016" +date="Oct 26, 2016" # The shared library version of each of PMIx's public libraries. 
# These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c index e0cf32377b..fa894bed68 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c @@ -247,9 +247,10 @@ PMIX_EXPORT int PMI_Get_size(int *size) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -257,9 +258,6 @@ PMIX_EXPORT int PMI_Get_size(int *size) return PMI_ERR_INVALID_ARG; } - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ @@ -293,9 +291,10 @@ PMIX_EXPORT int PMI_Get_universe_size(int *size) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -303,9 +302,6 @@ PMIX_EXPORT int PMI_Get_universe_size(int *size) return PMI_ERR_INVALID_ARG; } - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ @@ -327,9 +323,10 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -337,9 +334,6 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) return PMI_ERR_INVALID_ARG; } - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ @@ -481,6 +475,8 @@ PMIX_EXPORT int 
PMI_Get_clique_size(int *size) pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -494,7 +490,7 @@ PMIX_EXPORT int PMI_Get_clique_size(int *size) PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); - rc = PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val); + rc = PMIx_Get(&proc, PMIX_LOCAL_SIZE, info, 1, &val); if (PMIX_SUCCESS == rc) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); @@ -511,6 +507,8 @@ PMIX_EXPORT int PMI_Get_clique_ranks(int ranks[], int length) pmix_value_t *val; char **rks; int i; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -518,7 +516,7 @@ PMIX_EXPORT int PMI_Get_clique_ranks(int ranks[], int length) return PMI_ERR_INVALID_ARGS; } - rc = PMIx_Get(&myproc, PMIX_LOCAL_PEERS, NULL, 0, &val); + rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val); if (PMIX_SUCCESS == rc) { /* kv will contain a string of comma-separated * ranks on my node */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c index e25c12027a..86386729a3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmi2.c @@ -55,9 +55,10 @@ PMIX_EXPORT int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; if (PMIX_SUCCESS != PMIx_Init(&myproc, NULL, 0)) { return PMI2_ERR_INIT; @@ -66,10 +67,6 @@ PMIX_EXPORT int PMI2_Init(int *spawned, int *size, int *rank, int *appnum) /* get the rank */ *rank = myproc.rank; - /* getting internal key requires special rank value */ - memcpy(&proc, &myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on 
startup */ @@ -257,6 +254,9 @@ PMIX_EXPORT int PMI2_Info_GetSize(int *size) pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_WILDCARD; + PMI2_CHECK(); @@ -270,7 +270,7 @@ PMIX_EXPORT int PMI2_Info_GetSize(int *size) PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); - if (PMIX_SUCCESS == PMIx_Get(&myproc, PMIX_LOCAL_SIZE, info, 1, &val)) { + if (PMIX_SUCCESS == PMIx_Get(&proc, PMIX_LOCAL_SIZE, info, 1, &val)) { rc = convert_int(size, val); PMIX_VALUE_RELEASE(val); } @@ -426,6 +426,8 @@ PMIX_EXPORT int PMI2_Info_GetNodeAttr(const char name[], pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_UNDEF; PMI2_CHECK(); @@ -440,7 +442,8 @@ PMIX_EXPORT int PMI2_Info_GetNodeAttr(const char name[], PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); *found = 0; - rc = PMIx_Get(&myproc, name, info, 1, &val); + /* TODO: does PMI2's "name" makes sense to PMIx? 
*/ + rc = PMIx_Get(&proc, name, info, 1, &val); if (PMIX_SUCCESS == rc && NULL != val) { if (PMIX_STRING != val->type) { rc = PMIX_ERROR; @@ -486,9 +489,10 @@ PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuel { pmix_status_t rc = PMIX_SUCCESS; pmix_value_t *val; - pmix_proc_t proc; pmix_info_t info[1]; bool val_optinal = 1; + pmix_proc_t proc = myproc; + proc.rank = PMIX_RANK_UNDEF; PMI2_CHECK(); @@ -496,10 +500,6 @@ PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuel return PMI2_ERR_INVALID_ARG; } - /* getting internal key requires special rank value */ - memcpy(&proc, &myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_UNDEF; - /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c index 0e941a6ded..8763fe2b06 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c @@ -445,17 +445,19 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) "pmix:client finalize sync received"); } + PMIX_DESTRUCT(&pmix_client_globals.myserver); pmix_rte_finalize(); - PMIX_DESTRUCT(&pmix_client_globals.myserver); - PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); + PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); - if (0 <= pmix_client_globals.myserver.sd) { + if (0 <= pmix_client_globals.myserver.sd) { CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd); } pmix_bfrop_close(); + pmix_class_finalize(); + return PMIX_SUCCESS; } @@ -529,8 +531,8 @@ PMIX_EXPORT pmix_status_t PMIx_Abort(int flag, const char msg[], return PMIX_SUCCESS; } - static void _putfn(int sd, short args, void *cbdata) - { +static void _putfn(int sd, short args, void *cbdata) +{ pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; pmix_kval_t *kv; @@ -552,21 +554,10 @@ PMIX_EXPORT 
pmix_status_t PMIx_Abort(int flag, const char msg[], /* shouldn't be possible */ goto done; } -#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - /* TODO: It is not safe to store data on a client side - * There is a possibility to get server/client conflict. - * Do nothing here misses PMIx_Get/PMIx_Put flow (w/o PMIx_Commit) - */ - /* - if (PMIX_SUCCESS != (rc = pmix_dstore_store(ns->nspace, pmix_globals.myid.rank, kv))) { - PMIX_ERROR_LOG(rc); - } - */ -#else + if (PMIX_SUCCESS != (rc = pmix_hash_store(&ns->modex, pmix_globals.myid.rank, kv))) { PMIX_ERROR_LOG(rc); } -#endif /* PMIX_ENABLE_DSTORE */ /* pack the cache that matches the scope - global scope needs * to go into both local and remote caches */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c index a68560d891..8c5e3355b2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c @@ -571,7 +571,32 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* otherwise, the data must be something they "put" */ #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) - rc = pmix_dstore_fetch(nptr->nspace, cb->rank, cb->key, &val); + rc = PMIX_ERR_PROC_ENTRY_NOT_FOUND; + if ((0 == strncmp(pmix_globals.myid.nspace, nptr->nspace, PMIX_MAX_NSLEN + 1)) && + ((pmix_globals.myid.rank == cb->rank) || (PMIX_RANK_UNDEF == cb->rank))){ + /* if we are asking for data about this process or an undefined one - + check the local hash table first.
All data passed through + PMIx_Put is stored there */ + rc = pmix_hash_fetch(&nptr->modex, pmix_globals.myid.rank, cb->key, &val); + assert( (PMIX_SUCCESS == rc) || (PMIX_ERR_PROC_ENTRY_NOT_FOUND == rc) || + (PMIX_ERR_NOT_FOUND == rc) ); + if( PMIX_SUCCESS != rc ){ + if(pmix_globals.myid.rank == cb->rank){ + rc = PMIX_ERR_NOT_FOUND; + } + } + /* otherwise we are supposed to get PMIX_ERR_PROC_ENTRY_NOT_FOUND because + we don't push data from the remote processes into the dstore */ + } + /* try to take it from dstore */ + if( PMIX_ERR_PROC_ENTRY_NOT_FOUND == rc ){ + /* Two options are possible here: + - we are asking for the key from an UNDEF process and the local proc + hasn't pushed this data + - we are asking for the key from a particular process which is not us. + */ + rc = pmix_dstore_fetch(nptr->nspace, cb->rank, cb->key, &val); + } #else rc = pmix_hash_fetch(&nptr->modex, cb->rank, cb->key, &val); #endif /* PMIX_ENABLE_DSTORE */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/base/psec_base_frame.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/base/psec_base_frame.c index 34e0691b58..b3a899bdc3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/base/psec_base_frame.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/base/psec_base_frame.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #endif #include "src/class/pmix_list.h" +#include "src/mca/base/base.h" #include "src/mca/psec/base/base.h" /* @@ -43,33 +44,37 @@ #include "src/mca/psec/base/static-components.h" /* Instantiate the global vars */ -pmix_psec_globals_t pmix_psec_globals = {0}; +pmix_psec_globals_t pmix_psec_globals = {{{0}}}; static pmix_status_t pmix_psec_close(void) { - pmix_psec_base_active_module_t *active; + pmix_psec_base_active_module_t *active, *prev; if (!pmix_psec_globals.initialized) { return PMIX_SUCCESS; } pmix_psec_globals.initialized = false; - PMIX_LIST_FOREACH(active, &pmix_psec_globals.actives, pmix_psec_base_active_module_t) { + PMIX_LIST_FOREACH_SAFE(active, prev, &pmix_psec_globals.actives, pmix_psec_base_active_module_t) { + pmix_list_remove_item(&pmix_psec_globals.actives, &active->super); if (NULL != active->component->finalize) { active->component->finalize(); } + PMIX_RELEASE(active); } PMIX_DESTRUCT(&pmix_psec_globals.actives); - return PMIX_SUCCESS; + return pmix_mca_base_framework_components_close(&pmix_psec_base_framework, NULL); } static pmix_status_t pmix_psec_open(pmix_mca_base_open_flag_t flags) { - /* initialize globals */ - pmix_psec_globals.initialized = true; - PMIX_CONSTRUCT(&pmix_psec_globals.actives, pmix_list_t); - return PMIX_SUCCESS; + /* initialize globals */ + pmix_psec_globals.initialized = true; + PMIX_CONSTRUCT(&pmix_psec_globals.actives, pmix_list_t); + + /* Open up all available components */ + return pmix_mca_base_framework_components_open(&pmix_psec_base_framework, flags); } PMIX_MCA_BASE_FRAMEWORK_DECLARE(pmix, psec, "PMIx Security Operations", diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c index c1654b8ee4..08082e19ec 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c @@ -66,11 +66,17 @@ void pmix_rte_finalize(void) if 
(!pmix_globals.external_evbase) { /* stop the progress thread */ - (void)pmix_progress_thread_finalize(NULL); + (void)pmix_progress_thread_stop(NULL); } /* cleanup communications */ pmix_usock_finalize(); + + if (!pmix_globals.external_evbase) { + /* finalize the progress thread */ + (void)pmix_progress_thread_finalize(NULL); + } + if (PMIX_PROC_SERVER != pmix_globals.proc_type && 0 <= pmix_client_globals.myserver.sd) { CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd); @@ -82,6 +88,8 @@ void pmix_rte_finalize(void) /* close the security framework */ (void)pmix_mca_base_framework_close(&pmix_psec_base_framework); + /* finalize the mca */ + (void)pmix_mca_base_close(); /* Clear out all the registered MCA params */ pmix_deregister_params(); pmix_mca_base_var_finalize(); diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c index 6319ef1895..5c419757f3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c @@ -243,7 +243,7 @@ pmix_event_base_t *pmix_progress_thread_init(const char *name) return trk->ev_base; } -int pmix_progress_thread_finalize(const char *name) +int pmix_progress_thread_stop(const char *name) { pmix_progress_tracker_t *trk; @@ -271,6 +271,32 @@ int pmix_progress_thread_finalize(const char *name) if (trk->ev_active) { stop_progress_engine(trk); } + } + } + + return PMIX_ERR_NOT_FOUND; +} + +int pmix_progress_thread_finalize(const char *name) +{ + pmix_progress_tracker_t *trk; + + if (!inited) { + /* nothing we can do */ + return PMIX_ERR_NOT_FOUND; + } + + if (NULL == name) { + name = shared_thread_name; + } + + /* find the specified engine */ + PMIX_LIST_FOREACH(trk, &tracking, pmix_progress_tracker_t) { + if (0 == strcmp(name, trk->name)) { + /* If the refcount is still above 0, we're done here */ + if (trk->refcount > 0) { + return PMIX_SUCCESS; + } 
pmix_list_remove_item(&tracking, &trk->super); PMIX_RELEASE(trk); diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.h b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.h index 91f099b953..9a09a049c3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.h +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.h @@ -30,16 +30,28 @@ pmix_event_base_t *pmix_progress_thread_init(const char *name); /** - * Finalize a progress thread name (reference counted). + * Stop a progress thread name (reference counted). * * Once this function is invoked as many times as * pmix_progress_thread_init() was invoked on this name (or NULL), the - * progress function is shut down and the event base associated with + * progress function is shut down; the event base is kept until pmix_progress_thread_finalize() is called, at which point * it is destroyed. * * Will return PMIX_ERR_NOT_FOUND if the progress thread name does not * exist; PMIX_SUCCESS otherwise. */ +int pmix_progress_thread_stop(const char *name); + +/** + * Finalize a progress thread name (reference counted). + * + * Once this function is invoked after pmix_progress_thread_stop() has been called + * as many times as pmix_progress_thread_init() was invoked on this name (or NULL), + * the event base associated with it is destroyed. + * + * Will return PMIX_ERR_NOT_FOUND if the progress thread name does not + * exist; PMIX_SUCCESS otherwise.
+ */ int pmix_progress_thread_finalize(const char *name); /** diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index c666cb3b44..3fa0a02078 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -188,6 +188,11 @@ int orte_ess_base_orted_setup(char **hosts) error = "opal_pstat_base_select"; goto error; } + + /* define the HNP name */ + ORTE_PROC_MY_HNP->jobid = ORTE_PROC_MY_NAME->jobid; + ORTE_PROC_MY_HNP->vpid = 0; + /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret);