Update to track PMIx v2.0.1
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
e31e8b90bb
Коммит
ed85512a7c
@ -9,31 +9,22 @@ Email Name Affiliation(s)
|
|||||||
alinask Elena Shipunova Mellanox
|
alinask Elena Shipunova Mellanox
|
||||||
annu13 Annapurna Dasari Intel
|
annu13 Annapurna Dasari Intel
|
||||||
artpol84 Artem Polyakov Mellanox
|
artpol84 Artem Polyakov Mellanox
|
||||||
ashleypittman Ashley Pittman Intel
|
|
||||||
dsolt Dave Solt IBM
|
dsolt Dave Solt IBM
|
||||||
garlick Jim Garlick LLNL
|
|
||||||
ggouaillardet Gilles Gouaillardet RIST
|
ggouaillardet Gilles Gouaillardet RIST
|
||||||
hjelmn Nathan Hjelm LANL
|
hjelmn Nathan Hjelm LANL
|
||||||
igor-ivanov Igor Ivanov Mellanox
|
igor-ivanov Igor Ivanov Mellanox
|
||||||
jladd-mlnx Joshua Ladd Mellanox
|
jladd-mlnx Joshua Ladd Mellanox
|
||||||
jjhursey Joshua Hursey IBM
|
jsquyres Jeff Squyres Cisco, IU
|
||||||
jsquyres Jeff Squyres Cisco
|
|
||||||
karasevb Boris Karasev Mellanox
|
|
||||||
kawashima-fj Takahiro Kawashima Fujitsu
|
|
||||||
nkogteva Nadezhda Kogteva Mellanox
|
nkogteva Nadezhda Kogteva Mellanox
|
||||||
nysal Nysal Jan KA IBM
|
rhc54 Ralph Castain LANL, Cisco, Intel
|
||||||
PHHargrove Paul Hargrove LBNL
|
|
||||||
rhc54 Ralph Castain Intel
|
|
||||||
------------------------------- --------------------------- -------------------
|
------------------------------- --------------------------- -------------------
|
||||||
|
|
||||||
Affiliation abbreviations:
|
Affiliation abbreviations:
|
||||||
--------------------------
|
--------------------------
|
||||||
Cisco = Cisco Systems, Inc.
|
Cisco = Cisco Systems, Inc.
|
||||||
Fujitsu = Fujitsu
|
|
||||||
IBM = International Business Machines, Inc.
|
IBM = International Business Machines, Inc.
|
||||||
Intel = Intel, Inc.
|
Intel = Intel, Inc.
|
||||||
|
IU = Indiana University
|
||||||
LANL = Los Alamos National Laboratory
|
LANL = Los Alamos National Laboratory
|
||||||
LBNL = Lawrence Berkeley National Laboratory
|
|
||||||
LLNL = Lawrence Livermore National Laboratory
|
|
||||||
Mellanox = Mellanox
|
Mellanox = Mellanox
|
||||||
RIST = Research Organization for Information Science and Technology
|
RIST = Research Organization for Information Science and Technology
|
||||||
|
@ -24,7 +24,7 @@ This file is a *very* short overview of building and installing
|
|||||||
the PMIx library. Much more information is available on the
|
the PMIx library. Much more information is available on the
|
||||||
PMIx web site (e.g., see the FAQ section):
|
PMIx web site (e.g., see the FAQ section):
|
||||||
|
|
||||||
http://pmix.github.io/pmix/pmix
|
http://pmix.github.io/pmix/master
|
||||||
|
|
||||||
|
|
||||||
Developer Builds
|
Developer Builds
|
||||||
@ -34,7 +34,7 @@ If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked
|
|||||||
out from Git), you should read the HACKING file before attempting to
|
out from Git), you should read the HACKING file before attempting to
|
||||||
build PMIx. You must then run:
|
build PMIx. You must then run:
|
||||||
|
|
||||||
shell$ ./autogen.pl
|
shell$ ./autogen.sh
|
||||||
|
|
||||||
You will need very recent versions of GNU Autoconf, Automake, and
|
You will need very recent versions of GNU Autoconf, Automake, and
|
||||||
Libtool. If autogen.sh fails, read the HACKING file. If anything
|
Libtool. If autogen.sh fails, read the HACKING file. If anything
|
||||||
|
@ -24,65 +24,6 @@ current release as well as the "stable" bug fix release branch.
|
|||||||
Master (not on release branches yet)
|
Master (not on release branches yet)
|
||||||
------------------------------------
|
------------------------------------
|
||||||
|
|
||||||
|
|
||||||
2.0.0
|
|
||||||
------
|
|
||||||
**** NOTE: This release implements the complete PMIX v2.0 Standard
|
|
||||||
**** and therefore includes a number of new APIs and features. These
|
|
||||||
**** can be tracked by their RFC's in the RFC repository at:
|
|
||||||
**** https://github.com/pmix/RFCs. A formal standards document will
|
|
||||||
**** be included in a later v2.x release. Some of the changes are
|
|
||||||
**** identified below.
|
|
||||||
- Added the Modular Component Architecture (MCA) plugin manager and
|
|
||||||
converted a number of operations to plugins, thereby allowing easy
|
|
||||||
customization and extension (including proprietary offerings)
|
|
||||||
- Added support for TCP sockets instead of Unix domain sockets for
|
|
||||||
client-server communications
|
|
||||||
- Added support for on-the-fly Allocation requests, including requests
|
|
||||||
for additional resources, extension of time for currently allocated
|
|
||||||
resources, and return of identified allocated resources to the scheduler
|
|
||||||
(RFC 0005 - https://github.com/pmix/RFCs/blob/master/RFC0005.md)
|
|
||||||
- Tightened rules on the processing of PMIx_Get requests, including
|
|
||||||
reservation of the "pmix" prefix for attribute keys and specifying
|
|
||||||
behaviors associated with the PMIX_RANK_WILDCARD value
|
|
||||||
(RFC 0009 - https://github.com/pmix/RFCs/blob/master/RFC0009.md)
|
|
||||||
- Extended support for tool interactions with a PMIx server aimed at
|
|
||||||
meeting the needs of debuggers and other tools. Includes support
|
|
||||||
for rendezvousing with a system-level PMIx server for interacting
|
|
||||||
with the system management stack (SMS) outside of an allocated
|
|
||||||
session, and adds two new APIs:
|
|
||||||
- PMIx_Query: request general information such as the process
|
|
||||||
table for a specified job, and available SMS capabilities
|
|
||||||
- PMIx_Log: log messages (e.g., application progress) to a
|
|
||||||
system-hosted persistent store
|
|
||||||
(RFC 0010 - https://github.com/pmix/RFCs/blob/master/RFC0010.md)
|
|
||||||
- Added support for fabric/network interactions associated with
|
|
||||||
"instant on" application startup
|
|
||||||
(RFC 0012 - https://github.com/pmix/RFCs/blob/master/RFC0012.md)
|
|
||||||
- Added an attribute to support getting the time remaining in an
|
|
||||||
allocation via the PMIx_Query interface
|
|
||||||
(RFC 0013 - https://github.com/pmix/RFCs/blob/master/RFC0013.md)
|
|
||||||
- Added interfaces to support job control and monitoring requests,
|
|
||||||
including heartbeat and file monitors to detect stalled applications.
|
|
||||||
Job control interface supports standard signal-related operations
|
|
||||||
(pause, kill, resume, etc.) as well as checkpoint/restart requests.
|
|
||||||
The interface can also be used by an application to indicate it is
|
|
||||||
willing to be pre-empted, with the host RM providing an event
|
|
||||||
notification when the preemption is desired.
|
|
||||||
(RFC 0015 - https://github.com/pmix/RFCs/blob/master/RFC0015.md)
|
|
||||||
- Extended the event notification system to support notifications
|
|
||||||
across threads in the same process, and the ability to direct
|
|
||||||
ordering of notifications when registering event handlers.
|
|
||||||
(RFC 0018 - https://github.com/pmix/RFCs/blob/master/RFC0018.md)
|
|
||||||
- Expose the buffer manipulation functions via a new set of APIs
|
|
||||||
to support heterogeneous data transfers within the host RM
|
|
||||||
environment
|
|
||||||
(RFC 0020 - https://github.com/pmix/RFCs/blob/master/RFC0020.md)
|
|
||||||
- Fix a number of race condition issues that arose at scale
|
|
||||||
- Enable PMIx servers to generate notifications to the host RM
|
|
||||||
and to themselves
|
|
||||||
|
|
||||||
|
|
||||||
1.2.2 -- 21 March 2017
|
1.2.2 -- 21 March 2017
|
||||||
----------------------
|
----------------------
|
||||||
- Compiler fix for Sun/Oracle CC (PR #322)
|
- Compiler fix for Sun/Oracle CC (PR #322)
|
||||||
|
@ -13,7 +13,7 @@
|
|||||||
# major, minor, and release are generally combined in the form
|
# major, minor, and release are generally combined in the form
|
||||||
# <major>.<minor>.<release>.
|
# <major>.<minor>.<release>.
|
||||||
|
|
||||||
major=2
|
major=3
|
||||||
minor=0
|
minor=0
|
||||||
release=0
|
release=0
|
||||||
|
|
||||||
@ -23,14 +23,14 @@ release=0
|
|||||||
# The only requirement is that it must be entirely printable ASCII
|
# The only requirement is that it must be entirely printable ASCII
|
||||||
# characters and have no white space.
|
# characters and have no white space.
|
||||||
|
|
||||||
greek=
|
greek=a1
|
||||||
|
|
||||||
# If repo_rev is empty, then the repository version number will be
|
# If repo_rev is empty, then the repository version number will be
|
||||||
# obtained during "make dist" via the "git describe --tags --always"
|
# obtained during "make dist" via the "git describe --tags --always"
|
||||||
# command, or with the date (if "git describe" fails) in the form of
|
# command, or with the date (if "git describe" fails) in the form of
|
||||||
# "date<date>".
|
# "date<date>".
|
||||||
|
|
||||||
repo_rev=git6fb501d
|
repo_rev=git4c2c8d0
|
||||||
|
|
||||||
# If tarball_version is not empty, it is used as the version string in
|
# If tarball_version is not empty, it is used as the version string in
|
||||||
# the tarball filename, regardless of all other versions listed in
|
# the tarball filename, regardless of all other versions listed in
|
||||||
@ -44,7 +44,7 @@ tarball_version=
|
|||||||
|
|
||||||
# The date when this release was created
|
# The date when this release was created
|
||||||
|
|
||||||
date="Jun 19, 2017"
|
date="Jun 25, 2017"
|
||||||
|
|
||||||
# The shared library version of each of PMIx's public libraries.
|
# The shared library version of each of PMIx's public libraries.
|
||||||
# These versions are maintained in accordance with the "Library
|
# These versions are maintained in accordance with the "Library
|
||||||
@ -75,4 +75,4 @@ date="Jun 19, 2017"
|
|||||||
# Version numbers are described in the Libtool current:revision:age
|
# Version numbers are described in the Libtool current:revision:age
|
||||||
# format.
|
# format.
|
||||||
|
|
||||||
libpmix_so_version=3:0:1
|
libpmix_so_version=0:0:0
|
||||||
|
@ -124,6 +124,8 @@ typedef uint32_t pmix_rank_t;
|
|||||||
#define PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first
|
#define PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first
|
||||||
#define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data
|
#define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data
|
||||||
#define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server
|
#define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server
|
||||||
|
#define PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server
|
||||||
|
#define PMIX_SERVER_RANK "pmix.srv.rank" // (pmix_rank_t) Rank of this server
|
||||||
|
|
||||||
|
|
||||||
/* identification attributes */
|
/* identification attributes */
|
||||||
|
@ -425,7 +425,7 @@ PMIX_EXPORT pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* allocate space and do the copy */
|
/* allocate space and do the copy */
|
||||||
switch (src->type) {
|
switch (src->data.darray->type) {
|
||||||
case PMIX_UINT8:
|
case PMIX_UINT8:
|
||||||
case PMIX_INT8:
|
case PMIX_INT8:
|
||||||
case PMIX_BYTE:
|
case PMIX_BYTE:
|
||||||
|
@ -769,6 +769,7 @@ pmix_status_t pmix_bfrop_unpack_info(pmix_buffer_t *buffer, void *dest,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
if (NULL == tmp) {
|
if (NULL == tmp) {
|
||||||
|
PMIX_ERROR_LOG(PMIX_ERROR);
|
||||||
return PMIX_ERROR;
|
return PMIX_ERROR;
|
||||||
}
|
}
|
||||||
(void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN);
|
(void)strncpy(ptr[i].key, tmp, PMIX_MAX_KEYLEN);
|
||||||
|
@ -111,7 +111,7 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[],
|
|||||||
PMIX_RELEASE(cb);
|
PMIX_RELEASE(cb);
|
||||||
|
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix:client get completed");
|
"pmix:client get completed %d", rc);
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -464,7 +464,7 @@ static pmix_status_t process_val(pmix_value_t *val,
|
|||||||
}
|
}
|
||||||
nvals = 0;
|
nvals = 0;
|
||||||
for (n=0; n < nsize; n++) {
|
for (n=0; n < nsize; n++) {
|
||||||
if (PMIX_SUCCESS != (rc = pmix_pointer_array_add(results, &info[n]))) {
|
if (0 > (rc = pmix_pointer_array_add(results, &info[n]))) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
++nvals;
|
++nvals;
|
||||||
@ -536,25 +536,45 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
|||||||
/* if the rank is WILDCARD, then they want all the job-level info,
|
/* if the rank is WILDCARD, then they want all the job-level info,
|
||||||
* so no need to check the modex */
|
* so no need to check the modex */
|
||||||
if (PMIX_RANK_WILDCARD != cb->rank) {
|
if (PMIX_RANK_WILDCARD != cb->rank) {
|
||||||
|
rc = PMIX_ERR_NOT_FOUND;
|
||||||
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
|
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
|
||||||
if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, NULL, &val))) {
|
/* my own data is in the hash table, so don't bother looking
|
||||||
#else
|
* in the dstore if that is what they want */
|
||||||
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) {
|
if (pmix_globals.myid.rank != cb->rank) {
|
||||||
#endif /* PMIX_ENABLE_DSTORE */
|
if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, NULL, &val))) {
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix_get[%d]: value retrieved from dstore", __LINE__);
|
"pmix_get[%d]: value retrieved from dstore", __LINE__);
|
||||||
if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) {
|
if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) {
|
||||||
cb->value_cbfunc(rc, NULL, cb->cbdata);
|
cb->value_cbfunc(rc, NULL, cb->cbdata);
|
||||||
/* cleanup */
|
/* cleanup */
|
||||||
if (NULL != val) {
|
if (NULL != val) {
|
||||||
PMIX_VALUE_RELEASE(val);
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
}
|
||||||
|
PMIX_RELEASE(cb);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
PMIX_RELEASE(cb);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
/* cleanup */
|
}
|
||||||
PMIX_VALUE_RELEASE(val);
|
#endif /* PMIX_ENABLE_DSTORE */
|
||||||
} else {
|
if (PMIX_SUCCESS != rc) {
|
||||||
|
/* if the user was asking about themselves, or we aren't using the dstore,
|
||||||
|
* then we need to check the hash table */
|
||||||
|
if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->modex, cb->rank, NULL, &val))) {
|
||||||
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
|
"pmix_get[%d]: value retrieved from hash", __LINE__);
|
||||||
|
if (PMIX_SUCCESS != (rc = process_val(val, &nvals, &results))) {
|
||||||
|
cb->value_cbfunc(rc, NULL, cb->cbdata);
|
||||||
|
/* cleanup */
|
||||||
|
if (NULL != val) {
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
}
|
||||||
|
PMIX_RELEASE(cb);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (PMIX_SUCCESS != rc) {
|
||||||
/* if we didn't find a modex for this rank, then we need
|
/* if we didn't find a modex for this rank, then we need
|
||||||
* to go get it. Thus, the caller wants -all- information for
|
* to go get it. Thus, the caller wants -all- information for
|
||||||
* the specified rank, not just the job-level info. */
|
* the specified rank, not just the job-level info. */
|
||||||
@ -572,12 +592,17 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
|||||||
PMIX_RELEASE(cb);
|
PMIX_RELEASE(cb);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* cleanup */
|
|
||||||
PMIX_VALUE_RELEASE(val);
|
PMIX_VALUE_RELEASE(val);
|
||||||
}
|
}
|
||||||
/* now let's package up the results */
|
/* now let's package up the results */
|
||||||
PMIX_VALUE_CREATE(val, 1);
|
PMIX_VALUE_CREATE(val, 1);
|
||||||
val->type = PMIX_DATA_ARRAY;
|
val->type = PMIX_DATA_ARRAY;
|
||||||
|
val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t));
|
||||||
|
if (NULL == val->data.darray) {
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
cb->value_cbfunc(PMIX_ERR_NOMEM, NULL, cb->cbdata);
|
||||||
|
return;
|
||||||
|
}
|
||||||
val->data.darray->type = PMIX_INFO;
|
val->data.darray->type = PMIX_INFO;
|
||||||
val->data.darray->size = nvals;
|
val->data.darray->size = nvals;
|
||||||
PMIX_INFO_CREATE(iptr, nvals);
|
PMIX_INFO_CREATE(iptr, nvals);
|
||||||
@ -597,14 +622,13 @@ static void _getnbfn(int fd, short flags, void *cbdata)
|
|||||||
} else {
|
} else {
|
||||||
pmix_value_xfer(&iptr[n].value, &info->value);
|
pmix_value_xfer(&iptr[n].value, &info->value);
|
||||||
}
|
}
|
||||||
PMIX_INFO_FREE(info, 1);
|
PMIX_INFO_DESTRUCT(info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* done with results array */
|
/* done with results array */
|
||||||
PMIX_DESTRUCT(&results);
|
PMIX_DESTRUCT(&results);
|
||||||
/* return the result to the caller */
|
/* return the result to the caller - they are responsible for releasing it */
|
||||||
cb->value_cbfunc(PMIX_SUCCESS, val, cb->cbdata);
|
cb->value_cbfunc(PMIX_SUCCESS, val, cb->cbdata);
|
||||||
PMIX_VALUE_FREE(val, 1);
|
|
||||||
PMIX_RELEASE(cb);
|
PMIX_RELEASE(cb);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -87,24 +87,6 @@ static inline int _my_client(const char *nspace, pmix_rank_t rank);
|
|||||||
|
|
||||||
static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
||||||
{
|
{
|
||||||
char *evar;
|
|
||||||
|
|
||||||
/* look for our namespace, if one was given */
|
|
||||||
if (NULL == (evar = getenv("PMIX_SERVER_NAMESPACE"))) {
|
|
||||||
/* use a fake namespace */
|
|
||||||
(void)strncpy(pmix_globals.myid.nspace, "pmix-server", PMIX_MAX_NSLEN);
|
|
||||||
} else {
|
|
||||||
(void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN);
|
|
||||||
}
|
|
||||||
/* look for our rank, if one was given */
|
|
||||||
mypid = getpid();
|
|
||||||
if (NULL == (evar = getenv("PMIX_SERVER_RANK"))) {
|
|
||||||
/* use our pid */
|
|
||||||
pmix_globals.myid.rank = mypid;
|
|
||||||
} else {
|
|
||||||
pmix_globals.myid.rank = strtol(evar, NULL, 10);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* setup the server-specific globals */
|
/* setup the server-specific globals */
|
||||||
PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t);
|
PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t);
|
||||||
pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1);
|
pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1);
|
||||||
@ -131,7 +113,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
|
|||||||
pmix_status_t rc;
|
pmix_status_t rc;
|
||||||
size_t n, m;
|
size_t n, m;
|
||||||
pmix_kval_t kv;
|
pmix_kval_t kv;
|
||||||
bool protect;
|
bool protect, nspace_given = false, rank_given = false;
|
||||||
char *protected[] = {
|
char *protected[] = {
|
||||||
PMIX_USERID,
|
PMIX_USERID,
|
||||||
PMIX_GRPID,
|
PMIX_GRPID,
|
||||||
@ -140,6 +122,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
|
|||||||
PMIX_SERVER_SYSTEM_SUPPORT,
|
PMIX_SERVER_SYSTEM_SUPPORT,
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
char *evar;
|
||||||
|
pmix_rank_info_t *rinfo;
|
||||||
|
|
||||||
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
|
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
|
||||||
|
|
||||||
@ -159,31 +143,22 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
|
|
||||||
if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) {
|
|
||||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
#endif /* PMIX_ENABLE_DSTORE */
|
|
||||||
|
|
||||||
/* setup the wildcard recv for inbound messages from clients */
|
|
||||||
req = PMIX_NEW(pmix_ptl_posted_recv_t);
|
|
||||||
req->tag = UINT32_MAX;
|
|
||||||
req->cbfunc = server_message_handler;
|
|
||||||
/* add it to the end of the list of recvs */
|
|
||||||
pmix_list_append(&pmix_ptl_globals.posted_recvs, &req->super);
|
|
||||||
|
|
||||||
if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) {
|
|
||||||
pmix_show_help("help-pmix-server.txt", "listener-thread-start", true);
|
|
||||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
|
||||||
return PMIX_ERR_INIT;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check the info keys for info we
|
/* check the info keys for info we
|
||||||
* need to provide to every client */
|
* need to provide to every client and
|
||||||
|
* directives aimed at us */
|
||||||
if (NULL != info) {
|
if (NULL != info) {
|
||||||
PMIX_CONSTRUCT(&kv, pmix_kval_t);
|
PMIX_CONSTRUCT(&kv, pmix_kval_t);
|
||||||
for (n=0; n < ninfo; n++) {
|
for (n=0; n < ninfo; n++) {
|
||||||
|
if (0 == strncmp(info[n].key, PMIX_SERVER_NSPACE, PMIX_MAX_KEYLEN)) {
|
||||||
|
(void)strncpy(pmix_globals.myid.nspace, info[n].value.data.string, PMIX_MAX_NSLEN);
|
||||||
|
nspace_given = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (0 == strncmp(info[n].key, PMIX_SERVER_RANK, PMIX_MAX_KEYLEN)) {
|
||||||
|
pmix_globals.myid.rank = info[n].value.data.rank;
|
||||||
|
rank_given = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
/* check the list of protected keys */
|
/* check the list of protected keys */
|
||||||
protect = false;
|
protect = false;
|
||||||
for (m=0; NULL != protected[m]; m++) {
|
for (m=0; NULL != protected[m]; m++) {
|
||||||
@ -215,6 +190,64 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
|
|||||||
PMIX_DESTRUCT(&kv);
|
PMIX_DESTRUCT(&kv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!nspace_given) {
|
||||||
|
/* look for our namespace, if one was given */
|
||||||
|
if (NULL == (evar = getenv("PMIX_SERVER_NAMESPACE"))) {
|
||||||
|
/* use a fake namespace */
|
||||||
|
(void)strncpy(pmix_globals.myid.nspace, "pmix-server", PMIX_MAX_NSLEN);
|
||||||
|
} else {
|
||||||
|
(void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!rank_given) {
|
||||||
|
/* look for our rank, if one was given */
|
||||||
|
mypid = getpid();
|
||||||
|
if (NULL == (evar = getenv("PMIX_SERVER_RANK"))) {
|
||||||
|
/* use our pid */
|
||||||
|
pmix_globals.myid.rank = mypid;
|
||||||
|
} else {
|
||||||
|
pmix_globals.myid.rank = strtol(evar, NULL, 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* copy it into mypeer entries */
|
||||||
|
if (NULL == pmix_globals.mypeer->info) {
|
||||||
|
rinfo = PMIX_NEW(pmix_rank_info_t);
|
||||||
|
pmix_globals.mypeer->info = rinfo;
|
||||||
|
} else {
|
||||||
|
rinfo = pmix_globals.mypeer->info;
|
||||||
|
}
|
||||||
|
if (NULL == rinfo->nptr) {
|
||||||
|
rinfo->nptr = PMIX_NEW(pmix_nspace_t);
|
||||||
|
/* ensure our own nspace is first on the list */
|
||||||
|
PMIX_RETAIN(rinfo->nptr);
|
||||||
|
rinfo->nptr->server = PMIX_NEW(pmix_server_nspace_t);
|
||||||
|
pmix_list_prepend(&pmix_globals.nspaces, &rinfo->nptr->super);
|
||||||
|
}
|
||||||
|
(void)strncpy(rinfo->nptr->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
|
||||||
|
rinfo->rank = pmix_globals.myid.rank;
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
|
||||||
|
if (PMIX_SUCCESS != (rc = pmix_dstore_init(info, ninfo))) {
|
||||||
|
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
#endif /* PMIX_ENABLE_DSTORE */
|
||||||
|
|
||||||
|
/* setup the wildcard recv for inbound messages from clients */
|
||||||
|
req = PMIX_NEW(pmix_ptl_posted_recv_t);
|
||||||
|
req->tag = UINT32_MAX;
|
||||||
|
req->cbfunc = server_message_handler;
|
||||||
|
/* add it to the end of the list of recvs */
|
||||||
|
pmix_list_append(&pmix_ptl_globals.posted_recvs, &req->super);
|
||||||
|
|
||||||
|
if (PMIX_SUCCESS != pmix_ptl_base_start_listening(info, ninfo)) {
|
||||||
|
pmix_show_help("help-pmix-server.txt", "listener-thread-start", true);
|
||||||
|
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||||
|
return PMIX_ERR_INIT;
|
||||||
|
}
|
||||||
|
|
||||||
/* get our available security modules */
|
/* get our available security modules */
|
||||||
security_mode = pmix_psec.get_available_modules();
|
security_mode = pmix_psec.get_available_modules();
|
||||||
|
|
||||||
|
@ -106,6 +106,9 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank,
|
|||||||
pmix_kval_t *hv;
|
pmix_kval_t *hv;
|
||||||
uint64_t id;
|
uint64_t id;
|
||||||
char *node;
|
char *node;
|
||||||
|
pmix_info_t *info;
|
||||||
|
size_t ninfo, n;
|
||||||
|
pmix_value_t *val;
|
||||||
|
|
||||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||||
"HASH:FETCH rank %d key %s",
|
"HASH:FETCH rank %d key %s",
|
||||||
@ -143,7 +146,36 @@ pmix_status_t pmix_hash_fetch(pmix_hash_table_t *table, pmix_rank_t rank,
|
|||||||
if (NULL == key) {
|
if (NULL == key) {
|
||||||
/* we will return the data as an array of pmix_info_t
|
/* we will return the data as an array of pmix_info_t
|
||||||
* in the kvs pmix_value_t */
|
* in the kvs pmix_value_t */
|
||||||
|
val = (pmix_value_t*)malloc(sizeof(pmix_value_t));
|
||||||
|
if (NULL == val) {
|
||||||
|
return PMIX_ERR_NOMEM;
|
||||||
|
}
|
||||||
|
val->type = PMIX_DATA_ARRAY;
|
||||||
|
val->data.darray = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t));
|
||||||
|
if (NULL == val->data.darray) {
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
return PMIX_ERR_NOMEM;
|
||||||
|
}
|
||||||
|
val->data.darray->type = PMIX_INFO;
|
||||||
|
val->data.darray->size = 0;
|
||||||
|
val->data.darray->array = NULL;
|
||||||
|
ninfo = pmix_list_get_size(&proc_data->data);
|
||||||
|
PMIX_INFO_CREATE(info, ninfo);
|
||||||
|
if (NULL == info) {
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
return PMIX_ERR_NOMEM;
|
||||||
|
}
|
||||||
|
/* copy the list elements */
|
||||||
|
n=0;
|
||||||
|
PMIX_LIST_FOREACH(hv, &proc_data->data, pmix_kval_t) {
|
||||||
|
(void)strncpy(info[n].key, hv->key, PMIX_MAX_KEYLEN);
|
||||||
|
pmix_value_xfer(&info[n].value, hv->value);
|
||||||
|
++n;
|
||||||
|
}
|
||||||
|
val->data.darray->size = ninfo;
|
||||||
|
val->data.darray->array = info;
|
||||||
|
*kvs = val;
|
||||||
|
return PMIX_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
/* find the value from within this proc_data object */
|
/* find the value from within this proc_data object */
|
||||||
hv = lookup_keyval(&proc_data->data, key);
|
hv = lookup_keyval(&proc_data->data, key);
|
||||||
|
@ -269,21 +269,51 @@ int main(int argc, char **argv)
|
|||||||
PMIX_VALUE_RELEASE(val);
|
PMIX_VALUE_RELEASE(val);
|
||||||
free(tmp);
|
free(tmp);
|
||||||
|
|
||||||
(void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j);
|
if (n != myproc.rank) {
|
||||||
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) {
|
(void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j);
|
||||||
/* this data should _not_ be found as we are on the same node
|
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) {
|
||||||
* and the data was "put" with a PMIX_REMOTE scope */
|
/* this data should _not_ be found as we are on the same node
|
||||||
pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp);
|
* and the data was "put" with a PMIX_REMOTE scope */
|
||||||
continue;
|
pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc",
|
||||||
|
myproc.nspace, myproc.rank, j, tmp);
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
free(tmp);
|
||||||
}
|
}
|
||||||
pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc",
|
|
||||||
myproc.nspace, myproc.rank, j, tmp);
|
|
||||||
PMIX_VALUE_RELEASE(val);
|
|
||||||
free(tmp);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* now get the data blob for myself */
|
||||||
|
pmix_output(0, "Client ns %s rank %d testing internal modex blob",
|
||||||
|
myproc.nspace, myproc.rank);
|
||||||
|
if (PMIX_SUCCESS == (rc = PMIx_Get(&myproc, NULL, NULL, 0, &val))) {
|
||||||
|
if (PMIX_DATA_ARRAY != val->type) {
|
||||||
|
pmix_output(0, "Client ns %s rank %d did not return an array for its internal modex blob",
|
||||||
|
myproc.nspace, myproc.rank);
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
} else if (PMIX_INFO != val->data.darray->type) {
|
||||||
|
pmix_output(0, "Client ns %s rank %d returned an internal modex array of type %s instead of PMIX_INFO",
|
||||||
|
myproc.nspace, myproc.rank, PMIx_Data_type_string(val->data.darray->type));
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
} else if (0 == val->data.darray->size) {
|
||||||
|
pmix_output(0, "Client ns %s rank %d returned an internal modex array of zero length",
|
||||||
|
myproc.nspace, myproc.rank);
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
} else {
|
||||||
|
pmix_info_t *iptr = (pmix_info_t*)val->data.darray->array;
|
||||||
|
for (n=0; n < val->data.darray->size; n++) {
|
||||||
|
pmix_output(0, "\tKey: %s", iptr[n].key);
|
||||||
|
}
|
||||||
|
PMIX_VALUE_RELEASE(val);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pmix_output(0, "Client ns %s rank %d internal modex blob FAILED with error %s(%d)",
|
||||||
|
myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc);
|
||||||
|
}
|
||||||
|
|
||||||
/* log something */
|
/* log something */
|
||||||
PMIX_INFO_CONSTRUCT(&info);
|
PMIX_INFO_CONSTRUCT(&info);
|
||||||
(void)strncpy(info.key, "foobar", PMIX_MAX_KEYLEN);
|
(void)strncpy(info.key, "foobar", PMIX_MAX_KEYLEN);
|
||||||
|
@ -226,10 +226,7 @@ void parse_cmd(int argc, char **argv, test_params *params)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Fix rank if running under SLURM
|
// Fix rank if running under SLURM
|
||||||
#if 0
|
if( PMIX_RANK_UNDEF == params->rank ){
|
||||||
/* the following "if" statement can never be true as rank is
|
|
||||||
* an unsigned 32-bit int */
|
|
||||||
if( 0 > params->rank ){
|
|
||||||
char *ranklist = getenv("SLURM_GTIDS");
|
char *ranklist = getenv("SLURM_GTIDS");
|
||||||
char *rankno = getenv("SLURM_LOCALID");
|
char *rankno = getenv("SLURM_LOCALID");
|
||||||
if( NULL != ranklist && NULL != rankno ){
|
if( NULL != ranklist && NULL != rankno ){
|
||||||
@ -246,7 +243,6 @@ void parse_cmd(int argc, char **argv, test_params *params)
|
|||||||
pmix_argv_free(argv);
|
pmix_argv_free(argv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
// Fix namespace if running under SLURM
|
// Fix namespace if running under SLURM
|
||||||
if( NULL == params->nspace ){
|
if( NULL == params->nspace ){
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user