1
1

Sync to IOF branch and continue fix of request for job info from unknown nspace

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
(cherry picked from commit 02400d30d79ce3c7e7e28f9a08f7062a5b6f4c51)
Этот коммит содержится в:
Ralph Castain 2018-02-03 18:53:50 -08:00
родитель 73a9a4f8c7
Коммит 9fe8153d38
24 изменённых файлов: 505 добавлений и 163 удалений

Просмотреть файл

@ -1,4 +1,4 @@
Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
Copyright (c) 2017 IBM Corporation. All rights reserved.
$COPYRIGHT$
@ -25,7 +25,7 @@ Master (not on release branches yet)
------------------------------------
2.1.0 -- 26 Oct 2017
2.1.0 -- 1 Feb 2018
----------------------
**** NOTE: This release contains the first implementation of cross-version
**** support. Servers using v2.1.0 are capable of supporting clients using
@ -36,6 +36,30 @@ Master (not on release branches yet)
- Enable support for remote tool connections (PR #540, #542)
- Cleanup libevent configure logic to support default install paths (PR #541)
- Debounce "unreachable" notifications for tools when they disconnect (PR #544)
- Enable the regex generator to support node names that include multiple
sets of numbers
2.0.3 -- TBD
----------------------
- Fix event notification so all sides of multi-library get notified
of other library's existence
- Update syslog protection to support Mac High Sierra OS
- Remove usock component - unable to support v1.x clients due
to datatype differences
- Cleanup security handshake
- Cleanup separation of PMI-1/2 libraries and PMIx symbols
- Protect against overly-large messages
- Update data buffer APIs to support cross-version operations
- Protect receive callbacks from NULL and/or empty buffers as this
can occur when the peer on a connection disappears.
- Fix tool connection search so it properly descends into the directory
tree while searching for the server's contact file.
- Fix store_local so it doesn't reject a new nspace as that can happen
when working with tools
- Ensure we always complete PMIx_Finalize - don't return if something
goes wrong in the middle of the procedure
- Fix several tool connection issues
2.0.2 -- 19 Oct 2017
@ -119,6 +143,14 @@ Master (not on release branches yet)
and to themselves
1.2.5 -- TBD
----------------------
- Fix cross-version issue when v1.2 client interacts with v2.1 server (PR #564)
- Update client connection for cross-version support (PR #591)
- Fix write memory barrier ASM for PowerPC (PR #606)
- Add protection from overly-large messages
1.2.4 -- 13 Oct. 2017
----------------------
- Silence some unnecessary warning messages (PR #487)

Просмотреть файл

@ -30,7 +30,7 @@ greek=
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".
repo_rev=gitdb5d380
repo_rev=git3bd2b2d
# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
@ -44,7 +44,7 @@ tarball_version=
# The date when this release was created
date="Jan 25, 2018"
date="Feb 03, 2018"
# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library

Просмотреть файл

@ -1,6 +1,6 @@
# -*- shell-script -*-
# PMIx copyrights:
# Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
# Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
#
#########################
#
@ -15,7 +15,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2010-2018 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Mellanox Technologies, Inc.
# All rights reserved.
# Copyright (c) 2015 Intel, Inc. All rights reserved.
@ -499,7 +499,7 @@ AC_DEFUN([PMIX_CHECK_ATTRIBUTES], [
_PMIX_CHECK_SPECIFIC_ATTRIBUTE([extension],
[
#define FOO __extension__ ({size_t bar; bar = 3;})
int i = __extension__ 3;
],
[],
[])

Просмотреть файл

@ -96,19 +96,19 @@ echo "--> Found specfile: $specfile"
rpmtopdir=${rpmtopdir:-"`grep %_topdir $HOME/.rpmmacros | awk '{ print $2 }'`"}
if test "$rpmtopdir" != ""; then
rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'"
rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'"
if test ! -d "$rpmtopdir"; then
mkdir -p "$rpmtopdir"
mkdir -p "$rpmtopdir/BUILD"
mkdir -p "$rpmtopdir/RPMS"
mkdir -p "$rpmtopdir/RPMS/i386"
mkdir -p "$rpmtopdir/RPMS/i586"
mkdir -p "$rpmtopdir/RPMS/i686"
mkdir -p "$rpmtopdir/RPMS/noarch"
mkdir -p "$rpmtopdir/RPMS/athlon"
mkdir -p "$rpmtopdir/SOURCES"
mkdir -p "$rpmtopdir/SPECS"
mkdir -p "$rpmtopdir/SRPMS"
mkdir -p "$rpmtopdir"
mkdir -p "$rpmtopdir/BUILD"
mkdir -p "$rpmtopdir/RPMS"
mkdir -p "$rpmtopdir/RPMS/i386"
mkdir -p "$rpmtopdir/RPMS/i586"
mkdir -p "$rpmtopdir/RPMS/i686"
mkdir -p "$rpmtopdir/RPMS/noarch"
mkdir -p "$rpmtopdir/RPMS/athlon"
mkdir -p "$rpmtopdir/SOURCES"
mkdir -p "$rpmtopdir/SPECS"
mkdir -p "$rpmtopdir/SRPMS"
fi
need_root=0
elif test -d /usr/src/RPM; then

Просмотреть файл

@ -36,16 +36,16 @@ for branch in $branches; do
module load libevent/pmix-$branch
./$script $@ >dist.out 2>&1
if test "$?" != "0"; then
cat <<EOF
if test "$?" != "0"; then
cat <<EOF
=============================================================================
== Dist failure
== Last few lines of output (full results in dist.out file):
=============================================================================
EOF
tail -n 20 dist.out
exit 1
fi
tail -n 20 dist.out
exit 1
fi
module unload libevent
module unload autotools

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* $COPYRIGHT$
*
@ -97,11 +97,11 @@ int main(int argc, char **argv)
int rc;
pmix_value_t value;
pmix_value_t *val = &value;
char *tmp;
char *tmp, *ptr, *p;
pmix_proc_t proc;
uint32_t nprocs, n;
uint32_t nprocs, m, n, local_cnt, *localpeers;
pmix_info_t *info;
bool flag;
bool flag, local;
volatile int active;
pmix_status_t dbg = PMIX_ERR_DEBUGGER_RELEASE;
@ -196,7 +196,7 @@ int main(int argc, char **argv)
value.type = PMIX_UINT64;
value.data.uint64 = 1234;
if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc);
fprintf(stderr, "Client ns %s rank %d: PMIx_Put local failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
free(tmp);
@ -207,7 +207,7 @@ int main(int argc, char **argv)
value.type = PMIX_STRING;
value.data.string = "1234";
if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, myproc.rank, rc);
fprintf(stderr, "Client ns %s rank %d: PMIx_Put remote failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
free(tmp);
@ -230,53 +230,96 @@ int main(int argc, char **argv)
}
PMIX_INFO_FREE(info, 1);
/* get the number of local peers */
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_SIZE, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get PMIX_LOCAL_SIZE failed: %d", myproc.nspace, myproc.rank, rc);
goto done;
}
local_cnt = val->data.uint32;
PMIX_VALUE_RELEASE(val);
/* create an array for the peers */
localpeers = (uint32_t*)malloc(local_cnt * sizeof(int));
/* get the list of local peers */
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get PMIX_LOCAL_PEERS failed: %d", myproc.nspace, myproc.rank, rc);
goto done;
}
ptr = strdup(val->data.string);
PMIX_VALUE_RELEASE(val);
/* populate the peers array */
p = strtok(ptr, ",");
localpeers[0] = strtoul(p, NULL, 10);
for (n=1; n < local_cnt; n++) {
p = strtok(NULL, ",");
localpeers[n] = strtoul(p, NULL, 10);
}
free(ptr);
/* check the returned data */
for (n=0; n < nprocs; n++) {
if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) {
exit(1);
if (n == myproc.rank) {
continue;
}
if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc);
goto done;
proc.rank = n;
local = false;
for (m=0; m < local_cnt; m++) {
if (localpeers[m] == proc.rank) {
local = true;
break;
}
}
if (PMIX_UINT64 != val->type) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type);
if (local) {
if (0 > asprintf(&tmp, "%s-%d-local", proc.nspace, proc.rank)) {
exit(1);
}
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc);
goto done;
}
if (PMIX_UINT64 != val->type) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type);
PMIX_VALUE_RELEASE(val);
free(tmp);
goto done;
}
if (1234 != val->data.uint64) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, tmp, (int)val->data.uint64);
PMIX_VALUE_RELEASE(val);
free(tmp);
goto done;
}
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp);
PMIX_VALUE_RELEASE(val);
free(tmp);
goto done;
}
if (1234 != val->data.uint64) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, tmp, (int)val->data.uint64);
} else {
if (0 > asprintf(&tmp, "%s-%d-remote", proc.nspace, proc.rank)) {
exit(1);
}
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc);
goto done;
}
if (PMIX_STRING != val->type) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type);
PMIX_VALUE_RELEASE(val);
free(tmp);
goto done;
}
if (0 != strcmp(val->data.string, "1234")) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string);
PMIX_VALUE_RELEASE(val);
free(tmp);
goto done;
}
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp);
PMIX_VALUE_RELEASE(val);
free(tmp);
goto done;
}
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp);
PMIX_VALUE_RELEASE(val);
free(tmp);
if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) {
exit(1);
}
if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc);
goto done;
}
if (PMIX_STRING != val->type) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type);
PMIX_VALUE_RELEASE(val);
free(tmp);
goto done;
}
if (0 != strcmp(val->data.string, "1234")) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string);
PMIX_VALUE_RELEASE(val);
free(tmp);
goto done;
}
fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp);
PMIX_VALUE_RELEASE(val);
free(tmp);
}
free(localpeers);
done:
/* finalize us */

Просмотреть файл

@ -1862,7 +1862,6 @@ PMIX_EXPORT const char* PMIx_Get_version(void);
PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc,
const char *key, pmix_value_t *val);
/**
* Top-level interface function to pack one or more values into a
* buffer.
@ -1881,6 +1880,17 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc,
* will return an error code (generated upon unpacking) -
* the error cannot be detected during packing.
*
* The identity of the intended recipient of the packed buffer (i.e., the
* process that will be unpacking it) is used solely to resolve any data type
* differences between PMIx versions. The recipient must, therefore, be
* known to the user prior to calling the pack function so that the
* PMIx library is aware of the version the recipient is using.
*
* @param *target Pointer to a pmix_proc_t structure containing the
* nspace/rank of the process that will be unpacking the final buffer.
* A NULL value may be used to indicate that the target is based on
* the same PMIx version as the caller.
*
* @param *buffer A pointer to the buffer into which the value is to
* be packed.
*
@ -1914,7 +1924,8 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc,
* status_code = PMIx_Data_pack(buffer, &src, 1, PMIX_INT32);
* @endcode
*/
PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer,
PMIX_EXPORT pmix_status_t PMIx_Data_pack(const pmix_proc_t *target,
pmix_data_buffer_t *buffer,
void *src, int32_t num_vals,
pmix_data_type_t type);
@ -1961,6 +1972,17 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer,
* will return an error code generated upon unpacking - these errors
* cannot be detected during packing.
*
* The identity of the source of the packed buffer (i.e., the
* process that packed it) is used solely to resolve any data type
* differences between PMIx versions. The source must, therefore, be
* known to the user prior to calling the unpack function so that the
* PMIx library is aware of the version the source used.
*
* @param *source Pointer to a pmix_proc_t structure containing the
* nspace/rank of the process that packed the provided buffer.
* A NULL value may be used to indicate that the source is based on
* the same PMIx version as the caller.
*
* @param *buffer A pointer to the buffer from which the value will be
* extracted.
*
@ -2010,7 +2032,8 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer,
*
* @endcode
*/
PMIX_EXPORT pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *dest,
PMIX_EXPORT pmix_status_t PMIx_Data_unpack(const pmix_proc_t *source,
pmix_data_buffer_t *buffer, void *dest,
int32_t *max_num_values,
pmix_data_type_t type);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
@ -440,6 +440,8 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
/* setup the globals */
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);
PMIX_CONSTRUCT(&pmix_client_globals.peers, pmix_pointer_array_t);
pmix_pointer_array_init(&pmix_client_globals.peers, 1, INT_MAX, 1);
pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t);
if (NULL == pmix_client_globals.myserver) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
@ -700,6 +702,8 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo)
size_t n;
pmix_client_timeout_t tev;
struct timeval tv = {2, 0};
pmix_peer_t *peer;
int i;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (1 != pmix_globals.init_cntr) {
@ -788,6 +792,11 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo)
}
PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests);
for (i=0; i < pmix_client_globals.peers.size; i++) {
if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_client_globals.peers, i))) {
PMIX_RELEASE(peer);
}
}
if (0 <= pmix_client_globals.myserver->sd) {
CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd);

Просмотреть файл

@ -15,6 +15,7 @@
#include "src/threads/threads.h"
#include "src/class/pmix_list.h"
#include "src/class/pmix_pointer_array.h"
#include "src/include/pmix_globals.h"
BEGIN_C_DECLS
@ -22,6 +23,7 @@ BEGIN_C_DECLS
typedef struct {
pmix_peer_t *myserver; // messaging support to/from my server
pmix_list_t pending_requests; // list of pmix_cb_t pending data requests
pmix_pointer_array_t peers; // array of pmix_peer_t cached for data ops
// verbosity for client get operations
int get_output;
int get_verbose;

Просмотреть файл

@ -31,11 +31,13 @@
#include <stdlib.h>
#endif
#include <pmix_common.h>
#include <pmix.h>
#include <pmix_rename.h>
#include "src/mca/bfrops/bfrops.h"
#include "src/include/pmix_globals.h"
#include "src/server/pmix_server_ops.h"
#include "src/client/pmix_client_ops.h"
#define PMIX_EMBED_DATA_BUFFER(b, db) \
do { \
@ -66,12 +68,121 @@
(b)->bytes_used = 0; \
} while (0)
PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer,
static pmix_peer_t* find_peer(const pmix_proc_t *proc)
{
pmix_status_t rc;
pmix_peer_t *peer;
pmix_proc_t wildcard;
pmix_value_t *value;
int i;
if (NULL == proc) {
return pmix_globals.mypeer;
}
if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) {
/* see if we know this proc */
for (i=0; i < pmix_server_globals.clients.size; i++) {
if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) {
continue;
}
if (0 == strncmp(proc->nspace, peer->nptr->nspace, PMIX_MAX_NSLEN)) {
return peer;
}
}
/* didn't find it, so try to get the library version of the target
* from the host - the result will be cached, so we will only have
* to retrieve it once */
(void)strncpy(wildcard.nspace, proc->nspace, PMIX_MAX_NSLEN);
wildcard.rank = PMIX_RANK_WILDCARD;
if (PMIX_SUCCESS != (rc = PMIx_Get(&wildcard, PMIX_BFROPS_MODULE, NULL, 0, &value))) {
/* couldn't get it - nothing we can do */
return NULL;
}
/* setup a peer for this nspace */
peer = PMIX_NEW(pmix_peer_t);
if (NULL == peer) {
PMIX_RELEASE(value);
return NULL;
}
peer->nptr = PMIX_NEW(pmix_nspace_t);
if (NULL == peer->nptr) {
PMIX_RELEASE(peer);
PMIX_RELEASE(value);
return NULL;
}
peer->nptr->nspace = strdup(proc->nspace);
/* assign a module to it based on the returned version */
peer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(value->data.string);
PMIX_RELEASE(value);
if (NULL == peer->nptr->compat.bfrops) {
PMIX_RELEASE(peer);
return NULL;
}
/* cache the peer object */
pmix_pointer_array_add(&pmix_server_globals.clients, peer);
return peer;
}
// we are a client or tool
/* If the target is for the server, then
* pack it using that peer. */
if (0 == strncmp(proc->nspace, pmix_client_globals.myserver->info->pname.nspace, PMIX_MAX_NSLEN)) {
return pmix_client_globals.myserver;
}
/* if the target is another member of my nspace, then
* they must be using the same version */
if (0 == strncmp(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN)) {
return pmix_globals.mypeer;
}
/* try to get the library version of this peer - the result will be
* cached, so we will only have to retrieve it once */
(void)strncpy(wildcard.nspace, proc->nspace, PMIX_MAX_NSLEN);
wildcard.rank = PMIX_RANK_WILDCARD;
if (PMIX_SUCCESS != (rc = PMIx_Get(&wildcard, PMIX_BFROPS_MODULE, NULL, 0, &value))) {
/* couldn't get it - nothing we can do */
return NULL;
}
/* setup a peer for this nspace */
peer = PMIX_NEW(pmix_peer_t);
if (NULL == peer) {
PMIX_RELEASE(value);
return NULL;
}
peer->nptr = PMIX_NEW(pmix_nspace_t);
if (NULL == peer->nptr) {
PMIX_RELEASE(peer);
PMIX_RELEASE(value);
return NULL;
}
peer->nptr->nspace = strdup(proc->nspace);
/* assign a module to it based on the returned version */
peer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(value->data.string);
PMIX_RELEASE(value);
if (NULL == peer->nptr->compat.bfrops) {
PMIX_RELEASE(peer);
return NULL;
}
/* need to cache the peer someplace so we can clean it
* up later */
return peer;
}
PMIX_EXPORT pmix_status_t PMIx_Data_pack(const pmix_proc_t *target,
pmix_data_buffer_t *buffer,
void *src, int32_t num_vals,
pmix_data_type_t type)
{
pmix_status_t rc;
pmix_buffer_t buf;
pmix_peer_t *peer;
if (NULL == (peer = find_peer(target))) {
return PMIX_ERR_NOT_SUPPORTED;
}
/* setup the host */
PMIX_CONSTRUCT(&buf, pmix_buffer_t);
@ -80,7 +191,7 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer,
PMIX_EMBED_DATA_BUFFER(&buf, buffer);
/* pack the value */
PMIX_BFROPS_PACK(rc, pmix_globals.mypeer,
PMIX_BFROPS_PACK(rc, peer,
&buf, src, num_vals, type);
/* extract the data buffer - the pointers may have changed */
@ -91,12 +202,18 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer,
}
PMIX_EXPORT pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *dest,
PMIX_EXPORT pmix_status_t PMIx_Data_unpack(const pmix_proc_t *source,
pmix_data_buffer_t *buffer, void *dest,
int32_t *max_num_values,
pmix_data_type_t type)
{
pmix_status_t rc;
pmix_buffer_t buf;
pmix_peer_t *peer;
if (NULL == (peer = find_peer(source))) {
return PMIX_ERR_NOT_SUPPORTED;
}
/* setup the host */
PMIX_CONSTRUCT(&buf, pmix_buffer_t);
@ -105,7 +222,7 @@ PMIX_EXPORT pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *des
PMIX_EMBED_DATA_BUFFER(&buf, buffer);
/* unpack the value */
PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer,
PMIX_BFROPS_UNPACK(rc, peer,
&buf, dest, max_num_values, type);
/* extract the data buffer - the pointers may have changed */

Просмотреть файл

@ -101,6 +101,7 @@ static void nscon(pmix_nspace_t *p)
p->nspace = NULL;
p->nlocalprocs = 0;
p->all_registered = false;
p->version_stored = false;
p->jobbkt = NULL;
p->ndelivered = 0;
PMIX_CONSTRUCT(&p->ranks, pmix_list_t);

Просмотреть файл

@ -51,6 +51,8 @@ BEGIN_C_DECLS
#define PMIX_MAX_CRED_SIZE 131072 // set max at 128kbytes
#define PMIX_MAX_ERR_CONSTANT INT_MIN
/* internal-only attributes */
#define PMIX_BFROPS_MODULE "pmix.bfrops.mod" // (char*) name of bfrops plugin in-use by a given nspace
/* define an internal-only process name that has
* a dynamically-sized nspace field to save memory */
@ -152,6 +154,7 @@ typedef struct {
char *nspace;
size_t nlocalprocs;
bool all_registered; // all local ranks have been defined
bool version_stored; // the version string used by this nspace has been stored
pmix_buffer_t *jobbkt; // packed version of jobinfo
size_t ndelivered; // count of #local clients that have received the jobinfo
pmix_list_t ranks; // list of pmix_rank_info_t for connection support of my clients

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
*
* $COPYRIGHT$
@ -112,6 +112,7 @@ typedef struct {
pmix_hash_table_t internal;
pmix_hash_table_t remote;
pmix_hash_table_t local;
bool gdata_added;
} pmix_hash_trkr_t;
static void htcon(pmix_hash_trkr_t *p)
@ -124,6 +125,7 @@ static void htcon(pmix_hash_trkr_t *p)
pmix_hash_table_init(&p->remote, 256);
PMIX_CONSTRUCT(&p->local, pmix_hash_table_t);
pmix_hash_table_init(&p->local, 256);
p->gdata_added = false;
}
static void htdes(pmix_hash_trkr_t *p)
{
@ -515,26 +517,29 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
}
/* now add any global data that was provided */
PMIX_LIST_FOREACH(kvptr, &pmix_server_globals.gdata, pmix_kval_t) {
/* sadly, the data cannot simultaneously exist on two lists,
* so we must make a copy of it here */
kp2 = PMIX_NEW(pmix_kval_t);
if (NULL == kp2) {
rc = PMIX_ERR_NOMEM;
goto release;
}
kp2->key = strdup(kvptr->key);
PMIX_VALUE_XFER(rc, kp2->value, kvptr->value);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(kp2);
goto release;
}
if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(kp2);
break;
if (!trk->gdata_added) {
PMIX_LIST_FOREACH(kvptr, &pmix_server_globals.gdata, pmix_kval_t) {
/* sadly, the data cannot simultaneously exist on two lists,
* so we must make a copy of it here */
kp2 = PMIX_NEW(pmix_kval_t);
if (NULL == kp2) {
rc = PMIX_ERR_NOMEM;
goto release;
}
kp2->key = strdup(kvptr->key);
PMIX_VALUE_XFER(rc, kp2->value, kvptr->value);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(kp2);
goto release;
}
if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(kp2);
break;
}
}
trk->gdata_added = true;
}
release:

Просмотреть файл

@ -1,6 +1,8 @@
/*
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* $COPYRIGHT$
*
@ -106,7 +108,6 @@ static pmix_status_t generate_node_regex(const char *input,
len = strlen(vptr);
startnum = -1;
memset(prefix, 0, PMIX_MAX_NODE_PREFIX);
numdigits = 0;
for (i=0, j=0; i < len; i++) {
if (!isalpha(vptr[i])) {
/* found a non-alpha char */
@ -120,7 +121,6 @@ static pmix_status_t generate_node_regex(const char *input,
/* count the size of the numeric field - but don't
* add the digits to the prefix
*/
numdigits++;
if (startnum < 0) {
/* okay, this defines end of the prefix */
startnum = i;
@ -147,8 +147,10 @@ static pmix_status_t generate_node_regex(const char *input,
vnum = strtol(&vptr[startnum], &sfx, 10);
if (NULL != sfx) {
suffix = strdup(sfx);
numdigits = (int)(sfx - &vptr[startnum]);
} else {
suffix = NULL;
numdigits = (int)strlen(&vptr[startnum]);
}
/* is this value already on our list? */
found = false;

Просмотреть файл

@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -53,19 +53,23 @@
#include "src/mca/ptl/base/static-components.h"
#define PMIX_MAX_MSG_SIZE 16
/* Instantiate the global vars */
pmix_ptl_globals_t pmix_ptl_globals = {{{0}}};
int pmix_ptl_base_output = -1;
static size_t max_msg_size = PMIX_MAX_MSG_SIZE;
static int pmix_ptl_register(pmix_mca_base_register_flag_t flags)
{
pmix_ptl_globals.max_msg_size = 8000000;
pmix_mca_base_var_register("pmix", "ptl", "base", "max_msg_size",
"Max size (in bytes) of a client/server msg",
"Max size (in Mbytes) of a client/server msg",
PMIX_MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
PMIX_INFO_LVL_2,
PMIX_MCA_BASE_VAR_SCOPE_READONLY,
&pmix_ptl_globals.max_msg_size);
&max_msg_size);
pmix_ptl_globals.max_msg_size = max_msg_size * 1024 * 1024;
return PMIX_SUCCESS;
}

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -1215,8 +1215,8 @@ static void connection_handler(int sd, short args, void *cbdata)
} else {
peer->nptr->compat.gds = pmix_gds_base_assign_module(NULL, 0);
}
free(msg); // can now release the data buffer
if (NULL == peer->nptr->compat.gds) {
free(msg);
info->proc_cnt--;
pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL);
PMIX_RELEASE(peer);
@ -1224,6 +1224,17 @@ static void connection_handler(int sd, short args, void *cbdata)
goto error;
}
/* if we haven't previously stored the version for this
* nspace, do so now */
if (!nptr->version_stored) {
PMIX_INFO_LOAD(&ginfo, PMIX_BFROPS_MODULE, peer->nptr->compat.bfrops->version, PMIX_STRING);
PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, peer->nptr, &ginfo, 1);
PMIX_INFO_DESTRUCT(&ginfo);
nptr->version_stored = true;
}
free(msg); // can now release the data buffer
/* the choice of PTL module is obviously us */
peer->nptr->compat.ptl = &pmix_ptl_tcp_module;
@ -1447,6 +1458,7 @@ static void process_cbfunc(int sd, short args, void *cbdata)
/* set the gds */
PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, pnd->gds, PMIX_STRING);
peer->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1);
PMIX_INFO_DESTRUCT(&ginfo);
if (NULL == peer->nptr->compat.gds) {
PMIX_RELEASE(peer);
pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super);
@ -1455,6 +1467,15 @@ static void process_cbfunc(int sd, short args, void *cbdata)
goto done;
}
/* if we haven't previously stored the version for this
* nspace, do so now */
if (!peer->nptr->version_stored) {
PMIX_INFO_LOAD(&ginfo, PMIX_BFROPS_MODULE, peer->nptr->compat.bfrops->version, PMIX_STRING);
PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, peer->nptr, &ginfo, 1);
PMIX_INFO_DESTRUCT(&ginfo);
nptr->version_stored = true;
}
/* validate the connection */
cred.bytes = pnd->cred;
cred.size = pnd->len;

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -668,12 +668,21 @@ static void connection_handler(int sd, short args, void *cbdata)
goto error;
}
/* if we haven't previously stored the version for this
* nspace, do so now */
if (!nptr->version_stored) {
PMIX_INFO_LOAD(&ginfo, PMIX_BFROPS_MODULE, nptr->compat.bfrops->version, PMIX_STRING);
PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, nptr, &ginfo, 1);
PMIX_INFO_DESTRUCT(&ginfo);
nptr->version_stored = true;
}
/* the choice of PTL module was obviously made by the connecting
* tool as we received this request via that channel, so simply
* record it here for future use */
nptr->compat.ptl = &pmix_ptl_usock_module;
/* validate the connection */
/* validate the connection - the macro will send the status result to the client */
PMIX_PSEC_VALIDATE_CONNECTION(rc, psave, NULL, 0, NULL, 0, &cred);
/* now done with the msg */
free(msg);

Просмотреть файл

@ -871,7 +871,12 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata)
* store the data first so we can immediately satisfy any future
* requests. Then, rather than duplicate the resolve code here, we
* will let the pmix_pending_resolve function go ahead and retrieve
* it from the GDS */
* it from the GDS
*
* NOTE: if the data returned is NULL, then it has already been
* stored (e.g., via a register_nspace call in response to a request
* for job-level data). For now, we will retrieve it so it can
* be stored for each peer */
if (PMIX_SUCCESS == caddy->status) {
/* cycle across all outstanding local requests and collect their
* unique nspaces so we can store this for each one */
@ -906,8 +911,11 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata)
peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, rinfo->peerid);
}
PMIX_CONSTRUCT(&pbkt, pmix_buffer_t);
PMIX_LOAD_BUFFER(pmix_globals.mypeer, &pbkt, caddy->data, caddy->ndata);
if (NULL == caddy->data) {
PMIX_GDS_REGISTER_JOB_INFO(rc, pmix_globals.mypeer, &pbkt);
} else {
PMIX_LOAD_BUFFER(pmix_globals.mypeer, &pbkt, caddy->data, caddy->ndata);
}
/* unpack and store it*/
kv = PMIX_NEW(pmix_kval_t);
cnt = 1;

Просмотреть файл

@ -295,6 +295,8 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);
PMIX_CONSTRUCT(&pmix_client_globals.peers, pmix_pointer_array_t);
pmix_pointer_array_init(&pmix_client_globals.peers, 1, INT_MAX, 1);
pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t);
pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t);
@ -386,7 +388,13 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
if (NULL == pmix_globals.mypeer->nptr->nspace) {
pmix_globals.mypeer->nptr->nspace = strdup(proc->nspace);
}
(void)strncpy(pmix_globals.mypeer->info->pname.nspace, proc->nspace, PMIX_MAX_NSLEN);
/* setup a rank_info object for us */
pmix_globals.mypeer->info = PMIX_NEW(pmix_rank_info_t);
if (NULL == pmix_globals.mypeer->info) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_NOMEM;
}
pmix_globals.mypeer->info->pname.nspace = strdup(proc->nspace);
pmix_globals.mypeer->info->pname.rank = proc->rank;
/* increment our init reference counter */
@ -748,6 +756,8 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
pmix_status_t rc;
pmix_tool_timeout_t tev;
struct timeval tv = {2, 0};
int n;
pmix_peer_t *peer;
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
if (1 != pmix_globals.init_cntr) {
@ -818,6 +828,11 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
PMIX_RELEASE(pmix_client_globals.myserver);
PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests);
for (n=0; n < pmix_client_globals.peers.size; n++) {
if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_client_globals.peers, n))) {
PMIX_RELEASE(peer);
}
}
/* shutdown services */
pmix_rte_finalize();

Просмотреть файл

@ -12,6 +12,8 @@
# Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
# Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
# Copyright (c) 2018 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -68,7 +70,7 @@ pmix_client_LDADD = \
$(top_builddir)/src/libpmix.la
pmix_regex_SOURCES = $(headers) \
pmix_regex.c test_common.c cli_stages.c utils.c
pmix_regex.c test_common.c cli_stages.c server_callbacks.c utils.c
pmix_regex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
pmix_regex_LDADD = \
$(top_builddir)/src/libpmix.la

Просмотреть файл

@ -14,6 +14,8 @@
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -37,6 +39,9 @@
#define TEST_NODES "odin001,odin002,odin003,odin010,odin011,odin075"
#define TEST_PROCS "1,2,3,4;5-8;9,11-12;17-20;21-24;100"
#define TEST_NODES2 "c712f6n01,c712f6n02,c712f6n03"
bool spawn_wait = false;
int main(int argc, char **argv)
{
@ -52,6 +57,8 @@ int main(int argc, char **argv)
TEST_VERBOSE(("Testing version %s", PMIx_Get_version()));
PMIx_server_init(&mymodule, NULL, 0);
TEST_VERBOSE(("Start PMIx regex smoke test"));
fprintf(stderr, "NODES: %s\n", TEST_NODES);
@ -86,5 +93,20 @@ int main(int argc, char **argv)
fprintf(stderr, "PPN reverse failed: %d\n", rc);
}
fprintf(stderr, "NODES: %s\n", TEST_NODES2);
PMIx_generate_regex(TEST_NODES2, &regex);
fprintf(stderr, "REGEX: %s\n\n", regex);
/* test reverse parsing */
rc = pmix_preg.parse_nodes(regex, &nodes);
free(regex);
if (PMIX_SUCCESS == rc) {
regex = pmix_argv_join(nodes, ',');
pmix_argv_free(nodes);
fprintf(stderr, "NODES: %s\n", TEST_NODES2);
fprintf(stderr, "RSULT: %s\n\n\n", regex);
free(regex);
} else {
fprintf(stderr, "Node reverse failed: %d\n\n\n", rc);
}
return 0;
}

Просмотреть файл

@ -5,63 +5,63 @@
/*
 * Publish/lookup smoke test client.
 *
 * Flow:
 *   1. PMIx_Init, then PMIx_Get(PMIX_RANK) to learn our rank.
 *   2. Rank 0 publishes the key "magic-found" with the string value "yes".
 *   3. All procs call PMIx_Fence with PMIX_COLLECT_DATA set to true.
 *   4. Rank 1 looks up "magic-found" and "magic-not-found" and prints the
 *      type (and the string, when the type is PMIX_STRING) of each entry.
 *   5. PMIx_Finalize.
 *
 * Every PMIx status is checked with assert(), so a failure aborts the
 * process (when asserts are compiled in).
 *
 * Returns 0 on normal completion.
 */
int main(int argc, char **argv)
{
    pmix_proc_t myproc;
    pmix_status_t rc;
    int rank;

    rc = PMIx_Init(&myproc, NULL, 0);
    assert(PMIX_SUCCESS == rc);

    /* discover our own rank */
    {
        pmix_value_t *value;

        rc = PMIx_Get(&myproc, PMIX_RANK, NULL, 0, &value);
        assert(PMIX_SUCCESS == rc);
        printf("%d\n", value->type);
        assert(value->type == PMIX_INT);
        /* NOTE(review): the assert above expects PMIX_INT, yet the value is
         * read through the uint32 member - confirm which type the server
         * actually returns for PMIX_RANK */
        rank = value->data.uint32;
        PMIX_VALUE_RELEASE(value);
    }

    /* rank 0 publishes the key that rank 1 will later look up */
    if (0 == rank) {
        pmix_info_t *info;

        PMIX_INFO_CREATE(info, 1);
        /* load through the macro (rather than pointing the value at a
         * string literal) so the array can be handed to PMIX_INFO_FREE */
        PMIX_INFO_LOAD(info, "magic-found", "yes", PMIX_STRING);
        rc = PMIx_Publish(info, 1);
        assert(PMIX_SUCCESS == rc);
        PMIX_INFO_FREE(info, 1);
    }

    printf("I am rank %d\n", rank);

    /* fence, asking for data collection */
    {
        bool flag = true;
        pmix_info_t *info;

        PMIX_INFO_CREATE(info, 1);
        PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL);
        rc = PMIx_Fence(&myproc, 1, info, 1);
        assert(PMIX_SUCCESS == rc);
        PMIX_INFO_FREE(info, 1);
    }

    /* rank 1 looks up one key that was published and one that was not */
    if (1 == rank) {
        int i;
        pmix_pdata_t *pdata;

        PMIX_PDATA_CREATE(pdata, 2);
        snprintf(pdata[0].key, PMIX_MAX_KEYLEN, "magic-found");
        snprintf(pdata[1].key, PMIX_MAX_KEYLEN, "magic-not-found");
        rc = PMIx_Lookup(&pdata[0], 2, NULL, 0);
        /* a not-found status is accepted as well as success */
        assert((PMIX_SUCCESS == rc) || (PMIX_ERR_NOT_FOUND == rc));
        for (i = 0; i < 2; i++) {
            if (PMIX_STRING == pdata[i].value.type) {
                printf("Found[%d] %d %s\n", i, pdata[i].value.type, pdata[i].value.data.string);
            } else {
                printf("Found[%d] %d\n", i, pdata[i].value.type);
            }
        }
        /* release both entries - the array was created with two */
        PMIX_PDATA_FREE(pdata, 2);
    }

    rc = PMIx_Finalize(NULL, 0);
    assert(PMIX_SUCCESS == rc);

    return 0;
}

Просмотреть файл

@ -394,6 +394,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
}
if (0 != flag) {
opal_output(0, "UNPACKING PRIOR JOBS");
/* unpack the buffer containing the info */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) {
@ -418,6 +419,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
/* nope - add it */
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
} else {
opal_output(0, "DROPPING COPY");
/* yep - so we can drop this copy */
jdata->jobid = ORTE_JOBID_INVALID;
OBJ_RELEASE(jdata);
@ -508,6 +510,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
* and sent us the complete array of procs in the orte_job_t, so we
* don't need to do anything more here */
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
opal_output(0, "JOB NOT FULLY DESCRIBED");
if (!ORTE_PROC_IS_HNP) {
/* extract the ppn regex */
cnt = 1;
@ -524,11 +527,13 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
}
free(ppn);
/* now assign locations to the procs */
opal_output(0, "ASSIGN LOCS");
if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
}
opal_output(0, "COMPUTE VPIDS");
/* compute the ranks and add the proc objects
* to the jdata->procs array */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
@ -44,6 +44,7 @@
#include "orte/mca/rml/rml.h"
#include "pmix_server_internal.h"
#include "pmix_server.h"
static void relcb(void *cbdata)
{
@ -193,6 +194,24 @@ static void dmodex_req(int sd, short args, void *cbdata)
}
return;
}
/* if this is a request for rank=WILDCARD, then they want the job-level data
* for this job. It was probably not stored locally because we aren't hosting
* any local procs. There is no need to request the data as we already have
* it - so just register the nspace so the local PMIx server gets it */
if (ORTE_VPID_WILDCARD == req->target.vpid) {
rc = orte_pmix_server_register_nspace(jdata, true);
if (ORTE_SUCCESS != rc) {
goto callback;
}
/* let the server know that the data is now available */
if (NULL != req->mdxcbfunc) {
req->mdxcbfunc(rc, NULL, 0, req->cbdata, NULL, NULL);
}
OBJ_RELEASE(req);
return;
}
/* if they are asking about a specific proc, then fetch it */
if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, req->target.vpid))) {
/* if we find the job, but not the process, then that is an error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);