From 9fe8153d38d5514e3c8b2d7d29f624c149ff13b3 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 3 Feb 2018 18:53:50 -0800 Subject: [PATCH 1/2] Sync to IOF branch and continue fix of request for job info from unknown nspace Signed-off-by: Ralph Castain (cherry picked from commit 02400d30d79ce3c7e7e28f9a08f7062a5b6f4c51) --- opal/mca/pmix/pmix3x/pmix/NEWS | 36 ++++- opal/mca/pmix/pmix3x/pmix/VERSION | 4 +- .../pmix/config/pmix_check_attributes.m4 | 6 +- opal/mca/pmix/pmix3x/pmix/contrib/buildrpm.sh | 24 ++-- .../pmix/pmix3x/pmix/contrib/pmix-release.sh | 10 +- opal/mca/pmix/pmix3x/pmix/examples/client.c | 129 ++++++++++++------ .../pmix/pmix3x/pmix/include/pmix_common.h.in | 29 +++- .../pmix/pmix3x/pmix/src/client/pmix_client.c | 11 +- .../pmix3x/pmix/src/client/pmix_client_ops.h | 2 + .../pmix/pmix3x/pmix/src/common/pmix_data.c | 127 ++++++++++++++++- .../pmix3x/pmix/src/include/pmix_globals.c | 1 + .../pmix3x/pmix/src/include/pmix_globals.h | 3 + .../pmix3x/pmix/src/mca/gds/hash/gds_hash.c | 45 +++--- .../pmix/src/mca/preg/native/preg_native.c | 6 +- .../pmix/src/mca/ptl/base/ptl_base_frame.c | 12 +- .../pmix/src/mca/ptl/tcp/ptl_tcp_component.c | 25 +++- .../src/mca/ptl/usock/ptl_usock_component.c | 13 +- .../pmix3x/pmix/src/server/pmix_server_get.c | 14 +- .../mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c | 17 ++- opal/mca/pmix/pmix3x/pmix/test/Makefile.am | 4 +- opal/mca/pmix/pmix3x/pmix/test/pmix_regex.c | 22 +++ .../pmix/pmix3x/pmix/test/simple/test_pmix.c | 102 +++++++------- orte/mca/odls/base/odls_base_default_fns.c | 5 + orte/orted/pmix/pmix_server_fence.c | 21 ++- 24 files changed, 505 insertions(+), 163 deletions(-) diff --git a/opal/mca/pmix/pmix3x/pmix/NEWS b/opal/mca/pmix/pmix3x/pmix/NEWS index b0792ea7ea..c52c0b0de6 100644 --- a/opal/mca/pmix/pmix3x/pmix/NEWS +++ b/opal/mca/pmix/pmix3x/pmix/NEWS @@ -1,4 +1,4 @@ -Copyright (c) 2015-2017 Intel, Inc. All rights reserved. +Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
Copyright (c) 2017 IBM Corporation. All rights reserved. $COPYRIGHT$ @@ -25,7 +25,7 @@ Master (not on release branches yet) ------------------------------------ -2.1.0 -- 26 Oct 2017 +2.1.0 -- 1 Feb 2018 ---------------------- **** NOTE: This release contains the first implementation of cross-version **** support. Servers using v2.1.0 are capable of supporting clients using @@ -36,6 +36,30 @@ Master (not on release branches yet) - Enable support for remote tool connections (PR #540, #542) - Cleanup libevent configure logi to support default install paths (PR #541) - Debounce "unreachable" notifications for tools when they disconnect (PR #544) +- Enable the regex generator to support node names that include multiple + sets of numbers + + +2.0.3 -- TBD +---------------------- +- Fix event notification so all sides of multi-library get notified + of other library's existence +- Update syslog protection to support Mac High Sierra OS +- Remove usock component - unable to support v1.x clients due + to datatype differences +- Cleanup security handshake +- Cleanup separation of PMI-1/2 libraries and PMIx symbols +- Protect against overly-large messages +- Update data buffer APIs to support cross-version operations +- Protect receive callbacks from NULL and/or empty buffers as this + can occur when the peer on a connection disappears. +- Fix tool connection search so it properly descends into the directory + tree while searching for the server's contact file. 
+- Fix store_local so it doesn't reject a new nspace as that can happen + when working with tools +- Ensure we always complete PMIx_Finalize - don't return if something + goes wrong in the middle of the procedure +- Fix several tool connection issues 2.0.2 -- 19 Oct 2017 @@ -119,6 +143,14 @@ Master (not on release branches yet) and to themselves +1.2.5 -- TBD +---------------------- +- Fix cross-version issue when v1.2 client interacts with v2.1 server (PR #564) +- Update client connection for cross-version support (PR #591) +- Fix write memory barrier ASM for PowerPC (PR #606) +- Add protection from overly-large messages + + 1.2.4 -- 13 Oct. 2017 ---------------------- - Silence some unnecessary warning messages (PR #487) diff --git a/opal/mca/pmix/pmix3x/pmix/VERSION b/opal/mca/pmix/pmix3x/pmix/VERSION index 055bc93b6f..c5fdb7f036 100644 --- a/opal/mca/pmix/pmix3x/pmix/VERSION +++ b/opal/mca/pmix/pmix3x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitdb5d380 +repo_rev=git3bd2b2d # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jan 25, 2018" +date="Feb 03, 2018" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_attributes.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_attributes.m4 index 7c7a0fe5fe..b5005b72c0 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_attributes.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_attributes.m4 @@ -1,6 +1,6 @@ # -*- shell-script -*- # PMIx copyrights: -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. 
# ######################### # @@ -15,7 +15,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. -# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2018 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2015 Intel, Inc. All rights reserved. @@ -499,7 +499,7 @@ AC_DEFUN([PMIX_CHECK_ATTRIBUTES], [ _PMIX_CHECK_SPECIFIC_ATTRIBUTE([extension], [ - #define FOO __extension__ ({size_t bar; bar = 3;}) + int i = __extension__ 3; ], [], []) diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/buildrpm.sh b/opal/mca/pmix/pmix3x/pmix/contrib/buildrpm.sh index 77ad94d09b..9b65ceefed 100755 --- a/opal/mca/pmix/pmix3x/pmix/contrib/buildrpm.sh +++ b/opal/mca/pmix/pmix3x/pmix/contrib/buildrpm.sh @@ -96,19 +96,19 @@ echo "--> Found specfile: $specfile" rpmtopdir=${rpmtopdir:-"`grep %_topdir $HOME/.rpmmacros | awk '{ print $2 }'`"} if test "$rpmtopdir" != ""; then - rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'" + rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'" if test ! 
-d "$rpmtopdir"; then - mkdir -p "$rpmtopdir" - mkdir -p "$rpmtopdir/BUILD" - mkdir -p "$rpmtopdir/RPMS" - mkdir -p "$rpmtopdir/RPMS/i386" - mkdir -p "$rpmtopdir/RPMS/i586" - mkdir -p "$rpmtopdir/RPMS/i686" - mkdir -p "$rpmtopdir/RPMS/noarch" - mkdir -p "$rpmtopdir/RPMS/athlon" - mkdir -p "$rpmtopdir/SOURCES" - mkdir -p "$rpmtopdir/SPECS" - mkdir -p "$rpmtopdir/SRPMS" + mkdir -p "$rpmtopdir" + mkdir -p "$rpmtopdir/BUILD" + mkdir -p "$rpmtopdir/RPMS" + mkdir -p "$rpmtopdir/RPMS/i386" + mkdir -p "$rpmtopdir/RPMS/i586" + mkdir -p "$rpmtopdir/RPMS/i686" + mkdir -p "$rpmtopdir/RPMS/noarch" + mkdir -p "$rpmtopdir/RPMS/athlon" + mkdir -p "$rpmtopdir/SOURCES" + mkdir -p "$rpmtopdir/SPECS" + mkdir -p "$rpmtopdir/SRPMS" fi need_root=0 elif test -d /usr/src/RPM; then diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/pmix-release.sh b/opal/mca/pmix/pmix3x/pmix/contrib/pmix-release.sh index 7ff1e2918d..ec64d5522f 100755 --- a/opal/mca/pmix/pmix3x/pmix/contrib/pmix-release.sh +++ b/opal/mca/pmix/pmix3x/pmix/contrib/pmix-release.sh @@ -36,16 +36,16 @@ for branch in $branches; do module load libevent/pmix-$branch ./$script $@ >dist.out 2>&1 - if test "$?" 
!= "0"; then - cat <data.uint32; + PMIX_VALUE_RELEASE(val); + + /* create an array for the peers */ + localpeers = (uint32_t*)malloc(local_cnt * sizeof(int)); + + /* get the list of local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get PMIX_LOCAL_PEERS failed: %d", myproc.nspace, myproc.rank, rc); + goto done; + } + ptr = strdup(val->data.string); + PMIX_VALUE_RELEASE(val); + + /* populate the peers array */ + p = strtok(ptr, ","); + localpeers[0] = strtoul(p, NULL, 10); + for (n=1; n < local_cnt; n++) { + p = strtok(NULL, ","); + localpeers[n] = strtoul(p, NULL, 10); + } + free(ptr); + /* check the returned data */ for (n=0; n < nprocs; n++) { - if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { - exit(1); + if (n == myproc.rank) { + continue; } - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); - goto done; + proc.rank = n; + local = false; + for (m=0; m < local_cnt; m++) { + if (localpeers[m] == proc.rank) { + local = true; + break; + } } - if (PMIX_UINT64 != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); + if (local) { + if (0 > asprintf(&tmp, "%s-%d-local", proc.nspace, proc.rank)) { + exit(1); + } + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); + goto done; + } + if (PMIX_UINT64 != val->type) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + goto done; + } + if (1234 != val->data.uint64) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, 
tmp, (int)val->data.uint64); + PMIX_VALUE_RELEASE(val); + free(tmp); + goto done; + } + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); - goto done; - } - if (1234 != val->data.uint64) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", myproc.nspace, myproc.rank, tmp, (int)val->data.uint64); + } else { + if (0 > asprintf(&tmp, "%s-%d-remote", proc.nspace, proc.rank)) { + exit(1); + } + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); + goto done; + } + if (PMIX_STRING != val->type) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + goto done; + } + if (0 != strcmp(val->data.string, "1234")) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string); + PMIX_VALUE_RELEASE(val); + free(tmp); + goto done; + } + fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); PMIX_VALUE_RELEASE(val); free(tmp); - goto done; } - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); - if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { - exit(1); - } - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, myproc.rank, tmp, rc); - goto done; - } - if (PMIX_STRING != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", myproc.nspace, myproc.rank, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - if (0 != strcmp(val->data.string, 
"1234")) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", myproc.nspace, myproc.rank, tmp, val->data.string); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, myproc.rank, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); } + free(localpeers); done: /* finalize us */ diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in index 7d9d8ffc7d..e9bebb2e20 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in @@ -1862,7 +1862,6 @@ PMIX_EXPORT const char* PMIx_Get_version(void); PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, const char *key, pmix_value_t *val); - /** * Top-level interface function to pack one or more values into a * buffer. @@ -1881,6 +1880,17 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, * will return an error code (generated upon unpacking) - * the error cannot be detected during packing. * + * The identity of the intended recipient of the packed buffer (i.e., the + * process that will be unpacking it) is used solely to resolve any data type + * differences between PMIx versions. The recipient must, therefore, be + * known to the user prior to calling the pack function so that the + * PMIx library is aware of the version the recipient is using. + * + * @param *target Pointer to a pmix_proc_t structure containing the + * nspace/rank of the process that will be unpacking the final buffer. + * A NULL value may be used to indicate that the target is based on + * the same PMIx version as the caller. + * * @param *buffer A pointer to the buffer into which the value is to * be packed. 
* @@ -1914,7 +1924,8 @@ PMIX_EXPORT pmix_status_t PMIx_Store_internal(const pmix_proc_t *proc, * status_code = PMIx_Data_pack(buffer, &src, 1, PMIX_INT32); * @endcode */ -PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, +PMIX_EXPORT pmix_status_t PMIx_Data_pack(const pmix_proc_t *target, + pmix_data_buffer_t *buffer, void *src, int32_t num_vals, pmix_data_type_t type); @@ -1961,6 +1972,17 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, * will return an error code generated upon unpacking - these errors * cannot be detected during packing. * + * The identity of the source of the packed buffer (i.e., the + * process that packed it) is used solely to resolve any data type + * differences between PMIx versions. The source must, therefore, be + * known to the user prior to calling the unpack function so that the + * PMIx library is aware of the version the source used. + * + * @param *source Pointer to a pmix_proc_t structure containing the + * nspace/rank of the process that packed the provided buffer. + * A NULL value may be used to indicate that the source is based on + * the same PMIx version as the caller. + * * @param *buffer A pointer to the buffer from which the value will be * extracted. * @@ -2010,7 +2032,8 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, * * @endcode */ -PMIX_EXPORT pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *dest, +PMIX_EXPORT pmix_status_t PMIx_Data_unpack(const pmix_proc_t *source, + pmix_data_buffer_t *buffer, void *dest, int32_t *max_num_values, pmix_data_type_t type); diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c index 7c3d08acfc..a1311d6ec6 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. 
All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -440,6 +440,8 @@ PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc, /* setup the globals */ PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); + PMIX_CONSTRUCT(&pmix_client_globals.peers, pmix_pointer_array_t); + pmix_pointer_array_init(&pmix_client_globals.peers, 1, INT_MAX, 1); pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t); if (NULL == pmix_client_globals.myserver) { PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -700,6 +702,8 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) size_t n; pmix_client_timeout_t tev; struct timeval tv = {2, 0}; + pmix_peer_t *peer; + int i; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { @@ -788,6 +792,11 @@ PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo) } PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); + for (i=0; i < pmix_client_globals.peers.size; i++) { + if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_client_globals.peers, i))) { + PMIX_RELEASE(peer); + } + } if (0 <= pmix_client_globals.myserver->sd) { CLOSE_THE_SOCKET(pmix_client_globals.myserver->sd); diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_ops.h b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_ops.h index 0723464166..a062ceff25 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_ops.h +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_ops.h @@ -15,6 +15,7 @@ #include "src/threads/threads.h" #include "src/class/pmix_list.h" +#include "src/class/pmix_pointer_array.h" #include "src/include/pmix_globals.h" BEGIN_C_DECLS @@ -22,6 +23,7 @@ BEGIN_C_DECLS typedef struct { pmix_peer_t *myserver; // messaging support to/from my server pmix_list_t pending_requests; 
// list of pmix_cb_t pending data requests + pmix_pointer_array_t peers; // array of pmix_peer_t cached for data ops // verbosity for client get operations int get_output; int get_verbose; diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_data.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_data.c index a377fadcdf..f1e9c00cec 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_data.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_data.c @@ -31,11 +31,13 @@ #include #endif -#include +#include #include #include "src/mca/bfrops/bfrops.h" #include "src/include/pmix_globals.h" +#include "src/server/pmix_server_ops.h" +#include "src/client/pmix_client_ops.h" #define PMIX_EMBED_DATA_BUFFER(b, db) \ do { \ @@ -66,12 +68,121 @@ (b)->bytes_used = 0; \ } while (0) -PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, +static pmix_peer_t* find_peer(const pmix_proc_t *proc) +{ + pmix_status_t rc; + pmix_peer_t *peer; + pmix_proc_t wildcard; + pmix_value_t *value; + int i; + + if (NULL == proc) { + return pmix_globals.mypeer; + } + + if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer)) { + /* see if we know this proc */ + for (i=0; i < pmix_server_globals.clients.size; i++) { + if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, i))) { + continue; + } + if (0 == strncmp(proc->nspace, peer->nptr->nspace, PMIX_MAX_NSLEN)) { + return peer; + } + } + /* didn't find it, so try to get the library version of the target + * from the host - the result will be cached, so we will only have + * to retrieve it once */ + (void)strncpy(wildcard.nspace, proc->nspace, PMIX_MAX_NSLEN); + wildcard.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&wildcard, PMIX_BFROPS_MODULE, NULL, 0, &value))) { + /* couldn't get it - nothing we can do */ + return NULL; + } + /* setup a peer for this nspace */ + peer = PMIX_NEW(pmix_peer_t); + if (NULL == peer) { + PMIX_RELEASE(value); + return NULL; + } + peer->nptr = PMIX_NEW(pmix_nspace_t); 
+ if (NULL == peer->nptr) { + PMIX_RELEASE(peer); + PMIX_RELEASE(value); + return NULL; + } + peer->nptr->nspace = strdup(proc->nspace); + /* assign a module to it based on the returned version */ + peer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(value->data.string); + PMIX_RELEASE(value); + if (NULL == peer->nptr->compat.bfrops) { + PMIX_RELEASE(peer); + return NULL; + } + /* cache the peer object */ + pmix_pointer_array_add(&pmix_server_globals.clients, peer); + return peer; + } + + // we are a client or tool + + /* If the target is for the server, then + * pack it using that peer. */ + if (0 == strncmp(proc->nspace, pmix_client_globals.myserver->info->pname.nspace, PMIX_MAX_NSLEN)) { + return pmix_client_globals.myserver; + } + + /* if the target is another member of my nspace, then + * they must be using the same version */ + if (0 == strncmp(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN)) { + return pmix_globals.mypeer; + } + + /* try to get the library version of this peer - the result will be + * cached, so we will only have to retrieve it once */ + (void)strncpy(wildcard.nspace, proc->nspace, PMIX_MAX_NSLEN); + wildcard.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&wildcard, PMIX_BFROPS_MODULE, NULL, 0, &value))) { + /* couldn't get it - nothing we can do */ + return NULL; + } + /* setup a peer for this nspace */ + peer = PMIX_NEW(pmix_peer_t); + if (NULL == peer) { + PMIX_RELEASE(value); + return NULL; + } + peer->nptr = PMIX_NEW(pmix_nspace_t); + if (NULL == peer->nptr) { + PMIX_RELEASE(peer); + PMIX_RELEASE(value); + return NULL; + } + peer->nptr->nspace = strdup(proc->nspace); + /* assign a module to it based on the returned version */ + peer->nptr->compat.bfrops = pmix_bfrops_base_assign_module(value->data.string); + PMIX_RELEASE(value); + if (NULL == peer->nptr->compat.bfrops) { + PMIX_RELEASE(peer); + return NULL; + } + /* need to cache the peer someplace so we can clean it + * up later */ + return peer; +} + 
+PMIX_EXPORT pmix_status_t PMIx_Data_pack(const pmix_proc_t *target, + pmix_data_buffer_t *buffer, void *src, int32_t num_vals, pmix_data_type_t type) { pmix_status_t rc; pmix_buffer_t buf; + pmix_peer_t *peer; + + if (NULL == (peer = find_peer(target))) { + return PMIX_ERR_NOT_SUPPORTED; + } /* setup the host */ PMIX_CONSTRUCT(&buf, pmix_buffer_t); @@ -80,7 +191,7 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, PMIX_EMBED_DATA_BUFFER(&buf, buffer); /* pack the value */ - PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, + PMIX_BFROPS_PACK(rc, peer, &buf, src, num_vals, type); /* extract the data buffer - the pointers may have changed */ @@ -91,12 +202,18 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(pmix_data_buffer_t *buffer, } -PMIX_EXPORT pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *dest, +PMIX_EXPORT pmix_status_t PMIx_Data_unpack(const pmix_proc_t *source, + pmix_data_buffer_t *buffer, void *dest, int32_t *max_num_values, pmix_data_type_t type) { pmix_status_t rc; pmix_buffer_t buf; + pmix_peer_t *peer; + + if (NULL == (peer = find_peer(source))) { + return PMIX_ERR_NOT_SUPPORTED; + } /* setup the host */ PMIX_CONSTRUCT(&buf, pmix_buffer_t); @@ -105,7 +222,7 @@ PMIX_EXPORT pmix_status_t PMIx_Data_unpack(pmix_data_buffer_t *buffer, void *des PMIX_EMBED_DATA_BUFFER(&buf, buffer); /* unpack the value */ - PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, + PMIX_BFROPS_UNPACK(rc, peer, &buf, dest, max_num_values, type); /* extract the data buffer - the pointers may have changed */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c index c7aba47dd7..2f32cd4bd7 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c @@ -101,6 +101,7 @@ static void nscon(pmix_nspace_t *p) p->nspace = NULL; p->nlocalprocs = 0; p->all_registered = false; + p->version_stored = false; p->jobbkt = NULL; p->ndelivered = 0; 
PMIX_CONSTRUCT(&p->ranks, pmix_list_t); diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h index 3963a33b72..3cdbc7b524 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h @@ -51,6 +51,8 @@ BEGIN_C_DECLS #define PMIX_MAX_CRED_SIZE 131072 // set max at 128kbytes #define PMIX_MAX_ERR_CONSTANT INT_MIN +/* internal-only attributes */ +#define PMIX_BFROPS_MODULE "pmix.bfrops.mod" // (char*) name of bfrops plugin in-use by a given nspace /* define an internal-only process name that has * a dynamically-sized nspace field to save memory */ @@ -152,6 +154,7 @@ typedef struct { char *nspace; size_t nlocalprocs; bool all_registered; // all local ranks have been defined + bool version_stored; // the version string used by this nspace has been stored pmix_buffer_t *jobbkt; // packed version of jobinfo size_t ndelivered; // count of #local clients that have received the jobinfo pmix_list_t ranks; // list of pmix_rank_info_t for connection support of my clients diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c index 4d7a2b8549..95bcec0a08 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* * $COPYRIGHT$ @@ -112,6 +112,7 @@ typedef struct { pmix_hash_table_t internal; pmix_hash_table_t remote; pmix_hash_table_t local; + bool gdata_added; } pmix_hash_trkr_t; static void htcon(pmix_hash_trkr_t *p) @@ -124,6 +125,7 @@ static void htcon(pmix_hash_trkr_t *p) pmix_hash_table_init(&p->remote, 256); PMIX_CONSTRUCT(&p->local, pmix_hash_table_t); pmix_hash_table_init(&p->local, 256); + p->gdata_added = false; } static void htdes(pmix_hash_trkr_t *p) { @@ -515,26 +517,29 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns, } /* now add any global data that was provided */ - PMIX_LIST_FOREACH(kvptr, &pmix_server_globals.gdata, pmix_kval_t) { - /* sadly, the data cannot simultaneously exist on two lists, - * so we must make a copy of it here */ - kp2 = PMIX_NEW(pmix_kval_t); - if (NULL == kp2) { - rc = PMIX_ERR_NOMEM; - goto release; - } - kp2->key = strdup(kvptr->key); - PMIX_VALUE_XFER(rc, kp2->value, kvptr->value); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(kp2); - goto release; - } - if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE(kp2); - break; + if (!trk->gdata_added) { + PMIX_LIST_FOREACH(kvptr, &pmix_server_globals.gdata, pmix_kval_t) { + /* sadly, the data cannot simultaneously exist on two lists, + * so we must make a copy of it here */ + kp2 = PMIX_NEW(pmix_kval_t); + if (NULL == kp2) { + rc = PMIX_ERR_NOMEM; + goto release; + } + kp2->key = strdup(kvptr->key); + PMIX_VALUE_XFER(rc, kp2->value, kvptr->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + break; + } } + trk->gdata_added = true; } release: diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c index 12187b03c2..cc11453f5d 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * * $COPYRIGHT$ * @@ -106,7 +108,6 @@ static pmix_status_t generate_node_regex(const char *input, len = strlen(vptr); startnum = -1; memset(prefix, 0, PMIX_MAX_NODE_PREFIX); - numdigits = 0; for (i=0, j=0; i < len; i++) { if (!isalpha(vptr[i])) { /* found a non-alpha char */ @@ -120,7 +121,6 @@ static pmix_status_t generate_node_regex(const char *input, /* count the size of the numeric field - but don't * add the digits to the prefix */ - numdigits++; if (startnum < 0) { /* okay, this defines end of the prefix */ startnum = i; @@ -147,8 +147,10 @@ static pmix_status_t generate_node_regex(const char *input, vnum = strtol(&vptr[startnum], &sfx, 10); if (NULL != sfx) { suffix = strdup(sfx); + numdigits = (int)(sfx - &vptr[startnum]); } else { suffix = NULL; + numdigits = (int)strlen(&vptr[startnum]); } /* is this value already on our list? */ found = false; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_frame.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_frame.c index 7ca906edbc..2f1fd4f6a0 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_frame.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_frame.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -53,19 +53,23 @@ #include "src/mca/ptl/base/static-components.h" +#define PMIX_MAX_MSG_SIZE 16 + /* Instantiate the global vars */ pmix_ptl_globals_t pmix_ptl_globals = {{{0}}}; int pmix_ptl_base_output = -1; +static size_t max_msg_size = PMIX_MAX_MSG_SIZE; + static int pmix_ptl_register(pmix_mca_base_register_flag_t flags) { - pmix_ptl_globals.max_msg_size = 8000000; pmix_mca_base_var_register("pmix", "ptl", "base", "max_msg_size", - "Max size (in bytes) of a client/server msg", + "Max size (in Mbytes) of a client/server msg", PMIX_MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, PMIX_INFO_LVL_2, PMIX_MCA_BASE_VAR_SCOPE_READONLY, - &pmix_ptl_globals.max_msg_size); + &max_msg_size); + pmix_ptl_globals.max_msg_size = max_msg_size * 1024 * 1024; return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index d653571a2a..863bd3b5d3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -1215,8 +1215,8 @@ static void connection_handler(int sd, short args, void *cbdata) } else { peer->nptr->compat.gds = pmix_gds_base_assign_module(NULL, 0); } - free(msg); // can now release the data buffer if (NULL == peer->nptr->compat.gds) { + free(msg); info->proc_cnt--; pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); PMIX_RELEASE(peer); @@ -1224,6 +1224,17 @@ static void connection_handler(int sd, short args, void *cbdata) goto error; } + /* if we haven't previously stored the version for this + * nspace, do so now */ + if (!nptr->version_stored) { + PMIX_INFO_LOAD(&ginfo, PMIX_BFROPS_MODULE, peer->nptr->compat.bfrops->version, PMIX_STRING); + PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, peer->nptr, &ginfo, 1); + PMIX_INFO_DESTRUCT(&ginfo); + nptr->version_stored = true; + } + + free(msg); // can now release the data buffer + /* the choice of PTL module is obviously us */ peer->nptr->compat.ptl = &pmix_ptl_tcp_module; @@ -1447,6 +1458,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) /* set the gds */ PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, pnd->gds, PMIX_STRING); peer->nptr->compat.gds = pmix_gds_base_assign_module(&ginfo, 1); + PMIX_INFO_DESTRUCT(&ginfo); if (NULL == peer->nptr->compat.gds) { PMIX_RELEASE(peer); pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); @@ -1455,6 +1467,15 @@ static void process_cbfunc(int sd, short args, void *cbdata) goto done; } + /* if we haven't previously stored the version for this + * nspace, do so now */ + if (!peer->nptr->version_stored) { + PMIX_INFO_LOAD(&ginfo, PMIX_BFROPS_MODULE, peer->nptr->compat.bfrops->version, PMIX_STRING); + PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, peer->nptr, &ginfo, 1); + PMIX_INFO_DESTRUCT(&ginfo); + nptr->version_stored = true; + } + /* validate the connection */ cred.bytes = pnd->cred; cred.size = pnd->len; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c 
b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c index 493d4f3055..b09e147ace 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -668,12 +668,21 @@ static void connection_handler(int sd, short args, void *cbdata) goto error; } + /* if we haven't previously stored the version for this + * nspace, do so now */ + if (!nptr->version_stored) { + PMIX_INFO_LOAD(&ginfo, PMIX_BFROPS_MODULE, nptr->compat.bfrops->version, PMIX_STRING); + PMIX_GDS_CACHE_JOB_INFO(rc, pmix_globals.mypeer, nptr, &ginfo, 1); + PMIX_INFO_DESTRUCT(&ginfo); + nptr->version_stored = true; + } + /* the choice of PTL module was obviously made by the connecting * tool as we received this request via that channel, so simply * record it here for future use */ nptr->compat.ptl = &pmix_ptl_usock_module; - /* validate the connection */ + /* validate the connection - the macro will send the status result to the client */ PMIX_PSEC_VALIDATE_CONNECTION(rc, psave, NULL, 0, NULL, 0, &cred); /* now done with the msg */ free(msg); diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c index 3d1418d476..7155e750ed 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c @@ -871,7 +871,12 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) * store the data first so we can immediately satisfy any future * requests. 
Then, rather than duplicate the resolve code here, we * will let the pmix_pending_resolve function go ahead and retrieve - * it from the GDS */ + * it from the GDS + * + * NOTE: if the data returned is NULL, then it has already been + * stored (e.g., via a register_nspace call in response to a request + * for job-level data). For now, we will retrieve it so it can + * be stored for each peer */ if (PMIX_SUCCESS == caddy->status) { /* cycle across all outstanding local requests and collect their * unique nspaces so we can store this for each one */ @@ -906,8 +911,11 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_server_globals.clients, rinfo->peerid); } PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - - PMIX_LOAD_BUFFER(pmix_globals.mypeer, &pbkt, caddy->data, caddy->ndata); + if (NULL == caddy->data) { + PMIX_GDS_REGISTER_JOB_INFO(rc, pmix_globals.mypeer, &pbkt); + } else { + PMIX_LOAD_BUFFER(pmix_globals.mypeer, &pbkt, caddy->data, caddy->ndata); + } /* unpack and store it*/ kv = PMIX_NEW(pmix_kval_t); cnt = 1; diff --git a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c index 31020c56a8..0509a5cb2f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c @@ -295,6 +295,8 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); + PMIX_CONSTRUCT(&pmix_client_globals.peers, pmix_pointer_array_t); + pmix_pointer_array_init(&pmix_client_globals.peers, 1, INT_MAX, 1); pmix_client_globals.myserver = PMIX_NEW(pmix_peer_t); pmix_client_globals.myserver->nptr = PMIX_NEW(pmix_nspace_t); @@ -386,7 +388,13 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, if (NULL == pmix_globals.mypeer->nptr->nspace) { pmix_globals.mypeer->nptr->nspace = strdup(proc->nspace); } - (void)strncpy(pmix_globals.mypeer->info->pname.nspace, proc->nspace, 
PMIX_MAX_NSLEN); + /* setup a rank_info object for us */ + pmix_globals.mypeer->info = PMIX_NEW(pmix_rank_info_t); + if (NULL == pmix_globals.mypeer->info) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_NOMEM; + } + pmix_globals.mypeer->info->pname.nspace = strdup(proc->nspace); pmix_globals.mypeer->info->pname.rank = proc->rank; /* increment our init reference counter */ @@ -748,6 +756,8 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) pmix_status_t rc; pmix_tool_timeout_t tev; struct timeval tv = {2, 0}; + int n; + pmix_peer_t *peer; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (1 != pmix_globals.init_cntr) { @@ -818,6 +828,11 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) PMIX_RELEASE(pmix_client_globals.myserver); PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests); + for (n=0; n < pmix_client_globals.peers.size; n++) { + if (NULL != (peer = (pmix_peer_t*)pmix_pointer_array_get_item(&pmix_client_globals.peers, n))) { + PMIX_RELEASE(peer); + } + } /* shutdown services */ pmix_rte_finalize(); diff --git a/opal/mca/pmix/pmix3x/pmix/test/Makefile.am b/opal/mca/pmix/pmix3x/pmix/test/Makefile.am index 9140989a4e..64ad119878 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/test/Makefile.am @@ -12,6 +12,8 @@ # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. # Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -68,7 +70,7 @@ pmix_client_LDADD = \ $(top_builddir)/src/libpmix.la pmix_regex_SOURCES = $(headers) \ - pmix_regex.c test_common.c cli_stages.c utils.c + pmix_regex.c test_common.c cli_stages.c server_callbacks.c utils.c pmix_regex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) pmix_regex_LDADD = \ $(top_builddir)/src/libpmix.la diff --git a/opal/mca/pmix/pmix3x/pmix/test/pmix_regex.c b/opal/mca/pmix/pmix3x/pmix/test/pmix_regex.c index cb347f704a..35a671305d 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/pmix_regex.c +++ b/opal/mca/pmix/pmix3x/pmix/test/pmix_regex.c @@ -14,6 +14,8 @@ * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,6 +39,9 @@ #define TEST_NODES "odin001,odin002,odin003,odin010,odin011,odin075" #define TEST_PROCS "1,2,3,4;5-8;9,11-12;17-20;21-24;100" +#define TEST_NODES2 "c712f6n01,c712f6n02,c712f6n03" + +bool spawn_wait = false; int main(int argc, char **argv) { @@ -52,6 +57,8 @@ int main(int argc, char **argv) TEST_VERBOSE(("Testing version %s", PMIx_Get_version())); + PMIx_server_init(&mymodule, NULL, 0); + TEST_VERBOSE(("Start PMIx regex smoke test")); fprintf(stderr, "NODES: %s\n", TEST_NODES); @@ -86,5 +93,20 @@ int main(int argc, char **argv) fprintf(stderr, "PPN reverse failed: %d\n", rc); } + fprintf(stderr, "NODES: %s\n", TEST_NODES2); + PMIx_generate_regex(TEST_NODES2, &regex); + fprintf(stderr, "REGEX: %s\n\n", regex); + /* test reverse parsing */ + rc = pmix_preg.parse_nodes(regex, &nodes); + free(regex); + if (PMIX_SUCCESS == rc) { + regex = pmix_argv_join(nodes, ','); + pmix_argv_free(nodes); + fprintf(stderr, "NODES: %s\n", TEST_NODES2); + fprintf(stderr, "RSULT: %s\n\n\n", regex); +
free(regex); + } else { + fprintf(stderr, "Node reverse failed: %d\n\n\n", rc); + } return 0; } diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/test_pmix.c b/opal/mca/pmix/pmix3x/pmix/test/simple/test_pmix.c index 6bfa3d793d..8ecf6c1f6b 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/test_pmix.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/test_pmix.c @@ -5,63 +5,63 @@ int main(int argc, char **argv) { - pmix_proc_t myproc; - pmix_status_t rc; + pmix_proc_t myproc; + pmix_status_t rc; - int rank; - rc = PMIx_Init(&myproc, NULL, 0); - assert(PMIX_SUCCESS == rc); + int rank; + rc = PMIx_Init(&myproc, NULL, 0); + assert(PMIX_SUCCESS == rc); - { - pmix_value_t *value; - rc = PMIx_Get(&myproc, PMIX_RANK, NULL, 0, &value); - assert(PMIX_SUCCESS == rc); - printf("%d\n", value->type); - assert(value->type == PMIX_INT); - rank = value->data.uint32; - PMIX_VALUE_RELEASE(value); - } + { + pmix_value_t *value; + rc = PMIx_Get(&myproc, PMIX_RANK, NULL, 0, &value); + assert(PMIX_SUCCESS == rc); + printf("%d\n", value->type); + assert(value->type == PMIX_INT); + rank = value->data.uint32; + PMIX_VALUE_RELEASE(value); + } - if (rank == 0 ) { - pmix_info_t *info; - PMIX_INFO_CREATE(info, 1); - snprintf(info[0].key, PMIX_MAX_KEYLEN, "magic-found"); - info[0].value.type = PMIX_STRING; - info[0].value.data.string = "yes"; - rc = PMIx_Publish(info, 1); - assert(PMIX_SUCCESS == rc); - } + if (rank == 0 ) { + pmix_info_t *info; + PMIX_INFO_CREATE(info, 1); + snprintf(info[0].key, PMIX_MAX_KEYLEN, "magic-found"); + info[0].value.type = PMIX_STRING; + info[0].value.data.string = "yes"; + rc = PMIx_Publish(info, 1); + assert(PMIX_SUCCESS == rc); + } - printf("I am rank %d\n", rank); + printf("I am rank %d\n", rank); - { - bool flag; - pmix_info_t *info; - PMIX_INFO_CREATE(info, 1); - flag = true; - PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - rc = PMIx_Fence(&myproc, 1, info, 1); - assert(PMIX_SUCCESS == rc); - PMIX_INFO_FREE(info, 1); - } + { + bool flag; + pmix_info_t 
*info; + PMIX_INFO_CREATE(info, 1); + flag = true; + PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); + rc = PMIx_Fence(&myproc, 1, info, 1); + assert(PMIX_SUCCESS == rc); + PMIX_INFO_FREE(info, 1); + } - if (rank == 1) { - int i; - pmix_pdata_t *pdata; - PMIX_PDATA_CREATE(pdata, 2); - snprintf(pdata[0].key, PMIX_MAX_KEYLEN, "magic-found"); - snprintf(pdata[1].key, PMIX_MAX_KEYLEN, "magic-not-found"); - rc = PMIx_Lookup(&pdata[0], 2, NULL, 0); - assert((PMIX_SUCCESS == rc) || (PMIX_ERR_NOT_FOUND == rc)); - for ( i = 0 ; i < 2 ; i++ ) - if (pdata[i].value.type == PMIX_STRING) - printf("Found[%d] %d %s\n", i, pdata[i].value.type, pdata[i].value.data.string); - else - printf("Found[%d] %d\n", i, pdata[i].value.type); - PMIX_PDATA_FREE(pdata, 1); - } + if (rank == 1) { + int i; + pmix_pdata_t *pdata; + PMIX_PDATA_CREATE(pdata, 2); + snprintf(pdata[0].key, PMIX_MAX_KEYLEN, "magic-found"); + snprintf(pdata[1].key, PMIX_MAX_KEYLEN, "magic-not-found"); + rc = PMIx_Lookup(&pdata[0], 2, NULL, 0); + assert((PMIX_SUCCESS == rc) || (PMIX_ERR_NOT_FOUND == rc)); + for ( i = 0 ; i < 2 ; i++ ) + if (pdata[i].value.type == PMIX_STRING) + printf("Found[%d] %d %s\n", i, pdata[i].value.type, pdata[i].value.data.string); + else + printf("Found[%d] %d\n", i, pdata[i].value.type); + PMIX_PDATA_FREE(pdata, 1); + } - rc = PMIx_Finalize(NULL, 0); - assert(PMIX_SUCCESS == rc); + rc = PMIx_Finalize(NULL, 0); + assert(PMIX_SUCCESS == rc); } diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index f3e8b9ffeb..f95fb51a20 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -394,6 +394,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } if (0 != flag) { + opal_output(0, "UNPACKING PRIOR JOBS"); /* unpack the buffer containing the info */ cnt=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) { @@ -418,6 +419,7 @@ int 
orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, /* nope - add it */ opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata); } else { + opal_output(0, "DROPPING COPY"); /* yep - so we can drop this copy */ jdata->jobid = ORTE_JOBID_INVALID; OBJ_RELEASE(jdata); @@ -508,6 +510,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, * and sent us the complete array of procs in the orte_job_t, so we * don't need to do anything more here */ if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { + opal_output(0, "JOB NOT FULLY DESCRIBED"); if (!ORTE_PROC_IS_HNP) { /* extract the ppn regex */ cnt = 1; @@ -524,11 +527,13 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } free(ppn); /* now assign locations to the procs */ + opal_output(0, "ASSIGN LOCS"); if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { ORTE_ERROR_LOG(rc); goto REPORT_ERROR; } } + opal_output(0, "COMPUTE VPIDS"); /* compute the ranks and add the proc objects * to the jdata->procs array */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) { diff --git a/orte/orted/pmix/pmix_server_fence.c b/orte/orted/pmix/pmix_server_fence.c index 59d5cd1902..fe0f942cd1 100644 --- a/orte/orted/pmix/pmix_server_fence.c +++ b/orte/orted/pmix/pmix_server_fence.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2014-2017 Research Organization for Information Science @@ -44,6 +44,7 @@ #include "orte/mca/rml/rml.h" #include "pmix_server_internal.h" +#include "pmix_server.h" static void relcb(void *cbdata) { @@ -193,6 +194,24 @@ static void dmodex_req(int sd, short args, void *cbdata) } return; } + /* if this is a request for rank=WILDCARD, then they want the job-level data + * for this job. It was probably not stored locally because we aren't hosting + * any local procs. There is no need to request the data as we already have + * it - so just register the nspace so the local PMIx server gets it */ + if (ORTE_VPID_WILDCARD == req->target.vpid) { + rc = orte_pmix_server_register_nspace(jdata, true); + if (ORTE_SUCCESS != rc) { + goto callback; + } + /* let the server know that the data is now available */ + if (NULL != req->mdxcbfunc) { + req->mdxcbfunc(rc, NULL, 0, req->cbdata, NULL, NULL); + } + OBJ_RELEASE(req); + return; + } + + /* if they are asking about a specific proc, then fetch it */ if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, req->target.vpid))) { /* if we find the job, but not the process, then that is an error */ ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); From 10be1df1d343e2960dbf616ec6138952994ccac6 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 3 Feb 2018 20:01:46 -0800 Subject: [PATCH 2/2] Remove debug and add target/probe programs Signed-off-by: Ralph Castain (cherry picked from commit 9a03007115fc8978f4eb5fd938c05b26adbd433e) --- orte/mca/odls/base/odls_base_default_fns.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index f95fb51a20..f3e8b9ffeb 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -394,7 +394,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } if (0 != flag) { - opal_output(0, "UNPACKING PRIOR JOBS"); /* unpack the 
buffer containing the info */ cnt=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) { @@ -419,7 +418,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, /* nope - add it */ opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata); } else { - opal_output(0, "DROPPING COPY"); /* yep - so we can drop this copy */ jdata->jobid = ORTE_JOBID_INVALID; OBJ_RELEASE(jdata); @@ -510,7 +508,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, * and sent us the complete array of procs in the orte_job_t, so we * don't need to do anything more here */ if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { - opal_output(0, "JOB NOT FULLY DESCRIBED"); if (!ORTE_PROC_IS_HNP) { /* extract the ppn regex */ cnt = 1; @@ -527,13 +524,11 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, } free(ppn); /* now assign locations to the procs */ - opal_output(0, "ASSIGN LOCS"); if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { ORTE_ERROR_LOG(rc); goto REPORT_ERROR; } } - opal_output(0, "COMPUTE VPIDS"); /* compute the ranks and add the proc objects * to the jdata->procs array */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {