From baef25338a84d208cffe63bd93c6d4e6a3b1e346 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 4 Feb 2019 10:10:58 -0800 Subject: [PATCH] Update to latest PRI master Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix4x/pmix/HACKING | 33 +++- opal/mca/pmix/pmix4x/pmix/INSTALL | 28 ++- opal/mca/pmix/pmix4x/pmix/LICENSE | 6 +- opal/mca/pmix/pmix4x/pmix/NEWS | 103 +++++++++- opal/mca/pmix/pmix4x/pmix/VERSION | 6 +- opal/mca/pmix/pmix4x/pmix/config/pmix.m4 | 8 + .../pmix4x/pmix/config/pmix_search_libs.m4 | 37 +++- .../pmix/pmix4x/pmix/config/pmix_setup_cc.m4 | 15 +- .../pmix4x/pmix/config/pmix_setup_hwloc.m4 | 15 +- .../pmix4x/pmix/config/pmix_setup_libevent.m4 | 3 +- opal/mca/pmix/pmix4x/pmix/contrib/pmix.spec | 2 +- opal/mca/pmix/pmix4x/pmix/examples/tool.c | 179 +++++++++++++++--- .../pmix/pmix4x/pmix/include/pmix_common.h.in | 6 +- .../pmix/src/client/pmix_client_spawn.c | 73 ++++++- .../pmix/pmix4x/pmix/src/common/pmix_query.c | 24 +++ .../pmix/src/event/pmix_event_notification.c | 113 ++++------- .../pmix/src/event/pmix_event_registration.c | 15 +- .../pmix4x/pmix/src/include/pmix_globals.h | 2 + .../pmix/src/mca/pnet/base/pnet_base_fns.c | 27 +-- .../pmix/src/mca/preg/base/preg_base_frame.c | 4 +- .../pmix/src/mca/preg/native/preg_native.c | 20 +- .../pmix4x/pmix/src/mca/preg/preg_types.h | 4 +- .../pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp.c | 42 +--- .../pmix/src/mca/ptl/tcp/ptl_tcp_component.c | 13 +- .../pmix/pmix4x/pmix/src/runtime/pmix_init.c | 19 +- .../pmix4x/pmix/src/server/pmix_server_ops.c | 122 ++++++++---- .../pmix4x/pmix/src/server/pmix_server_ops.h | 2 + .../pmix4x/pmix/src/threads/thread_usage.h | 10 +- .../mca/pmix/pmix4x/pmix/src/tool/pmix_tool.c | 9 + .../pmix/pmix4x/pmix/test/simple/simptest.c | 12 +- opal/mca/pmix/pmix4x/pmix/test/test_server.c | 55 ++---- opal/mca/pmix/pmix4x/pmix/test/test_server.h | 3 +- 32 files changed, 720 insertions(+), 290 deletions(-) diff --git a/opal/mca/pmix/pmix4x/pmix/HACKING 
b/opal/mca/pmix/pmix4x/pmix/HACKING index 0848c582e2..14b2c4c0b3 100644 --- a/opal/mca/pmix/pmix4x/pmix/HACKING +++ b/opal/mca/pmix/pmix4x/pmix/HACKING @@ -8,8 +8,8 @@ Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. -Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved -Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2013-2019 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -38,13 +38,21 @@ By definition, debugging builds will perform [much] slower than optimized builds of PMIx. You should *NOT* conduct timing tests or try to run production performance numbers with debugging builds. +NOTE: this version of PMIx requires the Libevent package to build +and operate. Any version of Libevent greater than or equal to +2.0.21 is acceptable. It optionally supports the HWLOC package +for providing topology information to both the host environment +(by collecting local inventory for rollup) and local client +processes. Any version of HWLOC greater than 1.10 is supported, +although versions in the 2.x series are recommended. + If you wish to build an optimized version of PMIx from a developer's checkout, you have three main options: 1. Use the "--with-platform=optimized" switch to configure. This is the preferred (and probably easiest) method. For example: - shell$ git clone git@github.com:pmix/master.git pmix + shell$ git clone https://github.com/pmix/pmix.git pmix shell$ cd pmix shell$ ./autogen.pl shell$ mkdir build @@ -57,7 +65,7 @@ developer's checkout, you have three main options: directory than the source tree -- one where the .git subdirectory is not present. 
For example: - shell$ git clone git@github.com:pmix/master.git pmix + shell$ git clone https://github.com/pmix/pmix.git pmix shell$ cd pmix shell$ ./autogen.pl shell$ mkdir build @@ -80,6 +88,15 @@ developer's checkout, you have three main options: shell$ make all install +Note that in all cases you must point configure at the libevent +installation using the --with-libevent= option if it is in +a non-standard location. Similarly, non-standard locations for +the HWLOC package must be specified using the --with-hwloc= +option. In both cases, PMIx will automatically detect these +packages in standard locations and build-in support for them +unless otherwise specified using the respective configure option. + + Use of GNU Autoconf, Automake, and Libtool (and m4) =================================================== @@ -96,7 +113,7 @@ required depend on if you are using the master or a release branch (and which release branch you are using). The specific versions can be found at: - https://pmix.github.io/pmix/faq/building + https://pmix.org/code/getting-the-reference-implementation/ You can check what versions of the autotools you have installed with the following: @@ -205,17 +222,17 @@ NOTE: On MacOS/X, the default "libtool" program is different than the m4, Autoconf and Automake build and install very quickly; Libtool will take a minute or two. -5. You can now run PMIx’s top-level "autogen.pl" script. This script +5. You can now run PMIx’s top-level "autogen.sh" script. This script will invoke the GNU Autoconf, Automake, and Libtool commands in the proper order and setup to run PMIx's top-level "configure" script. - 5a. You generally need to run autogen.pl only when the top-level + 5a. You generally need to run autogen.sh only when the top-level file "configure.ac" changes, or any files in the config/ or /config/ directories change (these directories are where a lot of "include" files for PMI’xs configure script live). - 5b. 
You do *NOT* need to re-run autogen.pl if you modify a + 5b. You do *NOT* need to re-run autogen.sh if you modify a Makefile.am. Use of Flex diff --git a/opal/mca/pmix/pmix4x/pmix/INSTALL b/opal/mca/pmix/pmix4x/pmix/INSTALL index a91c183682..3fac5ad586 100644 --- a/opal/mca/pmix/pmix4x/pmix/INSTALL +++ b/opal/mca/pmix/pmix4x/pmix/INSTALL @@ -8,8 +8,8 @@ Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. -Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved -Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2013-2019 Intel, Inc. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -22,9 +22,9 @@ For More Information This file is a *very* short overview of building and installing the PMIx library. Much more information is available in the -FAQ section on the PMIx web site: +How-To section on the PMIx web site: - http://pmix.github.io/pmix/faq + https://pmix.org/support/how-to/ Developer Builds @@ -37,7 +37,7 @@ build PMIx. You must then run: shell$ ./autogen.pl You will need very recent versions of GNU Autoconf, Automake, and -Libtool. If autogen.pl fails, read the HACKING file. If anything +Libtool. If autogen.sh fails, read the HACKING file. If anything else fails, read the HACKING file. Finally, we suggest reading the HACKING file. @@ -56,6 +56,24 @@ shell$ ./configure --prefix=/where/to/install [...lots of output...] shell$ make all install +NOTE: this version of PMIx requires the Libevent package to build +and operate. Any version of Libevent greater than or equal to +2.0.21 is acceptable. + +NOTE: this version of PMIx optionally supports the HWLOC package +for providing topology information to both the host environment +(by collecting local inventory for rollup) and local client +processes. 
Any version of HWLOC greater than 1.10 is supported, +although versions in the 2.x series are recommended. + +Note that you must point configure at the libevent installation +using the --with-libevent=<dir> option if it is in a non-standard +location. Similarly, non-standard locations for the HWLOC package +must be specified using the --with-hwloc=<dir> option. In both +cases, PMIx will automatically detect these packages in standard +locations and build-in support for them unless otherwise specified +using the respective configure option. + If you need special access to install, then you can execute "make all" as a user with write permissions in the build tree, and a separate "make install" as a user with write permissions to the diff --git a/opal/mca/pmix/pmix4x/pmix/LICENSE b/opal/mca/pmix/pmix4x/pmix/LICENSE index f9e6f04791..3eb0a094e0 100644 --- a/opal/mca/pmix/pmix4x/pmix/LICENSE +++ b/opal/mca/pmix/pmix4x/pmix/LICENSE @@ -26,7 +26,7 @@ Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. Copyright (c) 2006-2010 The University of Houston. All rights reserved. Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. Copyright (c) 2007-2008 UT-Battelle, LLC. All rights reserved. -Copyright (c) 2007-2010 IBM Corporation. All rights reserved. +Copyright (c) 2007-2019 IBM Corporation. All rights reserved. Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing Centre, Federal Republic of Germany Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany @@ -36,7 +36,7 @@ Copyright (c) 2008-2009 Institut National de Recherche en Informatique. All rights reserved. Copyright (c) 2007 Lawrence Livermore National Security, LLC. All rights reserved. -Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. +Copyright (c) 2007-2019 Mellanox Technologies. All rights reserved. Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved.
Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. @@ -45,7 +45,7 @@ Copyright (c) 2010 ARM ltd. All rights reserved. Copyright (c) 2010-2011 Alex Brick . All rights reserved. Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights reserved. -Copyright (c) 2013-2014 Intel, Inc. All rights reserved. +Copyright (c) 2013-2019 Intel, Inc. All rights reserved. Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix4x/pmix/NEWS b/opal/mca/pmix/pmix4x/pmix/NEWS index 43d3cb4d49..6b5cebc3c1 100644 --- a/opal/mca/pmix/pmix4x/pmix/NEWS +++ b/opal/mca/pmix/pmix4x/pmix/NEWS @@ -1,4 +1,4 @@ -Copyright (c) 2015-2018 Intel, Inc. All rights reserved. +Copyright (c) 2015-2019 Intel, Inc. All rights reserved. Copyright (c) 2017 IBM Corporation. All rights reserved. $COPYRIGHT$ @@ -17,13 +17,64 @@ the README file, PMIx typically maintains two separate version series simultaneously - the current release and one that is locked to only bug fixes. Since these series are semi-independent of each other, a single NEWS-worthy item might apply to different series. For -example, a bug might be fixed in the master, and then moved to the -current release as well as the "stable" bug fix release branch. - +example, a bug might be fixed in the master, and then moved to +multiple release branches. 
Master (not on release branches yet) ------------------------------------ +3.1.2 -- 24 Jan 2019 +---------------------- + - Fix a bug in macro identifying system events + - Restore some non-standard macros to the pmix_extend.h + header - these are considered "deprecated" and will be + removed from public-facing headers in future releases + + +3.1.1 -- 18 Jan 2019 +---------------------- +- Fix a bug in registration of default event handlers + that somehow slipped thru testing + + +3.1.0 -- 17 Jan 2019 +---------------------- +**** THIS RELEASE MARKS THE STARTING POINT FOR FULL COMPLIANCE +**** WITH THE PMIX v3 STANDARD. ALL API BEHAVIORS AND ATTRIBUTE +**** DEFINITIONS MEET THE v3 STANDARD SPECIFICATIONS. + - Add a new, faster dstore GDS component 'ds21' + - Performance optimizations for the dstore GDS components. + - Plug miscellaneous memory leaks + - Silence an unnecessary warning message when checking connection + to a non-supporting server + - Ensure lost-connection events get delivered to default event + handlers + - Correctly handle cache refresh for queries + - Protect against race conditions between host and internal library + when dealing with async requests + - Cleanup tool operations and add support for connections to + remote servers. Initial support for debugger direct/indirect + launch verified with PRRTE. Cleanup setting of tmpdir options. + Drop rendezvous files when acting as a launcher + - Automatically store the server URI for easy access by client + - Provide MCA parameter to control TCP connect retry/timeout + - Update event notification system to properly evict oldest events + when more space is needed + - Fix a number of error paths + - Update IOF cache code to properly drop oldest message. Provide + MCA parameter for setting cache size. + - Handle setsockopt(SO_RCVTIMEO) not being supported + - Ensure that epilogs get run even when connections unexpectedly + terminate. 
Properly split epilog strings to process multiple + paths + - Pass the tool's command line to the server so it can be returned + in queries + - Add support for C11 atomics + - Support collection and forwarding of fabric-specific envars + - Improve handling of hwloc configure option + - Fix PMIx_server_generate_regex to preserve node ordering + - Fix a bug when registering default event handlers + 3.0.2 -- 18 Sept 2018 ---------------------- @@ -93,6 +144,46 @@ Master (not on release branches yet) - Fix several memory and file descriptor leaks +2.2.2 -- 24 Jan 2019 +---------------------- + - Fix a bug in macro identifying system events + + +2.2.1 -- 18 Jan 2019 +---------------------- + - Fix a bug in registration of default event handlers + that somehow slipped thru testing + + +2.2.0 -- 17 Jan 2019 +---------------------- +**** THIS RELEASE MARKS THE STARTING POINT FOR FULL COMPLIANCE +**** WITH THE PMIX v2.2 STANDARD. ALL API BEHAVIORS AND ATTRIBUTE +**** DEFINITIONS MEET THE v2.2 STANDARD SPECIFICATIONS. + - Add a new, faster dstore GDS component 'ds21' + - Performance optimizations for the dstore GDS components. + - Plug miscellaneous memory leaks + - Silence an unnecessary warning message when checking connection + to a non-supporting server + - Ensure lost-connection events get delivered to default event + handlers + - Correctly handle cache refresh for queries + - Protect against race conditions between host and internal library + when dealing with async requests + - Cleanup tool operations and add support for connections to + remote servers. 
+ - Automatically store the server URI for easy access by client + - Provide MCA parameter to control TCP connect retry/timeout + - Update event notification system to properly evict oldest events + when more space is needed + - Fix a number of error paths + - Handle setsockopt(SO_RCVTIMEO) not being supported + - Pass the tool's command line to the server so it can be returned + in queries + - Add support for C11 atomics + - Fix a bug when registering default event handlers + + 2.1.4 -- 18 Sep 2018 ---------------------- - Updated configury to silence warnings on older compilers @@ -127,7 +218,7 @@ Master (not on release branches yet) - Fix several memory and file descriptor leaks -2.1.1 -- 5 Mar 2018 +2.1.1 -- 23 Feb 2018 ---------------------- - Fix direct modex when receiving new nspace - Resolve direct modex of job-level info @@ -154,7 +245,7 @@ Master (not on release branches yet) sets of numbers -2.0.3 -- TBD +2.0.3 -- 1 Feb 2018 ---------------------- - Fix event notification so all sides of multi-library get notified of other library's existence diff --git a/opal/mca/pmix/pmix4x/pmix/VERSION b/opal/mca/pmix/pmix4x/pmix/VERSION index 0b23840c8f..f472bda640 100644 --- a/opal/mca/pmix/pmix4x/pmix/VERSION +++ b/opal/mca/pmix/pmix4x/pmix/VERSION @@ -23,14 +23,14 @@ release=0 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek=a1 +greek= # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". 
-repo_rev=gitb5863c9c +repo_rev=gita1212af2 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jan 10, 2019" +date="Feb 04, 2019" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix4x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix4x/pmix/config/pmix.m4 index 5e281b9365..631fbf1c9a 100644 --- a/opal/mca/pmix/pmix4x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix4x/pmix/config/pmix.m4 @@ -645,6 +645,11 @@ AC_DEFUN([PMIX_SETUP_CORE],[ pmix_show_title "Library and Function tests" + # Darwin doesn't need -lutil, as it's something other than this -lutil. + PMIX_SEARCH_LIBS_CORE([openpty], [util]) + + PMIX_SEARCH_LIBS_CORE([gethostbyname], [nsl]) + PMIX_SEARCH_LIBS_CORE([socket], [socket]) # IRIX and CentOS have dirname in -lgen, usually in libc @@ -653,6 +658,9 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib PMIX_SEARCH_LIBS_CORE([ceil], [m]) + # -lrt might be needed for clock_gettime + PMIX_SEARCH_LIBS_CORE([clock_gettime], [rt]) + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp]) # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get diff --git a/opal/mca/pmix/pmix4x/pmix/config/pmix_search_libs.m4 b/opal/mca/pmix/pmix4x/pmix/config/pmix_search_libs.m4 index 9d7a8af9a7..d900c8a93b 100644 --- a/opal/mca/pmix/pmix4x/pmix/config/pmix_search_libs.m4 +++ b/opal/mca/pmix/pmix4x/pmix/config/pmix_search_libs.m4 @@ -1,7 +1,7 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -21,7 +21,13 @@ dnl # PMIX_SEARCH_LIBS_COMPONENT. The reason why is because this macro # calls PMIX_WRAPPER_FLAGS_ADD -- see big comment in # pmix_setup_wrappers.m4 for an explanation of why this is bad). +# NOTE: PMIx doesn't have wrapper compilers, so this is not an issue +# here - we leave the note just for downstream compatibility AC_DEFUN([PMIX_SEARCH_LIBS_CORE],[ + + PMIX_VAR_SCOPE_PUSH([LIBS_save add]) + LIBS_save=$LIBS + AC_SEARCH_LIBS([$1], [$2], [pmix_have_$1=1 $3], @@ -31,4 +37,33 @@ AC_DEFUN([PMIX_SEARCH_LIBS_CORE],[ AC_DEFINE_UNQUOTED([PMIX_HAVE_]m4_toupper($1), [$pmix_have_$1], [whether $1 is found and available]) + PMIX_VAR_SCOPE_POP +])dnl + +# PMIX SEARCH_LIBS_COMPONENT(prefix, func, list-of-libraries, +# action-if-found, action-if-not-found, +# other-libraries) +# +# Same as PMIX SEARCH_LIBS_CORE, above, except that we don't call PMIX +# WRAPPER_FLAGS_ADD. Instead, we add it to the ${prefix}_LIBS +# variable (i.e., $prefix is usually "framework_component", such as +# "fbtl_posix"). +AC_DEFUN([PMIX_SEARCH_LIBS_COMPONENT],[ + + PMIX_VAR_SCOPE_PUSH([LIBS_save add]) + LIBS_save=$LIBS + + AC_SEARCH_LIBS([$2], [$3], + [ # Found it! See if anything was added to LIBS + add=`printf '%s\n' "$LIBS" | sed -e "s/$LIBS_save$//"` + AS_IF([test -n "$add"], + [PMIX_FLAGS_APPEND_UNIQ($1_LIBS, [$add])]) + $1_have_$2=1 + $4], + [$1_have_$2=0 + $5], [$6]) + + AC_DEFINE_UNQUOTED([PMIX_HAVE_]m4_toupper($1), [$$1_have_$2], + [whether $1 is found and available]) + PMIX_VAR_SCOPE_POP ])dnl diff --git a/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_cc.m4 b/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_cc.m4 index f35c16197e..a8956a1d22 100644 --- a/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_cc.m4 +++ b/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_cc.m4 @@ -14,9 +14,9 @@ dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. 
dnl Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights dnl reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. -dnl Copyright (c) 2018 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2018-2019 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -60,7 +60,12 @@ AC_DEFUN([PMIX_PROG_CC_C11_HELPER],[ PMIX_CC_HELPER([if $CC $1 supports C11 _Static_assert], [pmix_prog_cc_c11_helper__static_assert_available], [[#include <stdint.h>]],[[_Static_assert(sizeof(int64_t) == 8, "WTH");]]) - AS_IF([test $pmix_prog_cc_c11_helper__Thread_local_available -eq 1 && test $pmix_prog_cc_c11_helper_atomic_var_available -eq 1], + PMIX_CC_HELPER([if $CC $1 supports C11 atomic_fetch_xor_explicit], [pmix_prog_cc_c11_helper_atomic_fetch_xor_explicit_available], + [[#include <stdatomic.h> +#include <stdint.h>]],[[_Atomic uint32_t a; uint32_t b; atomic_fetch_xor_explicit(&a, b, memory_order_relaxed);]]) + + + AS_IF([test $pmix_prog_cc_c11_helper__Thread_local_available -eq 1 && test $pmix_prog_cc_c11_helper_atomic_var_available -eq 1 && test $pmix_prog_cc_c11_helper_atomic_fetch_xor_explicit_available -eq 1], [$2], [$3]) @@ -128,7 +133,7 @@ AC_DEFUN([PMIX_SETUP_CC],[ AC_REQUIRE([_PMIX_PROG_CC]) AC_REQUIRE([AM_PROG_CC_C_O]) - PMIX_VAR_SCOPE_PUSH([pmix_prog_cc_c11_helper__Thread_local_available pmix_prog_cc_c11_helper_atomic_var_available pmix_prog_cc_c11_helper__Atomic_available pmix_prog_cc_c11_helper__static_assert_available pmix_prog_cc_c11_helper__Generic_available pmix_prog_cc__thread_available]) + PMIX_VAR_SCOPE_PUSH([pmix_prog_cc_c11_helper__Thread_local_available pmix_prog_cc_c11_helper_atomic_var_available pmix_prog_cc_c11_helper__Atomic_available pmix_prog_cc_c11_helper__static_assert_available pmix_prog_cc_c11_helper__Generic_available
pmix_prog_cc__thread_available pmix_prog_cc_c11_helper_atomic_fetch_xor_explicit_available]) PMIX_PROG_CC_C11 diff --git a/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_hwloc.m4 b/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_hwloc.m4 index 9139deb960..8f6ed75176 100644 --- a/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_hwloc.m4 +++ b/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_hwloc.m4 @@ -82,6 +82,17 @@ AC_DEFUN([_PMIX_HWLOC_EXTERNAL],[ AC_MSG_RESULT([$pmix_hwloc_dir and $pmix_hwloc_libdir])], [AC_MSG_RESULT([$with_hwloc_libdir])]) else + pmix_hwloc_dir=/usr/include + if test -d /usr/lib; then + pmix_hwloc_libdir=/usr/lib + elif test -d /usr/lib64; then + pmix_hwloc_libdir=/usr/lib64 + else + AC_MSG_RESULT([not found]) + AC_MSG_WARN([Could not find /usr/lib or /usr/lib64 - you may]) + AC_MSG_WARN([need to specify --with-hwloc_libdir=]) + AC_MSG_ERROR([Can not continue]) + fi AC_MSG_RESULT([(default search paths)]) pmix_hwloc_standard_header_location=yes pmix_hwloc_standard_lib_location=yes @@ -104,8 +115,8 @@ AC_DEFUN([_PMIX_HWLOC_EXTERNAL],[ [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_hwloc_CPPFLAGS)]) AS_IF([test "$pmix_hwloc_standard_lib_location" != "yes"], - [PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_hwloc_LIBS) - PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_hwloc_LDFLAGS)]) + [PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_hwloc_LDFLAGS)]) + PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_hwloc_LIBS) fi if test ! 
-z "$with_hwloc" && test "$with_hwloc" != "no" && test "$pmix_hwloc_support" != "1"; then diff --git a/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_libevent.m4 b/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_libevent.m4 index 1952c1bad6..2348a87aac 100644 --- a/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_libevent.m4 +++ b/opal/mca/pmix/pmix4x/pmix/config/pmix_setup_libevent.m4 @@ -118,9 +118,8 @@ AC_DEFUN([_PMIX_LIBEVENT_EXTERNAL],[ AS_IF([test "$pmix_event_defaults" = "no"], [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_libevent_CPPFLAGS) - PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_libevent_LIBS) PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_libevent_LDFLAGS)]) - + PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_libevent_LIBS) # Ensure that this libevent has the symbol # "evthread_set_lock_callbacks", which will only exist if diff --git a/opal/mca/pmix/pmix4x/pmix/contrib/pmix.spec b/opal/mca/pmix/pmix4x/pmix/contrib/pmix.spec index d8535bfee7..8f5a982460 100644 --- a/opal/mca/pmix/pmix4x/pmix/contrib/pmix.spec +++ b/opal/mca/pmix/pmix4x/pmix/contrib/pmix.spec @@ -192,7 +192,7 @@ Summary: An extended/exascale implementation of PMI Name: %{?_name:%{_name}}%{!?_name:pmix} -Version: 4.0.0a1 +Version: 4.0.0 Release: 1%{?dist} License: BSD Group: Development/Libraries diff --git a/opal/mca/pmix/pmix4x/pmix/examples/tool.c b/opal/mca/pmix/pmix4x/pmix/examples/tool.c index b8a2f247ec..e10699309e 100644 --- a/opal/mca/pmix/pmix4x/pmix/examples/tool.c +++ b/opal/mca/pmix/pmix4x/pmix/examples/tool.c @@ -37,16 +37,28 @@ static void cbfunc(pmix_status_t status, pmix_release_cbfunc_t release_fn, void *release_cbdata) { - myquery_data_t *mydata = (myquery_data_t*)cbdata; + myquery_data_t *mq = (myquery_data_t*)cbdata; + size_t n; - /* do something with the returned info - it will be + mq->lock.status = status; + + /* save the returned info - it will be * released in the release_fn */ - fprintf(stderr, "Query returned %s\n", PMIx_Error_string(status)); + if (0 < ninfo) { + PMIX_INFO_CREATE(mq->info, ninfo); + 
mq->ninfo = ninfo; + for (n=0; n < ninfo; n++) { + PMIX_INFO_XFER(&mq->info[n], &info[n]); + } + } + /* let the library release the data */ if (NULL != release_fn) { release_fn(release_cbdata); } - DEBUG_WAKEUP_THREAD(&mydata->lock); + + /* release the block */ + DEBUG_WAKEUP_THREAD(&mq->lock); } int main(int argc, char **argv) @@ -54,40 +66,155 @@ int main(int argc, char **argv) pmix_status_t rc; pmix_proc_t myproc; pmix_query_t *query; - size_t nq; + size_t nq, ninfo = 0, n, m; myquery_data_t mydata; - pmix_info_t info; + pmix_info_t *info = NULL, *iptr; + char *server_uri = NULL; + char *nspace = NULL; + char *nodename = NULL; + pmix_data_array_t *darray, *dptr; + bool geturi = false; + char hostname[1024]; - if (argc != 2) { - fprintf(stderr, "Must provide server URI as argument\n"); - exit(1); + gethostname(hostname, 1024); + for (n=1; n < (size_t)argc; n++) { + if (0 == strcmp("-u", argv[n]) || 0 == strcmp("--url", argv[n])) { + if (NULL == argv[n+1]) { + fprintf(stderr, "Must provide URI argument to %s option\n", argv[n]); + exit(1); + } + server_uri = argv[n+1]; + } else if (0 == strcmp("-nspace", argv[n]) || 0 == strcmp("--nspace", argv[n])) { + if (NULL == argv[n+1]) { + fprintf(stderr, "Must provide nspace argument to %s option\n", argv[n]); + exit(1); + } + nspace = argv[n+1]; + } else if (0 == strcmp("-uri", argv[n]) || 0 == strcmp("--uri", argv[n])) { + /* retrieve the PMIx server's uri from the indicated node */ + nodename = argv[n+1]; + geturi = true; + } } - PMIX_INFO_LOAD(&info, PMIX_SERVER_URI, argv[1], PMIX_STRING); - fprintf(stderr, "Connecting to %s\n", argv[1]); + if (NULL != server_uri) { + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + PMIX_INFO_LOAD(&info[0], PMIX_SERVER_URI, server_uri, PMIX_STRING); + fprintf(stderr, "Connecting to %s\n", server_uri); + } /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, &info, 1))) { + if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { fprintf(stderr, "PMIx_tool_init 
failed: %d\n", rc); exit(rc); } - fprintf(stderr, "Connected\n"); + if (NULL != info) { + PMIX_INFO_FREE(info, ninfo); + } - /* query something */ - nq = 2; - PMIX_QUERY_CREATE(query, nq); - query[0].keys = (char**)malloc(2 * sizeof(char*)); - query[0].keys[0] = strdup("foobar"); - query[0].keys[1] = NULL; - query[1].keys = (char**)malloc(2 * sizeof(char*)); - query[1].keys[0] = strdup("spastic"); - query[1].keys[1] = NULL; - DEBUG_CONSTRUCT_MYQUERY(&mydata); - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&mydata))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); + if (geturi) { + nq = 1; + PMIX_QUERY_CREATE(query, nq); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_SERVER_URI); + if (NULL != nodename) { + PMIX_QUERY_QUALIFIERS_CREATE(&query[0], 1); + PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_HOSTNAME, nodename, PMIX_STRING); + } + DEBUG_CONSTRUCT_MYQUERY(&mydata); + if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&mydata))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + DEBUG_WAIT_THREAD(&mydata.lock); + /* find the response */ + if (PMIX_SUCCESS == mydata.lock.status) { + /* should be in the first key */ + if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_SERVER_URI)) { + fprintf(stderr, "PMIx server URI for node %s: %s\n", + (NULL == nodename) ? 
hostname : nodename, + mydata.info[0].value.data.string); + } else { + fprintf(stderr, "Query returned wrong info key at first posn: %s\n", mydata.info[0].key); + } + } else { + fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); + } + DEBUG_DESTRUCT_MYQUERY(&mydata); goto done; } - DEBUG_WAIT_THREAD(&mydata.lock); + + if (NULL == nspace) { + /* query the list of active nspaces */ + nq = 1; + PMIX_QUERY_CREATE(query, nq); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_NAMESPACE_INFO); + DEBUG_CONSTRUCT_MYQUERY(&mydata); + if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&mydata))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + DEBUG_WAIT_THREAD(&mydata.lock); + /* find the response */ + if (PMIX_SUCCESS == mydata.lock.status) { + /* should be in the first key */ + if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_QUERY_NAMESPACE_INFO)) { + darray = mydata.info[0].value.data.darray; + fprintf(stderr, "ACTIVE NSPACES:\n"); + if (NULL == darray || 0 == darray->size || NULL == darray->array) { + fprintf(stderr, "\tNone\n"); + } else { + info = (pmix_info_t*)darray->array; + if (NULL == info) { + fprintf(stderr, "Error\n"); + } else { + for (n=0; n < darray->size; n++) { + dptr = info[n].value.data.darray; + if (NULL == dptr || 0 == dptr->size || NULL == dptr->array) { + fprintf(stderr, "Error in array %s\n", (NULL == dptr) ? 
"NULL" : "NON-NULL"); + break; + } + iptr = (pmix_info_t*)dptr->array; + for (m=0; m < dptr->size; m++) { + fprintf(stderr, "\t%s", iptr[m].value.data.string); + } + fprintf(stderr, "\n"); + } + } + } + } else { + fprintf(stderr, "Query returned wrong info key at first posn: %s\n", mydata.info[0].key); + } + } else { + fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); + } + DEBUG_DESTRUCT_MYQUERY(&mydata); + } else { + nq = 1; + PMIX_QUERY_CREATE(query, nq); + PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_JOB_SIZE); + PMIX_INFO_CREATE(query[0].qualifiers, 1); + query[0].nqual = 1; + PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, nspace, PMIX_STRING); + DEBUG_CONSTRUCT_MYQUERY(&mydata); + if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void*)&mydata))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc); + goto done; + } + DEBUG_WAIT_THREAD(&mydata.lock); + /* find the response */ + if (PMIX_SUCCESS == mydata.lock.status) { + /* should be in the first key */ + if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_JOB_SIZE)) { + fprintf(stderr, "JOB SIZE FOR NSPACE %s: %lu\n", nspace, (unsigned long)mydata.info[0].value.data.uint32); + } else { + fprintf(stderr, "Query returned wrong info key at first posn: %s\n", mydata.info[0].key); + } + } else { + fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); + } + DEBUG_DESTRUCT_MYQUERY(&mydata); + } done: /* finalize us */ diff --git a/opal/mca/pmix/pmix4x/pmix/include/pmix_common.h.in b/opal/mca/pmix/pmix4x/pmix/include/pmix_common.h.in index 0bd5d028e5..8522f07382 100644 --- a/opal/mca/pmix/pmix4x/pmix/include/pmix_common.h.in +++ b/opal/mca/pmix/pmix4x/pmix/include/pmix_common.h.in @@ -871,7 +871,7 @@ typedef int pmix_status_t; /* define a macro for identifying system event values */ #define PMIX_SYSTEM_EVENT(a) \ - (230 > (a) && -331 < (a)) + ((a) <= PMIX_ERR_NODE_DOWN && PMIX_ERR_SYS_OTHER 
<= (a)) /* used by event handlers */ #define PMIX_EVENT_NO_ACTION_TAKEN -331 @@ -1596,6 +1596,10 @@ typedef struct pmix_info { #define PMIX_INFO_IS_OPTIONAL(m) \ !((m)->flags & PMIX_INFO_REQD) +/* macro for testing end of the array */ +#define PMIX_INFO_IS_END(m) \ + (m)->flags & PMIX_INFO_ARRAY_END + /* define a special macro for checking if a boolean * info is true - when info structs are provided, a * type of PMIX_UNDEF is taken to imply a boolean "true" diff --git a/opal/mca/pmix/pmix4x/pmix/src/client/pmix_client_spawn.c b/opal/mca/pmix/pmix4x/pmix/src/client/pmix_client_spawn.c index ff8cc3a910..b7aefc4316 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/client/pmix_client_spawn.c +++ b/opal/mca/pmix/pmix4x/pmix/src/client/pmix_client_spawn.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . 
@@ -49,6 +49,7 @@ #include "src/class/pmix_list.h" #include "src/threads/threads.h" #include "src/mca/bfrops/bfrops.h" +#include "src/mca/pnet/base/base.h" #include "src/util/argv.h" #include "src/util/error.h" #include "src/util/output.h" @@ -119,6 +120,12 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin pmix_cmd_t cmd = PMIX_SPAWNNB_CMD; pmix_status_t rc; pmix_cb_t *cb; + size_t n, m; + pmix_app_t *aptr; + bool jobenvars = false; + char *harvest[2] = {"PMIX_MCA_", NULL}; + pmix_kval_t *kv; + pmix_list_t ilist; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -137,6 +144,70 @@ PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t nin } PMIX_RELEASE_THREAD(&pmix_global_lock); + /* check job info for directives */ + if (NULL != job_info) { + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&job_info[n], PMIX_SETUP_APP_ENVARS)) { + PMIX_CONSTRUCT(&ilist, pmix_list_t); + rc = pmix_pnet_base_harvest_envars(harvest, NULL, &ilist); + if (PMIX_SUCCESS != rc) { + PMIX_LIST_DESTRUCT(&ilist); + return rc; + } + PMIX_LIST_FOREACH(kv, &ilist, pmix_kval_t) { + /* cycle across all the apps and set this envar */ + for (m=0; m < napps; m++) { + aptr = (pmix_app_t*)&apps[m]; + pmix_setenv(kv->value->data.envar.envar, + kv->value->data.envar.value, + true, &aptr->env); + } + } + jobenvars = true; + PMIX_LIST_DESTRUCT(&ilist); + break; + } + } + } + + for (n=0; n < napps; n++) { + /* do a quick check of the apps directive array to ensure + * the ninfo field has been set */ + aptr = (pmix_app_t*)&apps[n]; + if (NULL != aptr->info && 0 == aptr->ninfo) { + /* look for the info marked as "end" */ + m = 0; + while (!(PMIX_INFO_IS_END(&aptr->info[m])) && m < SIZE_MAX) { + ++m; + } + if (SIZE_MAX == m) { + /* nothing we can do */ + return PMIX_ERR_BAD_PARAM; + } + aptr->ninfo = m; + } + if (!jobenvars) { + for (m=0; m < aptr->ninfo; m++) { + if (PMIX_CHECK_KEY(&aptr->info[m], PMIX_SETUP_APP_ENVARS)) { + PMIX_CONSTRUCT(&ilist, pmix_list_t); 
+ rc = pmix_pnet_base_harvest_envars(harvest, NULL, &ilist); + if (PMIX_SUCCESS != rc) { + PMIX_LIST_DESTRUCT(&ilist); + return rc; + } + PMIX_LIST_FOREACH(kv, &ilist, pmix_kval_t) { + pmix_setenv(kv->value->data.envar.envar, + kv->value->data.envar.value, + true, &aptr->env); + } + jobenvars = true; + PMIX_LIST_DESTRUCT(&ilist); + break; + } + } + } + } + msg = PMIX_NEW(pmix_buffer_t); /* pack the cmd */ PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver, diff --git a/opal/mca/pmix/pmix4x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix4x/pmix/src/common/pmix_query.c index ff62b3ed19..c40f08b6a0 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix4x/pmix/src/common/pmix_query.c @@ -160,6 +160,24 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque return PMIX_ERR_BAD_PARAM; } + /* do a quick check of the qualifiers array to ensure + * the nqual field has been set */ + for (n=0; n < nqueries; n++) { + if (NULL != queries[n].qualifiers && 0 == queries[n].nqual) { + /* look for the info marked as "end" */ + p = 0; + while (!(PMIX_INFO_IS_END(&queries[n].qualifiers[p])) && p < SIZE_MAX) { + ++p; + } + if (SIZE_MAX == p) { + /* nothing we can do */ + PMIX_RELEASE_THREAD(&pmix_global_lock); + return PMIX_ERR_BAD_PARAM; + } + queries[n].nqual = p; + } + } + /* setup the list of local results */ PMIX_CONSTRUCT(&results, pmix_list_t); @@ -185,6 +203,12 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque PMIX_LOAD_NSPACE(proc.nspace, queries[n].qualifiers[p].value.data.string); } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_RANK)) { proc.rank = queries[n].qualifiers[p].value.data.rank; + } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_HOSTNAME)) { + if (0 != strcmp(queries[n].qualifiers[p].value.data.string, pmix_globals.hostname)) { + /* asking about a different host, so ask for the info */ + PMIX_LIST_DESTRUCT(&results); + goto query; + } } } /* we get 
here if a refresh isn't required - first try a local diff --git a/opal/mca/pmix/pmix4x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix4x/pmix/src/event/pmix_event_notification.c index 0ec9d70923..ab7593cdae 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix4x/pmix/src/event/pmix_event_notification.c @@ -165,9 +165,8 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, pmix_cmd_t cmd = PMIX_NOTIFY_CMD; pmix_cb_t *cb; pmix_event_chain_t *chain; - size_t n, nleft; + size_t n; pmix_notify_caddy_t *cd; - pmix_namespace_t *nptr, *tmp; pmix_output_verbose(2, pmix_client_globals.event_output, "[%s:%d] client: notifying server %s:%d of status %s for range %s", @@ -253,31 +252,6 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, cd->ntargets = chain->ntargets; PMIX_PROC_CREATE(cd->targets, cd->ntargets); memcpy(cd->targets, chain->targets, cd->ntargets * sizeof(pmix_proc_t)); - /* compute the number of targets that need to be notified */ - nleft = 0; - for (n=0; n < cd->ntargets; n++) { - /* if this is a single proc, then increment by one */ - if (PMIX_RANK_VALID >= cd->targets[n].rank) { - ++nleft; - } else { - /* look up the nspace for this proc */ - nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { - if (PMIX_CHECK_NSPACE(tmp->nspace, cd->targets[n].nspace)) { - nptr = tmp; - break; - } - } - /* if we don't yet know it, then nothing to do */ - if (NULL == nptr) { - nleft = SIZE_MAX; - break; - } - /* might notify all local members */ - nleft += nptr->nlocalprocs; - } - } - cd->nleft = nleft; } if (NULL != chain->affected) { cd->naffected = chain->naffected; @@ -1027,32 +1001,33 @@ static void _notify_client_event(int sd, short args, void *cbdata) if (matched) { continue; } + /* check if the affected procs (if given) match those they + * wanted to know about */ + if (!pmix_notify_check_affected(cd->affected, cd->naffected, + pr->affected, 
pr->naffected)) { + continue; + } /* check the range */ + if (NULL == cd->targets) { + rngtrk.procs = &cd->source; + rngtrk.nprocs = 1; + } else { + rngtrk.procs = cd->targets; + rngtrk.nprocs = cd->ntargets; + } rngtrk.range = cd->range; PMIX_LOAD_PROCID(&proc, pr->peer->info->pname.nspace, pr->peer->info->pname.rank); if (!pmix_notify_check_range(&rngtrk, &proc)) { continue; } - /* if we were given specific targets, check if this is one */ if (NULL != cd->targets) { - matched = false; - for (n=0; n < cd->ntargets; n++) { - if (PMIX_CHECK_PROCID(&pr->peer->info->pname, &cd->targets[n])) { - matched = true; - /* track the number of targets we have left to notify */ - --cd->nleft; - /* if the event was cached and this is the last one, - * then evict this event from the cache */ - if (0 == cd->nleft) { - pmix_hotel_checkout(&pmix_globals.notifications, cd->room); - PMIX_RELEASE(cd); - } - break; - } - } - if (!matched) { - /* do not notify this one */ - continue; + /* track the number of targets we have left to notify */ + --cd->nleft; + /* if the event was cached and this is the last one, + * then evict this event from the cache */ + if (0 == cd->nleft) { + pmix_hotel_checkout(&pmix_globals.notifications, cd->room); + PMIX_RELEASE(cd); } } pmix_output_verbose(2, pmix_server_globals.event_output, @@ -1217,37 +1192,34 @@ bool pmix_notify_check_range(pmix_range_trkr_t *rng, return true; } if (PMIX_RANGE_NAMESPACE == rng->range) { - if (0 == strncmp(pmix_globals.myid.nspace, proc->nspace, PMIX_MAX_NSLEN)) { - return true; + for (n=0; n < rng->nprocs; n++) { + if (PMIX_CHECK_NSPACE(rng->procs[n].nspace, proc->nspace)) { + return true; + } } return false; } if (PMIX_RANGE_PROC_LOCAL == rng->range) { - if (0 == strncmp(pmix_globals.myid.nspace, proc->nspace, PMIX_MAX_NSLEN) && - pmix_globals.myid.rank == proc->rank) { - return true; + for (n=0; n < rng->nprocs; n++) { + if (PMIX_CHECK_PROCID(&rng->procs[n], proc)) { + return true; + } } return false; } if 
(PMIX_RANGE_CUSTOM == rng->range) { - if (NULL != rng->procs) { - /* see if this proc was included */ - for (n=0; n < rng->nprocs; n++) { - if (0 != strncmp(rng->procs[n].nspace, proc->nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == rng->procs[n].rank || - rng->procs[n].rank == proc->rank) { - return true; - } + /* see if this proc was included */ + for (n=0; n < rng->nprocs; n++) { + if (0 != strncmp(rng->procs[n].nspace, proc->nspace, PMIX_MAX_NSLEN)) { + continue; + } + if (PMIX_RANK_WILDCARD == rng->procs[n].rank || + rng->procs[n].rank == proc->rank) { + return true; } - /* if we get here, then this proc isn't in range */ - return false; - } else { - /* if they didn't give us a list, then assume - * everyone included */ - return true; } + /* if we get here, then this proc isn't in range */ + return false; } /* if it is anything else, then reject it */ @@ -1270,12 +1242,7 @@ bool pmix_notify_check_affected(pmix_proc_t *interested, size_t ninterested, /* check if the two overlap */ for (n=0; n < naffected; n++) { for (m=0; m < ninterested; m++) { - if (0 != strncmp(affected[n].nspace, interested[m].nspace, PMIX_MAX_NSLEN)) { - continue; - } - if (PMIX_RANK_WILDCARD == interested[m].rank || - PMIX_RANK_WILDCARD == affected[n].rank || - affected[n].rank == interested[m].rank) { + if (PMIX_CHECK_PROCID(&affected[n], &interested[m])) { return true; } } diff --git a/opal/mca/pmix/pmix4x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix4x/pmix/src/event/pmix_event_registration.c index d63edb75a7..2607d6b101 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix4x/pmix/src/event/pmix_event_registration.c @@ -297,11 +297,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer) PMIX_INFO_CREATE(cd2->info, cd2->ninfo); n=0; PMIX_LIST_FOREACH(ixfer, xfer, pmix_info_caddy_t) { - pmix_strncpy(cd2->info[n].key, ixfer->info[n].key, PMIX_MAX_KEYLEN); - 
PMIX_BFROPS_VALUE_LOAD(pmix_client_globals.myserver, - &cd2->info[n].value, - &ixfer->info[n].value.data, - ixfer->info[n].value.type); + PMIX_INFO_XFER(&cd2->info[n], ixfer->info); ++n; } } @@ -526,12 +522,21 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { cd->affected = cd->info[n].value.data.proc; cd->naffected = 1; + ixfer = PMIX_NEW(pmix_info_caddy_t); + ixfer->info = &cd->info[n]; + ixfer->ninfo = 1; + pmix_list_append(&xfer, &ixfer->super); } else if (0 == strncmp(cd->info[n].key, PMIX_EVENT_AFFECTED_PROCS, PMIX_MAX_KEYLEN)) { cd->affected = (pmix_proc_t*)cd->info[n].value.data.darray->array; cd->naffected = cd->info[n].value.data.darray->size; + ixfer = PMIX_NEW(pmix_info_caddy_t); + ixfer->info = &cd->info[n]; + ixfer->ninfo = 1; + pmix_list_append(&xfer, &ixfer->super); } else { ixfer = PMIX_NEW(pmix_info_caddy_t); ixfer->info = &cd->info[n]; + ixfer->ninfo = 1; pmix_list_append(&xfer, &ixfer->super); } } diff --git a/opal/mca/pmix/pmix4x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix4x/pmix/src/include/pmix_globals.h index 05dfd68fd4..d63d7536fe 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix4x/pmix/src/include/pmix_globals.h @@ -456,6 +456,8 @@ typedef struct { pmix_peer_t *mypeer; // my own peer object uid_t uid; // my effective uid gid_t gid; // my effective gid + char *hostname; // my hostname + uint32_t nodeid; // my nodeid, if given int pindex; pmix_event_base_t *evbase; bool external_evbase; diff --git a/opal/mca/pmix/pmix4x/pmix/src/mca/pnet/base/pnet_base_fns.c b/opal/mca/pmix/pmix4x/pmix/src/mca/pnet/base/pnet_base_fns.c index 13b63bd776..c4869da529 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/mca/pnet/base/pnet_base_fns.c +++ b/opal/mca/pmix/pmix4x/pmix/src/mca/pnet/base/pnet_base_fns.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2018 
Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2018 Research Organization for Information Science @@ -39,10 +39,11 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, pmix_list_t *ilist) { pmix_pnet_base_active_module_t *active; - pmix_status_t rc; + pmix_status_t rc = PMIX_SUCCESS; pmix_namespace_t *nptr, *ns; size_t n; char *nregex, *pregex; + char *params[2] = {"PMIX_MCA_", NULL}; if (!pmix_pnet_globals.initialized) { return PMIX_ERR_INIT; @@ -75,22 +76,7 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); } - /* if the info param is NULL, then we make one pass thru the actives - * in case someone specified an allocation or collection of envars - * via MCA param */ - if (NULL == info) { - PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { - if (NULL != active->module->allocate) { - if (PMIX_SUCCESS == (rc = active->module->allocate(nptr, NULL, ilist))) { - break; - } - if (PMIX_ERR_TAKE_NEXT_OPTION != rc) { - /* true error */ - return rc; - } - } - } - } else { + if (NULL != info) { /* check for description of the node and proc maps */ nregex = NULL; pregex = NULL; @@ -131,7 +117,10 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, } } - return PMIX_SUCCESS; + /* add any local PMIx MCA params */ + rc = pmix_pnet_base_harvest_envars(params, NULL, ilist); + + return rc; } /* can only be called by a server */ diff --git a/opal/mca/pmix/pmix4x/pmix/src/mca/preg/base/preg_base_frame.c b/opal/mca/pmix/pmix4x/pmix/src/mca/preg/base/preg_base_frame.c index dbf551ea64..5f6c5afce7 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/mca/preg/base/preg_base_frame.c +++ b/opal/mca/pmix/pmix4x/pmix/src/mca/preg/base/preg_base_frame.c @@ -11,9 +11,10 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -98,6 +99,7 @@ static void rvcon(pmix_regex_value_t *p) p->prefix = NULL; p->suffix = NULL; p->num_digits = 0; + p->skip = false; PMIX_CONSTRUCT(&p->ranges, pmix_list_t); } static void rvdes(pmix_regex_value_t *p) diff --git a/opal/mca/pmix/pmix4x/pmix/src/mca/preg/native/preg_native.c b/opal/mca/pmix/pmix4x/pmix/src/mca/preg/native/preg_native.c index 2e9170f85c..502663cfdf 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/mca/preg/native/preg_native.c +++ b/opal/mca/pmix/pmix4x/pmix/src/mca/preg/native/preg_native.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 IBM Corporation. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * @@ -152,9 +152,22 @@ static pmix_status_t generate_node_regex(const char *input, suffix = NULL; numdigits = (int)strlen(&vptr[startnum]); } + /* is this value already on our list? */ found = false; PMIX_LIST_FOREACH(vreg, &vids, pmix_regex_value_t) { + // The regex must preserve ordering of the values. + // If we disqualified this entry in a previous check then exclude it + // from future checks as well. This will prevent a later entry from + // being 'pulled forward' accidentally. 
For example, given: + // "a28n01,a99n02,a28n02" + // Without this 'skip' the loop would have 'a28n02' combine with + // 'a28n01' jumping over the 'a99n02' entry, and thus not preserving + // the order of the list when the regex is unpacked. + if( vreg->skip ) { + continue; + } + if (0 < strlen(prefix) && NULL == vreg->prefix) { continue; } @@ -163,6 +176,7 @@ static pmix_status_t generate_node_regex(const char *input, } if (0 < strlen(prefix) && NULL != vreg->prefix && 0 != strcmp(prefix, vreg->prefix)) { + vreg->skip = true; continue; } if (NULL == suffix && NULL != vreg->suffix) { @@ -173,9 +187,11 @@ static pmix_status_t generate_node_regex(const char *input, } if (NULL != suffix && NULL != vreg->suffix && 0 != strcmp(suffix, vreg->suffix)) { + vreg->skip = true; continue; } if (numdigits != vreg->num_digits) { + vreg->skip = true; continue; } /* found a match - flag it */ diff --git a/opal/mca/pmix/pmix4x/pmix/src/mca/preg/preg_types.h b/opal/mca/pmix/pmix4x/pmix/src/mca/preg/preg_types.h index 9f1b8a8ae5..572b321b04 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/mca/preg/preg_types.h +++ b/opal/mca/pmix/pmix4x/pmix/src/mca/preg/preg_types.h @@ -12,7 +12,8 @@ * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -51,6 +52,7 @@ typedef struct { char *suffix; int num_digits; pmix_list_t ranges; + bool skip; } pmix_regex_value_t; PMIX_EXPORT PMIX_CLASS_DECLARATION(pmix_regex_value_t); diff --git a/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp.c index a7d0b9575f..e921cd599c 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -190,11 +190,6 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_client_globals.myserver->nptr->compat.bfrops = pmix_globals.mypeer->nptr->compat.bfrops; /* mark that we are using the V2 (i.e., tcp) protocol */ pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2; - /* save the URI for storage */ - urikv = PMIX_NEW(pmix_kval_t); - urikv->key = strdup(PMIX_SERVER_URI); - PMIX_VALUE_CREATE(urikv->value, 1); - PMIX_VALUE_LOAD(urikv->value, evar, PMIX_STRING); /* the URI consists of the following elements: * - server nspace.rank @@ -218,6 +213,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, ++p2; nspace = strdup(p); rank = strtoull(p2, NULL, 10); + suri = strdup(uri[1]); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "ptl:tcp:client attempt connect to %s", uri[1]); @@ -226,6 +222,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, if (PMIX_SUCCESS != (rc = try_connect(uri[1], &sd, info, ninfo))) { free(nspace); pmix_argv_free(uri); + free(suri); return rc; } pmix_argv_free(uri); @@ -497,11 +494,6 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } return rc; } - /* save the URI for storage */ - urikv = PMIX_NEW(pmix_kval_t); - urikv->key = strdup(PMIX_SERVER_URI); - PMIX_VALUE_CREATE(urikv->value, 1); - PMIX_VALUE_LOAD(urikv->value, suri, PMIX_STRING); /* cleanup */ free(suri); suri = NULL; @@ -535,11 +527,6 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, goto complete; } } 
- /* save the URI for storage */ - urikv = PMIX_NEW(pmix_kval_t); - urikv->key = strdup(PMIX_SERVER_URI); - PMIX_VALUE_CREATE(urikv->value, 1); - PMIX_VALUE_LOAD(urikv->value, suri, PMIX_STRING); /* cleanup */ if (NULL != nspace) { free(nspace); @@ -579,11 +566,6 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, if (NULL != iptr) { PMIX_INFO_FREE(iptr, niptr); } - /* save the URI for storage */ - urikv = PMIX_NEW(pmix_kval_t); - urikv->key = strdup(PMIX_SERVER_URI); - PMIX_VALUE_CREATE(urikv->value, 1); - PMIX_VALUE_LOAD(urikv->value, suri, PMIX_STRING); goto complete; } free(nspace); @@ -625,11 +607,6 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, filename, iptr, niptr, &sd, &nspace, &rank, &suri); free(filename); if (PMIX_SUCCESS == rc) { - /* save the URI for storage */ - urikv = PMIX_NEW(pmix_kval_t); - urikv->key = strdup(PMIX_SERVER_URI); - PMIX_VALUE_CREATE(urikv->value, 1); - PMIX_VALUE_LOAD(urikv->value, suri, PMIX_STRING); goto complete; } if (NULL != suri) { @@ -665,11 +642,6 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, filename, iptr, niptr, &sd, &nspace, &rank, &suri); free(filename); if (PMIX_SUCCESS == rc) { - /* save the URI for storage */ - urikv = PMIX_NEW(pmix_kval_t); - urikv->key = strdup(PMIX_SERVER_URI); - PMIX_VALUE_CREATE(urikv->value, 1); - PMIX_VALUE_LOAD(urikv->value, suri, PMIX_STRING); goto complete; } if (NULL != suri) { @@ -719,11 +691,6 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, } return PMIX_ERR_UNREACH; } - /* save the URI for storage */ - urikv = PMIX_NEW(pmix_kval_t); - urikv->key = strdup(PMIX_SERVER_URI); - PMIX_VALUE_CREATE(urikv->value, 1); - PMIX_VALUE_LOAD(urikv->value, suri, PMIX_STRING); if (NULL != iptr) { PMIX_INFO_FREE(iptr, niptr); } @@ -769,6 +736,11 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, pmix_client_globals.myserver->info->pname.rank = rank; } /* store the URI for subsequent lookups */ + urikv = 
PMIX_NEW(pmix_kval_t); + urikv->key = strdup(PMIX_SERVER_URI); + PMIX_VALUE_CREATE(urikv->value, 1); + urikv->value->type = PMIX_STRING; + asprintf(&urikv->value->data.string, "%s.%u;%s", nspace, rank, suri); PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, &pmix_globals.myid, PMIX_INTERNAL, urikv); diff --git a/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index cfc0ac644e..7f3138d52c 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix4x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -357,6 +357,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, char *prefix, myhost[PMIX_MAXHOSTNAMELEN]; char myconnhost[PMIX_MAXHOSTNAMELEN]; int myport; + pmix_kval_t *urikv; pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "ptl:tcp setup_listener"); @@ -640,6 +641,16 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "ptl:tcp URI %s", lt->uri); + /* save the URI internally so we can report it */ + urikv = PMIX_NEW(pmix_kval_t); + urikv->key = strdup(PMIX_SERVER_URI); + PMIX_VALUE_CREATE(urikv->value, 1); + PMIX_VALUE_LOAD(urikv->value, lt->uri, PMIX_STRING); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, PMIX_INTERNAL, + urikv); + PMIX_RELEASE(urikv); // maintain accounting + if (NULL != mca_ptl_tcp_component.report_uri) { /* if the string is a "-", then output to stdout */ if (0 == strcmp(mca_ptl_tcp_component.report_uri, "-")) { @@ -690,7 +701,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, fprintf(fp, "v%s\n", PMIX_VERSION); fclose(fp); /* set the file mode */ - if (0 != chmod(mca_ptl_tcp_component.rendezvous_filename, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) { + if (0 != chmod(mca_ptl_tcp_component.rendezvous_filename, S_IRUSR | S_IWUSR | S_IRGRP)) { 
PMIX_ERROR_LOG(PMIX_ERR_FILE_OPEN_FAILURE); CLOSE_THE_SOCKET(lt->socket); free(mca_ptl_tcp_component.rendezvous_filename); diff --git a/opal/mca/pmix/pmix4x/pmix/src/runtime/pmix_init.c b/opal/mca/pmix/pmix4x/pmix/src/runtime/pmix_init.c index 5a5c9ca094..d1803de704 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/runtime/pmix_init.c +++ b/opal/mca/pmix/pmix4x/pmix/src/runtime/pmix_init.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -70,6 +70,8 @@ PMIX_EXPORT bool pmix_init_called = false; PMIX_EXPORT pmix_globals_t pmix_globals = { .init_cntr = 0, .mypeer = NULL, + .hostname = NULL, + .nodeid = UINT32_MAX, .pindex = 0, .evbase = NULL, .external_evbase = false, @@ -96,6 +98,7 @@ int pmix_rte_init(pmix_proc_type_t type, int ret, debug_level; char *error = NULL, *evar; size_t n; + char hostname[PMIX_MAXHOSTNAMELEN]; if( ++pmix_initialized != 1 ) { if( pmix_initialized < 1 ) { @@ -156,6 +159,8 @@ int pmix_rte_init(pmix_proc_type_t type, } /* setup the globals structure */ + gethostname(hostname, PMIX_MAXHOSTNAMELEN); + pmix_globals.hostname = strdup(hostname); memset(&pmix_globals.myid.nspace, 0, PMIX_MAX_NSLEN+1); pmix_globals.myid.rank = PMIX_RANK_INVALID; PMIX_CONSTRUCT(&pmix_globals.events, pmix_events_t); @@ -249,9 +254,19 @@ int pmix_rte_init(pmix_proc_type_t type, /* scan incoming info for directives */ if (NULL != info) { for (n=0; n < ninfo; n++) { - if (0 == strcmp(PMIX_EVENT_BASE, info[n].key)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_BASE)) { pmix_globals.evbase = (pmix_event_base_t*)info[n].value.data.ptr; pmix_globals.external_evbase = true; + } else if (PMIX_CHECK_KEY(&info[n], 
PMIX_HOSTNAME)) { + if (NULL != pmix_globals.hostname) { + free(pmix_globals.hostname); + } + pmix_globals.hostname = strdup(info[n].value.data.string); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NODEID)) { + PMIX_VALUE_GET_NUMBER(ret, &info[n].value, pmix_globals.nodeid, uint32_t); + if (PMIX_SUCCESS != ret) { + goto return_error; + } } } } diff --git a/opal/mca/pmix/pmix4x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix4x/pmix/src/server/pmix_server_ops.c index dbd727cbb0..691b868e5e 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix4x/pmix/src/server/pmix_server_ops.c @@ -1638,7 +1638,7 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, pmix_info_t *info = NULL; size_t ninfo=0, ncodes, n, k; pmix_regevents_info_t *reginfo; - pmix_peer_events_info_t *prev; + pmix_peer_events_info_t *prev = NULL; pmix_notify_caddy_t *cd; pmix_setup_caddy_t *scd; int i; @@ -1738,10 +1738,36 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, goto cleanup; } + /* if they didn't send us any codes, then they are registering a + * default event handler. 
In that case, check only for default + * handlers and add this request to it, if not already present */ + if (0 == ncodes) { + PMIX_LIST_FOREACH(reginfo, &pmix_server_globals.events, pmix_regevents_info_t) { + if (PMIX_MAX_ERR_CONSTANT == reginfo->code) { + /* both are default handlers */ + prev = PMIX_NEW(pmix_peer_events_info_t); + if (NULL == prev) { + rc = PMIX_ERR_NOMEM; + goto cleanup; + } + PMIX_RETAIN(peer); + prev->peer = peer; + if (NULL != affected) { + PMIX_PROC_CREATE(prev->affected, naffected); + prev->naffected = naffected; + memcpy(prev->affected, affected, naffected * sizeof(pmix_proc_t)); + } + pmix_list_append(&reginfo->peers, &prev->super); + break; + } + } + rc = PMIX_OPERATION_SUCCEEDED; + goto cleanup; + } + /* store the event registration info so we can call the registered * client when the server notifies the event */ - k=0; - do { + for (n=0; n < ncodes; n++) { found = false; PMIX_LIST_FOREACH(reginfo, &pmix_server_globals.events, pmix_regevents_info_t) { if (NULL == codes) { @@ -1755,35 +1781,28 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, } else { if (PMIX_MAX_ERR_CONSTANT == reginfo->code) { continue; - } else if (codes[k] == reginfo->code) { + } else if (codes[n] == reginfo->code) { found = true; break; } } } if (found) { - /* found it - add this peer if we don't already have it */ - found = false; - PMIX_LIST_FOREACH(prev, &reginfo->peers, pmix_peer_events_info_t) { - if (prev->peer == peer) { - /* already have it */ - rc = PMIX_SUCCESS; - found = true; - break; - } + /* found it - add this request */ + prev = PMIX_NEW(pmix_peer_events_info_t); + if (NULL == prev) { + rc = PMIX_ERR_NOMEM; + goto cleanup; } - if (!found) { - /* get here if we don't already have this peer */ - prev = PMIX_NEW(pmix_peer_events_info_t); - if (NULL == prev) { - rc = PMIX_ERR_NOMEM; - goto cleanup; - } - PMIX_RETAIN(peer); - prev->peer = peer; - prev->enviro_events = enviro_events; - pmix_list_append(&reginfo->peers, &prev->super); + 
PMIX_RETAIN(peer); + prev->peer = peer; + if (NULL != affected) { + PMIX_PROC_CREATE(prev->affected, naffected); + prev->naffected = naffected; + memcpy(prev->affected, affected, naffected * sizeof(pmix_proc_t)); } + prev->enviro_events = enviro_events; + pmix_list_append(&reginfo->peers, &prev->super); } else { /* if we get here, then we didn't find an existing registration for this code */ reginfo = PMIX_NEW(pmix_regevents_info_t); @@ -1794,7 +1813,7 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, if (NULL == codes) { reginfo->code = PMIX_MAX_ERR_CONSTANT; } else { - reginfo->code = codes[k]; + reginfo->code = codes[n]; } pmix_list_append(&pmix_server_globals.events, &reginfo->super); prev = PMIX_NEW(pmix_peer_events_info_t); @@ -1804,11 +1823,15 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, } PMIX_RETAIN(peer); prev->peer = peer; + if (NULL != affected) { + PMIX_PROC_CREATE(prev->affected, naffected); + prev->naffected = naffected; + memcpy(prev->affected, affected, naffected * sizeof(pmix_proc_t)); + } prev->enviro_events = enviro_events; pmix_list_append(&reginfo->peers, &prev->super); } - ++k; - } while (k < ncodes); + } /* if they asked for enviro events, call the local server */ if (enviro_events) { @@ -1908,7 +1931,20 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, if (!found) { continue; } + /* check if the affected procs (if given) match those they + * wanted to know about */ + if (!pmix_notify_check_affected(cd->affected, cd->naffected, + affected, naffected)) { + continue; + } /* check the range */ + if (NULL == cd->targets) { + rngtrk.procs = &cd->source; + rngtrk.nprocs = 1; + } else { + rngtrk.procs = cd->targets; + rngtrk.nprocs = cd->ntargets; + } rngtrk.range = cd->range; PMIX_LOAD_PROCID(&proc, peer->info->pname.nspace, peer->info->pname.rank); if (!pmix_notify_check_range(&rngtrk, &proc)) { @@ -1944,11 +1980,6 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, } } - /* if they specified 
affected proc(s) they wanted to know about, check */ - if (!pmix_notify_check_affected(cd->affected, cd->naffected, - affected, naffected)) { - continue; - } /* all matches - notify */ relay = PMIX_NEW(pmix_buffer_t); if (NULL == relay) { @@ -2253,14 +2284,12 @@ pmix_status_t pmix_server_query(pmix_peer_t *peer, } } - /** check each query/key to see if we already have the info - * before passing the request up to the host */ /* check the directives to see if they want us to refresh * the local cached results - if we wanted to optimize this * more, we would check each query and allow those that don't * want to be refreshed to be executed locally, and those that * did would be sent to the host. However, for now we simply - * */ + * determine that if we don't have it, then ask for everything */ memset(proc.nspace, 0, PMIX_MAX_NSLEN+1); proc.rank = PMIX_RANK_INVALID; PMIX_CONSTRUCT(&results, pmix_list_t); @@ -2279,6 +2308,12 @@ pmix_status_t pmix_server_query(pmix_peer_t *peer, PMIX_LOAD_NSPACE(proc.nspace, cd->queries[n].qualifiers[p].value.data.string); } else if (PMIX_CHECK_KEY(&cd->queries[n].qualifiers[p], PMIX_RANK)) { proc.rank = cd->queries[n].qualifiers[p].value.data.rank; + } else if (PMIX_CHECK_KEY(&cd->queries[n].qualifiers[p], PMIX_HOSTNAME)) { + if (0 != strcmp(cd->queries[n].qualifiers[p].value.data.string, pmix_globals.hostname)) { + /* asking about a different host, so ask for the info */ + PMIX_LIST_DESTRUCT(&results); + goto query; + } } } /* we get here if a refresh isn't required - first try a local @@ -4152,11 +4187,17 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, static void ncon(pmix_notify_caddy_t *p) { - struct timespec tp; - PMIX_CONSTRUCT_LOCK(&p->lock); - clock_gettime(CLOCK_MONOTONIC, &tp); +#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + struct timespec tp; + (void) clock_gettime(CLOCK_MONOTONIC, &tp); p->ts = tp.tv_sec; +#else + /* Fall back to gettimeofday() if we have nothing else */ + struct timeval tv; + 
gettimeofday(&tv, NULL); + p->ts = tv.tv_sec; +#endif p->room = -1; memset(p->source.nspace, 0, PMIX_MAX_NSLEN+1); p->source.rank = PMIX_RANK_UNDEF; @@ -4244,12 +4285,17 @@ PMIX_CLASS_INSTANCE(pmix_dmdx_local_t, static void prevcon(pmix_peer_events_info_t *p) { p->peer = NULL; + p->affected = NULL; + p->naffected = 0; } static void prevdes(pmix_peer_events_info_t *p) { if (NULL != p->peer) { PMIX_RELEASE(p->peer); } + if (NULL != p->affected) { + PMIX_PROC_FREE(p->affected, p->naffected); + } } PMIX_CLASS_INSTANCE(pmix_peer_events_info_t, pmix_list_item_t, diff --git a/opal/mca/pmix/pmix4x/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix4x/pmix/src/server/pmix_server_ops.h index 6c42fdf005..18724f9d12 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix4x/pmix/src/server/pmix_server_ops.h @@ -131,6 +131,8 @@ typedef struct { pmix_list_item_t super; pmix_peer_t *peer; bool enviro_events; + pmix_proc_t *affected; + size_t naffected; } pmix_peer_events_info_t; PMIX_CLASS_DECLARATION(pmix_peer_events_info_t); diff --git a/opal/mca/pmix/pmix4x/pmix/src/threads/thread_usage.h b/opal/mca/pmix/pmix4x/pmix/src/threads/thread_usage.h index ff3e504180..fcf25c0b39 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/threads/thread_usage.h +++ b/opal/mca/pmix/pmix4x/pmix/src/threads/thread_usage.h @@ -15,7 +15,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -37,24 +37,24 @@ */ #define PMIX_THREAD_DEFINE_ATOMIC_OP(type, name, operator, suffix) \ -static inline type pmix_thread_ ## name ## _fetch_ ## suffix (pmix_atomic_ ## type *addr, type delta) \ +__pmix_attribute_always_inline__ static inline type pmix_thread_ ## name ## _fetch_ ## suffix (pmix_atomic_ ## type *addr, type delta) \ { \ return pmix_atomic_ ## name ## _fetch_ ## suffix (addr, delta); \ } \ \ -static inline type pmix_thread_fetch_ ## name ## _ ## suffix (pmix_atomic_ ## type *addr, type delta) \ +__pmix_attribute_always_inline__ static inline type pmix_thread_fetch_ ## name ## _ ## suffix (pmix_atomic_ ## type *addr, type delta) \ { \ return pmix_atomic_fetch_ ## name ## _ ## suffix (addr, delta); \ } #define PMIX_THREAD_DEFINE_ATOMIC_COMPARE_EXCHANGE(type, addr_type, suffix) \ -static inline bool pmix_thread_compare_exchange_strong_ ## suffix (pmix_atomic_ ## addr_type *addr, type *compare, type value) \ +__pmix_attribute_always_inline__ static inline bool pmix_thread_compare_exchange_strong_ ## suffix (pmix_atomic_ ## addr_type *addr, type *compare, type value) \ { \ return pmix_atomic_compare_exchange_strong_ ## suffix (addr, (addr_type *) compare, (addr_type) value); \ } #define PMIX_THREAD_DEFINE_ATOMIC_SWAP(type, addr_type, suffix) \ -static inline type pmix_thread_swap_ ## suffix (pmix_atomic_ ## addr_type *ptr, type newvalue) \ +__pmix_attribute_always_inline__ static inline type pmix_thread_swap_ ## suffix (pmix_atomic_ ## addr_type *ptr, type newvalue) \ { \ return (type) pmix_atomic_swap_ ## suffix (ptr, (addr_type) newvalue); \ } diff --git a/opal/mca/pmix/pmix4x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix4x/pmix/src/tool/pmix_tool.c index 1186626a32..effa1190ff 100644 --- a/opal/mca/pmix/pmix4x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix4x/pmix/src/tool/pmix_tool.c @@ -63,6 +63,7 @@ #include "src/runtime/pmix_rte.h" #include "src/mca/bfrops/base/base.h" #include 
"src/mca/gds/base/base.h" +#include "src/mca/pnet/base/base.h" #include "src/mca/ptl/base/base.h" #include "src/mca/psec/psec.h" #include "src/include/pmix_globals.h" @@ -619,6 +620,13 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, rcv->cbfunc = pmix_server_message_handler; /* add it to the end of the list of recvs */ pmix_list_append(&pmix_ptl_globals.posted_recvs, &rcv->super); + /* open the pnet framework so we can harvest envars */ + rc = pmix_mca_base_framework_open(&pmix_pnet_base_framework, 0); + if (PMIX_SUCCESS != rc){ + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } + /* note that we do not select active plugins as we don't need them */ } /* setup IOF */ @@ -1188,6 +1196,7 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) } } + (void)pmix_mca_base_framework_close(&pmix_pnet_base_framework); PMIX_DESTRUCT(&pmix_server_globals.clients); PMIX_LIST_DESTRUCT(&pmix_server_globals.collectives); PMIX_LIST_DESTRUCT(&pmix_server_globals.remote_pnd); diff --git a/opal/mca/pmix/pmix4x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix4x/pmix/test/simple/simptest.c index 8abefd540f..8aeb187438 100644 --- a/opal/mca/pmix/pmix4x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix4x/pmix/test/simple/simptest.c @@ -14,8 +14,8 @@ * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -1183,13 +1183,13 @@ static void log_fn(const pmix_proc_t *client, const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, void *cbdata) { - mylog_t lg; + mylog_t *lg = (mylog_t *)malloc(sizeof(mylog_t)); pmix_output(0, "SERVER: LOG"); - lg.cbfunc = cbfunc; - lg.cbdata = cbdata; - PMIX_THREADSHIFT(&lg, foobar); + lg->cbfunc = cbfunc; + lg->cbdata = cbdata; + PMIX_THREADSHIFT(lg, foobar); } static pmix_status_t alloc_fn(const pmix_proc_t *client, diff --git a/opal/mca/pmix/pmix4x/pmix/test/test_server.c b/opal/mca/pmix/pmix4x/pmix/test/test_server.c index 9a18b83b33..caa862b725 100644 --- a/opal/mca/pmix/pmix4x/pmix/test/test_server.c +++ b/opal/mca/pmix/pmix4x/pmix/test/test_server.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 Research Organization for Information Science @@ -76,25 +76,6 @@ PMIX_CLASS_INSTANCE(server_nspace_t, pmix_list_item_t, nscon, nsdes); -#define WAIT_THREAD(lck, to, ret) \ - do { \ - struct timespec ts; \ - pmix_mutex_lock(&(lck)->mutex); \ - ts.tv_sec = time(NULL); \ - ts.tv_nsec = 0; \ - ts.tv_sec += (int)to; \ - while ((lck)->active) { \ - ret = pthread_cond_timedwait(&(lck)->cond, \ - &(lck)->mutex.m_lock_pthread, \ - &ts); \ - if (ETIMEDOUT == rc) { \ - break; \ - } \ - } \ - PMIX_ACQUIRE_OBJECT(lck); \ - pmix_mutex_unlock(&(lck)->mutex); \ - } while(0) - static int server_send_procs(void); static void server_read_cb(evutil_socket_t fd, short event, void *arg); static int srv_wait_all(double timeout); @@ -427,6 +408,8 @@ static int server_send_procs(void) server->modex_cbfunc = _send_procs_cb; server->cbdata = (void*)server; + server->lock.active = true; + if (PMIX_SUCCESS != (rc = server_send_msg(&msg_hdr, buf, msg_hdr.size))) { if (buf) { free(buf); @@ -441,7 +424,7 @@ static int server_send_procs(void) return 
PMIX_SUCCESS; } -int server_barrier(double to) +int server_barrier(void) { server_info_t *server; msg_hdr_t msg_hdr; @@ -458,15 +441,12 @@ int server_barrier(double to) msg_hdr.src_id = my_server_id; msg_hdr.size = 0; + server->lock.active = true; + if (PMIX_SUCCESS != (rc = server_send_msg(&msg_hdr, NULL, 0))) { return PMIX_ERROR; } - - WAIT_THREAD(&server->lock, to, rc); - if (rc == ETIMEDOUT) { - TEST_ERROR(("timeout waiting from %d", server->idx)); - return PMIX_ERROR; - } + PMIX_WAIT_THREAD(&server->lock); return PMIX_SUCCESS; } @@ -820,7 +800,7 @@ int server_init(test_params *params) PMIx_Register_event_handler(NULL, 0, NULL, 0, errhandler, errhandler_reg_callbk, NULL); - if (0 != (rc = server_barrier(5))) { + if (0 != (rc = server_barrier())) { goto error; } @@ -836,7 +816,7 @@ int server_finalize(test_params *params) int rc = PMIX_SUCCESS; int total_ret = 0; - if (0 != (rc = server_barrier(5))) { + if (0 != (rc = server_barrier())) { total_ret++; goto exit; } @@ -846,12 +826,6 @@ int server_finalize(test_params *params) remove_server_item(server); } - /* finalize the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_finalize())) { - TEST_ERROR(("Finalize failed with error %d", rc)); - goto exit; - } - if (params->nservers && 0 == my_server_id) { int ret; /* wait for all servers are finished */ @@ -859,7 +833,7 @@ int server_finalize(test_params *params) if (!pmix_list_is_empty(server_list)) { total_ret += ret; } - PMIX_RELEASE(server_list); + PMIX_LIST_RELEASE(server_list); TEST_VERBOSE(("SERVER %d FINALIZE PID:%d with status %d", my_server_id, getpid(), ret)); if (0 == total_ret) { @@ -868,9 +842,16 @@ int server_finalize(test_params *params) rc = PMIX_ERROR; } } + PMIX_LIST_RELEASE(server_nspace); + + /* finalize the server library */ + if (PMIX_SUCCESS != (rc = PMIx_server_finalize())) { + TEST_ERROR(("Finalize failed with error %d", rc)); + total_ret += rc; + goto exit; + } exit: - PMIX_DESTRUCT(server_nspace); return total_ret; } diff --git 
a/opal/mca/pmix/pmix4x/pmix/test/test_server.h b/opal/mca/pmix/pmix4x/pmix/test/test_server.h index 2541be6274..4d9db8807e 100644 --- a/opal/mca/pmix/pmix4x/pmix/test/test_server.h +++ b/opal/mca/pmix/pmix4x/pmix/test/test_server.h @@ -2,6 +2,7 @@ * Copyright (c) 2018 Mellanox Technologies, Inc. * All rights reserved. * + * Copyright (c) 2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -66,7 +67,7 @@ extern pmix_list_t *server_nspace; int server_init(test_params *params); int server_finalize(test_params *params); -int server_barrier(double to); +int server_barrier(void); int server_fence_contrib(char *data, size_t ndata, pmix_modex_cbfunc_t cbfunc, void *cbdata); int server_dmdx_get(const char *nspace, int rank,