From e0a52354d48a7b6495d6ecbad04d831ee567559c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 10 Sep 2015 20:47:40 -0700 Subject: [PATCH] Sync to PMIx master at open-mpi/pmix@89680d6663c61fbf7f61ac1095acaafe35425749 Includes changes to support BigEndian machines --- opal/mca/pmix/pmix1xx/pmix/Makefile.am | 26 +- opal/mca/pmix/pmix1xx/pmix/VERSION | 4 +- .../pmix/pmix1xx/pmix/man/man3/pmix_init.3 | 101 ++++++++ opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 | 241 ++++++++++++++++++ .../pmix/pmix1xx/pmix/src/buffer_ops/pack.c | 2 +- .../pmix1xx/pmix/src/server/pmix_server.c | 3 +- 6 files changed, 365 insertions(+), 12 deletions(-) create mode 100644 opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_init.3 create mode 100644 opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 diff --git a/opal/mca/pmix/pmix1xx/pmix/Makefile.am b/opal/mca/pmix/pmix1xx/pmix/Makefile.am index 6260880043..1baa8a4eca 100644 --- a/opal/mca/pmix/pmix1xx/pmix/Makefile.am +++ b/opal/mca/pmix/pmix1xx/pmix/Makefile.am @@ -26,6 +26,7 @@ ACLOCAL_AMFLAGS = -I ./config headers = sources = nodist_headers = +EXTRA_DIST = # Only install the valgrind suppressions file if we're building in # standalone mode @@ -34,14 +35,9 @@ if ! PMIX_EMBEDDED_MODE dist_pmixdata_DATA += contrib/pmix-valgrind.supp endif -EXTRA_DIST = README INSTALL VERSION LICENSE autogen.sh \ - config/pmix_get_version.sh - -EXTRA_DIST += \ - test/test_common.h test/cli_stages.h \ - test/server_callbacks.h test/test_fence.h \ - test/test_publish.h test/test_resolve_peers.h \ - test/test_spawn.h test/utils.h test/test_cd.h +man_MANS = \ + man/man3/pmix_init.3 \ + man/man7/pmix.7 include config/Makefile.am include include/Makefile.am @@ -63,6 +59,20 @@ if ! PMIX_EMBEDDED_MODE SUBDIRS = . 
test examples endif +nroff: + @for file in $(man_MANS); do \ + source=`echo $$file | sed -e 's@/man[0-9]@@'`; \ + contrib/md2nroff.pl --source=$$source.md; \ + done + +EXTRA_DIST += README INSTALL VERSION LICENSE autogen.sh \ + config/pmix_get_version.sh $(man_MANS) \ + test/test_common.h test/cli_stages.h \ + test/server_callbacks.h test/test_fence.h \ + test/test_publish.h test/test_resolve_peers.h \ + test/test_spawn.h test/utils.h test/test_cd.h + + dist-hook: env LS_COLORS= sh "$(top_srcdir)/config/distscript.sh" "$(top_srcdir)" "$(distdir)" "$(PMIX_VERSION)" "$(PMIX_REPO_REV)" diff --git a/opal/mca/pmix/pmix1xx/pmix/VERSION b/opal/mca/pmix/pmix1xx/pmix/VERSION index 9cb15f99e6..790b8c0487 100644 --- a/opal/mca/pmix/pmix1xx/pmix/VERSION +++ b/opal/mca/pmix/pmix1xx/pmix/VERSION @@ -30,7 +30,7 @@ greek=a1 # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gita18ba6f +repo_rev=git89680d6 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Sep 09, 2015" +date="Sep 10, 2015" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_init.3 b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_init.3 new file mode 100644 index 0000000000..73708df289 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_init.3 @@ -0,0 +1,101 @@ +.TH "pmix_init" "3" "2015\-09\-09" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Init \- Initialize the PMIx Client +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ + +pmix_status_t\ PMIx_Init(pmix_proc_t\ *proc); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]proc\f[] : Fabric endpoint on which to initiate atomic operation. 
+.SH DESCRIPTION +.PP +Initialize the PMIx client, returning the process identifier assigned to +this client\[aq]s application in the provided pmix_proc_t struct. +Passing \f[I]NULL\f[] for this parameter is allowed if +the user wishes solely to initialize the PMIx system and does not +require return of the identifier at that time. +.PP +When called, the PMIx client will check for the required connection +information of the local PMIx server and will establish the connection. +If the information is not found, or the server connection fails, then an +appropriate error constant will be returned. +.PP +If successful, the function will return PMIX_SUCCESS and will fill the +provided structure with the server\-assigned namespace and rank of the +process within the application. +.PP +Note that the PMIx client library is reference counted, and so multiple +calls to PMIx_Init are allowed. +Thus, one way to obtain the namespace and rank of the process is to +simply call PMIx_Init with a non\-NULL parameter. +.SS Atomic Data Types +.PP +Atomic functions may operate on one of the following identified data +types. +A given atomic function may support any datatype, subject to provider +implementation constraints. +.PP +\f[I]FI_INT8\f[] : Signed 8\-bit integer. +.PP +\f[I]FI_UINT8\f[] : Unsigned 8\-bit integer. +.PP +\f[I]FI_INT16\f[] : Signed 16\-bit integer. +.PP +\f[I]FI_UINT16\f[] : Unsigned 16\-bit integer. +.PP +\f[I]FI_INT32\f[] : Signed 32\-bit integer. +.PP +\f[I]FI_UINT32\f[] : Unsigned 32\-bit integer. +.PP +\f[I]FI_INT64\f[] : Signed 64\-bit integer. +.PP +\f[I]FI_UINT64\f[] : Unsigned 64\-bit integer. +.PP +\f[I]FI_FLOAT\f[] : A single\-precision floating point value (IEEE 754). +.PP +\f[I]FI_DOUBLE\f[] : A double\-precision floating point value (IEEE +754). 
+.PP +\f[I]FI_FLOAT_COMPLEX\f[] : An ordered pair of single\-precision +floating point values (IEEE 754), with the first value representing the +real portion of a complex number and the second representing the +imaginary portion. +.PP +\f[I]FI_DOUBLE_COMPLEX\f[] : An ordered pair of double\-precision +floating point values (IEEE 754), with the first value representing the +real portion of a complex number and the second representing the +imaginary portion. +.PP +\f[I]FI_LONG_DOUBLE\f[] : A double\-extended precision floating point +value (IEEE 754). +.PP +\f[I]FI_LONG_DOUBLE_COMPLEX\f[] : An ordered pair of double\-extended +precision floating point values (IEEE 754), with the first value +representing the real portion of a complex number and the second +representing the imaginary portion. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH ERRORS +.PP +\f[I]\-FI_EOPNOTSUPP\f[] : The requested atomic operation is not +supported on this endpoint. +.PP +\f[I]\-FI_EMSGSIZE\f[] : The number of atomic operations in a single +request exceeds that supported by the underlying provider. +.SH NOTES +.SH SEE ALSO +.PP +\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), +\f[C]fi_cq\f[](3), \f[C]fi_rma\f[](3) +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 b/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 new file mode 100644 index 0000000000..4ba00eca72 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 @@ -0,0 +1,241 @@ +.TH "pmix" "7" "2015\-09\-09" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +Fabric Interface Library +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ +\f[] +.fi +.PP +Libfabric is a high\-performance fabric software library designed to +provide low\-latency interfaces to fabric hardware. 
+.SH OVERVIEW +.PP +Libfabric provides \[aq]process direct I/O\[aq] to application software +communicating across fabric software and hardware. +Process direct I/O, historically referred to as RDMA, allows an +application to directly access network resources without operating +system interventions. +Data transfers can occur directly to and from application memory. +.PP +There are two components to the libfabric software: +.PP +\f[I]Fabric Providers\f[] : Conceptually, a fabric provider may be +viewed as a local hardware NIC driver, though a provider is not limited +by this definition. +The first component of libfabric is a general purpose framework that is +capable of handling different types of fabric hardware. +All fabric hardware devices and their software drivers are required to +support this framework. +Devices and the drivers that plug into the libfabric framework are +referred to as fabric providers, or simply providers. +Provider details may be found in \f[C]fi_provider\f[](7). +.PP +\f[I]Fabric Interfaces\f[] : The second component is a set of +communication operations. +Libfabric defines several sets of communication functions that providers +can support. +It is not required that providers implement all the interfaces that are +defined; however, providers clearly indicate which interfaces they do +support. +.SH FABRIC INTERFACES +.PP +The fabric interfaces are designed such that they are cohesive and not +simply a union of disjoint interfaces. +The interfaces are logically divided into two groups: control interfaces +and communication operations. +The control interfaces are a common set of operations that provide +access to local communication resources, such as address vectors and +event queues. +The communication operations expose particular models of communication +and fabric functionality, such as message queues, remote memory access, +and atomic operations. +Communication operations are associated with fabric endpoints. 
+.PP +Applications will typically use the control interfaces to discover local +capabilities and allocate necessary resources. +They will then allocate and configure a communication endpoint to send +and receive data, or perform other types of data transfers, with remote +endpoints. +.SH CONTROL INTERFACES +.PP +The control interface APIs provide applications with access to network +resources. +This involves listing all the interfaces available, obtaining the +capabilities of the interfaces and opening a provider. +.PP +\f[I]fi_getinfo \- Fabric Information\f[] : The fi_getinfo call is the +base call used to discover and request fabric services offered by the +system. +Applications can use this call to indicate the type of communication +that they desire. +The results from fi_getinfo, fi_info, are used to reserve and configure +fabric resources. +.PP +fi_getinfo returns a list of fi_info structures. +Each structure references a single fabric provider, indicating the +interfaces that the provider supports, along with a named set of +resources. +A fabric provider may include multiple fi_info structures in the +returned list. +.PP +\f[I]fi_fabric \- Fabric Domain\f[] : A fabric domain represents a +collection of hardware and software resources that access a single +physical or virtual network. +All network ports on a system that can communicate with each other +through the fabric belong to the same fabric domain. +A fabric domain shares network addresses and can span multiple +providers. +libfabric supports systems connected to multiple fabrics. +.PP +\f[I]fi_domain \- Access Domains\f[] : An access domain represents a +single logical connection into a fabric. +It may map to a single physical or virtual NIC or a port. +An access domain defines the boundary across which fabric resources may +be associated. +Each access domain belongs to a single fabric domain. +.PP +\f[I]fi_endpoint \- Fabric Endpoint\f[] : A fabric endpoint is a +communication portal. 
+An endpoint may be either active or passive. +Passive endpoints are used to listen for connection requests. +Active endpoints can perform data transfers. +Endpoints are configured with specific communication capabilities and +data transfer interfaces. +.PP +\f[I]fi_eq \- Event Queue\f[] : Event queues are used to collect and +report the completion of asynchronous operations and events. +Event queues report events that are not directly associated with data +transfer operations. +.PP +\f[I]fi_cq \- Completion Queue\f[] : Completion queues are +high\-performance event queues used to report the completion of data +transfer operations. +.PP +\f[I]fi_cntr \- Event Counters\f[] : Event counters are used to report +the number of completed asynchronous operations. +Event counters are considered light\-weight, in that a completion simply +increments a counter, rather than placing an entry into an event queue. +.PP +\f[I]fi_mr \- Memory Region\f[] : Memory regions describe application +local memory buffers. +In order for fabric resources to access application memory, the +application must first grant permission to the fabric provider by +constructing a memory region. +Memory regions are required for specific types of data transfer +operations, such as RMA transfers (see below). +.PP +\f[I]fi_av \- Address Vector\f[] : Address vectors are used to map +higher level addresses, such as IP addresses, which may be more natural +for an application to use, into fabric specific addresses. +The use of address vectors allows providers to reduce the amount of +memory required to maintain large address look\-up tables, and eliminate +expensive address resolution and look\-up methods during data transfer +operations. +.SH DATA TRANSFER INTERFACES +.PP +Fabric endpoints are associated with multiple data transfer interfaces. +Each interface set is designed to support a specific style of +communication, with an endpoint allowing the different interfaces to be +used in conjunction. 
+The following data transfer interfaces are defined by libfabric. +.PP +\f[I]fi_msg \- Message Queue\f[] : Message queues expose a simple, +message\-based FIFO queue interface to the application. +Message data transfers allow applications to send and receive data with +message boundaries being maintained. +.PP +\f[I]fi_tagged \- Tagged Message Queues\f[] : Tagged message lists +expose send/receive data transfer operations built on the concept of +tagged messaging. +The tagged message queue is conceptually similar to standard message +queues, but with the addition of 64\-bit tags for each message. +Sent messages are matched with receive buffers that are tagged with a +similar value. +.PP +\f[I]fi_rma \- Remote Memory Access\f[] : RMA transfers are one\-sided +operations that read or write data directly to a remote memory region. +Other than defining the appropriate memory region, RMA operations do not +require interaction at the target side for the data transfer to +complete. +.PP +\f[I]fi_atomic \- Atomic\f[] : Atomic operations can perform one of +several operations on a remote memory region. +Atomic operations include well\-known functionality, such as atomic\-add +and compare\-and\-swap, plus several other pre\-defined calls. +Unlike other data transfer interfaces, atomic operations are aware of +the data formatting at the target memory region. +.SH LOGGING INTERFACE +.PP +Logging can be controlled using the FI_LOG_LEVEL, FI_LOG_PROV, and +FI_LOG_SUBSYS environment variables. +.PP +\f[I]FI_LOG_LEVEL\f[] : FI_LOG_LEVEL controls the amount of logging data +that is output. +The following log levels are defined. +.IP \[bu] 2 +\f[I]Warn\f[] : Warn is the least verbose setting and is intended for +reporting errors or warnings. +.IP \[bu] 2 +\f[I]Trace\f[] : Trace is more verbose and is meant to include +non\-detailed output helpful to tracing program execution. +.IP \[bu] 2 +\f[I]Info\f[] : Info is high traffic and meant for detailed output. 
+.IP \[bu] 2 +\f[I]Debug\f[] : Debug is high traffic and is likely to impact +application performance. +Debug output is only available if the library has been compiled with +debugging enabled. +.PP +\f[I]FI_LOG_PROV\f[] : The FI_LOG_PROV environment variable enables or +disables logging from specific providers. +Providers can be enabled by listing them in a comma separated fashion. +If the list begins with the \[aq]^\[aq] symbol, then the list will be +negated. +By default all providers are enabled. +.PP +Example: To enable logging from the psm and sockets provider: +FI_LOG_PROV="psm,sockets" +.PP +Example: To enable logging from providers other than psm: +FI_LOG_PROV="^psm" +.PP +\f[I]FI_LOG_SUBSYS\f[] : The FI_LOG_SUBSYS environment variable enables +or disables logging at the subsystem level. +The syntax for enabling or disabling subsystems is similar to that used +for FI_LOG_PROV. +The following subsystems are defined. +.IP \[bu] 2 +\f[I]core\f[] : Provides output related to the core framework and its +management of providers. +.IP \[bu] 2 +\f[I]fabric\f[] : Provides output specific to interactions associated +with the fabric object. +.IP \[bu] 2 +\f[I]domain\f[] : Provides output specific to interactions associated +with the domain object. +.IP \[bu] 2 +\f[I]ep_ctrl\f[] : Provides output specific to endpoint non\-data +transfer operations, such as CM operations. +.IP \[bu] 2 +\f[I]ep_data\f[] : Provides output specific to endpoint data transfer +operations. +.IP \[bu] 2 +\f[I]av\f[] : Provides output specific to address vector operations. +.IP \[bu] 2 +\f[I]cq\f[] : Provides output specific to completion queue operations. +.IP \[bu] 2 +\f[I]eq\f[] : Provides output specific to event queue operations. +.IP \[bu] 2 +\f[I]mr\f[] : Provides output specific to memory registration. 
+.SH SEE ALSO +.PP +\f[C]fi_provider\f[](7), \f[C]fi_getinfo\f[](3), +\f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), \f[C]fi_av\f[](3), +\f[C]fi_eq\f[](3), \f[C]fi_cq\f[](3), \f[C]fi_cntr\f[](3), +\f[C]fi_mr\f[](3) +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c b/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c index faa9a6e782..cf453eeaf7 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c @@ -643,7 +643,7 @@ int pmix_bfrop_pack_proc(pmix_buffer_t *buffer, const void *src, if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_string(buffer, &ptr, 1, PMIX_STRING))) { return ret; } - if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_sizet(buffer, &proc[i].rank, 1, PMIX_INT))) { + if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_int(buffer, &proc[i].rank, 1, PMIX_INT))) { return ret; } } diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c index 2dbfb2b473..3b311be4ae 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c @@ -381,7 +381,8 @@ static void _register_nspace(int sd, short args, void *cbdata) pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; pmix_nspace_t *nptr, *tmp; pmix_status_t rc; - size_t i, j, size, rank; + size_t i, j, size; + int rank; pmix_kval_t kv; char **nodes=NULL, **procs=NULL; pmix_buffer_t buf2;