1
1

Merge pull request from rhc54/topic/pmix

Work on cleaning up memory leaks that are causing orte-dvm to eventua…
Этот коммит содержится в:
rhc54 2015-11-06 17:16:49 -07:00
родитель e89ecac83c fed28e4cfc
Коммит 7a9b9325a8
54 изменённых файлов: 729 добавлений и 567 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -301,7 +301,6 @@ opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h
opal/mca/installdirs/config/install_dirs.h
opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h
opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h
opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h
opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h.in

Просмотреть файл

@ -538,8 +538,11 @@ typedef int (*opal_pmix_base_module_resolve_nodes_fn_t)(opal_jobid_t jobid, char
* SERVER APIs *
************************************************************/
/* Initialize the server support library */
typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module);
/* Initialize the server support library - must pass the callback
* module for the server to use, plus any attributes we want to
* pass down to it */
typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module,
opal_list_t *info);
/* Finalize the server support library */
typedef int (*opal_pmix_base_module_server_finalize_fn_t)(void);
@ -606,6 +609,13 @@ typedef int (*opal_pmix_base_module_server_register_nspace_fn_t)(opal_jobid_t jo
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
/* Deregister an nspace. Instruct the PMIx server to purge
* all info relating to the provided jobid so that memory
* can be freed. Note that the server will automatically
* purge all info relating to any clients it has from
* this nspace */
typedef void (*opal_pmix_base_module_server_deregister_nspace_fn_t)(opal_jobid_t jobid);
/* Register a client process with the PMIx server library. The
* expected user ID and group ID of the child process helps the
* server library to properly authenticate clients as they connect
@ -625,6 +635,15 @@ typedef int (*opal_pmix_base_module_server_register_client_fn_t)(const opal_proc
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
/* Deregister a client. Instruct the PMIx server to purge
* all info relating to the provided client so that memory
* can be freed. As per above note, the server will automatically
* free all client-related data when the nspace is deregistered,
* so there is no need to call this function during normal
* finalize operations. Instead, this is provided for use
* during exception operations */
typedef void (*opal_pmix_base_module_server_deregister_client_fn_t)(const opal_process_name_t *proc);
/* Setup the environment of a child process to be forked
* by the host so it can correctly interact with the PMIx
* server. The PMIx client needs some setup information
@ -725,47 +744,49 @@ typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, co
*/
typedef struct {
/* client APIs */
opal_pmix_base_module_init_fn_t init;
opal_pmix_base_module_fini_fn_t finalize;
opal_pmix_base_module_initialized_fn_t initialized;
opal_pmix_base_module_abort_fn_t abort;
opal_pmix_base_module_commit_fn_t commit;
opal_pmix_base_module_fence_fn_t fence;
opal_pmix_base_module_fence_nb_fn_t fence_nb;
opal_pmix_base_module_put_fn_t put;
opal_pmix_base_module_get_fn_t get;
opal_pmix_base_module_get_nb_fn_t get_nb;
opal_pmix_base_module_publish_fn_t publish;
opal_pmix_base_module_publish_nb_fn_t publish_nb;
opal_pmix_base_module_lookup_fn_t lookup;
opal_pmix_base_module_lookup_nb_fn_t lookup_nb;
opal_pmix_base_module_unpublish_fn_t unpublish;
opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb;
opal_pmix_base_module_spawn_fn_t spawn;
opal_pmix_base_module_spawn_nb_fn_t spawn_nb;
opal_pmix_base_module_connect_fn_t connect;
opal_pmix_base_module_connect_nb_fn_t connect_nb;
opal_pmix_base_module_disconnect_fn_t disconnect;
opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb;
opal_pmix_base_module_resolve_peers_fn_t resolve_peers;
opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes;
opal_pmix_base_module_init_fn_t init;
opal_pmix_base_module_fini_fn_t finalize;
opal_pmix_base_module_initialized_fn_t initialized;
opal_pmix_base_module_abort_fn_t abort;
opal_pmix_base_module_commit_fn_t commit;
opal_pmix_base_module_fence_fn_t fence;
opal_pmix_base_module_fence_nb_fn_t fence_nb;
opal_pmix_base_module_put_fn_t put;
opal_pmix_base_module_get_fn_t get;
opal_pmix_base_module_get_nb_fn_t get_nb;
opal_pmix_base_module_publish_fn_t publish;
opal_pmix_base_module_publish_nb_fn_t publish_nb;
opal_pmix_base_module_lookup_fn_t lookup;
opal_pmix_base_module_lookup_nb_fn_t lookup_nb;
opal_pmix_base_module_unpublish_fn_t unpublish;
opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb;
opal_pmix_base_module_spawn_fn_t spawn;
opal_pmix_base_module_spawn_nb_fn_t spawn_nb;
opal_pmix_base_module_connect_fn_t connect;
opal_pmix_base_module_connect_nb_fn_t connect_nb;
opal_pmix_base_module_disconnect_fn_t disconnect;
opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb;
opal_pmix_base_module_resolve_peers_fn_t resolve_peers;
opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes;
/* server APIs */
opal_pmix_base_module_server_init_fn_t server_init;
opal_pmix_base_module_server_finalize_fn_t server_finalize;
opal_pmix_base_module_generate_regex_fn_t generate_regex;
opal_pmix_base_module_generate_ppn_fn_t generate_ppn;
opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace;
opal_pmix_base_module_server_register_client_fn_t server_register_client;
opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork;
opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request;
opal_pmix_base_module_server_notify_error_fn_t server_notify_error;
opal_pmix_base_module_server_init_fn_t server_init;
opal_pmix_base_module_server_finalize_fn_t server_finalize;
opal_pmix_base_module_generate_regex_fn_t generate_regex;
opal_pmix_base_module_generate_ppn_fn_t generate_ppn;
opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace;
opal_pmix_base_module_server_deregister_nspace_fn_t server_deregister_nspace;
opal_pmix_base_module_server_register_client_fn_t server_register_client;
opal_pmix_base_module_server_deregister_client_fn_t server_deregister_client;
opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork;
opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request;
opal_pmix_base_module_server_notify_error_fn_t server_notify_error;
/* Utility APIs */
opal_pmix_base_module_get_version_fn_t get_version;
opal_pmix_base_module_register_fn_t register_errhandler;
opal_pmix_base_module_deregister_fn_t deregister_errhandler;
opal_pmix_base_module_store_fn_t store_local;
opal_pmix_base_module_get_nspace_fn_t get_nspace;
opal_pmix_base_module_register_jobid_fn_t register_jobid;
opal_pmix_base_module_get_version_fn_t get_version;
opal_pmix_base_module_register_fn_t register_errhandler;
opal_pmix_base_module_deregister_fn_t deregister_errhandler;
opal_pmix_base_module_store_fn_t store_local;
opal_pmix_base_module_get_nspace_fn_t get_nspace;
opal_pmix_base_module_register_jobid_fn_t register_jobid;
} opal_pmix_base_module_t;
typedef struct {

Просмотреть файл

@ -37,6 +37,11 @@ endif
man_MANS = \
man/man3/pmix_init.3 \
man/man3/pmix_finalize.3 \
man/man3/pmix_initialized.3 \
man/man3/pmix_abort.3 \
man/man3/pmix_put.3 \
man/man3/pmix_commit.3 \
man/man7/pmix.7 \
man/man7/pmix_constants.7
@ -59,6 +64,8 @@ libpmix_la_LDFLAGS = -version-info $(libpmix_so_version)
if ! PMIX_EMBEDDED_MODE
SUBDIRS = . test examples
pmixdir = $(pmixincludedir)/$(subdir)
nobase_pmix_HEADERS = $(headers)
endif
nroff:

Просмотреть файл

@ -30,7 +30,7 @@ greek=a1
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".
repo_rev=git69c398e
repo_rev=gita4d7e07
# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
@ -44,7 +44,7 @@ tarball_version=
# The date when this release was created
date="Oct 09, 2015"
date="Nov 06, 2015"
# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library

Просмотреть файл

@ -105,7 +105,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[
# replaced, not the entire file.
AC_CONFIG_HEADERS(pmix_config_prefix[include/private/autogen/config.h])
AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/autogen/config.h])
AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/pmix_common.h])
# What prefix are we using?
AC_MSG_CHECKING([for pmix symbol prefix])

Просмотреть файл

@ -173,7 +173,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Running version %s\n", PMIx_Get_version());
/* setup the server library */
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) {
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) {
fprintf(stderr, "Init failed with error %d\n", rc);
return rc;
}

Просмотреть файл

@ -18,11 +18,19 @@ include_HEADERS = \
include/pmix_server.h \
include/pmi.h \
include/pmi2.h
headers += \
include/private/align.h \
include/private/hash_string.h \
include/private/pmix_socket_errno.h \
include/private/pmix_stdint.h \
include/private/prefetch.h \
include/private/types.h \
include/private/autogen/config.h
include_pmixdir = $(includedir)/pmix
include_pmix_HEADERS = \
include/pmix/rename.h
nodist_include_pmix_HEADERS = \
include/pmix/pmix_common.h
include/pmix/rename.h \
include/pmix/pmix_common.h
include_pmix_autogendir = $(includedir)/pmix/autogen
include_pmix_autogen_HEADERS = \
@ -31,12 +39,4 @@ include_pmix_autogen_HEADERS = \
nodist_include_pmix_autogen_HEADERS = \
include/pmix/autogen/config.h
noinst_HEADERS = \
include/private/align.h \
include/private/hash_string.h \
include/private/pmix_socket_errno.h \
include/private/pmix_stdint.h \
include/private/prefetch.h \
include/private/types.h
endif ! PMIX_EMBEDDED_MODE

Просмотреть файл

@ -69,7 +69,7 @@
/* Maybe before gcc 2.95 too */
#ifdef PMIX_HAVE_ATTRIBUTE_UNUSED
#define __PMIX_HAVE_ATTRIBUTE_UNUSED PMIX_HAVE_ATTRIBUTE_UNUSED
#define __PMIX_HAVE_ATTRIBUTE_UNUSED PMIX_HAVE_ATTRIBUTE_UNUSED
#elif defined(__GNUC__)
# define __PMIX_HAVE_ATTRIBUTE_UNUSED (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
#else
@ -82,7 +82,7 @@
#endif
#ifdef PMIX_HAVE_ATTRIBUTE_MALLOC
#define __PMIX_HAVE_ATTRIBUTE_MALLOC PMIX_HAVE_ATTRIBUTE_MALLOC
#define __PMIX_HAVE_ATTRIBUTE_MALLOC PMIX_HAVE_ATTRIBUTE_MALLOC
#elif defined(__GNUC__)
# define __PMIX_HAVE_ATTRIBUTE_MALLOC (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
#else
@ -95,7 +95,7 @@
#endif
#ifdef PMIX_HAVE_ATTRIBUTE_CONST
#define __PMIX_HAVE_ATTRIBUTE_CONST PMIX_HAVE_ATTRIBUTE_CONST
#define __PMIX_HAVE_ATTRIBUTE_CONST PMIX_HAVE_ATTRIBUTE_CONST
#elif defined(__GNUC__)
# define __PMIX_HAVE_ATTRIBUTE_CONST (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
#else
@ -108,7 +108,7 @@
#endif
#ifdef PMIX_HAVE_ATTRIBUTE_PURE
#define __PMIX_HAVE_ATTRIBUTE_PURE PMIX_HAVE_ATTRIBUTE_PURE
#define __PMIX_HAVE_ATTRIBUTE_PURE PMIX_HAVE_ATTRIBUTE_PURE
#elif defined(__GNUC__)
# define __PMIX_HAVE_ATTRIBUTE_PURE (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
#else
@ -121,7 +121,7 @@
#endif
#ifdef PMIX_HAVE_ATTRIBUTE_DEPRECATED
#define __PMIX_HAVE_ATTRIBUTE_DEPRECATED PMIX_HAVE_ATTRIBUTE_DEPRECATED
#define __PMIX_HAVE_ATTRIBUTE_DEPRECATED PMIX_HAVE_ATTRIBUTE_DEPRECATED
#elif defined(__GNUC__)
# define __PMIX_HAVE_ATTRIBUTE_DEPRECATED (GXX_ABOVE_3_4 || GCC_ABOVE_3_3)
#else
@ -178,6 +178,12 @@
/* The pmix symbol prefix in all caps */
#undef PMIX_SYM_PREFIX_CAPS
/* ensure we have the version info available for external users */
#undef PMIX_MAJOR_VERSION
#undef PMIX_MINOR_VERSION
#undef PMIX_RELEASE_VERSION
#undef BEGIN_C_DECLS
#undef END_C_DECLS
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -1,3 +1,4 @@
/* include/pmix/pmix_common.h. Generated from pmix_common.h.in by configure. */
/*
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
*
@ -55,25 +56,6 @@
#include <sys/time.h> /* for struct timeval */
#endif
#ifndef PMIX_CONFIG_H
/* ensure we have the version info available for external users */
#undef PMIX_MAJOR_VERSION
#undef PMIX_MINOR_VERSION
#undef PMIX_RELEASE_VERSION
#endif
#undef BEGIN_C_DECLS
#undef END_C_DECLS
#if defined(c_plusplus) || defined(__cplusplus)
# define BEGIN_C_DECLS extern "C" {
# define END_C_DECLS }
#else
#define BEGIN_C_DECLS /* empty */
#define END_C_DECLS /* empty */
#endif
BEGIN_C_DECLS
/**** PMIX CONSTANTS ****/
@ -418,8 +400,29 @@ typedef struct {
/* release the memory in the value struct data field */
#define PMIX_VALUE_DESTRUCT(m) \
do { \
if (PMIX_STRING == (m)->type && NULL != (m)->data.string) { \
free((m)->data.string); \
if (PMIX_STRING == (m)->type) { \
if (NULL != (m)->data.string) { \
free((m)->data.string); \
} \
} else if (PMIX_BYTE_OBJECT == (m)->type) { \
if (NULL != (m)->data.bo.bytes) { \
free((m)->data.bo.bytes); \
} \
} else if (PMIX_INFO_ARRAY == (m)->type) { \
size_t _n; \
pmix_info_t *_p = (pmix_info_t*)((m)->data.array.array); \
for (_n=0; _n < (m)->data.array.size; _n++) { \
if (PMIX_STRING == _p[_n].value.type) { \
if (NULL != _p[_n].value.data.string) { \
free(_p[_n].value.data.string); \
} \
} else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \
if (NULL != _p[_n].value.data.bo.bytes) { \
free(_p[_n].value.data.bo.bytes); \
} \
} \
} \
free(_p); \
} \
} while(0);

Просмотреть файл

@ -308,8 +308,13 @@ typedef struct pmix_server_module_1_0_0_t {
/* Initialize the server support library, and provide a
* pointer to a pmix_server_module_t structure
* containing the caller's callback functions */
pmix_status_t PMIx_server_init(pmix_server_module_t *module);
* containing the caller's callback functions. The
* array of pmix_info_t structs is used to pass
* additional info that may be required by the server
* when initializing - e.g., a user/group ID to set
* on the rendezvous file for the Unix Domain Socket */
pmix_status_t PMIx_server_init(pmix_server_module_t *module,
pmix_info_t info[], size_t ninfo);
/* Finalize the server support library. If internal comm is
* in-use, the server will shut it down at this time. All
@ -376,6 +381,13 @@ pmix_status_t PMIx_server_register_nspace(const char nspace[], int nlocalprocs,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Deregister an nspace and purge all objects relating to
* it, including any client info from that nspace. This is
* intended to support persistent PMIx servers by providing
* an opportunity for the host RM to tell the PMIx server
* library to release all memory for a completed job */
void PMIx_server_deregister_nspace(const char nspace[]);
/* Register a client process with the PMIx server library. The
* expected user ID and group ID of the child process helps the
* server library to properly authenticate clients as they connect
@ -394,6 +406,12 @@ pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc,
void *server_object,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Deregister a client and purge all data relating to it. The
* deregister_nspace API will automatically delete all client
* info for that nspace - this API is therefore intended solely
* for use in exception cases */
void PMIx_server_deregister_client(const pmix_proc_t *proc);
/* Setup the environment of a child process to be forked
* by the host so it can correctly interact with the PMIx
* server. The PMIx client needs some setup information

Просмотреть файл

@ -0,0 +1,62 @@
.TH "pmix_abort" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
PMIx_Abort \- Abort the specified processes
.SH SYNOPSIS
.IP
.nf
\f[C]
#include\ <pmix.h>
pmix_status_t\ PMIx_Abort(int\ status,\ const\ char\ msg[],
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ pmix_proc_t\ procs[],\ size_t\ nprocs);
\f[]
.fi
.SH ARGUMENTS
.PP
\f[I]status\f[] : Status value to be returned.
A value of zero is permitted by PMIx, but may not be returned by some
resource managers.
.PP
\f[I]msg\f[] : A string message to be displayed
.PP
\f[I]procs\f[] : An array of pmix_proc_t structures defining the
processes to be aborted.
A \f[I]NULL\f[] for the proc array indicates that all processes in the
caller\[aq]s nspace are to be aborted.
A wildcard value for the rank in any structure indicates that all
processes in that nspace are to be aborted.
.PP
\f[I]nprocs\f[] : Number of pmix_proc_t structures in the \f[I]procs\f[]
array
.SH DESCRIPTION
.PP
Request that the provided array of procs be aborted, returning the
provided \f[I]status\f[] and printing the provided message.
A \f[I]NULL\f[] for the proc array indicates that all processes in the
caller\[aq]s nspace are to be aborted.
.PP
The response to this request is somewhat dependent on the specific
resource manager and its configuration (e.g., some resource managers
will not abort the application if the provided \f[I]status\f[] is zero
unless specifically configured to do so), and thus lies outside the
control of PMIx itself.
However, the client will inform the RM of the request that the
application be aborted, regardless of the value of the provided
\f[I]status\f[].
.PP
Passing a \f[I]NULL\f[] msg parameter is allowed.
Note that race conditions caused by multiple processes calling
PMIx_Abort are left to the server implementation to resolve with regard
to which status is returned and what messages (if any) are printed.
.SH RETURN VALUE
.PP
Returns PMIX_SUCCESS on success.
On error, a negative value corresponding to a PMIx errno is returned.
.SH ERRORS
.PP
PMIx errno values are defined in \f[C]pmix_common.h\f[].
.SH NOTES
.SH SEE ALSO
.SH AUTHORS
PMIx.

Просмотреть файл

@ -0,0 +1,35 @@
.TH "pmix_commit" "3" "2015\-10\-27" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
PMIx_Commit \- Push all previously \f[I]PMIx_Put\f[] values to the local
PMIx server.
.SH SYNOPSIS
.IP
.nf
\f[C]
#include\ <pmix.h>
pmix_status_t\ PMIx_Commit(void);
\f[]
.fi
.SH ARGUMENTS
.PP
\f[I]none\f[]
.SH DESCRIPTION
.PP
This is an asynchronous operation \- the library will immediately return
to the caller while the data is transmitted to the local server in the
background.
.SH RETURN VALUE
.PP
Returns PMIX_SUCCESS on success.
On error, a negative value corresponding to a PMIx errno is returned.
.SH ERRORS
.PP
PMIx errno values are defined in \f[C]pmix_common.h\f[].
.SH NOTES
.SH SEE ALSO
.PP
\f[C]PMIx_Put\f[](3)
.SH AUTHORS
PMIx.

Просмотреть файл

@ -0,0 +1,31 @@
.TH "pmix_finalize" "3" "2015\-10\-27" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
PMIx_Finalize \- Finalize the PMIx Client
.SH SYNOPSIS
.IP
.nf
\f[C]
#include\ <pmix.h>
pmix_status_t\ PMIx_Finalize(void);
\f[]
.fi
.SH ARGUMENTS
.SH DESCRIPTION
.PP
Finalize the PMIx client, closing the connection with the local PMIx
server.
.SH RETURN VALUE
.PP
Returns PMIX_SUCCESS on success.
On error, a negative value corresponding to a PMIx errno is returned.
.SH ERRORS
.PP
PMIx errno values are defined in \f[C]pmix_common.h\f[].
.SH NOTES
.SH SEE ALSO
.PP
\f[C]PMIx_Init\f[](3)
.SH AUTHORS
PMIx.

Просмотреть файл

@ -1,4 +1,4 @@
.TH "pmix_init" "3" "2015\-09\-09" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.TH "pmix_init" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
PMIx_Init \- Initialize the PMIx Client
@ -13,7 +13,8 @@ pmix_status_t\ PMIx_Init(pmix_proc_t\ *proc);
.fi
.SH ARGUMENTS
.PP
\f[I]proc\f[] : Fabric endpoint on which to initiate atomic operation.
\f[I]proc\f[] : Pointer to a pmix_proc_t object in which the
client\[aq]s namespace and rank are to be returned.
.SH DESCRIPTION
.PP
Initialize the PMIx client, returning the process identifier assigned to
@ -35,67 +36,14 @@ Note that the PMIx client library is referenced counted, and so multiple
calls to PMIx_Init are allowed.
Thus, one way to obtain the namespace and rank of the process is to
simply call PMIx_Init with a non\-NULL parameter.
.SS Atomic Data Types
.PP
Atomic functions may operate on one of the following identified data
types.
A given atomic function may support any datatype, subject to provider
implementation constraints.
.PP
\f[I]FI_INT8\f[] : Signed 8\-bit integer.
.PP
\f[I]FI_UINT8\f[] : Unsigned 8\-bit integer.
.PP
\f[I]FI_INT16\f[] : Signed 16\-bit integer.
.PP
\f[I]FI_UINT16\f[] : Unsigned 16\-bit integer.
.PP
\f[I]FI_INT32\f[] : Signed 32\-bit integer.
.PP
\f[I]FI_UINT32\f[] : Unsigned 32\-bit integer.
.PP
\f[I]FI_INT64\f[] : Signed 64\-bit integer.
.PP
\f[I]FI_UINT64\f[] : Unsigned 64\-bit integer.
.PP
\f[I]FI_FLOAT\f[] : A single\-precision floating point value (IEEE 754).
.PP
\f[I]FI_DOUBLE\f[] : A double\-precision floating point value (IEEE
754).
.PP
\f[I]FI_FLOAT_COMPLEX\f[] : An ordered pair of single\-precision
floating point values (IEEE 754), with the first value representing the
real portion of a complex number and the second representing the
imaginary portion.
.PP
\f[I]FI_DOUBLE_COMPLEX\f[] : An ordered pair of double\-precision
floating point values (IEEE 754), with the first value representing the
real portion of a complex number and the second representing the
imaginary portion.
.PP
\f[I]FI_LONG_DOUBLE\f[] : A double\-extended precision floating point
value (IEEE 754).
.PP
\f[I]FI_LONG_DOUBLE_COMPLEX\f[] : An ordered pair of double\-extended
precision floating point values (IEEE 754), with the first value
representing the real portion of a complex number and the second
representing the imaginary portion.
.SH RETURN VALUE
.PP
Returns PMIX_SUCCESS on success.
On error, a negative value corresponding to a PMIx errno is returned.
PMIx errno values are defined in \f[C]pmix_common.h\f[].
.SH ERRORS
.PP
\f[I]\-FI_EOPNOTSUPP\f[] : The requested atomic operation is not
supported on this endpoint.
.PP
\f[I]\-FI_EMSGSIZE\f[] : The number of atomic operations in a single
request exceeds that supported by the underlying provider.
PMIx errno values are defined in \f[C]pmix_common.h\f[].
.SH NOTES
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_cq\f[](3), \f[C]fi_rma\f[](3)
.SH AUTHORS
PMIx.

Просмотреть файл

@ -0,0 +1,30 @@
.TH "pmix_initialized" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
PMIx_Initialized \- Check if \f[I]PMIx_Init\f[] has been called
.SH SYNOPSIS
.IP
.nf
\f[C]
#include\ <pmix.h>
int\ PMIx_Initialized(void);
\f[]
.fi
.SH ARGUMENTS
.PP
\f[I]none\f[]
.SH DESCRIPTION
.PP
Check to see if the PMIx Client library has been initialized.
.SH RETURN VALUE
.PP
Returns \f[I]true\f[] if the PMIx Client has been initialized, and
\f[I]false\f[] if not.
.SH ERRORS
.SH NOTES
.SH SEE ALSO
.PP
\f[C]PMIx_Init\f[](3)
.SH AUTHORS
PMIx.

Просмотреть файл

@ -0,0 +1,60 @@
.TH "pmix_put" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
PMIx_Put \- Push a value into the client\[aq]s namespace
.SH SYNOPSIS
.IP
.nf
\f[C]
#include\ <pmix.h>
pmix_status_t\ PMIx_Put(pmix_scope_t\ scope,\ const\ char\ key[],\ pmix_value_t\ *val);
\f[]
.fi
.SH ARGUMENTS
.PP
\f[I]scope\f[] : Defines a scope for data "put" by PMI per the
following:
.IP
.nf
\f[C]
*\ PMI_LOCAL\ \-\ the\ data\ is\ intended\ only\ for\ other\ application
\ \ \ \ \ \ \ \ \ \ \ \ \ \ processes\ on\ the\ same\ node.\ Data\ marked\ in\ this\ way
\ \ \ \ \ \ \ \ \ \ \ \ \ \ will\ not\ be\ included\ in\ data\ packages\ sent\ to\ remote\ requestors
*\ PMI_REMOTE\ \-\ the\ data\ is\ intended\ solely\ for\ applications\ processes\ on
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ remote\ nodes.\ Data\ marked\ in\ this\ way\ will\ not\ be\ shared\ with
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ other\ processes\ on\ the\ same\ node
*\ PMI_GLOBAL\ \-\ the\ data\ is\ to\ be\ shared\ with\ all\ other\ requesting\ processes,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ regardless\ of\ location
\f[]
.fi
.PP
\f[I]key\f[] : String key identifying the information.
This can be either one of the PMIx defined attributes, or a
user\-defined value
.PP
\f[I]val\f[] : Pointer to a pmix_value_t structure containing the data to
be pushed along with the type of the provided data.
.SH DESCRIPTION
.PP
Push a value into the client\[aq]s namespace.
The client library will cache the information locally until
\f[I]PMIx_Commit\f[] is called.
The provided scope value is passed to the local PMIx server, which will
distribute the data as directed.
.SH RETURN VALUE
.PP
Returns PMIX_SUCCESS on success.
On error, a negative value corresponding to a PMIx errno is returned.
.SH ERRORS
.PP
PMIx errno values are defined in \f[C]pmix_common.h\f[].
.SH NOTES
.PP
See \[aq]pmix_common.h\[aq] for definition of the pmix_value_t
structure.
.SH SEE ALSO
.PP
\f[C]PMIx_Constants\f[](7), \f[C]PMIx_Structures\f[](7)
.SH AUTHORS
PMIx.

Просмотреть файл

@ -1,241 +1,35 @@
.TH "pmix" "7" "2015\-09\-09" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.TH "pmix" "7" "2015\-10\-29" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
Fabric Interface Library
Process Management Interface \- Exascale
.SH SYNOPSIS
.IP
.nf
\f[C]
#include\ <rdma/fabric.h>
#include\ <pmix.h>
\f[]
.fi
.PP
Libfabric is a high\-performance fabric software library designed to
provide low\-latency interfaces to fabric hardware.
.SH OVERVIEW
.PP
Libfabric provides \[aq]process direct I/O\[aq] to application software
communicating across fabric software and hardware.
Process direct I/O, historically referred to as RDMA, allows an
application to directly access network resources without operating
system interventions.
Data transfers can occur directly to and from application memory.
The Process Management Interface (PMI) has been used for quite some time
as a means of exchanging wireup information needed for interprocess
communication.
Two versions (PMI\-1 and PMI\-2) have been released as part of the MPICH
effort.
While PMI\-2 demonstrates better scaling properties than its PMI\-1
predecessor, attaining rapid launch and wireup of the roughly 1M
processes executing across 100k nodes expected for exascale operations
remains challenging.
.PP
There are two components to the libfabric software:
.PP
\f[I]Fabric Providers\f[] : Conceptually, a fabric provider may be
viewed as a local hardware NIC driver, though a provider is not limited
by this definition.
The first component of libfabric is a general purpose framework that is
capable of handling different types of fabric hardware.
All fabric hardware devices and their software drivers are required to
support this framework.
Devices and the drivers that plug into the libfabric framework are
referred to as fabric providers, or simply providers.
Provider details may be found in \f[C]fi_provider\f[](7).
.PP
\f[I]Fabric Interfaces\f[] : The second component is a set of
communication operations.
Libfabric defines several sets of communication functions that providers
can support.
It is not required that providers implement all the interfaces that are
defined; however, providers clearly indicate which interfaces they do
support.
.SH FABRIC INTERFACES
.PP
The fabric interfaces are designed such that they are cohesive and not
simply a union of disjoint interfaces.
The interfaces are logically divided into two groups: control interfaces
and communication operations.
The control interfaces are a common set of operations that provide
access to local communication resources, such as address vectors and
event queues.
The communication operations expose particular models of communication
and fabric functionality, such as message queues, remote memory access,
and atomic operations.
Communication operations are associated with fabric endpoints.
.PP
Applications will typically use the control interfaces to discover local
capabilities and allocate necessary resources.
They will then allocate and configure a communication endpoint to send
and receive data, or perform other types of data transfers, with remote
endpoints.
.SH CONTROL INTERFACES
.PP
The control interfaces APIs provide applications access to network
resources.
This involves listing all the interfaces available, obtaining the
capabilities of the interfaces and opening a provider.
.PP
\f[I]fi_getinfo \- Fabric Information\f[] : The fi_getinfo call is the
base call used to discover and request fabric services offered by the
system.
Applications can use this call to indicate the type of communication
that they desire.
The results from fi_getinfo, fi_info, are used to reserve and configure
fabric resources.
.PP
fi_getinfo returns a list of fi_info structures.
Each structure references a single fabric provider, indicating the
interfaces that the provider supports, along with a named set of
resources.
A fabric provider may include multiple fi_info structures in the
returned list.
.PP
\f[I]fi_fabric \- Fabric Domain\f[] : A fabric domain represents a
collection of hardware and software resources that access a single
physical or virtual network.
All network ports on a system that can communicate with each other
through the fabric belong to the same fabric domain.
A fabric domain shares network addresses and can span multiple
providers.
libfabric supports systems connected to multiple fabrics.
.PP
\f[I]fi_domain \- Access Domains\f[] : An access domain represents a
single logical connection into a fabric.
It may map to a single physical or virtual NIC or a port.
An access domain defines the boundary across which fabric resources may
be associated.
Each access domain belongs to a single fabric domain.
.PP
\f[I]fi_endpoint \- Fabric Endpoint\f[] : A fabric endpoint is a
communication portal.
An endpoint may be either active or passive.
Passive endpoints are used to listen for connection requests.
Active endpoints can perform data transfers.
Endpoints are configured with specific communication capabilities and
data transfer interfaces.
.PP
\f[I]fi_eq \- Event Queue\f[] : Event queues, are used to collect and
report the completion of asynchronous operations and events.
Event queues report events that are not directly associated with data
transfer operations.
.PP
\f[I]fi_cq \- Completion Queue\f[] : Completion queues are
high\-performance event queues used to report the completion of data
transfer operations.
.PP
\f[I]fi_cntr \- Event Counters\f[] : Event counters are used to report
the number of completed asynchronous operations.
Event counters are considered light\-weight, in that a completion simply
increments a counter, rather than placing an entry into an event queue.
.PP
\f[I]fi_mr \- Memory Region\f[] : Memory regions describe application
local memory buffers.
In order for fabric resources to access application memory, the
application must first grant permission to the fabric provider by
constructing a memory region.
Memory regions are required for specific types of data transfer
operations, such as RMA transfers (see below).
.PP
\f[I]fi_av \- Address Vector\f[] : Address vectors are used to map
higher level addresses, such as IP addresses, which may be more natural
for an application to use, into fabric specific addresses.
The use of address vectors allows providers to reduce the amount of
memory required to maintain large address look\-up tables, and eliminate
expensive address resolution and look\-up methods during data transfer
operations.
.SH DATA TRANSFER INTERFACES
.PP
Fabric endpoints are associated with multiple data transfer interfaces.
Each interface set is designed to support a specific style of
communication, with an endpoint allowing the different interfaces to be
used in conjunction.
The following data transfer interfaces are defined by libfabric.
.PP
\f[I]fi_msg \- Message Queue\f[] : Message queues expose a simple,
message\-based FIFO queue interface to the application.
Message data transfers allow applications to send and receive data with
message boundaries being maintained.
.PP
\f[I]fi_tagged \- Tagged Message Queues\f[] : Tagged message lists
expose send/receive data transfer operations built on the concept of
tagged messaging.
The tagged message queue is conceptually similar to standard message
queues, but with the addition of 64\-bit tags for each message.
Sent messages are matched with receive buffers that are tagged with a
similar value.
.PP
\f[I]fi_rma \- Remote Memory Access\f[] : RMA transfers are one\-sided
operations that read or write data directly to a remote memory region.
Other than defining the appropriate memory region, RMA operations do not
require interaction at the target side for the data transfer to
complete.
.PP
\f[I]fi_atomic \- Atomic\f[] : Atomic operations can perform one of
several operations on a remote memory region.
Atomic operations include well\-known functionality, such as atomic\-add
and compare\-and\-swap, plus several other pre\-defined calls.
Unlike other data transfer interfaces, atomic operations are aware of
the data formatting at the target memory region.
.SH LOGGING INTERFACE
.PP
Logging can be controlled using the FI_LOG_LEVEL, FI_LOG_PROV, and
FI_LOG_SUBSYS environment variables.
.PP
\f[I]FI_LOG_LEVEL\f[] : FI_LOG_LEVEL controls the amount of logging data
that is output.
The following log levels are defined.
.IP \[bu] 2
\f[I]Warn\f[] : Warn is the least verbose setting and is intended for
reporting errors or warnings.
.IP \[bu] 2
\f[I]Trace\f[] : Trace is more verbose and is meant to include
non\-detailed output helpful to tracing program execution.
.IP \[bu] 2
\f[I]Info\f[] : Info is high traffic and meant for detailed output.
.IP \[bu] 2
\f[I]Debug\f[] : Debug is high traffic and is likely to impact
application performance.
Debug output is only available if the library has been compiled with
debugging enabled.
.PP
\f[I]FI_LOG_PROV\f[] : The FI_LOG_PROV environment variable enables or
disables logging from specific providers.
Providers can be enabled by listing them in a comma separated fashion.
If the list begins with the \[aq]^\[aq] symbol, then the list will be
negated.
By default all providers are enabled.
.PP
Example: To enable logging from the psm and sockets provider:
FI_LOG_PROV="psm,sockets"
.PP
Example: To enable logging from providers other than psm:
FI_LOG_PROV="^psm"
.PP
\f[I]FI_LOG_SUBSYS\f[] : The FI_LOG_SUBSYS environment variable enables
or disables logging at the subsystem level.
The syntax for enabling or disabling subsystems is similar to that used
for FI_LOG_PROV.
The following subsystems are defined.
.IP \[bu] 2
\f[I]core\f[] : Provides output related to the core framework and its
management of providers.
.IP \[bu] 2
\f[I]fabric\f[] : Provides output specific to interactions associated
with the fabric object.
.IP \[bu] 2
\f[I]domain\f[] : Provides output specific to interactions associated
with the domain object.
.IP \[bu] 2
\f[I]ep_ctrl\f[] : Provides output specific to endpoint non\-data
transfer operations, such as CM operations.
.IP \[bu] 2
\f[I]ep_data\f[] : Provides output specific to endpoint data transfer
operations.
.IP \[bu] 2
\f[I]av\f[] : Provides output specific to address vector operations.
.IP \[bu] 2
\f[I]cq\f[] : Provides output specific to completion queue operations.
.IP \[bu] 2
\f[I]eq\f[] : Provides output specific to event queue operations.
.IP \[bu] 2
\f[I]mr\f[] : Provides output specific to memory registration.
PMI Exascale (PMIx) represents an attempt to resolve these questions by
providing an extended version of the PMI standard specifically designed
to support clusters up to and including exascale sizes.
The overall objective of the project is not to branch the existing
pseudo\-standard definitions \- in fact, PMIx fully supports both of the
existing PMI\-1 and PMI\-2 APIs \- but rather to (a) augment and extend
those APIs to eliminate some current restrictions that impact
scalability, and (b) provide a reference implementation of the
PMI\-server that demonstrates the desired level of scalability.
.SH SEE ALSO
.PP
\f[C]fi_provider\f[](7), \f[C]fi_getinfo\f[](3),
\f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), \f[C]fi_av\f[](3),
\f[C]fi_eq\f[](3), \f[C]fi_cq\f[](3), \f[C]fi_cntr\f[](3),
\f[C]fi_mr\f[](3)
.SH AUTHORS
PMIx.

Просмотреть файл

@ -1,4 +1,4 @@
.TH "pmix_constants" "7" "2015\-09\-16" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.TH "pmix_constants" "7" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
PMIx Constants
@ -87,8 +87,6 @@ Unlike other data transfer interfaces, atomic operations are aware of
the data formatting at the target memory region.
.SH SEE ALSO
.PP
\f[C]pmix\f[](7), \f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3),
\f[C]fi_domain\f[](3), \f[C]fi_av\f[](3), \f[C]fi_eq\f[](3),
\f[C]fi_cq\f[](3), \f[C]fi_cntr\f[](3), \f[C]fi_mr\f[](3)
\f[C]pmix\f[](7)
.SH AUTHORS
PMIx.

Просмотреть файл

@ -34,7 +34,7 @@
#include "src/class/pmix_object.h"
#include "src/class/pmix_pointer_array.h"
#include "src/class/pmix_list.h"
#include <pmix_common.h>
#include <pmix/pmix_common.h>
BEGIN_C_DECLS

Просмотреть файл

@ -40,7 +40,7 @@
#include "src/class/pmix_list.h"
#include <pmix_common.h>
#include <pmix/pmix_common.h>
BEGIN_C_DECLS

Просмотреть файл

@ -30,7 +30,7 @@
#endif
#include "src/class/pmix_object.h"
#include <pmix_common.h>
#include <pmix/pmix_common.h>
BEGIN_C_DECLS

Просмотреть файл

@ -507,7 +507,7 @@ int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn)
PMI2_CHECK();
if (NULL == jobid || NULL == conn) {
if (NULL == conn) {
return PMI2_ERR_INVALID_ARGS;
}
@ -524,10 +524,6 @@ int PMI2_Job_Disconnect(const char jobid[])
PMI2_CHECK();
if (NULL == jobid) {
return PMI2_ERR_INVALID_ARGS;
}
(void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), sizeof(myproc.nspace));
proc.rank = PMIX_RANK_WILDCARD;
rc = PMIx_Disconnect(&proc, 1, NULL, 0);

Просмотреть файл

@ -14,7 +14,7 @@
#include <private/pmix_socket_errno.h>
#include <pmix.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include <pmix_server.h>
#include "src/include/pmix_globals.h"

Просмотреть файл

@ -30,7 +30,7 @@
#endif
#include PMIX_EVENT_HEADER
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include "src/buffer_ops/types.h"
#include "src/class/pmix_hash_table.h"

Просмотреть файл

@ -12,7 +12,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include "src/include/pmix_globals.h"
#include "src/util/argv.h"

Просмотреть файл

@ -10,7 +10,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include "src/include/pmix_globals.h"
#include "src/util/argv.h"

Просмотреть файл

@ -13,7 +13,7 @@
#include <private/types.h>
#include <private/pmix_stdint.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include "src/include/pmix_globals.h"
#ifdef HAVE_STRING_H

Просмотреть файл

@ -21,7 +21,7 @@
#include <private/pmix_socket_errno.h>
#include <pmix_server.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include "src/include/pmix_globals.h"
#ifdef HAVE_STRING_H
@ -44,6 +44,7 @@
#include <sys/types.h>
#endif
#include <ctype.h>
#include <sys/stat.h>
#include PMIX_EVENT_HEADER
#include "src/util/argv.h"
@ -140,11 +141,13 @@ static void _queue_message(int fd, short args, void *cbdata)
{
pmix_usock_queue_t *queue = (pmix_usock_queue_t*)cbdata;
pmix_usock_send_t *snd;
pmix_output_verbose(2, pmix_globals.debug_output,
"[%s:%d] queue callback called: reply to %s:%d on tag %d",
__FILE__, __LINE__,
(queue->peer)->info->nptr->nspace,
(queue->peer)->info->rank, (queue->tag));
snd = PMIX_NEW(pmix_usock_send_t);
snd->hdr.pindex = pmix_globals.pindex;
snd->hdr.tag = (queue->tag);
@ -222,12 +225,16 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
pmix_globals.myid.rank = strtol(evar, NULL, 10);
}
/* initialize the datatype support */
pmix_bfrop_open();
/* setup the server-specific globals */
PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t);
pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1);
PMIX_CONSTRUCT(&pmix_server_globals.collectives, pmix_list_t);
PMIX_CONSTRUCT(&pmix_server_globals.remote_pnd, pmix_list_t);
PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t);
PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_buffer_t);
/* see if debug is requested */
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
@ -243,9 +250,6 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
memset(&pmix_host_server, 0, sizeof(pmix_server_module_t));
pmix_host_server = *module;
/* initialize the datatype support */
pmix_bfrop_open();
/* init security */
pmix_sec_init();
security_mode = strdup(pmix_sec.name);
@ -267,17 +271,19 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/pmix-%d", tdir, pid);
asprintf(&myuri, "%s:%lu:%s", pmix_globals.myid.nspace, (unsigned long)pmix_globals.myid.rank, myaddress.sun_path);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server constructed uri %s", myuri);
return PMIX_SUCCESS;
}
pmix_status_t PMIx_server_init(pmix_server_module_t *module)
pmix_status_t PMIx_server_init(pmix_server_module_t *module,
pmix_info_t info[], size_t ninfo)
{
pmix_usock_posted_recv_t *req;
pmix_status_t rc;
size_t n;
pmix_kval_t kv;
++pmix_globals.init_cntr;
if (1 < pmix_globals.init_cntr) {
@ -298,6 +304,34 @@ pmix_status_t PMIx_server_init(pmix_server_module_t *module)
if (NULL == (pmix_globals.evbase = pmix_start_progress_thread())) {
return PMIX_ERR_INIT;
}
/* check the info keys for a directive about the uid/gid
* to be set for the rendezvous file, and any info we
* need to provide to every client */
if (NULL != info) {
PMIX_CONSTRUCT(&kv, pmix_kval_t);
for (n=0; n < ninfo; n++) {
if (0 == strcmp(info[n].key, PMIX_USERID)) {
/* the userid is in the uint32_t storage */
chown(myaddress.sun_path, info[n].value.data.uint32, -1);
} else if (0 == strcmp(info[n].key, PMIX_GRPID)) {
/* the grpid is in the uint32_t storage */
chown(myaddress.sun_path, -1, info[n].value.data.uint32);
} else {
/* store and pass along to every client */
kv.key = info[n].key;
kv.value = &info[n].value;
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pmix_server_globals.gdata, &kv, 1, PMIX_KVAL))) {
PMIX_ERROR_LOG(rc);
PMIX_DESTRUCT(&kv);
return rc;
}
}
}
/* protect the incoming data */
kv.key = NULL;
kv.value = NULL;
PMIX_DESTRUCT(&kv);
}
/* setup the wildcard recv for inbound messages from clients */
req = PMIX_NEW(pmix_usock_posted_recv_t);
@ -329,10 +363,14 @@ static void cleanup_server_state(void)
PMIX_LIST_DESTRUCT(&pmix_server_globals.collectives);
PMIX_LIST_DESTRUCT(&pmix_server_globals.remote_pnd);
PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs);
PMIX_DESTRUCT(&pmix_server_globals.gdata);
if (NULL != myuri) {
free(myuri);
}
if (NULL != security_mode) {
free(security_mode);
}
pmix_bfrop_close();
pmix_sec_finalize();
@ -571,6 +609,45 @@ pmix_status_t PMIx_server_register_nspace(const char nspace[], int nlocalprocs,
return PMIX_SUCCESS;
}
/*
 * Event-library callback that drops a namespace from the global list.
 *
 * cbdata carries a pmix_setup_caddy_t whose proc.nspace names the
 * namespace to purge.  The first entry of pmix_globals.nspaces with a
 * matching name is unlinked and released; if nothing matches, the list
 * is left untouched.  The caddy is released in every case.
 *
 * sd and args are supplied by the event library and are not used.
 */
static void _deregister_nspace(int sd, short args, void *cbdata)
{
    pmix_setup_caddy_t *caddy = (pmix_setup_caddy_t*)cbdata;
    pmix_nspace_t *ns;
    pmix_nspace_t *match = NULL;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:server _deregister_nspace %s",
                        caddy->proc.nspace);

    /* locate the tracker for the requested namespace */
    PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) {
        if (0 != strcmp(ns->nspace, caddy->proc.nspace)) {
            continue;
        }
        match = ns;
        break;
    }

    /* unlink and release it only once we are outside the iteration */
    if (NULL != match) {
        pmix_list_remove_item(&pmix_globals.nspaces, &match->super);
        PMIX_RELEASE(match);
    }

    PMIX_RELEASE(caddy);
}
/*
 * Ask the server to purge everything it holds for the given namespace.
 *
 * The request is not executed in the caller's context: the name is
 * copied into a caddy and _deregister_nspace is activated on
 * pmix_globals.evbase, so the namespace list is only modified from the
 * event library.
 *
 * nspace - NUL-terminated namespace name; a NULL pointer is ignored.
 *
 * The function returns nothing, so a failed caddy allocation silently
 * drops the request.
 */
void PMIx_server_deregister_nspace(const char nspace[])
{
    pmix_setup_caddy_t *cd;

    /* a NULL name cannot match anything, and handing it to the "%s"
     * conversion or to strncpy below would be undefined behavior */
    if (NULL == nspace) {
        return;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:server deregister nspace %s",
                        nspace);

    cd = PMIX_NEW(pmix_setup_caddy_t);
    if (NULL == cd) {
        /* no way to report the failure through a void interface */
        return;
    }

    /* strncpy does not terminate the destination when the source is at
     * least PMIX_MAX_NSLEN characters long, so terminate explicitly */
    (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN);
    cd->proc.nspace[sizeof(cd->proc.nspace) - 1] = '\0';

    /* we have to push this into our event library to avoid
     * potential threading issues */
    event_assign(&cd->ev, pmix_globals.evbase, -1,
                 EV_WRITE, _deregister_nspace, cd);
    event_active(&cd->ev, EV_WRITE, 1);
}
static void _execute_collective(int sd, short args, void *cbdata)
{
pmix_trkr_caddy_t *tcd = (pmix_trkr_caddy_t*)cbdata;
@ -755,6 +832,60 @@ pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc,
return PMIX_SUCCESS;
}
/*
 * Event-library callback that removes one client rank from its
 * namespace's rank list.
 *
 * cbdata carries a pmix_setup_caddy_t whose proc field names the
 * namespace and rank to drop.  If the namespace is not present in
 * pmix_globals.nspaces, or no rank entry matches, nothing is removed.
 * The caddy is released in every case.
 *
 * sd and args are supplied by the event library and are not used.
 */
static void _deregister_client(int sd, short args, void *cbdata)
{
    pmix_setup_caddy_t *caddy = (pmix_setup_caddy_t*)cbdata;
    pmix_nspace_t *ns;
    pmix_nspace_t *found = NULL;
    pmix_rank_info_t *rinfo;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:server _deregister_client for nspace %s rank %d",
                        caddy->proc.nspace, caddy->proc.rank);

    /* look up the namespace this client belongs to */
    PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_nspace_t) {
        if (0 == strcmp(ns->nspace, caddy->proc.nspace)) {
            found = ns;
            break;
        }
    }

    if (NULL != found) {
        /* find and remove this client
         * NOTE(review): assumes found->server is non-NULL for every
         * namespace on the list - confirm against the registration path */
        PMIX_LIST_FOREACH(rinfo, &found->server->ranks, pmix_rank_info_t) {
            if (rinfo->rank != caddy->proc.rank) {
                continue;
            }
            pmix_list_remove_item(&found->server->ranks, &rinfo->super);
            PMIX_RELEASE(rinfo);
            break;
        }
    }

    PMIX_RELEASE(caddy);
}
/*
 * Ask the server to forget a single client process.
 *
 * The namespace and rank are copied into a caddy and
 * _deregister_client is activated on pmix_globals.evbase, so the rank
 * list is only modified from the event library.
 *
 * proc - identifies the client (namespace + rank); a NULL pointer is
 *        ignored.
 *
 * The function returns nothing, so a failed caddy allocation silently
 * drops the request.
 */
void PMIx_server_deregister_client(const pmix_proc_t *proc)
{
    pmix_setup_caddy_t *cd;

    /* proc is dereferenced by the log call below, so reject NULL first */
    if (NULL == proc) {
        return;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:server deregister client %s:%d",
                        proc->nspace, proc->rank);

    cd = PMIX_NEW(pmix_setup_caddy_t);
    if (NULL == cd) {
        /* no way to report the failure through a void interface */
        return;
    }

    /* strncpy does not terminate the destination when the source is at
     * least PMIX_MAX_NSLEN characters long, so terminate explicitly */
    (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN);
    cd->proc.nspace[sizeof(cd->proc.nspace) - 1] = '\0';
    cd->proc.rank = proc->rank;

    /* we have to push this into our event library to avoid
     * potential threading issues */
    event_assign(&cd->ev, pmix_globals.evbase, -1,
                 EV_WRITE, _deregister_client, cd);
    event_active(&cd->ev, EV_WRITE, 1);
}
/* setup the envars for a child process */
pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char ***env)
{
@ -1940,6 +2071,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
if (PMIX_REQ_CMD == cmd) {
reply = PMIX_NEW(pmix_buffer_t);
pmix_bfrop.copy_payload(reply, &(peer->info->nptr->server->job_info));
pmix_bfrop.copy_payload(reply, &(pmix_server_globals.gdata));
PMIX_SERVER_QUEUE_REPLY(peer, tag, reply);
return PMIX_SUCCESS;
}

Просмотреть файл

@ -20,7 +20,6 @@
#include <private/pmix_stdint.h>
#include <private/pmix_socket_errno.h>
#include <pmix_common.h>
#include <pmix_server.h>
#include "src/include/pmix_globals.h"
@ -115,7 +114,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf,
pmix_nspace_t *ns, *nptr;
pmix_info_t *info=NULL;
size_t ninfo=0;
pmix_dmdx_local_t *lcd, *cd;
pmix_dmdx_local_t *lcd;
pmix_rank_info_t *iptr;
pmix_hash_table_t *ht;
bool local;
@ -458,7 +457,6 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata)
pmix_kval_t *kp;
pmix_nspace_t *ns, *nptr;
pmix_status_t rc;
pmix_buffer_t xfer, pbkt, *xptr;
pmix_output_verbose(2, pmix_globals.debug_output,
"[%s:%d] process dmdx reply from %s:%d",

Просмотреть файл

@ -41,6 +41,7 @@
#include <sys/types.h>
#endif
#include <ctype.h>
#include <sys/stat.h>
#include PMIX_EVENT_HEADER
#include <pthread.h>
@ -86,6 +87,11 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
printf("%s:%d bind() failed", __FILE__, __LINE__);
return PMIX_ERROR;
}
/* set the mode as required */
if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) {
pmix_output(0, "CANNOT CHMOD %s", address->sun_path);
return PMIX_ERROR;
}
/* setup listen backlog to maximum allowed by kernel */
if (listen(pmix_server_globals.listen_socket, SOMAXCONN) < 0) {

Просмотреть файл

@ -132,7 +132,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
pmix_nspace_t *nptr;
pmix_rank_info_t *info;
pmix_dmdx_remote_t *dcd, *dcdnext;
pmix_buffer_t pbkt, xfer;
pmix_buffer_t pbkt;
pmix_value_t *val;
char *data;
size_t sz;

Просмотреть файл

@ -13,7 +13,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include <pmix_server.h>
#include "src/usock/usock.h"
#include "src/util/hash.h"
@ -145,6 +145,7 @@ typedef struct {
bool listen_thread_active; // listen thread is running
int listen_socket; // socket listener is watching
int stop_thread[2]; // pipe used to stop listener thread
pmix_buffer_t gdata; // cache of data given to me for passing to all clients
} pmix_server_globals_t;
#define PMIX_PEER_CADDY(c, p, t) \

Просмотреть файл

@ -32,7 +32,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <private/types.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>

Просмотреть файл

@ -41,7 +41,7 @@
#include <sys/types.h>
#endif
#include <pmix_common.h>
#include <pmix/pmix_common.h>
BEGIN_C_DECLS

Просмотреть файл

@ -31,7 +31,7 @@
#include <stdlib.h>
#endif
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include "src/util/error.h"
#include "src/include/pmix_globals.h"

Просмотреть файл

@ -23,7 +23,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include "src/util/output.h"
BEGIN_C_DECLS

Просмотреть файл

@ -14,7 +14,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>

Просмотреть файл

@ -22,7 +22,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include <stdio.h>
#include <stdarg.h>

Просмотреть файл

@ -22,7 +22,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include <stdio.h>
#include <stdlib.h>

Просмотреть файл

@ -37,7 +37,7 @@
#include <crt_externs.h>
#endif
#include <pmix_common.h>
#include <pmix/pmix_common.h>
BEGIN_C_DECLS

Просмотреть файл

@ -25,7 +25,7 @@
#include "src/util/error.h"
#include "src/util/fd.h"
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include "src/util/progress_threads.h"
static volatile bool evlib_active;

Просмотреть файл

@ -10,7 +10,7 @@
#include <private/autogen/config.h>
#include <pmix/rename.h>
#include <pmix_common.h>
#include <pmix/pmix_common.h>
#include <stdlib.h>
#include <stdarg.h>

Просмотреть файл

@ -336,24 +336,9 @@ static int test_item5(void)
static int test_item6(void)
{
int rc = 0;
char nspace[100];
log_error("pmix does not support this functionality\n");
return rc;
if (0 == rank) {
if (PMI_SUCCESS != (rc = PMI_KVS_Create(nspace, sizeof(nspace)))) {
log_fatal("PMI_KVS_Create failed: %d\n", rc);
return rc;
}
log_info("nspace=%s\n", nspace);
if (PMI_SUCCESS != (rc = PMI_KVS_Destroy(nspace))) {
log_fatal("PMI_KVS_Destroy failed: %d\n", rc);
return rc;
}
}
return rc;
}
static int test_item7(void)

Просмотреть файл

@ -81,7 +81,7 @@ int main(int argc, char **argv)
}
/* setup the server library */
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) {
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) {
TEST_ERROR(("Init failed with error %d", rc));
FREE_TEST_PARAMS(params);
return rc;

Просмотреть файл

@ -191,7 +191,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Testing version %s\n", PMIx_Get_version());
/* setup the server library */
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) {
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) {
fprintf(stderr, "Init failed with error %d\n", rc);
return rc;
}

Просмотреть файл

@ -131,7 +131,8 @@ OPAL_MODULE_DECLSPEC int pmix1_store_local(const opal_process_name_t *proc,
opal_value_t *val);
/**** SERVER SOUTHBOUND FUNCTIONS ****/
OPAL_MODULE_DECLSPEC int pmix1_server_init(opal_pmix_server_module_t *module);
OPAL_MODULE_DECLSPEC int pmix1_server_init(opal_pmix_server_module_t *module,
opal_list_t *info);
OPAL_MODULE_DECLSPEC int pmix1_server_finalize(void);
OPAL_MODULE_DECLSPEC int pmix1_server_gen_regex(const char *input, char **regex);
OPAL_MODULE_DECLSPEC int pmix1_server_gen_ppn(const char *input, char **ppn);
@ -140,11 +141,13 @@ OPAL_MODULE_DECLSPEC int pmix1_server_register_nspace(opal_jobid_t jobid,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
OPAL_MODULE_DECLSPEC void pmix1_server_deregister_nspace(opal_jobid_t jobid);
OPAL_MODULE_DECLSPEC int pmix1_server_register_client(const opal_process_name_t *proc,
uid_t uid, gid_t gid,
void *server_object,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
OPAL_MODULE_DECLSPEC void pmix1_server_deregister_client(const opal_process_name_t *proc);
OPAL_MODULE_DECLSPEC int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env);
OPAL_MODULE_DECLSPEC int pmix1_server_dmodex(const opal_process_name_t *proc,
opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);

Просмотреть файл

@ -96,19 +96,41 @@ static void errreg_cbfunc(pmix_status_t status,
status, errhandler_ref);
}
int pmix1_server_init(opal_pmix_server_module_t *module)
int pmix1_server_init(opal_pmix_server_module_t *module,
opal_list_t *info)
{
pmix_status_t rc;
int dbg;
opal_value_t *kv;
pmix_info_t *pinfo;
size_t sz, n;
if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) {
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
putenv(dbgvalue);
}
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) {
/* convert the list to an array of pmix_info_t */
if (NULL != info) {
sz = opal_list_get_size(info);
PMIX_INFO_CREATE(pinfo, sz);
n = 0;
OPAL_LIST_FOREACH(kv, info, opal_value_t) {
(void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN);
pmix1_value_load(&pinfo[n].value, kv);
++n;
}
} else {
sz = 0;
pinfo = NULL;
}
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) {
PMIX_INFO_FREE(pinfo, sz);
return pmix1_convert_rc(rc);
}
PMIX_INFO_FREE(pinfo, sz);
/* record the host module */
host_module = module;
@ -225,6 +247,22 @@ int pmix1_server_register_nspace(opal_jobid_t jobid,
return pmix1_convert_rc(rc);
}
/*
 * Deregister the namespace that corresponds to an OPAL jobid.
 *
 * The component's jobid tracker list is searched for the jobid.  When a
 * tracker is found, the PMIx server is told to deregister the tracker's
 * namespace, and the tracker is then unlinked from the list and
 * released.  An unknown jobid is silently ignored.
 */
void pmix1_server_deregister_nspace(opal_jobid_t jobid)
{
    opal_pmix1_jobid_trkr_t *trk;
    opal_pmix1_jobid_trkr_t *hit = NULL;

    /* search for the tracker that maps this jobid to an nspace */
    OPAL_LIST_FOREACH(trk, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
        if (trk->jobid != jobid) {
            continue;
        }
        hit = trk;
        break;
    }

    /* if we don't already have it, we can ignore this */
    if (NULL == hit) {
        return;
    }

    /* tell the server to deregister while the nspace string is still valid */
    PMIx_server_deregister_nspace(hit->nspace);

    /* now get rid of it from our list */
    opal_list_remove_item(&mca_pmix_pmix1xx_component.jobids, &hit->super);
    OBJ_RELEASE(hit);
}
int pmix1_server_register_client(const opal_process_name_t *proc,
uid_t uid, gid_t gid,
@ -252,6 +290,23 @@ int pmix1_server_register_client(const opal_process_name_t *proc,
return pmix1_convert_rc(rc);
}
/*
 * Deregister a single client process from the PMIx server.
 *
 * The component's jobid tracker list is searched for proc->jobid.  When
 * a tracker is found, a pmix_proc_t is filled in from the tracker's
 * namespace and proc->vpid and handed to PMIx_server_deregister_client.
 * A jobid with no tracker is silently ignored.
 *
 * proc - OPAL name (jobid + vpid) of the client to drop.
 */
void pmix1_server_deregister_client(const opal_process_name_t *proc)
{
    opal_pmix1_jobid_trkr_t *jptr;
    pmix_proc_t p;

    /* if we don't already have it, we can ignore this */
    OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
        if (jptr->jobid == proc->jobid) {
            /* found it - tell the server to deregister.
             * p is an uninitialized stack object, so strncpy alone
             * leaves nspace unterminated when the tracked name fills
             * all PMIX_MAX_NSLEN bytes; terminate it explicitly */
            (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN);
            p.nspace[sizeof(p.nspace) - 1] = '\0';
            p.rank = proc->vpid;
            PMIx_server_deregister_client(&p);
            return;
        }
    }
}
int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env)
{

Просмотреть файл

@ -49,47 +49,49 @@ static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_pmix1xx_module = {
/* client APIs */
pmix1_client_init,
pmix1_client_finalize,
pmix1_initialized,
pmix1_abort,
pmix1_commit,
pmix1_fence,
pmix1_fencenb,
pmix1_put,
pmix1_get,
pmix1_getnb,
pmix1_publish,
pmix1_publishnb,
pmix1_lookup,
pmix1_lookupnb,
pmix1_unpublish,
pmix1_unpublishnb,
pmix1_spawn,
pmix1_spawnnb,
pmix1_connect,
pmix1_connectnb,
pmix1_disconnect,
pmix1_disconnectnb,
pmix1_resolve_peers,
pmix1_resolve_nodes,
.init = pmix1_client_init,
.finalize = pmix1_client_finalize,
.initialized = pmix1_initialized,
.abort = pmix1_abort,
.commit = pmix1_commit,
.fence = pmix1_fence,
.fence_nb = pmix1_fencenb,
.put = pmix1_put,
.get = pmix1_get,
.get_nb = pmix1_getnb,
.publish = pmix1_publish,
.publish_nb = pmix1_publishnb,
.lookup = pmix1_lookup,
.lookup_nb = pmix1_lookupnb,
.unpublish = pmix1_unpublish,
.unpublish_nb = pmix1_unpublishnb,
.spawn = pmix1_spawn,
.spawn_nb = pmix1_spawnnb,
.connect = pmix1_connect,
.connect_nb = pmix1_connectnb,
.disconnect = pmix1_disconnect,
.disconnect_nb = pmix1_disconnectnb,
.resolve_peers = pmix1_resolve_peers,
.resolve_nodes = pmix1_resolve_nodes,
/* server APIs */
pmix1_server_init,
pmix1_server_finalize,
pmix1_server_gen_regex,
pmix1_server_gen_ppn,
pmix1_server_register_nspace,
pmix1_server_register_client,
pmix1_server_setup_fork,
pmix1_server_dmodex,
pmix1_server_notify_error,
.server_init = pmix1_server_init,
.server_finalize = pmix1_server_finalize,
.generate_regex = pmix1_server_gen_regex,
.generate_ppn = pmix1_server_gen_ppn,
.server_register_nspace = pmix1_server_register_nspace,
.server_deregister_nspace = pmix1_server_deregister_nspace,
.server_register_client = pmix1_server_register_client,
.server_deregister_client = pmix1_server_deregister_client,
.server_setup_fork = pmix1_server_setup_fork,
.server_dmodex_request = pmix1_server_dmodex,
.server_notify_error = pmix1_server_notify_error,
/* utility APIs */
PMIx_Get_version,
opal_pmix_base_register_handler,
opal_pmix_base_deregister_handler,
pmix1_store_local,
pmix1_get_nspace,
pmix1_register_jobid
.get_version = PMIx_Get_version,
.register_errhandler = opal_pmix_base_register_handler,
.deregister_errhandler = opal_pmix_base_deregister_handler,
.store_local = pmix1_store_local,
.get_nspace = pmix1_get_nspace,
.register_jobid = pmix1_register_jobid
};
static const char *pmix1_get_nspace(opal_jobid_t jobid)
@ -455,7 +457,8 @@ int pmix1_value_unload(opal_value_t *kv,
case PMIX_BYTE_OBJECT:
kv->type = OPAL_BYTE_OBJECT;
if (NULL != v->data.bo.bytes && 0 < v->data.bo.size) {
kv->data.bo.bytes = (uint8_t*)v->data.bo.bytes;
kv->data.bo.bytes = (uint8_t*)malloc(v->data.bo.size);
memcpy(kv->data.bo.bytes, v->data.bo.bytes, v->data.bo.size);
kv->data.bo.size = (int)v->data.bo.size;
} else {
kv->data.bo.bytes = NULL;

Просмотреть файл

@ -53,47 +53,25 @@ static const char *s1_get_nspace(opal_jobid_t jobid);
static void s1_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_s1_module = {
s1_init,
s1_fini,
s1_initialized,
s1_abort,
s1_commit,
s1_fence,
NULL,
s1_put,
s1_get,
NULL,
s1_publish,
NULL,
s1_lookup,
NULL,
s1_unpublish,
NULL,
s1_spawn,
NULL,
s1_job_connect,
NULL,
s1_job_disconnect,
NULL,
NULL,
NULL,
/* server APIs */
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
/* utility APIs */
NULL,
opal_pmix_base_register_handler,
opal_pmix_base_deregister_handler,
s1_store_local,
s1_get_nspace,
s1_register_jobid
.init = s1_init,
.finalize = s1_fini,
.initialized = s1_initialized,
.abort = s1_abort,
.commit = s1_commit,
.fence = s1_fence,
.put = s1_put,
.get = s1_get,
.publish = s1_publish,
.lookup = s1_lookup,
.unpublish = s1_unpublish,
.spawn = s1_spawn,
.connect = s1_job_connect,
.disconnect = s1_job_disconnect,
.register_errhandler = opal_pmix_base_register_handler,
.deregister_errhandler = opal_pmix_base_deregister_handler,
.store_local = s1_store_local,
.get_nspace = s1_get_nspace,
.register_jobid = s1_register_jobid
};
// usage accounting

Просмотреть файл

@ -60,47 +60,25 @@ static const char *s2_get_nspace(opal_jobid_t jobid);
static void s2_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_s2_module = {
s2_init,
s2_fini,
s2_initialized,
s2_abort,
s2_commit,
s2_fence,
NULL,
s2_put,
s2_get,
NULL,
s2_publish,
NULL,
s2_lookup,
NULL,
s2_unpublish,
NULL,
s2_spawn,
NULL,
s2_job_connect,
NULL,
s2_job_disconnect,
NULL,
NULL,
NULL,
/* server APIs */
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
/* utility APIs */
NULL,
opal_pmix_base_register_handler,
opal_pmix_base_deregister_handler,
s2_store_local,
s2_get_nspace,
s2_register_jobid
.init = s2_init,
.finalize = s2_fini,
.initialized = s2_initialized,
.abort = s2_abort,
.commit = s2_commit,
.fence = s2_fence,
.put = s2_put,
.get = s2_get,
.publish = s2_publish,
.lookup = s2_lookup,
.unpublish = s2_unpublish,
.spawn = s2_spawn,
.connect = s2_job_connect,
.disconnect = s2_job_disconnect,
.register_errhandler = opal_pmix_base_register_handler,
.deregister_errhandler = opal_pmix_base_deregister_handler,
.store_local = s2_store_local,
.get_nspace = s2_get_nspace,
.register_jobid = s2_register_jobid
};
// usage accounting

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -15,6 +15,7 @@
#include "opal/class/opal_list.h"
#include "opal/mca/event/event.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
@ -709,6 +710,10 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
}
OBJ_RELEASE(map);
jdata->map = NULL;
/* tell the PMIx server to release its data */
if (NULL != opal_pmix.server_deregister_nspace) {
opal_pmix.server_deregister_nspace(jdata->jobid);
}
}
CHECK_ALIVE:

Просмотреть файл

@ -16,6 +16,7 @@
#include <string.h>
#include "opal/util/output.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/filem/filem.h"
@ -381,6 +382,10 @@ void check_complete(int fd, short args, void *cbdata)
}
OBJ_RELEASE(map);
jdata->map = NULL;
/* tell the PMIx server to release its data */
if (NULL != opal_pmix.server_deregister_nspace) {
opal_pmix.server_deregister_nspace(jdata->jobid);
}
}
CHECK_ALIVE:

Просмотреть файл

@ -180,6 +180,7 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel,
int pmix_server_init(void)
{
int rc;
opal_list_t info;
if (orte_pmix_server_globals.initialized) {
return ORTE_SUCCESS;
@ -215,11 +216,32 @@ int pmix_server_init(void)
/* ensure the PMIx server uses the proper rendezvous directory */
opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ);
/* pass the server the local topology - we do this so the procs won't read the
* topology themselves as this could overwhelm the local
* system on large-scale SMPs */
OBJ_CONSTRUCT(&info, opal_list_t);
if (NULL != opal_hwloc_topology) {
char *xmlbuffer=NULL;
int len;
opal_value_t *kv;
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
OBJ_RELEASE(kv);
OBJ_DESTRUCT(&info);
return ORTE_ERROR;
}
kv->data.string = xmlbuffer;
kv->type = OPAL_STRING;
opal_list_append(&info, &kv->super);
}
/* setup the local server */
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server))) {
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) {
ORTE_ERROR_LOG(rc);
/* memory cleanup will occur when finalize is called */
}
OPAL_LIST_DESTRUCT(&info);
/* if the universal server wasn't specified, then we use
* our own HNP for that purpose */

Просмотреть файл

@ -103,23 +103,6 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
uid = geteuid();
gid = getegid();
/* local topology - we do this so the procs won't read the
* topology themselves as this could overwhelm the local
* system on large-scale SMPs */
if (NULL != opal_hwloc_topology) {
char *xmlbuffer=NULL;
int len;
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
OBJ_RELEASE(kv);
return OPAL_ERROR;
}
kv->data.string = xmlbuffer;
kv->type = OPAL_STRING;
opal_list_append(info, &kv->super);
}
/* jobid */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_JOBID);
@ -262,6 +245,7 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
if (NULL != tmp) {
opal_argv_append_nosize(&procs, tmp);
free(tmp);
} else {
opal_argv_append_nosize(&procs, "UNBOUND");
}