diff --git a/.gitignore b/.gitignore index 445b99ac82..8efe944358 100644 --- a/.gitignore +++ b/.gitignore @@ -301,7 +301,6 @@ opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h opal/mca/installdirs/config/install_dirs.h opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h -opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h.in diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index 722352980d..64c7895d59 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -538,8 +538,11 @@ typedef int (*opal_pmix_base_module_resolve_nodes_fn_t)(opal_jobid_t jobid, char * SERVER APIs * ************************************************************/ -/* Initialize the server support library */ -typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module); +/* Initialize the server support library - must pass the callback + * module for the server to use, plus any attributes we want to + * pass down to it */ +typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module, + opal_list_t *info); /* Finalize the server support library */ typedef int (*opal_pmix_base_module_server_finalize_fn_t)(void); @@ -606,6 +609,13 @@ typedef int (*opal_pmix_base_module_server_register_nspace_fn_t)(opal_jobid_t jo opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister an nspace. Instruct the PMIx server to purge + * all info relating to the provided jobid so that memory + * can be freed. Note that the server will automatically + * purge all info relating to any clients it has from + * this nspace */ +typedef void (*opal_pmix_base_module_server_deregister_nspace_fn_t)(opal_jobid_t jobid); + /* Register a client process with the PMIx server library. 
The * expected user ID and group ID of the child process helps the * server library to properly authenticate clients as they connect @@ -625,6 +635,15 @@ typedef int (*opal_pmix_base_module_server_register_client_fn_t)(const opal_proc opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister a client. Instruct the PMIx server to purge + * all info relating to the provided client so that memory + * can be freed. As per above note, the server will automatically + * free all client-related data when the nspace is deregistered, + * so there is no need to call this function during normal + * finalize operations. Instead, this is provided for use + * during exception operations */ +typedef void (*opal_pmix_base_module_server_deregister_client_fn_t)(const opal_process_name_t *proc); + /* Setup the environment of a child process to be forked * by the host so it can correctly interact with the PMIx * server. The PMIx client needs some setup information @@ -725,47 +744,49 @@ typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, co */ typedef struct { /* client APIs */ - opal_pmix_base_module_init_fn_t init; - opal_pmix_base_module_fini_fn_t finalize; - opal_pmix_base_module_initialized_fn_t initialized; - opal_pmix_base_module_abort_fn_t abort; - opal_pmix_base_module_commit_fn_t commit; - opal_pmix_base_module_fence_fn_t fence; - opal_pmix_base_module_fence_nb_fn_t fence_nb; - opal_pmix_base_module_put_fn_t put; - opal_pmix_base_module_get_fn_t get; - opal_pmix_base_module_get_nb_fn_t get_nb; - opal_pmix_base_module_publish_fn_t publish; - opal_pmix_base_module_publish_nb_fn_t publish_nb; - opal_pmix_base_module_lookup_fn_t lookup; - opal_pmix_base_module_lookup_nb_fn_t lookup_nb; - opal_pmix_base_module_unpublish_fn_t unpublish; - opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb; - opal_pmix_base_module_spawn_fn_t spawn; - opal_pmix_base_module_spawn_nb_fn_t spawn_nb; - opal_pmix_base_module_connect_fn_t connect; - 
opal_pmix_base_module_connect_nb_fn_t connect_nb; - opal_pmix_base_module_disconnect_fn_t disconnect; - opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb; - opal_pmix_base_module_resolve_peers_fn_t resolve_peers; - opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes; + opal_pmix_base_module_init_fn_t init; + opal_pmix_base_module_fini_fn_t finalize; + opal_pmix_base_module_initialized_fn_t initialized; + opal_pmix_base_module_abort_fn_t abort; + opal_pmix_base_module_commit_fn_t commit; + opal_pmix_base_module_fence_fn_t fence; + opal_pmix_base_module_fence_nb_fn_t fence_nb; + opal_pmix_base_module_put_fn_t put; + opal_pmix_base_module_get_fn_t get; + opal_pmix_base_module_get_nb_fn_t get_nb; + opal_pmix_base_module_publish_fn_t publish; + opal_pmix_base_module_publish_nb_fn_t publish_nb; + opal_pmix_base_module_lookup_fn_t lookup; + opal_pmix_base_module_lookup_nb_fn_t lookup_nb; + opal_pmix_base_module_unpublish_fn_t unpublish; + opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb; + opal_pmix_base_module_spawn_fn_t spawn; + opal_pmix_base_module_spawn_nb_fn_t spawn_nb; + opal_pmix_base_module_connect_fn_t connect; + opal_pmix_base_module_connect_nb_fn_t connect_nb; + opal_pmix_base_module_disconnect_fn_t disconnect; + opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb; + opal_pmix_base_module_resolve_peers_fn_t resolve_peers; + opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes; /* server APIs */ - opal_pmix_base_module_server_init_fn_t server_init; - opal_pmix_base_module_server_finalize_fn_t server_finalize; - opal_pmix_base_module_generate_regex_fn_t generate_regex; - opal_pmix_base_module_generate_ppn_fn_t generate_ppn; - opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace; - opal_pmix_base_module_server_register_client_fn_t server_register_client; - opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork; - opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request; - 
opal_pmix_base_module_server_notify_error_fn_t server_notify_error; + opal_pmix_base_module_server_init_fn_t server_init; + opal_pmix_base_module_server_finalize_fn_t server_finalize; + opal_pmix_base_module_generate_regex_fn_t generate_regex; + opal_pmix_base_module_generate_ppn_fn_t generate_ppn; + opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace; + opal_pmix_base_module_server_deregister_nspace_fn_t server_deregister_nspace; + opal_pmix_base_module_server_register_client_fn_t server_register_client; + opal_pmix_base_module_server_deregister_client_fn_t server_deregister_client; + opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork; + opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request; + opal_pmix_base_module_server_notify_error_fn_t server_notify_error; /* Utility APIs */ - opal_pmix_base_module_get_version_fn_t get_version; - opal_pmix_base_module_register_fn_t register_errhandler; - opal_pmix_base_module_deregister_fn_t deregister_errhandler; - opal_pmix_base_module_store_fn_t store_local; - opal_pmix_base_module_get_nspace_fn_t get_nspace; - opal_pmix_base_module_register_jobid_fn_t register_jobid; + opal_pmix_base_module_get_version_fn_t get_version; + opal_pmix_base_module_register_fn_t register_errhandler; + opal_pmix_base_module_deregister_fn_t deregister_errhandler; + opal_pmix_base_module_store_fn_t store_local; + opal_pmix_base_module_get_nspace_fn_t get_nspace; + opal_pmix_base_module_register_jobid_fn_t register_jobid; } opal_pmix_base_module_t; typedef struct { diff --git a/opal/mca/pmix/pmix1xx/pmix/Makefile.am b/opal/mca/pmix/pmix1xx/pmix/Makefile.am index 3cc7e270c8..578676adc9 100644 --- a/opal/mca/pmix/pmix1xx/pmix/Makefile.am +++ b/opal/mca/pmix/pmix1xx/pmix/Makefile.am @@ -37,6 +37,11 @@ endif man_MANS = \ man/man3/pmix_init.3 \ + man/man3/pmix_finalize.3 \ + man/man3/pmix_initialized.3 \ + man/man3/pmix_abort.3 \ + man/man3/pmix_put.3 \ + man/man3/pmix_commit.3 \ man/man7/pmix.7 \ 
man/man7/pmix_constants.7 @@ -59,6 +64,8 @@ libpmix_la_LDFLAGS = -version-info $(libpmix_so_version) if ! PMIX_EMBEDDED_MODE SUBDIRS = . test examples +pmixdir = $(pmixincludedir)/$(subdir) +nobase_pmix_HEADERS = $(headers) endif nroff: diff --git a/opal/mca/pmix/pmix1xx/pmix/VERSION b/opal/mca/pmix/pmix1xx/pmix/VERSION index 5fac8bdaf6..d2bc728ca0 100644 --- a/opal/mca/pmix/pmix1xx/pmix/VERSION +++ b/opal/mca/pmix/pmix1xx/pmix/VERSION @@ -30,7 +30,7 @@ greek=a1 # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git69c398e +repo_rev=gita4d7e07 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Oct 09, 2015" +date="Nov 06, 2015" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 index b415b71870..4c2e757f4b 100644 --- a/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 @@ -105,7 +105,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # replaced, not the entire file. AC_CONFIG_HEADERS(pmix_config_prefix[include/private/autogen/config.h]) AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/autogen/config.h]) - AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/pmix_common.h]) # What prefix are we using? 
AC_MSG_CHECKING([for pmix symbol prefix]) diff --git a/opal/mca/pmix/pmix1xx/pmix/examples/server.c b/opal/mca/pmix/pmix1xx/pmix/examples/server.c index 12b7bcaa8e..f765cdef53 100644 --- a/opal/mca/pmix/pmix1xx/pmix/examples/server.c +++ b/opal/mca/pmix/pmix1xx/pmix/examples/server.c @@ -173,7 +173,7 @@ int main(int argc, char **argv) fprintf(stderr, "Running version %s\n", PMIx_Get_version()); /* setup the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) { + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) { fprintf(stderr, "Init failed with error %d\n", rc); return rc; } diff --git a/opal/mca/pmix/pmix1xx/pmix/include/Makefile.am b/opal/mca/pmix/pmix1xx/pmix/include/Makefile.am index 360a1a62a3..fba806a158 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/Makefile.am +++ b/opal/mca/pmix/pmix1xx/pmix/include/Makefile.am @@ -18,11 +18,19 @@ include_HEADERS = \ include/pmix_server.h \ include/pmi.h \ include/pmi2.h + +headers += \ + include/private/align.h \ + include/private/hash_string.h \ + include/private/pmix_socket_errno.h \ + include/private/pmix_stdint.h \ + include/private/prefetch.h \ + include/private/types.h \ + include/private/autogen/config.h include_pmixdir = $(includedir)/pmix include_pmix_HEADERS = \ - include/pmix/rename.h -nodist_include_pmix_HEADERS = \ - include/pmix/pmix_common.h + include/pmix/rename.h \ + include/pmix/pmix_common.h include_pmix_autogendir = $(includedir)/pmix/autogen include_pmix_autogen_HEADERS = \ @@ -31,12 +39,4 @@ include_pmix_autogen_HEADERS = \ nodist_include_pmix_autogen_HEADERS = \ include/pmix/autogen/config.h -noinst_HEADERS = \ - include/private/align.h \ - include/private/hash_string.h \ - include/private/pmix_socket_errno.h \ - include/private/pmix_stdint.h \ - include/private/prefetch.h \ - include/private/types.h - endif ! 
PMIX_EMBEDDED_MODE diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h.in b/opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h.in index a68887ff2c..b6f5637640 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h.in +++ b/opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h.in @@ -69,7 +69,7 @@ /* Maybe before gcc 2.95 too */ #ifdef PMIX_HAVE_ATTRIBUTE_UNUSED -#define __PMIX_HAVE_ATTRIBUTE_UNUSED PMIX_HAVE_ATTRIBUTE_UNUSED +#define __PMIX_HAVE_ATTRIBUTE_UNUSED PMIX_HAVE_ATTRIBUTE_UNUSED #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_UNUSED (GXX_ABOVE_3_4 || GCC_ABOVE_2_95) #else @@ -82,7 +82,7 @@ #endif #ifdef PMIX_HAVE_ATTRIBUTE_MALLOC -#define __PMIX_HAVE_ATTRIBUTE_MALLOC PMIX_HAVE_ATTRIBUTE_MALLOC +#define __PMIX_HAVE_ATTRIBUTE_MALLOC PMIX_HAVE_ATTRIBUTE_MALLOC #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_MALLOC (GXX_ABOVE_3_4 || GCC_ABOVE_2_96) #else @@ -95,7 +95,7 @@ #endif #ifdef PMIX_HAVE_ATTRIBUTE_CONST -#define __PMIX_HAVE_ATTRIBUTE_CONST PMIX_HAVE_ATTRIBUTE_CONST +#define __PMIX_HAVE_ATTRIBUTE_CONST PMIX_HAVE_ATTRIBUTE_CONST #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_CONST (GXX_ABOVE_3_4 || GCC_ABOVE_2_95) #else @@ -108,7 +108,7 @@ #endif #ifdef PMIX_HAVE_ATTRIBUTE_PURE -#define __PMIX_HAVE_ATTRIBUTE_PURE PMIX_HAVE_ATTRIBUTE_PURE +#define __PMIX_HAVE_ATTRIBUTE_PURE PMIX_HAVE_ATTRIBUTE_PURE #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_PURE (GXX_ABOVE_3_4 || GCC_ABOVE_2_96) #else @@ -121,7 +121,7 @@ #endif #ifdef PMIX_HAVE_ATTRIBUTE_DEPRECATED -#define __PMIX_HAVE_ATTRIBUTE_DEPRECATED PMIX_HAVE_ATTRIBUTE_DEPRECATED +#define __PMIX_HAVE_ATTRIBUTE_DEPRECATED PMIX_HAVE_ATTRIBUTE_DEPRECATED #elif defined(__GNUC__) # define __PMIX_HAVE_ATTRIBUTE_DEPRECATED (GXX_ABOVE_3_4 || GCC_ABOVE_3_3) #else @@ -178,6 +178,12 @@ /* The pmix symbol prefix in all caps */ #undef PMIX_SYM_PREFIX_CAPS +/* ensure we have the version info available for external users */ +#undef PMIX_MAJOR_VERSION 
+#undef PMIX_MINOR_VERSION +#undef PMIX_RELEASE_VERSION + + #undef BEGIN_C_DECLS #undef END_C_DECLS #if defined(c_plusplus) || defined(__cplusplus) diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in b/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h similarity index 95% rename from opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in rename to opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h index 5a111a1c40..b671ca87bd 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in +++ b/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h @@ -1,3 +1,4 @@ +/* include/pmix/pmix_common.h. Generated from pmix_common.h.in by configure. */ /* * Copyright (c) 2013-2014 Intel, Inc. All rights reserved * @@ -55,25 +56,6 @@ #include /* for struct timeval */ #endif -#ifndef PMIX_CONFIG_H - -/* ensure we have the version info available for external users */ -#undef PMIX_MAJOR_VERSION -#undef PMIX_MINOR_VERSION -#undef PMIX_RELEASE_VERSION - -#endif - -#undef BEGIN_C_DECLS -#undef END_C_DECLS -#if defined(c_plusplus) || defined(__cplusplus) -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else -#define BEGIN_C_DECLS /* empty */ -#define END_C_DECLS /* empty */ -#endif - BEGIN_C_DECLS /**** PMIX CONSTANTS ****/ @@ -418,8 +400,29 @@ typedef struct { /* release the memory in the value struct data field */ #define PMIX_VALUE_DESTRUCT(m) \ do { \ - if (PMIX_STRING == (m)->type && NULL != (m)->data.string) { \ - free((m)->data.string); \ + if (PMIX_STRING == (m)->type) { \ + if (NULL != (m)->data.string) { \ + free((m)->data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == (m)->type) { \ + if (NULL != (m)->data.bo.bytes) { \ + free((m)->data.bo.bytes); \ + } \ + } else if (PMIX_INFO_ARRAY == (m)->type) { \ + size_t _n; \ + pmix_info_t *_p = (pmix_info_t*)((m)->data.array.array); \ + for (_n=0; _n < (m)->data.array.size; _n++) { \ + if (PMIX_STRING == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.string) { \ + 
free(_p[_n].value.data.string); \ + } \ + } else if (PMIX_BYTE_OBJECT == _p[_n].value.type) { \ + if (NULL != _p[_n].value.data.bo.bytes) { \ + free(_p[_n].value.data.bo.bytes); \ + } \ + } \ + } \ + free(_p); \ } \ } while(0); diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h b/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h index 6c8a1d61b3..984e00709b 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h +++ b/opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h @@ -308,8 +308,13 @@ typedef struct pmix_server_module_1_0_0_t { /* Initialize the server support library, and provide a * pointer to a pmix_server_module_t structure - * containing the caller's callback functions */ -pmix_status_t PMIx_server_init(pmix_server_module_t *module); + * containing the caller's callback functions. The + * array of pmix_info_t structs is used to pass + * additional info that may be required by the server + * when initializing - e.g., a user/group ID to set + * on the rendezvous file for the Unix Domain Socket */ +pmix_status_t PMIx_server_init(pmix_server_module_t *module, + pmix_info_t info[], size_t ninfo); /* Finalize the server support library. If internal comm is * in-use, the server will shut it down at this time. All @@ -376,6 +381,13 @@ pmix_status_t PMIx_server_register_nspace(const char nspace[], int nlocalprocs, pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister an nspace and purge all objects relating to + * it, including any client info from that nspace. This is + * intended to support persistent PMIx servers by providing + * an opportunity for the host RM to tell the PMIx server + * library to release all memory for a completed job */ +void PMIx_server_deregister_nspace(const char nspace[]); + /* Register a client process with the PMIx server library. 
The * expected user ID and group ID of the child process helps the * server library to properly authenticate clients as they connect @@ -394,6 +406,12 @@ pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata); +/* Deregister a client and purge all data relating to it. The + * deregister_nspace API will automatically delete all client + * info for that nspace - this API is therefore intended solely + * for use in exception cases */ +void PMIx_server_deregister_client(const pmix_proc_t *proc); + /* Setup the environment of a child process to be forked * by the host so it can correctly interact with the PMIx * server. The PMIx client needs some setup information diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_abort.3 b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_abort.3 new file mode 100644 index 0000000000..e18b0d8dfa --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_abort.3 @@ -0,0 +1,62 @@ +.TH "pmix_abort" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Abort \- Abort the specified processes +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ + +pmix_status_t\ PMIx_Abort(int\ status,\ const\ char\ msg[], +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ pmix_proc_t\ procs[],\ size_t\ nprocs); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]status\f[] : Status value to be returned. +A value of zero is permitted by PMIx, but may not be returned by some +resource managers. +.PP +\f[I]msg\f[] : A string message to be displayed +.PP +\f[I]procs\f[] : An array of pmix_proc_t structures defining the +processes to be aborted. +A \f[I]NULL\f[] for the proc array indicates that all processes in the +caller\[aq]s nspace are to be aborted. +A wildcard value for the rank in any structure indicates that all +processes in that nspace are to be aborted. 
+.PP +\f[I]nprocs\f[] : Number of pmix_proc_t structures in the \f[I]procs\f[] +array +.SH DESCRIPTION +.PP +Request that the provided array of procs be aborted, returning the +provided \f[I]status\f[] and printing the provided message. +A \f[I]NULL\f[] for the proc array indicates that all processes in the +caller\[aq]s nspace are to be aborted. +.PP +The response to this request is somewhat dependent on the specific +resource manager and its configuration (e.g., some resource managers +will not abort the application if the provided \f[I]status\f[] is zero +unless specifically configured to do so), and thus lies outside the +control of PMIx itself. +However, the client will inform the RM of the request that the +application be aborted, regardless of the value of the provided +\f[I]status\f[]. +.PP +Passing a \f[I]NULL\f[] msg parameter is allowed. +Note that race conditions caused by multiple processes calling +PMIx_Abort are left to the server implementation to resolve with regard +to which status is returned and what messages (if any) are printed. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.SH SEE ALSO +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_commit.3 b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_commit.3 new file mode 100644 index 0000000000..ad3e3f8613 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_commit.3 @@ -0,0 +1,35 @@ +.TH "pmix_commit" "3" "2015\-10\-27" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Commit \- Push all previously \f[I]PMIx_Put\f[] values to the local +PMIx server. 
+.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ + +pmix_status_t\ PMIx_Commit(void); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]none\f[] +.SH DESCRIPTION +.PP +This is an asynchronous operation \- the library will immediately return +to the caller while the data is transmitted to the local server in the +background +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.SH SEE ALSO +.PP +\f[C]PMIx_Put\f[](3) +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_finalize.3 b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_finalize.3 new file mode 100644 index 0000000000..46593089fd --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_finalize.3 @@ -0,0 +1,31 @@ +.TH "pmix_finalize" "3" "2015\-10\-27" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Finalize \- Finalize the PMIx Client +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ + +pmix_status_t\ PMIx_Finalize(void); +\f[] +.fi +.SH ARGUMENTS +.SH DESCRIPTION +.PP +Finalize the PMIx client, closing the connection with the local PMIx +server. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.SH SEE ALSO +.PP +\f[C]PMIx_Init\f[](3) +.SH AUTHORS +PMIx. 
diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_init.3 b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_init.3 index 73708df289..3a8486c68c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_init.3 +++ b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_init.3 @@ -1,4 +1,4 @@ -.TH "pmix_init" "3" "2015\-09\-09" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.TH "pmix_init" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP PMIx_Init \- Initialize the PMIx Client @@ -13,7 +13,8 @@ pmix_status_t\ PMIx_Init(pmix_proc_t\ *proc); .fi .SH ARGUMENTS .PP -\f[I]proc\f[] : Fabric endpoint on which to initiate atomic operation. +\f[I]proc\f[] : Pointer to a pmix_proc_t object in which the +client\[aq]s namespace and rank are to be returned. .SH DESCRIPTION .PP Initialize the PMIx client, returning the process identifier assigned to @@ -35,67 +36,14 @@ Note that the PMIx client library is referenced counted, and so multiple calls to PMIx_Init are allowed. Thus, one way to obtain the namespace and rank of the process is to simply call PMIx_Init with a non\-NULL parameter. -.SS Atomic Data Types -.PP -Atomic functions may operate on one of the following identified data -types. -A given atomic function may support any datatype, subject to provider -implementation constraints. -.PP -\f[I]FI_INT8\f[] : Signed 8\-bit integer. -.PP -\f[I]FI_UINT8\f[] : Unsigned 8\-bit integer. -.PP -\f[I]FI_INT16\f[] : Signed 16\-bit integer. -.PP -\f[I]FI_UINT16\f[] : Unsigned 16\-bit integer. -.PP -\f[I]FI_INT32\f[] : Signed 32\-bit integer. -.PP -\f[I]FI_UINT32\f[] : Unsigned 32\-bit integer. -.PP -\f[I]FI_INT64\f[] : Signed 64\-bit integer. -.PP -\f[I]FI_UINT64\f[] : Unsigned 64\-bit integer. -.PP -\f[I]FI_FLOAT\f[] : A single\-precision floating point value (IEEE 754). -.PP -\f[I]FI_DOUBLE\f[] : A double\-precision floating point value (IEEE -754). 
-.PP -\f[I]FI_FLOAT_COMPLEX\f[] : An ordered pair of single\-precision -floating point values (IEEE 754), with the first value representing the -real portion of a complex number and the second representing the -imaginary portion. -.PP -\f[I]FI_DOUBLE_COMPLEX\f[] : An ordered pair of double\-precision -floating point values (IEEE 754), with the first value representing the -real portion of a complex number and the second representing the -imaginary portion. -.PP -\f[I]FI_LONG_DOUBLE\f[] : A double\-extended precision floating point -value (IEEE 754). -.PP -\f[I]FI_LONG_DOUBLE_COMPLEX\f[] : An ordered pair of double\-extended -precision floating point values (IEEE 754), with the first value -representing the real portion of a complex number and the second -representing the imaginary portion. .SH RETURN VALUE .PP Returns PMIX_SUCCESS on success. On error, a negative value corresponding to a PMIx errno is returned. -PMIx errno values are defined in \f[C]pmix_common.h\f[]. .SH ERRORS .PP -\f[I]\-FI_EOPNOTSUPP\f[] : The requested atomic operation is not -supported on this endpoint. -.PP -\f[I]\-FI_EMSGSIZE\f[] : The number of atomic operations in a single -request exceeds that supported by the underlying provider. +PMIx errno values are defined in \f[C]pmix_common.h\f[]. .SH NOTES .SH SEE ALSO -.PP -\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), -\f[C]fi_cq\f[](3), \f[C]fi_rma\f[](3) .SH AUTHORS PMIx. 
diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_initialized.3 b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_initialized.3 new file mode 100644 index 0000000000..ec35e2e007 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_initialized.3 @@ -0,0 +1,30 @@ +.TH "pmix_initialized" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Initialized \- Check if \f[I]PMIx_Init\f[] has been called +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ + +int\ PMIx_Initialized(void); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]none\f[] +.SH DESCRIPTION +.PP +Check to see if the PMIx Client library has been initialized +.SH RETURN VALUE +.PP +Returns \f[I]true\f[] if the PMIx Client has been initialized, and +\f[I]false\f[] if not. +.SH ERRORS +.SH NOTES +.SH SEE ALSO +.PP +\f[C]PMIx_Init\f[](3) +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_put.3 b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_put.3 new file mode 100644 index 0000000000..35e41d61a0 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/man/man3/pmix_put.3 @@ -0,0 +1,60 @@ +.TH "pmix_put" "3" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.SH NAME +.PP +PMIx_Put \- Push a value into the client\[aq]s namespace +.SH SYNOPSIS +.IP +.nf +\f[C] +#include\ + +pmix_status_t\ PMIx_Put(pmix_scope_t\ scope,\ const\ char\ key[],\ pmix_value_t\ *val); +\f[] +.fi +.SH ARGUMENTS +.PP +\f[I]scope\f[] : Defines a scope for data "put" by PMI per the +following: +.IP +.nf +\f[C] +*\ PMI_LOCAL\ \-\ the\ data\ is\ intended\ only\ for\ other\ application +\ \ \ \ \ \ \ \ \ \ \ \ \ \ processes\ on\ the\ same\ node.\ Data\ marked\ in\ this\ way +\ \ \ \ \ \ \ \ \ \ \ \ \ \ will\ not\ be\ included\ in\ data\ packages\ sent\ to\ remote\ requestors +*\ PMI_REMOTE\ \-\ the\ data\ is\ intended\ solely\ for\ applications\ processes\ on +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ remote\ nodes.\ Data\ marked\ in\ this\ way\ will\ not\ be\ shared\ with +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ other\ processes\ 
on\ the\ same\ node +*\ PMI_GLOBAL\ \-\ the\ data\ is\ to\ be\ shared\ with\ all\ other\ requesting\ processes, +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ regardless\ of\ location +\f[] +.fi +.PP +\f[I]key\f[] String key identifying the information. +This can be either one of the PMIx defined attributes, or a +user\-defined value +.PP +\f[I]val\f[] Pointer to a pmix_value_t structure containing the data to +be pushed along with the type of the provided data. +.SH DESCRIPTION +.PP +Push a value into the client\[aq]s namespace. +The client library will cache the information locally until +\f[I]PMIx_Commit\f[] is called. +The provided scope value is passed to the local PMIx server, which will +distribute the data as directed. +.SH RETURN VALUE +.PP +Returns PMIX_SUCCESS on success. +On error, a negative value corresponding to a PMIx errno is returned. +.SH ERRORS +.PP +PMIx errno values are defined in \f[C]pmix_common.h\f[]. +.SH NOTES +.PP +See \[aq]pmix_common.h\[aq] for definition of the pmix_value_t +structure. +.SH SEE ALSO +.PP +\f[C]PMIx_Constants\f[](7), \f[C]PMIx_Structures\f[](7) +.SH AUTHORS +PMIx. diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 b/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 index 4ba00eca72..d2e545af94 100644 --- a/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 +++ b/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix.7 @@ -1,241 +1,35 @@ -.TH "pmix" "7" "2015\-09\-09" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.TH "pmix" "7" "2015\-10\-29" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP -Fabric Interface Library +Process Management Interface \- Exascale .SH SYNOPSIS .IP .nf \f[C] -#include\ +#include\ \f[] .fi -.PP -Libfabric is a high\-performance fabric software library designed to -provide low\-latency interfaces to fabric hardware. .SH OVERVIEW .PP -Libfabric provides \[aq]process direct I/O\[aq] to application software -communicating across fabric software and hardware. 
-Process direct I/O, historically referred to as RDMA, allows an -application to directly access network resources without operating -system interventions. -Data transfers can occur directly to and from application memory. +The Process Management Interface (PMI) has been used for quite some time +as a means of exchanging wireup information needed for interprocess +communication. +Two versions (PMI\-1 and PMI\-2) have been released as part of the MPICH +effort. +While PMI\-2 demonstrates better scaling properties than its PMI\-1 +predecessor, attaining rapid launch and wireup of the roughly 1M +processes executing across 100k nodes expected for exascale operations +remains challenging. .PP -There are two components to the libfabric software: -.PP -\f[I]Fabric Providers\f[] : Conceptually, a fabric provider may be -viewed as a local hardware NIC driver, though a provider is not limited -by this definition. -The first component of libfabric is a general purpose framework that is -capable of handling different types of fabric hardware. -All fabric hardware devices and their software drivers are required to -support this framework. -Devices and the drivers that plug into the libfabric framework are -referred to as fabric providers, or simply providers. -Provider details may be found in \f[C]fi_provider\f[](7). -.PP -\f[I]Fabric Interfaces\f[] : The second component is a set of -communication operations. -Libfabric defines several sets of communication functions that providers -can support. -It is not required that providers implement all the interfaces that are -defined; however, providers clearly indicate which interfaces they do -support. -.SH FABRIC INTERFACES -.PP -The fabric interfaces are designed such that they are cohesive and not -simply a union of disjoint interfaces. -The interfaces are logically divided into two groups: control interfaces -and communication operations. 
-The control interfaces are a common set of operations that provide -access to local communication resources, such as address vectors and -event queues. -The communication operations expose particular models of communication -and fabric functionality, such as message queues, remote memory access, -and atomic operations. -Communication operations are associated with fabric endpoints. -.PP -Applications will typically use the control interfaces to discover local -capabilities and allocate necessary resources. -They will then allocate and configure a communication endpoint to send -and receive data, or perform other types of data transfers, with remote -endpoints. -.SH CONTROL INTERFACES -.PP -The control interfaces APIs provide applications access to network -resources. -This involves listing all the interfaces available, obtaining the -capabilities of the interfaces and opening a provider. -.PP -\f[I]fi_getinfo \- Fabric Information\f[] : The fi_getinfo call is the -base call used to discover and request fabric services offered by the -system. -Applications can use this call to indicate the type of communication -that they desire. -The results from fi_getinfo, fi_info, are used to reserve and configure -fabric resources. -.PP -fi_getinfo returns a list of fi_info structures. -Each structure references a single fabric provider, indicating the -interfaces that the provider supports, along with a named set of -resources. -A fabric provider may include multiple fi_info structures in the -returned list. -.PP -\f[I]fi_fabric \- Fabric Domain\f[] : A fabric domain represents a -collection of hardware and software resources that access a single -physical or virtual network. -All network ports on a system that can communicate with each other -through the fabric belong to the same fabric domain. -A fabric domain shares network addresses and can span multiple -providers. -libfabric supports systems connected to multiple fabrics. 
-.PP -\f[I]fi_domain \- Access Domains\f[] : An access domain represents a -single logical connection into a fabric. -It may map to a single physical or virtual NIC or a port. -An access domain defines the boundary across which fabric resources may -be associated. -Each access domain belongs to a single fabric domain. -.PP -\f[I]fi_endpoint \- Fabric Endpoint\f[] : A fabric endpoint is a -communication portal. -An endpoint may be either active or passive. -Passive endpoints are used to listen for connection requests. -Active endpoints can perform data transfers. -Endpoints are configured with specific communication capabilities and -data transfer interfaces. -.PP -\f[I]fi_eq \- Event Queue\f[] : Event queues, are used to collect and -report the completion of asynchronous operations and events. -Event queues report events that are not directly associated with data -transfer operations. -.PP -\f[I]fi_cq \- Completion Queue\f[] : Completion queues are -high\-performance event queues used to report the completion of data -transfer operations. -.PP -\f[I]fi_cntr \- Event Counters\f[] : Event counters are used to report -the number of completed asynchronous operations. -Event counters are considered light\-weight, in that a completion simply -increments a counter, rather than placing an entry into an event queue. -.PP -\f[I]fi_mr \- Memory Region\f[] : Memory regions describe application -local memory buffers. -In order for fabric resources to access application memory, the -application must first grant permission to the fabric provider by -constructing a memory region. -Memory regions are required for specific types of data transfer -operations, such as RMA transfers (see below). -.PP -\f[I]fi_av \- Address Vector\f[] : Address vectors are used to map -higher level addresses, such as IP addresses, which may be more natural -for an application to use, into fabric specific addresses. 
-The use of address vectors allows providers to reduce the amount of -memory required to maintain large address look\-up tables, and eliminate -expensive address resolution and look\-up methods during data transfer -operations. -.SH DATA TRANSFER INTERFACES -.PP -Fabric endpoints are associated with multiple data transfer interfaces. -Each interface set is designed to support a specific style of -communication, with an endpoint allowing the different interfaces to be -used in conjunction. -The following data transfer interfaces are defined by libfabric. -.PP -\f[I]fi_msg \- Message Queue\f[] : Message queues expose a simple, -message\-based FIFO queue interface to the application. -Message data transfers allow applications to send and receive data with -message boundaries being maintained. -.PP -\f[I]fi_tagged \- Tagged Message Queues\f[] : Tagged message lists -expose send/receive data transfer operations built on the concept of -tagged messaging. -The tagged message queue is conceptually similar to standard message -queues, but with the addition of 64\-bit tags for each message. -Sent messages are matched with receive buffers that are tagged with a -similar value. -.PP -\f[I]fi_rma \- Remote Memory Access\f[] : RMA transfers are one\-sided -operations that read or write data directly to a remote memory region. -Other than defining the appropriate memory region, RMA operations do not -require interaction at the target side for the data transfer to -complete. -.PP -\f[I]fi_atomic \- Atomic\f[] : Atomic operations can perform one of -several operations on a remote memory region. -Atomic operations include well\-known functionality, such as atomic\-add -and compare\-and\-swap, plus several other pre\-defined calls. -Unlike other data transfer interfaces, atomic operations are aware of -the data formatting at the target memory region. -.SH LOGGING INTERFACE -.PP -Logging can be controlled using the FI_LOG_LEVEL, FI_LOG_PROV, and -FI_LOG_SUBSYS environment variables. 
-.PP -\f[I]FI_LOG_LEVEL\f[] : FI_LOG_LEVEL controls the amount of logging data -that is output. -The following log levels are defined. -.IP \[bu] 2 -\f[I]Warn\f[] : Warn is the least verbose setting and is intended for -reporting errors or warnings. -.IP \[bu] 2 -\f[I]Trace\f[] : Trace is more verbose and is meant to include -non\-detailed output helpful to tracing program execution. -.IP \[bu] 2 -\f[I]Info\f[] : Info is high traffic and meant for detailed output. -.IP \[bu] 2 -\f[I]Debug\f[] : Debug is high traffic and is likely to impact -application performance. -Debug output is only available if the library has been compiled with -debugging enabled. -.PP -\f[I]FI_LOG_PROV\f[] : The FI_LOG_PROV environment variable enables or -disables logging from specific providers. -Providers can be enabled by listing them in a comma separated fashion. -If the list begins with the \[aq]^\[aq] symbol, then the list will be -negated. -By default all providers are enabled. -.PP -Example: To enable logging from the psm and sockets provider: -FI_LOG_PROV="psm,sockets" -.PP -Example: To enable logging from providers other than psm: -FI_LOG_PROV="^psm" -.PP -\f[I]FI_LOG_SUBSYS\f[] : The FI_LOG_SUBSYS environment variable enables -or disables logging at the subsystem level. -The syntax for enabling or disabling subsystems is similar to that used -for FI_LOG_PROV. -The following subsystems are defined. -.IP \[bu] 2 -\f[I]core\f[] : Provides output related to the core framework and its -management of providers. -.IP \[bu] 2 -\f[I]fabric\f[] : Provides output specific to interactions associated -with the fabric object. -.IP \[bu] 2 -\f[I]domain\f[] : Provides output specific to interactions associated -with the domain object. -.IP \[bu] 2 -\f[I]ep_ctrl\f[] : Provides output specific to endpoint non\-data -transfer operations, such as CM operations. -.IP \[bu] 2 -\f[I]ep_data\f[] : Provides output specific to endpoint data transfer -operations. 
-.IP \[bu] 2 -\f[I]av\f[] : Provides output specific to address vector operations. -.IP \[bu] 2 -\f[I]cq\f[] : Provides output specific to completion queue operations. -.IP \[bu] 2 -\f[I]eq\f[] : Provides output specific to event queue operations. -.IP \[bu] 2 -\f[I]mr\f[] : Provides output specific to memory registration. +PMI Exascale (PMIx) represents an attempt to resolve these questions by +providing an extended version of the PMI standard specifically designed +to support clusters up to and including exascale sizes. +The overall objective of the project is not to branch the existing +pseudo\-standard definitions \- in fact, PMIx fully supports both of the +existing PMI\-1 and PMI\-2 APIs \- but rather to (a) augment and extend +those APIs to eliminate some current restrictions that impact +scalability, and (b) provide a reference implementation of the +PMI\-server that demonstrates the desired level of scalability. .SH SEE ALSO -.PP -\f[C]fi_provider\f[](7), \f[C]fi_getinfo\f[](3), -\f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), \f[C]fi_av\f[](3), -\f[C]fi_eq\f[](3), \f[C]fi_cq\f[](3), \f[C]fi_cntr\f[](3), -\f[C]fi_mr\f[](3) .SH AUTHORS PMIx. diff --git a/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix_constants.7 b/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix_constants.7 index 3de75d16b8..4561121ac5 100644 --- a/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix_constants.7 +++ b/opal/mca/pmix/pmix1xx/pmix/man/man7/pmix_constants.7 @@ -1,4 +1,4 @@ -.TH "pmix_constants" "7" "2015\-09\-16" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" +.TH "pmix_constants" "7" "2015\-10\-25" "PMIx Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP PMIx Constants @@ -87,8 +87,6 @@ Unlike other data transfer interfaces, atomic operations are aware of the data formatting at the target memory region. 
.SH SEE ALSO .PP -\f[C]pmix\f[](7), \f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), -\f[C]fi_domain\f[](3), \f[C]fi_av\f[](3), \f[C]fi_eq\f[](3), -\f[C]fi_cq\f[](3), \f[C]fi_cntr\f[](3), \f[C]fi_mr\f[](3) +\f[C]pmix\f[](7) .SH AUTHORS PMIx. diff --git a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/types.h b/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/types.h index 62c9136846..d9734969bf 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/types.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/types.h @@ -34,7 +34,7 @@ #include "src/class/pmix_object.h" #include "src/class/pmix_pointer_array.h" #include "src/class/pmix_list.h" -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.h b/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.h index 55168ab409..1c0a2b32d0 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.h @@ -40,7 +40,7 @@ #include "src/class/pmix_list.h" -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.h b/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.h index 680d52cdfd..0e299b007e 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.h @@ -30,7 +30,7 @@ #endif #include "src/class/pmix_object.h" -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c b/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c index 7c9df035af..b8394f2037 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c @@ -507,7 +507,7 @@ int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn) PMI2_CHECK(); - if (NULL == jobid || NULL == conn) { + if (NULL == conn) { return PMI2_ERR_INVALID_ARGS; } @@ -524,10 +524,6 @@ int PMI2_Job_Disconnect(const char jobid[]) PMI2_CHECK(); - if (NULL == jobid) { - return PMI2_ERR_INVALID_ARGS; - } 
- (void)strncpy(proc.nspace, (jobid ? jobid : myproc.nspace), sizeof(myproc.nspace)); proc.rank = PMIX_RANK_WILDCARD; rc = PMIx_Disconnect(&proc, 1, NULL, 0); diff --git a/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c b/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c index 43a872d85b..4e45048321 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/common/pmix_common.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include "src/include/pmix_globals.h" diff --git a/opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.h index 7e529737c8..2afc2db016 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.h @@ -30,7 +30,7 @@ #endif #include PMIX_EVENT_HEADER -#include +#include #include "src/buffer_ops/types.h" #include "src/class/pmix_hash_table.h" diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.c b/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.c index 0336c67cbb..70cea510f3 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include "src/include/pmix_globals.h" #include "src/util/argv.h" diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.c b/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.c index d353e82720..a116556d3d 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include "src/include/pmix_globals.h" #include "src/util/argv.h" diff --git a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c b/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c index 578cc05c0f..6afcd529fa 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c @@ -13,7 +13,7 @@ #include #include 
-#include +#include #include "src/include/pmix_globals.h" #ifdef HAVE_STRING_H diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c index cf60ab0254..80c2101a79 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include "src/include/pmix_globals.h" #ifdef HAVE_STRING_H @@ -44,6 +44,7 @@ #include #endif #include +#include #include PMIX_EVENT_HEADER #include "src/util/argv.h" @@ -140,11 +141,13 @@ static void _queue_message(int fd, short args, void *cbdata) { pmix_usock_queue_t *queue = (pmix_usock_queue_t*)cbdata; pmix_usock_send_t *snd; + pmix_output_verbose(2, pmix_globals.debug_output, "[%s:%d] queue callback called: reply to %s:%d on tag %d", __FILE__, __LINE__, (queue->peer)->info->nptr->nspace, (queue->peer)->info->rank, (queue->tag)); + snd = PMIX_NEW(pmix_usock_send_t); snd->hdr.pindex = pmix_globals.pindex; snd->hdr.tag = (queue->tag); @@ -222,12 +225,16 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) pmix_globals.myid.rank = strtol(evar, NULL, 10); } + /* initialize the datatype support */ + pmix_bfrop_open(); + /* setup the server-specific globals */ PMIX_CONSTRUCT(&pmix_server_globals.clients, pmix_pointer_array_t); pmix_pointer_array_init(&pmix_server_globals.clients, 1, INT_MAX, 1); PMIX_CONSTRUCT(&pmix_server_globals.collectives, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.remote_pnd, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); + PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_buffer_t); /* see if debug is requested */ if (NULL != (evar = getenv("PMIX_DEBUG"))) { @@ -243,9 +250,6 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) memset(&pmix_host_server, 0, sizeof(pmix_server_module_t)); pmix_host_server = *module; - /* initialize the datatype support */ - 
pmix_bfrop_open(); - /* init security */ pmix_sec_init(); security_mode = strdup(pmix_sec.name); @@ -267,17 +271,19 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module) snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/pmix-%d", tdir, pid); asprintf(&myuri, "%s:%lu:%s", pmix_globals.myid.nspace, (unsigned long)pmix_globals.myid.rank, myaddress.sun_path); - pmix_output_verbose(2, pmix_globals.debug_output, "pmix:server constructed uri %s", myuri); return PMIX_SUCCESS; } -pmix_status_t PMIx_server_init(pmix_server_module_t *module) +pmix_status_t PMIx_server_init(pmix_server_module_t *module, + pmix_info_t info[], size_t ninfo) { pmix_usock_posted_recv_t *req; pmix_status_t rc; + size_t n; + pmix_kval_t kv; ++pmix_globals.init_cntr; if (1 < pmix_globals.init_cntr) { @@ -298,6 +304,34 @@ pmix_status_t PMIx_server_init(pmix_server_module_t *module) if (NULL == (pmix_globals.evbase = pmix_start_progress_thread())) { return PMIX_ERR_INIT; } + /* check the info keys for a directive about the uid/gid + * to be set for the rendezvous file, and any info we + * need to provide to every client */ + if (NULL != info) { + PMIX_CONSTRUCT(&kv, pmix_kval_t); + for (n=0; n < ninfo; n++) { + if (0 == strcmp(info[n].key, PMIX_USERID)) { + /* the userid is in the uint32_t storage */ + chown(myaddress.sun_path, info[n].value.data.uint32, -1); + } else if (0 == strcmp(info[n].key, PMIX_GRPID)) { + /* the grpid is in the uint32_t storage */ + chown(myaddress.sun_path, -1, info[n].value.data.uint32); + } else { + /* store and pass along to every client */ + kv.key = info[n].key; + kv.value = &info[n].value; + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pmix_server_globals.gdata, &kv, 1, PMIX_KVAL))) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&kv); + return rc; + } + } + } + /* protect the incoming data */ + kv.key = NULL; + kv.value = NULL; + PMIX_DESTRUCT(&kv); + } /* setup the wildcard recv for inbound messages from clients */ req = 
PMIX_NEW(pmix_usock_posted_recv_t); @@ -329,10 +363,14 @@ static void cleanup_server_state(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.collectives); PMIX_LIST_DESTRUCT(&pmix_server_globals.remote_pnd); PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); + PMIX_DESTRUCT(&pmix_server_globals.gdata); if (NULL != myuri) { free(myuri); } + if (NULL != security_mode) { + free(security_mode); + } pmix_bfrop_close(); pmix_sec_finalize(); @@ -571,6 +609,45 @@ pmix_status_t PMIx_server_register_nspace(const char nspace[], int nlocalprocs, return PMIX_SUCCESS; } +static void _deregister_nspace(int sd, short args, void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + pmix_nspace_t *tmp; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server _deregister_nspace %s", + cd->proc.nspace); + + /* see if we already have this nspace */ + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { + pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super); + PMIX_RELEASE(tmp); + break; + } + } + + PMIX_RELEASE(cd); +} + +void PMIx_server_deregister_nspace(const char nspace[]) +{ + pmix_setup_caddy_t *cd; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server deregister nspace %s", + nspace); + + cd = PMIX_NEW(pmix_setup_caddy_t); + (void)strncpy(cd->proc.nspace, nspace, PMIX_MAX_NSLEN); + + /* we have to push this into our event library to avoid + * potential threading issues */ + event_assign(&cd->ev, pmix_globals.evbase, -1, + EV_WRITE, _deregister_nspace, cd); + event_active(&cd->ev, EV_WRITE, 1); +} + static void _execute_collective(int sd, short args, void *cbdata) { pmix_trkr_caddy_t *tcd = (pmix_trkr_caddy_t*)cbdata; @@ -755,6 +832,60 @@ pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, return PMIX_SUCCESS; } +static void _deregister_client(int sd, short args, void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + pmix_rank_info_t *info; 
+ pmix_nspace_t *nptr, *tmp; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server _deregister_client for nspace %s rank %d", + cd->proc.nspace, cd->proc.rank); + + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) { + if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { + nptr = tmp; + break; + } + } + if (NULL == nptr) { + /* nothing to do */ + goto cleanup; + } + /* find an remove this client */ + PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) { + if (info->rank == cd->proc.rank) { + pmix_list_remove_item(&nptr->server->ranks, &info->super); + PMIX_RELEASE(info); + break; + } + } + + cleanup: + PMIX_RELEASE(cd); +} + +void PMIx_server_deregister_client(const pmix_proc_t *proc) +{ + pmix_setup_caddy_t *cd; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix:server deregister client %s:%d", + proc->nspace, proc->rank); + + cd = PMIX_NEW(pmix_setup_caddy_t); + (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); + cd->proc.rank = proc->rank; + + /* we have to push this into our event library to avoid + * potential threading issues */ + event_assign(&cd->ev, pmix_globals.evbase, -1, + EV_WRITE, _deregister_client, cd); + event_active(&cd->ev, EV_WRITE, 1); +} + /* setup the envars for a child process */ pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char ***env) { @@ -1940,6 +2071,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag, if (PMIX_REQ_CMD == cmd) { reply = PMIX_NEW(pmix_buffer_t); pmix_bfrop.copy_payload(reply, &(peer->info->nptr->server->job_info)); + pmix_bfrop.copy_payload(reply, &(pmix_server_globals.gdata)); PMIX_SERVER_QUEUE_REPLY(peer, tag, reply); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_get.c index 13f71c9b76..9d48269723 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_get.c +++ 
b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_get.c @@ -20,7 +20,6 @@ #include #include -#include #include #include "src/include/pmix_globals.h" @@ -115,7 +114,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_nspace_t *ns, *nptr; pmix_info_t *info=NULL; size_t ninfo=0; - pmix_dmdx_local_t *lcd, *cd; + pmix_dmdx_local_t *lcd; pmix_rank_info_t *iptr; pmix_hash_table_t *ht; bool local; @@ -458,7 +457,6 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) pmix_kval_t *kp; pmix_nspace_t *ns, *nptr; pmix_status_t rc; - pmix_buffer_t xfer, pbkt, *xptr; pmix_output_verbose(2, pmix_globals.debug_output, "[%s:%d] process dmdx reply from %s:%d", diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_listener.c b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_listener.c index 30cd98a5e7..7c5b16f4e2 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_listener.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_listener.c @@ -41,6 +41,7 @@ #include #endif #include +#include #include PMIX_EVENT_HEADER #include @@ -86,6 +87,11 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address) printf("%s:%d bind() failed", __FILE__, __LINE__); return PMIX_ERROR; } + /* set the mode as required */ + if (0 != chmod(address->sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)) { + pmix_output(0, "CANNOT CHMOD %s", address->sun_path); + return PMIX_ERROR; + } /* setup listen backlog to maximum allowed by kernel */ if (listen(pmix_server_globals.listen_socket, SOMAXCONN) < 0) { diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c index 43d35b5def..9d21b01e02 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c @@ -132,7 +132,7 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) pmix_nspace_t *nptr; pmix_rank_info_t *info; pmix_dmdx_remote_t *dcd, *dcdnext; - 
pmix_buffer_t pbkt, xfer; + pmix_buffer_t pbkt; pmix_value_t *val; char *data; size_t sz; diff --git a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h index 9129b6bbbd..9861178759 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include "src/usock/usock.h" #include "src/util/hash.h" @@ -145,6 +145,7 @@ typedef struct { bool listen_thread_active; // listen thread is running int listen_socket; // socket listener is watching int stop_thread[2]; // pipe used to stop listener thread + pmix_buffer_t gdata; // cache of data given to me for passing to all clients } pmix_server_globals_t; #define PMIX_PEER_CADDY(c, p, t) \ diff --git a/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h b/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h index 42df5055cf..56c8577102 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h @@ -32,7 +32,7 @@ #include #include #include -#include +#include #ifdef HAVE_UNISTD_H #include diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/argv.h b/opal/mca/pmix/pmix1xx/pmix/src/util/argv.h index 2dfc7d20f7..6939b6c758 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/argv.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/argv.h @@ -41,7 +41,7 @@ #include #endif -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/error.c b/opal/mca/pmix/pmix1xx/pmix/src/util/error.c index 90c42edb66..0bf5587d73 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/error.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/error.c @@ -31,7 +31,7 @@ #include #endif -#include +#include #include "src/util/error.h" #include "src/include/pmix_globals.h" diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/error.h b/opal/mca/pmix/pmix1xx/pmix/src/util/error.h index e43ac47bd9..29e60c950f 100644 --- 
a/opal/mca/pmix/pmix1xx/pmix/src/util/error.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/error.h @@ -23,7 +23,7 @@ #include #include -#include +#include #include "src/util/output.h" BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/fd.c b/opal/mca/pmix/pmix1xx/pmix/src/util/fd.c index 7c08a6d352..2683555c1c 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/fd.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/fd.c @@ -14,7 +14,7 @@ #include #include -#include +#include #ifdef HAVE_UNISTD_H #include diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/output.c b/opal/mca/pmix/pmix1xx/pmix/src/util/output.c index 593c835222..d9aeab18df 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/output.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.c b/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.c index d337d8bd32..94052ab827 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.h b/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.h index 15e018558f..2f8a7d9e0e 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.h +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.h @@ -37,7 +37,7 @@ #include #endif -#include +#include BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.c b/opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.c index 2ce45dfd37..ac38be1772 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.c @@ -25,7 +25,7 @@ #include "src/util/error.h" #include "src/util/fd.h" -#include +#include #include "src/util/progress_threads.h" static volatile bool evlib_active; diff --git 
a/opal/mca/pmix/pmix1xx/pmix/src/util/timings.c b/opal/mca/pmix/pmix1xx/pmix/src/util/timings.c index 988157393c..f1be7a83c0 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/util/timings.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/util/timings.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c b/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c index 0bfe837513..7dccd63a8e 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c @@ -336,24 +336,9 @@ static int test_item5(void) static int test_item6(void) { int rc = 0; - char nspace[100]; log_error("pmix does not support this functionality\n"); return rc; - if (0 == rank) { - if (PMI_SUCCESS != (rc = PMI_KVS_Create(nspace, sizeof(nspace)))) { - log_fatal("PMI_KVS_Create failed: %d\n", rc); - return rc; - } - log_info("nspace=%s\n", nspace); - - if (PMI_SUCCESS != (rc = PMI_KVS_Destroy(nspace))) { - log_fatal("PMI_KVS_Destroy failed: %d\n", rc); - return rc; - } - } - - return rc; } static int test_item7(void) diff --git a/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c b/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c index 7d6ed93b44..8fd375accc 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c @@ -81,7 +81,7 @@ int main(int argc, char **argv) } /* setup the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) { + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) { TEST_ERROR(("Init failed with error %d", rc)); FREE_TEST_PARAMS(params); return rc; diff --git a/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c index 2cb82419c0..54700fea73 100644 --- a/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c @@ -191,7 +191,7 @@ int main(int argc, char **argv) fprintf(stderr, "Testing version %s\n", 
PMIx_Get_version()); /* setup the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) { + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) { fprintf(stderr, "Init failed with error %d\n", rc); return rc; } diff --git a/opal/mca/pmix/pmix1xx/pmix1.h b/opal/mca/pmix/pmix1xx/pmix1.h index 835096b20b..9046f3b46f 100644 --- a/opal/mca/pmix/pmix1xx/pmix1.h +++ b/opal/mca/pmix/pmix1xx/pmix1.h @@ -131,7 +131,8 @@ OPAL_MODULE_DECLSPEC int pmix1_store_local(const opal_process_name_t *proc, opal_value_t *val); /**** SERVER SOUTHBOUND FUNCTIONS ****/ -OPAL_MODULE_DECLSPEC int pmix1_server_init(opal_pmix_server_module_t *module); +OPAL_MODULE_DECLSPEC int pmix1_server_init(opal_pmix_server_module_t *module, + opal_list_t *info); OPAL_MODULE_DECLSPEC int pmix1_server_finalize(void); OPAL_MODULE_DECLSPEC int pmix1_server_gen_regex(const char *input, char **regex); OPAL_MODULE_DECLSPEC int pmix1_server_gen_ppn(const char *input, char **ppn); @@ -140,11 +141,13 @@ OPAL_MODULE_DECLSPEC int pmix1_server_register_nspace(opal_jobid_t jobid, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC void pmix1_server_deregister_nspace(opal_jobid_t jobid); OPAL_MODULE_DECLSPEC int pmix1_server_register_client(const opal_process_name_t *proc, uid_t uid, gid_t gid, void *server_object, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC void pmix1_server_deregister_client(const opal_process_name_t *proc); OPAL_MODULE_DECLSPEC int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env); OPAL_MODULE_DECLSPEC int pmix1_server_dmodex(const opal_process_name_t *proc, opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); diff --git a/opal/mca/pmix/pmix1xx/pmix1_server_south.c b/opal/mca/pmix/pmix1xx/pmix1_server_south.c index f0d0f11d49..e964298bc5 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_server_south.c +++ b/opal/mca/pmix/pmix1xx/pmix1_server_south.c @@ -96,19 +96,41 @@ static void 
errreg_cbfunc(pmix_status_t status, status, errhandler_ref); } -int pmix1_server_init(opal_pmix_server_module_t *module) +int pmix1_server_init(opal_pmix_server_module_t *module, + opal_list_t *info) { pmix_status_t rc; int dbg; + opal_value_t *kv; + pmix_info_t *pinfo; + size_t sz, n; if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) { asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg); putenv(dbgvalue); } - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule))) { + /* convert the list to an array of pmix_info_t */ + if (NULL != info) { + sz = opal_list_get_size(info); + PMIX_INFO_CREATE(pinfo, sz); + n = 0; + OPAL_LIST_FOREACH(kv, info, opal_value_t) { + (void)strncpy(pinfo[n].key, kv->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, kv); + ++n; + } + } else { + sz = 0; + pinfo = NULL; + } + + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, pinfo, sz))) { + PMIX_INFO_FREE(pinfo, sz); return pmix1_convert_rc(rc); } + PMIX_INFO_FREE(pinfo, sz); + /* record the host module */ host_module = module; @@ -225,6 +247,22 @@ int pmix1_server_register_nspace(opal_jobid_t jobid, return pmix1_convert_rc(rc); } +void pmix1_server_deregister_nspace(opal_jobid_t jobid) +{ + opal_pmix1_jobid_trkr_t *jptr; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == jobid) { + /* found it - tell the server to deregister */ + PMIx_server_deregister_nspace(jptr->nspace); + /* now get rid of it from our list */ + opal_list_remove_item(&mca_pmix_pmix1xx_component.jobids, &jptr->super); + OBJ_RELEASE(jptr); + return; + } + } +} int pmix1_server_register_client(const opal_process_name_t *proc, uid_t uid, gid_t gid, @@ -252,6 +290,23 @@ int pmix1_server_register_client(const opal_process_name_t *proc, return pmix1_convert_rc(rc); } +void pmix1_server_deregister_client(const opal_process_name_t *proc) +{ + opal_pmix1_jobid_trkr_t *jptr; + 
pmix_proc_t p; + + /* if we don't already have it, we can ignore this */ + OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) { + if (jptr->jobid == proc->jobid) { + /* found it - tell the server to deregister */ + (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); + p.rank = proc->vpid; + PMIx_server_deregister_client(&p); + return; + } + } +} + int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env) { diff --git a/opal/mca/pmix/pmix1xx/pmix_pmix1.c b/opal/mca/pmix/pmix1xx/pmix_pmix1.c index 2c61220524..dd47c6d73c 100644 --- a/opal/mca/pmix/pmix1xx/pmix_pmix1.c +++ b/opal/mca/pmix/pmix1xx/pmix_pmix1.c @@ -49,47 +49,49 @@ static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_pmix1xx_module = { /* client APIs */ - pmix1_client_init, - pmix1_client_finalize, - pmix1_initialized, - pmix1_abort, - pmix1_commit, - pmix1_fence, - pmix1_fencenb, - pmix1_put, - pmix1_get, - pmix1_getnb, - pmix1_publish, - pmix1_publishnb, - pmix1_lookup, - pmix1_lookupnb, - pmix1_unpublish, - pmix1_unpublishnb, - pmix1_spawn, - pmix1_spawnnb, - pmix1_connect, - pmix1_connectnb, - pmix1_disconnect, - pmix1_disconnectnb, - pmix1_resolve_peers, - pmix1_resolve_nodes, + .init = pmix1_client_init, + .finalize = pmix1_client_finalize, + .initialized = pmix1_initialized, + .abort = pmix1_abort, + .commit = pmix1_commit, + .fence = pmix1_fence, + .fence_nb = pmix1_fencenb, + .put = pmix1_put, + .get = pmix1_get, + .get_nb = pmix1_getnb, + .publish = pmix1_publish, + .publish_nb = pmix1_publishnb, + .lookup = pmix1_lookup, + .lookup_nb = pmix1_lookupnb, + .unpublish = pmix1_unpublish, + .unpublish_nb = pmix1_unpublishnb, + .spawn = pmix1_spawn, + .spawn_nb = pmix1_spawnnb, + .connect = pmix1_connect, + .connect_nb = pmix1_connectnb, + .disconnect = pmix1_disconnect, + .disconnect_nb = pmix1_disconnectnb, + .resolve_peers = pmix1_resolve_peers, + .resolve_nodes = pmix1_resolve_nodes, 
/* server APIs */ - pmix1_server_init, - pmix1_server_finalize, - pmix1_server_gen_regex, - pmix1_server_gen_ppn, - pmix1_server_register_nspace, - pmix1_server_register_client, - pmix1_server_setup_fork, - pmix1_server_dmodex, - pmix1_server_notify_error, + .server_init = pmix1_server_init, + .server_finalize = pmix1_server_finalize, + .generate_regex = pmix1_server_gen_regex, + .generate_ppn = pmix1_server_gen_ppn, + .server_register_nspace = pmix1_server_register_nspace, + .server_deregister_nspace = pmix1_server_deregister_nspace, + .server_register_client = pmix1_server_register_client, + .server_deregister_client = pmix1_server_deregister_client, + .server_setup_fork = pmix1_server_setup_fork, + .server_dmodex_request = pmix1_server_dmodex, + .server_notify_error = pmix1_server_notify_error, /* utility APIs */ - PMIx_Get_version, - opal_pmix_base_register_handler, - opal_pmix_base_deregister_handler, - pmix1_store_local, - pmix1_get_nspace, - pmix1_register_jobid + .get_version = PMIx_Get_version, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = pmix1_store_local, + .get_nspace = pmix1_get_nspace, + .register_jobid = pmix1_register_jobid }; static const char *pmix1_get_nspace(opal_jobid_t jobid) @@ -455,7 +457,8 @@ int pmix1_value_unload(opal_value_t *kv, case PMIX_BYTE_OBJECT: kv->type = OPAL_BYTE_OBJECT; if (NULL != v->data.bo.bytes && 0 < v->data.bo.size) { - kv->data.bo.bytes = (uint8_t*)v->data.bo.bytes; + kv->data.bo.bytes = (uint8_t*)malloc(v->data.bo.size); + memcpy(kv->data.bo.bytes, v->data.bo.bytes, v->data.bo.size); kv->data.bo.size = (int)v->data.bo.size; } else { kv->data.bo.bytes = NULL; diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c index a5aca27560..c0789d6e25 100644 --- a/opal/mca/pmix/s1/pmix_s1.c +++ b/opal/mca/pmix/s1/pmix_s1.c @@ -53,47 +53,25 @@ static const char *s1_get_nspace(opal_jobid_t jobid); static void 
s1_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_s1_module = { - s1_init, - s1_fini, - s1_initialized, - s1_abort, - s1_commit, - s1_fence, - NULL, - s1_put, - s1_get, - NULL, - s1_publish, - NULL, - s1_lookup, - NULL, - s1_unpublish, - NULL, - s1_spawn, - NULL, - s1_job_connect, - NULL, - s1_job_disconnect, - NULL, - NULL, - NULL, - /* server APIs */ - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - /* utility APIs */ - NULL, - opal_pmix_base_register_handler, - opal_pmix_base_deregister_handler, - s1_store_local, - s1_get_nspace, - s1_register_jobid + .init = s1_init, + .finalize = s1_fini, + .initialized = s1_initialized, + .abort = s1_abort, + .commit = s1_commit, + .fence = s1_fence, + .put = s1_put, + .get = s1_get, + .publish = s1_publish, + .lookup = s1_lookup, + .unpublish = s1_unpublish, + .spawn = s1_spawn, + .connect = s1_job_connect, + .disconnect = s1_job_disconnect, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = s1_store_local, + .get_nspace = s1_get_nspace, + .register_jobid = s1_register_jobid }; // usage accounting diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index 8f7f8ee1cc..d4415f01d5 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ b/opal/mca/pmix/s2/pmix_s2.c @@ -60,47 +60,25 @@ static const char *s2_get_nspace(opal_jobid_t jobid); static void s2_register_jobid(opal_jobid_t jobid, const char *nspace); const opal_pmix_base_module_t opal_pmix_s2_module = { - s2_init, - s2_fini, - s2_initialized, - s2_abort, - s2_commit, - s2_fence, - NULL, - s2_put, - s2_get, - NULL, - s2_publish, - NULL, - s2_lookup, - NULL, - s2_unpublish, - NULL, - s2_spawn, - NULL, - s2_job_connect, - NULL, - s2_job_disconnect, - NULL, - NULL, - NULL, - /* server APIs */ - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - /* utility APIs */ - NULL, - 
opal_pmix_base_register_handler, - opal_pmix_base_deregister_handler, - s2_store_local, - s2_get_nspace, - s2_register_jobid + .init = s2_init, + .finalize = s2_fini, + .initialized = s2_initialized, + .abort = s2_abort, + .commit = s2_commit, + .fence = s2_fence, + .put = s2_put, + .get = s2_get, + .publish = s2_publish, + .lookup = s2_lookup, + .unpublish = s2_unpublish, + .spawn = s2_spawn, + .connect = s2_job_connect, + .disconnect = s2_job_disconnect, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = s2_store_local, + .get_nspace = s2_get_nspace, + .register_jobid = s2_register_jobid }; // usage accounting diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index 7af1cfd5dc..01a28c0dee 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -15,6 +15,7 @@ #include "opal/class/opal_list.h" #include "opal/mca/event/event.h" +#include "opal/mca/pmix/pmix.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/errmgr/errmgr.h" @@ -709,6 +710,10 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) } OBJ_RELEASE(map); jdata->map = NULL; + /* tell the PMIx server to release its data */ + if (NULL != opal_pmix.server_deregister_nspace) { + opal_pmix.server_deregister_nspace(jdata->jobid); + } } CHECK_ALIVE: diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index 0e7309c8a3..26bed1bdf3 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -16,6 +16,7 @@ #include #include "opal/util/output.h" +#include "opal/mca/pmix/pmix.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/filem/filem.h" @@ -381,6 +382,10 @@ void check_complete(int fd, short args, void *cbdata) } OBJ_RELEASE(map); jdata->map = NULL; + /* tell the PMIx server to release its data */ + if (NULL != opal_pmix.server_deregister_nspace) { + opal_pmix.server_deregister_nspace(jdata->jobid); + } } CHECK_ALIVE: diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index feebbec919..3365a5b669 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -180,6 +180,7 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel, int pmix_server_init(void) { int rc; + opal_list_t info; if (orte_pmix_server_globals.initialized) { return ORTE_SUCCESS; @@ -215,11 +216,32 @@ int pmix_server_init(void) /* ensure the PMIx server uses the proper rendezvous directory */ opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ); + /* pass the server the local topology - we do this so the procs won't read the + * topology themselves as this could overwhelm the local + * system on large-scale SMPs */ + OBJ_CONSTRUCT(&info, opal_list_t); + if (NULL != 
opal_hwloc_topology) { + char *xmlbuffer=NULL; + int len; + opal_value_t *kv; + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); + if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) { + OBJ_RELEASE(kv); + OBJ_DESTRUCT(&info); + return ORTE_ERROR; + } + kv->data.string = xmlbuffer; + kv->type = OPAL_STRING; + opal_list_append(&info, &kv->super); + } + /* setup the local server */ - if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server))) { + if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) { ORTE_ERROR_LOG(rc); /* memory cleanup will occur when finalize is called */ } + OPAL_LIST_DESTRUCT(&info); /* if the universal server wasn't specified, then we use * our own HNP for that purpose */ diff --git a/orte/orted/pmix/pmix_server_register_fns.c b/orte/orted/pmix/pmix_server_register_fns.c index e95d74cb42..6ac2c340d7 100644 --- a/orte/orted/pmix/pmix_server_register_fns.c +++ b/orte/orted/pmix/pmix_server_register_fns.c @@ -103,23 +103,6 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata) uid = geteuid(); gid = getegid(); - /* local topology - we do this so the procs won't read the - * topology themselves as this could overwhelm the local - * system on large-scale SMPs */ - if (NULL != opal_hwloc_topology) { - char *xmlbuffer=NULL; - int len; - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); - if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) { - OBJ_RELEASE(kv); - return OPAL_ERROR; - } - kv->data.string = xmlbuffer; - kv->type = OPAL_STRING; - opal_list_append(info, &kv->super); - } - /* jobid */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_JOBID); @@ -262,6 +245,7 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata) if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) { if (NULL != tmp) { opal_argv_append_nosize(&procs, tmp); + free(tmp); } else { 
opal_argv_append_nosize(&procs, "UNBOUND"); }