1
1

Merge pull request #1801 from rhc54/topic/toolconnect

Add support for PMIx tool connections and queries.
Этот коммит содержится в:
rhc54 2016-06-29 20:48:19 -07:00 коммит произвёл GitHub
родитель f18d6606da 6e434d6785
Коммит 063f8489c7
52 изменённых файлов: 2987 добавлений и 237 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -450,6 +450,7 @@ orte/test/system/pmi_abort
orte/test/system/opal_hwloc
orte/test/system/opal_db
orte/test/system/ulfm
orte/test/system/pmixtool
orte/tools/orte-checkpoint/orte-checkpoint
orte/tools/orte-checkpoint/orte-checkpoint.1

Просмотреть файл

@ -82,7 +82,8 @@ enum {
OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52),
OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53),
OPAL_ERR_DEBUGGER_RELEASE = (OPAL_ERR_BASE - 54),
OPAL_ERR_HANDLERS_COMPLETE = (OPAL_ERR_BASE - 55)
OPAL_ERR_HANDLERS_COMPLETE = (OPAL_ERR_BASE - 55),
OPAL_ERR_PARTIAL_SUCCESS = (OPAL_ERR_BASE - 56)
};
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)

Просмотреть файл

@ -59,6 +59,24 @@ AC_DEFUN([MCA_opal_pmix_ext20_CONFIG],[
[AC_MSG_RESULT([no])
opal_pmix_ext20_happy=no])
# if we have 2.0, then check further to see if we have
# the PMIx_Query_info function as that is even newer
AS_IF([test "$opal_pmix_ext20_happy" = "yes"],
[AC_MSG_CHECKING([if external component is series 2.0])
OPAL_CHECK_PACKAGE([opal_pmix_ext20],
[pmix.h],
[pmix],
[PMIx_Query_info],
[-lpmix],
[$pmix_ext_install_dir],
[$pmix_ext_install_dir/lib],
[AC_MSG_RESULT([yes])
opal_pmix_query_happy=1],
[AC_MSG_RESULT([no])
opal_pmix_query_happy=0])])
AC_DEFINE_UNQUOTED([HAVE_PMIX_QUERY_FUNCTION], [$opal_pmix_query_happy],
[Whether or not the external library has the PMIx_Query_info function])
AC_SUBST(opal_pmix_ext20_CPPFLAGS)
AC_SUBST(opal_pmix_ext20_LDFLAGS)
AC_SUBST(opal_pmix_ext20_LIBS)

23
opal/mca/pmix/ext20/pmix_ext20.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -350,6 +350,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
void pmix20_event_hdlr(size_t evhdlr_registration_id,
pmix_status_t status, const pmix_proc_t *source,
pmix_info_t info[], size_t ninfo,
pmix_info_t results[], size_t nresults,
pmix_event_notification_cbfunc_fn_t cbfunc,
void *cbdata)
{
@ -559,6 +560,9 @@ opal_pmix_data_range_t pmix20_convert_range(pmix_data_range_t range) {
void pmix20_value_load(pmix_value_t *v,
opal_value_t *kv)
{
size_t n;
char nspace[PMIX_MAX_NSLEN + 1];
switch(kv->type) {
case OPAL_UNDEF:
v->type = PMIX_UNDEF;
@ -650,6 +654,19 @@ void pmix20_value_load(pmix_value_t *v,
v->data.bo.size = 0;
}
break;
case OPAL_UINT32_ARRAY:
/* an array of 32-bit jobids */
v->type = PMIX_INFO_ARRAY;
v->data.array.size = kv->data.uint32_array.size;
if (0 < v->data.array.size) {
PMIX_INFO_CREATE(v->data.array.array, v->data.array.size);
for (n=0; n < v->data.array.size; n++) {
v->data.array.array[n].value.type = PMIX_STRING;
(void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, kv->data.uint32_array.data[n]);
v->data.array.array[n].value.data.string = strdup(nspace);
}
}
break;
default:
/* silence warnings */
break;
@ -664,7 +681,7 @@ int pmix20_value_unload(opal_value_t *kv,
switch(v->type) {
case PMIX_UNDEF:
rc = OPAL_ERR_UNKNOWN_DATA_TYPE;
kv->type = OPAL_UNDEF;
break;
case PMIX_BOOL:
kv->type = OPAL_BOOL;
@ -1143,6 +1160,10 @@ static void ocadcon(pmix20_opalcaddy_t *p)
p->spwncbfunc = NULL;
p->cbdata = NULL;
p->odmdxfunc = NULL;
#if HAVE_PMIX_QUERY_FUNCTION
p->infocbfunc = NULL;
p->toolcbfunc = NULL;
#endif
p->ocbdata = NULL;
}
static void ocaddes(pmix20_opalcaddy_t *p)

Просмотреть файл

@ -130,6 +130,10 @@ typedef struct {
pmix_modex_cbfunc_t mdxcbfunc;
pmix_lookup_cbfunc_t lkupcbfunc;
pmix_spawn_cbfunc_t spwncbfunc;
#if HAVE_PMIX_QUERY_FUNCTION
pmix_info_cbfunc_t infocbfunc;
pmix_tool_connection_cbfunc_t toolcbfunc;
#endif
void *cbdata;
opal_pmix_release_cbfunc_t odmdxfunc;
void *ocbdata;
@ -293,6 +297,7 @@ OPAL_MODULE_DECLSPEC int pmix20_server_notify_event(int status,
OPAL_MODULE_DECLSPEC void pmix20_event_hdlr(size_t evhdlr_registration_id,
pmix_status_t status, const pmix_proc_t *source,
pmix_info_t info[], size_t ninfo,
pmix_info_t results[], size_t nresults,
pmix_event_notification_cbfunc_fn_t cbfunc,
void *cbdata);
OPAL_MODULE_DECLSPEC pmix_status_t pmix20_convert_opalrc(int rc);

173
opal/mca/pmix/ext20/pmix_ext20_server_north.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -88,6 +88,17 @@
pmix_data_range_t range,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
#if HAVE_PMIX_QUERY_FUNCTION
static pmix_status_t server_query(pmix_proc_t *proct,
pmix_info_t *info, size_t ninfo,
pmix_info_t *directives, size_t ndirs,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
static void server_tool_connection(pmix_info_t *info, size_t ninfo,
pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
#endif
pmix_server_module_t mymodule = {
.client_connected = server_client_connected_fn,
.client_finalized = server_client_finalized_fn,
@ -102,7 +113,11 @@
.disconnect = server_disconnect_fn,
.register_events = server_register_events,
.deregister_events = server_deregister_events,
.notify_event = server_notify_event
.notify_event = server_notify_event,
#if HAVE_PMIX_QUERY_FUNCTION
.query = server_query,
.tool_connected = server_tool_connection
#endif
};
opal_pmix_server_module_t *host_module = NULL;
@ -787,3 +802,159 @@ static pmix_status_t server_notify_event(pmix_status_t code,
{
return PMIX_ERR_NOT_SUPPORTED;
}
#if HAVE_PMIX_QUERY_FUNCTION
/* Release function handed to the PMIx info callback together with the
 * converted results (see info_cbfunc): cbdata is the pmix20_opcaddy_t
 * that carries those results, and this drops our reference to it. */
static void _info_rel(void *cbdata)
{
    pmix20_opcaddy_t *results = (pmix20_opcaddy_t*)cbdata;

    OBJ_RELEASE(results);
}
/* Completion callback passed to the host module's query function by
 * server_query. Converts the OPAL-side answer into PMIx form and hands
 * it to the PMIx callback recorded in the caddy.
 *
 * status         - OPAL status of the query; converted via pmix20_convert_opalrc
 * info           - list of opal_value_t results, may be NULL
 * cbdata         - the pmix20_opalcaddy_t set up by the requester; released here
 * release_fn     - if non-NULL, invoked with release_cbdata once the
 *                  incoming list has been copied
 * release_cbdata - opaque argument for release_fn
 */
static void info_cbfunc(int status,
                        opal_list_t *info,
                        void *cbdata,
                        opal_pmix_release_cbfunc_t release_fn,
                        void *release_cbdata)
{
    pmix20_opalcaddy_t *opalcaddy = (pmix20_opalcaddy_t*)cbdata;
    pmix20_opcaddy_t *pcaddy;
    opal_value_t *kv;
    size_t n;

    pcaddy = OBJ_NEW(pmix20_opcaddy_t);

    /* convert the status */
    pcaddy->status = pmix20_convert_opalrc(status);

    /* convert the list to a pmix_info_t array */
    if (NULL != info) {
        pcaddy->ninfo = opal_list_get_size(info);
        if (0 < pcaddy->ninfo) {
            PMIX_INFO_CREATE(pcaddy->info, pcaddy->ninfo);
            n = 0;
            OPAL_LIST_FOREACH(kv, info, opal_value_t) {
                (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN);
                pmix20_value_load(&pcaddy->info[n].value, kv);
                /* advance to the next output slot - without this every
                 * list entry would overwrite info[0] */
                ++n;
            }
        }
    }

    /* we are done with the incoming data */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }

    /* provide the answer downward */
    if (NULL != opalcaddy->infocbfunc) {
        /* pcaddy is handed over as the release cbdata; _info_rel drops
         * it when the receiver calls the release function */
        opalcaddy->infocbfunc(pcaddy->status, pcaddy->info, pcaddy->ninfo,
                              opalcaddy->cbdata, _info_rel, pcaddy);
    } else {
        /* nobody will ever call _info_rel, so release the results here */
        OBJ_RELEASE(pcaddy);
    }
    OBJ_RELEASE(opalcaddy);
}
/* PMIx server-module "query" entry point: translate the request into
 * OPAL types and forward it to the host module's query function.
 *
 * proct      - nspace/rank of the requesting process
 * info/ninfo - array describing the requested data
 * directives/ndirs - currently ignored
 * cbfunc/cbdata    - PMIx callback (and its data) stored in the caddy so
 *                    info_cbfunc can deliver the answer
 *
 * Returns PMIX_ERR_NOT_SUPPORTED when no host query function is
 * available; otherwise the converted OPAL status of the conversion
 * steps or of the host query call. On any failure the caddy is
 * released before returning.
 */
static pmix_status_t server_query(pmix_proc_t *proct,
                                  pmix_info_t *info, size_t ninfo,
                                  pmix_info_t *directives, size_t ndirs,
                                  pmix_info_cbfunc_t cbfunc,
                                  void *cbdata)
{
    pmix20_opalcaddy_t *caddy;
    opal_process_name_t who;
    opal_value_t *item;
    size_t idx;
    int rc;

    if (NULL == host_module || NULL == host_module->query) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* the caddy carries the PMIx callback through the upcall */
    caddy = OBJ_NEW(pmix20_opalcaddy_t);
    caddy->infocbfunc = cbfunc;
    caddy->cbdata = cbdata;

    /* requestor: nspace string -> jobid, rank -> vpid */
    rc = opal_convert_string_to_jobid(&who.jobid, proct->nspace);
    if (OPAL_SUCCESS != rc) {
        goto release;
    }
    who.vpid = proct->rank;

    /* each pmix_info_t becomes an opal_value_t on the caddy's list */
    for (idx = 0; idx < ninfo; ++idx) {
        item = OBJ_NEW(opal_value_t);
        opal_list_append(&caddy->info, &item->super);
        item->key = strdup(info[idx].key);
        rc = pmix20_value_unload(item, &info[idx].value);
        if (OPAL_SUCCESS != rc) {
            goto release;
        }
    }

    /* directives are ignored for now - hand the request to the host */
    rc = host_module->query(&who, &caddy->info, NULL, info_cbfunc, caddy);
    if (OPAL_SUCCESS == rc) {
        /* info_cbfunc now owns the caddy */
        return pmix20_convert_opalrc(rc);
    }

release:
    OBJ_RELEASE(caddy);
    return pmix20_convert_opalrc(rc);
}
/* Callback given to the host module's tool_connected function (see
 * server_tool_connection). Converts the OPAL status and process name
 * into their PMIx equivalents, forwards them to the PMIx callback
 * stored in the caddy (if any), and releases the caddy.
 *
 * status - OPAL status of the connection request
 * proc   - OPAL name assigned to the tool
 * cbdata - the pmix20_opalcaddy_t created for this request
 */
static void toolcbfunc(int status,
                       opal_process_name_t proc,
                       void *cbdata)
{
    pmix20_opalcaddy_t *caddy = (pmix20_opalcaddy_t*)cbdata;
    pmix_proc_t tool;
    pmix_status_t pstat = pmix20_convert_opalrc(status);

    /* jobid -> nspace string, vpid -> rank */
    (void)opal_snprintf_jobid(tool.nspace, PMIX_MAX_NSLEN, proc.jobid);
    tool.rank = proc.vpid;

    /* hand the result down to PMIx */
    if (NULL != caddy->toolcbfunc) {
        caddy->toolcbfunc(pstat, &tool, caddy->cbdata);
    }
    OBJ_RELEASE(caddy);
}
/* PMIx server-module "tool_connected" entry point: convert the
 * connection qualifiers into OPAL values and pass the request to the
 * host module, which answers through toolcbfunc.
 *
 * info/ninfo - qualifiers supplied with the connection request
 * cbfunc     - PMIx callback to receive the status and assigned name
 * cbdata     - opaque data passed back to cbfunc
 *
 * If the host provides no tool_connected function, or a qualifier
 * cannot be converted, cbfunc (when non-NULL) is called exactly once
 * with the error status and a NULL proc, and nothing is passed upward.
 */
static void server_tool_connection(pmix_info_t *info, size_t ninfo,
                                   pmix_tool_connection_cbfunc_t cbfunc,
                                   void *cbdata)
{
    pmix20_opalcaddy_t *opalcaddy;
    size_t n;
    opal_value_t *oinfo;
    int rc;
    pmix_status_t err;

    /* mirror server_query: do not dereference a missing host function */
    if (NULL == host_module || NULL == host_module->tool_connected) {
        if (NULL != cbfunc) {
            cbfunc(PMIX_ERR_NOT_SUPPORTED, NULL, cbdata);
        }
        return;
    }

    /* setup the caddy */
    opalcaddy = OBJ_NEW(pmix20_opalcaddy_t);
    opalcaddy->toolcbfunc = cbfunc;
    opalcaddy->cbdata = cbdata;

    /* convert the info */
    for (n=0; n < ninfo; n++) {
        oinfo = OBJ_NEW(opal_value_t);
        opal_list_append(&opalcaddy->info, &oinfo->super);
        oinfo->key = strdup(info[n].key);
        if (OPAL_SUCCESS != (rc = pmix20_value_unload(oinfo, &info[n].value))) {
            OBJ_RELEASE(opalcaddy);
            err = pmix20_convert_opalrc(rc);
            if (NULL != cbfunc) {
                cbfunc(err, NULL, cbdata);
            }
            /* the caddy is gone and the requester has been told - stop
             * here instead of continuing with the released caddy */
            return;
        }
    }

    /* pass it up */
    host_module->tool_connected(&opalcaddy->info, toolcbfunc, opalcaddy);
}
#endif

4
opal/mca/pmix/pmix2x/configure.m4 Обычный файл → Исполняемый файл
Просмотреть файл

@ -41,12 +41,14 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[
opal_pmix_pmix2x_save_LDFLAGS=$LDFLAGS
opal_pmix_pmix2x_save_LIBS=$LIBS
opal_pmix_pmix2x_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix2x_ --disable-visibility --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
opal_pmix_pmix2x_args="--without-tests-examples --with-pmix-symbol-prefix=opal_pmix_pmix2x_ --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --enable-embedded-hwloc --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
AS_IF([test "$enable_debug" = "yes"],
[opal_pmix_pmix2x_args="--enable-debug $opal_pmix_pmix2x_args"
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],
[opal_pmix_pmix2x_args="--disable-debug $opal_pmix_pmix2x_args"
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"])
AS_IF([test "$with_devel_headers" = "yes"], [],
[opal_pmix_pmix2x_args="--enable-embedded-mode $opal_pmix_pmix2x_args"])
CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include -I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS"
OPAL_CONFIG_SUBDIR([$opal_pmix_pmix2x_basedir/pmix],

Просмотреть файл

@ -58,6 +58,8 @@ include src/client/Makefile.am
include src/server/Makefile.am
include src/sec/Makefile.am
include src/event/Makefile.am
include src/common/Makefile.am
include src/tool/Makefile.am
if WANT_DSTORE
include src/sm/Makefile.am
@ -74,6 +76,9 @@ else
lib_LTLIBRARIES = libpmix.la
libpmix_la_SOURCES = $(headers) $(sources)
libpmix_la_LDFLAGS = -version-info $(libpmix_so_version)
endif
if PMIX_TESTS_EXAMPLES
SUBDIRS = . test examples
endif

Просмотреть файл

@ -23,14 +23,14 @@ release=0
# The only requirement is that it must be entirely printable ASCII
# characters and have no white space.
greek=a1
greek=
# If repo_rev is empty, then the repository version number will be
# obtained during "make dist" via the "git describe --tags --always"
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".
repo_rev=gitaf7a389
repo_rev=git4940b48
# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
@ -44,7 +44,7 @@ tarball_version=
# The date when this release was created
date="Jun 16, 2016"
date="Jun 29, 2016"
# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library

Просмотреть файл

@ -316,7 +316,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[
stdarg.h sys/stat.h sys/time.h \
sys/types.h sys/un.h sys/uio.h net/uio.h \
sys/wait.h syslog.h \
time.h unistd.h \
time.h unistd.h dirent.h \
crt_externs.h signal.h \
ioLib.h sockLib.h hostLib.h limits.h])
@ -648,6 +648,17 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[
[pmix_mode=standalone
AC_MSG_RESULT([no])])
# Install tests and examples?
AC_MSG_CHECKING([if tests and examples are to be installed])
AC_ARG_WITH([tests-examples],
[AC_HELP_STRING([--with-tests-examples],
[Whether or not to install the tests and example programs.])])
AS_IF([test ! -z "$with_tests_examples" && test "$with_tests_examples" = "no"],
[pmix_tests=no
AC_MSG_RESULT([no])],
[pmix_tests=yes
AC_MSG_RESULT([yes])])
# Change the symbol prefix?
AC_ARG_WITH([pmix-symbol-prefix],
AC_HELP_STRING([--with-pmix-symbol-prefix=STRING],
@ -827,6 +838,7 @@ AC_DEFUN([PMIX_SET_SYMBOL_PREFIX],[
AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[
AS_IF([test "$pmix_did_am_conditionals" != "yes"],[
AM_CONDITIONAL([PMIX_EMBEDDED_MODE], [test "x$pmix_mode" = "xembedded"])
AM_CONDITIONAL([PMIX_TESTS_EXAMPLES], [test "x$pmix_tests" = "xyes"])
AM_CONDITIONAL([PMIX_COMPILE_TIMING], [test "$WANT_TIMING" = "1"])
AM_CONDITIONAL([PMIX_WANT_MUNGE], [test "$pmix_munge_support" = "1"])
AM_CONDITIONAL([PMIX_WANT_SASL], [test "$pmix_sasl_support" = "1"])

6
opal/mca/pmix/pmix2x/pmix/config/pmix_setup_hwloc.m4 Обычный файл → Исполняемый файл
Просмотреть файл

@ -17,8 +17,12 @@ AC_DEFUN([PMIX_HWLOC_CONFIG],[
[AC_HELP_STRING([--with-hwloc-header=HEADER],
[The value that should be included in C files to include hwloc.h])])
AC_ARG_ENABLE([embedded-hwloc],
[AC_HELP_STRING([--enable-embedded-hwloc],
[Enable use of locally embedded hwloc])])
pmix_hwloc_support=0
AS_IF([test "$enable_embedded_mode" = "yes"],
AS_IF([test "$enable_embedded_hwloc" = "yes"],
[_PMIX_HWLOC_EMBEDDED_MODE],
[_PMIX_HWLOC_EXTERNAL])

6
opal/mca/pmix/pmix2x/pmix/config/pmix_setup_libevent.m4 Обычный файл → Исполняемый файл
Просмотреть файл

@ -17,7 +17,11 @@ AC_DEFUN([PMIX_LIBEVENT_CONFIG],[
[AC_HELP_STRING([--with-libevent-header=HEADER],
[The value that should be included in C files to include event.h])])
AS_IF([test "$enable_embedded_mode" = "yes"],
AC_ARG_ENABLE([embedded-libevent],
[AC_HELP_STRING([--enable-embedded-libevent],
[Enable use of locally embedded libevent])])
AS_IF([test "$enable_embedded_libevent" = "yes"],
[_PMIX_LIBEVENT_EMBEDDED_MODE],
[_PMIX_LIBEVENT_EXTERNAL])

Просмотреть файл

@ -21,7 +21,7 @@
AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/src/api
noinst_PROGRAMS = client dmodex dynamic fault pub server
noinst_PROGRAMS = client dmodex dynamic fault pub tool
client_SOURCES = client.c
client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
@ -43,9 +43,9 @@ pub_SOURCES = pub.c
pub_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
pub_LDADD = $(top_builddir)/libpmix.la
server_SOURCES = pub.c
server_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
server_LDADD = $(top_builddir)/libpmix.la
tool_SOURCES = tool.c
tool_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
tool_LDADD = $(top_builddir)/libpmix.la
distclean-local:
rm -f *.o client dmodex dynamic fault pub server

92
opal/mca/pmix/pmix2x/pmix/examples/tool.c Обычный файл
Просмотреть файл

@ -0,0 +1,92 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <pmix_tool.h>
int main(int argc, char **argv)
{
pmix_status_t rc;
pmix_proc_t myproc;
pmix_info_t *info;
size_t ninfo;
/* init us */
if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
fprintf(stderr, "PMIx_tool_init failed: %d\n", rc);
exit(rc);
}
fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
/* query something */
ninfo = 2;
PMIX_INFO_CREATE(info, ninfo);
(void)strncpy(info[0].key, "foobar", PMIX_MAX_KEYLEN);
(void)strncpy(info[1].key, "spastic", PMIX_MAX_KEYLEN);
if (PMIX_SUCCESS != (rc = PMIx_Query_info(info, ninfo))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
if (0 != strncmp(info[0].key, "foobar", PMIX_MAX_KEYLEN)) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs foobar\n",
myproc.nspace, myproc.rank, info[0].key);
}
if (0 != strncmp(info[1].key, "spastic", PMIX_MAX_KEYLEN)) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs spastic\n",
myproc.nspace, myproc.rank, info[1].key);
}
if (PMIX_STRING != info[0].value.type) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[0] wrong type: %d vs %d\n",
myproc.nspace, myproc.rank, info[0].value.type, PMIX_STRING);
}
if (PMIX_STRING != info[1].value.type) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[1] wrong type: %d vs %d\n",
myproc.nspace, myproc.rank, info[1].value.type, PMIX_STRING);
}
if (0 != strcmp(info[0].value.data.string, "0")) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[0] wrong value: %s vs 0\n",
myproc.nspace, myproc.rank, info[1].value.data.string);
}
if (0 != strcmp(info[1].value.data.string, "1")) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[1] wrong value: %s vs 1\n",
myproc.nspace, myproc.rank, info[1].value.data.string);
}
PMIX_INFO_FREE(info, ninfo);
done:
/* finalize us */
fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
} else {
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
}
fflush(stderr);
return(rc);
}

Просмотреть файл

@ -17,7 +17,8 @@ include_HEADERS = \
include/pmix.h \
include/pmix_server.h \
include/pmi.h \
include/pmi2.h
include/pmi2.h \
include/pmix_tool.h
include_pmixdir = $(includedir)/pmix
include_pmix_HEADERS = \

Просмотреть файл

@ -406,5 +406,23 @@ BEGIN_C_DECLS
* when done with it */
pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist);
/* Query information about the system in general - can include
* a list of active nspaces, network topology, etc. We assume that
* the host RM will exercise appropriate access control to the
* information. The blocking form of the call will fill the
* returned values into the info array structs. The following
* return status codes are provided:
*
* PMIX_SUCCESS - all data has been returned
* PMIX_ERR_NOT_FOUND - none of the requested data was available
* PMIX_QUERY_PARTIAL_SUCCESS - some of the data has been returned
* PMIX_ERR_NOT_SUPPORTED - the host RM does not support this function
*/
pmix_status_t PMIx_Query_info(pmix_info_t info[], size_t ninfo);
pmix_status_t PMIx_Query_info_nb(pmix_info_t info[], size_t ninfo,
pmix_info_t *directives, size_t ndirectives,
pmix_info_cbfunc_t cbfunc, void *cbdata);
END_C_DECLS
#endif

Просмотреть файл

@ -41,6 +41,8 @@
*
* Additional copyrights may follow
*
* Copyright (c) 2016 IBM Corporation. All rights reserved.
*
* $HEADER$
*/
@ -93,6 +95,9 @@ BEGIN_C_DECLS
/* initialization attributes */
#define PMIX_EVENT_BASE "pmix.evbase" // (struct event_base *) pointer to libevent event_base to use in place
// of the internal progress thread
#define PMIX_SERVER_TOOL_SUPPORT "pmix.srvr.tool" // (bool) The host RM wants to declare itself as willing to
// accept tool connection requests
#define PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (uint32_t) pid of the target server
/* identification attributes */
#define PMIX_USERID "pmix.euid" // (uint32_t) effective user id
@ -147,6 +152,7 @@ BEGIN_C_DECLS
/* size info */
#define PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
#define PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job
#define PMIX_JOB_NUM_APPS "pmix.job.napps" // (uint32_t) #apps in this job
#define PMIX_APP_SIZE "pmix.app.size" // (uint32_t) #procs in this application
#define PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node
#define PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node
@ -217,6 +223,15 @@ BEGIN_C_DECLS
#define PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position
#define PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init
#define PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin
#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc
#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me
#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me
/* query attributes */
#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces
#define PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job
#define PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst" // (char*) request a comma-delimited list of scheduler queues
#define PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst" // (TBD) status of a specified scheduler queue
/**** PMIX ERROR CONSTANTS ****/
/* PMIx errors are always negative, with 0 reserved for success */
@ -265,6 +280,8 @@ typedef int pmix_status_t;
#define PMIX_EVENT_PARTIAL_ACTION_TAKEN (PMIX_ERR_BASE - 31)
#define PMIX_EVENT_ACTION_DEFERRED (PMIX_ERR_BASE - 32)
#define PMIX_EVENT_ACTION_COMPLETE (PMIX_ERR_BASE - 33)
/* used by the query system */
#define PMIX_QUERY_PARTIAL_SUCCESS (PMIX_ERR_BASE - 34)
/* define a starting point for PMIx internal error codes
@ -278,7 +295,6 @@ typedef int pmix_status_t;
* specific value as the value of the constant may change */
#define PMIX_EXTERNAL_ERR_BASE -2000
/**** PMIX DATA TYPES ****/
typedef enum {
PMIX_UNDEF = 0,
@ -869,6 +885,16 @@ typedef void (*pmix_evhdlr_reg_cbfunc_t)(pmix_status_t status,
typedef void (*pmix_value_cbfunc_t)(pmix_status_t status,
pmix_value_t *kv, void *cbdata);
/* define a callback function for calls to PMIx_Query. The status
* indicates if requested data was found or not - an array of
* pmix_info_t will contain the key/value pairs. */
typedef void (*pmix_info_cbfunc_t)(pmix_status_t status,
pmix_info_t *info, size_t ninfo,
void *cbdata,
pmix_release_cbfunc_t release_fn,
void *release_cbdata);
/**** COMMON SUPPORT FUNCTIONS ****/
/* Register an event handler to report events. Three types of events
* can be reported:

Просмотреть файл

@ -269,7 +269,6 @@ typedef pmix_status_t (*pmix_server_notify_event_fn_t)(pmix_status_t code,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Callback function for incoming connection requests from
* local clients */
typedef void (*pmix_connection_cbfunc_t)(int incoming_sd, void *cbdata);
@ -288,6 +287,40 @@ typedef pmix_status_t (*pmix_server_listener_fn_t)(int listening_sd,
pmix_connection_cbfunc_t cbfunc,
void *cbdata);
/* Query information from the resource manager. The query will include
* the nspace/rank of the proc that is requesting the info, an
* array of pmix_info_t describing the request, an optional array
* of pmix_info_t directives, and a callback function/data for the return. */
typedef pmix_status_t (*pmix_server_query_fn_t)(pmix_proc_t *proct,
pmix_info_t *info, size_t ninfo,
pmix_info_t *directives, size_t ndirs,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
/* Callback function for incoming tool connections - the host
* RM shall provide an nspace/rank for the connecting tool. We
* assume that a rank=0 will be the normal assignment, but allow
* for the future possibility of a parallel set of tools
* connecting, and thus each proc requiring a rank*/
typedef void (*pmix_tool_connection_cbfunc_t)(pmix_status_t status,
pmix_proc_t *proc, void *cbdata);
/* Register that a tool has connected to the server, and request
* that the tool be assigned an nspace/rank for further interactions.
* The optional pmix_info_t array can be used to pass qualifiers for
* the connection request:
*
* (a) PMIX_USERID - effective userid of the tool
* (b) PMIX_GRPID - effective groupid of the tool
* (c) PMIX_FWD_STDOUT - forward any stdout to this tool
* (d) PMIX_FWD_STDERR - forward any stderr to this tool
* (e) PMIX_FWD_STDIN - forward stdin from this tool to any
* processes spawned on its behalf
*/
typedef void (*pmix_server_tool_connection_fn_t)(pmix_info_t *info, size_t ninfo,
pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
typedef struct pmix_server_module_2_0_0_t {
pmix_server_client_connected_fn_t client_connected;
pmix_server_client_finalized_fn_t client_finalized;
@ -304,6 +337,8 @@ typedef struct pmix_server_module_2_0_0_t {
pmix_server_deregister_events_fn_t deregister_events;
pmix_server_notify_event_fn_t notify_event;
pmix_server_listener_fn_t listener;
pmix_server_query_fn_t query;
pmix_server_tool_connection_fn_t tool_connected;
} pmix_server_module_t;
/**** SERVER SUPPORT INIT/FINALIZE FUNCTIONS ****/
@ -314,7 +349,10 @@ typedef struct pmix_server_module_2_0_0_t {
* array of pmix_info_t structs is used to pass
* additional info that may be required by the server
* when initializing - e.g., a user/group ID to set
* on the rendezvous file for the Unix Domain Socket */
* on the rendezvous file for the Unix Domain Socket. It
* also may include the PMIX_SERVER_TOOL_SUPPORT key, thereby
* indicating that the daemon is willing to accept connection
* requests from tools */
pmix_status_t PMIx_server_init(pmix_server_module_t *module,
pmix_info_t info[], size_t ninfo);

109
opal/mca/pmix/pmix2x/pmix/include/pmix_tool.h Обычный файл
Просмотреть файл

@ -0,0 +1,109 @@
/*
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2015 Artem Y. Polyakov <artpol84@gmail.com>.
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer listed
* in this license in the documentation and/or other materials
* provided with the distribution.
*
* - Neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* The copyright holders provide no reassurances that the source code
* provided does not infringe any patent, copyright, or any other
* intellectual property rights of third parties. The copyright holders
* disclaim any liability to any recipient for claims brought against
* recipient by any third party for infringement of that parties
* intellectual property rights.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $HEADER$
*
* This header declares the tool-side PMIx entry points: initializing
* the PMIx tool library, which connects to the local PMIx server and
* returns the namespace and rank assigned to the tool, and finalizing
* it. It also includes pmix.h so that a tool has access to the rest of
* the client functions.
*/
#ifndef PMIx_TOOL_API_H
#define PMIx_TOOL_API_H
#include <pmix/autogen/config.h>
/* Symbol transforms */
#include <pmix/rename.h>
/* Structure and constant definitions */
#include <pmix/pmix_common.h>
/* provide access to the rest of the client functions */
#include <pmix.h>
BEGIN_C_DECLS
/**** TOOL INIT/FINALIZE FUNCTIONS ****/
/* Initialize the PMIx tool, returning the process identifier assigned
* to this tool in the provided pmix_proc_t struct.
*
* When called the PMIx tool library will check for the required connection
* information of the local PMIx server and will establish the connection.
* If the information is not found, or the server connection fails, then
* an appropriate error constant will be returned.
*
* If successful, the function will return PMIX_SUCCESS and will fill the
* provided structure with the server-assigned namespace and rank of the tool.
*
* Note that the PMIx tool library is reference counted, and so multiple
* calls to PMIx_tool_init are allowed. Thus, one way to obtain the namespace and
* rank of the process is to simply call PMIx_tool_init with a non-NULL parameter.
*
* The info array is used to pass user requests pertaining to the init
* and subsequent operations. Passing a _NULL_ value for the array pointer
* is supported if no directives are desired.
*/
pmix_status_t PMIx_tool_init(pmix_proc_t *proc,
pmix_info_t info[], size_t ninfo);
/* Finalize the PMIx tool library, closing the connection to the local server.
* An error code will be returned if, for some reason, the connection
* cannot be closed.
*
* Unlike PMIx_tool_init, this call takes no info array, so no
* directives can be passed to the finalize operation. */
pmix_status_t PMIx_tool_finalize(void);
END_C_DECLS
#endif

Просмотреть файл

@ -179,7 +179,15 @@ pmix_status_t pmix_bfrop_copy_payload(pmix_buffer_t *dest, pmix_buffer_t *src)
bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1)
{
bool rc = false;
if (p->type != p1->type) {
return rc;
}
switch (p->type) {
case PMIX_UNDEF:
rc = true;
break;
case PMIX_BOOL:
rc = (p->data.flag == p1->data.flag);
break;
@ -238,6 +246,8 @@ pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src)
/* copy the right field */
p->type = src->type;
switch (src->type) {
case PMIX_UNDEF:
break;
case PMIX_BOOL:
p->data.flag = src->data.flag;
break;

Просмотреть файл

@ -437,6 +437,8 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer,
pmix_status_t ret;
switch (p->type) {
case PMIX_UNDEF:
break;
case PMIX_BOOL:
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.flag, 1, PMIX_BOOL))) {
return ret;

Просмотреть файл

@ -527,6 +527,8 @@ pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest,
m = 1;
switch (val->type) {
case PMIX_UNDEF:
break;
case PMIX_BOOL:
if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.flag, &m, PMIX_BOOL))) {
return ret;

Просмотреть файл

@ -1026,41 +1026,41 @@ static pmix_status_t send_connect_ack(int sd)
}
/* receive the status reply */
rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(int));
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
return rc;
}
/* see if they want us to do the handshake */
if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) {
if (NULL == pmix_sec.client_handshake) {
return PMIX_ERR_HANDSHAKE_FAILED;
}
if (PMIX_SUCCESS != (rc = pmix_sec.client_handshake(sd))) {
rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(int));
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
return rc;
}
} else if (PMIX_SUCCESS != reply) {
return reply;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: RECV CONNECT CONFIRMATION");
/* see if they want us to do the handshake */
if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) {
if (NULL == pmix_sec.client_handshake) {
return PMIX_ERR_HANDSHAKE_FAILED;
}
if (PMIX_SUCCESS != (rc = pmix_sec.client_handshake(sd))) {
return rc;
}
} else if (PMIX_SUCCESS != reply) {
return reply;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: RECV CONNECT CONFIRMATION");
/* receive our index into the server's client array */
rc = pmix_usock_recv_blocking(sd, (char*)&pmix_globals.pindex, sizeof(int));
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
return rc;
}
if (sockopt) {
/* return the socket to normal */
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) {
return PMIX_ERR_UNREACH;
rc = pmix_usock_recv_blocking(sd, (char*)&pmix_globals.pindex, sizeof(int));
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
return rc;
}
if (sockopt) {
/* return the socket to normal */
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) {
return PMIX_ERR_UNREACH;
}
}
}
return PMIX_SUCCESS;
return PMIX_SUCCESS;
}
void pmix_client_process_nspace_blob(const char *nspace, pmix_buffer_t *bptr)

Просмотреть файл

@ -0,0 +1,11 @@
#
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources += \
src/common/pmix_query.c

Просмотреть файл

@ -0,0 +1,251 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <src/include/pmix_config.h>
#include <src/include/types.h>
#include <pmix/autogen/pmix_stdint.h>
#include <src/include/pmix_socket_errno.h>
#include <pmix.h>
#include <pmix/pmix_common.h>
#include <pmix_server.h>
#include "src/util/argv.h"
#include "src/util/error.h"
#include "src/util/output.h"
#include "src/buffer_ops/buffer_ops.h"
#include "src/usock/usock.h"
#include "src/client/pmix_client_ops.h"
#include "src/server/pmix_server_ops.h"
#include "src/include/pmix_globals.h"
static void wait_cbfunc(pmix_status_t status,
pmix_info_t *info, size_t ninfo,
void *cbdata,
pmix_release_cbfunc_t release_fn,
void *release_cbdata);
/* Blocking form of the query API.
 *
 * Issues the request through PMIx_Query_info_nb with wait_cbfunc as the
 * completion callback, then waits (PMIX_WAIT_FOR_COMPLETION) until that
 * callback has copied the matching values into the caller's array.
 *
 * info   array of keys to be queried; values are filled in place
 * ninfo  number of elements in info
 *
 * Returns PMIX_ERR_INIT if the library has not been initialized, the
 * error from PMIx_Query_info_nb if the request could not be issued, and
 * otherwise the status recorded by wait_cbfunc.
 */
PMIX_EXPORT pmix_status_t PMIx_Query_info(pmix_info_t *info, size_t ninfo)
{
    pmix_query_caddy_t *cd;
    pmix_status_t rc, ret;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query blocking version");

    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }

    /* prep the caddy - it only borrows the caller's info array */
    cd = PMIX_NEW(pmix_query_caddy_t);
    cd->cbfunc = wait_cbfunc;
    cd->cbdata = cd;
    cd->relcbfunc = NULL;

    /* Use the non-blocking form as our engine */
    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query requesting %d values",
                        (int)ninfo);
    cd->info = info;
    cd->ninfo = ninfo;
    cd->active = true;
    if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(cd->info, cd->ninfo, NULL, 0, wait_cbfunc, cd))) {
        PMIX_RELEASE(cd);
        return rc;
    }
    PMIX_WAIT_FOR_COMPLETION(cd->active);

    /* wait_cbfunc has already transferred the values into the caller's
     * array and stored the provider's release function and its argument
     * in relcbfunc/cbdata. Invoke it now so the provider can free the
     * returned data - otherwise it would be leaked on every call */
    if (NULL != cd->relcbfunc) {
        cd->relcbfunc(cd->cbdata);
    }

    /* report whatever status the callback recorded (including
     * PMIX_ERR_NOT_FOUND) */
    ret = cd->status;
    PMIX_RELEASE(cd);
    return ret;
}
static void relcbfunc(void *cbdata)
{
pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:query release callback");
if (NULL != cd->info) {
PMIX_INFO_FREE(cd->info, cd->ninfo);
}
PMIX_RELEASE(cd);
}
/* Receive handler for the server's reply to a query request.
 *
 * Unpacks the status and, on success, the returned info array into a new
 * results caddy, then hands them to the callback stored in the request
 * caddy together with relcbfunc so the recipient can free them.
 *
 * peer, hdr  not used here
 * buf        reply buffer from the server
 * cbdata     the pmix_query_caddy_t created by PMIx_Query_info_nb
 */
static void query_cbfunc(struct pmix_peer_t *peer,
                         pmix_usock_hdr_t *hdr,
                         pmix_buffer_t *buf, void *cbdata)
{
    pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata;
    pmix_status_t rc;
    pmix_query_caddy_t *results;
    int cnt;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query cback from server");

    results = PMIX_NEW(pmix_query_caddy_t);

    /* unpack the status */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->status, &cnt, PMIX_STATUS))) {
        PMIX_ERROR_LOG(rc);
        /* the status field was never filled in - report the unpack
         * error instead of an indeterminate value */
        results->status = rc;
        goto complete;
    }
    if (PMIX_SUCCESS != results->status) {
        goto complete;
    }

    /* unpack any returned data */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->ninfo, &cnt, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        /* do not report success when the payload could not be read */
        results->status = rc;
        results->ninfo = 0;
        goto complete;
    }
    if (0 < results->ninfo) {
        PMIX_INFO_CREATE(results->info, results->ninfo);
        cnt = results->ninfo;
        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, results->info, &cnt, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
            results->status = rc;
            goto complete;
        }
    }

  complete:
    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query cback from server releasing");
    /* release the caller */
    if (NULL != cd->cbfunc) {
        cd->cbfunc(results->status, results->info, results->ninfo, cd->cbdata, relcbfunc, results);
    } else {
        /* nobody will ever call the release function, so free the
         * results ourselves */
        relcbfunc(results);
    }
    PMIX_RELEASE(cd);
}
/* Non-blocking query.
 *
 * When running as the server, the request is handed directly to the host
 * resource manager's query function. Otherwise the request is packed and
 * sent to the local server; query_cbfunc processes the reply and invokes
 * cbfunc.
 *
 * info, ninfo              keys being queried (must be non-NULL/non-zero)
 * directives, ndirectives  optional directives; directives may be NULL
 *                          only when ndirectives is zero
 * cbfunc, cbdata           completion callback and its opaque argument
 *
 * Returns PMIX_ERR_INIT if the library is not initialized,
 * PMIX_ERR_BAD_PARAM for invalid arguments, PMIX_ERR_NOT_SUPPORTED if the
 * host provides no query function, the packing error if the request
 * could not be assembled, and PMIX_SUCCESS once the request was issued.
 */
PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_info_t info[], size_t ninfo,
                                             pmix_info_t *directives, size_t ndirectives,
                                             pmix_info_cbfunc_t cbfunc, void *cbdata)
{
    pmix_query_caddy_t *cd;
    pmix_cmd_t cmd = PMIX_QUERY_CMD;
    pmix_buffer_t *msg;
    pmix_status_t rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query non-blocking");

    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }

    if (0 == ninfo || NULL == info) {
        return PMIX_ERR_BAD_PARAM;
    }
    /* a directive count without an array would be dereferenced when
     * packing (or by the host), so reject it up front */
    if (0 < ndirectives && NULL == directives) {
        return PMIX_ERR_BAD_PARAM;
    }

    /* if we are the server, then we just issue the query and
     * return the response */
    if (pmix_globals.server) {
        if (NULL == pmix_host_server.query) {
            /* nothing we can do */
            return PMIX_ERR_NOT_SUPPORTED;
        }
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "pmix:query handed to RM");
        pmix_host_server.query(&pmix_globals.myid,
                               info, ninfo,
                               directives, ndirectives,
                               cbfunc, cbdata);
        return PMIX_SUCCESS;
    }

    /* if we are a client, then relay this request to the server */
    cd = PMIX_NEW(pmix_query_caddy_t);
    cd->cbfunc = cbfunc;
    cd->cbdata = cbdata;
    msg = PMIX_NEW(pmix_buffer_t);

    /* message layout: cmd, ninfo, info[], ndirectives, directives[] */
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
        PMIX_ERROR_LOG(rc);
        goto pack_failed;
    }
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        goto pack_failed;
    }
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
        PMIX_ERROR_LOG(rc);
        goto pack_failed;
    }
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirectives, 1, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        goto pack_failed;
    }
    if (0 < ndirectives) {
        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, directives, ndirectives, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
            goto pack_failed;
        }
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query sending to server");
    PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, query_cbfunc, cd);
    return PMIX_SUCCESS;

  pack_failed:
    /* the message was never queued, so both objects are still ours */
    PMIX_RELEASE(msg);
    PMIX_RELEASE(cd);
    return rc;
}
/* Completion callback used by the blocking PMIx_Query_info.
 *
 * Records the status, copies each returned value into the entry of the
 * waiting caddy's info array that carries the same key, stores the
 * provider's release function and argument in the caddy, and finally
 * clears the caddy's active flag.
 */
static void wait_cbfunc(pmix_status_t status,
                        pmix_info_t *results, size_t nresults,
                        void *cbdata,
                        pmix_release_cbfunc_t release_fn,
                        void *release_cbdata)
{
    pmix_query_caddy_t *waiter = (pmix_query_caddy_t*)cbdata;
    size_t ridx, qidx;
    pmix_status_t xrc;
    bool failed = false;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query wait callback");

    waiter->status = status;

    /* the results are not guaranteed to arrive in the order the keys
     * were requested, so look each one up by key; stop at the first
     * transfer error */
    for (ridx = 0; !failed && ridx < nresults; ridx++) {
        for (qidx = 0; qidx < waiter->ninfo; qidx++) {
            if (0 != strncmp(results[ridx].key, waiter->info[qidx].key, PMIX_MAX_KEYLEN)) {
                continue;
            }
            xrc = pmix_value_xfer(&waiter->info[qidx].value, &results[ridx].value);
            if (PMIX_SUCCESS != xrc) {
                waiter->status = xrc;
                failed = true;
            }
            /* first matching key wins */
            break;
        }
    }

    waiter->relcbfunc = release_fn;
    waiter->cbdata = release_cbdata;
    /* must be last: this is the flag the blocked caller is waiting on */
    waiter->active = false;
}

Просмотреть файл

@ -191,3 +191,17 @@ PMIX_CLASS_INSTANCE(pmix_shift_caddy_t,
PMIX_CLASS_INSTANCE(pmix_info_caddy_t,
pmix_list_item_t,
NULL, NULL);
/* Constructor for pmix_query_caddy_t: puts every scalar and pointer field
 * into a known state so that no user of the caddy reads an indeterminate
 * value (the event member is left for its user to assign). */
static void qcon(pmix_query_caddy_t *p)
{
    p->active = false;
    p->status = PMIX_SUCCESS;
    p->info = NULL;
    p->ninfo = 0;
    p->directives = NULL;
    p->ndirs = 0;
    p->cbfunc = NULL;
    p->cbdata = NULL;
    p->relcbfunc = NULL;
}
PMIX_CLASS_INSTANCE(pmix_query_caddy_t,
pmix_object_t,
qcon, NULL);

Просмотреть файл

@ -66,6 +66,7 @@ typedef enum {
PMIX_NOTIFY_CMD,
PMIX_REGEVENTS_CMD,
PMIX_DEREGEVENTS_CMD,
PMIX_QUERY_CMD
} pmix_cmd_t;
/* define a set of flags to direct collection
@ -214,6 +215,22 @@ typedef struct {
} pmix_server_caddy_t;
PMIX_CLASS_DECLARATION(pmix_server_caddy_t);
/* caddy for query requests
 *
 * Carries a query and its completion information between the API entry
 * points and their callbacks. The same type is also used to hold the
 * results unpacked from a server reply. */
typedef struct {
/* base object (the class is instantiated with pmix_object_t as parent) */
pmix_object_t super;
/* event handle - NOTE(review): presumably used when thread-shifting the
 * caddy; confirm against the users of this field */
pmix_event_t ev;
/* set true by the blocking API before issuing the request and cleared
 * by its completion callback */
volatile bool active;
/* status of the operation */
pmix_status_t status;
/* array of info structs (query keys or returned results) and its length */
pmix_info_t *info;
size_t ninfo;
/* directives and their count - NOTE(review): presumably the directives
 * accompanying the query; confirm against the server-side users */
pmix_info_t *directives;
size_t ndirs;
/* function to be called with the results of the query */
pmix_info_cbfunc_t cbfunc;
/* function to be called when the results are no longer needed */
pmix_release_cbfunc_t relcbfunc;
/* opaque pointer handed to the callback */
void *cbdata;
} pmix_query_caddy_t;
PMIX_CLASS_DECLARATION(pmix_query_caddy_t);
/* define a tracker for collective operations */
typedef struct {
pmix_list_item_t super;

Просмотреть файл

@ -66,6 +66,7 @@ pmix_server_globals_t pmix_server_globals = {{{0}}};
// local variables
static char *security_mode = NULL;
static pid_t mypid;
// local functions for connection support
static void server_message_handler(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
@ -116,7 +117,6 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
{
int debug_level;
char *tdir, *evar;
pid_t pid;
char * pmix_pid;
pmix_listener_t *listener;
@ -139,10 +139,10 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
(void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN);
}
/* look for our rank, if one was given */
pid = getpid();
mypid = getpid();
if (NULL == (evar = getenv("PMIX_SERVER_RANK"))) {
/* use our pid */
pmix_globals.myid.rank = pid;
pmix_globals.myid.rank = mypid;
} else {
pmix_globals.myid.rank = strtol(evar, NULL, 10);
}
@ -193,7 +193,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
/* for now, just setup the v1.1 series rendezvous point
* we use the pid to reduce collisions */
if (0 > asprintf(&pmix_pid, "%s/pmix-%d", tdir, pid)) {
if (0 > asprintf(&pmix_pid, "%s/pmix-%d", tdir, mypid)) {
return PMIX_ERR_NOMEM;
}
if ((strlen(pmix_pid) + 1) > sizeof(listener->address.sun_path)-1) {
@ -209,7 +209,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
return PMIX_ERR_NOMEM;
}
listener->varname = strdup("PMIX_SERVER_URI");
listener->protocol_type = 1;
listener->protocol = PMIX_PROTOCOL_V1;
pmix_list_append(&pmix_server_globals.listeners, &listener->super);
free(pmix_pid);
@ -224,9 +224,14 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
{
pmix_usock_posted_recv_t *req;
pmix_status_t rc;
size_t n;
size_t n, m;
pmix_kval_t kv;
pmix_listener_t *lt;
int myhostnamelen = 10;
char myhostname[myhostnamelen];
char *pmix_pid, *tdir;
char **protected = NULL;
bool protect;
++pmix_globals.init_cntr;
if (1 < pmix_globals.init_cntr) {
@ -255,7 +260,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
}
/* check the info keys for a directive about the uid/gid
* to be set for the rendezvous file */
* to be set for the rendezvous file, and for indication
* of willingness to support tool connections */
if (NULL != info) {
for (n=0; n < ninfo; n++) {
if (0 == strcmp(info[n].key, PMIX_USERID)) {
@ -264,17 +270,52 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
lt->owner = info[n].value.data.uint32;
lt->owner_given = true;
}
/* push this onto our protected list of keys not
* to be passed to the clients */
pmix_argv_append_nosize(&protected, PMIX_USERID);
} else if (0 == strcmp(info[n].key, PMIX_GRPID)) {
/* the grpid is in the uint32_t storage */
PMIX_LIST_FOREACH(lt, &pmix_server_globals.listeners, pmix_listener_t) {
lt->group = info[n].value.data.uint32;
lt->group_given = true;
}
/* push this onto our protected list of keys not
* to be passed to the clients */
pmix_argv_append_nosize(&protected, PMIX_GRPID);
} else if (0 == strcmp(info[n].key, PMIX_SOCKET_MODE)) {
/* socket mode is in the uint32_t storage */
PMIX_LIST_FOREACH(lt, &pmix_server_globals.listeners, pmix_listener_t) {
lt->mode = info[n].value.data.uint32;
}
} else if (0 == strcmp(info[n].key, PMIX_SERVER_TOOL_SUPPORT)) {
pmix_listener_t *tl = PMIX_NEW(pmix_listener_t);
tl -> address.sun_family = AF_UNIX;
tl->protocol = PMIX_PROTOCOL_TOOL;
/* Get up to 10 chars of hostname.*/
gethostname(myhostname, myhostnamelen);
/* need to put this in the global tmpdir as opposed to
* where the server tmpdir might be */
if (NULL == (tdir = getenv("TMPDIR"))) {
if (NULL == (tdir = getenv("TEMP"))) {
if (NULL == (tdir = getenv("TMP"))) {
tdir = "/tmp";
}
}
}
if (0 > asprintf(&pmix_pid, "%s/pmix.%s.tool.%d", tdir, myhostname, mypid)) {
return PMIX_ERR_NOMEM;
}
if ((strlen(pmix_pid) + 1) > sizeof(tl->address.sun_path)-1) {
free(pmix_pid);
return PMIX_ERR_INVALID_LENGTH;
}
snprintf(tl->address.sun_path, sizeof(tl->address.sun_path) - 1, "%s", pmix_pid);
free(pmix_pid);
pmix_server_globals.tool_connections_allowed = true;
pmix_list_append(&pmix_server_globals.listeners, &tl->super);
/* push this onto our protected list of keys not
* to be passed to the clients */
pmix_argv_append_nosize(&protected, PMIX_SERVER_TOOL_SUPPORT);
}
}
}
@ -299,18 +340,29 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
if (NULL != info) {
PMIX_CONSTRUCT(&kv, pmix_kval_t);
for (n=0; n < ninfo; n++) {
if (0 == strcmp(info[n].key, PMIX_USERID))
continue;
if (0 == strcmp(info[n].key, PMIX_GRPID))
continue;
if (0 == strcmp(info[n].key, PMIX_SOCKET_MODE))
/* check the list of protected keys */
protect = false;
if (NULL != protected) {
for (m=0; NULL != protected[m]; m++) {
if (0 == strcmp(info[n].key, protected[m])) {
protect = true;
break;
}
}
}
if (protect) {
continue;
}
/* store and pass along to every client */
kv.key = info[n].key;
kv.value = &info[n].value;
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pmix_server_globals.gdata, &kv, 1, PMIX_KVAL))) {
PMIX_ERROR_LOG(rc);
/* protect the incoming data */
kv.key = NULL;
kv.value = NULL;
PMIX_DESTRUCT(&kv);
PMIx_server_finalize();
return rc;
}
}
@ -357,6 +409,7 @@ static void cleanup_server_state(void)
PMIX_EXPORT pmix_status_t PMIx_server_finalize(void)
{
if (1 != pmix_globals.init_cntr) {
--pmix_globals.init_cntr;
return PMIX_SUCCESS;
@ -1954,6 +2007,44 @@ static void notifyerror_cbfunc (pmix_status_t status, void *cbdata)
PMIX_RELEASE(cd);
}
/* Callback given to the host for a query issued on behalf of a client.
 *
 * Packs the status and any returned info into a reply, queues the reply
 * to the requesting peer, tells the host it may release the returned
 * data, and cleans up the tracking objects.
 *
 * status          result reported by the host
 * info, ninfo     data returned by the host (owned by the host)
 * cbdata          the pmix_query_caddy_t for this request; its cbdata
 *                 field holds the pmix_server_caddy_t for the peer
 * release_fn      host function to call once info is no longer needed
 * release_cbdata  argument for release_fn
 */
static void query_cbfunc(pmix_status_t status,
                         pmix_info_t *info, size_t ninfo,
                         void *cbdata,
                         pmix_release_cbfunc_t release_fn,
                         void *release_cbdata)
{
    pmix_query_caddy_t *qcd = (pmix_query_caddy_t*)cbdata;
    pmix_server_caddy_t *cd = (pmix_server_caddy_t*)qcd->cbdata;
    pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t);
    pmix_status_t rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query callback with status %d", status);

    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_STATUS))) {
        PMIX_ERROR_LOG(rc);
        goto complete;
    }
    /* pack the returned data */
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &ninfo, 1, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        goto complete;
    }
    if (0 < ninfo) {
        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, info, ninfo, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
        }
    }

  complete:
    // send reply
    PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);

    /* we are done with the host's data (whatever could be packed is
     * already in the reply), so let the host release it - without this
     * call the returned info would never be freed */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }

    // cleanup
    PMIX_INFO_FREE(qcd->info, qcd->ninfo);
    PMIX_RELEASE(qcd);
    PMIX_RELEASE(cd);
}
/* the switchyard is the primary message handling function. Its purpose
* is to take incoming commands (packed into a buffer), unpack them,
* and then call the corresponding host server's function to execute
@ -2124,6 +2215,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
}
return rc;
}
if (PMIX_DEREGEVENTS_CMD == cmd) {
pmix_server_deregister_events(peer, buf);
return PMIX_SUCCESS;
@ -2134,6 +2226,13 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
rc = pmix_server_event_recvd_from_client(peer, buf, notifyerror_cbfunc, cd);
return rc;
}
if (PMIX_QUERY_CMD == cmd) {
PMIX_PEER_CADDY(cd, peer, tag);
rc = pmix_server_query(peer, buf, query_cbfunc, cd);
return rc;
}
return PMIX_ERR_NOT_SUPPORTED;
}
@ -2142,7 +2241,7 @@ static void server_message_handler(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr
{
pmix_peer_t *peer = (pmix_peer_t*)pr;
pmix_buffer_t *reply;
int rc;
pmix_status_t rc;
pmix_output_verbose(2, pmix_globals.debug_output,
"SWITCHYARD for %s:%d:%d",
@ -2153,7 +2252,7 @@ static void server_message_handler(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr
/* send the return, if there was an error returned */
if (PMIX_SUCCESS != rc) {
reply = PMIX_NEW(pmix_buffer_t);
pmix_bfrop.pack(reply, &rc, 1, PMIX_INT);
pmix_bfrop.pack(reply, &rc, 1, PMIX_STATUS);
PMIX_SERVER_QUEUE_REPLY(peer, hdr->tag, reply);
}
}

Просмотреть файл

@ -66,6 +66,7 @@
static void* listen_thread(void *obj);
static void listener_cb(int incoming_sd, void *cbdata);
static void connection_handler(int incoming_sd, short flags, void* cbdata);
static void tool_handler(int incoming_sd, short flags, void* cbdata);
static char *myversion = NULL;
static pthread_t engine;
@ -87,6 +88,7 @@ pmix_status_t pmix_start_listening(pmix_listener_t *lt)
return PMIX_ERROR;
}
addrlen = sizeof(struct sockaddr_un);
if (bind(lt->socket, (struct sockaddr*)address, addrlen) < 0) {
printf("%s:%d bind() failed\n", __FILE__, __LINE__);
@ -163,10 +165,11 @@ pmix_status_t pmix_start_listening(pmix_listener_t *lt)
return PMIX_ERR_OUT_OF_RESOURCE;
}
/* fork off the listener thread */
pmix_server_globals.listen_thread_active = true;
if (0 > pthread_create(&engine, NULL, listen_thread, NULL)) {
pmix_server_globals.listen_thread_active = false;
return PMIX_ERROR;
} else {
pmix_server_globals.listen_thread_active = true;
}
}
@ -207,7 +210,6 @@ void pmix_stop_listening(void)
CLOSE_THE_SOCKET(lt->socket);
lt->socket = -1;
}
return;
}
static void* listen_thread(void *obj)
@ -222,6 +224,7 @@ static void* listen_thread(void *obj)
pmix_output_verbose(8, pmix_globals.debug_output,
"listen_thread: active");
while (pmix_server_globals.listen_thread_active) {
FD_ZERO(&readfds);
max = -1;
@ -278,9 +281,14 @@ static void* listen_thread(void *obj)
* OS might start rejecting connections due to timeout.
*/
pending_connection = PMIX_NEW(pmix_pending_connection_t);
pending_connection->protocol = lt->protocol_type;
event_assign(&pending_connection->ev, pmix_globals.evbase, -1,
EV_WRITE, connection_handler, pending_connection);
pending_connection->protocol = lt->protocol;
if (PMIX_PROTOCOL_TOOL == lt->protocol) {
event_assign(&pending_connection->ev, pmix_globals.evbase, -1,
EV_WRITE, tool_handler, pending_connection);
} else {
event_assign(&pending_connection->ev, pmix_globals.evbase, -1,
EV_WRITE, connection_handler, pending_connection);
}
pending_connection->sd = accept(lt->socket,
(struct sockaddr*)&(pending_connection->addr),
&addrlen);
@ -325,37 +333,153 @@ static void listener_cb(int incoming_sd, void *cbdata)
incoming_sd);
pending_connection = PMIX_NEW(pmix_pending_connection_t);
pending_connection->sd = incoming_sd;
pending_connection->protocol = lt->protocol_type;
pending_connection->protocol = lt->protocol;
event_assign(&pending_connection->ev, pmix_globals.evbase, -1,
EV_WRITE, connection_handler, pending_connection);
event_active(&pending_connection->ev, EV_WRITE, 1);
}
/* process the callback with tool connection info */
static void process_cbfunc(int sd, short args, void *cbdata)
{
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cd->cbdata;
pmix_nspace_t *nptr;
pmix_rank_info_t *info;
int rc;
/* send this status as well so they don't hang */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&cd->status, sizeof(pmix_status_t)))) {
PMIX_ERROR_LOG(rc);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* if the request failed, then we are done */
if (PMIX_SUCCESS != cd->status) {
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* send the nspace back to the tool */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, cd->proc.nspace, PMIX_MAX_NSLEN+1))) {
PMIX_ERROR_LOG(rc);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* send my nspace back to the tool */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, pmix_globals.myid.nspace, PMIX_MAX_NSLEN+1))) {
PMIX_ERROR_LOG(rc);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* send my rank back to the tool */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&pmix_globals.myid.rank, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* set the socket non-blocking for all further operations */
pmix_usock_set_nonblocking(pnd->sd);
/* add this nspace to our pool */
nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(nptr->nspace, cd->proc.nspace, PMIX_MAX_NSLEN);
nptr->server = PMIX_NEW(pmix_server_nspace_t);
pmix_list_append(&pmix_globals.nspaces, &nptr->super);
/* add this tool rank to the nspace */
info = PMIX_NEW(pmix_rank_info_t);
PMIX_RETAIN(nptr);
info->nptr = nptr;
info->rank = 0;
pmix_list_append(&nptr->server->ranks, &info->super);
/* setup a peer object for this tool */
pmix_peer_t *peer = PMIX_NEW(pmix_peer_t);
PMIX_RETAIN(info);
peer->info = info;
peer->proc_cnt = 1;
peer->sd = pnd -> sd;
if (0 > (peer->index = pmix_pointer_array_add(&pmix_server_globals.clients, peer))) {
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
PMIX_RELEASE(peer);
pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super);
PMIX_RELEASE(nptr); // will release the info object
/* probably cannot send an error reply if we are out of memory */
return;
}
/* start the events for this tool */
event_assign(&peer->recv_event, pmix_globals.evbase, pnd->sd,
EV_READ|EV_PERSIST, pmix_usock_recv_handler, peer);
event_add(&peer->recv_event, NULL);
peer->recv_ev_active = true;
event_assign(&peer->send_event, pmix_globals.evbase, pnd->sd,
EV_WRITE|EV_PERSIST, pmix_usock_send_handler, peer);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server tool %s:%d has connected on socket %d",
peer->info->nptr->nspace, peer->info->rank, peer->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
}
/* receive a callback from the host RM with an nspace
 * for a connecting tool
 *
 * status  result of the host's tool_connected processing
 * proc    identifier assigned to the tool; may be NULL when the host
 *         reports a failure
 * cbdata  the pending connection passed to tool_connected
 *
 * The information is copied into a setup caddy and processed by
 * process_cbfunc after a thread-shift.
 */
static void cnct_cbfunc(pmix_status_t status,
                        pmix_proc_t *proc, void *cbdata)
{
    pmix_setup_caddy_t *cd;

    /* need to thread-shift this into our context */
    cd = PMIX_NEW(pmix_setup_caddy_t);
    cd->status = status;
    if (NULL != proc) {
        (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN);
    } else {
        /* no identifier was provided - do not dereference it, and do
         * not let a "success" without an nspace reach the tool */
        cd->proc.nspace[0] = '\0';
        if (PMIX_SUCCESS == status) {
            cd->status = PMIX_ERR_BAD_PARAM;
        }
    }
    cd->cbdata = cbdata;
    PMIX_THREADSHIFT(cd, process_cbfunc);
}
/* Parse init-ack message:
* NSPACE<0><rank>VERSION<0>[CRED<0>]
*/
static pmix_status_t parse_connect_ack (char *msg, int len,
static pmix_status_t parse_connect_ack (char *msg,
pmix_listener_protocol_t protocol,
int len,
char **nspace, int *rank,
char **version, char **cred)
{
int msglen;
PMIX_STRNLEN(msglen, msg, len);
if (msglen < len) {
*nspace = msg;
msg += strlen(*nspace) + 1;
len -= strlen(*nspace) + 1;
} else {
return PMIX_ERR_BAD_PARAM;
}
if (PMIX_PROTOCOL_TOOL != protocol) {
PMIX_STRNLEN(msglen, msg, len);
if (msglen < len) {
*nspace = msg;
msg += strlen(*nspace) + 1;
len -= strlen(*nspace) + 1;
} else {
return PMIX_ERR_BAD_PARAM;
}
PMIX_STRNLEN(msglen, msg, len);
if (msglen <= len) {
memcpy(rank, msg, sizeof(int));
msg += sizeof(int);
len -= sizeof(int);
} else {
return PMIX_ERR_BAD_PARAM;
PMIX_STRNLEN(msglen, msg, len);
if (msglen <= len) {
memcpy(rank, msg, sizeof(int));
msg += sizeof(int);
len -= sizeof(int);
} else {
return PMIX_ERR_BAD_PARAM;
}
}
PMIX_STRNLEN(msglen, msg, len);
@ -380,7 +504,7 @@ static pmix_status_t parse_connect_ack (char *msg, int len,
/* Receive the peer's identification info from a newly
* connected socket and verify the expected response.
*/
static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
static pmix_status_t pmix_server_authenticate(pmix_pending_connection_t *pnd,
int *out_rank,
pmix_peer_t **peer)
{
@ -395,14 +519,17 @@ static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
pmix_proc_t proc;
pmix_output_verbose(2, pmix_globals.debug_output,
"RECV CONNECT ACK FROM PEER ON SOCKET %d", sd);
"RECV CONNECT ACK FROM PEER ON SOCKET %d",
pnd->sd);
/* ensure all is zero'd */
memset(&hdr, 0, sizeof(pmix_usock_hdr_t));
*peer = NULL;
if (NULL != peer) {
*peer = NULL;
}
/* get the header */
if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)&hdr, sizeof(pmix_usock_hdr_t)))) {
if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(pnd->sd, (char*)&hdr, sizeof(pmix_usock_hdr_t)))) {
return rc;
}
@ -415,92 +542,106 @@ static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
return PMIX_ERR_OUT_OF_RESOURCE;
}
if (PMIX_SUCCESS != pmix_usock_recv_blocking(sd, msg, hdr.nbytes)) {
if (PMIX_SUCCESS != pmix_usock_recv_blocking(pnd->sd, msg, hdr.nbytes)) {
/* unable to complete the recv */
pmix_output_verbose(2, pmix_globals.debug_output,
"unable to complete recv of connect-ack with client ON SOCKET %d", sd);
"unable to complete recv of connect-ack with client ON SOCKET %d",
pnd->sd);
free(msg);
return PMIX_ERR_UNREACH;
}
if (PMIX_SUCCESS != (rc = parse_connect_ack (msg, hdr.nbytes, &nspace,
&rank, &version, &cred))) {
if (PMIX_SUCCESS != (rc = parse_connect_ack(msg, pnd->protocol, hdr.nbytes, &nspace,
&rank, &version, &cred))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"error parsing connect-ack from client ON SOCKET %d", sd);
"error parsing connect-ack from client ON SOCKET %d", pnd->sd);
free(msg);
return rc;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack recvd from peer %s:%d:%s",
nspace, rank, version);
/* if the attaching process is not a tool, then set it up as
* a known peer */
if (PMIX_PROTOCOL_TOOL != pnd->protocol) {
pmix_globals.myid.rank = rank;
/* do not check the version - we only retain it at this
* time in case we need to check it at some future date.
* For now, our intent is to retain backward compatibility
* and so we will assume that all versions are compatible. */
/* get the nspace */
nspace = msg; // a NULL terminator is in the data
/* see if we know this nspace */
nptr = NULL;
PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strcmp(tmp->nspace, nspace)) {
nptr = tmp;
break;
/* get the rank */
memcpy(&rank, msg+strlen(nspace)+1, sizeof(int));
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack recvd from peer %s:%d:%s",
nspace, rank, version);
/* do not check the version - we only retain it at this
* time in case we need to check it at some future date.
* For now, our intent is to retain backward compatibility
* and so we will assume that all versions are compatible. */
/* see if we know this nspace */
nptr = NULL;
PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strcmp(tmp->nspace, nspace)) {
nptr = tmp;
break;
}
}
}
if (NULL == nptr) {
/* we don't know this namespace, reject it */
free(msg);
/* send an error reply to the client */
rc = PMIX_ERR_NOT_FOUND;
goto error;
}
/* see if we have this peer in our list */
info = NULL;
found = false;
PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) {
if (info->rank == rank) {
found = true;
break;
}
}
if (!found) {
/* rank unknown, reject it */
free(msg);
/* send an error reply to the client */
rc = PMIX_ERR_NOT_FOUND;
goto error;
}
*out_rank = rank;
/* a peer can connect on multiple sockets since it can fork/exec
* a child that also calls PMIx_Init, so add it here if necessary.
* Create the tracker for this peer */
psave = PMIX_NEW(pmix_peer_t);
PMIX_RETAIN(info);
psave->info = info;
info->proc_cnt++; /* increase number of processes on this rank */
psave->sd = sd;
if (0 > (psave->index = pmix_pointer_array_add(&pmix_server_globals.clients, psave))) {
free(msg);
PMIX_RELEASE(psave);
/* probably cannot send an error reply if we are out of memory */
return PMIX_ERR_OUT_OF_RESOURCE;
}
/* see if there is a credential */
if (NULL != pmix_sec.validate_cred) {
if (PMIX_SUCCESS != (rc = pmix_sec.validate_cred(psave, cred))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"validation of client credential failed");
if (NULL == nptr) {
/* we don't know this namespace, reject it */
free(msg);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
/* send an error reply to the client */
rc = PMIX_ERR_NOT_FOUND;
goto error;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"client credential validated");
/* see if we have this peer in our list */
info = NULL;
found = false;
PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) {
if (info->rank == rank) {
found = true;
break;
}
}
if (!found) {
/* rank unknown, reject it */
free(msg);
/* send an error reply to the client */
rc = PMIX_ERR_NOT_FOUND;
goto error;
}
*out_rank = rank;
/* a peer can connect on multiple sockets since it can fork/exec
* a child that also calls PMIx_Init, so add it here if necessary.
* Create the tracker for this peer */
psave = PMIX_NEW(pmix_peer_t);
PMIX_RETAIN(info);
psave->info = info;
info->proc_cnt++; /* increase number of processes on this rank */
psave->sd = pnd->sd;
if (0 > (psave->index = pmix_pointer_array_add(&pmix_server_globals.clients, psave))) {
free(msg);
PMIX_RELEASE(psave);
/* probably cannot send an error reply if we are out of memory */
return PMIX_ERR_OUT_OF_RESOURCE;
}
/* see if there is a credential */
if (NULL != pmix_sec.validate_cred) {
if (PMIX_SUCCESS != (rc = pmix_sec.validate_cred(psave, cred))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"validation of client credential failed");
free(msg);
if (NULL != psave) {
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
}
/* send an error reply to the client */
goto error;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"client credential validated");
}
}
free(msg);
@ -509,16 +650,20 @@ static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack executing handshake");
rc = PMIX_ERR_READY_FOR_HANDSHAKE;
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(sd, (char*)&rc, sizeof(int)))) {
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&rc, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
if (NULL != psave) {
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
}
return rc;
}
if (PMIX_SUCCESS != (rc = pmix_sec.server_handshake(psave))) {
PMIX_ERROR_LOG(rc);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
if (NULL != psave) {
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
}
return rc;
}
pmix_output_verbose(2, pmix_globals.debug_output,
@ -526,55 +671,65 @@ static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
} else {
/* send them success */
rc = PMIX_SUCCESS;
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(sd, (char*)&rc, sizeof(int)))) {
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&rc, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
if (NULL != psave) {
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
}
return rc;
}
}
/* if the attaching process is not a tool, then send its index */
if (PMIX_PROTOCOL_TOOL != pnd->protocol) {
/* send the client's array index */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&psave->index, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
return rc;
}
}
/* let the host server know that this client has connected */
if (NULL != pmix_host_server.client_connected) {
(void)strncpy(proc.nspace, psave->info->nptr->nspace, PMIX_MAX_NSLEN);
proc.rank = psave->info->rank;
rc = pmix_host_server.client_connected(&proc, psave->info->server_object,
NULL, NULL);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack from client completed");
*peer = psave;
/* let the host server know that this client has connected */
if (NULL != pmix_host_server.client_connected) {
(void)strncpy(proc.nspace, psave->info->nptr->nspace, PMIX_MAX_NSLEN);
proc.rank = psave->info->rank;
rc = pmix_host_server.client_connected(&proc, psave->info->server_object,
NULL, NULL);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
}
}
} else {
/* request an nspace for this requestor - it will
* automatically be assigned rank=0 */
pmix_host_server.tool_connected(NULL, 0, cnct_cbfunc, pnd);
return PMIX_ERR_OPERATION_IN_PROGRESS;
}
/* send the client's array index */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(sd, (char*)&psave->index, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
return rc;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack from client completed");
*peer = psave;
return rc;
error:
/* send an error reply to the client */
if (PMIX_SUCCESS != pmix_usock_send_blocking(sd, (char*)&rc, sizeof(int))) {
if (PMIX_SUCCESS != pmix_usock_send_blocking(pnd->sd, (char*)&rc, sizeof(int))) {
PMIX_ERROR_LOG(rc);
}
return rc;
}
/*
* Handler for accepting connections from the event library
* Handler for accepting client connections from the event library
*/
static void connection_handler(int sd, short flags, void* cbdata)
{
pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cbdata;
pmix_peer_t *peer;
int rank;
pmix_status_t status;
pmix_output_verbose(8, pmix_globals.debug_output,
"connection_handler: new connection: %d",
pnd->sd);
@ -582,14 +737,17 @@ static void connection_handler(int sd, short flags, void* cbdata)
/* ensure the socket is in blocking mode */
pmix_usock_set_blocking(pnd->sd);
/* receive identifier info from the client and authenticate it - the
/*
* Receive identifier info from the client and authenticate it - the
* function will lookup and return the peer object if the connection
* is successfully authenticated */
if (PMIX_SUCCESS != pmix_server_authenticate(pnd->sd, pnd->protocol,
&rank, &peer)) {
CLOSE_THE_SOCKET(pnd->sd);
if (PMIX_SUCCESS != (status = pmix_server_authenticate(pnd, &rank, &peer))) {
if (PMIX_ERR_OPERATION_IN_PROGRESS != status) {
CLOSE_THE_SOCKET(pnd->sd);
}
return;
}
pmix_usock_set_nonblocking(pnd->sd);
/* start the events for this client */
@ -605,3 +763,31 @@ static void connection_handler(int sd, short flags, void* cbdata)
PMIX_RELEASE(pnd);
}
/*
* Handler for accepting tool connections from the event library
*/
/*
 * Event callback run for each new tool connection.
 *
 * sd, flags - supplied by the event library; not used here (the socket
 *             is taken from the pending-connection object instead)
 * cbdata    - the pmix_pending_connection_t describing the connection
 *
 * The handler gives up ownership of the pending connection (closing the
 * socket and releasing the object) on every failure path. When
 * pmix_server_authenticate() reports PMIX_ERR_OPERATION_IN_PROGRESS the
 * object is left alive.
 * NOTE(review): presumably the authenticate/tool_connected callback path
 * then owns it - confirm against the callback that completes the handshake.
 */
static void tool_handler(int sd, short flags, void* cbdata)
{
    pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cbdata;

    pmix_output_verbose(1, pmix_globals.debug_output,
                        "tool_handler: new tool connection: %d",
                        pnd->sd);

    /* if the server doesn't support this, then abort now. We MUST
     * return here: pnd has just been released, so falling through
     * would touch freed memory and close/release it a second time */
    if (NULL == pmix_host_server.tool_connected) {
        CLOSE_THE_SOCKET(pnd->sd);
        PMIX_RELEASE(pnd);
        return;
    }

    /* ensure the socket is in blocking mode */
    pmix_usock_set_blocking(pnd->sd);

    /* initiate the authentication handshake - any answer other than
     * "operation in progress" is treated as a failed connection */
    if (PMIX_ERR_OPERATION_IN_PROGRESS != pmix_server_authenticate(pnd, NULL, NULL)) {
        pmix_output(0, "SHOOT");
        CLOSE_THE_SOCKET(pnd->sd);
        PMIX_RELEASE(pnd);
    }
}

92
opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -1175,8 +1175,7 @@ void pmix_server_deregister_events(pmix_peer_t *peer,
{
int32_t cnt;
pmix_status_t rc, *codes = NULL, *cdptr, maxcode = PMIX_MAX_ERR_CONSTANT;
pmix_info_t *info = NULL;
size_t ninfo, ncodes, ncds, n;
size_t ncodes, ncds, n;
pmix_regevents_info_t *reginfo = NULL;
pmix_regevents_info_t *reginfo_next;
pmix_peer_events_info_t *prev;
@ -1236,9 +1235,6 @@ cleanup:
if (NULL != codes) {
free(codes);
}
if (NULL != info) {
PMIX_INFO_FREE(info, ninfo);
}
return;
}
@ -1318,7 +1314,78 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer,
return rc;
}
// instance server library classes
/*
 * Service a query request received from a peer.
 *
 * peer   - the requesting peer; its nspace and rank identify the requestor
 * buf    - the message payload: a count followed by that many info
 *          entries, then a count followed by that many directives
 * cbfunc - handed to the host's query function together with the caddy
 * cbdata - stored in the caddy for later use
 *
 * Returns PMIX_ERR_NOT_SUPPORTED when the host provides no query
 * function, the unpack/host error code on failure (the caddy is
 * released in that case), or PMIX_SUCCESS once the host has accepted
 * the request.
 */
pmix_status_t pmix_server_query(pmix_peer_t *peer,
                                pmix_buffer_t *buf,
                                pmix_info_cbfunc_t cbfunc,
                                void *cbdata)
{
    pmix_query_caddy_t *caddy;
    pmix_proc_t requestor;
    pmix_status_t ret;
    int32_t count;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "recvd query from client");

    /* nothing we can do without host support */
    if (NULL == pmix_host_server.query) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    caddy = PMIX_NEW(pmix_query_caddy_t);
    caddy->cbdata = cbdata;

    /* how many info entries were sent? */
    count = 1;
    ret = pmix_bfrop.unpack(buf, &caddy->ninfo, &count, PMIX_SIZE);
    if (PMIX_SUCCESS != ret) {
        PMIX_ERROR_LOG(ret);
        goto failed;
    }
    /* pull the info entries themselves, if any */
    if (0 < caddy->ninfo) {
        PMIX_INFO_CREATE(caddy->info, caddy->ninfo);
        count = caddy->ninfo;
        ret = pmix_bfrop.unpack(buf, caddy->info, &count, PMIX_INFO);
        if (PMIX_SUCCESS != ret) {
            PMIX_ERROR_LOG(ret);
            goto failed;
        }
    }

    /* how many directives were sent? */
    count = 1;
    ret = pmix_bfrop.unpack(buf, &caddy->ndirs, &count, PMIX_SIZE);
    if (PMIX_SUCCESS != ret) {
        PMIX_ERROR_LOG(ret);
        goto failed;
    }
    /* pull the directives, if any */
    if (0 < caddy->ndirs) {
        PMIX_INFO_CREATE(caddy->directives, caddy->ndirs);
        count = caddy->ndirs;
        ret = pmix_bfrop.unpack(buf, caddy->directives, &count, PMIX_INFO);
        if (PMIX_SUCCESS != ret) {
            PMIX_ERROR_LOG(ret);
            goto failed;
        }
    }

    /* identify who is asking */
    (void)strncpy(requestor.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
    requestor.rank = peer->info->rank;

    /* pass the request up to the host */
    ret = pmix_host_server.query(&requestor, caddy->info, caddy->ninfo,
                                 caddy->directives, caddy->ndirs,
                                 cbfunc, caddy);
    if (PMIX_SUCCESS == ret) {
        return PMIX_SUCCESS;
    }

failed:
    /* common failure exit: drop the caddy and report the error */
    PMIX_RELEASE(caddy);
    return ret;
}
/***** INSTANCE SERVER LIBRARY CLASSES *****/
static void tcon(pmix_server_trkr_t *t)
{
t->pcs = NULL;
@ -1451,9 +1518,20 @@ PMIX_CLASS_INSTANCE(pmix_dmdx_local_t,
pmix_list_item_t,
lmcon, lmdes);
/* Constructor for a pending connection: start with an all-zero
 * namespace string and no attached message */
static void pccon(pmix_pending_connection_t *p)
{
    memset(p->nspace, 0, sizeof(p->nspace));
    p->msg = NULL;
}
/* Destructor for a pending connection: give back the attached
 * message, if there is one (free() ignores a NULL pointer) */
static void pcdes(pmix_pending_connection_t *p)
{
    free(p->msg);
}
PMIX_CLASS_INSTANCE(pmix_pending_connection_t,
pmix_object_t,
NULL, NULL);
pccon, pcdes);
static void prevcon(pmix_peer_events_info_t *p)
{

Просмотреть файл

@ -31,6 +31,7 @@ typedef struct {
pmix_object_t super;
pmix_event_t ev;
volatile bool active;
pmix_status_t status;
pmix_proc_t proc;
uid_t uid;
gid_t gid;
@ -83,12 +84,21 @@ typedef struct {
} pmix_dmdx_local_t;
PMIX_CLASS_DECLARATION(pmix_dmdx_local_t);
/* define listener protocol types - the value identifies which kind of
 * connection a listener (and each connection it accepts) is serving */
typedef uint16_t pmix_listener_protocol_t;
#define PMIX_PROTOCOL_V1        0
#define PMIX_PROTOCOL_TOOL      1   /* connection from a tool rather than a client */
#define PMIX_PROTOCOL_V2        2
/* connection support */
typedef struct {
pmix_object_t super;
pmix_event_t ev;
uint16_t protocol;
pmix_listener_protocol_t protocol;
int sd;
char nspace[PMIX_MAX_NSLEN+1];
char *msg;
pmix_status_t status;
struct sockaddr_storage addr;
} pmix_pending_connection_t;
PMIX_CLASS_DECLARATION(pmix_pending_connection_t);
@ -111,7 +121,7 @@ PMIX_CLASS_DECLARATION(pmix_regevents_info_t);
/* listener objects */
typedef struct pmix_listener_t {
pmix_list_item_t super;
uint16_t protocol_type;
pmix_listener_protocol_t protocol;
int socket;
struct sockaddr_un address;
char *varname;
@ -135,6 +145,7 @@ typedef struct {
pmix_buffer_t gdata; // cache of data given to me for passing to all clients
pmix_list_t events; // list of pmix_regevents_info_t registered events
pmix_ring_buffer_t notifications; // ring buffer of pending notifications
bool tool_connections_allowed;
} pmix_server_globals_t;
typedef struct {
@ -272,6 +283,11 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer,
pmix_buffer_t *buf,
pmix_op_cbfunc_t cbfunc,
void *cbdata);
pmix_status_t pmix_server_query(pmix_peer_t *peer,
pmix_buffer_t *buf,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
void pmix_server_execute_collective(int sd, short args, void *cbdata);
void pmix_server_queue_message(int fd, short args, void *cbdata);

Просмотреть файл

@ -0,0 +1,11 @@
#
# Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources += \
src/tool/pmix_tool.c

Просмотреть файл

@ -0,0 +1,889 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
* All rights reserved.
* Copyright (c) 2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <src/include/pmix_config.h>
#include <src/include/types.h>
#include <src/include/pmix_socket_errno.h>
#include "src/client/pmix_client_ops.h"
#include <pmix_tool.h>
#include "src/include/pmix_globals.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <fcntl.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_SYS_UN_H
#include <sys/un.h>
#endif
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif /* HAVE_DIRENT_H */
#include PMIX_EVENT_HEADER
#if PMIX_CC_USE_PRAGMA_IDENT
#pragma ident PMIX_VERSION
#elif PMIX_CC_USE_IDENT
#ident PMIX_VERSION
#endif
extern pmix_client_globals_t pmix_client_globals;
#include "src/class/pmix_list.h"
#include "src/buffer_ops/buffer_ops.h"
#include "src/util/argv.h"
#include "src/util/error.h"
#include "src/util/hash.h"
#include "src/util/output.h"
#include "src/util/progress_threads.h"
#include "src/usock/usock.h"
#include "src/sec/pmix_sec.h"
#include "src/include/pmix_globals.h"
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
#include "src/dstore/pmix_dstore.h"
#endif /* PMIX_ENABLE_DSTORE */
#define PMIX_MAX_RETRIES 10
static pmix_status_t usock_connect(struct sockaddr_un *address, int *fd);
/* Final callback of a notification chain: the chain object travels as
 * its own callback data, so all that is left to do is release it.
 * The reported status is not examined. */
static void _notify_complete(pmix_status_t status, void *cbdata)
{
    pmix_event_chain_t *finished = (pmix_event_chain_t*)cbdata;

    PMIX_RELEASE(finished);
}
/*
 * Receive callback for event notifications delivered to the tool.
 *
 * peer, hdr, cbdata - not used
 * buf               - the notification payload: command, status, source
 *                     proc, number of info entries and the entries
 *
 * The payload is unpacked into a new event chain, which is then passed
 * to the local event handlers; the chain releases itself through
 * _notify_complete. If unpacking fails, the partially-filled chain is
 * released and a fresh chain carrying only the unpack error status is
 * passed to the local handlers instead.
 */
static void pmix_tool_notify_recv(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
                                  pmix_buffer_t *buf, void *cbdata)
{
    pmix_status_t rc;
    int32_t cnt;
    pmix_cmd_t cmd;
    pmix_event_chain_t *chain;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:tool_notify_recv - processing event");

    /* start the local notification chain */
    chain = PMIX_NEW(pmix_event_chain_t);
    chain->final_cbfunc = _notify_complete;
    chain->final_cbdata = chain;

    /* the command comes first - it is unpacked and discarded */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cmd, &cnt, PMIX_CMD))) {
        PMIX_ERROR_LOG(rc);
        goto error;
    }
    /* unpack the status */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->status, &cnt, PMIX_INT))) {
        PMIX_ERROR_LOG(rc);
        goto error;
    }

    /* unpack the source of the event */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->source, &cnt, PMIX_PROC))) {
        PMIX_ERROR_LOG(rc);
        goto error;
    }

    /* unpack the info that might have been provided */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->ninfo, &cnt, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        goto error;
    }
    if (0 < chain->ninfo) {
        PMIX_INFO_CREATE(chain->info, chain->ninfo);
        cnt = chain->ninfo;
        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, chain->info, &cnt, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
            goto error;
        }
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "[%s:%d] pmix:tool_notify_recv - processing event %d, calling errhandler",
                        pmix_globals.myid.nspace, pmix_globals.myid.rank, chain->status);

    pmix_invoke_local_event_hdlr(chain);
    return;

  error:
    /* we always need to return */
    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:tool_notify_recv - unpack error status =%d, calling def errhandler", rc);
    /* drop the partially-unpacked chain before replacing it - nothing
     * else holds a reference, so overwriting the pointer would leak it */
    PMIX_RELEASE(chain);
    chain = PMIX_NEW(pmix_event_chain_t);
    chain->status = rc;
    pmix_invoke_local_event_hdlr(chain);
}
static pmix_status_t connect_to_server(struct sockaddr_un *address)
{
int sd;
pmix_status_t ret;
if (PMIX_SUCCESS != (ret = usock_connect(address, &sd))) {
PMIX_ERROR_LOG(ret);
return ret;
}
pmix_client_globals.myserver.sd = sd;
/* setup recv event */
event_assign(&pmix_client_globals.myserver.recv_event,
pmix_globals.evbase,
pmix_client_globals.myserver.sd,
EV_READ | EV_PERSIST,
pmix_usock_recv_handler, &pmix_client_globals.myserver);
event_add(&pmix_client_globals.myserver.recv_event, 0);
pmix_client_globals.myserver.recv_ev_active = true;
/* setup send event */
event_assign(&pmix_client_globals.myserver.send_event,
pmix_globals.evbase,
pmix_client_globals.myserver.sd,
EV_WRITE|EV_PERSIST,
pmix_usock_send_handler, &pmix_client_globals.myserver);
pmix_client_globals.myserver.send_ev_active = false;
return PMIX_SUCCESS;
}
PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
pmix_info_t info[], size_t ninfo)
{
char *evar, *tdir, *tmp;
int debug_level;
struct sockaddr_un address;
size_t n;
pmix_kval_t *kptr;
pmix_status_t rc;
pmix_nspace_t *nptr, *nsptr;
int i, server_pid = -1;
int hostnamelen = 10;
char hostname[hostnamelen];
DIR *cur_dirp = NULL;
struct dirent * dir_entry;
pmix_output(0, "TOOL INIT");
if (NULL == proc) {
return PMIX_ERR_BAD_PARAM;
}
if (0 < pmix_globals.init_cntr) {
/* since we have been called before, the nspace and
* rank should be known. So return them here if
* requested */
if (NULL != proc) {
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
proc->rank = pmix_globals.myid.rank;
}
++pmix_globals.init_cntr;
return PMIX_SUCCESS;
}
/* scan incoming info for directives */
if (NULL != info) {
for (n=0; n < ninfo; n++) {
if (0 == strcmp(PMIX_EVENT_BASE, info[n].key)) {
pmix_globals.evbase = (pmix_event_base_t*)info[n].value.data.ptr;
pmix_globals.external_evbase = true;
}
}
}
/* setup the globals */
pmix_globals_init();
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);
PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t);
/* mark that we are a client */
pmix_globals.server = false;
/* get our effective id's */
pmix_globals.uid = geteuid();
pmix_globals.gid = getegid();
/* initialize the output system */
if (!pmix_output_init()) {
return PMIX_ERROR;
}
/* see if debug is requested */
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
debug_level = strtol(evar, NULL, 10);
pmix_globals.debug_output = pmix_output_open(NULL);
pmix_output_set_verbosity(pmix_globals.debug_output, debug_level);
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: init called");
/* find the temp dir */
if (NULL == (tdir = getenv("TMPDIR"))) {
if (NULL == (tdir = getenv("TEMP"))) {
if (NULL == (tdir = getenv("TMP"))) {
tdir = "/tmp";
}
}
}
/* setup the path to the daemon rendezvous point */
memset(&address, 0, sizeof(struct sockaddr_un));
address.sun_family = AF_UNIX;
/* Get first 10 char's of hostname to match what the server is doing */
gethostname(hostname, hostnamelen);
/* Get the local hostname, and look for a file named
* /tmp/pmix.hostname.tool - this file will contain
* the URI where the server is listening. The URI consists
* of 3 parts - the code below will parse the string read
* from the file and connect accordingly */
for (i = 0; i < (int)ninfo; i++) {
if (strcmp(info[i].key, PMIX_SERVER_PIDINFO) == 0) {
server_pid = info[i].value.data.integer;
break;
}
}
/* if they gave us a specific pid, then look for that
* particular server - otherwise, see if there is only
* one on this node and default to it */
if (server_pid != -1) {
snprintf(address.sun_path, sizeof(address.sun_path)-1, "%s/pmix.%s.%d", tdir, hostname, server_pid);
/* if the rendezvous file doesn't exist, that's an error */
if (0 != access(address.sun_path, R_OK)) {
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_NOT_FOUND;
}
} else {
/* open up the temp directory */
if (NULL == (cur_dirp = opendir(tdir))) {
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_NOT_FOUND;
}
/* search the entries for something that starts with pmix.hostname */
if (0 > asprintf(&tmp, "pmix.%s", hostname)) {
return PMIX_ERR_NOMEM;
}
evar = NULL;
while (NULL != (dir_entry = readdir(cur_dirp))) {
if (0 == strncmp(dir_entry->d_name, tmp, strlen(tmp))) {
/* found one - if more than one, then that's an error */
if (NULL != evar) {
free(tmp);
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_INIT;
}
evar = strdup(dir_entry->d_name);
}
}
free(tmp);
closedir(cur_dirp);
if (NULL == evar) {
/* none found */
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_INIT;
}
/* use the found one as our contact point */
snprintf(address.sun_path, sizeof(address.sun_path)-1, "%s/%s", tdir, evar);
free(evar);
}
pmix_bfrop_open();
pmix_usock_init(pmix_tool_notify_recv);
pmix_sec_init();
if (!pmix_globals.external_evbase) {
/* create an event base and progress thread for us */
if (NULL == (pmix_globals.evbase = pmix_start_progress_thread())) {
pmix_sec_finalize();
pmix_usock_finalize();
pmix_bfrop_close();
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return -1;
}
}
/* connect to the server */
if (PMIX_SUCCESS != (rc = connect_to_server(&address))) {
pmix_stop_progress_thread(pmix_globals.evbase);
pmix_sec_finalize();
pmix_usock_finalize();
pmix_bfrop_close();
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return rc;
}
/* increment our init reference counter */
pmix_globals.init_cntr++;
/* Success, so copy the nspace and rank */
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
proc->rank = pmix_globals.myid.rank;
/* now finish the initialization by filling our local
* datastore with typical job-related info. No point
* in having the server generate these as we are
* obviously a singleton, and so the values are well-known */
nsptr = NULL;
PMIX_LIST_FOREACH(nptr, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strncmp(pmix_globals.myid.nspace, nptr->nspace, PMIX_MAX_NSLEN)) {
nsptr = nptr;
break;
}
}
if (NULL == nsptr) {
/* should never happen */
pmix_stop_progress_thread(pmix_globals.evbase);
pmix_sec_finalize();
pmix_usock_finalize();
pmix_bfrop_close();
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_NOT_FOUND;
}
/* the jobid is just our nspace */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_JOBID);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup(nsptr->nspace);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* our rank */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_RANK);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_INT;
kptr->value->data.integer = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* nproc offset */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_NPROC_OFFSET);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* node size */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_NODE_SIZE);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* local peers */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_LOCAL_PEERS);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup("0");
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* local leader */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_LOCALLDR);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* universe size */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_UNIV_SIZE);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* job size - we are our very own job, so we have no peers */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_JOB_SIZE);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* local size - only us in our job */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_LOCAL_SIZE);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* max procs - since we are a self-started tool, there is no
* allocation within which we can grow ourselves */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_MAX_PROCS);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* app number */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_APPNUM);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* app leader */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_APPLDR);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* app rank */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_APP_RANK);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
}
PMIX_RELEASE(kptr); // maintain accounting
/* global rank */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_GLOBAL_RANK);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* local rank - we are alone in our job */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_LOCAL_RANK);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* we cannot know the node rank as we don't know what
* other processes are executing on this node - so
* we'll add that info to the server-tool handshake
* and load it from there */
/* hostname */
gethostname(hostname, PMIX_MAX_NSLEN);
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_HOSTNAME);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup(hostname);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* we cannot know the RM's nodeid for this host, so
* we'll add that info to the server-tool handshake
* and load it from there */
/* the nodemap is simply our hostname as there is no
* regex to generate */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_NODE_MAP);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup(hostname);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* likewise, the proc map is just our rank as we are
* the only proc in this job */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_PROC_MAP);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup("0");
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
return rc;
}
/*
 * Finalize the tool library.
 *
 * Calls are reference counted against PMIx_tool_init: unless the init
 * counter is exactly 1, the counter is just decremented and
 * PMIX_SUCCESS returned. On the last call the connection to the server
 * and the library subsystems are torn down.
 *
 * An event base supplied by the caller (PMIX_EVENT_BASE at init) is
 * owned by the caller: it is neither freed here nor is libevent shut
 * down globally, since the caller may still be using both.
 *
 * Always returns PMIX_SUCCESS.
 */
PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
{
    if (1 != pmix_globals.init_cntr) {
        --pmix_globals.init_cntr;
        return PMIX_SUCCESS;
    }
    pmix_globals.init_cntr = 0;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:tool finalize called");

    /* the progress thread only exists if we created the event base */
    if (!pmix_globals.external_evbase) {
        pmix_stop_progress_thread(pmix_globals.evbase);
    }

    pmix_usock_finalize();
    PMIX_DESTRUCT(&pmix_client_globals.myserver);
    PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests);

    /* NOTE(review): sd is read after the peer object was destructed;
     * this is only safe if the destructor leaves sd either open or
     * marked as closed (negative) - confirm against the destructor */
    if (0 <= pmix_client_globals.myserver.sd) {
        CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd);
    }

    /* only dispose of the event machinery if it is ours */
    if (!pmix_globals.external_evbase) {
        event_base_free(pmix_globals.evbase);
#ifdef HAVE_LIBEVENT_GLOBAL_SHUTDOWN
        libevent_global_shutdown();
#endif
    }

    pmix_bfrop_close();
    pmix_sec_finalize();

    pmix_globals_finalize();

    pmix_output_close(pmix_globals.debug_output);
    pmix_output_finalize();
    pmix_class_finalize();

    return PMIX_SUCCESS;
}
/*
* The sections below need to be updated to reflect tool
* connection handshake protocols - in this case, we
* don't know our nspace/rank in advance. So we need
* the handshake to include the security credential
* exchange, and then get our nspace/rank in return */
/*
 * Send the tool's connection request to the server over sd.
 *
 * The message is a usock header (pindex -1, tag UINT32_MAX) followed by
 * the NUL-terminated PMIX_VERSION string and, when the security module
 * provides one, a NUL-terminated credential.
 *
 * Returns PMIX_SUCCESS, PMIX_ERR_INVALID_CRED if a credential could not
 * be created, PMIX_ERR_OUT_OF_RESOURCE if the message buffer could not
 * be allocated, or PMIX_ERR_UNREACH if the send failed.
 */
static pmix_status_t send_connect_ack(int sd)
{
    pmix_usock_hdr_t header;
    pmix_status_t result = PMIX_SUCCESS;
    char *credential = NULL;
    char *payload;
    size_t cred_len = 0;
    size_t total = 0;
    size_t offset;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: TOOL SEND CONNECT ACK");

    /* setup the header */
    memset(&header, 0, sizeof(pmix_usock_hdr_t));
    header.pindex = -1;
    header.tag = UINT32_MAX;

    /* not every security module hands out credentials, so ask
     * only when the function is provided */
    if (NULL != pmix_sec.create_cred) {
        credential = pmix_sec.create_cred();
        if (NULL == credential) {
            /* an error occurred - we cannot continue */
            return PMIX_ERR_INVALID_CRED;
        }
        cred_len = strlen(credential) + 1;  // include the terminator
    }

    /* bytes following the header: version string (with its
     * terminator) plus the credential, if any */
    header.nbytes = strlen(PMIX_VERSION) + 1 + cred_len;

    /* zero-filled buffer for the complete message */
    total = (sizeof(header) + header.nbytes);
    payload = (char*)calloc(1, total);
    if (NULL == payload) {
        free(credential);
        return PMIX_ERR_OUT_OF_RESOURCE;
    }

    /* header first */
    memcpy(payload, &header, sizeof(pmix_usock_hdr_t));
    offset = sizeof(pmix_usock_hdr_t);

    /* then the version - the terminator is already there because
     * the buffer started out zeroed */
    memcpy(payload + offset, PMIX_VERSION, strlen(PMIX_VERSION));
    offset += strlen(PMIX_VERSION) + 1;

    /* and finally the credential, again relying on the zero fill
     * for the trailing terminator */
    if (NULL != credential) {
        memcpy(payload + offset, credential, strlen(credential));
    }

    if (PMIX_SUCCESS != pmix_usock_send_blocking(sd, payload, total)) {
        result = PMIX_ERR_UNREACH;
    }

    free(payload);
    free(credential);
    return result;
}
/* we receive a connection acknowledgement from the server,
* consisting of the status and (if success) the nspace assigned
* to us */
/*
 * Receive the server's answer to our connection request on sd.
 *
 * The server sends, in order: the status of the security handshake, the
 * status of the request for a namespace, the nspace assigned to us, and
 * then its own nspace and rank. A two second receive timeout is applied
 * while reading (when the socket supports SO_RCVTIMEO) and the previous
 * timeout is restored on success.
 *
 * On success our identifier (assigned nspace, rank 0) and the server's
 * identifier are recorded in the globals.
 *
 * Returns PMIX_SUCCESS, the failure status reported by the server, the
 * error of a failed receive, or PMIX_ERR_UNREACH if the socket options
 * could not be read or set.
 */
static pmix_status_t recv_connect_ack(int sd)
{
    pmix_status_t reply;
    pmix_status_t rc;
    struct timeval tv, save;
    pmix_socklen_t sz;
    bool sockopt = true;
    pmix_nspace_t *nsptr;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: RECV CONNECT ACK FROM SERVER");

    /* get the current timeout value so we can reset to it */
    sz = sizeof(save);
    if (0 != getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &sz)) {
        if (ENOPROTOOPT == errno) {
            /* timeouts are not supported on this socket - carry on without */
            sockopt = false;
        } else {
            return PMIX_ERR_UNREACH;
        }
    } else {
        /* set a timeout on the blocking recv so we don't hang */
        tv.tv_sec  = 2;
        tv.tv_usec = 0;
        if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "pmix: recv_connect_ack could not setsockopt SO_RCVTIMEO");
            return PMIX_ERR_UNREACH;
        }
    }

    /* get the returned status from the security handshake - the recv
     * itself must be checked, as "reply" holds nothing meaningful if
     * the read failed or timed out */
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t)))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    if (PMIX_SUCCESS != reply) {
        return reply;
    }

    /* get the returned status from the request for namespace */
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t)))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    if (PMIX_SUCCESS != reply) {
        return reply;
    }

    /* get our assigned nspace */
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, pmix_globals.myid.nspace, PMIX_MAX_NSLEN+1))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    /* do not trust the sender to have terminated the string */
    pmix_globals.myid.nspace[PMIX_MAX_NSLEN] = '\0';

    /* setup required bookkeeping */
    nsptr = PMIX_NEW(pmix_nspace_t);
    (void)strncpy(nsptr->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
    pmix_list_append(&pmix_globals.nspaces, &nsptr->super);
    /* our rank is always zero */
    pmix_globals.myid.rank = 0;

    /* get the server's nspace and rank so we can send to it */
    pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t);
    pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t);
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)pmix_client_globals.myserver.info->nptr->nspace, PMIX_MAX_NSLEN+1))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    pmix_client_globals.myserver.info->nptr->nspace[PMIX_MAX_NSLEN] = '\0';
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)&(pmix_client_globals.myserver.info->rank), sizeof(int)))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d",
                        pmix_globals.myid.nspace, pmix_globals.myid.rank,
                        pmix_client_globals.myserver.info->nptr->nspace,
                        pmix_client_globals.myserver.info->rank);

    /* put the original receive timeout back */
    if (sockopt) {
        if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) {
            pmix_output(0, "FAILURE");
            return PMIX_ERR_UNREACH;
        }
    }

    return PMIX_SUCCESS;
}
/*
 * Connect to the server listening on the given unix-domain socket address
 * and complete the connection handshake.
 *
 * Up to PMIX_MAX_RETRIES connection attempts are made, each on a freshly
 * created socket. Every failed attempt closes its socket before retrying
 * so no descriptors are leaked, and a connection that succeeds on the
 * final permitted attempt is treated as a success.
 *
 * @param addr  address of the server's rendezvous socket
 * @param fd    on success, receives the connected (non-blocking) socket
 *
 * @return PMIX_SUCCESS on success, PMIX_ERR_UNREACH if no connection could
 *         be established, or the error returned by the connect-ack
 *         send/recv handshake.
 */
static pmix_status_t usock_connect(struct sockaddr_un *addr, int *fd)
{
    int sd = -1;
    pmix_status_t rc;
    pmix_socklen_t addrlen = sizeof(struct sockaddr_un);
    int retries = 0;
    bool connected = false;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "usock_peer_try_connect: attempting to connect to server");

    while (!connected && retries < PMIX_MAX_RETRIES) {
        retries++;
        /* Create the new socket */
        sd = socket(PF_UNIX, SOCK_STREAM, 0);
        if (sd < 0) {
            pmix_output(0, "pmix:create_socket: socket() failed: %s (%d)\n",
                        strerror(pmix_socket_errno),
                        pmix_socket_errno);
            continue;
        }

        pmix_output_verbose(2, pmix_globals.debug_output,
                            "usock_peer_try_connect: attempting to connect to server on socket %d", sd);

        /* try to connect */
        if (connect(sd, (struct sockaddr*)addr, addrlen) < 0) {
            if (pmix_socket_errno == ETIMEDOUT) {
                /* The server may be too busy to accept new connections */
                pmix_output_verbose(2, pmix_globals.debug_output,
                                    "timeout connecting to server");
            }
            /* Some kernels (Linux 2.6) will automatically software
               abort a connection that was ECONNREFUSED on the last
               attempt, without even trying to establish the
               connection.  Handle that case in a semi-rational
               way by trying twice before giving up */
            else if (ECONNABORTED == pmix_socket_errno) {
                pmix_output_verbose(2, pmix_globals.debug_output,
                                    "connection to server aborted by OS - retrying");
            } else {
                pmix_output_verbose(2, pmix_globals.debug_output,
                                    "Failed to connect, errno = %d, err= %s\n", errno, strerror(errno));
            }
            /* a socket whose connect failed is never reused - always
             * release it before the next attempt */
            CLOSE_THE_SOCKET(sd);
            continue;
        }

        /* the connect succeeded - this ends the loop */
        connected = true;
    }

    if (!connected) {
        /* We were unsuccessful in establishing this connection, and are
         * not likely to suddenly become successful. Every failed attempt
         * has already closed its own socket, so nothing is left to clean up */
        return PMIX_ERR_UNREACH;
    }

    /* send any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        return rc;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_usock_set_nonblocking(sd);

    *fd = sd;
    return PMIX_SUCCESS;
}

Просмотреть файл

@ -78,7 +78,7 @@ void pmix_usock_finalize(void)
PMIX_LIST_DESTRUCT(&pmix_usock_globals.posted_recvs);
}
pmix_status_t pmix_usock_set_nonblocking(int sd)
pmix_status_t pmix_usock_set_nonblocking(int sd)
{
int flags;
/* setup the socket as non-blocking */

Просмотреть файл

@ -56,6 +56,7 @@
#define PMIX_ERR_SILENT (PMIX_INTERNAL_ERR_BASE - 25)
#define PMIX_ERR_UNKNOWN_DATATYPE (PMIX_INTERNAL_ERR_BASE - 26)
#define PMIX_ERR_RESOURCE_BUSY (PMIX_INTERNAL_ERR_BASE - 27)
#define PMIX_ERR_OPERATION_IN_PROGRESS (PMIX_INTERNAL_ERR_BASE - 28)
#define PMIX_ERROR_LOG(r) \
do { \

Просмотреть файл

@ -21,7 +21,7 @@
AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix
noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex test_pmix
noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex test_pmix simptool
simptest_SOURCES = \
simptest.c
@ -64,3 +64,9 @@ test_pmix_SOURCES = \
test_pmix_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
test_pmix_LDADD = \
$(top_builddir)/libpmix.la
simptool_SOURCES = \
simptool.c
simptool_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
simptool_LDADD = \
$(top_builddir)/libpmix.la

Просмотреть файл

@ -83,13 +83,21 @@ static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs,
static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t deregister_event_fn(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t notify_event(pmix_status_t code,
const pmix_proc_t *source,
pmix_data_range_t range,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t query_fn(pmix_proc_t *proct,
pmix_info_t *info, size_t ninfo,
pmix_info_t *directives, size_t ndir,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
static void tool_connect_fn(pmix_info_t *info, size_t ninfo,
pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
static pmix_server_module_t mymodule = {
.client_connected = connected,
@ -104,8 +112,10 @@ static pmix_server_module_t mymodule = {
.connect = connect_fn,
.disconnect = disconnect_fn,
.register_events = register_event_fn,
.deregister_events = deregister_events,
.notify_event = notify_event
.deregister_events = deregister_event_fn,
.notify_event = notify_event,
.query = query_fn,
.tool_connected = tool_connect_fn
};
typedef struct {
@ -195,6 +205,7 @@ int main(int argc, char **argv)
myxfer_t *x;
pmix_proc_t proc;
wait_tracker_t *child;
pmix_info_t info;
/* smoke test */
if (PMIX_SUCCESS != 0) {
@ -204,11 +215,15 @@ int main(int argc, char **argv)
fprintf(stderr, "Testing version %s\n", PMIx_Get_version());
/* setup the server library */
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) {
/* setup the server library and tell it to support tool connections */
PMIX_INFO_CONSTRUCT(&info);
(void)strncpy(info.key, PMIX_SERVER_TOOL_SUPPORT, PMIX_MAX_KEYLEN);
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, &info, 1))) {
fprintf(stderr, "Init failed with error %d\n", rc);
return rc;
}
PMIX_INFO_DESTRUCT(&info);
/* register the errhandler */
PMIx_Register_event_handler(NULL, 0, NULL, 0,
errhandler, errhandler_reg_callbk, NULL);
@ -666,12 +681,19 @@ static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
if (NULL != cbfunc) {
cbfunc(PMIX_SUCCESS, cbdata);
}
return PMIX_SUCCESS;
}
static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata)
static pmix_status_t deregister_event_fn(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_output(0, "SERVER: DEREGISTER EVENT");
if (NULL != cbfunc) {
cbfunc(PMIX_SUCCESS, cbdata);
}
return PMIX_SUCCESS;
}
@ -684,6 +706,52 @@ static pmix_status_t notify_event(pmix_status_t code,
return PMIX_SUCCESS;
}
/* Pairs a pmix_info_t array pointer with a count.
 * NOTE(review): presumably ndata is the number of entries in data, and
 * nothing in the visible part of this file references the type - confirm
 * both before relying on it. */
typedef struct query_data_t {
pmix_info_t *data;
size_t ndata;
} query_data_t;
/*
 * Test-server query handler: answers every requested key with a string
 * holding that key's index in the request ("0", "1", ...), then hands the
 * same info array back through the callback.
 *
 * @param proct       requesting process (unused)
 * @param info        array of queries; the value of each entry is filled in
 * @param ninfo       number of entries in info
 * @param directives  query directives (unused)
 * @param ndirs       number of directives (unused)
 * @param cbfunc      callback that receives the answers; required
 * @param cbdata      opaque pointer passed through to cbfunc
 *
 * @return PMIX_SUCCESS once cbfunc has been invoked, PMIX_ERROR if no
 *         callback was given or an answer string could not be allocated.
 */
static pmix_status_t query_fn(pmix_proc_t *proct,
                              pmix_info_t *info, size_t ninfo,
                              pmix_info_t *directives, size_t ndirs,
                              pmix_info_cbfunc_t cbfunc,
                              void *cbdata)
{
    size_t n;
    char *answer;

    pmix_output(0, "SERVER: QUERY");

    if (NULL == cbfunc) {
        return PMIX_ERROR;
    }

    /* keep this simple */
    for (n = 0; n < ninfo; n++) {
        /* format into a temporary first: when asprintf fails the target
         * pointer is left undefined, so the entry must not be marked as a
         * string until we actually have one */
        answer = NULL;
        if (0 > asprintf(&answer, "%d", (int)n)) {
            return PMIX_ERROR;
        }
        info[n].value.type = PMIX_STRING;
        info[n].value.data.string = answer;
    }

    cbfunc(PMIX_SUCCESS, info, ninfo, cbdata, NULL, NULL);
    return PMIX_SUCCESS;
}
/*
 * Test-server handler for an incoming tool connection: logs the request
 * and, when a callback was supplied, answers it with the fixed identity
 * nspace "TOOL", rank 0.
 *
 * @param info    connection qualifiers (unused)
 * @param ninfo   number of qualifiers (unused)
 * @param cbfunc  callback that receives the assigned identity; may be NULL
 * @param cbdata  opaque pointer passed through to cbfunc
 */
static void tool_connect_fn(pmix_info_t *info, size_t ninfo,
                            pmix_tool_connection_cbfunc_t cbfunc,
                            void *cbdata)
{
    pmix_proc_t toolid;

    pmix_output(0, "SERVER: TOOL CONNECT");

    /* just pass back an arbitrary nspace */
    toolid.rank = 0;
    (void)strncpy(toolid.nspace, "TOOL", PMIX_MAX_NSLEN);

    if (NULL == cbfunc) {
        return;
    }
    cbfunc(PMIX_SUCCESS, &toolid, cbdata);
}
static void wait_signal_callback(int fd, short event, void *arg)
{
pmix_event_t *sig = (pmix_event_t*) arg;

Просмотреть файл

@ -0,0 +1,98 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include <src/include/pmix_config.h>
#include <pmix_tool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include "src/class/pmix_object.h"
#include "src/buffer_ops/types.h"
#include "src/util/output.h"
#include "src/util/printf.h"
/*
 * Simple tool test: initialize as a PMIx tool, issue a two-key query,
 * check that the keys come back unchanged and that the values are the
 * strings "0" and "1", then finalize.
 *
 * Returns the status of PMIx_Finalize (the process exits with the
 * PMIx_tool_init status if initialization fails).
 */
int main(int argc, char **argv)
{
    pmix_status_t rc;
    pmix_proc_t myproc;
    pmix_info_t *info;
    size_t ninfo;

    /* init us */
    if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
        fprintf(stderr, "PMIx_tool_init failed: %d\n", rc);
        exit(rc);
    }
    pmix_output(0, "Tool ns %s rank %d: Running", myproc.nspace, myproc.rank);

    /* query something */
    ninfo = 2;
    PMIX_INFO_CREATE(info, ninfo);
    (void)strncpy(info[0].key, "foobar", PMIX_MAX_KEYLEN);
    (void)strncpy(info[1].key, "spastic", PMIX_MAX_KEYLEN);
    if (PMIX_SUCCESS != (rc = PMIx_Query_info(info, ninfo))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info failed: %d", myproc.nspace, myproc.rank, rc);
        goto done;
    }

    /* the keys must come back untouched */
    if (0 != strncmp(info[0].key, "foobar", PMIX_MAX_KEYLEN)) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs foobar",
                    myproc.nspace, myproc.rank, info[0].key);
    }
    if (0 != strncmp(info[1].key, "spastic", PMIX_MAX_KEYLEN)) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[1] wrong: %s vs spastic",
                    myproc.nspace, myproc.rank, info[1].key);
    }

    /* only look at the string payload once the type says it is a string -
     * otherwise data.string is not a valid pointer to compare or print */
    if (PMIX_STRING != info[0].value.type) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong type: %d vs %d",
                    myproc.nspace, myproc.rank, info[0].value.type, PMIX_STRING);
    } else if (NULL == info[0].value.data.string ||
               0 != strcmp(info[0].value.data.string, "0")) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong value: %s vs 0",
                    myproc.nspace, myproc.rank,
                    (NULL == info[0].value.data.string) ? "NULL" : info[0].value.data.string);
    }
    if (PMIX_STRING != info[1].value.type) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[1] wrong type: %d vs %d",
                    myproc.nspace, myproc.rank, info[1].value.type, PMIX_STRING);
    } else if (NULL == info[1].value.data.string ||
               0 != strcmp(info[1].value.data.string, "1")) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[1] wrong value: %s vs 1",
                    myproc.nspace, myproc.rank,
                    (NULL == info[1].value.data.string) ? "NULL" : info[1].value.data.string);
    }

 done:
    /* the query array exists on every path that reaches this point,
     * including the failed-query path, so release it here */
    PMIX_INFO_FREE(info, ninfo);

    /* finalize us */
    pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank);
    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
    } else {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
    }
    fflush(stderr);
    return(rc);
}

20
opal/mca/pmix/pmix2x/pmix2x.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -564,6 +564,9 @@ opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range) {
void pmix2x_value_load(pmix_value_t *v,
opal_value_t *kv)
{
char nspace[PMIX_MAX_NSLEN + 1];
size_t n;
switch(kv->type) {
case OPAL_UNDEF:
v->type = PMIX_UNDEF;
@ -655,6 +658,19 @@ void pmix2x_value_load(pmix_value_t *v,
v->data.bo.size = 0;
}
break;
case OPAL_UINT32_ARRAY:
/* an array of 32-bit jobids */
v->type = PMIX_INFO_ARRAY;
v->data.array.size = kv->data.uint32_array.size;
if (0 < v->data.array.size) {
PMIX_INFO_CREATE(v->data.array.array, v->data.array.size);
for (n=0; n < v->data.array.size; n++) {
v->data.array.array[n].value.type = PMIX_STRING;
(void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, kv->data.uint32_array.data[n]);
v->data.array.array[n].value.data.string = strdup(nspace);
}
}
break;
default:
/* silence warnings */
break;
@ -669,7 +685,7 @@ int pmix2x_value_unload(opal_value_t *kv,
switch(v->type) {
case PMIX_UNDEF:
rc = OPAL_ERR_UNKNOWN_DATA_TYPE;
kv->type = OPAL_UNDEF;
break;
case PMIX_BOOL:
kv->type = OPAL_BOOL;
@ -1148,6 +1164,8 @@ static void ocadcon(pmix2x_opalcaddy_t *p)
p->spwncbfunc = NULL;
p->cbdata = NULL;
p->odmdxfunc = NULL;
p->infocbfunc = NULL;
p->toolcbfunc = NULL;
p->ocbdata = NULL;
}
static void ocaddes(pmix2x_opalcaddy_t *p)

Просмотреть файл

@ -130,6 +130,8 @@ typedef struct {
pmix_modex_cbfunc_t mdxcbfunc;
pmix_lookup_cbfunc_t lkupcbfunc;
pmix_spawn_cbfunc_t spwncbfunc;
pmix_info_cbfunc_t infocbfunc;
pmix_tool_connection_cbfunc_t toolcbfunc;
void *cbdata;
opal_pmix_release_cbfunc_t odmdxfunc;
void *ocbdata;
@ -302,7 +304,7 @@ OPAL_MODULE_DECLSPEC pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t sco
OPAL_MODULE_DECLSPEC pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range);
OPAL_MODULE_DECLSPEC opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range);
OPAL_MODULE_DECLSPEC void pmix2x_value_load(pmix_value_t *v,
opal_value_t *kv);
opal_value_t *kv);
OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv,
const pmix_value_t *v);

167
opal/mca/pmix/pmix2x/pmix2x_server_north.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -88,6 +88,15 @@
pmix_data_range_t range,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t server_query(pmix_proc_t *proct,
pmix_info_t *info, size_t ninfo,
pmix_info_t *directives, size_t ndirs,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
static void server_tool_connection(pmix_info_t *info, size_t ninfo,
pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
pmix_server_module_t mymodule = {
.client_connected = server_client_connected_fn,
.client_finalized = server_client_finalized_fn,
@ -102,7 +111,9 @@
.disconnect = server_disconnect_fn,
.register_events = server_register_events,
.deregister_events = server_deregister_events,
.notify_event = server_notify_event
.notify_event = server_notify_event,
.query = server_query,
.tool_connected = server_tool_connection
};
opal_pmix_server_module_t *host_module = NULL;
@ -787,3 +798,157 @@ static pmix_status_t server_notify_event(pmix_status_t code,
{
return PMIX_ERR_NOT_SUPPORTED;
}
/*
 * Release callback handed downward together with a query answer: drops
 * the reference on the pmix2x_opcaddy_t passed as cbdata.
 */
static void _info_rel(void *cbdata)
{
    pmix2x_opcaddy_t *answer;

    answer = (pmix2x_opcaddy_t*)cbdata;
    OBJ_RELEASE(answer);
}
/*
 * Callback given to the host's query function. Converts the OPAL status
 * and the returned list of opal_value_t into a pmix_status_t and a
 * pmix_info_t array, releases the host's data, and passes the converted
 * answer to the PMIx callback stored in the caddy.
 *
 * @param status          OPAL status of the query
 * @param info            list of opal_value_t answers (may be NULL)
 * @param cbdata          the pmix2x_opalcaddy_t created by server_query;
 *                        released before returning
 * @param release_fn      if non-NULL, called once the list has been copied
 * @param release_cbdata  argument for release_fn
 */
static void info_cbfunc(int status,
                        opal_list_t *info,
                        void *cbdata,
                        opal_pmix_release_cbfunc_t release_fn,
                        void *release_cbdata)
{
    pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata;
    pmix2x_opcaddy_t *pcaddy;
    opal_value_t *kv;
    size_t n;

    pcaddy = OBJ_NEW(pmix2x_opcaddy_t);

    /* convert the status */
    pcaddy->status = pmix2x_convert_opalrc(status);

    /* convert the list to a pmix_info_t array */
    if (NULL != info) {
        pcaddy->ninfo = opal_list_get_size(info);
        if (0 < pcaddy->ninfo) {
            PMIX_INFO_CREATE(pcaddy->info, pcaddy->ninfo);
            n = 0;
            OPAL_LIST_FOREACH(kv, info, opal_value_t) {
                (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN);
                pmix2x_value_load(&pcaddy->info[n].value, kv);
                /* advance to the next slot - without this every list
                 * entry would overwrite element 0 */
                ++n;
            }
        }
    }

    /* we are done with the incoming data */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }

    /* provide the answer downward */
    if (NULL != opalcaddy->infocbfunc) {
        /* _info_rel releases pcaddy once the receiver is done with it */
        opalcaddy->infocbfunc(pcaddy->status, pcaddy->info, pcaddy->ninfo,
                              opalcaddy->cbdata, _info_rel, pcaddy);
    } else {
        /* nobody will ever call _info_rel, so drop the answer here */
        OBJ_RELEASE(pcaddy);
    }
    OBJ_RELEASE(opalcaddy);
}
/*
 * PMIx server-module entry point for queries: translates the requestor
 * and the query array into their OPAL equivalents and forwards the
 * request to the host module's query function. The answer is delivered
 * through info_cbfunc.
 *
 * @param proct       process issuing the query
 * @param info        array of queries
 * @param ninfo       number of entries in info
 * @param directives  query directives (currently ignored)
 * @param ndirs       number of directives (currently ignored)
 * @param cbfunc      PMIx callback that receives the answer
 * @param cbdata      opaque pointer passed through to cbfunc
 *
 * @return PMIX_ERR_NOT_SUPPORTED when the host provides no query function,
 *         otherwise the converted status of the conversion steps or of
 *         the host's query call.
 */
static pmix_status_t server_query(pmix_proc_t *proct,
                                  pmix_info_t *info, size_t ninfo,
                                  pmix_info_t *directives, size_t ndirs,
                                  pmix_info_cbfunc_t cbfunc,
                                  void *cbdata)
{
    pmix2x_opalcaddy_t *caddy;
    opal_process_name_t requestor;
    opal_value_t *val;
    size_t idx;
    int ret;

    if (NULL == host_module || NULL == host_module->query) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* the caddy carries the PMIx callback across the host call */
    caddy = OBJ_NEW(pmix2x_opalcaddy_t);
    caddy->infocbfunc = cbfunc;
    caddy->cbdata = cbdata;

    /* translate the requestor's nspace/rank into an OPAL name */
    ret = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace);
    if (OPAL_SUCCESS != ret) {
        opal_output(0, "FILE: %s LINE %d", __FILE__, __LINE__);
        OBJ_RELEASE(caddy);
        return pmix2x_convert_opalrc(ret);
    }
    requestor.vpid = proct->rank;

    /* translate each query into an opal_value_t on the caddy's list */
    for (idx = 0; idx < ninfo; ++idx) {
        val = OBJ_NEW(opal_value_t);
        opal_list_append(&caddy->info, &val->super);
        val->key = strdup(info[idx].key);
        ret = pmix2x_value_unload(val, &info[idx].value);
        if (OPAL_SUCCESS != ret) {
            OBJ_RELEASE(caddy);
            return pmix2x_convert_opalrc(ret);
        }
    }

    /* we ignore directives for now */

    /* hand the request to the host; on failure the callback will never
     * fire, so the caddy is released here */
    ret = host_module->query(&requestor, &caddy->info, NULL,
                             info_cbfunc, caddy);
    if (OPAL_SUCCESS != ret) {
        OBJ_RELEASE(caddy);
    }
    return pmix2x_convert_opalrc(ret);
}
/*
 * Callback given to the host's tool_connected function. Converts the OPAL
 * status and process name into their PMIx forms, forwards them to the
 * PMIx callback stored in the caddy (if any), and releases the caddy.
 *
 * @param status  OPAL status of the connection request
 * @param proc    name the host assigned to the tool
 * @param cbdata  the pmix2x_opalcaddy_t created by server_tool_connection
 */
static void toolcbfunc(int status,
                       opal_process_name_t proc,
                       void *cbdata)
{
    pmix2x_opalcaddy_t *caddy = (pmix2x_opalcaddy_t*)cbdata;
    pmix_status_t pstat = pmix2x_convert_opalrc(status);
    pmix_proc_t tool;

    /* express the OPAL name as nspace + rank */
    (void)opal_snprintf_jobid(tool.nspace, PMIX_MAX_NSLEN, proc.jobid);
    tool.rank = proc.vpid;

    /* pass it down */
    if (NULL != caddy->toolcbfunc) {
        caddy->toolcbfunc(pstat, &tool, caddy->cbdata);
    }
    OBJ_RELEASE(caddy);
}
/*
 * PMIx server-module entry point for an incoming tool connection:
 * converts the connection qualifiers into a list of opal_value_t and
 * forwards the request to the host module. The host answers through
 * toolcbfunc, which in turn invokes the PMIx callback.
 *
 * If the host cannot take tool connections, or a qualifier cannot be
 * converted, the PMIx callback (when given) is invoked right away with the
 * error and a NULL process, and the host is not contacted.
 *
 * @param info    connection qualifiers
 * @param ninfo   number of qualifiers
 * @param cbfunc  PMIx callback that receives the assigned identity
 * @param cbdata  opaque pointer passed through to cbfunc
 */
static void server_tool_connection(pmix_info_t *info, size_t ninfo,
                                   pmix_tool_connection_cbfunc_t cbfunc,
                                   void *cbdata)
{
    pmix2x_opalcaddy_t *opalcaddy;
    size_t n;
    opal_value_t *oinfo;
    int rc;
    pmix_status_t err;

    /* without a host handler there is nobody to assign an identity */
    if (NULL == host_module || NULL == host_module->tool_connected) {
        if (NULL != cbfunc) {
            cbfunc(PMIX_ERR_NOT_SUPPORTED, NULL, cbdata);
        }
        return;
    }

    /* setup the caddy */
    opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
    opalcaddy->toolcbfunc = cbfunc;
    opalcaddy->cbdata = cbdata;

    /* convert the info */
    for (n = 0; n < ninfo; n++) {
        oinfo = OBJ_NEW(opal_value_t);
        opal_list_append(&opalcaddy->info, &oinfo->super);
        oinfo->key = strdup(info[n].key);
        if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) {
            OBJ_RELEASE(opalcaddy);
            err = pmix2x_convert_opalrc(rc);
            if (NULL != cbfunc) {
                cbfunc(err, NULL, cbdata);
            }
            /* the caddy is gone and the error has been reported - stop
             * here rather than touching the released caddy or calling
             * the host */
            return;
        }
    }

    /* pass it up */
    host_module->tool_connected(&opalcaddy->info, toolcbfunc, opalcaddy);
}

23
opal/mca/pmix/pmix_server.h Обычный файл → Исполняемый файл
Просмотреть файл

@ -183,6 +183,27 @@ typedef int (*opal_pmix_server_disconnect_fn_t)(opal_list_t *procs, opal_list_t
typedef int (*opal_pmix_server_notify_fn_t)(int code, opal_list_t *procs, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Query the RTE for information */
typedef int (*opal_pmix_server_query_fn_t)(opal_process_name_t *requestor,
opal_list_t *info, opal_list_t *directives,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
/* Register that a tool has connected to the server, and request
* that the tool be assigned a jobid for further interactions.
* The optional opal_value_t list can be used to pass qualifiers for
* the connection request:
*
* (a) OPAL_PMIX_USERID - effective userid of the tool
* (b) OPAL_PMIX_GRPID - effective groupid of the tool
* (c) OPAL_PMIX_FWD_STDOUT - forward any stdout to this tool
* (d) OPAL_PMIX_FWD_STDERR - forward any stderr to this tool
* (e) OPAL_PMIX_FWD_STDIN - forward stdin from this tool to any
* processes spawned on its behalf
*/
typedef void (*opal_pmix_server_tool_connection_fn_t)(opal_list_t *info,
opal_pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
/* Callback function for incoming connection requests from
* local clients */
typedef void (*opal_pmix_connection_cbfunc_t)(int incoming_sd);
@ -215,6 +236,8 @@ typedef struct opal_pmix_server_module_1_0_0_t {
opal_pmix_server_register_events_fn_t register_events;
opal_pmix_server_deregister_events_fn_t deregister_events;
opal_pmix_server_notify_fn_t notify_event;
opal_pmix_server_query_fn_t query;
opal_pmix_server_tool_connection_fn_t tool_connected;
opal_pmix_server_listener_fn_t listener;
} opal_pmix_server_module_t;

Просмотреть файл

@ -41,6 +41,11 @@ BEGIN_C_DECLS
* these keys are RESERVED */
#define OPAL_PMIX_ATTR_UNDEF NULL
#define OPAL_PMIX_SERVER_TOOL_SUPPORT "pmix.srvr.tool" // (bool) The host RM wants to declare itself as willing to
// accept tool connection requests
#define OPAL_PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (uint32_t) pid of the target server
/* identification attributes */
#define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id
#define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id
@ -166,6 +171,12 @@ BEGIN_C_DECLS
#define OPAL_PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position
#define OPAL_PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init
#define OPAL_PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin
#define OPAL_PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc
#define OPAL_PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me
#define OPAL_PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me
/* query attributes */
#define OPAL_PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces
/* define a scope for data "put" by PMI per the following:
@ -357,6 +368,24 @@ typedef void (*opal_pmix_value_cbfunc_t)(int status,
opal_value_t *kv, void *cbdata);
/* define a callback function for calls to PMIx_Query. The status
* indicates if requested data was found or not - a list of
* opal_value_t will contain the key/value pairs. */
typedef void (*opal_pmix_info_cbfunc_t)(int status,
opal_list_t *info,
void *cbdata,
opal_pmix_release_cbfunc_t release_fn,
void *release_cbdata);
/* Callback function for incoming tool connections - the host
* RTE shall provide a jobid/rank for the connecting tool. We
* assume that a rank=0 will be the normal assignment, but allow
* for the future possibility of a parallel set of tools
* connecting, and thus each proc requiring a rank */
typedef void (*opal_pmix_tool_connection_cbfunc_t)(int status,
opal_process_name_t proc,
void *cbdata);
END_C_DECLS

Просмотреть файл

@ -259,6 +259,9 @@ opal_err2str(int errnum, const char **errmsg)
case OPAL_ERR_HANDLERS_COMPLETE:
retval = "Event handler processing complete";
break;
case OPAL_ERR_PARTIAL_SUCCESS:
retval = "Partial success";
break;
default:
retval = "UNRECOGNIZED";
}

Просмотреть файл

@ -87,6 +87,7 @@ enum {
ORTE_ERR_AUTHENTICATION_FAILED = OPAL_ERR_AUTHENTICATION_FAILED,
ORTE_ERR_COMM_FAILURE = OPAL_ERR_COMM_FAILURE,
ORTE_ERR_DEBUGGER_RELEASE = OPAL_ERR_DEBUGGER_RELEASE,
ORTE_ERR_PARTIAL_SUCCESS = OPAL_ERR_PARTIAL_SUCCESS,
/* error codes specific to ORTE - don't forget to update
orte/util/error_strings.c when adding new error codes!!

19
orte/orted/orted_submit.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -1896,16 +1896,17 @@ static void launch_recv(int status, orte_process_name_t* sender,
if (ORTE_SUCCESS == ret) {
printf("[ORTE] Task: %d is launched! (Job ID: %s)\n", tool_job_index, ORTE_JOBID_PRINT(jobid));
} else {
/* unpack the offending proc and node */
/* unpack the offending proc and node, if sent */
cnt = 1;
opal_dss.unpack(buffer, &trk->jdata->state, &cnt, ORTE_JOB_STATE_T);
cnt = 1;
opal_dss.unpack(buffer, &proc, &cnt, ORTE_PROC);
proc->exit_code = ret;
app = (orte_app_context_t*)opal_pointer_array_get_item(trk->jdata->apps, proc->app_idx);
cnt = 1;
opal_dss.unpack(buffer, &node, &cnt, ORTE_NODE);
orte_print_aborted_job(trk->jdata, app, proc, node);
if (OPAL_SUCCESS == opal_dss.unpack(buffer, &trk->jdata->state, &cnt, ORTE_JOB_STATE_T)) {
cnt = 1;
opal_dss.unpack(buffer, &proc, &cnt, ORTE_PROC);
proc->exit_code = ret;
app = (orte_app_context_t*)opal_pointer_array_get_item(trk->jdata->apps, proc->app_idx);
cnt = 1;
opal_dss.unpack(buffer, &node, &cnt, ORTE_NODE);
orte_print_aborted_job(trk->jdata, app, proc, node);
}
}
/* Inform client */

39
orte/orted/pmix/pmix_server.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -84,20 +84,21 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
pmix_server_globals_t orte_pmix_server_globals = {0};
static opal_pmix_server_module_t pmix_server = {
pmix_server_client_connected_fn,
pmix_server_client_finalized_fn,
pmix_server_abort_fn,
pmix_server_fencenb_fn,
pmix_server_dmodex_req_fn,
pmix_server_publish_fn,
pmix_server_lookup_fn,
pmix_server_unpublish_fn,
pmix_server_spawn_fn,
pmix_server_connect_fn,
pmix_server_disconnect_fn,
pmix_server_register_events_fn,
pmix_server_deregister_events_fn,
NULL
.client_connected = pmix_server_client_connected_fn,
.client_finalized = pmix_server_client_finalized_fn,
.abort = pmix_server_abort_fn,
.fence_nb = pmix_server_fencenb_fn,
.direct_modex = pmix_server_dmodex_req_fn,
.publish = pmix_server_publish_fn,
.lookup = pmix_server_lookup_fn,
.unpublish = pmix_server_unpublish_fn,
.spawn = pmix_server_spawn_fn,
.connect = pmix_server_connect_fn,
.disconnect = pmix_server_disconnect_fn,
.register_events = pmix_server_register_events_fn,
.deregister_events = pmix_server_deregister_events_fn,
.query = pmix_server_query_fn,
.tool_connected = pmix_tool_connected_fn
};
void pmix_server_register_params(void)
@ -182,6 +183,7 @@ int pmix_server_init(void)
{
int rc;
opal_list_t info;
opal_value_t *kv;
if (orte_pmix_server_globals.initialized) {
return ORTE_SUCCESS;
@ -229,7 +231,6 @@ int pmix_server_init(void)
if (NULL != opal_hwloc_topology) {
char *xmlbuffer=NULL;
int len;
opal_value_t *kv;
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
@ -241,6 +242,11 @@ int pmix_server_init(void)
kv->type = OPAL_STRING;
opal_list_append(&info, &kv->super);
}
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_TOOL_SUPPORT);
kv->type = OPAL_BOOL;
kv->data.flag = true;
opal_list_append(&info, &kv->super);
/* setup the local server */
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) {
@ -648,6 +654,9 @@ static void opcon(orte_pmix_server_op_caddy_t *p)
p->procs = NULL;
p->eprocs = NULL;
p->info = NULL;
p->cbfunc = NULL;
p->infocbfunc = NULL;
p->toolcbfunc = NULL;
p->cbdata = NULL;
}
OBJ_CLASS_INSTANCE(orte_pmix_server_op_caddy_t,

120
orte/orted/pmix/pmix_server_gen.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -32,6 +32,7 @@
#include <unistd.h>
#endif
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/dss/dss.h"
@ -40,6 +41,7 @@
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/plm/base/plm_private.h"
#include "pmix_server_internal.h"
@ -339,3 +341,121 @@ void pmix_server_notify(int status, orte_process_name_t* sender,
OBJ_RELEASE(cd);
}
}
/*
 * Event handler that services a thread-shifted query request.
 *
 * Walks the requested keys; for OPAL_PMIX_QUERY_NAMESPACES the value is
 * set to a comma-delimited string of the jobids in orte_job_data other
 * than this process's own job (NULL if there are none). Other keys are
 * left unanswered. The callback then receives:
 *   ORTE_ERR_NOT_FOUND        - no key was answered
 *   ORTE_ERR_PARTIAL_SUCCESS  - only some keys were answered
 *   ORTE_SUCCESS              - every key was answered
 *
 * @param sd      unused (event signature)
 * @param args    unused (event signature)
 * @param cbdata  the orte_pmix_server_op_caddy_t built by
 *                pmix_server_query_fn; released before returning
 */
static void _query(int sd, short args, void *cbdata)
{
    orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
    opal_value_t *kv;
    orte_job_t *jdata;
    int rc;
    size_t nresults=0;
    uint32_t key;
    void *nptr;
    char **nspaces=NULL, nspace[512];

    /* see what they wanted */
    OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
        if (0 == strcmp(kv->key, OPAL_PMIX_QUERY_NAMESPACES)) {
            /* get the current jobids */
            rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jdata, &nptr);
            while (OPAL_SUCCESS == rc) {
                if (ORTE_PROC_MY_NAME->jobid != jdata->jobid) {
                    memset(nspace, 0, sizeof(nspace));
                    (void)opal_snprintf_jobid(nspace, sizeof(nspace), jdata->jobid);
                    opal_argv_append_nosize(&nspaces, nspace);
                }
                rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jdata, nptr, &nptr);
            }
            /* join the results into a single comma-delimited string */
            kv->type = OPAL_STRING;
            if (NULL != nspaces) {
                kv->data.string = opal_argv_join(nspaces, ',');
                /* the joined string is a separate allocation, so the
                 * argv can go; resetting it also keeps a repeated key
                 * from accumulating duplicate entries */
                opal_argv_free(nspaces);
                nspaces = NULL;
            } else {
                kv->data.string = NULL;
            }
            ++nresults;
        }
    }

    if (0 == nresults) {
        rc = ORTE_ERR_NOT_FOUND;
    } else if (nresults < opal_list_get_size(cd->info)) {
        rc = ORTE_ERR_PARTIAL_SUCCESS;
    } else {
        rc = ORTE_SUCCESS;
    }
    cd->infocbfunc(rc, cd->info, cd->cbdata, NULL, NULL);

    /* the thread-shift caddy has served its purpose */
    OBJ_RELEASE(cd);
}
/*
 * Host-side entry point for a query: records the request in a caddy and
 * thread-shifts it into the ORTE event base, where _query services it.
 *
 * @param requestor   process issuing the query
 * @param info        list of opal_value_t queries; required
 * @param directives  query directives (not used here)
 * @param cbfunc      callback that receives the answers; required
 * @param cbdata      opaque pointer passed through to cbfunc
 *
 * @return OPAL_ERR_BAD_PARAM if info or cbfunc is NULL, otherwise
 *         ORTE_SUCCESS once the request has been queued.
 */
int pmix_server_query_fn(opal_process_name_t *requestor,
                         opal_list_t *info, opal_list_t *directives,
                         opal_pmix_info_cbfunc_t cbfunc, void *cbdata)
{
    orte_pmix_server_op_caddy_t *req;

    if (NULL == cbfunc || NULL == info) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* capture the request so it can be handled in the event thread */
    req = OBJ_NEW(orte_pmix_server_op_caddy_t);
    req->cbdata = cbdata;
    req->infocbfunc = cbfunc;
    req->info = info;
    /* NOTE(review): only the caller's pointer is stored here - confirm
     * it remains valid until _query runs */
    req->proc = requestor;

    /* need to threadshift this request */
    opal_event_set(orte_event_base, &(req->ev), -1,
                   OPAL_EV_WRITE, _query, req);
    opal_event_set_priority(&(req->ev), ORTE_MSG_PRI);
    opal_event_active(&(req->ev), OPAL_EV_WRITE, 1);

    return ORTE_SUCCESS;
}
static void _toolconn(int sd, short args, void *cbdata)
{
orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
orte_job_t jdata;
orte_process_name_t tool;
int rc;
/* if we are the HNP, we can directly assign the jobid */
if (ORTE_PROC_IS_HNP) {
OBJ_CONSTRUCT(&jdata, orte_job_t);
rc = orte_plm_base_create_jobid(&jdata);
tool.jobid = jdata.jobid;
tool.vpid = 0;
if (NULL != cd->toolcbfunc) {
cd->toolcbfunc(rc, tool, cd->cbdata);
}
OBJ_RELEASE(cd);
return;
}
/* otherwise, we have to send the request to the HNP.
* Eventually, when we switch to nspace instead of an
* integer jobid, we'll just locally assign this value */
if (NULL != cd->toolcbfunc) {
cd->toolcbfunc(ORTE_ERR_NOT_SUPPORTED, tool, cd->cbdata);
}
OBJ_RELEASE(cd);
}
/*
 * Host-side entry point for an incoming tool connection: logs the
 * request, records it in a caddy and thread-shifts it into the ORTE event
 * base, where _toolconn services it.
 *
 * @param info    list of connection qualifiers
 * @param cbfunc  callback that receives the name assigned to the tool
 * @param cbdata  opaque pointer passed through to cbfunc
 */
void pmix_tool_connected_fn(opal_list_t *info,
                            opal_pmix_tool_connection_cbfunc_t cbfunc,
                            void *cbdata)
{
    orte_pmix_server_op_caddy_t *req;

    opal_output(0, "TOOL CONNECTION REQUEST RECVD");

    /* capture the request so it can be handled in the event thread */
    req = OBJ_NEW(orte_pmix_server_op_caddy_t);
    req->cbdata = cbdata;
    req->toolcbfunc = cbfunc;
    req->info = info;

    /* need to threadshift this request */
    opal_event_set(orte_event_base, &(req->ev), -1,
                   OPAL_EV_WRITE, _toolconn, req);
    opal_event_set_priority(&(req->ev), ORTE_MSG_PRI);
    opal_event_active(&(req->ev), OPAL_EV_WRITE, 1);
}

8
orte/orted/pmix/pmix_server_internal.h Обычный файл → Исполняемый файл
Просмотреть файл

@ -82,6 +82,8 @@ typedef struct {
opal_list_t *eprocs;
opal_list_t *info;
opal_pmix_op_cbfunc_t cbfunc;
opal_pmix_info_cbfunc_t infocbfunc;
opal_pmix_tool_connection_cbfunc_t toolcbfunc;
void *cbdata;
} orte_pmix_server_op_caddy_t;
OBJ_CLASS_DECLARATION(orte_pmix_server_op_caddy_t);
@ -187,6 +189,12 @@ extern int pmix_server_register_events_fn(opal_list_t *info,
extern int pmix_server_deregister_events_fn(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
extern int pmix_server_query_fn(opal_process_name_t *requestor,
opal_list_t *info, opal_list_t *directives,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
extern void pmix_tool_connected_fn(opal_list_t *info,
opal_pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
/* declare the RML recv functions for responses */
extern void pmix_server_launch_resp(int status, orte_process_name_t* sender,

Просмотреть файл

@ -1,7 +1,7 @@
PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits \
orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix opal_interface orte_spin segfault \
orte_exit test-time event-threads psm_keygen regex orte_errors evpri-test opal-evpri-test evpri-test2 \
mapper reducer opal_hotel orte_dfs ulfm
mapper reducer opal_hotel orte_dfs ulfm pmixtool
all: $(PROGS)
@ -16,3 +16,6 @@ clean:
oob_stress:
ortecc -o oob_stress oob_stress.c -lm
pmixtool:
ortecc -o pmixtool pmixtool.c -lpmix

78
orte/test/system/pmixtool.c Исполняемый файл
Просмотреть файл

@ -0,0 +1,78 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <pmix_tool.h>
/*
 * Minimal PMIx tool test: connect as a tool, issue a single
 * PMIX_QUERY_NAMESPACES query, print the answer, and disconnect.
 *
 * Returns PMIX_SUCCESS (0) when every step succeeded; otherwise the
 * status of the FIRST step that failed, so that a failed query is not
 * masked by a subsequent successful finalize.
 */
int main(int argc, char **argv)
{
    pmix_status_t rc;
    int ret = PMIX_SUCCESS;   /* first failure seen; becomes the exit status */
    pmix_proc_t myproc;
    pmix_info_t *info = NULL;
    size_t ninfo = 1;

    (void)argc;
    (void)argv;

    /* init us */
    if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
        fprintf(stderr, "PMIx_tool_init failed: %s\n", PMIx_Error_string(rc));
        exit(rc);
    }
    fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank);

    /* query something */
    PMIX_INFO_CREATE(info, ninfo);
    if (NULL == info) {
        /* nothing to free yet - report and go straight to finalize */
        fprintf(stderr, "Tool ns %s rank %d: unable to allocate query info\n",
                myproc.nspace, myproc.rank);
        ret = EXIT_FAILURE;
        goto done;
    }
    (void)strncpy(info[0].key, PMIX_QUERY_NAMESPACES, PMIX_MAX_KEYLEN);
    if (PMIX_SUCCESS != (rc = PMIx_Query_info(info, ninfo))) {
        fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc);
        ret = rc;
        /* the info array was allocated above and must still be released */
        goto cleanup;
    }

    if (0 != strncmp(info[0].key, PMIX_QUERY_NAMESPACES, PMIX_MAX_KEYLEN)) {
        fprintf(stderr, "tool ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs %s\n",
                myproc.nspace, myproc.rank, info[0].key, PMIX_QUERY_NAMESPACES);
    }
    if (PMIX_STRING != info[0].value.type) {
        /* do not interpret the value as a string when it is not one */
        fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info key[0] wrong type: %d vs %d\n",
                myproc.nspace, myproc.rank, info[0].value.type, PMIX_STRING);
    } else {
        fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info key[0] returned %s\n",
                myproc.nspace, myproc.rank,
                (NULL == info[0].value.data.string) ? "NULL" : info[0].value.data.string);
    }

cleanup:
    PMIX_INFO_FREE(info, ninfo);

done:
    /* finalize us */
    fprintf(stderr, "Tool ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
    if (PMIX_SUCCESS != (rc = PMIx_tool_finalize())) {
        fprintf(stderr, "Tool ns %s rank %d:PMIx_tool_finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
        /* keep an earlier failure if there was one */
        if (PMIX_SUCCESS == ret) {
            ret = rc;
        }
    } else {
        fprintf(stderr, "Tool ns %s rank %d:PMIx_tool_finalize successfully completed\n", myproc.nspace, myproc.rank);
    }
    fflush(stderr);
    return ret;
}

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -237,6 +237,9 @@ int orte_err2str(int errnum, const char **errmsg)
case ORTE_ERR_DEBUGGER_RELEASE:
retval = "Debugger release";
break;
case ORTE_ERR_PARTIAL_SUCCESS:
retval = "Partial success";
break;
default:
if (orte_report_silent_errors) {
retval = "Unknown error";