1
1

Add support for PMIx tool connections and queries. Initially only support a request to list all known namespaces (jobids) from ORTE, but other folks will extend that support to include additional information

Update to match PMIx RFC

Fix configury to point to correct libevent and hwloc locations
Этот коммит содержится в:
Ralph Castain 2016-06-17 15:15:13 -07:00
родитель f18d6606da
Коммит 6e434d6785
52 изменённых файлов: 2987 добавлений и 237 удалений

1
.gitignore поставляемый
Просмотреть файл

@ -450,6 +450,7 @@ orte/test/system/pmi_abort
orte/test/system/opal_hwloc
orte/test/system/opal_db
orte/test/system/ulfm
orte/test/system/pmixtool
orte/tools/orte-checkpoint/orte-checkpoint
orte/tools/orte-checkpoint/orte-checkpoint.1

Просмотреть файл

@ -82,7 +82,8 @@ enum {
OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52),
OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53),
OPAL_ERR_DEBUGGER_RELEASE = (OPAL_ERR_BASE - 54),
OPAL_ERR_HANDLERS_COMPLETE = (OPAL_ERR_BASE - 55)
OPAL_ERR_HANDLERS_COMPLETE = (OPAL_ERR_BASE - 55),
OPAL_ERR_PARTIAL_SUCCESS = (OPAL_ERR_BASE - 56)
};
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)

Просмотреть файл

@ -59,6 +59,24 @@ AC_DEFUN([MCA_opal_pmix_ext20_CONFIG],[
[AC_MSG_RESULT([no])
opal_pmix_ext20_happy=no])
# if we have 2.0, then check further to see if we have
# the PMIx_Query_info function as that is even newer.
# Default to "not available" so that HAVE_PMIX_QUERY_FUNCTION is
# always defined to 0 or 1 - an empty value would break "#if" tests.
opal_pmix_query_happy=0
AS_IF([test "$opal_pmix_ext20_happy" = "yes"],
      [AC_MSG_CHECKING([if external component provides PMIx_Query_info])
       OPAL_CHECK_PACKAGE([opal_pmix_ext20],
                          [pmix.h],
                          [pmix],
                          [PMIx_Query_info],
                          [-lpmix],
                          [$pmix_ext_install_dir],
                          [$pmix_ext_install_dir/lib],
                          [AC_MSG_RESULT([yes])
                           opal_pmix_query_happy=1],
                          [AC_MSG_RESULT([no])
                           opal_pmix_query_happy=0])])
AC_DEFINE_UNQUOTED([HAVE_PMIX_QUERY_FUNCTION], [$opal_pmix_query_happy],
                   [Whether or not the external library has the PMIx_Query_info function])
AC_SUBST(opal_pmix_ext20_CPPFLAGS)
AC_SUBST(opal_pmix_ext20_LDFLAGS)
AC_SUBST(opal_pmix_ext20_LIBS)

23
opal/mca/pmix/ext20/pmix_ext20.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -350,6 +350,7 @@ static void _event_hdlr(int sd, short args, void *cbdata)
void pmix20_event_hdlr(size_t evhdlr_registration_id,
pmix_status_t status, const pmix_proc_t *source,
pmix_info_t info[], size_t ninfo,
pmix_info_t results[], size_t nresults,
pmix_event_notification_cbfunc_fn_t cbfunc,
void *cbdata)
{
@ -559,6 +560,9 @@ opal_pmix_data_range_t pmix20_convert_range(pmix_data_range_t range) {
void pmix20_value_load(pmix_value_t *v,
opal_value_t *kv)
{
size_t n;
char nspace[PMIX_MAX_NSLEN + 1];
switch(kv->type) {
case OPAL_UNDEF:
v->type = PMIX_UNDEF;
@ -650,6 +654,19 @@ void pmix20_value_load(pmix_value_t *v,
v->data.bo.size = 0;
}
break;
case OPAL_UINT32_ARRAY:
/* an array of 32-bit jobids */
v->type = PMIX_INFO_ARRAY;
v->data.array.size = kv->data.uint32_array.size;
if (0 < v->data.array.size) {
PMIX_INFO_CREATE(v->data.array.array, v->data.array.size);
for (n=0; n < v->data.array.size; n++) {
v->data.array.array[n].value.type = PMIX_STRING;
(void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, kv->data.uint32_array.data[n]);
v->data.array.array[n].value.data.string = strdup(nspace);
}
}
break;
default:
/* silence warnings */
break;
@ -664,7 +681,7 @@ int pmix20_value_unload(opal_value_t *kv,
switch(v->type) {
case PMIX_UNDEF:
rc = OPAL_ERR_UNKNOWN_DATA_TYPE;
kv->type = OPAL_UNDEF;
break;
case PMIX_BOOL:
kv->type = OPAL_BOOL;
@ -1143,6 +1160,10 @@ static void ocadcon(pmix20_opalcaddy_t *p)
p->spwncbfunc = NULL;
p->cbdata = NULL;
p->odmdxfunc = NULL;
#if HAVE_PMIX_QUERY_FUNCTION
p->infocbfunc = NULL;
p->toolcbfunc = NULL;
#endif
p->ocbdata = NULL;
}
static void ocaddes(pmix20_opalcaddy_t *p)

Просмотреть файл

@ -130,6 +130,10 @@ typedef struct {
pmix_modex_cbfunc_t mdxcbfunc;
pmix_lookup_cbfunc_t lkupcbfunc;
pmix_spawn_cbfunc_t spwncbfunc;
#if HAVE_PMIX_QUERY_FUNCTION
pmix_info_cbfunc_t infocbfunc;
pmix_tool_connection_cbfunc_t toolcbfunc;
#endif
void *cbdata;
opal_pmix_release_cbfunc_t odmdxfunc;
void *ocbdata;
@ -293,6 +297,7 @@ OPAL_MODULE_DECLSPEC int pmix20_server_notify_event(int status,
OPAL_MODULE_DECLSPEC void pmix20_event_hdlr(size_t evhdlr_registration_id,
pmix_status_t status, const pmix_proc_t *source,
pmix_info_t info[], size_t ninfo,
pmix_info_t results[], size_t nresults,
pmix_event_notification_cbfunc_fn_t cbfunc,
void *cbdata);
OPAL_MODULE_DECLSPEC pmix_status_t pmix20_convert_opalrc(int rc);

173
opal/mca/pmix/ext20/pmix_ext20_server_north.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -88,6 +88,17 @@
pmix_data_range_t range,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
#if HAVE_PMIX_QUERY_FUNCTION
static pmix_status_t server_query(pmix_proc_t *proct,
pmix_info_t *info, size_t ninfo,
pmix_info_t *directives, size_t ndirs,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
static void server_tool_connection(pmix_info_t *info, size_t ninfo,
pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
#endif
pmix_server_module_t mymodule = {
.client_connected = server_client_connected_fn,
.client_finalized = server_client_finalized_fn,
@ -102,7 +113,11 @@
.disconnect = server_disconnect_fn,
.register_events = server_register_events,
.deregister_events = server_deregister_events,
.notify_event = server_notify_event
.notify_event = server_notify_event,
#if HAVE_PMIX_QUERY_FUNCTION
.query = server_query,
.tool_connected = server_tool_connection
#endif
};
opal_pmix_server_module_t *host_module = NULL;
@ -787,3 +802,159 @@ static pmix_status_t server_notify_event(pmix_status_t code,
{
return PMIX_ERR_NOT_SUPPORTED;
}
#if HAVE_PMIX_QUERY_FUNCTION
/* Release callback passed to the PMIx info callback together with the
 * converted results: drops the reference on the pmix20_opcaddy_t that is
 * handed back as cbdata. */
static void _info_rel(void *cbdata)
{
    pmix20_opcaddy_t *caddy = cbdata;

    OBJ_RELEASE(caddy);
}
/* Completion callback given to the host's query entry point by
 * server_query. Converts the OPAL status and the list of opal_value_t
 * results into a pmix_status_t plus a pmix_info_t array and passes them
 * to the PMIx callback stored in the opalcaddy.
 *
 * status          OPAL status reported by the host
 * info            list of opal_value_t results (may be NULL)
 * cbdata          the pmix20_opalcaddy_t created by server_query; it is
 *                 released before returning
 * release_fn      if non-NULL, called with release_cbdata once the
 *                 incoming list has been converted
 * release_cbdata  opaque argument for release_fn
 */
static void info_cbfunc(int status,
                        opal_list_t *info,
                        void *cbdata,
                        opal_pmix_release_cbfunc_t release_fn,
                        void *release_cbdata)
{
    pmix20_opalcaddy_t *opalcaddy = (pmix20_opalcaddy_t*)cbdata;
    pmix20_opcaddy_t *pcaddy;
    opal_value_t *kv;
    size_t n;

    pcaddy = OBJ_NEW(pmix20_opcaddy_t);

    /* convert the status */
    pcaddy->status = pmix20_convert_opalrc(status);

    /* convert the list to a pmix_info_t array */
    if (NULL != info) {
        pcaddy->ninfo = opal_list_get_size(info);
        if (0 < pcaddy->ninfo) {
            PMIX_INFO_CREATE(pcaddy->info, pcaddy->ninfo);
            n = 0;
            OPAL_LIST_FOREACH(kv, info, opal_value_t) {
                (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN);
                pmix20_value_load(&pcaddy->info[n].value, kv);
                /* advance to the next slot - without this every list
                 * item would overwrite entry 0 */
                ++n;
            }
        }
    }

    /* we are done with the incoming data */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }

    /* provide the answer downward; _info_rel releases pcaddy when the
     * receiver is done with the array */
    if (NULL != opalcaddy->infocbfunc) {
        opalcaddy->infocbfunc(pcaddy->status, pcaddy->info, pcaddy->ninfo,
                              opalcaddy->cbdata, _info_rel, pcaddy);
    }
    OBJ_RELEASE(opalcaddy);
}
/* PMIx server module entry point for queries. Translates the requesting
 * proc and the pmix_info_t request array into their OPAL forms and hands
 * them to the host module's query function; the answer comes back through
 * info_cbfunc. The directives array is currently ignored.
 *
 * Returns PMIX_ERR_NOT_SUPPORTED when no host query function is
 * registered, otherwise the converted OPAL status of the last step run.
 */
static pmix_status_t server_query(pmix_proc_t *proct,
                                  pmix_info_t *info, size_t ninfo,
                                  pmix_info_t *directives, size_t ndirs,
                                  pmix_info_cbfunc_t cbfunc,
                                  void *cbdata)
{
    pmix20_opalcaddy_t *caddy;
    opal_process_name_t requestor;
    opal_value_t *val;
    size_t idx;
    int rc;

    /* nothing to do unless the host registered a query entry point */
    if (NULL == host_module) {
        return PMIX_ERR_NOT_SUPPORTED;
    }
    if (NULL == host_module->query) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* the caddy carries the PMIx callback until the host answers */
    caddy = OBJ_NEW(pmix20_opalcaddy_t);
    caddy->infocbfunc = cbfunc;
    caddy->cbdata = cbdata;

    /* translate the requestor's nspace/rank into an OPAL name */
    rc = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace);
    if (OPAL_SUCCESS != rc) {
        goto failed;
    }
    requestor.vpid = proct->rank;

    /* translate each request entry and queue it on the caddy's list */
    for (idx = 0; idx < ninfo; ++idx) {
        val = OBJ_NEW(opal_value_t);
        opal_list_append(&caddy->info, &val->super);
        val->key = strdup(info[idx].key);
        rc = pmix20_value_unload(val, &info[idx].value);
        if (OPAL_SUCCESS != rc) {
            goto failed;
        }
    }

    /* directives are not forwarded for now - pass the call upwards */
    rc = host_module->query(&requestor, &caddy->info, NULL,
                            info_cbfunc, caddy);
    if (OPAL_SUCCESS == rc) {
        return pmix20_convert_opalrc(rc);
    }

failed:
    OBJ_RELEASE(caddy);
    return pmix20_convert_opalrc(rc);
}
/* Completion callback given to the host's tool_connected function.
 * Converts the OPAL status and process name assigned to the tool into
 * their PMIx forms, forwards them to the PMIx callback stored in the
 * caddy (if any), and releases the caddy. */
static void toolcbfunc(int status,
                       opal_process_name_t proc,
                       void *cbdata)
{
    pmix20_opalcaddy_t *caddy = (pmix20_opalcaddy_t*)cbdata;
    pmix_status_t pstat = pmix20_convert_opalrc(status);
    pmix_proc_t tool;

    /* jobid -> nspace string, vpid -> rank */
    (void)opal_snprintf_jobid(tool.nspace, PMIX_MAX_NSLEN, proc.jobid);
    tool.rank = proc.vpid;

    /* pass it down */
    if (NULL != caddy->toolcbfunc) {
        caddy->toolcbfunc(pstat, &tool, caddy->cbdata);
    }

    OBJ_RELEASE(caddy);
}
/* PMIx server module entry point for tool connection requests. Converts
 * the pmix_info_t qualifiers into a list of opal_value_t and passes the
 * request to the host module; the host's answer is returned to PMIx via
 * toolcbfunc.
 *
 * info/ninfo  qualifiers supplied with the connection request
 * cbfunc      PMIx callback to receive the status and assigned name
 * cbdata      opaque argument for cbfunc
 *
 * On any failure cbfunc (if non-NULL) is called exactly once with the
 * error status and a NULL proc.
 */
static void server_tool_connection(pmix_info_t *info, size_t ninfo,
                                   pmix_tool_connection_cbfunc_t cbfunc,
                                   void *cbdata)
{
    pmix20_opalcaddy_t *opalcaddy;
    size_t n;
    opal_value_t *oinfo;
    int rc;
    pmix_status_t err;

    /* without a host entry point the request cannot be serviced - report
     * that instead of dereferencing a NULL pointer */
    if (NULL == host_module || NULL == host_module->tool_connected) {
        if (NULL != cbfunc) {
            cbfunc(PMIX_ERR_NOT_SUPPORTED, NULL, cbdata);
        }
        return;
    }

    /* setup the caddy */
    opalcaddy = OBJ_NEW(pmix20_opalcaddy_t);
    opalcaddy->toolcbfunc = cbfunc;
    opalcaddy->cbdata = cbdata;

    /* convert the info */
    for (n=0; n < ninfo; n++) {
        oinfo = OBJ_NEW(opal_value_t);
        opal_list_append(&opalcaddy->info, &oinfo->super);
        oinfo->key = strdup(info[n].key);
        if (OPAL_SUCCESS != (rc = pmix20_value_unload(oinfo, &info[n].value))) {
            OBJ_RELEASE(opalcaddy);
            err = pmix20_convert_opalrc(rc);
            if (NULL != cbfunc) {
                cbfunc(err, NULL, cbdata);
            }
            /* the caddy has been released and the caller notified -
             * stop here rather than touching the caddy again */
            return;
        }
    }

    /* pass it up */
    host_module->tool_connected(&opalcaddy->info, toolcbfunc, opalcaddy);
}
#endif

4
opal/mca/pmix/pmix2x/configure.m4 Обычный файл → Исполняемый файл
Просмотреть файл

@ -41,12 +41,14 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[
opal_pmix_pmix2x_save_LDFLAGS=$LDFLAGS
opal_pmix_pmix2x_save_LIBS=$LIBS
opal_pmix_pmix2x_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix2x_ --disable-visibility --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
opal_pmix_pmix2x_args="--without-tests-examples --with-pmix-symbol-prefix=opal_pmix_pmix2x_ --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --enable-embedded-hwloc --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
AS_IF([test "$enable_debug" = "yes"],
[opal_pmix_pmix2x_args="--enable-debug $opal_pmix_pmix2x_args"
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],
[opal_pmix_pmix2x_args="--disable-debug $opal_pmix_pmix2x_args"
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS"])
AS_IF([test "$with_devel_headers" = "yes"], [],
[opal_pmix_pmix2x_args="--enable-embedded-mode $opal_pmix_pmix2x_args"])
CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include -I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS"
OPAL_CONFIG_SUBDIR([$opal_pmix_pmix2x_basedir/pmix],

Просмотреть файл

@ -58,6 +58,8 @@ include src/client/Makefile.am
include src/server/Makefile.am
include src/sec/Makefile.am
include src/event/Makefile.am
include src/common/Makefile.am
include src/tool/Makefile.am
if WANT_DSTORE
include src/sm/Makefile.am
@ -74,6 +76,9 @@ else
lib_LTLIBRARIES = libpmix.la
libpmix_la_SOURCES = $(headers) $(sources)
libpmix_la_LDFLAGS = -version-info $(libpmix_so_version)
endif
if PMIX_TESTS_EXAMPLES
SUBDIRS = . test examples
endif

Просмотреть файл

@ -23,14 +23,14 @@ release=0
# The only requirement is that it must be entirely printable ASCII
# characters and have no white space.
greek=a1
greek=
# If repo_rev is empty, then the repository version number will be
# obtained during "make dist" via the "git describe --tags --always"
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".
repo_rev=gitaf7a389
repo_rev=git4940b48
# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
@ -44,7 +44,7 @@ tarball_version=
# The date when this release was created
date="Jun 16, 2016"
date="Jun 29, 2016"
# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library

Просмотреть файл

@ -316,7 +316,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[
stdarg.h sys/stat.h sys/time.h \
sys/types.h sys/un.h sys/uio.h net/uio.h \
sys/wait.h syslog.h \
time.h unistd.h \
time.h unistd.h dirent.h \
crt_externs.h signal.h \
ioLib.h sockLib.h hostLib.h limits.h])
@ -648,6 +648,17 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[
[pmix_mode=standalone
AC_MSG_RESULT([no])])
# Install tests and examples?
AC_MSG_CHECKING([if tests and examples are to be installed])
AC_ARG_WITH([tests-examples],
[AC_HELP_STRING([--with-tests-examples],
[Whether or not to install the tests and example programs.])])
AS_IF([test ! -z "$with_tests_examples" && test "$with_tests_examples" = "no"],
[pmix_tests=no
AC_MSG_RESULT([no])],
[pmix_tests=yes
AC_MSG_RESULT([yes])])
# Change the symbol prefix?
AC_ARG_WITH([pmix-symbol-prefix],
AC_HELP_STRING([--with-pmix-symbol-prefix=STRING],
@ -827,6 +838,7 @@ AC_DEFUN([PMIX_SET_SYMBOL_PREFIX],[
AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[
AS_IF([test "$pmix_did_am_conditionals" != "yes"],[
AM_CONDITIONAL([PMIX_EMBEDDED_MODE], [test "x$pmix_mode" = "xembedded"])
AM_CONDITIONAL([PMIX_TESTS_EXAMPLES], [test "x$pmix_tests" = "xyes"])
AM_CONDITIONAL([PMIX_COMPILE_TIMING], [test "$WANT_TIMING" = "1"])
AM_CONDITIONAL([PMIX_WANT_MUNGE], [test "$pmix_munge_support" = "1"])
AM_CONDITIONAL([PMIX_WANT_SASL], [test "$pmix_sasl_support" = "1"])

6
opal/mca/pmix/pmix2x/pmix/config/pmix_setup_hwloc.m4 Обычный файл → Исполняемый файл
Просмотреть файл

@ -17,8 +17,12 @@ AC_DEFUN([PMIX_HWLOC_CONFIG],[
[AC_HELP_STRING([--with-hwloc-header=HEADER],
[The value that should be included in C files to include hwloc.h])])
AC_ARG_ENABLE([embedded-hwloc],
[AC_HELP_STRING([--enable-embedded-hwloc],
[Enable use of locally embedded hwloc])])
pmix_hwloc_support=0
AS_IF([test "$enable_embedded_mode" = "yes"],
AS_IF([test "$enable_embedded_hwloc" = "yes"],
[_PMIX_HWLOC_EMBEDDED_MODE],
[_PMIX_HWLOC_EXTERNAL])

6
opal/mca/pmix/pmix2x/pmix/config/pmix_setup_libevent.m4 Обычный файл → Исполняемый файл
Просмотреть файл

@ -17,7 +17,11 @@ AC_DEFUN([PMIX_LIBEVENT_CONFIG],[
[AC_HELP_STRING([--with-libevent-header=HEADER],
[The value that should be included in C files to include event.h])])
AS_IF([test "$enable_embedded_mode" = "yes"],
AC_ARG_ENABLE([embedded-libevent],
[AC_HELP_STRING([--enable-embedded-libevent],
[Enable use of locally embedded libevent])])
AS_IF([test "$enable_embedded_libevent" = "yes"],
[_PMIX_LIBEVENT_EMBEDDED_MODE],
[_PMIX_LIBEVENT_EXTERNAL])

Просмотреть файл

@ -21,7 +21,7 @@
AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/src/api
noinst_PROGRAMS = client dmodex dynamic fault pub server
noinst_PROGRAMS = client dmodex dynamic fault pub tool
client_SOURCES = client.c
client_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
@ -43,9 +43,9 @@ pub_SOURCES = pub.c
pub_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
pub_LDADD = $(top_builddir)/libpmix.la
server_SOURCES = pub.c
server_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
server_LDADD = $(top_builddir)/libpmix.la
tool_SOURCES = tool.c
tool_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
tool_LDADD = $(top_builddir)/libpmix.la
# Remove the example binaries; "server" is kept in the list so stale
# binaries from older builds are still cleaned up.
distclean-local:
	rm -f *.o client dmodex dynamic fault pub server tool

92
opal/mca/pmix/pmix2x/pmix/examples/tool.c Обычный файл
Просмотреть файл

@ -0,0 +1,92 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <pmix_tool.h>
int main(int argc, char **argv)
{
pmix_status_t rc;
pmix_proc_t myproc;
pmix_info_t *info;
size_t ninfo;
/* init us */
if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
fprintf(stderr, "PMIx_tool_init failed: %d\n", rc);
exit(rc);
}
fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
/* query something */
ninfo = 2;
PMIX_INFO_CREATE(info, ninfo);
(void)strncpy(info[0].key, "foobar", PMIX_MAX_KEYLEN);
(void)strncpy(info[1].key, "spastic", PMIX_MAX_KEYLEN);
if (PMIX_SUCCESS != (rc = PMIx_Query_info(info, ninfo))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
if (0 != strncmp(info[0].key, "foobar", PMIX_MAX_KEYLEN)) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs foobar\n",
myproc.nspace, myproc.rank, info[0].key);
}
if (0 != strncmp(info[1].key, "spastic", PMIX_MAX_KEYLEN)) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs spastic\n",
myproc.nspace, myproc.rank, info[1].key);
}
if (PMIX_STRING != info[0].value.type) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[0] wrong type: %d vs %d\n",
myproc.nspace, myproc.rank, info[0].value.type, PMIX_STRING);
}
if (PMIX_STRING != info[1].value.type) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[1] wrong type: %d vs %d\n",
myproc.nspace, myproc.rank, info[1].value.type, PMIX_STRING);
}
if (0 != strcmp(info[0].value.data.string, "0")) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[0] wrong value: %s vs 0\n",
myproc.nspace, myproc.rank, info[1].value.data.string);
}
if (0 != strcmp(info[1].value.data.string, "1")) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info key[1] wrong value: %s vs 1\n",
myproc.nspace, myproc.rank, info[1].value.data.string);
}
PMIX_INFO_FREE(info, ninfo);
done:
/* finalize us */
fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
} else {
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
}
fflush(stderr);
return(rc);
}

Просмотреть файл

@ -17,7 +17,8 @@ include_HEADERS = \
include/pmix.h \
include/pmix_server.h \
include/pmi.h \
include/pmi2.h
include/pmi2.h \
include/pmix_tool.h
include_pmixdir = $(includedir)/pmix
include_pmix_HEADERS = \

Просмотреть файл

@ -406,5 +406,23 @@ BEGIN_C_DECLS
* when done with it */
pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist);
/* Query information about the system in general - can include
* a list of active nspaces, network topology, etc. We assume that
* the host RM will exercise appropriate access control to the
* information. The blocking form of the call will fill the
* returned values into the info array structs. The following
* return status codes are provided:
*
* PMIX_SUCCESS - all data has been returned
* PMIX_ERR_NOT_FOUND - none of the requested data was available
* PMIX_QUERY_PARTIAL_SUCCESS - some of the data has been returned
* PMIX_ERR_NOT_SUPPORTED - the host RM does not support this function
*/
pmix_status_t PMIx_Query_info(pmix_info_t info[], size_t ninfo);
pmix_status_t PMIx_Query_info_nb(pmix_info_t info[], size_t ninfo,
pmix_info_t *directives, size_t ndirectives,
pmix_info_cbfunc_t cbfunc, void *cbdata);
END_C_DECLS
#endif

Просмотреть файл

@ -41,6 +41,8 @@
*
* Additional copyrights may follow
*
* Copyright (c) 2016 IBM Corporation. All rights reserved.
*
* $HEADER$
*/
@ -93,6 +95,9 @@ BEGIN_C_DECLS
/* initialization attributes */
#define PMIX_EVENT_BASE "pmix.evbase" // (struct event_base *) pointer to libevent event_base to use in place
// of the internal progress thread
#define PMIX_SERVER_TOOL_SUPPORT "pmix.srvr.tool" // (bool) The host RM wants to declare itself as willing to
// accept tool connection requests
#define PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (uint32_t) pid of the target server
/* identification attributes */
#define PMIX_USERID "pmix.euid" // (uint32_t) effective user id
@ -147,6 +152,7 @@ BEGIN_C_DECLS
/* size info */
#define PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
#define PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job
#define PMIX_JOB_NUM_APPS "pmix.job.napps" // (uint32_t) #apps in this job
#define PMIX_APP_SIZE "pmix.app.size" // (uint32_t) #procs in this application
#define PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node
#define PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node
@ -217,6 +223,15 @@ BEGIN_C_DECLS
#define PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position
#define PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init
#define PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin
#define PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc
#define PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me
#define PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me
/* query attributes */
#define PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces
#define PMIX_QUERY_JOB_STATUS "pmix.qry.jst" // (pmix_status_t) status of a specified currently executing job
#define PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst" // (char*) request a comma-delimited list of scheduler queues
#define PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst" // (TBD) status of a specified scheduler queue
/**** PMIX ERROR CONSTANTS ****/
/* PMIx errors are always negative, with 0 reserved for success */
@ -265,6 +280,8 @@ typedef int pmix_status_t;
#define PMIX_EVENT_PARTIAL_ACTION_TAKEN (PMIX_ERR_BASE - 31)
#define PMIX_EVENT_ACTION_DEFERRED (PMIX_ERR_BASE - 32)
#define PMIX_EVENT_ACTION_COMPLETE (PMIX_ERR_BASE - 33)
/* used by the query system */
#define PMIX_QUERY_PARTIAL_SUCCESS (PMIX_ERR_BASE - 34)
/* define a starting point for PMIx internal error codes
@ -278,7 +295,6 @@ typedef int pmix_status_t;
* specific value as the value of the constant may change */
#define PMIX_EXTERNAL_ERR_BASE -2000
/**** PMIX DATA TYPES ****/
typedef enum {
PMIX_UNDEF = 0,
@ -869,6 +885,16 @@ typedef void (*pmix_evhdlr_reg_cbfunc_t)(pmix_status_t status,
typedef void (*pmix_value_cbfunc_t)(pmix_status_t status,
pmix_value_t *kv, void *cbdata);
/* define a callback function for calls to PMIx_Query. The status
* indicates if requested data was found or not - an array of
* pmix_info_t will contain the key/value pairs. */
typedef void (*pmix_info_cbfunc_t)(pmix_status_t status,
pmix_info_t *info, size_t ninfo,
void *cbdata,
pmix_release_cbfunc_t release_fn,
void *release_cbdata);
/**** COMMON SUPPORT FUNCTIONS ****/
/* Register an event handler to report events. Three types of events
* can be reported:

Просмотреть файл

@ -269,7 +269,6 @@ typedef pmix_status_t (*pmix_server_notify_event_fn_t)(pmix_status_t code,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Callback function for incoming connection requests from
* local clients */
typedef void (*pmix_connection_cbfunc_t)(int incoming_sd, void *cbdata);
@ -288,6 +287,40 @@ typedef pmix_status_t (*pmix_server_listener_fn_t)(int listening_sd,
pmix_connection_cbfunc_t cbfunc,
void *cbdata);
/* Query information from the resource manager. The query will include
* the nspace/rank of the proc that is requesting the info, an
* array of pmix_info_t describing the request, an optional array
* of pmix_info_t directives, and a callback function/data for the return. */
typedef pmix_status_t (*pmix_server_query_fn_t)(pmix_proc_t *proct,
pmix_info_t *info, size_t ninfo,
pmix_info_t *directives, size_t ndirs,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
/* Callback function for incoming tool connections - the host
* RM shall provide an nspace/rank for the connecting tool. We
* assume that a rank=0 will be the normal assignment, but allow
* for the future possibility of a parallel set of tools
* connecting, and thus each proc requiring a rank*/
typedef void (*pmix_tool_connection_cbfunc_t)(pmix_status_t status,
pmix_proc_t *proc, void *cbdata);
/* Register that a tool has connected to the server, and request
* that the tool be assigned an nspace/rank for further interactions.
* The optional pmix_info_t array can be used to pass qualifiers for
* the connection request:
*
* (a) PMIX_USERID - effective userid of the tool
* (b) PMIX_GRPID - effective groupid of the tool
* (c) PMIX_FWD_STDOUT - forward any stdout to this tool
* (d) PMIX_FWD_STDERR - forward any stderr to this tool
* (e) PMIX_FWD_STDIN - forward stdin from this tool to any
* processes spawned on its behalf
*/
typedef void (*pmix_server_tool_connection_fn_t)(pmix_info_t *info, size_t ninfo,
pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
typedef struct pmix_server_module_2_0_0_t {
pmix_server_client_connected_fn_t client_connected;
pmix_server_client_finalized_fn_t client_finalized;
@ -304,6 +337,8 @@ typedef struct pmix_server_module_2_0_0_t {
pmix_server_deregister_events_fn_t deregister_events;
pmix_server_notify_event_fn_t notify_event;
pmix_server_listener_fn_t listener;
pmix_server_query_fn_t query;
pmix_server_tool_connection_fn_t tool_connected;
} pmix_server_module_t;
/**** SERVER SUPPORT INIT/FINALIZE FUNCTIONS ****/
@ -314,7 +349,10 @@ typedef struct pmix_server_module_2_0_0_t {
* array of pmix_info_t structs is used to pass
* additional info that may be required by the server
* when initializing - e.g., a user/group ID to set
* on the rendezvous file for the Unix Domain Socket */
* on the rendezvous file for the Unix Domain Socket. It
* also may include the PMIX_SERVER_TOOL_SUPPORT key, thereby
* indicating that the daemon is willing to accept connection
* requests from tools */
pmix_status_t PMIx_server_init(pmix_server_module_t *module,
pmix_info_t info[], size_t ninfo);

109
opal/mca/pmix/pmix2x/pmix/include/pmix_tool.h Обычный файл
Просмотреть файл

@ -0,0 +1,109 @@
/*
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2015 Artem Y. Polyakov <artpol84@gmail.com>.
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer listed
* in this license in the documentation and/or other materials
* provided with the distribution.
*
* - Neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* The copyright holders provide no reassurances that the source code
* provided does not infringe any patent, copyright, or any other
* intellectual property rights of third parties. The copyright holders
* disclaim any liability to any recipient for claims brought against
* recipient by any third party for infringement of that parties
* intellectual property rights.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $HEADER$
*
* PMIx provides a "function-shipping" approach to support for
* implementing the server-side of the protocol. This method allows
* resource managers to implement the server without being burdened
* with PMIx internal details. Accordingly, each PMIx API is mirrored
* here in a function call to be provided by the server. When a
* request is received from the client, the corresponding server function
* will be called with the information.
*
* Any functions not supported by the RM can be indicated by a NULL for
* the function pointer. Client calls to such functions will have a
* "not supported" error returned.
*/
#ifndef PMIx_TOOL_API_H
#define PMIx_TOOL_API_H
#include <pmix/autogen/config.h>
/* Symbol transforms */
#include <pmix/rename.h>
/* Structure and constant definitions */
#include <pmix/pmix_common.h>
/* provide access to the rest of the client functions */
#include <pmix.h>
BEGIN_C_DECLS
/**** TOOL INIT/FINALIZE FUNCTIONS ****/
/* Initialize the PMIx tool, returning the process identifier assigned
* to this tool in the provided pmix_proc_t struct.
*
* When called the PMIx tool library will check for the required connection
* information of the local PMIx server and will establish the connection.
* If the information is not found, or the server connection fails, then
* an appropriate error constant will be returned.
*
* If successful, the function will return PMIX_SUCCESS and will fill the
* provided structure with the server-assigned namespace and rank of the tool.
*
* Note that the PMIx tool library is reference counted, and so multiple
* calls to PMIx_tool_init are allowed. Thus, one way to obtain the namespace and
* rank of the process is to simply call PMIx_tool_init with a non-NULL parameter.
*
* The info array is used to pass user requests pertaining to the init
* and subsequent operations. Passing a _NULL_ value for the array pointer
* is supported if no directives are desired.
*/
pmix_status_t PMIx_tool_init(pmix_proc_t *proc,
pmix_info_t info[], size_t ninfo);
/* Finalize the PMIx tool library, closing the connection to the local server.
* An error code will be returned if, for some reason, the connection
* cannot be closed.
*
* This function takes no arguments: unlike PMIx_tool_init, there is no
* info array for passing directives to the finalize operation. */
pmix_status_t PMIx_tool_finalize(void);
END_C_DECLS
#endif

Просмотреть файл

@ -179,7 +179,15 @@ pmix_status_t pmix_bfrop_copy_payload(pmix_buffer_t *dest, pmix_buffer_t *src)
bool pmix_value_cmp(pmix_value_t *p, pmix_value_t *p1)
{
bool rc = false;
if (p->type != p1->type) {
return rc;
}
switch (p->type) {
case PMIX_UNDEF:
rc = true;
break;
case PMIX_BOOL:
rc = (p->data.flag == p1->data.flag);
break;
@ -238,6 +246,8 @@ pmix_status_t pmix_value_xfer(pmix_value_t *p, pmix_value_t *src)
/* copy the right field */
p->type = src->type;
switch (src->type) {
case PMIX_UNDEF:
break;
case PMIX_BOOL:
p->data.flag = src->data.flag;
break;

Просмотреть файл

@ -437,6 +437,8 @@ static pmix_status_t pack_val(pmix_buffer_t *buffer,
pmix_status_t ret;
switch (p->type) {
case PMIX_UNDEF:
break;
case PMIX_BOOL:
if (PMIX_SUCCESS != (ret = pmix_bfrop_pack_buffer(buffer, &p->data.flag, 1, PMIX_BOOL))) {
return ret;

Просмотреть файл

@ -527,6 +527,8 @@ pmix_status_t pmix_bfrop_unpack_status(pmix_buffer_t *buffer, void *dest,
m = 1;
switch (val->type) {
case PMIX_UNDEF:
break;
case PMIX_BOOL:
if (PMIX_SUCCESS != (ret = pmix_bfrop_unpack_buffer(buffer, &val->data.flag, &m, PMIX_BOOL))) {
return ret;

Просмотреть файл

@ -1026,41 +1026,41 @@ static pmix_status_t send_connect_ack(int sd)
}
/* receive the status reply */
rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(int));
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
return rc;
}
/* see if they want us to do the handshake */
if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) {
if (NULL == pmix_sec.client_handshake) {
return PMIX_ERR_HANDSHAKE_FAILED;
}
if (PMIX_SUCCESS != (rc = pmix_sec.client_handshake(sd))) {
rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(int));
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
return rc;
}
} else if (PMIX_SUCCESS != reply) {
return reply;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: RECV CONNECT CONFIRMATION");
/* see if they want us to do the handshake */
if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) {
if (NULL == pmix_sec.client_handshake) {
return PMIX_ERR_HANDSHAKE_FAILED;
}
if (PMIX_SUCCESS != (rc = pmix_sec.client_handshake(sd))) {
return rc;
}
} else if (PMIX_SUCCESS != reply) {
return reply;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: RECV CONNECT CONFIRMATION");
/* receive our index into the server's client array */
rc = pmix_usock_recv_blocking(sd, (char*)&pmix_globals.pindex, sizeof(int));
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
return rc;
}
if (sockopt) {
/* return the socket to normal */
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) {
return PMIX_ERR_UNREACH;
rc = pmix_usock_recv_blocking(sd, (char*)&pmix_globals.pindex, sizeof(int));
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
return rc;
}
if (sockopt) {
/* return the socket to normal */
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) {
return PMIX_ERR_UNREACH;
}
}
}
return PMIX_SUCCESS;
return PMIX_SUCCESS;
}
void pmix_client_process_nspace_blob(const char *nspace, pmix_buffer_t *bptr)

Просмотреть файл

@ -0,0 +1,11 @@
#
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources += \
src/common/pmix_query.c

Просмотреть файл

@ -0,0 +1,251 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <src/include/pmix_config.h>
#include <src/include/types.h>
#include <pmix/autogen/pmix_stdint.h>
#include <src/include/pmix_socket_errno.h>
#include <pmix.h>
#include <pmix/pmix_common.h>
#include <pmix_server.h>
#include "src/util/argv.h"
#include "src/util/error.h"
#include "src/util/output.h"
#include "src/buffer_ops/buffer_ops.h"
#include "src/usock/usock.h"
#include "src/client/pmix_client_ops.h"
#include "src/server/pmix_server_ops.h"
#include "src/include/pmix_globals.h"
/* Forward declaration of the completion callback used by the blocking
 * PMIx_Query_info: it is passed to PMIx_Query_info_nb with the blocking
 * call's own caddy as cbdata. The definition is at the end of this file. */
static void wait_cbfunc(pmix_status_t status,
pmix_info_t *info, size_t ninfo,
void *cbdata,
pmix_release_cbfunc_t release_fn,
void *release_cbdata);
/* Blocking query.
 *
 * Issues the request through PMIx_Query_info_nb and waits until
 * wait_cbfunc reports completion. wait_cbfunc copies each returned value
 * into the entry of the caller's info array that has the same key, so the
 * answers are delivered in place.
 *
 * info  - array of keys to be queried; values are filled in on return
 * ninfo - number of elements in the info array
 *
 * Returns PMIX_ERR_INIT if the library has not been initialized,
 * PMIX_ERR_NOMEM if the tracking caddy cannot be allocated, the error
 * from PMIx_Query_info_nb if the request could not be issued, and
 * otherwise the status recorded by the completion callback. */
PMIX_EXPORT pmix_status_t PMIx_Query_info(pmix_info_t *info, size_t ninfo)
{
    pmix_query_caddy_t *cd;
    pmix_status_t rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query blocking version");

    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }

    /* prep the caddy */
    cd = PMIX_NEW(pmix_query_caddy_t);
    if (NULL == cd) {
        return PMIX_ERR_NOMEM;
    }
    cd->cbfunc = wait_cbfunc;
    cd->cbdata = cd;

    /* Use the non-blocking form as our engine */
    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query requesting %d values",
                        (int)ninfo);
    cd->info = info;
    cd->ninfo = ninfo;
    cd->active = true;
    if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(cd->info, cd->ninfo, NULL, 0, wait_cbfunc, cd))) {
        PMIX_RELEASE(cd);
        return rc;
    }
    PMIX_WAIT_FOR_COMPLETION(cd->active);

    /* the RM always returns the data in the info array */
    rc = cd->status;

    /* wait_cbfunc has already copied the values we wanted and stored the
     * provider's release function and its argument in relcbfunc/cbdata.
     * Invoke it now so the returned data is not leaked. */
    if (NULL != cd->relcbfunc) {
        cd->relcbfunc(cd->cbdata);
    }

    PMIX_RELEASE(cd);
    return rc;
}
/* Release function handed to the query callback together with the
 * results caddy created in query_cbfunc. It frees the unpacked info
 * array (if any) and then drops the caddy itself.
 *
 * cbdata - the pmix_query_caddy_t holding the results */
static void relcbfunc(void *cbdata)
{
    pmix_query_caddy_t *results = (pmix_query_caddy_t*)cbdata;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query release callback");

    /* only free the array when one was actually created */
    if (NULL != results->info) {
        PMIX_INFO_FREE(results->info, results->ninfo);
    }

    PMIX_RELEASE(results);
}
/* Receive handler for the server's reply to a query request.
 *
 * Unpacks the returned status and, on success, the returned info array
 * into a freshly created results caddy, then hands them to the callback
 * stored in the request caddy. The callback also receives relcbfunc and
 * the results caddy so it can release the data when it is done.
 *
 * peer, hdr - unused here
 * buf       - buffer holding the packed reply
 * cbdata    - the request caddy created in PMIx_Query_info_nb; it is
 *             released before returning
 *
 * Any failure to unpack the reply is reported to the callback as the
 * status, so the caller never sees an unset or stale value. */
static void query_cbfunc(struct pmix_peer_t *peer,
                         pmix_usock_hdr_t *hdr,
                         pmix_buffer_t *buf, void *cbdata)
{
    pmix_query_caddy_t *cd = (pmix_query_caddy_t*)cbdata;
    pmix_status_t rc;
    pmix_query_caddy_t *results;
    int cnt;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query cback from server");

    results = PMIX_NEW(pmix_query_caddy_t);

    /* unpack the status */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->status, &cnt, PMIX_STATUS))) {
        PMIX_ERROR_LOG(rc);
        results->status = rc;
        goto complete;
    }
    if (PMIX_SUCCESS != results->status) {
        goto complete;
    }

    /* unpack any returned data */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &results->ninfo, &cnt, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        results->status = rc;
        /* the count cannot be trusted after a failed unpack */
        results->ninfo = 0;
        goto complete;
    }
    if (0 < results->ninfo) {
        PMIX_INFO_CREATE(results->info, results->ninfo);
        if (NULL == results->info) {
            results->status = PMIX_ERR_NOMEM;
            results->ninfo = 0;
            goto complete;
        }
        cnt = results->ninfo;
        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, results->info, &cnt, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
            results->status = rc;
            goto complete;
        }
    }

complete:
    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query cback from server releasing");
    /* release the caller */
    if (NULL != cd->cbfunc) {
        cd->cbfunc(results->status, results->info, results->ninfo, cd->cbdata, relcbfunc, results);
    } else {
        /* nobody will ever call the release function, so do it here */
        relcbfunc(results);
    }
    PMIX_RELEASE(cd);
}
/* Non-blocking query.
 *
 * When running as the server, the request is passed straight to the
 * host's query entry point. Otherwise the request is packed as
 * [PMIX_QUERY_CMD, ninfo, info[], ndirectives, directives[]] and sent to
 * the local server; query_cbfunc handles the reply and invokes cbfunc.
 *
 * info, ninfo             - keys being queried (must be non-empty)
 * directives, ndirectives - optional directives for the query
 * cbfunc, cbdata          - callback to receive the results
 *
 * Returns PMIX_ERR_INIT if the library is not initialized,
 * PMIX_ERR_BAD_PARAM for an empty query or for a non-zero directive count
 * with a NULL directive array, PMIX_ERR_NOT_SUPPORTED if the host provides
 * no query function, the host's own status when acting as the server, any
 * packing error, and PMIX_SUCCESS once the request has been issued. */
PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_info_t info[], size_t ninfo,
                                             pmix_info_t *directives, size_t ndirectives,
                                             pmix_info_cbfunc_t cbfunc, void *cbdata)
{
    pmix_query_caddy_t *cd;
    pmix_cmd_t cmd = PMIX_QUERY_CMD;
    pmix_buffer_t *msg;
    pmix_status_t rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query non-blocking");

    if (pmix_globals.init_cntr <= 0) {
        return PMIX_ERR_INIT;
    }

    if (0 == ninfo || NULL == info) {
        return PMIX_ERR_BAD_PARAM;
    }
    /* a directive count without an array would be dereferenced below */
    if (0 < ndirectives && NULL == directives) {
        return PMIX_ERR_BAD_PARAM;
    }

    /* if we are the server, then we just issue the query and
     * return the response */
    if (pmix_globals.server) {
        if (NULL == pmix_host_server.query) {
            /* nothing we can do */
            return PMIX_ERR_NOT_SUPPORTED;
        }
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "pmix:query handed to RM");
        /* pass the host's answer back: if it refused the request the
         * callback will never fire, and reporting success would leave a
         * blocking caller waiting forever */
        return pmix_host_server.query(&pmix_globals.myid,
                                      info, ninfo,
                                      directives, ndirectives,
                                      cbfunc, cbdata);
    }

    /* if we are a client, then relay this request to the server */
    cd = PMIX_NEW(pmix_query_caddy_t);
    cd->cbfunc = cbfunc;
    cd->cbdata = cbdata;
    msg = PMIX_NEW(pmix_buffer_t);

    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &cmd, 1, PMIX_CMD))) {
        goto failed;
    }
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
        goto failed;
    }
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
        goto failed;
    }
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ndirectives, 1, PMIX_SIZE))) {
        goto failed;
    }
    if (0 < ndirectives) {
        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, directives, ndirectives, PMIX_INFO))) {
            goto failed;
        }
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query sending to server");
    PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, query_cbfunc, cd);
    return PMIX_SUCCESS;

failed:
    /* single exit for every packing failure */
    PMIX_ERROR_LOG(rc);
    PMIX_RELEASE(msg);
    PMIX_RELEASE(cd);
    return rc;
}
/* Completion callback for the blocking PMIx_Query_info.
 *
 * Records the returned status in the waiting caddy and copies each
 * returned value into the request entry that carries the same key. The
 * release function and its argument are stored in the caddy, and the
 * caddy is finally marked inactive so the waiter can proceed.
 *
 * status                    - status of the query
 * results, nresults         - returned key/value pairs
 * cbdata                    - the waiting pmix_query_caddy_t
 * release_fn/release_cbdata - how to release the returned data */
static void wait_cbfunc(pmix_status_t status,
                        pmix_info_t *results, size_t nresults,
                        void *cbdata,
                        pmix_release_cbfunc_t release_fn,
                        void *release_cbdata)
{
    pmix_query_caddy_t *req = (pmix_query_caddy_t*)cbdata;
    pmix_status_t xfer = PMIX_SUCCESS;
    size_t r, q;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query wait callback");

    req->status = status;

    /* The results are expected in request order but that is not
     * guaranteed, so each result is matched to its request by key.
     * The first failed transfer ends the whole copy. */
    for (r = 0; PMIX_SUCCESS == xfer && r < nresults; r++) {
        for (q = 0; q < req->ninfo; q++) {
            if (0 != strncmp(results[r].key, req->info[q].key, PMIX_MAX_KEYLEN)) {
                continue;
            }
            xfer = pmix_value_xfer(&req->info[q].value, &results[r].value);
            if (PMIX_SUCCESS != xfer) {
                req->status = xfer;
            }
            /* only the first matching request entry is filled */
            break;
        }
    }

    req->relcbfunc = release_fn;
    req->cbdata = release_cbdata;
    req->active = false;
}

Просмотреть файл

@ -191,3 +191,17 @@ PMIX_CLASS_INSTANCE(pmix_shift_caddy_t,
PMIX_CLASS_INSTANCE(pmix_info_caddy_t,
pmix_list_item_t,
NULL, NULL);
/* Constructor for pmix_query_caddy_t: puts every field except the
 * embedded event into a known state so that no caller ever reads an
 * indeterminate value (the status in particular is read by query
 * callbacks). */
static void qcon(pmix_query_caddy_t *p)
{
    p->active = false;
    p->status = PMIX_SUCCESS;
    p->info = NULL;
    p->ninfo = 0;
    p->directives = NULL;
    p->ndirs = 0;
    p->cbfunc = NULL;
    p->cbdata = NULL;
    p->relcbfunc = NULL;
}
PMIX_CLASS_INSTANCE(pmix_query_caddy_t,
                    pmix_object_t,
                    qcon, NULL);

Просмотреть файл

@ -66,6 +66,7 @@ typedef enum {
PMIX_NOTIFY_CMD,
PMIX_REGEVENTS_CMD,
PMIX_DEREGEVENTS_CMD,
PMIX_QUERY_CMD
} pmix_cmd_t;
/* define a set of flags to direct collection
@ -214,6 +215,22 @@ typedef struct {
} pmix_server_caddy_t;
PMIX_CLASS_DECLARATION(pmix_server_caddy_t);
/* caddy for query requests: carries a query, its completion state and
 * the callbacks needed to deliver and later release the results */
typedef struct {
pmix_object_t super;              /* base object (class is declared below) */
pmix_event_t ev;                  /* NOTE(review): presumably used to thread-shift the caddy - confirm */
volatile bool active;             /* true while a blocking caller is waiting for completion */
pmix_status_t status;             /* status of the query */
pmix_info_t *info;                /* array of key/value pairs (request or results) */
size_t ninfo;                     /* number of elements in info */
pmix_info_t *directives;          /* optional directives accompanying the query */
size_t ndirs;                     /* number of elements in directives */
pmix_info_cbfunc_t cbfunc;        /* callback that receives the results */
pmix_release_cbfunc_t relcbfunc;  /* function that releases returned data */
void *cbdata;                     /* opaque argument for the callbacks */
} pmix_query_caddy_t;
PMIX_CLASS_DECLARATION(pmix_query_caddy_t);
/* define a tracker for collective operations */
typedef struct {
pmix_list_item_t super;

Просмотреть файл

@ -66,6 +66,7 @@ pmix_server_globals_t pmix_server_globals = {{{0}}};
// local variables
static char *security_mode = NULL;
static pid_t mypid;
// local functions for connection support
static void server_message_handler(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
@ -116,7 +117,6 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
{
int debug_level;
char *tdir, *evar;
pid_t pid;
char * pmix_pid;
pmix_listener_t *listener;
@ -139,10 +139,10 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
(void)strncpy(pmix_globals.myid.nspace, evar, PMIX_MAX_NSLEN);
}
/* look for our rank, if one was given */
pid = getpid();
mypid = getpid();
if (NULL == (evar = getenv("PMIX_SERVER_RANK"))) {
/* use our pid */
pmix_globals.myid.rank = pid;
pmix_globals.myid.rank = mypid;
} else {
pmix_globals.myid.rank = strtol(evar, NULL, 10);
}
@ -193,7 +193,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
/* for now, just setup the v1.1 series rendezvous point
* we use the pid to reduce collisions */
if (0 > asprintf(&pmix_pid, "%s/pmix-%d", tdir, pid)) {
if (0 > asprintf(&pmix_pid, "%s/pmix-%d", tdir, mypid)) {
return PMIX_ERR_NOMEM;
}
if ((strlen(pmix_pid) + 1) > sizeof(listener->address.sun_path)-1) {
@ -209,7 +209,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
return PMIX_ERR_NOMEM;
}
listener->varname = strdup("PMIX_SERVER_URI");
listener->protocol_type = 1;
listener->protocol = PMIX_PROTOCOL_V1;
pmix_list_append(&pmix_server_globals.listeners, &listener->super);
free(pmix_pid);
@ -224,9 +224,14 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
{
pmix_usock_posted_recv_t *req;
pmix_status_t rc;
size_t n;
size_t n, m;
pmix_kval_t kv;
pmix_listener_t *lt;
int myhostnamelen = 10;
char myhostname[myhostnamelen];
char *pmix_pid, *tdir;
char **protected = NULL;
bool protect;
++pmix_globals.init_cntr;
if (1 < pmix_globals.init_cntr) {
@ -255,7 +260,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
}
/* check the info keys for a directive about the uid/gid
* to be set for the rendezvous file */
* to be set for the rendezvous file, and for indication
* of willingness to support tool connections */
if (NULL != info) {
for (n=0; n < ninfo; n++) {
if (0 == strcmp(info[n].key, PMIX_USERID)) {
@ -264,17 +270,52 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
lt->owner = info[n].value.data.uint32;
lt->owner_given = true;
}
/* push this onto our protected list of keys not
* to be passed to the clients */
pmix_argv_append_nosize(&protected, PMIX_USERID);
} else if (0 == strcmp(info[n].key, PMIX_GRPID)) {
/* the grpid is in the uint32_t storage */
PMIX_LIST_FOREACH(lt, &pmix_server_globals.listeners, pmix_listener_t) {
lt->group = info[n].value.data.uint32;
lt->group_given = true;
}
/* push this onto our protected list of keys not
* to be passed to the clients */
pmix_argv_append_nosize(&protected, PMIX_GRPID);
} else if (0 == strcmp(info[n].key, PMIX_SOCKET_MODE)) {
/* socket mode is in the uint32_t storage */
PMIX_LIST_FOREACH(lt, &pmix_server_globals.listeners, pmix_listener_t) {
lt->mode = info[n].value.data.uint32;
}
} else if (0 == strcmp(info[n].key, PMIX_SERVER_TOOL_SUPPORT)) {
pmix_listener_t *tl = PMIX_NEW(pmix_listener_t);
tl -> address.sun_family = AF_UNIX;
tl->protocol = PMIX_PROTOCOL_TOOL;
/* Get up to 10 chars of hostname.*/
gethostname(myhostname, myhostnamelen);
/* need to put this in the global tmpdir as opposed to
* where the server tmpdir might be */
if (NULL == (tdir = getenv("TMPDIR"))) {
if (NULL == (tdir = getenv("TEMP"))) {
if (NULL == (tdir = getenv("TMP"))) {
tdir = "/tmp";
}
}
}
if (0 > asprintf(&pmix_pid, "%s/pmix.%s.tool.%d", tdir, myhostname, mypid)) {
return PMIX_ERR_NOMEM;
}
if ((strlen(pmix_pid) + 1) > sizeof(tl->address.sun_path)-1) {
free(pmix_pid);
return PMIX_ERR_INVALID_LENGTH;
}
snprintf(tl->address.sun_path, sizeof(tl->address.sun_path) - 1, "%s", pmix_pid);
free(pmix_pid);
pmix_server_globals.tool_connections_allowed = true;
pmix_list_append(&pmix_server_globals.listeners, &tl->super);
/* push this onto our protected list of keys not
* to be passed to the clients */
pmix_argv_append_nosize(&protected, PMIX_SERVER_TOOL_SUPPORT);
}
}
}
@ -299,18 +340,29 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module,
if (NULL != info) {
PMIX_CONSTRUCT(&kv, pmix_kval_t);
for (n=0; n < ninfo; n++) {
if (0 == strcmp(info[n].key, PMIX_USERID))
continue;
if (0 == strcmp(info[n].key, PMIX_GRPID))
continue;
if (0 == strcmp(info[n].key, PMIX_SOCKET_MODE))
/* check the list of protected keys */
protect = false;
if (NULL != protected) {
for (m=0; NULL != protected[m]; m++) {
if (0 == strcmp(info[n].key, protected[m])) {
protect = true;
break;
}
}
}
if (protect) {
continue;
}
/* store and pass along to every client */
kv.key = info[n].key;
kv.value = &info[n].value;
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pmix_server_globals.gdata, &kv, 1, PMIX_KVAL))) {
PMIX_ERROR_LOG(rc);
/* protect the incoming data */
kv.key = NULL;
kv.value = NULL;
PMIX_DESTRUCT(&kv);
PMIx_server_finalize();
return rc;
}
}
@ -357,6 +409,7 @@ static void cleanup_server_state(void)
PMIX_EXPORT pmix_status_t PMIx_server_finalize(void)
{
if (1 != pmix_globals.init_cntr) {
--pmix_globals.init_cntr;
return PMIX_SUCCESS;
@ -1954,6 +2007,44 @@ static void notifyerror_cbfunc (pmix_status_t status, void *cbdata)
PMIX_RELEASE(cd);
}
/* Callback given to pmix_server_query for a PMIX_QUERY_CMD request.
 *
 * Packs the status and the returned info array into a reply, queues it
 * to the requesting peer, tells the provider it may release its data,
 * and frees the tracking objects.
 *
 * status                    - status of the query
 * info, ninfo               - returned data
 * cbdata                    - the pmix_query_caddy_t for the request; its
 *                             cbdata is the pmix_server_caddy_t of the peer
 * release_fn/release_cbdata - provider's release function and argument
 *
 * NOTE(review): if packing fails part way, the partially filled reply is
 * still sent so the requester does not hang. */
static void query_cbfunc(pmix_status_t status,
                         pmix_info_t *info, size_t ninfo,
                         void *cbdata,
                         pmix_release_cbfunc_t release_fn,
                         void *release_cbdata)
{
    pmix_query_caddy_t *qcd = (pmix_query_caddy_t*)cbdata;
    pmix_server_caddy_t *cd = (pmix_server_caddy_t*)qcd->cbdata;
    pmix_buffer_t *reply = PMIX_NEW(pmix_buffer_t);
    pmix_status_t rc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:query callback with status %d", status);

    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &status, 1, PMIX_STATUS))) {
        PMIX_ERROR_LOG(rc);
        goto complete;
    }
    /* pack the returned data */
    if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, &ninfo, 1, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        goto complete;
    }
    if (0 < ninfo) {
        if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(reply, info, ninfo, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
        }
    }

complete:
    // send reply
    PMIX_SERVER_QUEUE_REPLY(cd->peer, cd->hdr.tag, reply);

    /* we no longer need the provider's data, so let it be released -
     * otherwise it would never be freed */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }

    // cleanup
    PMIX_INFO_FREE(qcd->info, qcd->ninfo);
    PMIX_RELEASE(qcd);
    PMIX_RELEASE(cd);
}
/* the switchyard is the primary message handling function. Its purpose
* is to take incoming commands (packed into a buffer), unpack them,
* and then call the corresponding host server's function to execute
@ -2124,6 +2215,7 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
}
return rc;
}
if (PMIX_DEREGEVENTS_CMD == cmd) {
pmix_server_deregister_events(peer, buf);
return PMIX_SUCCESS;
@ -2134,6 +2226,13 @@ static pmix_status_t server_switchyard(pmix_peer_t *peer, uint32_t tag,
rc = pmix_server_event_recvd_from_client(peer, buf, notifyerror_cbfunc, cd);
return rc;
}
if (PMIX_QUERY_CMD == cmd) {
PMIX_PEER_CADDY(cd, peer, tag);
rc = pmix_server_query(peer, buf, query_cbfunc, cd);
return rc;
}
return PMIX_ERR_NOT_SUPPORTED;
}
@ -2142,7 +2241,7 @@ static void server_message_handler(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr
{
pmix_peer_t *peer = (pmix_peer_t*)pr;
pmix_buffer_t *reply;
int rc;
pmix_status_t rc;
pmix_output_verbose(2, pmix_globals.debug_output,
"SWITCHYARD for %s:%d:%d",
@ -2153,7 +2252,7 @@ static void server_message_handler(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr
/* send the return, if there was an error returned */
if (PMIX_SUCCESS != rc) {
reply = PMIX_NEW(pmix_buffer_t);
pmix_bfrop.pack(reply, &rc, 1, PMIX_INT);
pmix_bfrop.pack(reply, &rc, 1, PMIX_STATUS);
PMIX_SERVER_QUEUE_REPLY(peer, hdr->tag, reply);
}
}

Просмотреть файл

@ -66,6 +66,7 @@
static void* listen_thread(void *obj);
static void listener_cb(int incoming_sd, void *cbdata);
static void connection_handler(int incoming_sd, short flags, void* cbdata);
static void tool_handler(int incoming_sd, short flags, void* cbdata);
static char *myversion = NULL;
static pthread_t engine;
@ -87,6 +88,7 @@ pmix_status_t pmix_start_listening(pmix_listener_t *lt)
return PMIX_ERROR;
}
addrlen = sizeof(struct sockaddr_un);
if (bind(lt->socket, (struct sockaddr*)address, addrlen) < 0) {
printf("%s:%d bind() failed\n", __FILE__, __LINE__);
@ -163,10 +165,11 @@ pmix_status_t pmix_start_listening(pmix_listener_t *lt)
return PMIX_ERR_OUT_OF_RESOURCE;
}
/* fork off the listener thread */
pmix_server_globals.listen_thread_active = true;
if (0 > pthread_create(&engine, NULL, listen_thread, NULL)) {
pmix_server_globals.listen_thread_active = false;
return PMIX_ERROR;
} else {
pmix_server_globals.listen_thread_active = true;
}
}
@ -207,7 +210,6 @@ void pmix_stop_listening(void)
CLOSE_THE_SOCKET(lt->socket);
lt->socket = -1;
}
return;
}
static void* listen_thread(void *obj)
@ -222,6 +224,7 @@ static void* listen_thread(void *obj)
pmix_output_verbose(8, pmix_globals.debug_output,
"listen_thread: active");
while (pmix_server_globals.listen_thread_active) {
FD_ZERO(&readfds);
max = -1;
@ -278,9 +281,14 @@ static void* listen_thread(void *obj)
* OS might start rejecting connections due to timeout.
*/
pending_connection = PMIX_NEW(pmix_pending_connection_t);
pending_connection->protocol = lt->protocol_type;
event_assign(&pending_connection->ev, pmix_globals.evbase, -1,
EV_WRITE, connection_handler, pending_connection);
pending_connection->protocol = lt->protocol;
if (PMIX_PROTOCOL_TOOL == lt->protocol) {
event_assign(&pending_connection->ev, pmix_globals.evbase, -1,
EV_WRITE, tool_handler, pending_connection);
} else {
event_assign(&pending_connection->ev, pmix_globals.evbase, -1,
EV_WRITE, connection_handler, pending_connection);
}
pending_connection->sd = accept(lt->socket,
(struct sockaddr*)&(pending_connection->addr),
&addrlen);
@ -325,37 +333,153 @@ static void listener_cb(int incoming_sd, void *cbdata)
incoming_sd);
pending_connection = PMIX_NEW(pmix_pending_connection_t);
pending_connection->sd = incoming_sd;
pending_connection->protocol = lt->protocol_type;
pending_connection->protocol = lt->protocol;
event_assign(&pending_connection->ev, pmix_globals.evbase, -1,
EV_WRITE, connection_handler, pending_connection);
event_active(&pending_connection->ev, EV_WRITE, 1);
}
/* process the callback with tool connection info */
static void process_cbfunc(int sd, short args, void *cbdata)
{
pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata;
pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cd->cbdata;
pmix_nspace_t *nptr;
pmix_rank_info_t *info;
int rc;
/* send this status as well so they don't hang */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&cd->status, sizeof(pmix_status_t)))) {
PMIX_ERROR_LOG(rc);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* if the request failed, then we are done */
if (PMIX_SUCCESS != cd->status) {
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* send the nspace back to the tool */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, cd->proc.nspace, PMIX_MAX_NSLEN+1))) {
PMIX_ERROR_LOG(rc);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* send my nspace back to the tool */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, pmix_globals.myid.nspace, PMIX_MAX_NSLEN+1))) {
PMIX_ERROR_LOG(rc);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* send my rank back to the tool */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&pmix_globals.myid.rank, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
CLOSE_THE_SOCKET(pnd->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
return;
}
/* set the socket non-blocking for all further operations */
pmix_usock_set_nonblocking(pnd->sd);
/* add this nspace to our pool */
nptr = PMIX_NEW(pmix_nspace_t);
(void)strncpy(nptr->nspace, cd->proc.nspace, PMIX_MAX_NSLEN);
nptr->server = PMIX_NEW(pmix_server_nspace_t);
pmix_list_append(&pmix_globals.nspaces, &nptr->super);
/* add this tool rank to the nspace */
info = PMIX_NEW(pmix_rank_info_t);
PMIX_RETAIN(nptr);
info->nptr = nptr;
info->rank = 0;
pmix_list_append(&nptr->server->ranks, &info->super);
/* setup a peer object for this tool */
pmix_peer_t *peer = PMIX_NEW(pmix_peer_t);
PMIX_RETAIN(info);
peer->info = info;
peer->proc_cnt = 1;
peer->sd = pnd -> sd;
if (0 > (peer->index = pmix_pointer_array_add(&pmix_server_globals.clients, peer))) {
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
PMIX_RELEASE(peer);
pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super);
PMIX_RELEASE(nptr); // will release the info object
/* probably cannot send an error reply if we are out of memory */
return;
}
/* start the events for this tool */
event_assign(&peer->recv_event, pmix_globals.evbase, pnd->sd,
EV_READ|EV_PERSIST, pmix_usock_recv_handler, peer);
event_add(&peer->recv_event, NULL);
peer->recv_ev_active = true;
event_assign(&peer->send_event, pmix_globals.evbase, pnd->sd,
EV_WRITE|EV_PERSIST, pmix_usock_send_handler, peer);
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:server tool %s:%d has connected on socket %d",
peer->info->nptr->nspace, peer->info->rank, peer->sd);
PMIX_RELEASE(pnd);
PMIX_RELEASE(cd);
}
/* receive a callback from the host RM with an nspace
 * for a connecting tool
 *
 * status - result of the host's tool_connected processing
 * proc   - identifier assigned to the tool; may be NULL when the host
 *          reports a failure
 * cbdata - the pending connection given to tool_connected
 *
 * The answer is copied into a setup caddy and thread-shifted to
 * process_cbfunc. The nspace buffer is zeroed first because
 * process_cbfunc sends all PMIX_MAX_NSLEN+1 bytes of it to the tool. */
static void cnct_cbfunc(pmix_status_t status,
                        pmix_proc_t *proc, void *cbdata)
{
    pmix_setup_caddy_t *cd;

    /* need to thread-shift this into our context */
    cd = PMIX_NEW(pmix_setup_caddy_t);
    cd->status = status;
    memset(cd->proc.nspace, 0, PMIX_MAX_NSLEN+1);
    if (NULL != proc) {
        (void)strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN);
    } else if (PMIX_SUCCESS == status) {
        /* success without an identifier cannot be honored */
        cd->status = PMIX_ERR_BAD_PARAM;
    }
    cd->cbdata = cbdata;
    PMIX_THREADSHIFT(cd, process_cbfunc);
}
/* Parse init-ack message:
* NSPACE<0><rank>VERSION<0>[CRED<0>]
*/
static pmix_status_t parse_connect_ack (char *msg, int len,
static pmix_status_t parse_connect_ack (char *msg,
pmix_listener_protocol_t protocol,
int len,
char **nspace, int *rank,
char **version, char **cred)
{
int msglen;
PMIX_STRNLEN(msglen, msg, len);
if (msglen < len) {
*nspace = msg;
msg += strlen(*nspace) + 1;
len -= strlen(*nspace) + 1;
} else {
return PMIX_ERR_BAD_PARAM;
}
if (PMIX_PROTOCOL_TOOL != protocol) {
PMIX_STRNLEN(msglen, msg, len);
if (msglen < len) {
*nspace = msg;
msg += strlen(*nspace) + 1;
len -= strlen(*nspace) + 1;
} else {
return PMIX_ERR_BAD_PARAM;
}
PMIX_STRNLEN(msglen, msg, len);
if (msglen <= len) {
memcpy(rank, msg, sizeof(int));
msg += sizeof(int);
len -= sizeof(int);
} else {
return PMIX_ERR_BAD_PARAM;
PMIX_STRNLEN(msglen, msg, len);
if (msglen <= len) {
memcpy(rank, msg, sizeof(int));
msg += sizeof(int);
len -= sizeof(int);
} else {
return PMIX_ERR_BAD_PARAM;
}
}
PMIX_STRNLEN(msglen, msg, len);
@ -380,7 +504,7 @@ static pmix_status_t parse_connect_ack (char *msg, int len,
/* Receive the peer's identification info from a newly
* connected socket and verify the expected response.
*/
static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
static pmix_status_t pmix_server_authenticate(pmix_pending_connection_t *pnd,
int *out_rank,
pmix_peer_t **peer)
{
@ -395,14 +519,17 @@ static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
pmix_proc_t proc;
pmix_output_verbose(2, pmix_globals.debug_output,
"RECV CONNECT ACK FROM PEER ON SOCKET %d", sd);
"RECV CONNECT ACK FROM PEER ON SOCKET %d",
pnd->sd);
/* ensure all is zero'd */
memset(&hdr, 0, sizeof(pmix_usock_hdr_t));
*peer = NULL;
if (NULL != peer) {
*peer = NULL;
}
/* get the header */
if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)&hdr, sizeof(pmix_usock_hdr_t)))) {
if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(pnd->sd, (char*)&hdr, sizeof(pmix_usock_hdr_t)))) {
return rc;
}
@ -415,92 +542,106 @@ static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
return PMIX_ERR_OUT_OF_RESOURCE;
}
if (PMIX_SUCCESS != pmix_usock_recv_blocking(sd, msg, hdr.nbytes)) {
if (PMIX_SUCCESS != pmix_usock_recv_blocking(pnd->sd, msg, hdr.nbytes)) {
/* unable to complete the recv */
pmix_output_verbose(2, pmix_globals.debug_output,
"unable to complete recv of connect-ack with client ON SOCKET %d", sd);
"unable to complete recv of connect-ack with client ON SOCKET %d",
pnd->sd);
free(msg);
return PMIX_ERR_UNREACH;
}
if (PMIX_SUCCESS != (rc = parse_connect_ack (msg, hdr.nbytes, &nspace,
&rank, &version, &cred))) {
if (PMIX_SUCCESS != (rc = parse_connect_ack(msg, pnd->protocol, hdr.nbytes, &nspace,
&rank, &version, &cred))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"error parsing connect-ack from client ON SOCKET %d", sd);
"error parsing connect-ack from client ON SOCKET %d", pnd->sd);
free(msg);
return rc;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack recvd from peer %s:%d:%s",
nspace, rank, version);
/* if the attaching process is not a tool, then set it up as
* a known peer */
if (PMIX_PROTOCOL_TOOL != pnd->protocol) {
pmix_globals.myid.rank = rank;
/* do not check the version - we only retain it at this
* time in case we need to check it at some future date.
* For now, our intent is to retain backward compatibility
* and so we will assume that all versions are compatible. */
/* get the nspace */
nspace = msg; // a NULL terminator is in the data
/* see if we know this nspace */
nptr = NULL;
PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strcmp(tmp->nspace, nspace)) {
nptr = tmp;
break;
/* get the rank */
memcpy(&rank, msg+strlen(nspace)+1, sizeof(int));
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack recvd from peer %s:%d:%s",
nspace, rank, version);
/* do not check the version - we only retain it at this
* time in case we need to check it at some future date.
* For now, our intent is to retain backward compatibility
* and so we will assume that all versions are compatible. */
/* see if we know this nspace */
nptr = NULL;
PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strcmp(tmp->nspace, nspace)) {
nptr = tmp;
break;
}
}
}
if (NULL == nptr) {
/* we don't know this namespace, reject it */
free(msg);
/* send an error reply to the client */
rc = PMIX_ERR_NOT_FOUND;
goto error;
}
/* see if we have this peer in our list */
info = NULL;
found = false;
PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) {
if (info->rank == rank) {
found = true;
break;
}
}
if (!found) {
/* rank unknown, reject it */
free(msg);
/* send an error reply to the client */
rc = PMIX_ERR_NOT_FOUND;
goto error;
}
*out_rank = rank;
/* a peer can connect on multiple sockets since it can fork/exec
* a child that also calls PMIx_Init, so add it here if necessary.
* Create the tracker for this peer */
psave = PMIX_NEW(pmix_peer_t);
PMIX_RETAIN(info);
psave->info = info;
info->proc_cnt++; /* increase number of processes on this rank */
psave->sd = sd;
if (0 > (psave->index = pmix_pointer_array_add(&pmix_server_globals.clients, psave))) {
free(msg);
PMIX_RELEASE(psave);
/* probably cannot send an error reply if we are out of memory */
return PMIX_ERR_OUT_OF_RESOURCE;
}
/* see if there is a credential */
if (NULL != pmix_sec.validate_cred) {
if (PMIX_SUCCESS != (rc = pmix_sec.validate_cred(psave, cred))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"validation of client credential failed");
if (NULL == nptr) {
/* we don't know this namespace, reject it */
free(msg);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
/* send an error reply to the client */
rc = PMIX_ERR_NOT_FOUND;
goto error;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"client credential validated");
/* see if we have this peer in our list */
info = NULL;
found = false;
PMIX_LIST_FOREACH(info, &nptr->server->ranks, pmix_rank_info_t) {
if (info->rank == rank) {
found = true;
break;
}
}
if (!found) {
/* rank unknown, reject it */
free(msg);
/* send an error reply to the client */
rc = PMIX_ERR_NOT_FOUND;
goto error;
}
*out_rank = rank;
/* a peer can connect on multiple sockets since it can fork/exec
* a child that also calls PMIx_Init, so add it here if necessary.
* Create the tracker for this peer */
psave = PMIX_NEW(pmix_peer_t);
PMIX_RETAIN(info);
psave->info = info;
info->proc_cnt++; /* increase number of processes on this rank */
psave->sd = pnd->sd;
if (0 > (psave->index = pmix_pointer_array_add(&pmix_server_globals.clients, psave))) {
free(msg);
PMIX_RELEASE(psave);
/* probably cannot send an error reply if we are out of memory */
return PMIX_ERR_OUT_OF_RESOURCE;
}
/* see if there is a credential */
if (NULL != pmix_sec.validate_cred) {
if (PMIX_SUCCESS != (rc = pmix_sec.validate_cred(psave, cred))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"validation of client credential failed");
free(msg);
if (NULL != psave) {
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
}
/* send an error reply to the client */
goto error;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"client credential validated");
}
}
free(msg);
@ -509,16 +650,20 @@ static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack executing handshake");
rc = PMIX_ERR_READY_FOR_HANDSHAKE;
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(sd, (char*)&rc, sizeof(int)))) {
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&rc, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
if (NULL != psave) {
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
}
return rc;
}
if (PMIX_SUCCESS != (rc = pmix_sec.server_handshake(psave))) {
PMIX_ERROR_LOG(rc);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
if (NULL != psave) {
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
}
return rc;
}
pmix_output_verbose(2, pmix_globals.debug_output,
@ -526,55 +671,65 @@ static pmix_status_t pmix_server_authenticate(int sd, uint16_t protocol,
} else {
/* send them success */
rc = PMIX_SUCCESS;
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(sd, (char*)&rc, sizeof(int)))) {
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&rc, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
if (NULL != psave) {
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
}
return rc;
}
}
/* if the attaching process is not a tool, then send its index */
if (PMIX_PROTOCOL_TOOL != pnd->protocol) {
/* send the client's array index */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(pnd->sd, (char*)&psave->index, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
return rc;
}
}
/* let the host server know that this client has connected */
if (NULL != pmix_host_server.client_connected) {
(void)strncpy(proc.nspace, psave->info->nptr->nspace, PMIX_MAX_NSLEN);
proc.rank = psave->info->rank;
rc = pmix_host_server.client_connected(&proc, psave->info->server_object,
NULL, NULL);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack from client completed");
*peer = psave;
/* let the host server know that this client has connected */
if (NULL != pmix_host_server.client_connected) {
(void)strncpy(proc.nspace, psave->info->nptr->nspace, PMIX_MAX_NSLEN);
proc.rank = psave->info->rank;
rc = pmix_host_server.client_connected(&proc, psave->info->server_object,
NULL, NULL);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
}
}
} else {
/* request an nspace for this requestor - it will
* automatically be assigned rank=0 */
pmix_host_server.tool_connected(NULL, 0, cnct_cbfunc, pnd);
return PMIX_ERR_OPERATION_IN_PROGRESS;
}
/* send the client's array index */
if (PMIX_SUCCESS != (rc = pmix_usock_send_blocking(sd, (char*)&psave->index, sizeof(int)))) {
PMIX_ERROR_LOG(rc);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
PMIX_RELEASE(psave);
return rc;
}
pmix_output_verbose(2, pmix_globals.debug_output,
"connect-ack from client completed");
*peer = psave;
return rc;
error:
/* send an error reply to the client */
if (PMIX_SUCCESS != pmix_usock_send_blocking(sd, (char*)&rc, sizeof(int))) {
if (PMIX_SUCCESS != pmix_usock_send_blocking(pnd->sd, (char*)&rc, sizeof(int))) {
PMIX_ERROR_LOG(rc);
}
return rc;
}
/*
* Handler for accepting connections from the event library
* Handler for accepting client connections from the event library
*/
static void connection_handler(int sd, short flags, void* cbdata)
{
pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cbdata;
pmix_peer_t *peer;
int rank;
pmix_status_t status;
pmix_output_verbose(8, pmix_globals.debug_output,
"connection_handler: new connection: %d",
pnd->sd);
@ -582,14 +737,17 @@ static void connection_handler(int sd, short flags, void* cbdata)
/* ensure the socket is in blocking mode */
pmix_usock_set_blocking(pnd->sd);
/* receive identifier info from the client and authenticate it - the
/*
* Receive identifier info from the client and authenticate it - the
* function will lookup and return the peer object if the connection
* is successfully authenticated */
if (PMIX_SUCCESS != pmix_server_authenticate(pnd->sd, pnd->protocol,
&rank, &peer)) {
CLOSE_THE_SOCKET(pnd->sd);
if (PMIX_SUCCESS != (status = pmix_server_authenticate(pnd, &rank, &peer))) {
if (PMIX_ERR_OPERATION_IN_PROGRESS != status) {
CLOSE_THE_SOCKET(pnd->sd);
}
return;
}
pmix_usock_set_nonblocking(pnd->sd);
/* start the events for this client */
@ -605,3 +763,31 @@ static void connection_handler(int sd, short flags, void* cbdata)
PMIX_RELEASE(pnd);
}
/*
 * Handler for accepting tool connections from the event library.
 *
 * sd and flags are supplied by the event library and are not used here;
 * cbdata is the pmix_pending_connection_t describing the new connection.
 *
 * If the host server provides no tool_connected entry point, or if the
 * authentication step returns anything other than
 * PMIX_ERR_OPERATION_IN_PROGRESS, the socket is closed and the pending
 * connection object is released here. On PMIX_ERR_OPERATION_IN_PROGRESS
 * the object is left alive (presumably for the host's completion
 * callback - confirm against pmix_server_authenticate).
 */
static void tool_handler(int sd, short flags, void* cbdata)
{
    pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cbdata;

    pmix_output_verbose(1, pmix_globals.debug_output,
                        "tool_handler: new tool connection: %d",
                        pnd->sd);

    /* if the server doesn't support this, then abort now */
    if (NULL == pmix_host_server.tool_connected) {
        CLOSE_THE_SOCKET(pnd->sd);
        PMIX_RELEASE(pnd);
        /* pnd has been released - we must not fall through and
         * touch it again below */
        return;
    }

    /* ensure the socket is in blocking mode */
    pmix_usock_set_blocking(pnd->sd);

    /* initiate the authentication handshake */
    if (PMIX_ERR_OPERATION_IN_PROGRESS != pmix_server_authenticate(pnd, NULL, NULL)) {
        pmix_output(0, "SHOOT");
        CLOSE_THE_SOCKET(pnd->sd);
        PMIX_RELEASE(pnd);
    }
}

92
opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -1175,8 +1175,7 @@ void pmix_server_deregister_events(pmix_peer_t *peer,
{
int32_t cnt;
pmix_status_t rc, *codes = NULL, *cdptr, maxcode = PMIX_MAX_ERR_CONSTANT;
pmix_info_t *info = NULL;
size_t ninfo, ncodes, ncds, n;
size_t ncodes, ncds, n;
pmix_regevents_info_t *reginfo = NULL;
pmix_regevents_info_t *reginfo_next;
pmix_peer_events_info_t *prev;
@ -1236,9 +1235,6 @@ cleanup:
if (NULL != codes) {
free(codes);
}
if (NULL != info) {
PMIX_INFO_FREE(info, ninfo);
}
return;
}
@ -1318,7 +1314,78 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer,
return rc;
}
// instance server library classes
/*
 * Service a query request received from a peer.
 *
 * Unpacks from buf, in order: the number of query info structs, the info
 * array itself (if any), the number of directives, and the directive
 * array (if any). The unpacked data is held in a pmix_query_caddy_t that
 * is passed to the host server's query entry point as its callback data;
 * the caller's cbdata is stored inside that caddy.
 *
 * Returns PMIX_ERR_NOT_SUPPORTED if the host provides no query function,
 * PMIX_ERR_NOMEM if the caddy cannot be allocated, the unpack or host
 * error code on failure (the caddy is released in those cases), and
 * PMIX_SUCCESS once the request has been handed to the host.
 */
pmix_status_t pmix_server_query(pmix_peer_t *peer,
                                pmix_buffer_t *buf,
                                pmix_info_cbfunc_t cbfunc,
                                void *cbdata)
{
    int32_t cnt;
    pmix_status_t rc;
    pmix_query_caddy_t *cd;
    pmix_proc_t proc;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "recvd query from client");

    if (NULL == pmix_host_server.query) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    cd = PMIX_NEW(pmix_query_caddy_t);
    if (NULL == cd) {
        return PMIX_ERR_NOMEM;
    }
    cd->cbdata = cbdata;

    /* unpack the number of info */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ninfo, &cnt, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        goto exit;
    }
    /* unpack the info */
    if (0 < cd->ninfo) {
        PMIX_INFO_CREATE(cd->info, cd->ninfo);
        cnt = cd->ninfo;
        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->info, &cnt, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
            goto exit;
        }
    }

    /* unpack any directives */
    cnt = 1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cd->ndirs, &cnt, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        goto exit;
    }
    if (0 < cd->ndirs) {
        PMIX_INFO_CREATE(cd->directives, cd->ndirs);
        cnt = cd->ndirs;
        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, cd->directives, &cnt, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
            goto exit;
        }
    }

    /* setup the requesting peer name - zero the struct first because
     * strncpy does not terminate the destination when the source fills
     * all PMIX_MAX_NSLEN bytes, and proc lives on the stack */
    memset(&proc, 0, sizeof(proc));
    (void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
    proc.rank = peer->info->rank;

    /* ask the host for the info */
    if (PMIX_SUCCESS != (rc = pmix_host_server.query(&proc, cd->info, cd->ninfo,
                                                     cd->directives, cd->ndirs,
                                                     cbfunc, cd))) {
        goto exit;
    }
    return PMIX_SUCCESS;

  exit:
    PMIX_RELEASE(cd);
    return rc;
}
/***** INSTANCE SERVER LIBRARY CLASSES *****/
static void tcon(pmix_server_trkr_t *t)
{
t->pcs = NULL;
@ -1451,9 +1518,20 @@ PMIX_CLASS_INSTANCE(pmix_dmdx_local_t,
pmix_list_item_t,
lmcon, lmdes);
/* Constructor for pmix_pending_connection_t: a new object starts with an
 * all-zero nspace string and no message buffer attached. */
static void pccon(pmix_pending_connection_t *p)
{
    memset(p->nspace, 0, PMIX_MAX_NSLEN + 1);
    p->msg = NULL;
}
/* Destructor for pmix_pending_connection_t: give back the message buffer
 * if one was ever attached. */
static void pcdes(pmix_pending_connection_t *p)
{
    if (NULL == p->msg) {
        return;
    }
    free(p->msg);
}
PMIX_CLASS_INSTANCE(pmix_pending_connection_t,
pmix_object_t,
NULL, NULL);
pccon, pcdes);
static void prevcon(pmix_peer_events_info_t *p)
{

Просмотреть файл

@ -31,6 +31,7 @@ typedef struct {
pmix_object_t super;
pmix_event_t ev;
volatile bool active;
pmix_status_t status;
pmix_proc_t proc;
uid_t uid;
gid_t gid;
@ -83,12 +84,21 @@ typedef struct {
} pmix_dmdx_local_t;
PMIX_CLASS_DECLARATION(pmix_dmdx_local_t);
/* define listener protocol types - each listener and each pending
 * connection carries one of these values in its "protocol" field */
typedef uint16_t pmix_listener_protocol_t;
#define PMIX_PROTOCOL_V1 0
#define PMIX_PROTOCOL_TOOL 1
/* note: this must be spelled with an underscore - "PMIX_PROTOCOL V2 2"
 * would define a macro named PMIX_PROTOCOL that expands to "V2 2" */
#define PMIX_PROTOCOL_V2 2
/* connection support */
typedef struct {
pmix_object_t super;
pmix_event_t ev;
uint16_t protocol;
pmix_listener_protocol_t protocol;
int sd;
char nspace[PMIX_MAX_NSLEN+1];
char *msg;
pmix_status_t status;
struct sockaddr_storage addr;
} pmix_pending_connection_t;
PMIX_CLASS_DECLARATION(pmix_pending_connection_t);
@ -111,7 +121,7 @@ PMIX_CLASS_DECLARATION(pmix_regevents_info_t);
/* listener objects */
typedef struct pmix_listener_t {
pmix_list_item_t super;
uint16_t protocol_type;
pmix_listener_protocol_t protocol;
int socket;
struct sockaddr_un address;
char *varname;
@ -135,6 +145,7 @@ typedef struct {
pmix_buffer_t gdata; // cache of data given to me for passing to all clients
pmix_list_t events; // list of pmix_regevents_info_t registered events
pmix_ring_buffer_t notifications; // ring buffer of pending notifications
bool tool_connections_allowed;
} pmix_server_globals_t;
typedef struct {
@ -272,6 +283,11 @@ pmix_status_t pmix_server_event_recvd_from_client(pmix_peer_t *peer,
pmix_buffer_t *buf,
pmix_op_cbfunc_t cbfunc,
void *cbdata);
/* Process a query request received from a peer: the request is unpacked
 * from buf and passed to the host server's query function together with
 * cbfunc. Returns PMIX_ERR_NOT_SUPPORTED when the host server provides
 * no query function, an error code if unpacking or the host call fails,
 * and PMIX_SUCCESS once the request has been handed to the host. */
pmix_status_t pmix_server_query(pmix_peer_t *peer,
pmix_buffer_t *buf,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
void pmix_server_execute_collective(int sd, short args, void *cbdata);
void pmix_server_queue_message(int fd, short args, void *cbdata);

Просмотреть файл

@ -0,0 +1,11 @@
#
# Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources += \
src/tool/pmix_tool.c

Просмотреть файл

@ -0,0 +1,889 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
* All rights reserved.
* Copyright (c) 2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <src/include/pmix_config.h>
#include <src/include/types.h>
#include <src/include/pmix_socket_errno.h>
#include "src/client/pmix_client_ops.h"
#include <pmix_tool.h>
#include "src/include/pmix_globals.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <fcntl.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_SYS_UN_H
#include <sys/un.h>
#endif
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif /* HAVE_DIRENT_H */
#include PMIX_EVENT_HEADER
#if PMIX_CC_USE_PRAGMA_IDENT
#pragma ident PMIX_VERSION
#elif PMIX_CC_USE_IDENT
#ident PMIX_VERSION
#endif
extern pmix_client_globals_t pmix_client_globals;
#include "src/class/pmix_list.h"
#include "src/buffer_ops/buffer_ops.h"
#include "src/util/argv.h"
#include "src/util/error.h"
#include "src/util/hash.h"
#include "src/util/output.h"
#include "src/util/progress_threads.h"
#include "src/usock/usock.h"
#include "src/sec/pmix_sec.h"
#include "src/include/pmix_globals.h"
#if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1)
#include "src/dstore/pmix_dstore.h"
#endif /* PMIX_ENABLE_DSTORE */
#define PMIX_MAX_RETRIES 10
static pmix_status_t usock_connect(struct sockaddr_un *address, int *fd);
/* Final callback installed on an event chain: cbdata is the chain itself,
 * and all that remains to be done is to drop our reference to it. The
 * status argument is not examined. */
static void _notify_complete(pmix_status_t status, void *cbdata)
{
    pmix_event_chain_t *finished = (pmix_event_chain_t*)cbdata;

    PMIX_RELEASE(finished);
}
/*
 * Receive callback for event notifications arriving from the server.
 *
 * Unpacks, in order: the command, the event status, the source proc, the
 * number of info structs and (if any) the info array, then starts the
 * local event-handler chain. The chain releases itself through
 * _notify_complete once processing is finished.
 *
 * If any unpack step fails, the partially filled chain is discarded and a
 * fresh chain carrying only the unpack error code is passed to the local
 * handlers instead, so that something is always reported.
 *
 * peer, hdr and cbdata are not used.
 */
static void pmix_tool_notify_recv(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
                                  pmix_buffer_t *buf, void *cbdata)
{
    pmix_status_t rc;
    int32_t cnt;
    pmix_cmd_t cmd;
    pmix_event_chain_t *chain;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:tool_notify_recv - processing event");

    /* start the local notification chain */
    chain = PMIX_NEW(pmix_event_chain_t);
    chain->final_cbfunc = _notify_complete;
    chain->final_cbdata = chain;

    cnt=1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &cmd, &cnt, PMIX_CMD))) {
        PMIX_ERROR_LOG(rc);
        goto error;
    }
    /* unpack the status */
    cnt=1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->status, &cnt, PMIX_INT))) {
        PMIX_ERROR_LOG(rc);
        goto error;
    }

    /* unpack the source of the event */
    cnt=1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->source, &cnt, PMIX_PROC))) {
        PMIX_ERROR_LOG(rc);
        goto error;
    }

    /* unpack the info that might have been provided */
    cnt=1;
    if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &chain->ninfo, &cnt, PMIX_SIZE))) {
        PMIX_ERROR_LOG(rc);
        goto error;
    }
    if (0 < chain->ninfo) {
        PMIX_INFO_CREATE(chain->info, chain->ninfo);
        cnt = chain->ninfo;
        if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, chain->info, &cnt, PMIX_INFO))) {
            PMIX_ERROR_LOG(rc);
            goto error;
        }
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "[%s:%d] pmix:tool_notify_recv - processing event %d, calling errhandler",
                        pmix_globals.myid.nspace, pmix_globals.myid.rank, chain->status);

    pmix_invoke_local_event_hdlr(chain);
    return;

  error:
    /* we always need to return */
    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix:tool_notify_recv - unpack error status =%d, calling def errhandler", rc);
    /* the partially filled chain was never handed to the event system,
     * so nobody else will release it - drop it before replacing it */
    PMIX_RELEASE(chain);
    chain = PMIX_NEW(pmix_event_chain_t);
    chain->status = rc;
    /* let the error chain clean itself up the same way the normal one does */
    chain->final_cbfunc = _notify_complete;
    chain->final_cbdata = chain;
    pmix_invoke_local_event_hdlr(chain);
}
static pmix_status_t connect_to_server(struct sockaddr_un *address)
{
int sd;
pmix_status_t ret;
if (PMIX_SUCCESS != (ret = usock_connect(address, &sd))) {
PMIX_ERROR_LOG(ret);
return ret;
}
pmix_client_globals.myserver.sd = sd;
/* setup recv event */
event_assign(&pmix_client_globals.myserver.recv_event,
pmix_globals.evbase,
pmix_client_globals.myserver.sd,
EV_READ | EV_PERSIST,
pmix_usock_recv_handler, &pmix_client_globals.myserver);
event_add(&pmix_client_globals.myserver.recv_event, 0);
pmix_client_globals.myserver.recv_ev_active = true;
/* setup send event */
event_assign(&pmix_client_globals.myserver.send_event,
pmix_globals.evbase,
pmix_client_globals.myserver.sd,
EV_WRITE|EV_PERSIST,
pmix_usock_send_handler, &pmix_client_globals.myserver);
pmix_client_globals.myserver.send_ev_active = false;
return PMIX_SUCCESS;
}
PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
pmix_info_t info[], size_t ninfo)
{
char *evar, *tdir, *tmp;
int debug_level;
struct sockaddr_un address;
size_t n;
pmix_kval_t *kptr;
pmix_status_t rc;
pmix_nspace_t *nptr, *nsptr;
int i, server_pid = -1;
int hostnamelen = 10;
char hostname[hostnamelen];
DIR *cur_dirp = NULL;
struct dirent * dir_entry;
pmix_output(0, "TOOL INIT");
if (NULL == proc) {
return PMIX_ERR_BAD_PARAM;
}
if (0 < pmix_globals.init_cntr) {
/* since we have been called before, the nspace and
* rank should be known. So return them here if
* requested */
if (NULL != proc) {
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
proc->rank = pmix_globals.myid.rank;
}
++pmix_globals.init_cntr;
return PMIX_SUCCESS;
}
/* scan incoming info for directives */
if (NULL != info) {
for (n=0; n < ninfo; n++) {
if (0 == strcmp(PMIX_EVENT_BASE, info[n].key)) {
pmix_globals.evbase = (pmix_event_base_t*)info[n].value.data.ptr;
pmix_globals.external_evbase = true;
}
}
}
/* setup the globals */
pmix_globals_init();
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);
PMIX_CONSTRUCT(&pmix_client_globals.myserver, pmix_peer_t);
/* mark that we are a client */
pmix_globals.server = false;
/* get our effective id's */
pmix_globals.uid = geteuid();
pmix_globals.gid = getegid();
/* initialize the output system */
if (!pmix_output_init()) {
return PMIX_ERROR;
}
/* see if debug is requested */
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
debug_level = strtol(evar, NULL, 10);
pmix_globals.debug_output = pmix_output_open(NULL);
pmix_output_set_verbosity(pmix_globals.debug_output, debug_level);
}
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: init called");
/* find the temp dir */
if (NULL == (tdir = getenv("TMPDIR"))) {
if (NULL == (tdir = getenv("TEMP"))) {
if (NULL == (tdir = getenv("TMP"))) {
tdir = "/tmp";
}
}
}
/* setup the path to the daemon rendezvous point */
memset(&address, 0, sizeof(struct sockaddr_un));
address.sun_family = AF_UNIX;
/* Get first 10 char's of hostname to match what the server is doing */
gethostname(hostname, hostnamelen);
/* Get the local hostname, and look for a file named
* /tmp/pmix.hostname.tool - this file will contain
* the URI where the server is listening. The URI consists
* of 3 parts - the code below will parse the string read
* from the file and connect accordingly */
for (i = 0; i < (int)ninfo; i++) {
if (strcmp(info[i].key, PMIX_SERVER_PIDINFO) == 0) {
server_pid = info[i].value.data.integer;
break;
}
}
/* if they gave us a specific pid, then look for that
* particular server - otherwise, see if there is only
* one on this node and default to it */
if (server_pid != -1) {
snprintf(address.sun_path, sizeof(address.sun_path)-1, "%s/pmix.%s.%d", tdir, hostname, server_pid);
/* if the rendezvous file doesn't exist, that's an error */
if (0 != access(address.sun_path, R_OK)) {
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_NOT_FOUND;
}
} else {
/* open up the temp directory */
if (NULL == (cur_dirp = opendir(tdir))) {
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_NOT_FOUND;
}
/* search the entries for something that starts with pmix.hostname */
if (0 > asprintf(&tmp, "pmix.%s", hostname)) {
return PMIX_ERR_NOMEM;
}
evar = NULL;
while (NULL != (dir_entry = readdir(cur_dirp))) {
if (0 == strncmp(dir_entry->d_name, tmp, strlen(tmp))) {
/* found one - if more than one, then that's an error */
if (NULL != evar) {
free(tmp);
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_INIT;
}
evar = strdup(dir_entry->d_name);
}
}
free(tmp);
closedir(cur_dirp);
if (NULL == evar) {
/* none found */
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_INIT;
}
/* use the found one as our contact point */
snprintf(address.sun_path, sizeof(address.sun_path)-1, "%s/%s", tdir, evar);
free(evar);
}
pmix_bfrop_open();
pmix_usock_init(pmix_tool_notify_recv);
pmix_sec_init();
if (!pmix_globals.external_evbase) {
/* create an event base and progress thread for us */
if (NULL == (pmix_globals.evbase = pmix_start_progress_thread())) {
pmix_sec_finalize();
pmix_usock_finalize();
pmix_bfrop_close();
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return -1;
}
}
/* connect to the server */
if (PMIX_SUCCESS != (rc = connect_to_server(&address))) {
pmix_stop_progress_thread(pmix_globals.evbase);
pmix_sec_finalize();
pmix_usock_finalize();
pmix_bfrop_close();
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return rc;
}
/* increment our init reference counter */
pmix_globals.init_cntr++;
/* Success, so copy the nspace and rank */
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
proc->rank = pmix_globals.myid.rank;
/* now finish the initialization by filling our local
* datastore with typical job-related info. No point
* in having the server generate these as we are
* obviously a singleton, and so the values are well-known */
nsptr = NULL;
PMIX_LIST_FOREACH(nptr, &pmix_globals.nspaces, pmix_nspace_t) {
if (0 == strncmp(pmix_globals.myid.nspace, nptr->nspace, PMIX_MAX_NSLEN)) {
nsptr = nptr;
break;
}
}
if (NULL == nsptr) {
/* should never happen */
pmix_stop_progress_thread(pmix_globals.evbase);
pmix_sec_finalize();
pmix_usock_finalize();
pmix_bfrop_close();
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_ERR_NOT_FOUND;
}
/* the jobid is just our nspace */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_JOBID);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup(nsptr->nspace);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* our rank */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_RANK);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_INT;
kptr->value->data.integer = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* nproc offset */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_NPROC_OFFSET);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* node size */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_NODE_SIZE);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* local peers */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_LOCAL_PEERS);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup("0");
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* local leader */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_LOCALLDR);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* universe size */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_UNIV_SIZE);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* job size - we are our very own job, so we have no peers */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_JOB_SIZE);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* local size - only us in our job */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_LOCAL_SIZE);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* max procs - since we are a self-started tool, there is no
* allocation within which we can grow ourselves */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_MAX_PROCS);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 1;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* app number */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_APPNUM);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* app leader */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_APPLDR);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* app rank */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_APP_RANK);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
}
PMIX_RELEASE(kptr); // maintain accounting
/* global rank */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_GLOBAL_RANK);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* local rank - we are alone in our job */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_LOCAL_RANK);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_UINT32;
kptr->value->data.uint32 = 0;
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* we cannot know the node rank as we don't know what
* other processes are executing on this node - so
* we'll add that info to the server-tool handshake
* and load it from there */
/* hostname */
gethostname(hostname, PMIX_MAX_NSLEN);
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_HOSTNAME);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup(hostname);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* we cannot know the RM's nodeid for this host, so
* we'll add that info to the server-tool handshake
* and load it from there */
/* the nodemap is simply our hostname as there is no
* regex to generate */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_NODE_MAP);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup(hostname);
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
/* likewise, the proc map is just our rank as we are
* the only proc in this job */
kptr = PMIX_NEW(pmix_kval_t);
kptr->key = strdup(PMIX_PROC_MAP);
PMIX_VALUE_CREATE(kptr->value, 1);
kptr->value->type = PMIX_STRING;
kptr->value->data.string = strdup("0");
if (PMIX_SUCCESS != (rc = pmix_hash_store(&nsptr->internal, pmix_globals.myid.rank, kptr))) {
PMIX_ERROR_LOG(rc);
return rc;
}
PMIX_RELEASE(kptr); // maintain accounting
return rc;
}
/*
 * Finalize the tool-side library.
 *
 * Init calls are reference counted: only when the counter is exactly 1
 * does this call perform the teardown; otherwise it just decrements the
 * counter. Always returns PMIX_SUCCESS.
 */
PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
{
/* not the last reference - just drop one and leave everything running.
 * NOTE(review): with a counter of 0 (finalize without init) this branch
 * is also taken and decrements the counter to -1 - confirm callers
 * cannot reach that state */
if (1 != pmix_globals.init_cntr) {
--pmix_globals.init_cntr;
return PMIX_SUCCESS;
}
pmix_globals.init_cntr = 0;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix:tool finalize called");
/* the progress thread is only ours to stop if we created the event base */
if (!pmix_globals.external_evbase) {
pmix_stop_progress_thread(pmix_globals.evbase);
}
pmix_usock_finalize();
PMIX_DESTRUCT(&pmix_client_globals.myserver);
PMIX_LIST_DESTRUCT(&pmix_client_globals.pending_requests);
/* NOTE(review): myserver.sd is read here after myserver has been
 * destructed above - verify the destructor leaves sd meaningful */
if (0 <= pmix_client_globals.myserver.sd) {
CLOSE_THE_SOCKET(pmix_client_globals.myserver.sd);
}
/* NOTE(review): the event base is freed unconditionally, including when
 * external_evbase is set and the base was supplied by the caller - confirm
 * this is intended */
event_base_free(pmix_globals.evbase);
#ifdef HAVE_LIBEVENT_GLOBAL_SHUTDOWN
libevent_global_shutdown();
#endif
pmix_bfrop_close();
pmix_sec_finalize();
pmix_globals_finalize();
/* NOTE(review): debug_output is read after pmix_globals_finalize() -
 * confirm the field is still valid at this point */
pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize();
pmix_class_finalize();
return PMIX_SUCCESS;
}
/*
* The sections below need to be updated to reflect tool
* connection handshake protocols - in this case, we
* don't know our nspace/rank in advance. So we need
* the handshake to include the security credential
* exchange, and then get our nspace/rank in return */
/*
 * Send the tool's connection request to the server over sd.
 *
 * The message is a usock header (pindex -1, tag UINT32_MAX) followed by
 * the NUL-terminated PMIX_VERSION string and, when the security module
 * provides one, a NUL-terminated credential.
 *
 * Returns PMIX_SUCCESS, PMIX_ERR_INVALID_CRED if the security module
 * fails to create a credential, PMIX_ERR_OUT_OF_RESOURCE if the message
 * buffer cannot be allocated, or PMIX_ERR_UNREACH if the send fails.
 */
static pmix_status_t send_connect_ack(int sd)
{
    pmix_usock_hdr_t hdr;
    pmix_status_t status = PMIX_SUCCESS;
    char *credential = NULL;
    char *payload;
    size_t credlen = 0;
    size_t total;
    size_t offset;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: TOOL SEND CONNECT ACK");

    /* setup the header */
    memset(&hdr, 0, sizeof(pmix_usock_hdr_t));
    hdr.pindex = -1;
    hdr.tag = UINT32_MAX;

    /* not every security module hands out credentials, so ask only
     * if the entry point exists */
    if (NULL != pmix_sec.create_cred) {
        credential = pmix_sec.create_cred();
        if (NULL == credential) {
            /* an error occurred - we cannot continue */
            return PMIX_ERR_INVALID_CRED;
        }
        credlen = strlen(credential) + 1;  // include the terminator
    }

    /* bytes that follow the header: version string with its terminator,
     * then the credential (if any) with its terminator */
    hdr.nbytes = strlen(PMIX_VERSION) + 1 + credlen;
    total = sizeof(hdr) + hdr.nbytes;

    payload = (char*)malloc(total);
    if (NULL == payload) {
        status = PMIX_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }
    /* zero-fill so the terminators are already in place */
    memset(payload, 0, total);

    /* header first */
    memcpy(payload, &hdr, sizeof(pmix_usock_hdr_t));
    offset = sizeof(pmix_usock_hdr_t);
    /* then the version string */
    memcpy(payload + offset, PMIX_VERSION, strlen(PMIX_VERSION));
    offset += strlen(PMIX_VERSION) + 1;
    /* and finally the credential */
    if (NULL != credential) {
        memcpy(payload + offset, credential, strlen(credential));
    }

    if (PMIX_SUCCESS != pmix_usock_send_blocking(sd, payload, total)) {
        status = PMIX_ERR_UNREACH;
    }
    free(payload);

  cleanup:
    if (NULL != credential) {
        free(credential);
    }
    return status;
}
/* we receive a connection acknowledgement from the server,
* consisting of the status and (if success) the nspace assigned
* to us */
/*
 * Receive the server's answer to our connection request on sd.
 *
 * The server is expected to send, in order: the status of the security
 * handshake, the status of the namespace request, the nspace assigned to
 * us, the server's own nspace and the server's rank. Our rank is always
 * zero. A two second receive timeout is applied while waiting (when the
 * socket supports SO_RCVTIMEO) and the previous timeout is restored on
 * success.
 *
 * Returns PMIX_SUCCESS, PMIX_ERR_UNREACH if the socket options cannot be
 * read or changed, the error from a failed receive, or the non-success
 * status reported by the server.
 */
static pmix_status_t recv_connect_ack(int sd)
{
    pmix_status_t reply, rc;
    struct timeval tv, save;
    pmix_socklen_t sz;
    bool sockopt = true;
    pmix_nspace_t *nsptr;
    char srv_nspace[PMIX_MAX_NSLEN+1];
    int srv_rank;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: RECV CONNECT ACK FROM SERVER");

    /* get the current timeout value so we can reset to it */
    sz = sizeof(save);
    if (0 != getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &sz)) {
        if (ENOPROTOOPT == errno) {
            sockopt = false;
        } else {
            return PMIX_ERR_UNREACH;
        }
    } else {
        /* set a timeout on the blocking recv so we don't hang */
        tv.tv_sec  = 2;
        tv.tv_usec = 0;
        if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "pmix: recv_connect_ack could not setsockopt SO_RCVTIMEO");
            return PMIX_ERR_UNREACH;
        }
    }

    /* get the returned status from the security handshake - a failed or
     * timed-out receive must not be mistaken for a reply */
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t)))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    if (PMIX_SUCCESS != reply) {
        return reply;
    }

    /* get the returned status from the request for namespace */
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t)))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    if (PMIX_SUCCESS != reply) {
        return reply;
    }

    /* get our assigned nspace */
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, pmix_globals.myid.nspace, PMIX_MAX_NSLEN+1))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    /* never trust the wire to have terminated the string */
    pmix_globals.myid.nspace[PMIX_MAX_NSLEN] = '\0';

    /* get the server's nspace and rank so we can send to it - these are
     * received into locals first so that nothing is allocated unless the
     * whole handshake arrived */
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, srv_nspace, PMIX_MAX_NSLEN+1))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }
    srv_nspace[PMIX_MAX_NSLEN] = '\0';
    if (PMIX_SUCCESS != (rc = pmix_usock_recv_blocking(sd, (char*)&srv_rank, sizeof(int)))) {
        PMIX_ERROR_LOG(rc);
        return rc;
    }

    /* setup required bookkeeping */
    nsptr = PMIX_NEW(pmix_nspace_t);
    (void)strncpy(nsptr->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
    pmix_list_append(&pmix_globals.nspaces, &nsptr->super);
    /* our rank is always zero */
    pmix_globals.myid.rank = 0;

    /* record who our server is */
    pmix_client_globals.myserver.info = PMIX_NEW(pmix_rank_info_t);
    pmix_client_globals.myserver.info->nptr = PMIX_NEW(pmix_nspace_t);
    memcpy(pmix_client_globals.myserver.info->nptr->nspace, srv_nspace, PMIX_MAX_NSLEN+1);
    pmix_client_globals.myserver.info->rank = srv_rank;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix: RECV CONNECT CONFIRMATION FOR TOOL %s:%d FROM SERVER %s:%d",
                        pmix_globals.myid.nspace, pmix_globals.myid.rank,
                        pmix_client_globals.myserver.info->nptr->nspace,
                        pmix_client_globals.myserver.info->rank);

    /* put the caller's receive timeout back */
    if (sockopt) {
        if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) {
            pmix_output(0, "FAILURE");
            return PMIX_ERR_UNREACH;
        }
    }

    return PMIX_SUCCESS;
}
/*
 * Connect to the server listening on the given unix-domain address and
 * run the connection handshake.
 *
 * Up to PMIX_MAX_RETRIES attempts are made, each on a freshly created
 * socket. A socket whose connect() failed is always closed before the
 * next attempt so that no descriptor is leaked.
 *
 * @param addr  address of the server's rendezvous socket
 * @param fd    receives the connected socket on success; the socket has
 *              been switched to non-blocking mode
 *
 * @return PMIX_SUCCESS on success, PMIX_ERR_UNREACH if no attempt managed
 *         to connect, otherwise the error reported by send_connect_ack()
 *         or recv_connect_ack()
 */
static pmix_status_t usock_connect(struct sockaddr_un *addr, int *fd)
{
    int sd = -1;
    pmix_status_t rc;
    pmix_socklen_t addrlen = sizeof(struct sockaddr_un);
    int retries = 0;
    bool connected = false;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "usock_peer_try_connect: attempting to connect to server");

    while (retries < PMIX_MAX_RETRIES) {
        retries++;
        /* Create the new socket */
        sd = socket(PF_UNIX, SOCK_STREAM, 0);
        if (sd < 0) {
            pmix_output(0, "pmix:create_socket: socket() failed: %s (%d)\n",
                        strerror(pmix_socket_errno),
                        pmix_socket_errno);
            continue;
        }
        pmix_output_verbose(2, pmix_globals.debug_output,
                            "usock_peer_try_connect: attempting to connect to server on socket %d", sd);

        /* try to connect */
        if (0 <= connect(sd, (struct sockaddr*)addr, addrlen)) {
            /* remember the success explicitly: the retry counter alone
             * cannot distinguish "connected on the last attempt" from
             * "ran out of attempts" */
            connected = true;
            break;
        }

        if (ETIMEDOUT == pmix_socket_errno) {
            /* The server may be too busy to accept new connections */
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "timeout connecting to server");
        } else if (ECONNABORTED == pmix_socket_errno) {
            /* Some kernels (Linux 2.6) will automatically software
               abort a connection that was ECONNREFUSED on the last
               attempt, without even trying to establish the
               connection.  Handle that case in a semi-rational
               way by trying twice before giving up */
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "connection to server aborted by OS - retrying");
        } else {
            pmix_output_verbose(2, pmix_globals.debug_output,
                                "Failed to connect, errno = %d, err= %s\n", errno, strerror(errno));
        }

        /* whatever the reason, this socket is of no further use */
        CLOSE_THE_SOCKET(sd);
        sd = -1;
    }

    if (!connected) {
        /* We were unsuccessful in establishing this connection, and are
         * not likely to suddenly become successful. Every socket that
         * failed to connect has already been closed in the loop. */
        return PMIX_ERR_UNREACH;
    }

    /* send any authentication credentials to the server */
    if (PMIX_SUCCESS != (rc = send_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        return rc;
    }

    /* do whatever handshake is required */
    if (PMIX_SUCCESS != (rc = recv_connect_ack(sd))) {
        CLOSE_THE_SOCKET(sd);
        return rc;
    }

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "sock_peer_try_connect: Connection across to server succeeded");

    /* mark the connection as made */
    pmix_globals.connected = true;

    pmix_usock_set_nonblocking(sd);

    *fd = sd;
    return PMIX_SUCCESS;
}

Просмотреть файл

@ -78,7 +78,7 @@ void pmix_usock_finalize(void)
PMIX_LIST_DESTRUCT(&pmix_usock_globals.posted_recvs);
}
pmix_status_t pmix_usock_set_nonblocking(int sd)
pmix_status_t pmix_usock_set_nonblocking(int sd)
{
int flags;
/* setup the socket as non-blocking */

Просмотреть файл

@ -56,6 +56,7 @@
#define PMIX_ERR_SILENT (PMIX_INTERNAL_ERR_BASE - 25)
#define PMIX_ERR_UNKNOWN_DATATYPE (PMIX_INTERNAL_ERR_BASE - 26)
#define PMIX_ERR_RESOURCE_BUSY (PMIX_INTERNAL_ERR_BASE - 27)
#define PMIX_ERR_OPERATION_IN_PROGRESS (PMIX_INTERNAL_ERR_BASE - 28)
#define PMIX_ERROR_LOG(r) \
do { \

Просмотреть файл

@ -21,7 +21,7 @@
AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix
noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex test_pmix
noinst_PROGRAMS = simptest simpclient simppub simpdyn simpft simpdmodex test_pmix simptool
simptest_SOURCES = \
simptest.c
@ -64,3 +64,9 @@ test_pmix_SOURCES = \
test_pmix_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
test_pmix_LDADD = \
$(top_builddir)/libpmix.la
simptool_SOURCES = \
simptool.c
simptool_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
simptool_LDADD = \
$(top_builddir)/libpmix.la

Просмотреть файл

@ -83,13 +83,21 @@ static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs,
static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t deregister_event_fn(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t notify_event(pmix_status_t code,
const pmix_proc_t *source,
pmix_data_range_t range,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t query_fn(pmix_proc_t *proct,
pmix_info_t *info, size_t ninfo,
pmix_info_t *directives, size_t ndir,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
static void tool_connect_fn(pmix_info_t *info, size_t ninfo,
pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
static pmix_server_module_t mymodule = {
.client_connected = connected,
@ -104,8 +112,10 @@ static pmix_server_module_t mymodule = {
.connect = connect_fn,
.disconnect = disconnect_fn,
.register_events = register_event_fn,
.deregister_events = deregister_events,
.notify_event = notify_event
.deregister_events = deregister_event_fn,
.notify_event = notify_event,
.query = query_fn,
.tool_connected = tool_connect_fn
};
typedef struct {
@ -195,6 +205,7 @@ int main(int argc, char **argv)
myxfer_t *x;
pmix_proc_t proc;
wait_tracker_t *child;
pmix_info_t info;
/* smoke test */
if (PMIX_SUCCESS != 0) {
@ -204,11 +215,15 @@ int main(int argc, char **argv)
fprintf(stderr, "Testing version %s\n", PMIx_Get_version());
/* setup the server library */
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, NULL, 0))) {
/* setup the server library and tell it to support tool connections */
PMIX_INFO_CONSTRUCT(&info);
(void)strncpy(info.key, PMIX_SERVER_TOOL_SUPPORT, PMIX_MAX_KEYLEN);
if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, &info, 1))) {
fprintf(stderr, "Init failed with error %d\n", rc);
return rc;
}
PMIX_INFO_DESTRUCT(&info);
/* register the errhandler */
PMIx_Register_event_handler(NULL, 0, NULL, 0,
errhandler, errhandler_reg_callbk, NULL);
@ -666,12 +681,19 @@ static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
if (NULL != cbfunc) {
cbfunc(PMIX_SUCCESS, cbdata);
}
return PMIX_SUCCESS;
}
static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata)
static pmix_status_t deregister_event_fn(pmix_status_t *codes, size_t ncodes,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_output(0, "SERVER: DEREGISTER EVENT");
if (NULL != cbfunc) {
cbfunc(PMIX_SUCCESS, cbdata);
}
return PMIX_SUCCESS;
}
@ -684,6 +706,52 @@ static pmix_status_t notify_event(pmix_status_t code,
return PMIX_SUCCESS;
}
/* Holder pairing a pointer to pmix_info_t with a size_t count.
 * NOTE(review): no use of this type is visible in the surrounding code -
 * confirm whether it is still needed. */
typedef struct query_data_t {
/* pointer to pmix_info_t entries */
pmix_info_t *data;
/* presumably the number of entries behind `data` - confirm */
size_t ndata;
} query_data_t;
/*
 * Test-server query handler.
 *
 * Answers every requested key with a string value holding that key's
 * position in the request array ("0", "1", ...), then hands the same
 * array back through the callback. The requestor and the directives
 * are not examined.
 *
 * Returns PMIX_ERROR when no callback is supplied or when a value
 * string cannot be allocated, PMIX_SUCCESS otherwise.
 */
static pmix_status_t query_fn(pmix_proc_t *proct,
                              pmix_info_t *info, size_t ninfo,
                              pmix_info_t *directives, size_t ndirs,
                              pmix_info_cbfunc_t cbfunc,
                              void *cbdata)
{
    size_t idx = 0;

    pmix_output(0, "SERVER: QUERY");

    /* without a callback there is no way to deliver the answer */
    if (NULL == cbfunc) {
        return PMIX_ERROR;
    }

    /* keep this simple */
    while (idx < ninfo) {
        pmix_info_t *entry = &info[idx];

        entry->value.type = PMIX_STRING;
        if (asprintf(&entry->value.data.string, "%d", (int)idx) < 0) {
            return PMIX_ERROR;
        }
        ++idx;
    }

    cbfunc(PMIX_SUCCESS, info, ninfo, cbdata, NULL, NULL);
    return PMIX_SUCCESS;
}
/*
 * Test-server handler for a connecting tool.
 *
 * Reports success through the callback, assigning the tool the fixed
 * namespace "TOOL" with rank 0. The incoming info array is ignored.
 */
static void tool_connect_fn(pmix_info_t *info, size_t ninfo,
                            pmix_tool_connection_cbfunc_t cbfunc,
                            void *cbdata)
{
    pmix_proc_t assigned;

    pmix_output(0, "SERVER: TOOL CONNECT");

    /* nobody to tell - nothing further to do */
    if (NULL == cbfunc) {
        return;
    }

    /* just pass back an arbitrary nspace */
    assigned.rank = 0;
    (void)strncpy(assigned.nspace, "TOOL", PMIX_MAX_NSLEN);

    cbfunc(PMIX_SUCCESS, &assigned, cbdata);
}
static void wait_signal_callback(int fd, short event, void *arg)
{
pmix_event_t *sig = (pmix_event_t*) arg;

Просмотреть файл

@ -0,0 +1,98 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include <src/include/pmix_config.h>
#include <pmix_tool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include "src/class/pmix_object.h"
#include "src/buffer_ops/types.h"
#include "src/util/output.h"
#include "src/util/printf.h"
/*
 * Simple tool test.
 *
 * Initializes as a PMIx tool, issues a two-key query ("foobar" and
 * "spastic"), and checks that the keys come back unchanged with string
 * values "0" and "1" respectively. Every mismatch is reported via
 * pmix_output; the process exit status is the result of finalization.
 */
int main(int argc, char **argv)
{
    pmix_status_t rc;
    pmix_proc_t myproc;
    pmix_info_t *info;
    size_t ninfo;

    /* init us */
    if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
        fprintf(stderr, "PMIx_tool_init failed: %d\n", rc);
        exit(rc);
    }
    pmix_output(0, "Tool ns %s rank %d: Running", myproc.nspace, myproc.rank);

    /* query something */
    ninfo = 2;
    PMIX_INFO_CREATE(info, ninfo);
    (void)strncpy(info[0].key, "foobar", PMIX_MAX_KEYLEN);
    (void)strncpy(info[1].key, "spastic", PMIX_MAX_KEYLEN);
    if (PMIX_SUCCESS != (rc = PMIx_Query_info(info, ninfo))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info failed: %d", myproc.nspace, myproc.rank, rc);
        /* do not leak the request array on the failure path */
        PMIX_INFO_FREE(info, ninfo);
        goto done;
    }

    /* the keys must come back untouched */
    if (0 != strncmp(info[0].key, "foobar", PMIX_MAX_KEYLEN)) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs foobar",
                    myproc.nspace, myproc.rank, info[0].key);
    }
    if (0 != strncmp(info[1].key, "spastic", PMIX_MAX_KEYLEN)) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[1] wrong: %s vs spastic",
                    myproc.nspace, myproc.rank, info[1].key);
    }

    /* both answers must be strings */
    if (PMIX_STRING != info[0].value.type) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong type: %d vs %d",
                    myproc.nspace, myproc.rank, info[0].value.type, PMIX_STRING);
    }
    if (PMIX_STRING != info[1].value.type) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[1] wrong type: %d vs %d",
                    myproc.nspace, myproc.rank, info[1].value.type, PMIX_STRING);
    }

    /* only look at the string member when the value really is a string,
     * and never hand a NULL pointer to strcmp */
    if (PMIX_STRING == info[0].value.type &&
        (NULL == info[0].value.data.string ||
         0 != strcmp(info[0].value.data.string, "0"))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[0] wrong value: %s vs 0",
                    myproc.nspace, myproc.rank,
                    (NULL == info[0].value.data.string) ? "NULL" : info[0].value.data.string);
    }
    if (PMIX_STRING == info[1].value.type &&
        (NULL == info[1].value.data.string ||
         0 != strcmp(info[1].value.data.string, "1"))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Query_info key[1] wrong value: %s vs 1",
                    myproc.nspace, myproc.rank,
                    (NULL == info[1].value.data.string) ? "NULL" : info[1].value.data.string);
    }
    PMIX_INFO_FREE(info, ninfo);

 done:
    /* finalize us */
    pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank);
    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
    } else {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
    }
    fflush(stderr);
    return(rc);
}

20
opal/mca/pmix/pmix2x/pmix2x.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -564,6 +564,9 @@ opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range) {
void pmix2x_value_load(pmix_value_t *v,
opal_value_t *kv)
{
char nspace[PMIX_MAX_NSLEN + 1];
size_t n;
switch(kv->type) {
case OPAL_UNDEF:
v->type = PMIX_UNDEF;
@ -655,6 +658,19 @@ void pmix2x_value_load(pmix_value_t *v,
v->data.bo.size = 0;
}
break;
case OPAL_UINT32_ARRAY:
/* an array of 32-bit jobids */
v->type = PMIX_INFO_ARRAY;
v->data.array.size = kv->data.uint32_array.size;
if (0 < v->data.array.size) {
PMIX_INFO_CREATE(v->data.array.array, v->data.array.size);
for (n=0; n < v->data.array.size; n++) {
v->data.array.array[n].value.type = PMIX_STRING;
(void)opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, kv->data.uint32_array.data[n]);
v->data.array.array[n].value.data.string = strdup(nspace);
}
}
break;
default:
/* silence warnings */
break;
@ -669,7 +685,7 @@ int pmix2x_value_unload(opal_value_t *kv,
switch(v->type) {
case PMIX_UNDEF:
rc = OPAL_ERR_UNKNOWN_DATA_TYPE;
kv->type = OPAL_UNDEF;
break;
case PMIX_BOOL:
kv->type = OPAL_BOOL;
@ -1148,6 +1164,8 @@ static void ocadcon(pmix2x_opalcaddy_t *p)
p->spwncbfunc = NULL;
p->cbdata = NULL;
p->odmdxfunc = NULL;
p->infocbfunc = NULL;
p->toolcbfunc = NULL;
p->ocbdata = NULL;
}
static void ocaddes(pmix2x_opalcaddy_t *p)

Просмотреть файл

@ -130,6 +130,8 @@ typedef struct {
pmix_modex_cbfunc_t mdxcbfunc;
pmix_lookup_cbfunc_t lkupcbfunc;
pmix_spawn_cbfunc_t spwncbfunc;
pmix_info_cbfunc_t infocbfunc;
pmix_tool_connection_cbfunc_t toolcbfunc;
void *cbdata;
opal_pmix_release_cbfunc_t odmdxfunc;
void *ocbdata;
@ -302,7 +304,7 @@ OPAL_MODULE_DECLSPEC pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t sco
OPAL_MODULE_DECLSPEC pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range);
OPAL_MODULE_DECLSPEC opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range);
OPAL_MODULE_DECLSPEC void pmix2x_value_load(pmix_value_t *v,
opal_value_t *kv);
opal_value_t *kv);
OPAL_MODULE_DECLSPEC int pmix2x_value_unload(opal_value_t *kv,
const pmix_value_t *v);

167
opal/mca/pmix/pmix2x/pmix2x_server_north.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -88,6 +88,15 @@
pmix_data_range_t range,
pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t server_query(pmix_proc_t *proct,
pmix_info_t *info, size_t ninfo,
pmix_info_t *directives, size_t ndirs,
pmix_info_cbfunc_t cbfunc,
void *cbdata);
static void server_tool_connection(pmix_info_t *info, size_t ninfo,
pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
pmix_server_module_t mymodule = {
.client_connected = server_client_connected_fn,
.client_finalized = server_client_finalized_fn,
@ -102,7 +111,9 @@
.disconnect = server_disconnect_fn,
.register_events = server_register_events,
.deregister_events = server_deregister_events,
.notify_event = server_notify_event
.notify_event = server_notify_event,
.query = server_query,
.tool_connected = server_tool_connection
};
opal_pmix_server_module_t *host_module = NULL;
@ -787,3 +798,157 @@ static pmix_status_t server_notify_event(pmix_status_t code,
{
return PMIX_ERR_NOT_SUPPORTED;
}
/*
 * Release callback handed downward together with the query results:
 * drops the reference on the pmix2x_opcaddy_t passed as cbdata.
 */
static void _info_rel(void *cbdata)
{
    pmix2x_opcaddy_t *caddy = cbdata;

    OBJ_RELEASE(caddy);
}
/*
 * Completion callback for a query passed up to the host.
 *
 * Converts the OPAL status and the returned list of opal_value_t into
 * a pmix_status_t and a pmix_info_t array, lets the host release its
 * data, and forwards the converted answer to the PMIx callback stored
 * in the caddy.
 *
 * @param status          OPAL status reported by the host
 * @param info            list of opal_value_t results (may be NULL)
 * @param cbdata          the pmix2x_opalcaddy_t created for the request;
 *                        released here
 * @param release_fn      optional function to call once the incoming
 *                        list is no longer needed
 * @param release_cbdata  argument for release_fn
 */
static void info_cbfunc(int status,
                        opal_list_t *info,
                        void *cbdata,
                        opal_pmix_release_cbfunc_t release_fn,
                        void *release_cbdata)
{
    pmix2x_opalcaddy_t *opalcaddy = (pmix2x_opalcaddy_t*)cbdata;
    pmix2x_opcaddy_t *pcaddy;
    opal_value_t *kv;
    size_t n;

    pcaddy = OBJ_NEW(pmix2x_opcaddy_t);

    /* convert the status */
    pcaddy->status = pmix2x_convert_opalrc(status);

    /* convert the list to a pmix_info_t array */
    if (NULL != info) {
        pcaddy->ninfo = opal_list_get_size(info);
        if (0 < pcaddy->ninfo) {
            PMIX_INFO_CREATE(pcaddy->info, pcaddy->ninfo);
            n = 0;
            OPAL_LIST_FOREACH(kv, info, opal_value_t) {
                (void)strncpy(pcaddy->info[n].key, kv->key, PMIX_MAX_KEYLEN);
                pmix2x_value_load(&pcaddy->info[n].value, kv);
                /* advance to the next slot - without this every list
                 * entry would overwrite element 0 */
                ++n;
            }
        }
    }

    /* we are done with the incoming data */
    if (NULL != release_fn) {
        release_fn(release_cbdata);
    }

    /* provide the answer downward */
    if (NULL != opalcaddy->infocbfunc) {
        /* the receiver calls _info_rel to drop pcaddy when it is done */
        opalcaddy->infocbfunc(pcaddy->status, pcaddy->info, pcaddy->ninfo,
                              opalcaddy->cbdata, _info_rel, pcaddy);
    } else {
        /* nobody will ever call _info_rel, so drop our reference now */
        OBJ_RELEASE(pcaddy);
    }
    OBJ_RELEASE(opalcaddy);
}
/*
 * PMIx server upcall for a query request.
 *
 * Translates the requestor and the requested keys into their OPAL
 * form and forwards the request to the host's query function; the
 * answer is delivered asynchronously through info_cbfunc. Directives
 * are currently ignored.
 *
 * Returns PMIX_ERR_NOT_SUPPORTED when the host provides no query
 * function, otherwise the converted status of the conversion steps or
 * of the host call.
 */
static pmix_status_t server_query(pmix_proc_t *proct,
                                  pmix_info_t *info, size_t ninfo,
                                  pmix_info_t *directives, size_t ndirs,
                                  pmix_info_cbfunc_t cbfunc,
                                  void *cbdata)
{
    pmix2x_opalcaddy_t *caddy;
    opal_process_name_t requestor;
    opal_value_t *val;
    size_t idx;
    int ret;

    if (NULL == host_module || NULL == host_module->query) {
        return PMIX_ERR_NOT_SUPPORTED;
    }

    /* setup the caddy */
    caddy = OBJ_NEW(pmix2x_opalcaddy_t);
    caddy->infocbfunc = cbfunc;
    caddy->cbdata = cbdata;

    /* convert the requestor */
    ret = opal_convert_string_to_jobid(&requestor.jobid, proct->nspace);
    if (OPAL_SUCCESS != ret) {
        opal_output(0, "FILE: %s LINE %d", __FILE__, __LINE__);
        OBJ_RELEASE(caddy);
        return pmix2x_convert_opalrc(ret);
    }
    requestor.vpid = proct->rank;

    /* convert the info - each value is owned by the caddy's list */
    for (idx = 0; idx < ninfo; ++idx) {
        val = OBJ_NEW(opal_value_t);
        opal_list_append(&caddy->info, &val->super);
        val->key = strdup(info[idx].key);
        ret = pmix2x_value_unload(val, &info[idx].value);
        if (OPAL_SUCCESS != ret) {
            OBJ_RELEASE(caddy);
            return pmix2x_convert_opalrc(ret);
        }
    }

    /* we ignore directives for now */

    /* pass the call upwards; on success the caddy is released by
     * info_cbfunc */
    ret = host_module->query(&requestor, &caddy->info, NULL,
                             info_cbfunc, caddy);
    if (OPAL_SUCCESS != ret) {
        OBJ_RELEASE(caddy);
    }

    return pmix2x_convert_opalrc(ret);
}
/*
 * Completion callback for a tool-connection request passed up to the
 * host: converts the status and the assigned process name to their
 * PMIx form, forwards them to the stored PMIx callback (if any) and
 * releases the request caddy.
 */
static void toolcbfunc(int status,
                       opal_process_name_t proc,
                       void *cbdata)
{
    pmix2x_opalcaddy_t *caddy = (pmix2x_opalcaddy_t*)cbdata;
    pmix_proc_t name;
    pmix_status_t pstatus = pmix2x_convert_opalrc(status);

    /* convert the process name */
    (void)opal_snprintf_jobid(name.nspace, PMIX_MAX_NSLEN, proc.jobid);
    name.rank = proc.vpid;

    /* pass it down */
    if (NULL != caddy->toolcbfunc) {
        caddy->toolcbfunc(pstatus, &name, caddy->cbdata);
    }

    OBJ_RELEASE(caddy);
}
/*
 * PMIx server upcall for a connecting tool.
 *
 * Converts the connection qualifiers to a list of opal_value_t and
 * forwards the request to the host's tool_connected function; the
 * host's answer is delivered through toolcbfunc. If the host does not
 * support tool connections, or a qualifier cannot be converted, the
 * error is reported through cbfunc exactly once and nothing is passed
 * upward.
 *
 * @param info    connection qualifiers supplied by the tool
 * @param ninfo   number of entries in info
 * @param cbfunc  PMIx callback that receives the status and the
 *                assigned process name
 * @param cbdata  opaque argument for cbfunc
 */
static void server_tool_connection(pmix_info_t *info, size_t ninfo,
                                   pmix_tool_connection_cbfunc_t cbfunc,
                                   void *cbdata)
{
    pmix2x_opalcaddy_t *opalcaddy;
    size_t n;
    opal_value_t *oinfo;
    int rc;
    pmix_status_t err;

    /* same guard as server_query: the host may not offer this service */
    if (NULL == host_module || NULL == host_module->tool_connected) {
        if (NULL != cbfunc) {
            cbfunc(PMIX_ERR_NOT_SUPPORTED, NULL, cbdata);
        }
        return;
    }

    /* setup the caddy */
    opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t);
    opalcaddy->toolcbfunc = cbfunc;
    opalcaddy->cbdata = cbdata;

    /* convert the info */
    for (n=0; n < ninfo; n++) {
        oinfo = OBJ_NEW(opal_value_t);
        opal_list_append(&opalcaddy->info, &oinfo->super);
        oinfo->key = strdup(info[n].key);
        if (OPAL_SUCCESS != (rc = pmix2x_value_unload(oinfo, &info[n].value))) {
            OBJ_RELEASE(opalcaddy);
            err = pmix2x_convert_opalrc(rc);
            if (NULL != cbfunc) {
                cbfunc(err, NULL, cbdata);
            }
            /* the caddy is gone and the caller has been answered:
             * stop here instead of touching freed memory and
             * answering a second time */
            return;
        }
    }

    /* pass it up */
    host_module->tool_connected(&opalcaddy->info, toolcbfunc, opalcaddy);
}

23
opal/mca/pmix/pmix_server.h Обычный файл → Исполняемый файл
Просмотреть файл

@ -183,6 +183,27 @@ typedef int (*opal_pmix_server_disconnect_fn_t)(opal_list_t *procs, opal_list_t
typedef int (*opal_pmix_server_notify_fn_t)(int code, opal_list_t *procs, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Query the RTE for information */
typedef int (*opal_pmix_server_query_fn_t)(opal_process_name_t *requestor,
opal_list_t *info, opal_list_t *directives,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
/* Register that a tool has connected to the server, and request
* that the tool be assigned a jobid for further interactions.
* The optional opal_value_t list can be used to pass qualifiers for
* the connection request:
*
* (a) OPAL_PMIX_USERID - effective userid of the tool
* (b) OPAL_PMIX_GRPID - effective groupid of the tool
* (c) OPAL_PMIX_FWD_STDOUT - forward any stdout to this tool
* (d) OPAL_PMIX_FWD_STDERR - forward any stderr to this tool
* (e) OPAL_PMIX_FWD_STDIN - forward stdin from this tool to any
* processes spawned on its behalf
*/
typedef void (*opal_pmix_server_tool_connection_fn_t)(opal_list_t *info,
opal_pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
/* Callback function for incoming connection requests from
* local clients */
typedef void (*opal_pmix_connection_cbfunc_t)(int incoming_sd);
@ -215,6 +236,8 @@ typedef struct opal_pmix_server_module_1_0_0_t {
opal_pmix_server_register_events_fn_t register_events;
opal_pmix_server_deregister_events_fn_t deregister_events;
opal_pmix_server_notify_fn_t notify_event;
opal_pmix_server_query_fn_t query;
opal_pmix_server_tool_connection_fn_t tool_connected;
opal_pmix_server_listener_fn_t listener;
} opal_pmix_server_module_t;

Просмотреть файл

@ -41,6 +41,11 @@ BEGIN_C_DECLS
* these keys are RESERVED */
#define OPAL_PMIX_ATTR_UNDEF NULL
#define OPAL_PMIX_SERVER_TOOL_SUPPORT "pmix.srvr.tool" // (bool) The host RM wants to declare itself as willing to
// accept tool connection requests
#define OPAL_PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (uint32_t) pid of the target server
/* identification attributes */
#define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id
#define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id
@ -166,6 +171,12 @@ BEGIN_C_DECLS
#define OPAL_PMIX_PRELOAD_FILES "pmix.preloadfiles" // (char*) comma-delimited list of files to pre-position
#define OPAL_PMIX_NON_PMI "pmix.nonpmi" // (bool) spawned procs will not call PMIx_Init
#define OPAL_PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin
#define OPAL_PMIX_FWD_STDIN "pmix.fwd.stdin" // (bool) forward my stdin to the designated proc
#define OPAL_PMIX_FWD_STDOUT "pmix.fwd.stdout" // (bool) forward stdout from spawned procs to me
#define OPAL_PMIX_FWD_STDERR "pmix.fwd.stderr" // (bool) forward stderr from spawned procs to me
/* query attributes */
#define OPAL_PMIX_QUERY_NAMESPACES "pmix.qry.ns" // (char*) request a comma-delimited list of active nspaces
/* define a scope for data "put" by PMI per the following:
@ -357,6 +368,24 @@ typedef void (*opal_pmix_value_cbfunc_t)(int status,
opal_value_t *kv, void *cbdata);
/* define a callback function for calls to PMIx_Query. The status
* indicates if requested data was found or not - a list of
* opal_value_t will contain the key/value pairs. */
typedef void (*opal_pmix_info_cbfunc_t)(int status,
opal_list_t *info,
void *cbdata,
opal_pmix_release_cbfunc_t release_fn,
void *release_cbdata);
/* Callback function for incoming tool connections - the host
* RTE shall provide a jobid/rank for the connecting tool. We
* assume that a rank=0 will be the normal assignment, but allow
* for the future possibility of a parallel set of tools
* connecting, and thus each proc requiring a rank */
typedef void (*opal_pmix_tool_connection_cbfunc_t)(int status,
opal_process_name_t proc,
void *cbdata);
END_C_DECLS

Просмотреть файл

@ -259,6 +259,9 @@ opal_err2str(int errnum, const char **errmsg)
case OPAL_ERR_HANDLERS_COMPLETE:
retval = "Event handler processing complete";
break;
case OPAL_ERR_PARTIAL_SUCCESS:
retval = "Partial success";
break;
default:
retval = "UNRECOGNIZED";
}

Просмотреть файл

@ -87,6 +87,7 @@ enum {
ORTE_ERR_AUTHENTICATION_FAILED = OPAL_ERR_AUTHENTICATION_FAILED,
ORTE_ERR_COMM_FAILURE = OPAL_ERR_COMM_FAILURE,
ORTE_ERR_DEBUGGER_RELEASE = OPAL_ERR_DEBUGGER_RELEASE,
ORTE_ERR_PARTIAL_SUCCESS = OPAL_ERR_PARTIAL_SUCCESS,
/* error codes specific to ORTE - don't forget to update
orte/util/error_strings.c when adding new error codes!!

19
orte/orted/orted_submit.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -1896,16 +1896,17 @@ static void launch_recv(int status, orte_process_name_t* sender,
if (ORTE_SUCCESS == ret) {
printf("[ORTE] Task: %d is launched! (Job ID: %s)\n", tool_job_index, ORTE_JOBID_PRINT(jobid));
} else {
/* unpack the offending proc and node */
/* unpack the offending proc and node, if sent */
cnt = 1;
opal_dss.unpack(buffer, &trk->jdata->state, &cnt, ORTE_JOB_STATE_T);
cnt = 1;
opal_dss.unpack(buffer, &proc, &cnt, ORTE_PROC);
proc->exit_code = ret;
app = (orte_app_context_t*)opal_pointer_array_get_item(trk->jdata->apps, proc->app_idx);
cnt = 1;
opal_dss.unpack(buffer, &node, &cnt, ORTE_NODE);
orte_print_aborted_job(trk->jdata, app, proc, node);
if (OPAL_SUCCESS == opal_dss.unpack(buffer, &trk->jdata->state, &cnt, ORTE_JOB_STATE_T)) {
cnt = 1;
opal_dss.unpack(buffer, &proc, &cnt, ORTE_PROC);
proc->exit_code = ret;
app = (orte_app_context_t*)opal_pointer_array_get_item(trk->jdata->apps, proc->app_idx);
cnt = 1;
opal_dss.unpack(buffer, &node, &cnt, ORTE_NODE);
orte_print_aborted_job(trk->jdata, app, proc, node);
}
}
/* Inform client */

39
orte/orted/pmix/pmix_server.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -84,20 +84,21 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
pmix_server_globals_t orte_pmix_server_globals = {0};
static opal_pmix_server_module_t pmix_server = {
pmix_server_client_connected_fn,
pmix_server_client_finalized_fn,
pmix_server_abort_fn,
pmix_server_fencenb_fn,
pmix_server_dmodex_req_fn,
pmix_server_publish_fn,
pmix_server_lookup_fn,
pmix_server_unpublish_fn,
pmix_server_spawn_fn,
pmix_server_connect_fn,
pmix_server_disconnect_fn,
pmix_server_register_events_fn,
pmix_server_deregister_events_fn,
NULL
.client_connected = pmix_server_client_connected_fn,
.client_finalized = pmix_server_client_finalized_fn,
.abort = pmix_server_abort_fn,
.fence_nb = pmix_server_fencenb_fn,
.direct_modex = pmix_server_dmodex_req_fn,
.publish = pmix_server_publish_fn,
.lookup = pmix_server_lookup_fn,
.unpublish = pmix_server_unpublish_fn,
.spawn = pmix_server_spawn_fn,
.connect = pmix_server_connect_fn,
.disconnect = pmix_server_disconnect_fn,
.register_events = pmix_server_register_events_fn,
.deregister_events = pmix_server_deregister_events_fn,
.query = pmix_server_query_fn,
.tool_connected = pmix_tool_connected_fn
};
void pmix_server_register_params(void)
@ -182,6 +183,7 @@ int pmix_server_init(void)
{
int rc;
opal_list_t info;
opal_value_t *kv;
if (orte_pmix_server_globals.initialized) {
return ORTE_SUCCESS;
@ -229,7 +231,6 @@ int pmix_server_init(void)
if (NULL != opal_hwloc_topology) {
char *xmlbuffer=NULL;
int len;
opal_value_t *kv;
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
@ -241,6 +242,11 @@ int pmix_server_init(void)
kv->type = OPAL_STRING;
opal_list_append(&info, &kv->super);
}
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_TOOL_SUPPORT);
kv->type = OPAL_BOOL;
kv->data.flag = true;
opal_list_append(&info, &kv->super);
/* setup the local server */
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) {
@ -648,6 +654,9 @@ static void opcon(orte_pmix_server_op_caddy_t *p)
p->procs = NULL;
p->eprocs = NULL;
p->info = NULL;
p->cbfunc = NULL;
p->infocbfunc = NULL;
p->toolcbfunc = NULL;
p->cbdata = NULL;
}
OBJ_CLASS_INSTANCE(orte_pmix_server_op_caddy_t,

120
orte/orted/pmix/pmix_server_gen.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -32,6 +32,7 @@
#include <unistd.h>
#endif
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/dss/dss.h"
@ -40,6 +41,7 @@
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/plm/base/plm_private.h"
#include "pmix_server_internal.h"
@ -339,3 +341,121 @@ void pmix_server_notify(int status, orte_process_name_t* sender,
OBJ_RELEASE(cd);
}
}
/*
 * Event handler that answers a query request after it has been
 * thread-shifted by pmix_server_query_fn.
 *
 * Walks the requested keys in cd->info. For OPAL_PMIX_QUERY_NAMESPACES
 * the value is filled in with a comma-delimited string of the jobids
 * found in orte_job_data, excluding this process's own job (NULL when
 * there are none). Other keys are left untouched.
 *
 * The callback receives ORTE_SUCCESS when every key was answered,
 * ORTE_ERR_PARTIAL_SUCCESS when only some were, and ORTE_ERR_NOT_FOUND
 * when none were. The caddy is released before returning.
 */
static void _query(int sd, short args, void *cbdata)
{
    orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
    opal_value_t *kv;
    orte_job_t *jdata;
    int rc;
    size_t nresults=0;
    uint32_t key;
    void *nptr;
    char **nspaces;
    char nspace[512];

    /* see what they wanted */
    OPAL_LIST_FOREACH(kv, cd->info, opal_value_t) {
        if (0 != strcmp(kv->key, OPAL_PMIX_QUERY_NAMESPACES)) {
            /* not a key we know how to answer */
            continue;
        }

        /* start from an empty list for every matching key so a
         * repeated key does not accumulate duplicates */
        nspaces = NULL;

        /* get the current jobids */
        rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jdata, &nptr);
        while (OPAL_SUCCESS == rc) {
            if (ORTE_PROC_MY_NAME->jobid != jdata->jobid) {
                memset(nspace, 0, sizeof(nspace));
                (void)opal_snprintf_jobid(nspace, sizeof(nspace), jdata->jobid);
                opal_argv_append_nosize(&nspaces, nspace);
            }
            rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jdata, nptr, &nptr);
        }

        /* join the results into a single comma-delimited string */
        kv->type = OPAL_STRING;
        if (NULL != nspaces) {
            kv->data.string = opal_argv_join(nspaces, ',');
            /* the joined string is a separate copy - the argv itself
             * is no longer needed */
            opal_argv_free(nspaces);
        } else {
            kv->data.string = NULL;
        }
        ++nresults;
    }

    if (0 == nresults) {
        rc = ORTE_ERR_NOT_FOUND;
    } else if (nresults < opal_list_get_size(cd->info)) {
        rc = ORTE_ERR_PARTIAL_SUCCESS;
    } else {
        rc = ORTE_SUCCESS;
    }
    cd->infocbfunc(rc, cd->info, cd->cbdata, NULL, NULL);

    /* the caddy was created solely for this thread-shift; release it
     * the same way _toolconn does */
    OBJ_RELEASE(cd);
}
/*
 * Host entry point for a query request.
 *
 * Packages the request in a caddy and activates an event on
 * orte_event_base so that _query processes it there. The directives
 * are not used.
 *
 * Returns OPAL_ERR_BAD_PARAM when either the info list or the callback
 * is missing, ORTE_SUCCESS once the request has been queued.
 */
int pmix_server_query_fn(opal_process_name_t *requestor,
                         opal_list_t *info, opal_list_t *directives,
                         opal_pmix_info_cbfunc_t cbfunc, void *cbdata)
{
    orte_pmix_server_op_caddy_t *req;

    if (NULL == cbfunc || NULL == info) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* need to threadshift this request */
    req = OBJ_NEW(orte_pmix_server_op_caddy_t);
    req->cbdata = cbdata;
    req->infocbfunc = cbfunc;
    req->info = info;
    /* NOTE(review): only the caller's pointer is stored here; _query
     * does not read it, but confirm it outlives the event before
     * anyone starts using it */
    req->proc = requestor;

    opal_event_set(orte_event_base, &(req->ev), -1,
                   OPAL_EV_WRITE, _query, req);
    opal_event_set_priority(&(req->ev), ORTE_MSG_PRI);
    opal_event_active(&(req->ev), OPAL_EV_WRITE, 1);

    return ORTE_SUCCESS;
}
/*
 * Event handler that answers a tool-connection request after it has
 * been thread-shifted by pmix_tool_connected_fn.
 *
 * On the HNP a new jobid is created and returned to the tool with
 * vpid 0, together with the status of the jobid creation. Elsewhere
 * the request is answered with ORTE_ERR_NOT_SUPPORTED. The caddy is
 * released in both cases.
 */
static void _toolconn(int sd, short args, void *cbdata)
{
    orte_pmix_server_op_caddy_t *cd = (orte_pmix_server_op_caddy_t*)cbdata;
    orte_job_t jdata;
    /* zeroed so the error path below never passes an indeterminate
     * name by value; the contents carry no meaning when the status is
     * an error */
    orte_process_name_t tool = {0};
    int rc;

    /* if we are the HNP, we can directly assign the jobid */
    if (ORTE_PROC_IS_HNP) {
        /* NOTE(review): jdata is constructed but never destructed -
         * confirm whether its destructor is safe to run here */
        OBJ_CONSTRUCT(&jdata, orte_job_t);
        rc = orte_plm_base_create_jobid(&jdata);
        tool.jobid = jdata.jobid;
        tool.vpid = 0;
        if (NULL != cd->toolcbfunc) {
            cd->toolcbfunc(rc, tool, cd->cbdata);
        }
        OBJ_RELEASE(cd);
        return;
    }

    /* otherwise, we have to send the request to the HNP.
     * Eventually, when we switch to nspace instead of an
     * integer jobid, we'll just locally assign this value */
    if (NULL != cd->toolcbfunc) {
        cd->toolcbfunc(ORTE_ERR_NOT_SUPPORTED, tool, cd->cbdata);
    }
    OBJ_RELEASE(cd);
}
/*
 * Host entry point for a tool-connection request.
 *
 * Logs the request, packages it in a caddy and activates an event on
 * orte_event_base so that _toolconn processes it there; the answer is
 * delivered through cbfunc.
 */
void pmix_tool_connected_fn(opal_list_t *info,
                            opal_pmix_tool_connection_cbfunc_t cbfunc,
                            void *cbdata)
{
    orte_pmix_server_op_caddy_t *req;

    opal_output(0, "TOOL CONNECTION REQUEST RECVD");

    /* need to threadshift this request */
    req = OBJ_NEW(orte_pmix_server_op_caddy_t);
    req->cbdata = cbdata;
    req->toolcbfunc = cbfunc;
    req->info = info;

    opal_event_set(orte_event_base, &(req->ev), -1,
                   OPAL_EV_WRITE, _toolconn, req);
    opal_event_set_priority(&(req->ev), ORTE_MSG_PRI);
    opal_event_active(&(req->ev), OPAL_EV_WRITE, 1);
}

8
orte/orted/pmix/pmix_server_internal.h Обычный файл → Исполняемый файл
Просмотреть файл

@ -82,6 +82,8 @@ typedef struct {
opal_list_t *eprocs;
opal_list_t *info;
opal_pmix_op_cbfunc_t cbfunc;
opal_pmix_info_cbfunc_t infocbfunc;
opal_pmix_tool_connection_cbfunc_t toolcbfunc;
void *cbdata;
} orte_pmix_server_op_caddy_t;
OBJ_CLASS_DECLARATION(orte_pmix_server_op_caddy_t);
@ -187,6 +189,12 @@ extern int pmix_server_register_events_fn(opal_list_t *info,
extern int pmix_server_deregister_events_fn(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
extern int pmix_server_query_fn(opal_process_name_t *requestor,
opal_list_t *info, opal_list_t *directives,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
extern void pmix_tool_connected_fn(opal_list_t *info,
opal_pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
/* declare the RML recv functions for responses */
extern void pmix_server_launch_resp(int status, orte_process_name_t* sender,

Просмотреть файл

@ -1,7 +1,7 @@
PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits \
orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix opal_interface orte_spin segfault \
orte_exit test-time event-threads psm_keygen regex orte_errors evpri-test opal-evpri-test evpri-test2 \
mapper reducer opal_hotel orte_dfs ulfm
mapper reducer opal_hotel orte_dfs ulfm pmixtool
all: $(PROGS)
@ -16,3 +16,6 @@ clean:
oob_stress:
ortecc -o oob_stress oob_stress.c -lm
pmixtool:
ortecc -o pmixtool pmixtool.c -lpmix

78
orte/test/system/pmixtool.c Исполняемый файл
Просмотреть файл

@ -0,0 +1,78 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <pmix_tool.h>
int main(int argc, char **argv)
{
pmix_status_t rc;
pmix_proc_t myproc;
pmix_info_t *info;
size_t ninfo;
/* init us */
if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) {
fprintf(stderr, "PMIx_tool_init failed: %s\n", PMIx_Error_string(rc));
exit(rc);
}
fprintf(stderr, "Tool ns %s rank %d: Running\n", myproc.nspace, myproc.rank);
/* query something */
ninfo = 1;
PMIX_INFO_CREATE(info, ninfo);
(void)strncpy(info[0].key, PMIX_QUERY_NAMESPACES, PMIX_MAX_KEYLEN);
if (PMIX_SUCCESS != (rc = PMIx_Query_info(info, ninfo))) {
fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
if (0 != strncmp(info[0].key, PMIX_QUERY_NAMESPACES, PMIX_MAX_KEYLEN)) {
fprintf(stderr, "tool ns %s rank %d: PMIx_Query_info key[0] wrong: %s vs %s\n",
myproc.nspace, myproc.rank, info[0].key, PMIX_QUERY_NAMESPACES);
}
if (PMIX_STRING != info[0].value.type) {
fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info key[0] wrong type: %d vs %d\n",
myproc.nspace, myproc.rank, info[0].value.type, PMIX_STRING);
}
fprintf(stderr, "Tool ns %s rank %d: PMIx_Query_info key[0] returned %s\n",
myproc.nspace, myproc.rank,
(NULL == info[0].value.data.string) ? "NULL" : info[0].value.data.string);
PMIX_INFO_FREE(info, ninfo);
done:
/* finalize us */
fprintf(stderr, "Tool ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
if (PMIX_SUCCESS != (rc = PMIx_tool_finalize())) {
fprintf(stderr, "Tool ns %s rank %d:PMIx_tool_finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
} else {
fprintf(stderr, "Tool ns %s rank %d:PMIx_tool_finalize successfully completed\n", myproc.nspace, myproc.rank);
}
fflush(stderr);
return(rc);
}

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -237,6 +237,9 @@ int orte_err2str(int errnum, const char **errmsg)
case ORTE_ERR_DEBUGGER_RELEASE:
retval = "Debugger release";
break;
case ORTE_ERR_PARTIAL_SUCCESS:
retval = "Partial success";
break;
default:
if (orte_report_silent_errors) {
retval = "Unknown error";