From 011403c04a7368f7266e6b6da27e41f28b49f502 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 25 Feb 2016 11:05:38 -0600 Subject: [PATCH] Fix a number of issues, some of which have lingered for a long time: * provide a more reliable way of determining that a process is a singleton by leveraging the schizo framework. Add new components for slurm, alps, and orte to detect when we are in a managed environment, and if we have been launched by mpirun or a native launcher. Set the correct envars to control ess and pmix selection in each case. * change the relative priority of the pmix120 and pmix112 components to make pmix120 the default * fix singleton comm-spawn by correctly setting the num_apps field of the orte_job_t created by the daemon - this fixes a segfault in register_nspace on newly created daemons * ensure orterun doesn't propagate any ess or pmix directives in its environment * Clean up a few valgrind issues and memory leaks * Fix a race condition that prevented the client from completing notification registrations (missing thread shift) * Ensure the schizo/alps component detects launch by mpirun --- opal/mca/pmix/isolated/pmix_isolated.c | 24 +- .../pmix/isolated/pmix_isolated_component.c | 14 +- opal/mca/pmix/pmix112/configure.m4 | 2 +- opal/mca/pmix/pmix112/pmix_pmix1_component.c | 4 +- opal/mca/pmix/pmix120/configure.m4 | 2 +- .../pmix120/pmix/src/buffer_ops/buffer_ops.h | 10 - .../pmix120/pmix/src/client/pmix_client.c | 101 ++++--- .../pmix120/pmix/src/client/pmix_client_get.c | 2 +- .../pmix120/pmix/src/include/pmix_globals.c | 19 ++ .../pmix120/pmix/src/include/pmix_globals.h | 251 ++++++++++++++++-- .../pmix120/pmix/src/server/pmix_server.c | 66 +---- .../pmix120/pmix/src/server/pmix_server_ops.h | 48 +--- opal/mca/pmix/pmix120/pmix/src/usock/usock.h | 103 +------ .../pmix120/pmix/src/usock/usock_sendrecv.c | 43 ++- .../mca/pmix/pmix120/pmix_pmix120_component.c | 10 +- orte/mca/ess/base/ess_base_fns.c | 3 + 
orte/mca/ess/base/ess_base_std_orted.c | 18 +- orte/mca/ess/base/ess_base_std_tool.c | 16 +- orte/mca/ess/hnp/ess_hnp_module.c | 19 +- orte/mca/ess/pmi/ess_pmi_component.c | 54 ++-- orte/mca/ess/pmi/ess_pmi_module.c | 12 + orte/mca/ess/singleton/ess_singleton.h | 14 +- .../ess/singleton/ess_singleton_component.c | 122 ++++----- orte/mca/ess/singleton/ess_singleton_module.c | 36 ++- orte/mca/schizo/alps/Makefile.am | 35 +++ orte/mca/schizo/alps/configure.m4 | 47 ++++ orte/mca/schizo/alps/owner.txt | 7 + orte/mca/schizo/alps/schizo_alps.c | 114 ++++++++ orte/mca/schizo/alps/schizo_alps.h | 29 ++ orte/mca/schizo/alps/schizo_alps_component.c | 53 ++++ orte/mca/schizo/base/base.h | 3 + orte/mca/schizo/base/schizo_base_frame.c | 12 +- orte/mca/schizo/base/schizo_base_select.c | 2 +- orte/mca/schizo/base/schizo_base_stubs.c | 45 ++++ orte/mca/schizo/ompi/schizo_ompi.c | 8 +- orte/mca/schizo/ompi/schizo_ompi_component.c | 8 + orte/mca/schizo/orte/Makefile.am | 35 +++ orte/mca/schizo/orte/schizo_orte.c | 90 +++++++ orte/mca/schizo/orte/schizo_orte.h | 29 ++ orte/mca/schizo/orte/schizo_orte_component.c | 52 ++++ orte/mca/schizo/schizo.h | 34 ++- .../schizo/singularity/schizo_singularity.c | 5 +- .../schizo_singularity_component.c | 8 + orte/mca/schizo/slurm/Makefile.am | 35 +++ orte/mca/schizo/slurm/configure.m4 | 41 +++ orte/mca/schizo/slurm/owner.txt | 7 + orte/mca/schizo/slurm/schizo_slurm.c | 135 ++++++++++ orte/mca/schizo/slurm/schizo_slurm.h | 29 ++ .../mca/schizo/slurm/schizo_slurm_component.c | 52 ++++ orte/orted/orted_main.c | 1 + orte/runtime/orte_finalize.c | 7 +- orte/runtime/orte_init.c | 19 +- orte/tools/orterun/orterun.c | 5 + 53 files changed, 1436 insertions(+), 504 deletions(-) create mode 100644 orte/mca/schizo/alps/Makefile.am create mode 100644 orte/mca/schizo/alps/configure.m4 create mode 100644 orte/mca/schizo/alps/owner.txt create mode 100644 orte/mca/schizo/alps/schizo_alps.c create mode 100644 orte/mca/schizo/alps/schizo_alps.h create mode 100644 
orte/mca/schizo/alps/schizo_alps_component.c create mode 100644 orte/mca/schizo/orte/Makefile.am create mode 100644 orte/mca/schizo/orte/schizo_orte.c create mode 100644 orte/mca/schizo/orte/schizo_orte.h create mode 100644 orte/mca/schizo/orte/schizo_orte_component.c create mode 100644 orte/mca/schizo/slurm/Makefile.am create mode 100644 orte/mca/schizo/slurm/configure.m4 create mode 100644 orte/mca/schizo/slurm/owner.txt create mode 100644 orte/mca/schizo/slurm/schizo_slurm.c create mode 100644 orte/mca/schizo/slurm/schizo_slurm.h create mode 100644 orte/mca/schizo/slurm/schizo_slurm_component.c diff --git a/opal/mca/pmix/isolated/pmix_isolated.c b/opal/mca/pmix/isolated/pmix_isolated.c index e338bcae82..83adb79479 100644 --- a/opal/mca/pmix/isolated/pmix_isolated.c +++ b/opal/mca/pmix/isolated/pmix_isolated.c @@ -113,26 +113,26 @@ const opal_pmix_base_module_t opal_pmix_isolated_module = { .register_jobid = isolated_register_jobid }; -static int pmix_init_count = 0; -static opal_process_name_t pmix_pname; +static int isolated_init_count = 0; +static opal_process_name_t isolated_pname; static int isolated_init(void) { int rc; opal_value_t kv; - ++pmix_init_count; + ++isolated_init_count; /* store our name in the opal_proc_t so that * debug messages will make sense - an upper * layer will eventually overwrite it, but that * won't do any harm */ - pmix_pname.jobid = 1; - pmix_pname.vpid = 0; - opal_proc_set_name(&pmix_pname); + isolated_pname.jobid = 1; + isolated_pname.vpid = 0; + opal_proc_set_name(&isolated_pname); opal_output_verbose(10, opal_pmix_base_framework.framework_output, "%s pmix:isolated: assigned tmp name %d %d", - OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid); + OPAL_NAME_PRINT(isolated_pname),isolated_pname.jobid,isolated_pname.vpid); // setup hash table opal_pmix_base_hash_init(); @@ -248,11 +248,11 @@ err_exit: static int isolated_fini(void) { - if (0 == pmix_init_count) { + if (0 == isolated_init_count) { return OPAL_SUCCESS; } - 
if (0 != --pmix_init_count) { + if (0 != --isolated_init_count) { return OPAL_SUCCESS; } opal_pmix_base_hash_finalize(); @@ -261,7 +261,7 @@ static int isolated_fini(void) static int isolated_initialized(void) { - if (0 < pmix_init_count) { + if (0 < isolated_init_count) { return 1; } return 0; @@ -323,11 +323,11 @@ static int isolated_put(opal_pmix_scope_t scope, "%s pmix:isolated isolated_put key %s scope %d\n", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); - if (!pmix_init_count) { + if (!isolated_init_count) { return OPAL_ERROR; } - rc = opal_pmix_base_store(&pmix_pname, kv); + rc = opal_pmix_base_store(&isolated_pname, kv); return rc; } diff --git a/opal/mca/pmix/isolated/pmix_isolated_component.c b/opal/mca/pmix/isolated/pmix_isolated_component.c index 73c32602b0..f4d9b7623d 100644 --- a/opal/mca/pmix/isolated/pmix_isolated_component.c +++ b/opal/mca/pmix/isolated/pmix_isolated_component.c @@ -79,16 +79,8 @@ static int isolated_close(void) static int isolated_component_query(mca_base_module_t **module, int *priority) { - /* if we are in a Singularity container, then we cannot spawn an - * HNP and are truly on our own and cannot call comm_spawn or - * any of its friends */ - if (NULL != getenv("SINGULARITY_CONTAINER")) { - *priority = 100; - *module = (mca_base_module_t *)&opal_pmix_isolated_module; - return OPAL_SUCCESS; - } - /* otherwise, ignore us */ + /* ignore us unless requested */ *priority = 0; - *module = NULL; - return OPAL_ERR_TAKE_NEXT_OPTION; + *module = (mca_base_module_t *)&opal_pmix_isolated_module; + return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/pmix112/configure.m4 b/opal/mca/pmix/pmix112/configure.m4 index 1d920b30d6..9555789d33 100644 --- a/opal/mca/pmix/pmix112/configure.m4 +++ b/opal/mca/pmix/pmix112/configure.m4 @@ -41,7 +41,7 @@ AC_DEFUN([MCA_opal_pmix_pmix112_CONFIG],[ opal_pmix_pmix112_save_LDFLAGS=$LDFLAGS opal_pmix_pmix112_save_LIBS=$LIBS - opal_pmix_pmix112_args="--enable-embedded-mode 
--with-pmix-symbol-prefix=opal_pmix_pmix112_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" + opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ --disable-visibility --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" AS_IF([test "$enable_debug" = "yes"], [opal_pmix_pmix112_args="--enable-debug $opal_pmix_pmix112_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], diff --git a/opal/mca/pmix/pmix112/pmix_pmix1_component.c b/opal/mca/pmix/pmix112/pmix_pmix1_component.c index a53aa14d7f..50ca4cafb9 100644 --- a/opal/mca/pmix/pmix112/pmix_pmix1_component.c +++ b/opal/mca/pmix/pmix112/pmix_pmix1_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -95,7 +95,7 @@ static int pmix112_component_query(mca_base_module_t **module, int *priority) if (NULL != (t = getenv("PMIX_SERVER_URI")) || NULL != (id = getenv("PMIX_ID"))) { /* if PMIx is present, then we are a client and need to use it */ - *priority = 100; + *priority = 80; } else { /* we could be a server, so we still need to be considered */ *priority = 5; diff --git a/opal/mca/pmix/pmix120/configure.m4 b/opal/mca/pmix/pmix120/configure.m4 index b6cfe6e2fe..0e9eb2a072 100644 --- a/opal/mca/pmix/pmix120/configure.m4 +++ b/opal/mca/pmix/pmix120/configure.m4 @@ -41,7 +41,7 @@ AC_DEFUN([MCA_opal_pmix_pmix120_CONFIG],[ opal_pmix_pmix120_save_LDFLAGS=$LDFLAGS opal_pmix_pmix120_save_LIBS=$LIBS - opal_pmix_pmix120_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix120_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" + opal_pmix_pmix120_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix120_ --disable-visibility --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" AS_IF([test "$enable_debug" = "yes"], [opal_pmix_pmix120_args="--enable-debug $opal_pmix_pmix120_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], diff --git a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/buffer_ops.h b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/buffer_ops.h index d8538cb5f9..d79ee0db07 100644 --- a/opal/mca/pmix/pmix120/pmix/src/buffer_ops/buffer_ops.h +++ b/opal/mca/pmix/pmix120/pmix/src/buffer_ops/buffer_ops.h @@ -38,16 +38,6 @@ BEGIN_C_DECLS -/* internally used object for transferring data - * to/from the server and for storing in the - * hash tables */ -typedef struct { - pmix_list_item_t super; - char *key; - pmix_value_t *value; -} pmix_kval_t; -PMIX_CLASS_DECLARATION(pmix_kval_t); - /* A non-API function for something that happens in a number * of places throughout the 
code base - transferring a value to * another pmix_value_t structure diff --git a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client.c index d22e1e132f..22f9315725 100644 --- a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client.c @@ -62,6 +62,7 @@ static const char pmix_version_string[] = PMIX_VERSION; #include "src/util/progress_threads.h" #include "src/usock/usock.h" #include "src/sec/pmix_sec.h" +#include "src/include/pmix_globals.h" #include "pmix_client_ops.h" @@ -279,6 +280,7 @@ int PMIx_Init(pmix_proc_t *proc) } /* default to our internal errhandler */ + errhandler_ref = 0; pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref); /* see if debug is requested */ if (NULL != (evar = getenv("PMIX_DEBUG"))) { @@ -631,7 +633,7 @@ pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val) cb->value = val; /* pass this into the event library for thread protection */ - PMIX_THREAD_SHIFT(cb, _putfn); + PMIX_THREADSHIFT(cb, _putfn); /* wait for the result */ PMIX_WAIT_FOR_COMPLETION(cb->active); @@ -715,7 +717,7 @@ pmix_status_t PMIx_Commit(void) cb->active = true; /* pass this into the event library for thread protection */ - PMIX_THREAD_SHIFT(cb, _commitfn); + PMIX_THREADSHIFT(cb, _commitfn); /* wait for the result */ PMIX_WAIT_FOR_COMPLETION(cb->active); @@ -792,7 +794,7 @@ pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace, } /* pass this into the event library for thread protection */ - PMIX_THREAD_SHIFT(cb, _peersfn); + PMIX_THREADSHIFT(cb, _peersfn); /* wait for the result */ PMIX_WAIT_FOR_COMPLETION(cb->active); @@ -850,7 +852,7 @@ pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist) } /* pass this into the event library for thread protection */ - PMIX_THREAD_SHIFT(cb, _nodesfn); + PMIX_THREADSHIFT(cb, _nodesfn); /* wait for the result */ PMIX_WAIT_FOR_COMPLETION(cb->active); @@ -1297,51 
+1299,50 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, PMIX_RELEASE(cb); } -void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo, - pmix_notification_fn_t errhandler, - pmix_errhandler_reg_cbfunc_t cbfunc, - void *cbdata) +static void reg_errhandler(int sd, short args, void *cbdata) { /* add err handler, process info keys and register for events and call the callback */ + pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; int index = 0; pmix_buffer_t *msg; pmix_cb_t *cb; pmix_status_t rc; pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: register errhandler with %d infos", (int)ninfo); + "pmix: register errhandler with %d infos", (int)cd->ninfo); /* check if this handler is already registered if so return error */ - if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(info, ninfo, &index))) { + if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(cd->info, cd->ninfo, &index))) { /* complete request with error status and return its original reference */ pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register errhandler - already registered"); - cbfunc(PMIX_EXISTS, index, cbdata); + cd->cbfunc.errregcbfn(PMIX_EXISTS, index, cd->cbdata); } else if (PMIX_ERR_GRP_FOUND == rc) { /* just acknowledge it */ - cbfunc(PMIX_SUCCESS, index, cbdata); - } else if (PMIX_ERR_DFLT_FOUND == rc && NULL == info) { + cd->cbfunc.errregcbfn(PMIX_SUCCESS, index, cd->cbdata); + } else if (PMIX_ERR_DFLT_FOUND == rc && NULL == cd->info) { /* if they are registering a default errhandler, then * overwrite the existing one with it - the index will * contain its location */ - pmix_add_errhandler(errhandler, info, ninfo, &index); + rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index); + cd->cbfunc.errregcbfn(rc, index, cd->cbdata); } else { /* need to add this errhandler */ - if (PMIX_SUCCESS != (rc = pmix_add_errhandler(errhandler, info, ninfo, &index))) { + if (PMIX_SUCCESS != (rc = pmix_add_errhandler(cd->err, cd->info, 
cd->ninfo, &index))) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register errhandler - error status rc=%d", rc); /* complete request with error*/ - cbfunc(rc, index, cbdata); + cd->cbfunc.errregcbfn(rc, index, cd->cbdata); } else { pmix_output_verbose(10, pmix_globals.debug_output, - "pmix: register errhandler - added index=%d, ninfo =%lu", index, ninfo); + "pmix: register errhandler - added index=%d, ninfo =%lu", index, cd->ninfo); msg = PMIX_NEW(pmix_buffer_t); - if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_REGEVENTS_CMD, info, ninfo))) { + if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_REGEVENTS_CMD, cd->info, cd->ninfo))) { pmix_output_verbose(2, pmix_globals.debug_output, "pmix: register errhandler - pack events failed status=%d", rc); PMIX_RELEASE(msg); pmix_remove_errhandler(index); - cbfunc(PMIX_ERR_PACK_FAILURE, -1, cbdata); + cd->cbfunc.errregcbfn(PMIX_ERR_PACK_FAILURE, -1, cd->cbdata); } else { /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when @@ -1349,8 +1350,8 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo, pmix_output_verbose(10, pmix_globals.debug_output, "pmix: register errhandler - pack events success status=%d", rc); cb = PMIX_NEW(pmix_cb_t); - cb->errreg_cbfunc = cbfunc; - cb->cbdata = cbdata; + cb->errreg_cbfunc = cd->cbfunc.errregcbfn; + cb->cbdata = cd->cbdata; cb->errhandler_ref = index; /* push the message into our event base to send to the server */ PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, regevents_cbfunc, cb); @@ -1359,6 +1360,28 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo, } } +void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo, + pmix_notification_fn_t errhandler, + pmix_errhandler_reg_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_shift_caddy_t *cd; + + /* need to thread shift this request */ + cd = PMIX_NEW(pmix_shift_caddy_t); + cd->info = info; + cd->ninfo = 
ninfo; + cd->err = errhandler; + cd->cbfunc.errregcbfn = cbfunc; + cd->cbdata = cbdata; + + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix_client_register_errhandler shifting to server thread"); + + PMIX_THREADSHIFT(cd, reg_errhandler); +} + + static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { @@ -1388,41 +1411,55 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, PMIX_RELEASE(cb); } -void pmix_client_deregister_errhandler(int errhandler_ref, - pmix_op_cbfunc_t cbfunc, - void *cbdata) +static void dereg_errhandler(int sd, short args, void *cbdata) { + pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata; pmix_status_t rc; pmix_error_reg_info_t *errreg; pmix_buffer_t *msg; pmix_cb_t *cb; pmix_output_verbose(2, pmix_globals.debug_output, - "pmix_client_deregister_errhandler errhandler_ref = %d", errhandler_ref); + "pmix_client_deregister_errhandler errhandler_ref = %d", cd->ref); - errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, errhandler_ref); + errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, cd->ref); if (NULL != errreg ) { msg = PMIX_NEW(pmix_buffer_t); if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_DEREGEVENTS_CMD, errreg->info, errreg->ninfo))) { PMIX_RELEASE(msg); - pmix_remove_errhandler(errhandler_ref); - cbfunc(PMIX_ERR_PACK_FAILURE, cbdata); + pmix_remove_errhandler(cd->ref); + cd->cbfunc.opcbfn(PMIX_ERR_PACK_FAILURE, cd->cbdata); } else { /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the server acks/nacks the register events request*/ cb = PMIX_NEW(pmix_cb_t); - cb->op_cbfunc = cbfunc; - cb->cbdata = cbdata; - cb->errhandler_ref = errhandler_ref; + cb->op_cbfunc = cd->cbfunc.opcbfn; + cb->cbdata = cd->cbdata; + cb->errhandler_ref = cd->ref; /* push the message into our event base to send to the server */ 
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, deregevents_cbfunc, cb); } } else { - cbfunc(PMIX_ERR_NOT_FOUND, cbdata); + cd->cbfunc.opcbfn(PMIX_ERR_NOT_FOUND, cd->cbdata); } + OBJ_RELEASE(cd); } +void pmix_client_deregister_errhandler(int errhandler_ref, + pmix_op_cbfunc_t cbfunc, + void *cbdata) +{ + pmix_shift_caddy_t *cd; + + /* need to thread shift this request */ + cd = PMIX_NEW(pmix_shift_caddy_t); + cd->cbfunc.opcbfn = cbfunc; + cd->cbdata = cbdata; + cd->ref = errhandler_ref; + PMIX_THREADSHIFT(cd, dereg_errhandler); + } + static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { diff --git a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_get.c index 8ff0d84081..36e21c1314 100644 --- a/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix120/pmix/src/client/pmix_client_get.c @@ -164,7 +164,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, cb->ninfo = ninfo; cb->value_cbfunc = cbfunc; cb->cbdata = cbdata; - PMIX_THREAD_SHIFT(cb, _getnbfn); + PMIX_THREADSHIFT(cb, _getnbfn); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.c index da332c752a..0b7dc50176 100644 --- a/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.c @@ -175,3 +175,22 @@ static void errdes(pmix_error_reg_info_t *p) PMIX_CLASS_INSTANCE(pmix_error_reg_info_t, pmix_object_t, errcon, errdes); + +static void scon(pmix_shift_caddy_t *p) +{ + p->active = false; + p->kv = NULL; + p->cbfunc.relfn = NULL; + p->cbfunc.errregcbfn = NULL; + p->cbfunc.opcbfn = NULL; + p->cbdata = NULL; +} +static void scdes(pmix_shift_caddy_t *p) +{ + if (NULL != p->kv) { + PMIX_RELEASE(p->kv); + } +} +PMIX_CLASS_INSTANCE(pmix_shift_caddy_t, + pmix_object_t, + scon, scdes); diff --git 
a/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.h index 3f7b83fd0d..37a1d47fc7 100644 --- a/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix120/pmix/src/include/pmix_globals.h @@ -38,38 +38,96 @@ BEGIN_C_DECLS -#define PMIX_MAX_CRED_SIZE 131072 // set max at 128kbytes -#define PMIX_MAX_ERROR_REGISTRATIONS 5 // maximum number of error handlers that can be registered +#define PMIX_MAX_CRED_SIZE 131072 // set max at 128kbytes +#define PMIX_MAX_ERROR_REGISTRATIONS 128 // maximum number of error handlers that can be registered -/* define a structure for tracking error registrations */ -typedef struct { - pmix_object_t super; - bool sglhdlr; // registers a specific error status handler - pmix_notification_fn_t errhandler; /* registered err handler callback fn */ - pmix_info_t *info; /* error info keys registered with the handler */ - size_t ninfo; /* size of info */ -} pmix_error_reg_info_t; -PMIX_CLASS_DECLARATION(pmix_error_reg_info_t); +/**** ENUM DEFINITIONS ****/ +/* define a command type for communicating to the + * pmix server */ +#define PMIX_CMD PMIX_UINT32 -/* define a global construct that includes values that must be shared - * between various parts of the code library. 
Both the client - * and server libraries must instance this structure */ +/* define some commands */ +typedef enum { + PMIX_REQ_CMD, + PMIX_ABORT_CMD, + PMIX_COMMIT_CMD, + PMIX_FENCENB_CMD, + PMIX_GETNB_CMD, + PMIX_FINALIZE_CMD, + PMIX_PUBLISHNB_CMD, + PMIX_LOOKUPNB_CMD, + PMIX_UNPUBLISHNB_CMD, + PMIX_SPAWNNB_CMD, + PMIX_CONNECTNB_CMD, + PMIX_DISCONNECTNB_CMD, + PMIX_NOTIFY_CMD, + PMIX_REGEVENTS_CMD, + PMIX_DEREGEVENTS_CMD, +} pmix_cmd_t; + +/* define a set of flags to direct collection + * of data during operations */ +typedef enum { + PMIX_COLLECT_INVALID = -1, + PMIX_COLLECT_NO, + PMIX_COLLECT_YES, + PMIX_COLLECT_MAX +} pmix_collect_t; + + +/**** MESSAGING STRUCTURES ****/ +/* header for messages */ typedef struct { - int init_cntr; // #times someone called Init - #times called Finalize - pmix_proc_t myid; - uid_t uid; // my effective uid - gid_t gid; // my effective gid int pindex; - pmix_event_base_t *evbase; - int debug_output; - pmix_pointer_array_t errregs; // my error handler registrations. 
- bool server; - bool connected; - pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about - pmix_buffer_t *cache_local; // data PUT by me to local scope - pmix_buffer_t *cache_remote; // data PUT by me to remote scope -} pmix_globals_t; + uint32_t tag; + size_t nbytes; +} pmix_usock_hdr_t; +/* internally used object for transferring data + * to/from the server and for storing in the + * hash tables */ +typedef struct { + pmix_list_item_t super; + char *key; + pmix_value_t *value; +} pmix_kval_t; +PMIX_CLASS_DECLARATION(pmix_kval_t); + +// forward declaration +struct pmix_peer_t; + +/* internally used cbfunc */ +typedef void (*pmix_usock_cbfunc_t)(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, + pmix_buffer_t *buf, void *cbdata); + +/* usock structure for sending a message */ +typedef struct { + pmix_list_item_t super; + pmix_event_t ev; + pmix_usock_hdr_t hdr; + pmix_buffer_t *data; + bool hdr_sent; + char *sdptr; + size_t sdbytes; +} pmix_usock_send_t; +PMIX_CLASS_DECLARATION(pmix_usock_send_t); + +/* usock structure for recving a message */ +typedef struct { + pmix_list_item_t super; + pmix_event_t ev; + struct pmix_peer_t *peer; + int sd; + pmix_usock_hdr_t hdr; + char *data; + bool hdr_recvd; + char *rdptr; + size_t rdbytes; +} pmix_usock_recv_t; +PMIX_CLASS_DECLARATION(pmix_usock_recv_t); + + +/**** PEER STRUCTURES ****/ /* objects for tracking active nspaces */ typedef struct { pmix_object_t super; @@ -105,6 +163,39 @@ typedef struct pmix_rank_info_t { } pmix_rank_info_t; PMIX_CLASS_DECLARATION(pmix_rank_info_t); +/* object for tracking peers - each peer can have multiple + * connections. This can occur if the initial app executes + * a fork/exec, and the child initiates its own connection + * back to the PMIx server. 
Thus, the trackers should be "indexed" + * by the socket, not the process nspace/rank */ +typedef struct pmix_peer_t { + pmix_object_t super; + pmix_rank_info_t *info; + int proc_cnt; + void *server_object; + int index; + int sd; + pmix_event_t send_event; /**< registration with event thread for send events */ + bool send_ev_active; + pmix_event_t recv_event; /**< registration with event thread for recv events */ + bool recv_ev_active; + pmix_list_t send_queue; /**< list of messages to send */ + pmix_usock_send_t *send_msg; /**< current send in progress */ + pmix_usock_recv_t *recv_msg; /**< current recv in progress */ +} pmix_peer_t; +PMIX_CLASS_DECLARATION(pmix_peer_t); + + +/* define a structure for tracking error registrations */ +typedef struct { + pmix_object_t super; + bool sglhdlr; // registers a specific error status handler + pmix_notification_fn_t errhandler; /* registered err handler callback fn */ + pmix_info_t *info; /* error info keys registered with the handler */ + size_t ninfo; /* size of info */ +} pmix_error_reg_info_t; +PMIX_CLASS_DECLARATION(pmix_error_reg_info_t); + typedef struct { pmix_list_item_t super; char *name; // name of the node @@ -112,6 +203,112 @@ typedef struct { } pmix_nrec_t; PMIX_CLASS_DECLARATION(pmix_nrec_t); +/* define an object for moving a send + * request into the server's event base */ +typedef struct { + pmix_object_t super; + int sd; +} pmix_snd_caddy_t; +PMIX_CLASS_DECLARATION(pmix_snd_caddy_t); + +/* define an object for moving a send + * request into the server's event base */ +typedef struct { + pmix_list_item_t super; + pmix_usock_hdr_t hdr; + pmix_peer_t *peer; + pmix_snd_caddy_t snd; +} pmix_server_caddy_t; +PMIX_CLASS_DECLARATION(pmix_server_caddy_t); + +/* define a tracker for collective operations */ +typedef struct { + pmix_list_item_t super; + pmix_cmd_t type; + pmix_proc_t *pcs; // copy of the original array of participants + size_t npcs; // number of procs in the array + volatile bool active; // flag for 
waiting for completion + bool def_complete; // all local procs have been registered and the trk definition is complete + pmix_list_t ranks; // list of pmix_rank_info_t of the local participants + pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants + uint32_t nlocal; // number of local participants + uint32_t local_cnt; // number of local participants who have contributed + pmix_info_t *info; // array of info structs + size_t ninfo; // number of info structs in array + pmix_collect_t collect_type; // whether or not data is to be returned at completion + pmix_modex_cbfunc_t modexcbfunc; + pmix_op_cbfunc_t op_cbfunc; +} pmix_server_trkr_t; +PMIX_CLASS_DECLARATION(pmix_server_trkr_t); + + +/**** THREAD-RELATED ****/ + /* define a caddy for thread-shifting operations */ + typedef struct { + pmix_object_t super; + pmix_event_t ev; + volatile bool active; + pmix_status_t status; + const char *nspace; + int rank; + const char *data; + size_t ndata; + const char *key; + pmix_info_t *info; + size_t ninfo; + pmix_notification_fn_t err; + pmix_kval_t *kv; + pmix_value_t *vptr; + pmix_server_caddy_t *cd; + pmix_server_trkr_t *tracker; + union { + pmix_release_cbfunc_t relfn; + pmix_errhandler_reg_cbfunc_t errregcbfn; + pmix_op_cbfunc_t opcbfn; + }cbfunc; + void *cbdata; + int ref; + } pmix_shift_caddy_t; +PMIX_CLASS_DECLARATION(pmix_shift_caddy_t); + +#define PMIX_THREADSHIFT(r, c) \ + do { \ + (r)->active = true; \ + event_assign(&((r)->ev), pmix_globals.evbase, \ + -1, EV_WRITE, (c), (r)); \ + event_active(&((r)->ev), EV_WRITE, 1); \ +} while(0); + + +#define PMIX_WAIT_FOR_COMPLETION(a) \ + do { \ + while ((a)) { \ + usleep(10); \ + } \ + } while (0); + + +/**** GLOBAL STORAGE ****/ +/* define a global construct that includes values that must be shared + * between various parts of the code library. 
Both the client + * and server libraries must instance this structure */ +typedef struct { + int init_cntr; // #times someone called Init - #times called Finalize + pmix_proc_t myid; + uid_t uid; // my effective uid + gid_t gid; // my effective gid + int pindex; + pmix_event_base_t *evbase; + int debug_output; + pmix_pointer_array_t errregs; // my error handler registrations. + bool server; + bool connected; + pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about + pmix_buffer_t *cache_local; // data PUT by me to local scope + pmix_buffer_t *cache_remote; // data PUT by me to remote scope +} pmix_globals_t; + + /* initialize the pmix_global structure */ void pmix_globals_init(void); diff --git a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c index 706451d317..1bb911db87 100644 --- a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c @@ -80,63 +80,6 @@ PMIX_CLASS_INSTANCE(pmix_usock_queue_t, pmix_object_t, NULL, NULL); -/* define a caddy for thread-shifting operations when - * the host server executes a callback to us */ - typedef struct { - pmix_object_t super; - pmix_event_t ev; - volatile bool active; - pmix_status_t status; - const char *nspace; - int rank; - const char *data; - size_t ndata; - const char *key; - pmix_info_t *info; - size_t ninfo; - pmix_notification_fn_t err; - pmix_kval_t *kv; - pmix_value_t *vptr; - pmix_server_caddy_t *cd; - pmix_server_trkr_t *tracker; - union { - pmix_release_cbfunc_t relfn; - pmix_errhandler_reg_cbfunc_t errregcbfn; - pmix_op_cbfunc_t opcbfn; - }cbfunc; - void *cbdata; - int ref; - } pmix_shift_caddy_t; -static void scon(pmix_shift_caddy_t *p) -{ - p->active = false; - p->kv = NULL; - p->cbfunc.relfn = NULL; - p->cbfunc.errregcbfn = NULL; - p->cbfunc.opcbfn = NULL; - p->cbdata = NULL; -} -static void scdes(pmix_shift_caddy_t *p) -{ - if (NULL != p->kv) { - PMIX_RELEASE(p->kv); - } -} 
-PMIX_CLASS_INSTANCE(pmix_shift_caddy_t, - pmix_object_t, - scon, scdes); - - - #define PMIX_THREADSHIFT(r, c) \ - do { \ - (r)->active = true; \ - event_assign(&((r)->ev), pmix_globals.evbase, \ - -1, EV_WRITE, (c), (r)); \ - event_priority_set(&((r)->ev), 0); \ - event_active(&((r)->ev), EV_WRITE, 1); \ -} while(0); - - /* queue a message to be sent to one of our procs - must * provide the following params: * @@ -1336,12 +1279,12 @@ static void dereg_errhandler(int sd, short args, void *cbdata) if (NULL != cd->cbfunc.opcbfn) { cd->cbfunc.opcbfn(rc, cd->cbdata); } - cd->active = false; + OBJ_RELEASE(cd); } void pmix_server_deregister_errhandler(int errhandler_ref, - pmix_op_cbfunc_t cbfunc, - void *cbdata) + pmix_op_cbfunc_t cbfunc, + void *cbdata) { pmix_shift_caddy_t *cd; @@ -1351,9 +1294,6 @@ void pmix_server_deregister_errhandler(int errhandler_ref, cd->cbdata = cbdata; cd->ref = errhandler_ref; PMIX_THREADSHIFT(cd, dereg_errhandler); - - PMIX_WAIT_FOR_COMPLETION(cd->active); - PMIX_RELEASE(cd); } static void _store_internal(int sd, short args, void *cbdata) diff --git a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.h b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.h index fe0d866269..99b636fb50 100644 --- a/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.h +++ b/opal/mca/pmix/pmix120/pmix/src/server/pmix_server_ops.h @@ -20,52 +20,6 @@ #include "src/usock/usock.h" #include "src/util/hash.h" -/* define an object for moving a send - * request into the server's event base */ -typedef struct { - pmix_object_t super; - int sd; -} pmix_snd_caddy_t; -PMIX_CLASS_DECLARATION(pmix_snd_caddy_t); - - -/* define an object for moving a send - * request into the server's event base */ -typedef struct { - pmix_list_item_t super; - pmix_usock_hdr_t hdr; - pmix_peer_t *peer; - pmix_snd_caddy_t snd; -} pmix_server_caddy_t; -PMIX_CLASS_DECLARATION(pmix_server_caddy_t); - -typedef enum { - PMIX_COLLECT_INVALID = -1, - PMIX_COLLECT_NO, - PMIX_COLLECT_YES, - 
PMIX_COLLECT_MAX -} pmix_collect_t; - -/* define a tracker for collective operations */ -typedef struct { - pmix_list_item_t super; - pmix_cmd_t type; - pmix_proc_t *pcs; // copy of the original array of participants - size_t npcs; // number of procs in the array - volatile bool active; // flag for waiting for completion - bool def_complete; // all local procs have been registered and the trk definition is complete - pmix_list_t ranks; // list of pmix_rank_info_t of the local participants - pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants - uint32_t nlocal; // number of local participants - uint32_t local_cnt; // number of local participants who have contributed - pmix_info_t *info; // array of info structs - size_t ninfo; // number of info structs in array - pmix_collect_t collect_type; // whether or not data is to be returned at completion - pmix_modex_cbfunc_t modexcbfunc; - pmix_op_cbfunc_t op_cbfunc; -} pmix_server_trkr_t; -PMIX_CLASS_DECLARATION(pmix_server_trkr_t); - typedef struct { pmix_object_t super; pmix_event_t ev; @@ -135,7 +89,7 @@ typedef struct { pmix_object_t super; pmix_event_t ev; int sd; - struct sockaddr addr; + struct sockaddr_storage addr; } pmix_pending_connection_t; PMIX_CLASS_DECLARATION(pmix_pending_connection_t); diff --git a/opal/mca/pmix/pmix120/pmix/src/usock/usock.h b/opal/mca/pmix/pmix120/pmix/src/usock/usock.h index bbb67febb6..9966bd9133 100644 --- a/opal/mca/pmix/pmix120/pmix/src/usock/usock.h +++ b/opal/mca/pmix/pmix120/pmix/src/usock/usock.h @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science @@ -54,73 +54,12 @@ #endif #include PMIX_EVENT_HEADER +#include "src/include/pmix_globals.h" #include "src/buffer_ops/buffer_ops.h" #include "src/class/pmix_hash_table.h" #include "src/class/pmix_list.h" -/* define a command type for communicating to the - * pmix server */ -#define PMIX_CMD PMIX_UINT32 -/* define some commands */ -typedef enum { - PMIX_REQ_CMD, - PMIX_ABORT_CMD, - PMIX_COMMIT_CMD, - PMIX_FENCENB_CMD, - PMIX_GETNB_CMD, - PMIX_FINALIZE_CMD, - PMIX_PUBLISHNB_CMD, - PMIX_LOOKUPNB_CMD, - PMIX_UNPUBLISHNB_CMD, - PMIX_SPAWNNB_CMD, - PMIX_CONNECTNB_CMD, - PMIX_DISCONNECTNB_CMD, - PMIX_NOTIFY_CMD, - PMIX_REGEVENTS_CMD, - PMIX_DEREGEVENTS_CMD, -} pmix_cmd_t; - - -/* header for messages */ -typedef struct { - int pindex; - uint32_t tag; - size_t nbytes; -} pmix_usock_hdr_t; - -// forward declaration -struct pmix_peer_t; - -/* internally used cbfunc */ -typedef void (*pmix_usock_cbfunc_t)(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr, - pmix_buffer_t *buf, void *cbdata); - -/* usock structure for sending a message */ -typedef struct { - pmix_list_item_t super; - pmix_event_t ev; - pmix_usock_hdr_t hdr; - pmix_buffer_t *data; - bool hdr_sent; - char *sdptr; - size_t sdbytes; -} pmix_usock_send_t; -PMIX_CLASS_DECLARATION(pmix_usock_send_t); - -/* usock structure for recving a message */ -typedef struct { - pmix_list_item_t super; - pmix_event_t ev; - struct pmix_peer_t *peer; - int sd; - pmix_usock_hdr_t hdr; - char *data; - bool hdr_recvd; - char *rdptr; - size_t rdbytes; -} pmix_usock_recv_t; -PMIX_CLASS_DECLARATION(pmix_usock_recv_t); /* usock structure for tracking posted recvs */ typedef struct { @@ -132,28 +71,6 @@ typedef struct { } pmix_usock_posted_recv_t; PMIX_CLASS_DECLARATION(pmix_usock_posted_recv_t); -/* object for tracking peers - each peer can have multiple - * connections. 
This can occur if the initial app executes - * a fork/exec, and the child initiates its own connection - * back to the PMIx server. Thus, the trackers should be "indexed" - * by the socket, not the process nspace/rank */ -typedef struct pmix_peer_t { - pmix_object_t super; - pmix_rank_info_t *info; - int proc_cnt; - void *server_object; - int index; - int sd; - pmix_event_t send_event; /**< registration with event thread for send events */ - bool send_ev_active; - pmix_event_t recv_event; /**< registration with event thread for recv events */ - bool recv_ev_active; - pmix_list_t send_queue; /**< list of messages to send */ - pmix_usock_send_t *send_msg; /**< current send in progress */ - pmix_usock_recv_t *recv_msg; /**< current recv in progress */ -} pmix_peer_t; -PMIX_CLASS_DECLARATION(pmix_peer_t); - /* usock struct for posting send/recv request */ typedef struct { pmix_object_t super; @@ -193,15 +110,6 @@ typedef struct { } pmix_cb_t; PMIX_CLASS_DECLARATION(pmix_cb_t); -/* an internal macro for shifting incoming requests - * to the internal event thread */ -#define PMIX_THREAD_SHIFT(c, f) \ - do { \ - event_assign(&((c)->ev), pmix_globals.evbase, -1, \ - EV_WRITE, (f), (c)); \ - event_active(&((c)->ev), EV_WRITE, 1); \ - } while(0); - typedef struct { pmix_object_t super; pmix_event_t ev; @@ -249,13 +157,6 @@ PMIX_CLASS_DECLARATION(pmix_timer_t); } while(0) -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - } while (0); - #define PMIX_TIMER_EVENT(s, f, d) \ do { \ pmix_timer_t *tm; \ diff --git a/opal/mca/pmix/pmix120/pmix/src/usock/usock_sendrecv.c b/opal/mca/pmix/pmix120/pmix/src/usock/usock_sendrecv.c index d6f8764258..1c7fa48206 100644 --- a/opal/mca/pmix/pmix120/pmix/src/usock/usock_sendrecv.c +++ b/opal/mca/pmix/pmix120/pmix/src/usock/usock_sendrecv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science @@ -37,12 +37,44 @@ #endif #include "src/include/pmix_globals.h" +#include "src/server/pmix_server_ops.h" #include "src/util/error.h" #include "usock.h" static uint32_t current_tag = 1; // 0 is reserved for system purposes +static void lost_connection(pmix_peer_t *peer, pmix_status_t err) +{ + /* stop all events */ + if (peer->recv_ev_active) { + event_del(&peer->recv_event); + peer->recv_ev_active = false; + } + if (peer->send_ev_active) { + event_del(&peer->send_event); + peer->send_ev_active = false; + } + if (NULL != peer->recv_msg) { + PMIX_RELEASE(peer->recv_msg); + peer->recv_msg = NULL; + } + CLOSE_THE_SOCKET(peer->sd); + if (pmix_globals.server) { + /* if I am a server, then we need to + * do some cleanup as the client has + * left us */ + pmix_pointer_array_set_item(&pmix_server_globals.clients, + peer->index, NULL); + PMIX_RELEASE(peer); + } else { + /* if I am a client, there is only + * one connection we can have */ + pmix_globals.connected = false; + } + PMIX_REPORT_ERROR(err); +} + static pmix_status_t send_bytes(int sd, char **buf, size_t *remain) { pmix_status_t ret = PMIX_SUCCESS; @@ -183,8 +215,7 @@ void pmix_usock_send_handler(int sd, short flags, void *cbdata) peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; - CLOSE_THE_SOCKET(peer->sd); - PMIX_REPORT_ERROR(rc); + lost_connection(peer, rc); return; } } @@ -212,8 +243,7 @@ void pmix_usock_send_handler(int sd, short flags, void *cbdata) peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; - CLOSE_THE_SOCKET(peer->sd); - PMIX_REPORT_ERROR(rc); + lost_connection(peer, rc); return; } } @@ -357,8 +387,7 @@ void pmix_usock_recv_handler(int sd, short flags, void *cbdata) PMIX_RELEASE(peer->recv_msg); peer->recv_msg = NULL; } - CLOSE_THE_SOCKET(peer->sd); - PMIX_REPORT_ERROR(PMIX_ERR_UNREACH); + lost_connection(peer, 
PMIX_ERR_UNREACH); } void pmix_usock_send_recv(int fd, short args, void *cbdata) diff --git a/opal/mca/pmix/pmix120/pmix_pmix120_component.c b/opal/mca/pmix/pmix120/pmix_pmix120_component.c index f02027af40..eb89ac1524 100644 --- a/opal/mca/pmix/pmix120/pmix_pmix120_component.c +++ b/opal/mca/pmix/pmix120/pmix_pmix120_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -97,12 +97,12 @@ static int pmix120_component_query(mca_base_module_t **module, int *priority) if (NULL != (t = getenv("PMIX_SERVER_URI")) || NULL != (id = getenv("PMIX_ID"))) { /* if PMIx is present, then we are a client and need to use it, - * but only if we are requested */ - *priority = 5; + * and we are now the default */ + *priority = 100; } else { /* we could be a server, so we still need to be considered, - * but only if requested */ - *priority = 2; + * and we are now the default */ + *priority = 15; } *module = (mca_base_module_t *)&opal_pmix_pmix120_module; return OPAL_SUCCESS; diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index ab12172043..1458ca56f8 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -66,6 +66,9 @@ int orte_ess_base_proc_binding(void) goto error; } } + } else if (NULL != getenv(OPAL_MCA_PREFIX"orte_externally_bound")) { + orte_proc_is_bound = true; + /* the topology system will pickup the binding pattern */ } /* see if we were bound when launched */ diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index a456ddf398..555692f6c6 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -62,7 +62,6 @@ #include "orte/mca/snapc/base/base.h" #include "orte/mca/sstore/base/base.h" #endif -#include 
"orte/mca/schizo/base/base.h" #include "orte/mca/filem/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" @@ -512,7 +511,7 @@ int orte_ess_base_orted_setup(char **hosts) /* setup the PMIx framework - ensure it skips all non-PMIx components, * but do not override anything we were given */ - opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray", false, &environ); + opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_pmix_base_open"; @@ -612,17 +611,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_dfs_select"; goto error; } - /* setup the SCHIZO framework */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_select"; - goto error; - } + return ORTE_SUCCESS; error: orte_show_help("help-orte-runtime.txt", @@ -654,7 +643,6 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&opal_pmix_base_framework); /* close frameworks */ - (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_filem_base_framework); (void) mca_base_framework_close(&orte_grpcomm_base_framework); (void) mca_base_framework_close(&orte_iof_base_framework); @@ -674,6 +662,8 @@ int orte_ess_base_orted_finalize(void) orte_session_dir_finalize(ORTE_PROC_MY_NAME); /* ensure we scrub the session directory tree */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + /* release the job hash table */ + OBJ_RELEASE(orte_job_data); return ORTE_SUCCESS; } diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index ea60dc72d8..607643914e 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ 
-11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -52,7 +52,6 @@ #include "orte/mca/snapc/base/base.h" #include "orte/mca/sstore/base/base.h" #endif -#include "orte/mca/schizo/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/show_help.h" @@ -230,18 +229,6 @@ int orte_ess_base_tool_setup(void) opal_cr_set_enabled(false); #endif - /* setup schizo in case we are parsing cmd lines */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_base_select"; - goto error; - } - return ORTE_SUCCESS; error: @@ -270,7 +257,6 @@ int orte_ess_base_tool_finalize(void) } (void) mca_base_framework_close(&orte_routed_base_framework); (void) mca_base_framework_close(&orte_rml_base_framework); - (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); return ORTE_SUCCESS; diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index c6f472a6c5..7b5d2c1067 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -74,7 +74,6 @@ #include "orte/mca/sstore/base/base.h" #endif #include "orte/mca/filem/base/base.h" -#include "orte/mca/schizo/base/base.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/state.h" @@ -620,7 +619,7 @@ static int rte_init(void) /* setup the PMIx framework - ensure it skips all non-PMIx components, but * do not override anything we were given */ - 
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray", false, &environ); + opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ); if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_pmix_base_open"; @@ -721,17 +720,7 @@ static int rte_init(void) error = "orte_dfs_select"; goto error; } - /* setup the schizo framework */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_schizo_select"; - goto error; - } + /* if a tool has launched us and is requesting event reports, * then set its contact info into the comm system */ @@ -808,7 +797,6 @@ static int rte_finalize(void) /* cleanup our data server */ orte_data_server_finalize(); - (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_dfs_base_framework); (void) mca_base_framework_close(&orte_filem_base_framework); /* output any lingering stdout/err data */ @@ -854,6 +842,9 @@ static int rte_finalize(void) fclose(orte_xml_fp); } } + + /* release the job hash table */ + OBJ_RELEASE(orte_job_data); return ORTE_SUCCESS; } diff --git a/orte/mca/ess/pmi/ess_pmi_component.c b/orte/mca/ess/pmi/ess_pmi_component.c index b877ed3643..fed89bd3b3 100644 --- a/orte/mca/ess/pmi/ess_pmi_component.c +++ b/orte/mca/ess/pmi/ess_pmi_component.c @@ -3,7 +3,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All * rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +25,7 @@ #include "opal/mca/pmix/base/base.h" #include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/schizo/schizo.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/pmi/ess_pmi.h" @@ -67,41 +67,27 @@ static int pmi_component_open(void) static int pmi_component_query(mca_base_module_t **module, int *priority) { - int ret; + orte_schizo_launch_environ_t ret; - /* all APPS must use pmix */ - if (ORTE_PROC_IS_APP) { - if (NULL == opal_pmix.initialized) { - /* open and setup pmix */ - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - *priority = -1; - *module = NULL; - return ret; - } - if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { - /* don't error log this as it might not be an error at all */ - *priority = -1; - *module = NULL; - (void) mca_base_framework_close(&opal_pmix_base_framework); - return ret; - } - } - if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { - /* we cannot be in a PMI environment */ - *priority = -1; - *module = NULL; - return ORTE_ERROR; - } - *priority = 35; - *module = (mca_base_module_t *)&orte_ess_pmi_module; - return ORTE_SUCCESS; + if (!ORTE_PROC_IS_APP) { + *module = NULL; + *priority = 0; + return ORTE_ERROR; } - /* we can't run */ - *priority = -1; - *module = NULL; - return ORTE_ERROR; + /* find out what our environment looks like */ + ret = orte_schizo.check_launch_environment(); + if (ORTE_SCHIZO_UNMANAGED_SINGLETON == ret || + ORTE_SCHIZO_MANAGED_SINGLETON == ret) { + /* not us */ + *module = NULL; + *priority = 0; + return ORTE_ERROR; + } + + *priority = 35; + *module = (mca_base_module_t *)&orte_ess_pmi_module; + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 6723ed47d9..8ff5b96ba4 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ 
-97,6 +97,18 @@ static int rte_init(void) goto error; } + /* open and setup pmix */ + if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + /* we cannot run */ + error = "pmix init"; + goto error; + } + if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { + /* we cannot run */ + error = "pmix init"; + goto error; + } /* initialize the selected module */ if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { /* we cannot run */ diff --git a/orte/mca/ess/singleton/ess_singleton.h b/orte/mca/ess/singleton/ess_singleton.h index 916c36f560..2eb523fd12 100644 --- a/orte/mca/ess/singleton/ess_singleton.h +++ b/orte/mca/ess/singleton/ess_singleton.h @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,14 +22,13 @@ BEGIN_C_DECLS -/* - * Module open / close - */ -int orte_ess_singleton_component_open(void); -int orte_ess_singleton_component_close(void); -int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority); -ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_singleton_component; +typedef struct { + orte_ess_base_component_t super; + char *server_uri; + bool isolated; +} orte_ess_singleton_component_t; +ORTE_MODULE_DECLSPEC extern orte_ess_singleton_component_t mca_ess_singleton_component; END_C_DECLS diff --git a/orte/mca/ess/singleton/ess_singleton_component.c b/orte/mca/ess/singleton/ess_singleton_component.c index 05322bbd3a..f457d4109f 100644 --- a/orte/mca/ess/singleton/ess_singleton_component.c +++ b/orte/mca/ess/singleton/ess_singleton_component.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,72 +33,82 @@ #include "opal/mca/pmix/base/base.h" #include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/schizo/schizo.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/singleton/ess_singleton.h" extern orte_ess_base_module_t orte_ess_singleton_module; -char *orte_ess_singleton_server_uri = NULL; - -static int -orte_ess_singleton_component_register(void); +static int component_open(void); +static int component_close(void); +static int component_query(mca_base_module_t **module, int *priority); +static int component_register(void); /* * Instantiate the public struct with all of our public information * and pointers to our public functions in it */ -orte_ess_base_component_t mca_ess_singleton_component = { - /* First, the mca_component_t struct containing meta information - about the component itself */ - .base_version = { - ORTE_ESS_BASE_VERSION_3_0_0, +orte_ess_singleton_component_t mca_ess_singleton_component = { + { + /* First, the mca_component_t struct containing meta information + about the component itself */ + .base_version = { + ORTE_ESS_BASE_VERSION_3_0_0, - /* Component name and version */ - .mca_component_name = "singleton", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), + /* Component name and version */ + .mca_component_name = "singleton", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), - /* Component open and close functions */ - .mca_open_component = orte_ess_singleton_component_open, - .mca_close_component = orte_ess_singleton_component_close, - .mca_query_component = orte_ess_singleton_component_query, - .mca_register_component_params = orte_ess_singleton_component_register, - }, - .base_data = { - /* The component is not checkpoint ready */ - MCA_BASE_METADATA_PARAM_NONE + /* Component open and close functions */ + .mca_open_component = 
component_open, + .mca_close_component = component_close, + .mca_query_component = component_query, + .mca_register_component_params = component_register, + }, + .base_data = { + /* The component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE + }, }, + .server_uri = NULL, + .isolated = false }; -static int -orte_ess_singleton_component_register(void) +static int component_register(void) { int ret; - orte_ess_singleton_server_uri = NULL; - ret = mca_base_component_var_register(&mca_ess_singleton_component.base_version, + mca_ess_singleton_component.server_uri = NULL; + ret = mca_base_component_var_register(&mca_ess_singleton_component.super.base_version, "server", "Server to be used as HNP - [file|FILE]: or just uri", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &orte_ess_singleton_server_uri); + &mca_ess_singleton_component.server_uri); (void) mca_base_var_register_synonym(ret, "orte", "orte", NULL, "server", 0); + ret = mca_base_component_var_register(&mca_ess_singleton_component.super.base_version, + "isolated", + "Do not start a supporting daemon as this process will never attempt to spawn", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_ess_singleton_component.isolated); + return ORTE_SUCCESS; } -int -orte_ess_singleton_component_open(void) +static int component_open(void) { return ORTE_SUCCESS; } -int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority) +static int component_query(mca_base_module_t **module, int *priority) { - int ret; + orte_schizo_launch_environ_t ret; /* if we are an HNP, daemon, or tool, then we * are definitely not a singleton! @@ -106,55 +117,28 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_TOOL) { *module = NULL; + *priority = 0; return ORTE_ERROR; } - /* okay, we still could be a singleton or - * an application process. 
If we have been - * given an HNP URI, then we are definitely - * not a singleton - */ - if (NULL != orte_process_info.my_hnp_uri) { + /* find out what our environment looks like */ + ret = orte_schizo.check_launch_environment(); + if (ORTE_SCHIZO_UNMANAGED_SINGLETON != ret && + ORTE_SCHIZO_MANAGED_SINGLETON != ret) { + /* not us */ *module = NULL; + *priority = 0; return ORTE_ERROR; } - /* open and setup pmix */ - if (NULL == opal_pmix.initialized) { - if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { - /* if PMIx is not available, then we are indeed a singleton */ - goto single; - } - if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { - /* if PMIx is not available, then we are indeed a singleton */ - (void) mca_base_framework_close(&opal_pmix_base_framework); - goto single; - } - } - if (opal_pmix.initialized()) { - /* we are in a PMI environment and are therefore - * not a singleton */ - *priority = -1; - *module = NULL; - return ORTE_ERROR; - } - - single: - /* okay, we could still be an application process, - * but launched in "standalone" mode - i.e., directly - * launched by an environment instead of via mpirun. - * We need to set our priority low so that any enviro - * component will override us. 
If they don't, then we - * want to be selected as we must be a singleton - */ - *priority = 25; + /* okay, we want to be selected as we must be a singleton */ + *priority = 100; *module = (mca_base_module_t *)&orte_ess_singleton_module; return ORTE_SUCCESS; } -int -orte_ess_singleton_component_close(void) +static int component_close(void) { return ORTE_SUCCESS; } diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 69717ae5da..f0fc467746 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -97,19 +97,19 @@ static int rte_init(void) u32ptr = &u32; u16ptr = &u16; - if (NULL != orte_ess_singleton_server_uri) { + if (NULL != mca_ess_singleton_component.server_uri) { /* we are going to connect to a server HNP */ - if (0 == strncmp(orte_ess_singleton_server_uri, "file", strlen("file")) || - 0 == strncmp(orte_ess_singleton_server_uri, "FILE", strlen("FILE"))) { + if (0 == strncmp(mca_ess_singleton_component.server_uri, "file", strlen("file")) || + 0 == strncmp(mca_ess_singleton_component.server_uri, "FILE", strlen("FILE"))) { char input[1024], *filename; FILE *fp; /* it is a file - get the filename */ - filename = strchr(orte_ess_singleton_server_uri, ':'); + filename = strchr(mca_ess_singleton_component.server_uri, ':'); if (NULL == filename) { /* filename is not correctly formatted */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true, - "singleton", orte_ess_singleton_server_uri); + "singleton", mca_ess_singleton_component.server_uri); return ORTE_ERROR; } ++filename; /* space past the : */ @@ -117,7 +117,7 @@ static int rte_init(void) if (0 >= strlen(filename)) { /* they forgot to give us the name! 
*/ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true, - "singleton", orte_ess_singleton_server_uri); + "singleton", mca_ess_singleton_component.server_uri); return ORTE_ERROR; } @@ -125,7 +125,7 @@ static int rte_init(void) fp = fopen(filename, "r"); if (NULL == fp) { /* can't find or read file! */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true, - "singleton", orte_ess_singleton_server_uri); + "singleton", mca_ess_singleton_component.server_uri); return ORTE_ERROR; } memset(input, 0, 1024); // initialize the array to ensure a NULL termination @@ -133,14 +133,14 @@ static int rte_init(void) /* something malformed about file */ fclose(fp); orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true, - "singleton", orte_ess_singleton_server_uri, "singleton"); + "singleton", mca_ess_singleton_component.server_uri, "singleton"); return ORTE_ERROR; } fclose(fp); input[strlen(input)-1] = '\0'; /* remove newline */ orte_process_info.my_hnp_uri = strdup(input); } else { - orte_process_info.my_hnp_uri = strdup(orte_ess_singleton_server_uri); + orte_process_info.my_hnp_uri = strdup(mca_ess_singleton_component.server_uri); } /* save the daemon uri - we will process it later */ orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri); @@ -154,17 +154,21 @@ static int rte_init(void) ORTE_PROC_MY_NAME->vpid = 0; /* for convenience, push the pubsub version of this param into the environ */ - opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, 1, &environ); + opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ); } else if (NULL != getenv("SINGULARITY_CONTAINER")) { /* mark that we are in a container */ opal_setenv("OPAL_PROC_CONTAINER", "1", true, &environ); - } else if (NULL != getenv("OPAL_ISOLATED")) { + } else if (mca_ess_singleton_component.isolated) { + /* ensure we use the isolated pmix component */ + opal_setenv 
(OPAL_MCA_PREFIX"pmix", "isolated", true, &environ); + } else { /* spawn our very own HNP to support us */ if (ORTE_SUCCESS != (rc = fork_hnp())) { ORTE_ERROR_LOG(rc); return rc; } /* our name was given to us by the HNP */ + opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ); } /* open and setup pmix */ @@ -485,6 +489,16 @@ static int fork_hnp(void) opal_argv_append(&argc, &argv, "state_novm_select"); opal_argv_append(&argc, &argv, "1"); + /* direct the selection of the ess component */ + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(&argc, &argv, "ess"); + opal_argv_append(&argc, &argv, "hnp"); + + /* direct the selection of the pmix component */ + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(&argc, &argv, "pmix"); + opal_argv_append(&argc, &argv, "^s1,s2,cray,isolated"); + /* Fork off the child */ orte_process_info.hnp_pid = fork(); if(orte_process_info.hnp_pid < 0) { diff --git a/orte/mca/schizo/alps/Makefile.am b/orte/mca/schizo/alps/Makefile.am new file mode 100644 index 0000000000..05d47a7581 --- /dev/null +++ b/orte/mca/schizo/alps/Makefile.am @@ -0,0 +1,35 @@ +# +# Copyright (c) 2016 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + schizo_alps_component.c \ + schizo_alps.h \ + schizo_alps.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_orte_schizo_alps_DSO +component_noinst = +component_install = mca_schizo_alps.la +else +component_noinst = libmca_schizo_alps.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_schizo_alps_la_SOURCES = $(sources) +mca_schizo_alps_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_schizo_alps_la_SOURCES = $(sources) +libmca_schizo_alps_la_LDFLAGS = -module -avoid-version + diff --git a/orte/mca/schizo/alps/configure.m4 b/orte/mca/schizo/alps/configure.m4 new file mode 100644 index 0000000000..0bcd85a524 --- /dev/null +++ b/orte/mca/schizo/alps/configure.m4 @@ -0,0 +1,47 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008 UT-Battelle, LLC +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2016 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_schizo_alps_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_orte_schizo_alps_CONFIG],[ + AC_CONFIG_FILES([orte/mca/schizo/alps/Makefile]) + + ORTE_CHECK_ALPS([schizo_alps], [schizo_alps_happy="yes"], [schizo_alps_happy="no"]) + + # check for alps/apInfo.h + # save current CPPFLAGS + MCA_orte_schizo_save_CPPFLAGS="$CPPFLAGS" + + # add flags obtained from ORTE_CHECK_ALPS + CPPFLAGS="$CPPFLAGS $schizo_alps_CPPFLAGS" + + AC_CHECK_HEADERS([alps/apInfo.h], [], [schizo_alps_happy="no"]) + + # restore CPPFLAGS + CPPFLAGS="$MCA_orte_schizo_save_CPPFLAGS" + + AC_SUBST([schizo_alps_CPPFLAGS]) + + AS_IF([test "$schizo_alps_happy" = "yes"], [$1], [$2]) +])dnl diff --git a/orte/mca/schizo/alps/owner.txt b/orte/mca/schizo/alps/owner.txt new file mode 100644 index 0000000000..85b4416d20 --- /dev/null +++ b/orte/mca/schizo/alps/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: INTEL +status: active diff --git a/orte/mca/schizo/alps/schizo_alps.c b/orte/mca/schizo/alps/schizo_alps.c new file mode 100644 index 0000000000..7a32df1d37 --- /dev/null +++ b/orte/mca/schizo/alps/schizo_alps.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <ctype.h>
+#include <sys/syscall.h>
+
+#include "opal/util/basename.h"
+#include "opal/util/opal_environ.h"
+
+#include "orte/runtime/orte_globals.h"
+#include "orte/util/name_fns.h"
+#include "orte/mca/schizo/base/base.h"
+
+#include "schizo_alps.h"
+
+static orte_schizo_launch_environ_t check_launch_environment(void);
+static void finalize(void);
+
+orte_schizo_base_module_t orte_schizo_alps_module = {
+    .check_launch_environment = check_launch_environment,
+    .finalize = finalize
+};
+
+static char **pushed_envs = NULL;
+static char **pushed_vals = NULL;
+static orte_schizo_launch_environ_t myenv;
+static bool myenvdefined = false;
+
+/* Classify how this app was started (via mpirun, direct launch, or as a
+ * singleton), push the matching ess/pmix selection envars, and cache the
+ * answer in myenv so that later calls return it without repeating the work */
+static orte_schizo_launch_environ_t check_launch_environment(void)
+{
+    int i;
+    const char proc_job_file[]="/proc/job";
+    FILE *fd = NULL, *fd_task_is_app = NULL;
+    char task_is_app_fname[PATH_MAX];
+
+    if (myenvdefined) {
+        return myenv;
+    }
+    myenvdefined = true;
+
+    /* we were only selected because we are an app, so only check whether
+     * we were launched via mpirun (a daemon URI was given) or direct launched */
+    if (NULL != orte_process_info.my_daemon_uri) {
+        myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
+        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
+        opal_argv_append_nosize(&pushed_vals, "pmi");
+        goto setup;
+    }
+
+    /* see if we are running in a Cray PAGG container */
+    fd = fopen(proc_job_file, "r");
+    if (NULL == fd) {
+        /* we are a singleton */
+        myenv = ORTE_SCHIZO_MANAGED_SINGLETON;
+        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
+        opal_argv_append_nosize(&pushed_vals, "singleton");
+    } else {
+        /* the daemon URI is known to be NULL here, so this is a direct launch */
+        myenv = ORTE_SCHIZO_DIRECT_LAUNCHED;
+        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
+        opal_argv_append_nosize(&pushed_vals, "pmi");
+        snprintf(task_is_app_fname,sizeof(task_is_app_fname),
+                 "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid));
+        fd_task_is_app = fopen(task_is_app_fname, "r");
+        if (fd_task_is_app != NULL) {
+            /* in a PAGG container and an app task, not e.g. a process on a mom node */
+            opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"pmix");
+            opal_argv_append_nosize(&pushed_vals, "cray");
+            fclose(fd_task_is_app);
+        }
+        fclose(fd);
+    }
+
+  setup:
+    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
+                        "schizo:alps DECLARED AS %s", orte_schizo_base_print_env(myenv));
+    if (NULL != pushed_envs) {
+        for (i=0; NULL != pushed_envs[i]; i++) {
+            opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ);
+        }
+    }
+    return myenv;
+}
+
+/* Undo check_launch_environment: unset every pushed envar and free both lists */
+static void finalize(void)
+{
+    int i;
+
+    if (NULL != pushed_envs) {
+        for (i=0; NULL != pushed_envs[i]; i++) {
+            opal_unsetenv(pushed_envs[i], &environ);
+        }
+        opal_argv_free(pushed_envs);
+        opal_argv_free(pushed_vals);
+    }
+}
diff --git a/orte/mca/schizo/alps/schizo_alps.h b/orte/mca/schizo/alps/schizo_alps.h
new file mode 100644
index 0000000000..2e2d8ffb75
--- 
/dev/null +++ b/orte/mca/schizo/alps/schizo_alps.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef _MCA_SCHIZO_ALPS_H_ +#define _MCA_SCHIZO_ALPS_H_ + +#include "orte_config.h" + +#include "orte/types.h" + +#include "opal/mca/base/base.h" +#include "orte/mca/schizo/schizo.h" + + +BEGIN_C_DECLS + +ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_alps_component; +extern orte_schizo_base_module_t orte_schizo_alps_module; + +END_C_DECLS + +#endif /* MCA_SCHIZO_ALPS_H_ */ + diff --git a/orte/mca/schizo/alps/schizo_alps_component.c b/orte/mca/schizo/alps/schizo_alps_component.c new file mode 100644 index 0000000000..c454c9d00e --- /dev/null +++ b/orte/mca/schizo/alps/schizo_alps_component.c @@ -0,0 +1,53 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#include "opal/util/show_help.h" + +#include "orte/mca/schizo/schizo.h" +#include "schizo_alps.h" + +static int component_query(mca_base_module_t **module, int *priority); + +/* + * Struct of function pointers and all that to let us be initialized + */ +orte_schizo_base_component_t mca_schizo_alps_component = { + .base_version = { + MCA_SCHIZO_BASE_VERSION_1_0_0, + .mca_component_name = "alps", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + .mca_query_component = component_query, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, +}; + +static int component_query(mca_base_module_t **module, int *priority) +{ + /* if we are not an app, then don't bother */ + if (!ORTE_PROC_IS_APP) { + *priority = 0; + *module = NULL; + return ORTE_ERROR; + } + + /* 
since we were built, assume we are on an alps system */ + *priority = 90; + *module = (mca_base_module_t *)&orte_schizo_alps_module; + return ORTE_SUCCESS; +} + diff --git a/orte/mca/schizo/base/base.h b/orte/mca/schizo/base/base.h index 7df98b50df..9cf4ffdb3d 100644 --- a/orte/mca/schizo/base/base.h +++ b/orte/mca/schizo/base/base.h @@ -60,6 +60,7 @@ typedef struct { OBJ_CLASS_DECLARATION(orte_schizo_base_active_module_t); /* the base stub functions */ +ORTE_DECLSPEC const char* orte_schizo_base_print_env(orte_schizo_launch_environ_t env); ORTE_DECLSPEC int orte_schizo_base_parse_cli(char **personality, int argc, int start, char **argv); ORTE_DECLSPEC int orte_schizo_base_parse_env(char **personality, @@ -72,6 +73,8 @@ ORTE_DECLSPEC int orte_schizo_base_setup_fork(orte_job_t *jdata, ORTE_DECLSPEC int orte_schizo_base_setup_child(orte_job_t *jobdat, orte_proc_t *child, orte_app_context_t *app); +ORTE_DECLSPEC orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void); +ORTE_DECLSPEC void orte_schizo_base_finalize(void); END_C_DECLS diff --git a/orte/mca/schizo/base/schizo_base_frame.c b/orte/mca/schizo/base/schizo_base_frame.c index 5bdd4cf3e1..33135060f2 100644 --- a/orte/mca/schizo/base/schizo_base_frame.c +++ b/orte/mca/schizo/base/schizo_base_frame.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -37,10 +37,12 @@ */ orte_schizo_base_t orte_schizo_base = {{{0}}}; orte_schizo_base_module_t orte_schizo = { - orte_schizo_base_parse_cli, - orte_schizo_base_parse_env, - orte_schizo_base_setup_fork, - orte_schizo_base_setup_child + .parse_cli = orte_schizo_base_parse_cli, + .parse_env = orte_schizo_base_parse_env, + .setup_fork = orte_schizo_base_setup_fork, + .setup_child = orte_schizo_base_setup_child, + .check_launch_environment = orte_schizo_base_check_launch_environment, + .finalize = orte_schizo_base_finalize }; static int orte_schizo_base_close(void) diff --git a/orte/mca/schizo/base/schizo_base_select.c b/orte/mca/schizo/base/schizo_base_select.c index fae11d0fc2..00fc0b0da9 100644 --- a/orte/mca/schizo/base/schizo_base_select.c +++ b/orte/mca/schizo/base/schizo_base_select.c @@ -100,7 +100,7 @@ int orte_schizo_base_select(void) } if (4 < opal_output_get_verbosity(orte_schizo_base_framework.framework_output)) { - opal_output(0, "%s: Final schizo priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_output(0, "Final schizo priorities"); /* show the prioritized list */ OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) { opal_output(0, "\tSchizo: %s Priority: %d", mod->component->mca_component_name, mod->pri); diff --git a/orte/mca/schizo/base/schizo_base_stubs.c b/orte/mca/schizo/base/schizo_base_stubs.c index 0a99bd2bed..76e35a0c73 100644 --- a/orte/mca/schizo/base/schizo_base_stubs.c +++ b/orte/mca/schizo/base/schizo_base_stubs.c @@ -19,6 +19,24 @@ #include "orte/util/name_fns.h" #include "orte/mca/schizo/base/base.h" +const char* orte_schizo_base_print_env(orte_schizo_launch_environ_t env) +{ + switch(env) { + case ORTE_SCHIZO_UNDETERMINED: + return "UNDETERMINED"; + case ORTE_SCHIZO_NATIVE_LAUNCHED: + return "NATIVE_LAUNCHED"; + case ORTE_SCHIZO_UNMANAGED_SINGLETON: + return "UNMANAGED_SINGLETON"; + case ORTE_SCHIZO_DIRECT_LAUNCHED: + return "DIRECT_LAUNCHED"; + case 
ORTE_SCHIZO_MANAGED_SINGLETON: + return "MANAGED_SINGLETON"; + default: + return "INVALID_CODE"; + } +} + int orte_schizo_base_parse_cli(char **personality, int argc, int start, char **argv) { @@ -98,3 +116,30 @@ int orte_schizo_base_setup_child(orte_job_t *jdata, } return ORTE_SUCCESS; } + +orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void) +{ + orte_schizo_launch_environ_t rc; + orte_schizo_base_active_module_t *mod; + + OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) { + if (NULL != mod->module->check_launch_environment) { + rc = mod->module->check_launch_environment(); + if (ORTE_SCHIZO_UNDETERMINED != rc) { + return rc; + } + } + } + return ORTE_SCHIZO_UNDETERMINED; +} + +void orte_schizo_base_finalize(void) +{ + orte_schizo_base_active_module_t *mod; + + OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) { + if (NULL != mod->module->finalize) { + mod->module->finalize(); + } + } +} diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index 84ee456437..9eba49c2bd 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -63,10 +63,10 @@ static int setup_child(orte_job_t *jobdat, orte_app_context_t *app); orte_schizo_base_module_t orte_schizo_ompi_module = { - parse_cli, - parse_env, - setup_fork, - setup_child + .parse_cli = parse_cli, + .parse_env = parse_env, + .setup_fork = setup_fork, + .setup_child = setup_child }; static int parse_cli(char **personality, diff --git a/orte/mca/schizo/ompi/schizo_ompi_component.c b/orte/mca/schizo/ompi/schizo_ompi_component.c index 9233c0c81d..168fe5188c 100644 --- a/orte/mca/schizo/ompi/schizo_ompi_component.c +++ b/orte/mca/schizo/ompi/schizo_ompi_component.c @@ -16,6 +16,8 @@ #include "opal/util/show_help.h" +#include "orte/runtime/orte_globals.h" + #include "orte/mca/schizo/schizo.h" #include "schizo_ompi.h" @@ -40,6 +42,12 @@ 
orte_schizo_base_component_t mca_schizo_ompi_component = { static int component_query(mca_base_module_t **module, int *priority) { + /* if we are an app, ignore us */ + if (ORTE_PROC_IS_APP) { + *module = NULL; + *priority = 0; + return ORTE_ERROR; + } *module = (mca_base_module_t*)&orte_schizo_ompi_module; *priority = 10; return ORTE_SUCCESS; diff --git a/orte/mca/schizo/orte/Makefile.am b/orte/mca/schizo/orte/Makefile.am new file mode 100644 index 0000000000..606b1ac822 --- /dev/null +++ b/orte/mca/schizo/orte/Makefile.am @@ -0,0 +1,35 @@ +# +# Copyright (c) 2016 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + schizo_orte_component.c \ + schizo_orte.h \ + schizo_orte.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_orte_schizo_orte_DSO +component_noinst = +component_install = mca_schizo_orte.la +else +component_noinst = libmca_schizo_orte.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_schizo_orte_la_SOURCES = $(sources) +mca_schizo_orte_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_schizo_orte_la_SOURCES = $(sources) +libmca_schizo_orte_la_LDFLAGS = -module -avoid-version + diff --git a/orte/mca/schizo/orte/schizo_orte.c b/orte/mca/schizo/orte/schizo_orte.c new file mode 100644 index 0000000000..05625c279e --- /dev/null +++ b/orte/mca/schizo/orte/schizo_orte.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/mca/schizo/base/base.h" + +#include "schizo_orte.h" + +static orte_schizo_launch_environ_t check_launch_environment(void); +static void finalize(void); + +orte_schizo_base_module_t orte_schizo_orte_module = { + .check_launch_environment = check_launch_environment, + .finalize = finalize +}; + +static char **pushed_envs = NULL; +static char **pushed_vals = NULL; +static orte_schizo_launch_environ_t myenv; +static bool myenvdefined = false; + +static orte_schizo_launch_environ_t check_launch_environment(void) +{ + int i; + + if (myenvdefined) { + return myenv; + } + myenvdefined = true; + + /* we were only selected because we are an app, + * so no need to further check that here. 
Instead, + * see if we were direct launched vs launched via mpirun */ + if (NULL != orte_process_info.my_daemon_uri) { + /* nope */ + myenv = ORTE_SCHIZO_NATIVE_LAUNCHED; + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); + opal_argv_append_nosize(&pushed_vals, "pmi"); + goto setup; + } + + /* if nobody else has laid claim to this process, + * then it must be a singleton */ + myenv = ORTE_SCHIZO_UNMANAGED_SINGLETON; + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); + opal_argv_append_nosize(&pushed_vals, "singleton"); + + setup: + opal_output_verbose(1, orte_schizo_base_framework.framework_output, + "schizo:orte DECLARED AS %s", orte_schizo_base_print_env(myenv)); + if (NULL != pushed_envs) { + for (i=0; NULL != pushed_envs[i]; i++) { + opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ); + } + } + return myenv; +} + +static void finalize(void) +{ + int i; + + if (NULL != pushed_envs) { + for (i=0; NULL != pushed_envs[i]; i++) { + opal_unsetenv(pushed_envs[i], &environ); + } + opal_argv_free(pushed_envs); + opal_argv_free(pushed_vals); + } +} diff --git a/orte/mca/schizo/orte/schizo_orte.h b/orte/mca/schizo/orte/schizo_orte.h new file mode 100644 index 0000000000..ad06724bc3 --- /dev/null +++ b/orte/mca/schizo/orte/schizo_orte.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef _MCA_SCHIZO_ORTE_H_
+#define _MCA_SCHIZO_ORTE_H_
+
+#include "orte_config.h"
+
+#include "orte/types.h"
+
+#include "opal/mca/base/base.h"
+#include "orte/mca/schizo/schizo.h"
+
+
+BEGIN_C_DECLS
+
+ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_orte_component;
+extern orte_schizo_base_module_t orte_schizo_orte_module;
+
+END_C_DECLS
+
+#endif /* _MCA_SCHIZO_ORTE_H_ */
+
diff --git a/orte/mca/schizo/orte/schizo_orte_component.c b/orte/mca/schizo/orte/schizo_orte_component.c
new file mode 100644
index 0000000000..c6a45ec774
--- /dev/null
+++ b/orte/mca/schizo/orte/schizo_orte_component.c
@@ -0,0 +1,52 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2016 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#include "opal/util/show_help.h"
+
+#include "orte/mca/schizo/schizo.h"
+#include "schizo_orte.h"
+
+static int component_query(mca_base_module_t **module, int *priority);
+
+/* Struct of function pointers and all that to let us be initialized */
+orte_schizo_base_component_t mca_schizo_orte_component = {
+    .base_version = {
+        MCA_SCHIZO_BASE_VERSION_1_0_0,
+        .mca_component_name = "orte",
+        MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
+                              ORTE_RELEASE_VERSION),
+        .mca_query_component = component_query,
+    },
+    .base_data = {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+};
+
+/* Offer the orte module to application processes only, at priority 1
+ * (below the alps and slurm components, which use 90 and 50) */
+static int component_query(mca_base_module_t **module, int *priority)
+{
+    /* disqualify ourselves if we are not an app */
+    if (!ORTE_PROC_IS_APP) {
+        *priority = 0;
+        *module = NULL;
+        return ORTE_ERROR;
+    }
+
+    *module = (mca_base_module_t*)&orte_schizo_orte_module;
+    *priority = 1;
+    return ORTE_SUCCESS;
+}
+
diff --git 
a/orte/mca/schizo/schizo.h b/orte/mca/schizo/schizo.h index 040995a597..985f0f6a60 100644 --- a/orte/mca/schizo/schizo.h +++ b/orte/mca/schizo/schizo.h @@ -39,6 +39,9 @@ BEGIN_C_DECLS * SCHIZO module functions - the modules are accessed via * the base stub functions */ + +typedef int (*orte_schizo_base_module_init_fn_t)(void); + typedef int (*orte_schizo_base_module_parse_cli_fn_t)(char **personality, int argc, int start, char **argv); @@ -56,14 +59,37 @@ typedef int (*orte_schizo_base_module_setup_child_fn_t)(orte_job_t *jdata, orte_proc_t *child, orte_app_context_t *app); + +typedef enum { + ORTE_SCHIZO_UNDETERMINED, + ORTE_SCHIZO_NATIVE_LAUNCHED, + ORTE_SCHIZO_UNMANAGED_SINGLETON, + ORTE_SCHIZO_DIRECT_LAUNCHED, + ORTE_SCHIZO_MANAGED_SINGLETON +} orte_schizo_launch_environ_t; + + +/* check if this process was directly launched by a managed environment, and + * do whatever the module wants to do under those conditions. The module + * can push any required envars into the local environment, but must remember + * to "unset" them during finalize. 
The module then returns a flag indicating + * the launch environment of the process */ +typedef orte_schizo_launch_environ_t (*orte_schizo_base_module_ck_launch_environ_fn_t)(void); + +/* give the component a chance to cleanup */ +typedef void (*orte_schizo_base_module_finalize_fn_t)(void); + /* * schizo module version 1.3.0 */ typedef struct { - orte_schizo_base_module_parse_cli_fn_t parse_cli; - orte_schizo_base_module_parse_env_fn_t parse_env; - orte_schizo_base_module_setup_fork_fn_t setup_fork; - orte_schizo_base_module_setup_child_fn_t setup_child; + orte_schizo_base_module_init_fn_t init; + orte_schizo_base_module_parse_cli_fn_t parse_cli; + orte_schizo_base_module_parse_env_fn_t parse_env; + orte_schizo_base_module_setup_fork_fn_t setup_fork; + orte_schizo_base_module_setup_child_fn_t setup_child; + orte_schizo_base_module_ck_launch_environ_fn_t check_launch_environment; + orte_schizo_base_module_finalize_fn_t finalize; } orte_schizo_base_module_t; ORTE_DECLSPEC extern orte_schizo_base_module_t orte_schizo; diff --git a/orte/mca/schizo/singularity/schizo_singularity.c b/orte/mca/schizo/singularity/schizo_singularity.c index dfca2e0e7d..9696e08e7d 100644 --- a/orte/mca/schizo/singularity/schizo_singularity.c +++ b/orte/mca/schizo/singularity/schizo_singularity.c @@ -30,10 +30,7 @@ static int setup_fork(orte_job_t *jdata, orte_app_context_t *context); orte_schizo_base_module_t orte_schizo_singularity_module = { - NULL, - NULL, - setup_fork, - NULL + .setup_fork = setup_fork }; static int setup_fork(orte_job_t *jdata, diff --git a/orte/mca/schizo/singularity/schizo_singularity_component.c b/orte/mca/schizo/singularity/schizo_singularity_component.c index 2b4c8cf300..a3a0f45447 100644 --- a/orte/mca/schizo/singularity/schizo_singularity_component.c +++ b/orte/mca/schizo/singularity/schizo_singularity_component.c @@ -14,6 +14,8 @@ #include "opal/util/show_help.h" +#include "orte/runtime/orte_globals.h" + #include "orte/mca/schizo/schizo.h" #include 
"schizo_singularity.h" @@ -38,6 +40,12 @@ orte_schizo_base_component_t mca_schizo_singularity_component = { static int component_query(mca_base_module_t **module, int *priority) { + /* if we are an app, ignore us */ + if (ORTE_PROC_IS_APP) { + *module = NULL; + *priority = 0; + return ORTE_ERROR; + } *module = (mca_base_module_t*)&orte_schizo_singularity_module; *priority = 5; return ORTE_SUCCESS; diff --git a/orte/mca/schizo/slurm/Makefile.am b/orte/mca/schizo/slurm/Makefile.am new file mode 100644 index 0000000000..e063ce7220 --- /dev/null +++ b/orte/mca/schizo/slurm/Makefile.am @@ -0,0 +1,35 @@ +# +# Copyright (c) 2016 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + schizo_slurm_component.c \ + schizo_slurm.h \ + schizo_slurm.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_orte_schizo_slurm_DSO +component_noinst = +component_install = mca_schizo_slurm.la +else +component_noinst = libmca_schizo_slurm.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_schizo_slurm_la_SOURCES = $(sources) +mca_schizo_slurm_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_schizo_slurm_la_SOURCES = $(sources) +libmca_schizo_slurm_la_LDFLAGS = -module -avoid-version + diff --git a/orte/mca/schizo/slurm/configure.m4 b/orte/mca/schizo/slurm/configure.m4 new file mode 100644 index 0000000000..44f2d46c18 --- /dev/null +++ b/orte/mca/schizo/slurm/configure.m4 @@ -0,0 +1,41 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2016 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_orte_schizo_slurm_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_orte_schizo_slurm_CONFIG],[ + AC_CONFIG_FILES([orte/mca/schizo/slurm/Makefile]) + + ORTE_CHECK_SLURM([schizo_slurm], [schizo_slurm_good=1], [schizo_slurm_good=0]) + + # if check worked, set wrapper flags if so. + # Evaluate succeed / fail + AS_IF([test "$schizo_slurm_good" = "1"], + [$1], + [$2]) + + # set build flags to use in makefile + AC_SUBST([schizo_slurm_CPPFLAGS]) + AC_SUBST([schizo_slurm_LDFLAGS]) + AC_SUBST([schizo_slurm_LIBS]) +])dnl diff --git a/orte/mca/schizo/slurm/owner.txt b/orte/mca/schizo/slurm/owner.txt new file mode 100644 index 0000000000..85b4416d20 --- /dev/null +++ b/orte/mca/schizo/slurm/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: INTEL +status: active diff --git a/orte/mca/schizo/slurm/schizo_slurm.c b/orte/mca/schizo/slurm/schizo_slurm.c new file mode 100644 index 0000000000..3ddb5a9d33 --- /dev/null +++ b/orte/mca/schizo/slurm/schizo_slurm.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/mca/schizo/base/base.h" + +#include "schizo_slurm.h" + +static orte_schizo_launch_environ_t check_launch_environment(void); +static void finalize(void); + +orte_schizo_base_module_t orte_schizo_slurm_module = { + .check_launch_environment = check_launch_environment, + .finalize = finalize +}; + +static char **pushed_envs = NULL; +static char **pushed_vals = NULL; +static orte_schizo_launch_environ_t myenv; +static bool myenvdefined = false; + +static orte_schizo_launch_environ_t check_launch_environment(void) +{ + char *bind, *list, *ptr; + int i; + + if (myenvdefined) { + return myenv; + } + myenvdefined = true; + + /* we were only selected because SLURM was detected + * and we are an app, so no need to further check + * that here. Instead, see if we were direct launched + * vs launched via mpirun */ + if (NULL != orte_process_info.my_daemon_uri) { + /* nope */ + myenv = ORTE_SCHIZO_NATIVE_LAUNCHED; + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); + opal_argv_append_nosize(&pushed_vals, "pmi"); + goto setup; + } + + /* see if we are in a SLURM allocation */ + if (NULL == getenv("SLURM_NODELIST")) { + /* nope */ + myenv = ORTE_SCHIZO_UNDETERMINED; + return myenv; + } + + /* we are in an allocation, but were we direct launched + * or are we a singleton? 
*/ + if (NULL == getenv("SLURM_STEP_ID")) { + /* not in a job step - ensure we select the + * correct things */ + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); + opal_argv_append_nosize(&pushed_vals, "singleton"); + myenv = ORTE_SCHIZO_MANAGED_SINGLETON; + goto setup; + } + myenv = ORTE_SCHIZO_DIRECT_LAUNCHED; + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess"); + opal_argv_append_nosize(&pushed_vals, "pmi"); + + /* if we are direct launched by SLURM, then we want + * to ensure that we do not override their binding + * options, so set that envar */ + if (NULL != (bind = getenv("SLURM_CPU_BIND_TYPE"))) { + if (0 == strcmp(bind, "none")) { + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"hwloc_base_binding_policy"); + opal_argv_append_nosize(&pushed_vals, "none"); + /* indicate we are externally bound so we won't try to do it ourselves */ + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"orte_externally_bound"); + opal_argv_append_nosize(&pushed_vals, "1"); + } else if (0 == strcmp(bind, "mask_cpu")) { + /* if the bind list is all F's, then the + * user didn't specify anything */ + if (NULL != (list = getenv("SLURM_CPU_BIND_LIST")) && + NULL != (ptr = strchr(list, 'x'))) { + ++ptr; // step over the 'x' + for (i=0; '\0' != *ptr; ptr++) { + if ('F' != *ptr) { + /* indicate we are externally bound */ + opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"orte_externally_bound"); + opal_argv_append_nosize(&pushed_vals, "1"); + break; + } + } + } + } + } + + setup: + opal_output_verbose(1, orte_schizo_base_framework.framework_output, + "schizo:slurm DECLARED AS %s", orte_schizo_base_print_env(myenv)); + if (NULL != pushed_envs) { + for (i=0; NULL != pushed_envs[i]; i++) { + opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ); + } + } + return myenv; +} + +static void finalize(void) +{ + int i; + + if (NULL != pushed_envs) { + for (i=0; NULL != pushed_envs[i]; i++) { + opal_unsetenv(pushed_envs[i], &environ); + } + 
opal_argv_free(pushed_envs); + opal_argv_free(pushed_vals); + } +} diff --git a/orte/mca/schizo/slurm/schizo_slurm.h b/orte/mca/schizo/slurm/schizo_slurm.h new file mode 100644 index 0000000000..e9ee000821 --- /dev/null +++ b/orte/mca/schizo/slurm/schizo_slurm.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef _MCA_SCHIZO_SLURM_H_ +#define _MCA_SCHIZO_SLURM_H_ + +#include "orte_config.h" + +#include "orte/types.h" + +#include "opal/mca/base/base.h" +#include "orte/mca/schizo/schizo.h" + + +BEGIN_C_DECLS + +ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_slurm_component; +extern orte_schizo_base_module_t orte_schizo_slurm_module; + +END_C_DECLS + +#endif /* MCA_SCHIZO_SLURM_H_ */ + diff --git a/orte/mca/schizo/slurm/schizo_slurm_component.c b/orte/mca/schizo/slurm/schizo_slurm_component.c new file mode 100644 index 0000000000..32d4bfbead --- /dev/null +++ b/orte/mca/schizo/slurm/schizo_slurm_component.c @@ -0,0 +1,52 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/types.h"
+#include "opal/types.h"
+
+#include "opal/util/show_help.h"
+
+#include "orte/mca/schizo/schizo.h"
+#include "schizo_slurm.h"
+
+static int component_query(mca_base_module_t **module, int *priority);
+
+/* Struct of function pointers and all that to let us be initialized */
+orte_schizo_base_component_t mca_schizo_slurm_component = {
+    .base_version = {
+        MCA_SCHIZO_BASE_VERSION_1_0_0,
+        .mca_component_name = "slurm",
+        MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
+                              ORTE_RELEASE_VERSION),
+        .mca_query_component = component_query,
+    },
+    .base_data = {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+};
+
+/* Offer the slurm module to application processes only, at priority 50;
+ * no SLURM detection is done here (see check_launch_environment) */
+static int component_query(mca_base_module_t **module, int *priority)
+{
+    /* disqualify ourselves if we are not an app */
+    if (!ORTE_PROC_IS_APP) {
+        *priority = 0;
+        *module = NULL;
+        return ORTE_ERROR;
+    }
+
+    *module = (mca_base_module_t*)&orte_schizo_slurm_module;
+    *priority = 50;
+    return ORTE_SUCCESS;
+}
+
diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index b514b1c843..a34e22489c 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -553,6 +553,7 @@ int orte_daemon(int argc, char *argv[]) app->app = strdup("singleton"); app->num_procs = 1; opal_pointer_array_add(jdata->apps, app); + jdata->num_apps = 1; /* setup a proc object for the singleton - since we * -must- be the HNP, and therefore we stored our diff --git a/orte/runtime/orte_finalize.c b/orte/runtime/orte_finalize.c index 4a8b3291cb..d60d802f2a 100644 --- a/orte/runtime/orte_finalize.c +++ b/orte/runtime/orte_finalize.c @@ -12,7 +12,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "orte/mca/ess/ess.h" #include "orte/mca/ess/base/base.h" +#include "orte/mca/schizo/base/base.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_locks.h" @@ -76,6 +77,10 @@ int orte_finalize(void) /* close the ess itself */ (void) mca_base_framework_close(&orte_ess_base_framework); + /* finalize and close schizo */ + orte_schizo.finalize(); + (void) mca_base_framework_close(&orte_schizo_base_framework); + /* cleanup the process info */ orte_proc_info_finalize(); diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index d4f56d8106..8e5ccb82be 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* @@ -45,6 +45,7 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/ess.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/schizo/base/base.h" #include "orte/util/listener.h" #include "orte/util/name_fns.h" #include "orte/util/proc_info.h" @@ -202,6 +203,22 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) pmix_server_register_params(); } + /* open the SCHIZO framework as everyone needs it, and the + * ess will use it to help select its component */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_schizo_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { + error = "orte_schizo_base_select"; + goto error; + } + /* if we are an app, let SCHIZO help us determine our environment */ + if (ORTE_PROC_IS_APP) { + (void)orte_schizo.check_launch_environment(); + } + /* open the ESS and select the correct module for this environment */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) { ORTE_ERROR_LOG(ret); diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 69373cf961..cbe7d07f48 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -822,6 +822,8 @@ int orterun(int argc, char *argv[]) * orterun */ orte_launch_environ = opal_argv_copy(environ); + opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ); + opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ); /* Intialize our Open RTE environment * Set the flag telling orte_init that I am NOT a @@ -1106,6 +1108,9 @@ int orterun(int argc, char *argv[]) /* cleanup and leave */ orte_finalize(); + if (NULL != orte_launch_environ) { + opal_argv_free(orte_launch_environ); + } if (orte_debug_flag) { fprintf(stderr, "exiting with status %d\n", orte_exit_status); }