Fix a number of issues, some of which have lingered for a long time:
* Provide a more reliable way of determining that a process is a singleton by leveraging the schizo framework. Add new components for slurm, alps, and orte to detect when we are in a managed environment, and whether we have been launched by mpirun or a native launcher. Set the correct envars to control ess and pmix selection in each case.
* Change the relative priority of the pmix120 and pmix112 components to make pmix120 the default.
* Fix singleton comm-spawn by correctly setting the num_apps field of the orte_job_t created by the daemon - this fixes a segfault in register_nspace on newly created daemons.
* Ensure orterun doesn't propagate any ess or pmix directives in its environment.
* Clean up a few valgrind issues and memory leaks.
* Fix a race condition that prevented the client from completing notification registrations (missing thread shift).
* Ensure the schizo/alps component detects launch by mpirun.
Этот коммит содержится в:
родитель
67e45028df
Коммит
011403c04a
@ -113,26 +113,26 @@ const opal_pmix_base_module_t opal_pmix_isolated_module = {
|
||||
.register_jobid = isolated_register_jobid
|
||||
};
|
||||
|
||||
static int pmix_init_count = 0;
|
||||
static opal_process_name_t pmix_pname;
|
||||
static int isolated_init_count = 0;
|
||||
static opal_process_name_t isolated_pname;
|
||||
|
||||
static int isolated_init(void)
|
||||
{
|
||||
int rc;
|
||||
opal_value_t kv;
|
||||
|
||||
++pmix_init_count;
|
||||
++isolated_init_count;
|
||||
|
||||
/* store our name in the opal_proc_t so that
|
||||
* debug messages will make sense - an upper
|
||||
* layer will eventually overwrite it, but that
|
||||
* won't do any harm */
|
||||
pmix_pname.jobid = 1;
|
||||
pmix_pname.vpid = 0;
|
||||
opal_proc_set_name(&pmix_pname);
|
||||
isolated_pname.jobid = 1;
|
||||
isolated_pname.vpid = 0;
|
||||
opal_proc_set_name(&isolated_pname);
|
||||
opal_output_verbose(10, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated: assigned tmp name %d %d",
|
||||
OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid);
|
||||
OPAL_NAME_PRINT(isolated_pname),isolated_pname.jobid,isolated_pname.vpid);
|
||||
|
||||
// setup hash table
|
||||
opal_pmix_base_hash_init();
|
||||
@ -248,11 +248,11 @@ err_exit:
|
||||
|
||||
static int isolated_fini(void)
|
||||
{
|
||||
if (0 == pmix_init_count) {
|
||||
if (0 == isolated_init_count) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
if (0 != --pmix_init_count) {
|
||||
if (0 != --isolated_init_count) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
opal_pmix_base_hash_finalize();
|
||||
@ -261,7 +261,7 @@ static int isolated_fini(void)
|
||||
|
||||
static int isolated_initialized(void)
|
||||
{
|
||||
if (0 < pmix_init_count) {
|
||||
if (0 < isolated_init_count) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
@ -323,11 +323,11 @@ static int isolated_put(opal_pmix_scope_t scope,
|
||||
"%s pmix:isolated isolated_put key %s scope %d\n",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope);
|
||||
|
||||
if (!pmix_init_count) {
|
||||
if (!isolated_init_count) {
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
rc = opal_pmix_base_store(&pmix_pname, kv);
|
||||
rc = opal_pmix_base_store(&isolated_pname, kv);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -79,16 +79,8 @@ static int isolated_close(void)
|
||||
|
||||
static int isolated_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* if we are in a Singularity container, then we cannot spawn an
|
||||
* HNP and are truly on our own and cannot call comm_spawn or
|
||||
* any of its friends */
|
||||
if (NULL != getenv("SINGULARITY_CONTAINER")) {
|
||||
*priority = 100;
|
||||
*module = (mca_base_module_t *)&opal_pmix_isolated_module;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
/* otherwise, ignore us */
|
||||
/* ignore us unless requested */
|
||||
*priority = 0;
|
||||
*module = NULL;
|
||||
return OPAL_ERR_TAKE_NEXT_OPTION;
|
||||
*module = (mca_base_module_t *)&opal_pmix_isolated_module;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ AC_DEFUN([MCA_opal_pmix_pmix112_CONFIG],[
|
||||
opal_pmix_pmix112_save_LDFLAGS=$LDFLAGS
|
||||
opal_pmix_pmix112_save_LIBS=$LIBS
|
||||
|
||||
opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
|
||||
opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ --disable-visibility --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
|
||||
AS_IF([test "$enable_debug" = "yes"],
|
||||
[opal_pmix_pmix112_args="--enable-debug $opal_pmix_pmix112_args"
|
||||
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -95,7 +95,7 @@ static int pmix112_component_query(mca_base_module_t **module, int *priority)
|
||||
if (NULL != (t = getenv("PMIX_SERVER_URI")) ||
|
||||
NULL != (id = getenv("PMIX_ID"))) {
|
||||
/* if PMIx is present, then we are a client and need to use it */
|
||||
*priority = 100;
|
||||
*priority = 80;
|
||||
} else {
|
||||
/* we could be a server, so we still need to be considered */
|
||||
*priority = 5;
|
||||
|
@ -41,7 +41,7 @@ AC_DEFUN([MCA_opal_pmix_pmix120_CONFIG],[
|
||||
opal_pmix_pmix120_save_LDFLAGS=$LDFLAGS
|
||||
opal_pmix_pmix120_save_LIBS=$LIBS
|
||||
|
||||
opal_pmix_pmix120_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix120_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
|
||||
opal_pmix_pmix120_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix120_ --disable-visibility --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
|
||||
AS_IF([test "$enable_debug" = "yes"],
|
||||
[opal_pmix_pmix120_args="--enable-debug $opal_pmix_pmix120_args"
|
||||
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],
|
||||
|
@ -38,16 +38,6 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* internally used object for transferring data
|
||||
* to/from the server and for storing in the
|
||||
* hash tables */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
char *key;
|
||||
pmix_value_t *value;
|
||||
} pmix_kval_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_kval_t);
|
||||
|
||||
/* A non-API function for something that happens in a number
|
||||
* of places throughout the code base - transferring a value to
|
||||
* another pmix_value_t structure
|
||||
|
@ -62,6 +62,7 @@ static const char pmix_version_string[] = PMIX_VERSION;
|
||||
#include "src/util/progress_threads.h"
|
||||
#include "src/usock/usock.h"
|
||||
#include "src/sec/pmix_sec.h"
|
||||
#include "src/include/pmix_globals.h"
|
||||
|
||||
#include "pmix_client_ops.h"
|
||||
|
||||
@ -279,6 +280,7 @@ int PMIx_Init(pmix_proc_t *proc)
|
||||
}
|
||||
|
||||
/* default to our internal errhandler */
|
||||
errhandler_ref = 0;
|
||||
pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref);
|
||||
/* see if debug is requested */
|
||||
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
|
||||
@ -631,7 +633,7 @@ pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val)
|
||||
cb->value = val;
|
||||
|
||||
/* pass this into the event library for thread protection */
|
||||
PMIX_THREAD_SHIFT(cb, _putfn);
|
||||
PMIX_THREADSHIFT(cb, _putfn);
|
||||
|
||||
/* wait for the result */
|
||||
PMIX_WAIT_FOR_COMPLETION(cb->active);
|
||||
@ -715,7 +717,7 @@ pmix_status_t PMIx_Commit(void)
|
||||
cb->active = true;
|
||||
|
||||
/* pass this into the event library for thread protection */
|
||||
PMIX_THREAD_SHIFT(cb, _commitfn);
|
||||
PMIX_THREADSHIFT(cb, _commitfn);
|
||||
|
||||
/* wait for the result */
|
||||
PMIX_WAIT_FOR_COMPLETION(cb->active);
|
||||
@ -792,7 +794,7 @@ pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace,
|
||||
}
|
||||
|
||||
/* pass this into the event library for thread protection */
|
||||
PMIX_THREAD_SHIFT(cb, _peersfn);
|
||||
PMIX_THREADSHIFT(cb, _peersfn);
|
||||
|
||||
/* wait for the result */
|
||||
PMIX_WAIT_FOR_COMPLETION(cb->active);
|
||||
@ -850,7 +852,7 @@ pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist)
|
||||
}
|
||||
|
||||
/* pass this into the event library for thread protection */
|
||||
PMIX_THREAD_SHIFT(cb, _nodesfn);
|
||||
PMIX_THREADSHIFT(cb, _nodesfn);
|
||||
|
||||
/* wait for the result */
|
||||
PMIX_WAIT_FOR_COMPLETION(cb->active);
|
||||
@ -1297,51 +1299,50 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
PMIX_RELEASE(cb);
|
||||
}
|
||||
|
||||
void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
pmix_notification_fn_t errhandler,
|
||||
pmix_errhandler_reg_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
static void reg_errhandler(int sd, short args, void *cbdata)
|
||||
{
|
||||
/* add err handler, process info keys and register for events and call the callback */
|
||||
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
|
||||
int index = 0;
|
||||
pmix_buffer_t *msg;
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler with %d infos", (int)ninfo);
|
||||
"pmix: register errhandler with %d infos", (int)cd->ninfo);
|
||||
|
||||
/* check if this handler is already registered if so return error */
|
||||
if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(info, ninfo, &index))) {
|
||||
if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(cd->info, cd->ninfo, &index))) {
|
||||
/* complete request with error status and return its original reference */
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - already registered");
|
||||
cbfunc(PMIX_EXISTS, index, cbdata);
|
||||
cd->cbfunc.errregcbfn(PMIX_EXISTS, index, cd->cbdata);
|
||||
} else if (PMIX_ERR_GRP_FOUND == rc) {
|
||||
/* just acknowledge it */
|
||||
cbfunc(PMIX_SUCCESS, index, cbdata);
|
||||
} else if (PMIX_ERR_DFLT_FOUND == rc && NULL == info) {
|
||||
cd->cbfunc.errregcbfn(PMIX_SUCCESS, index, cd->cbdata);
|
||||
} else if (PMIX_ERR_DFLT_FOUND == rc && NULL == cd->info) {
|
||||
/* if they are registering a default errhandler, then
|
||||
* overwrite the existing one with it - the index will
|
||||
* contain its location */
|
||||
pmix_add_errhandler(errhandler, info, ninfo, &index);
|
||||
rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index);
|
||||
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
|
||||
} else {
|
||||
/* need to add this errhandler */
|
||||
if (PMIX_SUCCESS != (rc = pmix_add_errhandler(errhandler, info, ninfo, &index))) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - error status rc=%d", rc);
|
||||
/* complete request with error*/
|
||||
cbfunc(rc, index, cbdata);
|
||||
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
|
||||
} else {
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - added index=%d, ninfo =%lu", index, ninfo);
|
||||
"pmix: register errhandler - added index=%d, ninfo =%lu", index, cd->ninfo);
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_REGEVENTS_CMD, info, ninfo))) {
|
||||
if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_REGEVENTS_CMD, cd->info, cd->ninfo))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - pack events failed status=%d", rc);
|
||||
PMIX_RELEASE(msg);
|
||||
pmix_remove_errhandler(index);
|
||||
cbfunc(PMIX_ERR_PACK_FAILURE, -1, cbdata);
|
||||
cd->cbfunc.errregcbfn(PMIX_ERR_PACK_FAILURE, -1, cd->cbdata);
|
||||
} else {
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
@ -1349,8 +1350,8 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
pmix_output_verbose(10, pmix_globals.debug_output,
|
||||
"pmix: register errhandler - pack events success status=%d", rc);
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->errreg_cbfunc = cbfunc;
|
||||
cb->cbdata = cbdata;
|
||||
cb->errreg_cbfunc = cd->cbfunc.errregcbfn;
|
||||
cb->cbdata = cd->cbdata;
|
||||
cb->errhandler_ref = index;
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, regevents_cbfunc, cb);
|
||||
@ -1359,6 +1360,28 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
}
|
||||
}
|
||||
|
||||
void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
|
||||
pmix_notification_fn_t errhandler,
|
||||
pmix_errhandler_reg_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
pmix_shift_caddy_t *cd;
|
||||
|
||||
/* need to thread shift this request */
|
||||
cd = PMIX_NEW(pmix_shift_caddy_t);
|
||||
cd->info = info;
|
||||
cd->ninfo = ninfo;
|
||||
cd->err = errhandler;
|
||||
cd->cbfunc.errregcbfn = cbfunc;
|
||||
cd->cbdata = cbdata;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_client_register_errhandler shifting to server thread");
|
||||
|
||||
PMIX_THREADSHIFT(cd, reg_errhandler);
|
||||
}
|
||||
|
||||
|
||||
static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata)
|
||||
{
|
||||
@ -1388,41 +1411,55 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
PMIX_RELEASE(cb);
|
||||
}
|
||||
|
||||
void pmix_client_deregister_errhandler(int errhandler_ref,
|
||||
pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
static void dereg_errhandler(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
pmix_error_reg_info_t *errreg;
|
||||
pmix_buffer_t *msg;
|
||||
pmix_cb_t *cb;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix_client_deregister_errhandler errhandler_ref = %d", errhandler_ref);
|
||||
"pmix_client_deregister_errhandler errhandler_ref = %d", cd->ref);
|
||||
|
||||
errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, errhandler_ref);
|
||||
errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, cd->ref);
|
||||
if (NULL != errreg ) {
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_DEREGEVENTS_CMD, errreg->info, errreg->ninfo))) {
|
||||
PMIX_RELEASE(msg);
|
||||
pmix_remove_errhandler(errhandler_ref);
|
||||
cbfunc(PMIX_ERR_PACK_FAILURE, cbdata);
|
||||
pmix_remove_errhandler(cd->ref);
|
||||
cd->cbfunc.opcbfn(PMIX_ERR_PACK_FAILURE, cd->cbdata);
|
||||
} else {
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
* the server acks/nacks the register events request*/
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->op_cbfunc = cbfunc;
|
||||
cb->cbdata = cbdata;
|
||||
cb->errhandler_ref = errhandler_ref;
|
||||
cb->op_cbfunc = cd->cbfunc.opcbfn;
|
||||
cb->cbdata = cd->cbdata;
|
||||
cb->errhandler_ref = cd->ref;
|
||||
/* push the message into our event base to send to the server */
|
||||
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, deregevents_cbfunc, cb);
|
||||
}
|
||||
} else {
|
||||
cbfunc(PMIX_ERR_NOT_FOUND, cbdata);
|
||||
cd->cbfunc.opcbfn(PMIX_ERR_NOT_FOUND, cd->cbdata);
|
||||
}
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
|
||||
void pmix_client_deregister_errhandler(int errhandler_ref,
|
||||
pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
pmix_shift_caddy_t *cd;
|
||||
|
||||
/* need to thread shift this request */
|
||||
cd = PMIX_NEW(pmix_shift_caddy_t);
|
||||
cd->cbfunc.opcbfn = cbfunc;
|
||||
cd->cbdata = cbdata;
|
||||
cd->ref = errhandler_ref;
|
||||
PMIX_THREADSHIFT(cd, dereg_errhandler);
|
||||
}
|
||||
|
||||
static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata)
|
||||
{
|
||||
|
@ -164,7 +164,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
|
||||
cb->ninfo = ninfo;
|
||||
cb->value_cbfunc = cbfunc;
|
||||
cb->cbdata = cbdata;
|
||||
PMIX_THREAD_SHIFT(cb, _getnbfn);
|
||||
PMIX_THREADSHIFT(cb, _getnbfn);
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
@ -175,3 +175,22 @@ static void errdes(pmix_error_reg_info_t *p)
|
||||
PMIX_CLASS_INSTANCE(pmix_error_reg_info_t,
|
||||
pmix_object_t,
|
||||
errcon, errdes);
|
||||
|
||||
static void scon(pmix_shift_caddy_t *p)
|
||||
{
|
||||
p->active = false;
|
||||
p->kv = NULL;
|
||||
p->cbfunc.relfn = NULL;
|
||||
p->cbfunc.errregcbfn = NULL;
|
||||
p->cbfunc.opcbfn = NULL;
|
||||
p->cbdata = NULL;
|
||||
}
|
||||
static void scdes(pmix_shift_caddy_t *p)
|
||||
{
|
||||
if (NULL != p->kv) {
|
||||
PMIX_RELEASE(p->kv);
|
||||
}
|
||||
}
|
||||
PMIX_CLASS_INSTANCE(pmix_shift_caddy_t,
|
||||
pmix_object_t,
|
||||
scon, scdes);
|
||||
|
@ -38,38 +38,96 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define PMIX_MAX_CRED_SIZE 131072 // set max at 128kbytes
|
||||
#define PMIX_MAX_ERROR_REGISTRATIONS 5 // maximum number of error handlers that can be registered
|
||||
#define PMIX_MAX_CRED_SIZE 131072 // set max at 128kbytes
|
||||
#define PMIX_MAX_ERROR_REGISTRATIONS 128 // maximum number of error handlers that can be registered
|
||||
|
||||
/* define a structure for tracking error registrations */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
bool sglhdlr; // registers a specific error status handler
|
||||
pmix_notification_fn_t errhandler; /* registered err handler callback fn */
|
||||
pmix_info_t *info; /* error info keys registered with the handler */
|
||||
size_t ninfo; /* size of info */
|
||||
} pmix_error_reg_info_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_error_reg_info_t);
|
||||
/**** ENUM DEFINITIONS ****/
|
||||
/* define a command type for communicating to the
|
||||
* pmix server */
|
||||
#define PMIX_CMD PMIX_UINT32
|
||||
|
||||
/* define a global construct that includes values that must be shared
|
||||
* between various parts of the code library. Both the client
|
||||
* and server libraries must instance this structure */
|
||||
/* define some commands */
|
||||
typedef enum {
|
||||
PMIX_REQ_CMD,
|
||||
PMIX_ABORT_CMD,
|
||||
PMIX_COMMIT_CMD,
|
||||
PMIX_FENCENB_CMD,
|
||||
PMIX_GETNB_CMD,
|
||||
PMIX_FINALIZE_CMD,
|
||||
PMIX_PUBLISHNB_CMD,
|
||||
PMIX_LOOKUPNB_CMD,
|
||||
PMIX_UNPUBLISHNB_CMD,
|
||||
PMIX_SPAWNNB_CMD,
|
||||
PMIX_CONNECTNB_CMD,
|
||||
PMIX_DISCONNECTNB_CMD,
|
||||
PMIX_NOTIFY_CMD,
|
||||
PMIX_REGEVENTS_CMD,
|
||||
PMIX_DEREGEVENTS_CMD,
|
||||
} pmix_cmd_t;
|
||||
|
||||
/* define a set of flags to direct collection
|
||||
* of data during operations */
|
||||
typedef enum {
|
||||
PMIX_COLLECT_INVALID = -1,
|
||||
PMIX_COLLECT_NO,
|
||||
PMIX_COLLECT_YES,
|
||||
PMIX_COLLECT_MAX
|
||||
} pmix_collect_t;
|
||||
|
||||
|
||||
/**** MESSAGING STRUCTURES ****/
|
||||
/* header for messages */
|
||||
typedef struct {
|
||||
int init_cntr; // #times someone called Init - #times called Finalize
|
||||
pmix_proc_t myid;
|
||||
uid_t uid; // my effective uid
|
||||
gid_t gid; // my effective gid
|
||||
int pindex;
|
||||
pmix_event_base_t *evbase;
|
||||
int debug_output;
|
||||
pmix_pointer_array_t errregs; // my error handler registrations.
|
||||
bool server;
|
||||
bool connected;
|
||||
pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about
|
||||
pmix_buffer_t *cache_local; // data PUT by me to local scope
|
||||
pmix_buffer_t *cache_remote; // data PUT by me to remote scope
|
||||
} pmix_globals_t;
|
||||
uint32_t tag;
|
||||
size_t nbytes;
|
||||
} pmix_usock_hdr_t;
|
||||
|
||||
/* internally used object for transferring data
|
||||
* to/from the server and for storing in the
|
||||
* hash tables */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
char *key;
|
||||
pmix_value_t *value;
|
||||
} pmix_kval_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_kval_t);
|
||||
|
||||
// forward declaration
|
||||
struct pmix_peer_t;
|
||||
|
||||
/* internally used cbfunc */
|
||||
typedef void (*pmix_usock_cbfunc_t)(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata);
|
||||
|
||||
/* usock structure for sending a message */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_event_t ev;
|
||||
pmix_usock_hdr_t hdr;
|
||||
pmix_buffer_t *data;
|
||||
bool hdr_sent;
|
||||
char *sdptr;
|
||||
size_t sdbytes;
|
||||
} pmix_usock_send_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_usock_send_t);
|
||||
|
||||
/* usock structure for recving a message */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_event_t ev;
|
||||
struct pmix_peer_t *peer;
|
||||
int sd;
|
||||
pmix_usock_hdr_t hdr;
|
||||
char *data;
|
||||
bool hdr_recvd;
|
||||
char *rdptr;
|
||||
size_t rdbytes;
|
||||
} pmix_usock_recv_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_usock_recv_t);
|
||||
|
||||
|
||||
/**** PEER STRUCTURES ****/
|
||||
/* objects for tracking active nspaces */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
@ -105,6 +163,39 @@ typedef struct pmix_rank_info_t {
|
||||
} pmix_rank_info_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_rank_info_t);
|
||||
|
||||
/* object for tracking peers - each peer can have multiple
|
||||
* connections. This can occur if the initial app executes
|
||||
* a fork/exec, and the child initiates its own connection
|
||||
* back to the PMIx server. Thus, the trackers should be "indexed"
|
||||
* by the socket, not the process nspace/rank */
|
||||
typedef struct pmix_peer_t {
|
||||
pmix_object_t super;
|
||||
pmix_rank_info_t *info;
|
||||
int proc_cnt;
|
||||
void *server_object;
|
||||
int index;
|
||||
int sd;
|
||||
pmix_event_t send_event; /**< registration with event thread for send events */
|
||||
bool send_ev_active;
|
||||
pmix_event_t recv_event; /**< registration with event thread for recv events */
|
||||
bool recv_ev_active;
|
||||
pmix_list_t send_queue; /**< list of messages to send */
|
||||
pmix_usock_send_t *send_msg; /**< current send in progress */
|
||||
pmix_usock_recv_t *recv_msg; /**< current recv in progress */
|
||||
} pmix_peer_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_peer_t);
|
||||
|
||||
|
||||
/* define a structure for tracking error registrations */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
bool sglhdlr; // registers a specific error status handler
|
||||
pmix_notification_fn_t errhandler; /* registered err handler callback fn */
|
||||
pmix_info_t *info; /* error info keys registered with the handler */
|
||||
size_t ninfo; /* size of info */
|
||||
} pmix_error_reg_info_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_error_reg_info_t);
|
||||
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
char *name; // name of the node
|
||||
@ -112,6 +203,112 @@ typedef struct {
|
||||
} pmix_nrec_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_nrec_t);
|
||||
|
||||
/* define an object for moving a send
|
||||
* request into the server's event base */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
int sd;
|
||||
} pmix_snd_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_snd_caddy_t);
|
||||
|
||||
/* define an object for moving a send
|
||||
* request into the server's event base */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_usock_hdr_t hdr;
|
||||
pmix_peer_t *peer;
|
||||
pmix_snd_caddy_t snd;
|
||||
} pmix_server_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_server_caddy_t);
|
||||
|
||||
/* define a tracker for collective operations */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_cmd_t type;
|
||||
pmix_proc_t *pcs; // copy of the original array of participants
|
||||
size_t npcs; // number of procs in the array
|
||||
volatile bool active; // flag for waiting for completion
|
||||
bool def_complete; // all local procs have been registered and the trk definition is complete
|
||||
pmix_list_t ranks; // list of pmix_rank_info_t of the local participants
|
||||
pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants
|
||||
uint32_t nlocal; // number of local participants
|
||||
uint32_t local_cnt; // number of local participants who have contributed
|
||||
pmix_info_t *info; // array of info structs
|
||||
size_t ninfo; // number of info structs in array
|
||||
pmix_collect_t collect_type; // whether or not data is to be returned at completion
|
||||
pmix_modex_cbfunc_t modexcbfunc;
|
||||
pmix_op_cbfunc_t op_cbfunc;
|
||||
} pmix_server_trkr_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_server_trkr_t);
|
||||
|
||||
|
||||
/**** THREAD-RELATED ****/
|
||||
/* define a caddy for thread-shifting operations */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
pmix_event_t ev;
|
||||
volatile bool active;
|
||||
pmix_status_t status;
|
||||
const char *nspace;
|
||||
int rank;
|
||||
const char *data;
|
||||
size_t ndata;
|
||||
const char *key;
|
||||
pmix_info_t *info;
|
||||
size_t ninfo;
|
||||
pmix_notification_fn_t err;
|
||||
pmix_kval_t *kv;
|
||||
pmix_value_t *vptr;
|
||||
pmix_server_caddy_t *cd;
|
||||
pmix_server_trkr_t *tracker;
|
||||
union {
|
||||
pmix_release_cbfunc_t relfn;
|
||||
pmix_errhandler_reg_cbfunc_t errregcbfn;
|
||||
pmix_op_cbfunc_t opcbfn;
|
||||
}cbfunc;
|
||||
void *cbdata;
|
||||
int ref;
|
||||
} pmix_shift_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_shift_caddy_t);
|
||||
|
||||
#define PMIX_THREADSHIFT(r, c) \
|
||||
do { \
|
||||
(r)->active = true; \
|
||||
event_assign(&((r)->ev), pmix_globals.evbase, \
|
||||
-1, EV_WRITE, (c), (r)); \
|
||||
event_active(&((r)->ev), EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
|
||||
#define PMIX_WAIT_FOR_COMPLETION(a) \
|
||||
do { \
|
||||
while ((a)) { \
|
||||
usleep(10); \
|
||||
} \
|
||||
} while (0);
|
||||
|
||||
|
||||
/**** GLOBAL STORAGE ****/
|
||||
/* define a global construct that includes values that must be shared
|
||||
* between various parts of the code library. Both the client
|
||||
* and server libraries must instance this structure */
|
||||
typedef struct {
|
||||
int init_cntr; // #times someone called Init - #times called Finalize
|
||||
pmix_proc_t myid;
|
||||
uid_t uid; // my effective uid
|
||||
gid_t gid; // my effective gid
|
||||
int pindex;
|
||||
pmix_event_base_t *evbase;
|
||||
int debug_output;
|
||||
pmix_pointer_array_t errregs; // my error handler registrations.
|
||||
bool server;
|
||||
bool connected;
|
||||
pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about
|
||||
pmix_buffer_t *cache_local; // data PUT by me to local scope
|
||||
pmix_buffer_t *cache_remote; // data PUT by me to remote scope
|
||||
} pmix_globals_t;
|
||||
|
||||
|
||||
/* initialize the pmix_global structure */
|
||||
void pmix_globals_init(void);
|
||||
|
||||
|
@ -80,63 +80,6 @@ PMIX_CLASS_INSTANCE(pmix_usock_queue_t,
|
||||
pmix_object_t,
|
||||
NULL, NULL);
|
||||
|
||||
/* define a caddy for thread-shifting operations when
|
||||
* the host server executes a callback to us */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
pmix_event_t ev;
|
||||
volatile bool active;
|
||||
pmix_status_t status;
|
||||
const char *nspace;
|
||||
int rank;
|
||||
const char *data;
|
||||
size_t ndata;
|
||||
const char *key;
|
||||
pmix_info_t *info;
|
||||
size_t ninfo;
|
||||
pmix_notification_fn_t err;
|
||||
pmix_kval_t *kv;
|
||||
pmix_value_t *vptr;
|
||||
pmix_server_caddy_t *cd;
|
||||
pmix_server_trkr_t *tracker;
|
||||
union {
|
||||
pmix_release_cbfunc_t relfn;
|
||||
pmix_errhandler_reg_cbfunc_t errregcbfn;
|
||||
pmix_op_cbfunc_t opcbfn;
|
||||
}cbfunc;
|
||||
void *cbdata;
|
||||
int ref;
|
||||
} pmix_shift_caddy_t;
|
||||
static void scon(pmix_shift_caddy_t *p)
|
||||
{
|
||||
p->active = false;
|
||||
p->kv = NULL;
|
||||
p->cbfunc.relfn = NULL;
|
||||
p->cbfunc.errregcbfn = NULL;
|
||||
p->cbfunc.opcbfn = NULL;
|
||||
p->cbdata = NULL;
|
||||
}
|
||||
static void scdes(pmix_shift_caddy_t *p)
|
||||
{
|
||||
if (NULL != p->kv) {
|
||||
PMIX_RELEASE(p->kv);
|
||||
}
|
||||
}
|
||||
PMIX_CLASS_INSTANCE(pmix_shift_caddy_t,
|
||||
pmix_object_t,
|
||||
scon, scdes);
|
||||
|
||||
|
||||
#define PMIX_THREADSHIFT(r, c) \
|
||||
do { \
|
||||
(r)->active = true; \
|
||||
event_assign(&((r)->ev), pmix_globals.evbase, \
|
||||
-1, EV_WRITE, (c), (r)); \
|
||||
event_priority_set(&((r)->ev), 0); \
|
||||
event_active(&((r)->ev), EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
|
||||
/* queue a message to be sent to one of our procs - must
|
||||
* provide the following params:
|
||||
*
|
||||
@ -1336,12 +1279,12 @@ static void dereg_errhandler(int sd, short args, void *cbdata)
|
||||
if (NULL != cd->cbfunc.opcbfn) {
|
||||
cd->cbfunc.opcbfn(rc, cd->cbdata);
|
||||
}
|
||||
cd->active = false;
|
||||
OBJ_RELEASE(cd);
|
||||
}
|
||||
|
||||
void pmix_server_deregister_errhandler(int errhandler_ref,
|
||||
pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
pmix_op_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
pmix_shift_caddy_t *cd;
|
||||
|
||||
@ -1351,9 +1294,6 @@ void pmix_server_deregister_errhandler(int errhandler_ref,
|
||||
cd->cbdata = cbdata;
|
||||
cd->ref = errhandler_ref;
|
||||
PMIX_THREADSHIFT(cd, dereg_errhandler);
|
||||
|
||||
PMIX_WAIT_FOR_COMPLETION(cd->active);
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
|
||||
static void _store_internal(int sd, short args, void *cbdata)
|
||||
|
@ -20,52 +20,6 @@
|
||||
#include "src/usock/usock.h"
|
||||
#include "src/util/hash.h"
|
||||
|
||||
/* define an object for moving a send
|
||||
* request into the server's event base */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
int sd;
|
||||
} pmix_snd_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_snd_caddy_t);
|
||||
|
||||
|
||||
/* define an object for moving a send
|
||||
* request into the server's event base */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_usock_hdr_t hdr;
|
||||
pmix_peer_t *peer;
|
||||
pmix_snd_caddy_t snd;
|
||||
} pmix_server_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_server_caddy_t);
|
||||
|
||||
typedef enum {
|
||||
PMIX_COLLECT_INVALID = -1,
|
||||
PMIX_COLLECT_NO,
|
||||
PMIX_COLLECT_YES,
|
||||
PMIX_COLLECT_MAX
|
||||
} pmix_collect_t;
|
||||
|
||||
/* define a tracker for collective operations */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_cmd_t type;
|
||||
pmix_proc_t *pcs; // copy of the original array of participants
|
||||
size_t npcs; // number of procs in the array
|
||||
volatile bool active; // flag for waiting for completion
|
||||
bool def_complete; // all local procs have been registered and the trk definition is complete
|
||||
pmix_list_t ranks; // list of pmix_rank_info_t of the local participants
|
||||
pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants
|
||||
uint32_t nlocal; // number of local participants
|
||||
uint32_t local_cnt; // number of local participants who have contributed
|
||||
pmix_info_t *info; // array of info structs
|
||||
size_t ninfo; // number of info structs in array
|
||||
pmix_collect_t collect_type; // whether or not data is to be returned at completion
|
||||
pmix_modex_cbfunc_t modexcbfunc;
|
||||
pmix_op_cbfunc_t op_cbfunc;
|
||||
} pmix_server_trkr_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_server_trkr_t);
|
||||
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
pmix_event_t ev;
|
||||
@ -135,7 +89,7 @@ typedef struct {
|
||||
pmix_object_t super;
|
||||
pmix_event_t ev;
|
||||
int sd;
|
||||
struct sockaddr addr;
|
||||
struct sockaddr_storage addr;
|
||||
} pmix_pending_connection_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_pending_connection_t);
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
@ -54,73 +54,12 @@
|
||||
#endif
|
||||
#include PMIX_EVENT_HEADER
|
||||
|
||||
#include "src/include/pmix_globals.h"
|
||||
#include "src/buffer_ops/buffer_ops.h"
|
||||
#include "src/class/pmix_hash_table.h"
|
||||
#include "src/class/pmix_list.h"
|
||||
|
||||
/* define a command type for communicating to the
|
||||
* pmix server */
|
||||
#define PMIX_CMD PMIX_UINT32
|
||||
|
||||
/* define some commands */
|
||||
typedef enum {
|
||||
PMIX_REQ_CMD,
|
||||
PMIX_ABORT_CMD,
|
||||
PMIX_COMMIT_CMD,
|
||||
PMIX_FENCENB_CMD,
|
||||
PMIX_GETNB_CMD,
|
||||
PMIX_FINALIZE_CMD,
|
||||
PMIX_PUBLISHNB_CMD,
|
||||
PMIX_LOOKUPNB_CMD,
|
||||
PMIX_UNPUBLISHNB_CMD,
|
||||
PMIX_SPAWNNB_CMD,
|
||||
PMIX_CONNECTNB_CMD,
|
||||
PMIX_DISCONNECTNB_CMD,
|
||||
PMIX_NOTIFY_CMD,
|
||||
PMIX_REGEVENTS_CMD,
|
||||
PMIX_DEREGEVENTS_CMD,
|
||||
} pmix_cmd_t;
|
||||
|
||||
|
||||
/* header for messages */
|
||||
typedef struct {
|
||||
int pindex;
|
||||
uint32_t tag;
|
||||
size_t nbytes;
|
||||
} pmix_usock_hdr_t;
|
||||
|
||||
// forward declaration
|
||||
struct pmix_peer_t;
|
||||
|
||||
/* internally used cbfunc */
|
||||
typedef void (*pmix_usock_cbfunc_t)(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata);
|
||||
|
||||
/* usock structure for sending a message */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_event_t ev;
|
||||
pmix_usock_hdr_t hdr;
|
||||
pmix_buffer_t *data;
|
||||
bool hdr_sent;
|
||||
char *sdptr;
|
||||
size_t sdbytes;
|
||||
} pmix_usock_send_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_usock_send_t);
|
||||
|
||||
/* usock structure for recving a message */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_event_t ev;
|
||||
struct pmix_peer_t *peer;
|
||||
int sd;
|
||||
pmix_usock_hdr_t hdr;
|
||||
char *data;
|
||||
bool hdr_recvd;
|
||||
char *rdptr;
|
||||
size_t rdbytes;
|
||||
} pmix_usock_recv_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_usock_recv_t);
|
||||
|
||||
/* usock structure for tracking posted recvs */
|
||||
typedef struct {
|
||||
@ -132,28 +71,6 @@ typedef struct {
|
||||
} pmix_usock_posted_recv_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_usock_posted_recv_t);
|
||||
|
||||
/* object for tracking peers - each peer can have multiple
|
||||
* connections. This can occur if the initial app executes
|
||||
* a fork/exec, and the child initiates its own connection
|
||||
* back to the PMIx server. Thus, the trackers should be "indexed"
|
||||
* by the socket, not the process nspace/rank */
|
||||
typedef struct pmix_peer_t {
|
||||
pmix_object_t super;
|
||||
pmix_rank_info_t *info;
|
||||
int proc_cnt;
|
||||
void *server_object;
|
||||
int index;
|
||||
int sd;
|
||||
pmix_event_t send_event; /**< registration with event thread for send events */
|
||||
bool send_ev_active;
|
||||
pmix_event_t recv_event; /**< registration with event thread for recv events */
|
||||
bool recv_ev_active;
|
||||
pmix_list_t send_queue; /**< list of messages to send */
|
||||
pmix_usock_send_t *send_msg; /**< current send in progress */
|
||||
pmix_usock_recv_t *recv_msg; /**< current recv in progress */
|
||||
} pmix_peer_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_peer_t);
|
||||
|
||||
/* usock struct for posting send/recv request */
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
@ -193,15 +110,6 @@ typedef struct {
|
||||
} pmix_cb_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_cb_t);
|
||||
|
||||
/* an internal macro for shifting incoming requests
|
||||
* to the internal event thread */
|
||||
#define PMIX_THREAD_SHIFT(c, f) \
|
||||
do { \
|
||||
event_assign(&((c)->ev), pmix_globals.evbase, -1, \
|
||||
EV_WRITE, (f), (c)); \
|
||||
event_active(&((c)->ev), EV_WRITE, 1); \
|
||||
} while(0);
|
||||
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
pmix_event_t ev;
|
||||
@ -249,13 +157,6 @@ PMIX_CLASS_DECLARATION(pmix_timer_t);
|
||||
} while(0)
|
||||
|
||||
|
||||
#define PMIX_WAIT_FOR_COMPLETION(a) \
|
||||
do { \
|
||||
while ((a)) { \
|
||||
usleep(10); \
|
||||
} \
|
||||
} while (0);
|
||||
|
||||
#define PMIX_TIMER_EVENT(s, f, d) \
|
||||
do { \
|
||||
pmix_timer_t *tm; \
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
@ -37,12 +37,44 @@
|
||||
#endif
|
||||
|
||||
#include "src/include/pmix_globals.h"
|
||||
#include "src/server/pmix_server_ops.h"
|
||||
#include "src/util/error.h"
|
||||
|
||||
#include "usock.h"
|
||||
|
||||
static uint32_t current_tag = 1; // 0 is reserved for system purposes
|
||||
|
||||
static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
|
||||
{
|
||||
/* stop all events */
|
||||
if (peer->recv_ev_active) {
|
||||
event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
}
|
||||
if (peer->send_ev_active) {
|
||||
event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
if (NULL != peer->recv_msg) {
|
||||
PMIX_RELEASE(peer->recv_msg);
|
||||
peer->recv_msg = NULL;
|
||||
}
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
if (pmix_globals.server) {
|
||||
/* if I am a server, then we need to
|
||||
* do some cleanup as the client has
|
||||
* left us */
|
||||
pmix_pointer_array_set_item(&pmix_server_globals.clients,
|
||||
peer->index, NULL);
|
||||
PMIX_RELEASE(peer);
|
||||
} else {
|
||||
/* if I am a client, there is only
|
||||
* one connection we can have */
|
||||
pmix_globals.connected = false;
|
||||
}
|
||||
PMIX_REPORT_ERROR(err);
|
||||
}
|
||||
|
||||
static pmix_status_t send_bytes(int sd, char **buf, size_t *remain)
|
||||
{
|
||||
pmix_status_t ret = PMIX_SUCCESS;
|
||||
@ -183,8 +215,7 @@ void pmix_usock_send_handler(int sd, short flags, void *cbdata)
|
||||
peer->send_ev_active = false;
|
||||
PMIX_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
PMIX_REPORT_ERROR(rc);
|
||||
lost_connection(peer, rc);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -212,8 +243,7 @@ void pmix_usock_send_handler(int sd, short flags, void *cbdata)
|
||||
peer->send_ev_active = false;
|
||||
PMIX_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
PMIX_REPORT_ERROR(rc);
|
||||
lost_connection(peer, rc);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -357,8 +387,7 @@ void pmix_usock_recv_handler(int sd, short flags, void *cbdata)
|
||||
PMIX_RELEASE(peer->recv_msg);
|
||||
peer->recv_msg = NULL;
|
||||
}
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
PMIX_REPORT_ERROR(PMIX_ERR_UNREACH);
|
||||
lost_connection(peer, PMIX_ERR_UNREACH);
|
||||
}
|
||||
|
||||
void pmix_usock_send_recv(int fd, short args, void *cbdata)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -97,12 +97,12 @@ static int pmix120_component_query(mca_base_module_t **module, int *priority)
|
||||
if (NULL != (t = getenv("PMIX_SERVER_URI")) ||
|
||||
NULL != (id = getenv("PMIX_ID"))) {
|
||||
/* if PMIx is present, then we are a client and need to use it,
|
||||
* but only if we are requested */
|
||||
*priority = 5;
|
||||
* and we are now the default */
|
||||
*priority = 100;
|
||||
} else {
|
||||
/* we could be a server, so we still need to be considered,
|
||||
* but only if requested */
|
||||
*priority = 2;
|
||||
* and we are now the default */
|
||||
*priority = 15;
|
||||
}
|
||||
*module = (mca_base_module_t *)&opal_pmix_pmix120_module;
|
||||
return OPAL_SUCCESS;
|
||||
|
@ -66,6 +66,9 @@ int orte_ess_base_proc_binding(void)
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
} else if (NULL != getenv(OPAL_MCA_PREFIX"orte_externally_bound")) {
|
||||
orte_proc_is_bound = true;
|
||||
/* the topology system will pick up the binding pattern */
|
||||
}
|
||||
|
||||
/* see if we were bound when launched */
|
||||
|
@ -62,7 +62,6 @@
|
||||
#include "orte/mca/snapc/base/base.h"
|
||||
#include "orte/mca/sstore/base/base.h"
|
||||
#endif
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
#include "orte/mca/filem/base/base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
@ -512,7 +511,7 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
|
||||
/* setup the PMIx framework - ensure it skips all non-PMIx components,
|
||||
* but do not override anything we were given */
|
||||
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray", false, &environ);
|
||||
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_pmix_base_open";
|
||||
@ -612,17 +611,7 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
error = "orte_dfs_select";
|
||||
goto error;
|
||||
}
|
||||
/* setup the SCHIZO framework */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_schizo_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_schizo_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
error:
|
||||
orte_show_help("help-orte-runtime.txt",
|
||||
@ -654,7 +643,6 @@ int orte_ess_base_orted_finalize(void)
|
||||
(void) mca_base_framework_close(&opal_pmix_base_framework);
|
||||
|
||||
/* close frameworks */
|
||||
(void) mca_base_framework_close(&orte_schizo_base_framework);
|
||||
(void) mca_base_framework_close(&orte_filem_base_framework);
|
||||
(void) mca_base_framework_close(&orte_grpcomm_base_framework);
|
||||
(void) mca_base_framework_close(&orte_iof_base_framework);
|
||||
@ -674,6 +662,8 @@ int orte_ess_base_orted_finalize(void)
|
||||
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
|
||||
/* ensure we scrub the session directory tree */
|
||||
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
|
||||
/* release the job hash table */
|
||||
OBJ_RELEASE(orte_job_data);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
@ -52,7 +52,6 @@
|
||||
#include "orte/mca/snapc/base/base.h"
|
||||
#include "orte/mca/sstore/base/base.h"
|
||||
#endif
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/show_help.h"
|
||||
@ -230,18 +229,6 @@ int orte_ess_base_tool_setup(void)
|
||||
opal_cr_set_enabled(false);
|
||||
#endif
|
||||
|
||||
/* setup schizo in case we are parsing cmd lines */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_schizo_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_schizo_base_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
error:
|
||||
@ -270,7 +257,6 @@ int orte_ess_base_tool_finalize(void)
|
||||
}
|
||||
(void) mca_base_framework_close(&orte_routed_base_framework);
|
||||
(void) mca_base_framework_close(&orte_rml_base_framework);
|
||||
(void) mca_base_framework_close(&orte_schizo_base_framework);
|
||||
(void) mca_base_framework_close(&orte_errmgr_base_framework);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -74,7 +74,6 @@
|
||||
#include "orte/mca/sstore/base/base.h"
|
||||
#endif
|
||||
#include "orte/mca/filem/base/base.h"
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
#include "orte/mca/state/base/base.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
|
||||
@ -620,7 +619,7 @@ static int rte_init(void)
|
||||
|
||||
/* setup the PMIx framework - ensure it skips all non-PMIx components, but
|
||||
* do not override anything we were given */
|
||||
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray", false, &environ);
|
||||
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_pmix_base_open";
|
||||
@ -721,17 +720,7 @@ static int rte_init(void)
|
||||
error = "orte_dfs_select";
|
||||
goto error;
|
||||
}
|
||||
/* setup the schizo framework */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_schizo_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_schizo_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* if a tool has launched us and is requesting event reports,
|
||||
* then set its contact info into the comm system
|
||||
*/
|
||||
@ -808,7 +797,6 @@ static int rte_finalize(void)
|
||||
/* cleanup our data server */
|
||||
orte_data_server_finalize();
|
||||
|
||||
(void) mca_base_framework_close(&orte_schizo_base_framework);
|
||||
(void) mca_base_framework_close(&orte_dfs_base_framework);
|
||||
(void) mca_base_framework_close(&orte_filem_base_framework);
|
||||
/* output any lingering stdout/err data */
|
||||
@ -854,6 +842,9 @@ static int rte_finalize(void)
|
||||
fclose(orte_xml_fp);
|
||||
}
|
||||
}
|
||||
|
||||
/* release the job hash table */
|
||||
OBJ_RELEASE(orte_job_data);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All
|
||||
* rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,7 +25,7 @@
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/ess/pmi/ess_pmi.h"
|
||||
@ -67,41 +67,27 @@ static int pmi_component_open(void)
|
||||
|
||||
static int pmi_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
int ret;
|
||||
orte_schizo_launch_environ_t ret;
|
||||
|
||||
/* all APPS must use pmix */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
if (NULL == opal_pmix.initialized) {
|
||||
/* open and setup pmix */
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
*priority = -1;
|
||||
*module = NULL;
|
||||
return ret;
|
||||
}
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
|
||||
/* don't error log this as it might not be an error at all */
|
||||
*priority = -1;
|
||||
*module = NULL;
|
||||
(void) mca_base_framework_close(&opal_pmix_base_framework);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
|
||||
/* we cannot be in a PMI environment */
|
||||
*priority = -1;
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
*priority = 35;
|
||||
*module = (mca_base_module_t *)&orte_ess_pmi_module;
|
||||
return ORTE_SUCCESS;
|
||||
if (!ORTE_PROC_IS_APP) {
|
||||
*module = NULL;
|
||||
*priority = 0;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* we can't run */
|
||||
*priority = -1;
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
/* find out what our environment looks like */
|
||||
ret = orte_schizo.check_launch_environment();
|
||||
if (ORTE_SCHIZO_UNMANAGED_SINGLETON == ret ||
|
||||
ORTE_SCHIZO_MANAGED_SINGLETON == ret) {
|
||||
/* not us */
|
||||
*module = NULL;
|
||||
*priority = 0;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
*priority = 35;
|
||||
*module = (mca_base_module_t *)&orte_ess_pmi_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
@ -97,6 +97,18 @@ static int rte_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* open and setup pmix */
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
/* we cannot run */
|
||||
error = "pmix init";
|
||||
goto error;
|
||||
}
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
|
||||
/* we cannot run */
|
||||
error = "pmix init";
|
||||
goto error;
|
||||
}
|
||||
/* initialize the selected module */
|
||||
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
|
||||
/* we cannot run */
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,14 +22,13 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Module open / close
|
||||
*/
|
||||
int orte_ess_singleton_component_open(void);
|
||||
int orte_ess_singleton_component_close(void);
|
||||
int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_singleton_component;
|
||||
typedef struct {
|
||||
orte_ess_base_component_t super;
|
||||
char *server_uri;
|
||||
bool isolated;
|
||||
} orte_ess_singleton_component_t;
|
||||
ORTE_MODULE_DECLSPEC extern orte_ess_singleton_component_t mca_ess_singleton_component;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,72 +33,82 @@
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/ess/singleton/ess_singleton.h"
|
||||
|
||||
extern orte_ess_base_module_t orte_ess_singleton_module;
|
||||
|
||||
char *orte_ess_singleton_server_uri = NULL;
|
||||
|
||||
static int
|
||||
orte_ess_singleton_component_register(void);
|
||||
static int component_open(void);
|
||||
static int component_close(void);
|
||||
static int component_query(mca_base_module_t **module, int *priority);
|
||||
static int component_register(void);
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
orte_ess_base_component_t mca_ess_singleton_component = {
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
.base_version = {
|
||||
ORTE_ESS_BASE_VERSION_3_0_0,
|
||||
orte_ess_singleton_component_t mca_ess_singleton_component = {
|
||||
{
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
.base_version = {
|
||||
ORTE_ESS_BASE_VERSION_3_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
.mca_component_name = "singleton",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
/* Component name and version */
|
||||
.mca_component_name = "singleton",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
|
||||
/* Component open and close functions */
|
||||
.mca_open_component = orte_ess_singleton_component_open,
|
||||
.mca_close_component = orte_ess_singleton_component_close,
|
||||
.mca_query_component = orte_ess_singleton_component_query,
|
||||
.mca_register_component_params = orte_ess_singleton_component_register,
|
||||
},
|
||||
.base_data = {
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
/* Component open and close functions */
|
||||
.mca_open_component = component_open,
|
||||
.mca_close_component = component_close,
|
||||
.mca_query_component = component_query,
|
||||
.mca_register_component_params = component_register,
|
||||
},
|
||||
.base_data = {
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
},
|
||||
.server_uri = NULL,
|
||||
.isolated = false
|
||||
};
|
||||
|
||||
static int
|
||||
orte_ess_singleton_component_register(void)
|
||||
static int component_register(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
orte_ess_singleton_server_uri = NULL;
|
||||
ret = mca_base_component_var_register(&mca_ess_singleton_component.base_version,
|
||||
mca_ess_singleton_component.server_uri = NULL;
|
||||
ret = mca_base_component_var_register(&mca_ess_singleton_component.super.base_version,
|
||||
"server",
|
||||
"Server to be used as HNP - [file|FILE]:<filename> or just uri",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_ess_singleton_server_uri);
|
||||
&mca_ess_singleton_component.server_uri);
|
||||
(void) mca_base_var_register_synonym(ret, "orte", "orte", NULL, "server", 0);
|
||||
|
||||
ret = mca_base_component_var_register(&mca_ess_singleton_component.super.base_version,
|
||||
"isolated",
|
||||
"Do not start a supporting daemon as this process will never attempt to spawn",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_ess_singleton_component.isolated);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
orte_ess_singleton_component_open(void)
|
||||
static int component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority)
|
||||
static int component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
int ret;
|
||||
orte_schizo_launch_environ_t ret;
|
||||
|
||||
/* if we are an HNP, daemon, or tool, then we
|
||||
* are definitely not a singleton!
|
||||
@ -106,55 +117,28 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority
|
||||
ORTE_PROC_IS_DAEMON ||
|
||||
ORTE_PROC_IS_TOOL) {
|
||||
*module = NULL;
|
||||
*priority = 0;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* okay, we still could be a singleton or
|
||||
* an application process. If we have been
|
||||
* given an HNP URI, then we are definitely
|
||||
* not a singleton
|
||||
*/
|
||||
if (NULL != orte_process_info.my_hnp_uri) {
|
||||
/* find out what our environment looks like */
|
||||
ret = orte_schizo.check_launch_environment();
|
||||
if (ORTE_SCHIZO_UNMANAGED_SINGLETON != ret &&
|
||||
ORTE_SCHIZO_MANAGED_SINGLETON != ret) {
|
||||
/* not us */
|
||||
*module = NULL;
|
||||
*priority = 0;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* open and setup pmix */
|
||||
if (NULL == opal_pmix.initialized) {
|
||||
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
|
||||
/* if PMIx is not available, then we are indeed a singleton */
|
||||
goto single;
|
||||
}
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
|
||||
/* if PMIx is not available, then we are indeed a singleton */
|
||||
(void) mca_base_framework_close(&opal_pmix_base_framework);
|
||||
goto single;
|
||||
}
|
||||
}
|
||||
if (opal_pmix.initialized()) {
|
||||
/* we are in a PMI environment and are therefore
|
||||
* not a singleton */
|
||||
*priority = -1;
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
single:
|
||||
/* okay, we could still be an application process,
|
||||
* but launched in "standalone" mode - i.e., directly
|
||||
* launched by an environment instead of via mpirun.
|
||||
* We need to set our priority low so that any enviro
|
||||
* component will override us. If they don't, then we
|
||||
* want to be selected as we must be a singleton
|
||||
*/
|
||||
*priority = 25;
|
||||
/* okay, we want to be selected as we must be a singleton */
|
||||
*priority = 100;
|
||||
*module = (mca_base_module_t *)&orte_ess_singleton_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
orte_ess_singleton_component_close(void)
|
||||
static int component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -97,19 +97,19 @@ static int rte_init(void)
|
||||
u32ptr = &u32;
|
||||
u16ptr = &u16;
|
||||
|
||||
if (NULL != orte_ess_singleton_server_uri) {
|
||||
if (NULL != mca_ess_singleton_component.server_uri) {
|
||||
/* we are going to connect to a server HNP */
|
||||
if (0 == strncmp(orte_ess_singleton_server_uri, "file", strlen("file")) ||
|
||||
0 == strncmp(orte_ess_singleton_server_uri, "FILE", strlen("FILE"))) {
|
||||
if (0 == strncmp(mca_ess_singleton_component.server_uri, "file", strlen("file")) ||
|
||||
0 == strncmp(mca_ess_singleton_component.server_uri, "FILE", strlen("FILE"))) {
|
||||
char input[1024], *filename;
|
||||
FILE *fp;
|
||||
|
||||
/* it is a file - get the filename */
|
||||
filename = strchr(orte_ess_singleton_server_uri, ':');
|
||||
filename = strchr(mca_ess_singleton_component.server_uri, ':');
|
||||
if (NULL == filename) {
|
||||
/* filename is not correctly formatted */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
|
||||
"singleton", orte_ess_singleton_server_uri);
|
||||
"singleton", mca_ess_singleton_component.server_uri);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
++filename; /* space past the : */
|
||||
@ -117,7 +117,7 @@ static int rte_init(void)
|
||||
if (0 >= strlen(filename)) {
|
||||
/* they forgot to give us the name! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
|
||||
"singleton", orte_ess_singleton_server_uri);
|
||||
"singleton", mca_ess_singleton_component.server_uri);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -125,7 +125,7 @@ static int rte_init(void)
|
||||
fp = fopen(filename, "r");
|
||||
if (NULL == fp) { /* can't find or read file! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
|
||||
"singleton", orte_ess_singleton_server_uri);
|
||||
"singleton", mca_ess_singleton_component.server_uri);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
memset(input, 0, 1024); // initialize the array to ensure a NULL termination
|
||||
@ -133,14 +133,14 @@ static int rte_init(void)
|
||||
/* something malformed about file */
|
||||
fclose(fp);
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
|
||||
"singleton", orte_ess_singleton_server_uri, "singleton");
|
||||
"singleton", mca_ess_singleton_component.server_uri, "singleton");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
fclose(fp);
|
||||
input[strlen(input)-1] = '\0'; /* remove newline */
|
||||
orte_process_info.my_hnp_uri = strdup(input);
|
||||
} else {
|
||||
orte_process_info.my_hnp_uri = strdup(orte_ess_singleton_server_uri);
|
||||
orte_process_info.my_hnp_uri = strdup(mca_ess_singleton_component.server_uri);
|
||||
}
|
||||
/* save the daemon uri - we will process it later */
|
||||
orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri);
|
||||
@ -154,17 +154,21 @@ static int rte_init(void)
|
||||
ORTE_PROC_MY_NAME->vpid = 0;
|
||||
|
||||
/* for convenience, push the pubsub version of this param into the environ */
|
||||
opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, 1, &environ);
|
||||
opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ);
|
||||
} else if (NULL != getenv("SINGULARITY_CONTAINER")) {
|
||||
/* mark that we are in a container */
|
||||
opal_setenv("OPAL_PROC_CONTAINER", "1", true, &environ);
|
||||
} else if (NULL != getenv("OPAL_ISOLATED")) {
|
||||
} else if (mca_ess_singleton_component.isolated) {
|
||||
/* ensure we use the isolated pmix component */
|
||||
opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
|
||||
} else {
|
||||
/* spawn our very own HNP to support us */
|
||||
if (ORTE_SUCCESS != (rc = fork_hnp())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* our name was given to us by the HNP */
|
||||
opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ);
|
||||
}
|
||||
|
||||
/* open and setup pmix */
|
||||
@ -485,6 +489,16 @@ static int fork_hnp(void)
|
||||
opal_argv_append(&argc, &argv, "state_novm_select");
|
||||
opal_argv_append(&argc, &argv, "1");
|
||||
|
||||
/* direct the selection of the ess component */
|
||||
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
|
||||
opal_argv_append(&argc, &argv, "ess");
|
||||
opal_argv_append(&argc, &argv, "hnp");
|
||||
|
||||
/* direct the selection of the pmix component */
|
||||
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
|
||||
opal_argv_append(&argc, &argv, "pmix");
|
||||
opal_argv_append(&argc, &argv, "^s1,s2,cray,isolated");
|
||||
|
||||
/* Fork off the child */
|
||||
orte_process_info.hnp_pid = fork();
|
||||
if(orte_process_info.hnp_pid < 0) {
|
||||
|
35
orte/mca/schizo/alps/Makefile.am
Обычный файл
35
orte/mca/schizo/alps/Makefile.am
Обычный файл
@ -0,0 +1,35 @@
|
||||
#
|
||||
# Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
schizo_alps_component.c \
|
||||
schizo_alps.h \
|
||||
schizo_alps.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_schizo_alps_DSO
|
||||
component_noinst =
|
||||
component_install = mca_schizo_alps.la
|
||||
else
|
||||
component_noinst = libmca_schizo_alps.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ortelibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_schizo_alps_la_SOURCES = $(sources)
|
||||
mca_schizo_alps_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_schizo_alps_la_SOURCES = $(sources)
|
||||
libmca_schizo_alps_la_LDFLAGS = -module -avoid-version
|
||||
|
47
orte/mca/schizo/alps/configure.m4
Обычный файл
47
orte/mca/schizo/alps/configure.m4
Обычный файл
@ -0,0 +1,47 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2008 UT-Battelle, LLC
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_schizo_alps_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_schizo_alps_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/schizo/alps/Makefile])
|
||||
|
||||
ORTE_CHECK_ALPS([schizo_alps], [schizo_alps_happy="yes"], [schizo_alps_happy="no"])
|
||||
|
||||
# check for alps/apInfo.h
|
||||
# save current CPPFLAGS
|
||||
MCA_orte_schizo_save_CPPFLAGS="$CPPFLAGS"
|
||||
|
||||
# add flags obtained from ORTE_CHECK_ALPS
|
||||
CPPFLAGS="$CPPFLAGS $schizo_alps_CPPFLAGS"
|
||||
|
||||
AC_CHECK_HEADERS([alps/apInfo.h], [], [schizo_alps_happy="no"])
|
||||
|
||||
# restore CPPFLAGS
|
||||
CPPFLAGS="$MCA_orte_schizo_save_CPPFLAGS"
|
||||
|
||||
AC_SUBST([schizo_alps_CPPFLAGS])
|
||||
|
||||
AS_IF([test "$schizo_alps_happy" = "yes"], [$1], [$2])
|
||||
])dnl
|
7
orte/mca/schizo/alps/owner.txt
Обычный файл
7
orte/mca/schizo/alps/owner.txt
Обычный файл
@ -0,0 +1,7 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: INTEL
|
||||
status: active
|
114
orte/mca/schizo/alps/schizo_alps.c
Обычный файл
114
orte/mca/schizo/alps/schizo_alps.c
Обычный файл
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "opal/util/basename.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
|
||||
#include "schizo_alps.h"
|
||||
|
||||
static orte_schizo_launch_environ_t check_launch_environment(void);
|
||||
|
||||
orte_schizo_base_module_t orte_schizo_alps_module = {
|
||||
.check_launch_environment = check_launch_environment
|
||||
};
|
||||
|
||||
static char **pushed_envs = NULL;
|
||||
static char **pushed_vals = NULL;
|
||||
static orte_schizo_launch_environ_t myenv;
|
||||
static bool myenvdefined = false;
|
||||
|
||||
static orte_schizo_launch_environ_t check_launch_environment(void)
|
||||
{
|
||||
int i;
|
||||
const char proc_job_file[]="/proc/job";
|
||||
FILE *fd = NULL, *fd_task_is_app = NULL;
|
||||
char task_is_app_fname[PATH_MAX];
|
||||
|
||||
if (myenvdefined) {
|
||||
return myenv;
|
||||
}
|
||||
myenvdefined = true;
|
||||
|
||||
/* we were only selected because we are an app,
|
||||
* so no need to further check that here. Instead,
|
||||
* see if we were direct launched vs launched via mpirun */
|
||||
if (NULL != orte_process_info.my_daemon_uri) {
|
||||
/* nope */
|
||||
myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
|
||||
opal_argv_append_nosize(&pushed_vals, "pmi");
|
||||
goto setup;
|
||||
}
|
||||
|
||||
/* see if we are running in a Cray PAGG container */
|
||||
fd = fopen(proc_job_file, "r");
|
||||
if (NULL == fd) {
|
||||
/* we are a singleton */
|
||||
myenv = ORTE_SCHIZO_MANAGED_SINGLETON;
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
|
||||
opal_argv_append_nosize(&pushed_vals, "singleton");
|
||||
} else {
|
||||
if (NULL != orte_process_info.my_daemon_uri) {
|
||||
myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
|
||||
} else {
|
||||
myenv = ORTE_SCHIZO_DIRECT_LAUNCHED;
|
||||
}
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
|
||||
opal_argv_append_nosize(&pushed_vals, "pmi");
|
||||
snprintf(task_is_app_fname,sizeof(task_is_app_fname),
|
||||
"/proc/self/task/%ld/task_is_app",syscall(SYS_gettid));
|
||||
fd_task_is_app = fopen(task_is_app_fname, "r");
|
||||
if (fd_task_is_app != NULL) { /* okay we're in a PAGG container,
|
||||
and we are an app task (not just a process
|
||||
running on a mom node, for example) */
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"pmix");
|
||||
opal_argv_append_nosize(&pushed_vals, "cray");
|
||||
}
|
||||
fclose(fd);
|
||||
}
|
||||
|
||||
setup:
|
||||
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
|
||||
"schizo:alps DECLARED AS %s", orte_schizo_base_print_env(myenv));
|
||||
if (NULL != pushed_envs) {
|
||||
for (i=0; NULL != pushed_envs[i]; i++) {
|
||||
opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ);
|
||||
}
|
||||
}
|
||||
|
||||
return myenv;
|
||||
}
|
||||
|
||||
static void finalize(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (NULL != pushed_envs) {
|
||||
for (i=0; NULL != pushed_envs[i]; i++) {
|
||||
opal_unsetenv(pushed_envs[i], &environ);
|
||||
}
|
||||
opal_argv_free(pushed_envs);
|
||||
opal_argv_free(pushed_vals);
|
||||
}
|
||||
}
|
29
orte/mca/schizo/alps/schizo_alps.h
Обычный файл
29
orte/mca/schizo/alps/schizo_alps.h
Обычный файл
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_SCHIZO_ALPS_H_
|
||||
#define _MCA_SCHIZO_ALPS_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_alps_component;
|
||||
extern orte_schizo_base_module_t orte_schizo_alps_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* _MCA_SCHIZO_ALPS_H_ */
|
||||
|
53
orte/mca/schizo/alps/schizo_alps_component.c
Обычный файл
53
orte/mca/schizo/alps/schizo_alps_component.c
Обычный файл
@ -0,0 +1,53 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
#include "schizo_alps.h"
|
||||
|
||||
static int component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
/*
|
||||
* Struct of function pointers and all that to let us be initialized
|
||||
*/
|
||||
orte_schizo_base_component_t mca_schizo_alps_component = {
|
||||
.base_version = {
|
||||
MCA_SCHIZO_BASE_VERSION_1_0_0,
|
||||
.mca_component_name = "alps",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_query_component = component_query,
|
||||
},
|
||||
.base_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * Component query: offer the ALPS module (priority 90) to application
 * processes only.  Any other process type gets no module, priority 0 and
 * ORTE_ERROR.
 */
static int component_query(mca_base_module_t **module, int *priority)
{
    if (ORTE_PROC_IS_APP) {
        /* since we were built, assume we are on an alps system */
        *priority = 90;
        *module = (mca_base_module_t *)&orte_schizo_alps_module;
        return ORTE_SUCCESS;
    }

    /* not an app - nothing for us to do */
    *priority = 0;
    *module = NULL;
    return ORTE_ERROR;
}
|
||||
|
@ -60,6 +60,7 @@ typedef struct {
|
||||
OBJ_CLASS_DECLARATION(orte_schizo_base_active_module_t);
|
||||
|
||||
/* the base stub functions */
|
||||
ORTE_DECLSPEC const char* orte_schizo_base_print_env(orte_schizo_launch_environ_t env);
|
||||
ORTE_DECLSPEC int orte_schizo_base_parse_cli(char **personality,
|
||||
int argc, int start, char **argv);
|
||||
ORTE_DECLSPEC int orte_schizo_base_parse_env(char **personality,
|
||||
@ -72,6 +73,8 @@ ORTE_DECLSPEC int orte_schizo_base_setup_fork(orte_job_t *jdata,
|
||||
ORTE_DECLSPEC int orte_schizo_base_setup_child(orte_job_t *jobdat,
|
||||
orte_proc_t *child,
|
||||
orte_app_context_t *app);
|
||||
ORTE_DECLSPEC orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void);
|
||||
ORTE_DECLSPEC void orte_schizo_base_finalize(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -37,10 +37,12 @@
|
||||
*/
|
||||
orte_schizo_base_t orte_schizo_base = {{{0}}};
|
||||
orte_schizo_base_module_t orte_schizo = {
|
||||
orte_schizo_base_parse_cli,
|
||||
orte_schizo_base_parse_env,
|
||||
orte_schizo_base_setup_fork,
|
||||
orte_schizo_base_setup_child
|
||||
.parse_cli = orte_schizo_base_parse_cli,
|
||||
.parse_env = orte_schizo_base_parse_env,
|
||||
.setup_fork = orte_schizo_base_setup_fork,
|
||||
.setup_child = orte_schizo_base_setup_child,
|
||||
.check_launch_environment = orte_schizo_base_check_launch_environment,
|
||||
.finalize = orte_schizo_base_finalize
|
||||
};
|
||||
|
||||
static int orte_schizo_base_close(void)
|
||||
|
@ -100,7 +100,7 @@ int orte_schizo_base_select(void)
|
||||
}
|
||||
|
||||
if (4 < opal_output_get_verbosity(orte_schizo_base_framework.framework_output)) {
|
||||
opal_output(0, "%s: Final schizo priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "Final schizo priorities");
|
||||
/* show the prioritized list */
|
||||
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
|
||||
opal_output(0, "\tSchizo: %s Priority: %d", mod->component->mca_component_name, mod->pri);
|
||||
|
@ -19,6 +19,24 @@
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
|
||||
/*
 * Map a launch-environment code to a printable, statically allocated
 * name.  Values outside the known set yield "INVALID_CODE".
 */
const char* orte_schizo_base_print_env(orte_schizo_launch_environ_t env)
{
    const char *label = "INVALID_CODE";

    switch (env) {
    case ORTE_SCHIZO_UNDETERMINED:
        label = "UNDETERMINED";
        break;
    case ORTE_SCHIZO_NATIVE_LAUNCHED:
        label = "NATIVE_LAUNCHED";
        break;
    case ORTE_SCHIZO_UNMANAGED_SINGLETON:
        label = "UNMANAGED_SINGLETON";
        break;
    case ORTE_SCHIZO_DIRECT_LAUNCHED:
        label = "DIRECT_LAUNCHED";
        break;
    case ORTE_SCHIZO_MANAGED_SINGLETON:
        label = "MANAGED_SINGLETON";
        break;
    default:
        break;
    }

    return label;
}
|
||||
|
||||
int orte_schizo_base_parse_cli(char **personality,
|
||||
int argc, int start, char **argv)
|
||||
{
|
||||
@ -98,3 +116,30 @@ int orte_schizo_base_setup_child(orte_job_t *jdata,
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void)
|
||||
{
|
||||
orte_schizo_launch_environ_t rc;
|
||||
orte_schizo_base_active_module_t *mod;
|
||||
|
||||
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
|
||||
if (NULL != mod->module->check_launch_environment) {
|
||||
rc = mod->module->check_launch_environment();
|
||||
if (ORTE_SCHIZO_UNDETERMINED != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SCHIZO_UNDETERMINED;
|
||||
}
|
||||
|
||||
void orte_schizo_base_finalize(void)
|
||||
{
|
||||
orte_schizo_base_active_module_t *mod;
|
||||
|
||||
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
|
||||
if (NULL != mod->module->finalize) {
|
||||
mod->module->finalize();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -63,10 +63,10 @@ static int setup_child(orte_job_t *jobdat,
|
||||
orte_app_context_t *app);
|
||||
|
||||
orte_schizo_base_module_t orte_schizo_ompi_module = {
|
||||
parse_cli,
|
||||
parse_env,
|
||||
setup_fork,
|
||||
setup_child
|
||||
.parse_cli = parse_cli,
|
||||
.parse_env = parse_env,
|
||||
.setup_fork = setup_fork,
|
||||
.setup_child = setup_child
|
||||
};
|
||||
|
||||
static int parse_cli(char **personality,
|
||||
|
@ -16,6 +16,8 @@
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
#include "schizo_ompi.h"
|
||||
|
||||
@ -40,6 +42,12 @@ orte_schizo_base_component_t mca_schizo_ompi_component = {
|
||||
|
||||
static int component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* if we are an app, ignore us */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
*module = NULL;
|
||||
*priority = 0;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
*module = (mca_base_module_t*)&orte_schizo_ompi_module;
|
||||
*priority = 10;
|
||||
return ORTE_SUCCESS;
|
||||
|
35
orte/mca/schizo/orte/Makefile.am
Обычный файл
35
orte/mca/schizo/orte/Makefile.am
Обычный файл
@ -0,0 +1,35 @@
|
||||
#
|
||||
# Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
schizo_orte_component.c \
|
||||
schizo_orte.h \
|
||||
schizo_orte.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_schizo_orte_DSO
|
||||
component_noinst =
|
||||
component_install = mca_schizo_orte.la
|
||||
else
|
||||
component_noinst = libmca_schizo_orte.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ortelibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_schizo_orte_la_SOURCES = $(sources)
|
||||
mca_schizo_orte_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_schizo_orte_la_SOURCES = $(sources)
|
||||
libmca_schizo_orte_la_LDFLAGS = -module -avoid-version
|
||||
|
90
orte/mca/schizo/orte/schizo_orte.c
Обычный файл
90
orte/mca/schizo/orte/schizo_orte.c
Обычный файл
@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/basename.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
|
||||
#include "schizo_orte.h"
|
||||
|
||||
static orte_schizo_launch_environ_t check_launch_environment(void);
|
||||
static void finalize(void);
|
||||
|
||||
orte_schizo_base_module_t orte_schizo_orte_module = {
|
||||
.check_launch_environment = check_launch_environment,
|
||||
.finalize = finalize
|
||||
};
|
||||
|
||||
static char **pushed_envs = NULL;
|
||||
static char **pushed_vals = NULL;
|
||||
static orte_schizo_launch_environ_t myenv;
|
||||
static bool myenvdefined = false;
|
||||
|
||||
static orte_schizo_launch_environ_t check_launch_environment(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (myenvdefined) {
|
||||
return myenv;
|
||||
}
|
||||
myenvdefined = true;
|
||||
|
||||
/* we were only selected because we are an app,
|
||||
* so no need to further check that here. Instead,
|
||||
* see if we were direct launched vs launched via mpirun */
|
||||
if (NULL != orte_process_info.my_daemon_uri) {
|
||||
/* nope */
|
||||
myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
|
||||
opal_argv_append_nosize(&pushed_vals, "pmi");
|
||||
goto setup;
|
||||
}
|
||||
|
||||
/* if nobody else has laid claim to this process,
|
||||
* then it must be a singleton */
|
||||
myenv = ORTE_SCHIZO_UNMANAGED_SINGLETON;
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
|
||||
opal_argv_append_nosize(&pushed_vals, "singleton");
|
||||
|
||||
setup:
|
||||
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
|
||||
"schizo:orte DECLARED AS %s", orte_schizo_base_print_env(myenv));
|
||||
if (NULL != pushed_envs) {
|
||||
for (i=0; NULL != pushed_envs[i]; i++) {
|
||||
opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ);
|
||||
}
|
||||
}
|
||||
return myenv;
|
||||
}
|
||||
|
||||
static void finalize(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (NULL != pushed_envs) {
|
||||
for (i=0; NULL != pushed_envs[i]; i++) {
|
||||
opal_unsetenv(pushed_envs[i], &environ);
|
||||
}
|
||||
opal_argv_free(pushed_envs);
|
||||
opal_argv_free(pushed_vals);
|
||||
}
|
||||
}
|
29
orte/mca/schizo/orte/schizo_orte.h
Обычный файл
29
orte/mca/schizo/orte/schizo_orte.h
Обычный файл
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_SCHIZO_ORTE_H_
|
||||
#define _MCA_SCHIZO_ORTE_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_orte_component;
|
||||
extern orte_schizo_base_module_t orte_schizo_orte_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* _MCA_SCHIZO_ORTE_H_ */
|
||||
|
52
orte/mca/schizo/orte/schizo_orte_component.c
Обычный файл
52
orte/mca/schizo/orte/schizo_orte_component.c
Обычный файл
@ -0,0 +1,52 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
#include "schizo_orte.h"
|
||||
|
||||
static int component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
/*
|
||||
* Struct of function pointers and all that to let us be initialized
|
||||
*/
|
||||
orte_schizo_base_component_t mca_schizo_orte_component = {
|
||||
.base_version = {
|
||||
MCA_SCHIZO_BASE_VERSION_1_0_0,
|
||||
.mca_component_name = "orte",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_query_component = component_query,
|
||||
},
|
||||
.base_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * Component query: offer the ORTE module at priority 1 to application
 * processes only.  Any other process type gets no module, priority 0 and
 * an error return.
 */
static int component_query(mca_base_module_t **module, int *priority)
{
    if (ORTE_PROC_IS_APP) {
        *module = (mca_base_module_t*)&orte_schizo_orte_module;
        *priority = 1;
        return ORTE_SUCCESS;
    }

    /* disqualify ourselves if we are not an app */
    *priority = 0;
    *module = NULL;
    return OPAL_ERROR;
}
|
||||
|
@ -39,6 +39,9 @@ BEGIN_C_DECLS
|
||||
* SCHIZO module functions - the modules are accessed via
|
||||
* the base stub functions
|
||||
*/
|
||||
|
||||
typedef int (*orte_schizo_base_module_init_fn_t)(void);
|
||||
|
||||
typedef int (*orte_schizo_base_module_parse_cli_fn_t)(char **personality,
|
||||
int argc, int start,
|
||||
char **argv);
|
||||
@ -56,14 +59,37 @@ typedef int (*orte_schizo_base_module_setup_child_fn_t)(orte_job_t *jdata,
|
||||
orte_proc_t *child,
|
||||
orte_app_context_t *app);
|
||||
|
||||
|
||||
typedef enum {
|
||||
ORTE_SCHIZO_UNDETERMINED,
|
||||
ORTE_SCHIZO_NATIVE_LAUNCHED,
|
||||
ORTE_SCHIZO_UNMANAGED_SINGLETON,
|
||||
ORTE_SCHIZO_DIRECT_LAUNCHED,
|
||||
ORTE_SCHIZO_MANAGED_SINGLETON
|
||||
} orte_schizo_launch_environ_t;
|
||||
|
||||
|
||||
/* check if this process was directly launched by a managed environment, and
|
||||
* do whatever the module wants to do under those conditions. The module
|
||||
* can push any required envars into the local environment, but must remember
|
||||
* to "unset" them during finalize. The module then returns a flag indicating
|
||||
* the launch environment of the process */
|
||||
typedef orte_schizo_launch_environ_t (*orte_schizo_base_module_ck_launch_environ_fn_t)(void);
|
||||
|
||||
/* give the component a chance to cleanup */
|
||||
typedef void (*orte_schizo_base_module_finalize_fn_t)(void);
|
||||
|
||||
/*
|
||||
* schizo module version 1.3.0
|
||||
*/
|
||||
typedef struct {
|
||||
orte_schizo_base_module_parse_cli_fn_t parse_cli;
|
||||
orte_schizo_base_module_parse_env_fn_t parse_env;
|
||||
orte_schizo_base_module_setup_fork_fn_t setup_fork;
|
||||
orte_schizo_base_module_setup_child_fn_t setup_child;
|
||||
orte_schizo_base_module_init_fn_t init;
|
||||
orte_schizo_base_module_parse_cli_fn_t parse_cli;
|
||||
orte_schizo_base_module_parse_env_fn_t parse_env;
|
||||
orte_schizo_base_module_setup_fork_fn_t setup_fork;
|
||||
orte_schizo_base_module_setup_child_fn_t setup_child;
|
||||
orte_schizo_base_module_ck_launch_environ_fn_t check_launch_environment;
|
||||
orte_schizo_base_module_finalize_fn_t finalize;
|
||||
} orte_schizo_base_module_t;
|
||||
|
||||
ORTE_DECLSPEC extern orte_schizo_base_module_t orte_schizo;
|
||||
|
@ -30,10 +30,7 @@ static int setup_fork(orte_job_t *jdata,
|
||||
orte_app_context_t *context);
|
||||
|
||||
orte_schizo_base_module_t orte_schizo_singularity_module = {
|
||||
NULL,
|
||||
NULL,
|
||||
setup_fork,
|
||||
NULL
|
||||
.setup_fork = setup_fork
|
||||
};
|
||||
|
||||
static int setup_fork(orte_job_t *jdata,
|
||||
|
@ -14,6 +14,8 @@
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
#include "schizo_singularity.h"
|
||||
|
||||
@ -38,6 +40,12 @@ orte_schizo_base_component_t mca_schizo_singularity_component = {
|
||||
|
||||
static int component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* if we are an app, ignore us */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
*module = NULL;
|
||||
*priority = 0;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
*module = (mca_base_module_t*)&orte_schizo_singularity_module;
|
||||
*priority = 5;
|
||||
return ORTE_SUCCESS;
|
||||
|
35
orte/mca/schizo/slurm/Makefile.am
Обычный файл
35
orte/mca/schizo/slurm/Makefile.am
Обычный файл
@ -0,0 +1,35 @@
|
||||
#
|
||||
# Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
schizo_slurm_component.c \
|
||||
schizo_slurm.h \
|
||||
schizo_slurm.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_schizo_slurm_DSO
|
||||
component_noinst =
|
||||
component_install = mca_schizo_slurm.la
|
||||
else
|
||||
component_noinst = libmca_schizo_slurm.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ortelibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_schizo_slurm_la_SOURCES = $(sources)
|
||||
mca_schizo_slurm_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_schizo_slurm_la_SOURCES = $(sources)
|
||||
libmca_schizo_slurm_la_LDFLAGS = -module -avoid-version
|
||||
|
41
orte/mca/schizo/slurm/configure.m4
Обычный файл
41
orte/mca/schizo/slurm/configure.m4
Обычный файл
@ -0,0 +1,41 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2016 Intel, Inc. All rights reserved
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_schizo_slurm_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_schizo_slurm_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/schizo/slurm/Makefile])
|
||||
|
||||
ORTE_CHECK_SLURM([schizo_slurm], [schizo_slurm_good=1], [schizo_slurm_good=0])
|
||||
|
||||
# if check worked, set wrapper flags if so.
|
||||
# Evaluate succeed / fail
|
||||
AS_IF([test "$schizo_slurm_good" = "1"],
|
||||
[$1],
|
||||
[$2])
|
||||
|
||||
# set build flags to use in makefile
|
||||
AC_SUBST([schizo_slurm_CPPFLAGS])
|
||||
AC_SUBST([schizo_slurm_LDFLAGS])
|
||||
AC_SUBST([schizo_slurm_LIBS])
|
||||
])dnl
|
7
orte/mca/schizo/slurm/owner.txt
Обычный файл
7
orte/mca/schizo/slurm/owner.txt
Обычный файл
@ -0,0 +1,7 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: INTEL
|
||||
status: active
|
135
orte/mca/schizo/slurm/schizo_slurm.c
Обычный файл
135
orte/mca/schizo/slurm/schizo_slurm.c
Обычный файл
@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/basename.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
|
||||
#include "schizo_slurm.h"
|
||||
|
||||
static orte_schizo_launch_environ_t check_launch_environment(void);
|
||||
static void finalize(void);
|
||||
|
||||
orte_schizo_base_module_t orte_schizo_slurm_module = {
|
||||
.check_launch_environment = check_launch_environment,
|
||||
.finalize = finalize
|
||||
};
|
||||
|
||||
static char **pushed_envs = NULL;
|
||||
static char **pushed_vals = NULL;
|
||||
static orte_schizo_launch_environ_t myenv;
|
||||
static bool myenvdefined = false;
|
||||
|
||||
static orte_schizo_launch_environ_t check_launch_environment(void)
|
||||
{
|
||||
char *bind, *list, *ptr;
|
||||
int i;
|
||||
|
||||
if (myenvdefined) {
|
||||
return myenv;
|
||||
}
|
||||
myenvdefined = true;
|
||||
|
||||
/* we were only selected because SLURM was detected
|
||||
* and we are an app, so no need to further check
|
||||
* that here. Instead, see if we were direct launched
|
||||
* vs launched via mpirun */
|
||||
if (NULL != orte_process_info.my_daemon_uri) {
|
||||
/* nope */
|
||||
myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
|
||||
opal_argv_append_nosize(&pushed_vals, "pmi");
|
||||
goto setup;
|
||||
}
|
||||
|
||||
/* see if we are in a SLURM allocation */
|
||||
if (NULL == getenv("SLURM_NODELIST")) {
|
||||
/* nope */
|
||||
myenv = ORTE_SCHIZO_UNDETERMINED;
|
||||
return myenv;
|
||||
}
|
||||
|
||||
/* we are in an allocation, but were we direct launched
|
||||
* or are we a singleton? */
|
||||
if (NULL == getenv("SLURM_STEP_ID")) {
|
||||
/* not in a job step - ensure we select the
|
||||
* correct things */
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
|
||||
opal_argv_append_nosize(&pushed_vals, "singleton");
|
||||
myenv = ORTE_SCHIZO_MANAGED_SINGLETON;
|
||||
goto setup;
|
||||
}
|
||||
myenv = ORTE_SCHIZO_DIRECT_LAUNCHED;
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
|
||||
opal_argv_append_nosize(&pushed_vals, "pmi");
|
||||
|
||||
/* if we are direct launched by SLURM, then we want
|
||||
* to ensure that we do not override their binding
|
||||
* options, so set that envar */
|
||||
if (NULL != (bind = getenv("SLURM_CPU_BIND_TYPE"))) {
|
||||
if (0 == strcmp(bind, "none")) {
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"hwloc_base_binding_policy");
|
||||
opal_argv_append_nosize(&pushed_vals, "none");
|
||||
/* indicate we are externally bound so we won't try to do it ourselves */
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"orte_externally_bound");
|
||||
opal_argv_append_nosize(&pushed_vals, "1");
|
||||
} else if (0 == strcmp(bind, "mask_cpu")) {
|
||||
/* if the bind list is all F's, then the
|
||||
* user didn't specify anything */
|
||||
if (NULL != (list = getenv("SLURM_CPU_BIND_LIST")) &&
|
||||
NULL != (ptr = strchr(list, 'x'))) {
|
||||
++ptr; // step over the 'x'
|
||||
for (i=0; '\0' != *ptr; ptr++) {
|
||||
if ('F' != *ptr) {
|
||||
/* indicate we are externally bound */
|
||||
opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"orte_externally_bound");
|
||||
opal_argv_append_nosize(&pushed_vals, "1");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setup:
|
||||
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
|
||||
"schizo:slurm DECLARED AS %s", orte_schizo_base_print_env(myenv));
|
||||
if (NULL != pushed_envs) {
|
||||
for (i=0; NULL != pushed_envs[i]; i++) {
|
||||
opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ);
|
||||
}
|
||||
}
|
||||
return myenv;
|
||||
}
|
||||
|
||||
static void finalize(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (NULL != pushed_envs) {
|
||||
for (i=0; NULL != pushed_envs[i]; i++) {
|
||||
opal_unsetenv(pushed_envs[i], &environ);
|
||||
}
|
||||
opal_argv_free(pushed_envs);
|
||||
opal_argv_free(pushed_vals);
|
||||
}
|
||||
}
|
29
orte/mca/schizo/slurm/schizo_slurm.h
Обычный файл
29
orte/mca/schizo/slurm/schizo_slurm.h
Обычный файл
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_SCHIZO_SLURM_H_
|
||||
#define _MCA_SCHIZO_SLURM_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_slurm_component;
|
||||
extern orte_schizo_base_module_t orte_schizo_slurm_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* _MCA_SCHIZO_SLURM_H_ */
|
||||
|
52
orte/mca/schizo/slurm/schizo_slurm_component.c
Обычный файл
52
orte/mca/schizo/slurm/schizo_slurm_component.c
Обычный файл
@ -0,0 +1,52 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/mca/schizo/schizo.h"
|
||||
#include "schizo_slurm.h"
|
||||
|
||||
static int component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
/*
|
||||
* Struct of function pointers and all that to let us be initialized
|
||||
*/
|
||||
orte_schizo_base_component_t mca_schizo_slurm_component = {
|
||||
.base_version = {
|
||||
MCA_SCHIZO_BASE_VERSION_1_0_0,
|
||||
.mca_component_name = "slurm",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_query_component = component_query,
|
||||
},
|
||||
.base_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Query entry point for the slurm schizo component.
 *
 * @param module   (OUT) set to the slurm schizo module when this process
 *                 is an application process, otherwise set to NULL
 * @param priority (OUT) set to 50 when the module is offered, otherwise 0
 *
 * @return ORTE_SUCCESS when the module is offered, OPAL_ERROR when the
 *         component disqualifies itself.
 *
 * NOTE(review): the only condition tested here is ORTE_PROC_IS_APP; no
 * check for a SLURM environment is made in this function - confirm that
 * is detected elsewhere.
 */
static int component_query(mca_base_module_t **module, int *priority)
{
    if (ORTE_PROC_IS_APP) {
        /* application process: offer our module */
        *module = (mca_base_module_t*)&orte_schizo_slurm_module;
        *priority = 50;
        return ORTE_SUCCESS;
    }

    /* not an application process: take ourselves out of the running */
    *priority = 0;
    *module = NULL;
    return OPAL_ERROR;
}
|
||||
|
@ -553,6 +553,7 @@ int orte_daemon(int argc, char *argv[])
|
||||
app->app = strdup("singleton");
|
||||
app->num_procs = 1;
|
||||
opal_pointer_array_add(jdata->apps, app);
|
||||
jdata->num_apps = 1;
|
||||
|
||||
/* setup a proc object for the singleton - since we
|
||||
* -must- be the HNP, and therefore we stored our
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -30,6 +30,7 @@
|
||||
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/ess/base/base.h"
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_locks.h"
|
||||
@ -76,6 +77,10 @@ int orte_finalize(void)
|
||||
/* close the ess itself */
|
||||
(void) mca_base_framework_close(&orte_ess_base_framework);
|
||||
|
||||
/* finalize and close schizo */
|
||||
orte_schizo.finalize();
|
||||
(void) mca_base_framework_close(&orte_schizo_base_framework);
|
||||
|
||||
/* cleanup the process info */
|
||||
orte_proc_info_finalize();
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
*
|
||||
@ -45,6 +45,7 @@
|
||||
#include "orte/mca/ess/base/base.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/schizo/base/base.h"
|
||||
#include "orte/util/listener.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
@ -202,6 +203,22 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
|
||||
pmix_server_register_params();
|
||||
}
|
||||
|
||||
/* open the SCHIZO framework as everyone needs it, and the
|
||||
* ess will use it to help select its component */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_schizo_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
|
||||
error = "orte_schizo_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* if we are an app, let SCHIZO help us determine our environment */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
(void)orte_schizo.check_launch_environment();
|
||||
}
|
||||
|
||||
/* open the ESS and select the correct module for this environment */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -822,6 +822,8 @@ int orterun(int argc, char *argv[])
|
||||
* orterun
|
||||
*/
|
||||
orte_launch_environ = opal_argv_copy(environ);
|
||||
opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
|
||||
opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ);
|
||||
|
||||
/* Initialize our Open RTE environment
|
||||
* Set the flag telling orte_init that I am NOT a
|
||||
@ -1106,6 +1108,9 @@ int orterun(int argc, char *argv[])
|
||||
/* cleanup and leave */
|
||||
orte_finalize();
|
||||
|
||||
if (NULL != orte_launch_environ) {
|
||||
opal_argv_free(orte_launch_environ);
|
||||
}
|
||||
if (orte_debug_flag) {
|
||||
fprintf(stderr, "exiting with status %d\n", orte_exit_status);
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user