1
1

Fix a number of issues, some of which have lingered for a long time:

* provide a more reliable way of determining that a process is a singleton by leveraging the schizo framework. Add new components for slurm, alps, and orte to detect when we are in a managed environment, and if we have been launched by mpirun or a native launcher. Set the correct envars to control ess and pmix selection in each case.

* change the relative priority of the pmix120 and pmix112 components to make pmix120 the default

* fix singleton comm-spawn by correctly setting the num_apps field of the orte_job_t created by the daemon - this fixes a segfault in register_nspace on newly created daemons

* ensure orterun doesn't propagate any ess or pmix directives in its environment

* Clean up a few valgrind issues and memory leaks

* Fix a race condition that prevented the client from completing notification registrations (missing thread shift)

* Ensure the schizo/alps component detects launch by mpirun
Этот коммит содержится в:
Ralph Castain 2016-02-25 11:05:38 -06:00
родитель 67e45028df
Коммит 011403c04a
53 изменённых файлов: 1436 добавлений и 504 удалений

Просмотреть файл

@ -113,26 +113,26 @@ const opal_pmix_base_module_t opal_pmix_isolated_module = {
.register_jobid = isolated_register_jobid
};
static int pmix_init_count = 0;
static opal_process_name_t pmix_pname;
static int isolated_init_count = 0;
static opal_process_name_t isolated_pname;
static int isolated_init(void)
{
int rc;
opal_value_t kv;
++pmix_init_count;
++isolated_init_count;
/* store our name in the opal_proc_t so that
* debug messages will make sense - an upper
* layer will eventually overwrite it, but that
* won't do any harm */
pmix_pname.jobid = 1;
pmix_pname.vpid = 0;
opal_proc_set_name(&pmix_pname);
isolated_pname.jobid = 1;
isolated_pname.vpid = 0;
opal_proc_set_name(&isolated_pname);
opal_output_verbose(10, opal_pmix_base_framework.framework_output,
"%s pmix:isolated: assigned tmp name %d %d",
OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid);
OPAL_NAME_PRINT(isolated_pname),isolated_pname.jobid,isolated_pname.vpid);
// setup hash table
opal_pmix_base_hash_init();
@ -248,11 +248,11 @@ err_exit:
static int isolated_fini(void)
{
if (0 == pmix_init_count) {
if (0 == isolated_init_count) {
return OPAL_SUCCESS;
}
if (0 != --pmix_init_count) {
if (0 != --isolated_init_count) {
return OPAL_SUCCESS;
}
opal_pmix_base_hash_finalize();
@ -261,7 +261,7 @@ static int isolated_fini(void)
static int isolated_initialized(void)
{
if (0 < pmix_init_count) {
if (0 < isolated_init_count) {
return 1;
}
return 0;
@ -323,11 +323,11 @@ static int isolated_put(opal_pmix_scope_t scope,
"%s pmix:isolated isolated_put key %s scope %d\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope);
if (!pmix_init_count) {
if (!isolated_init_count) {
return OPAL_ERROR;
}
rc = opal_pmix_base_store(&pmix_pname, kv);
rc = opal_pmix_base_store(&isolated_pname, kv);
return rc;
}

Просмотреть файл

@ -79,16 +79,8 @@ static int isolated_close(void)
static int isolated_component_query(mca_base_module_t **module, int *priority)
{
/* if we are in a Singularity container, then we cannot spawn an
* HNP and are truly on our own and cannot call comm_spawn or
* any of its friends */
if (NULL != getenv("SINGULARITY_CONTAINER")) {
*priority = 100;
*module = (mca_base_module_t *)&opal_pmix_isolated_module;
return OPAL_SUCCESS;
}
/* otherwise, ignore us */
/* ignore us unless requested */
*priority = 0;
*module = NULL;
return OPAL_ERR_TAKE_NEXT_OPTION;
*module = (mca_base_module_t *)&opal_pmix_isolated_module;
return OPAL_SUCCESS;
}

Просмотреть файл

@ -41,7 +41,7 @@ AC_DEFUN([MCA_opal_pmix_pmix112_CONFIG],[
opal_pmix_pmix112_save_LDFLAGS=$LDFLAGS
opal_pmix_pmix112_save_LIBS=$LIBS
opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
opal_pmix_pmix112_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix112_ --disable-visibility --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
AS_IF([test "$enable_debug" = "yes"],
[opal_pmix_pmix112_args="--enable-debug $opal_pmix_pmix112_args"
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -95,7 +95,7 @@ static int pmix112_component_query(mca_base_module_t **module, int *priority)
if (NULL != (t = getenv("PMIX_SERVER_URI")) ||
NULL != (id = getenv("PMIX_ID"))) {
/* if PMIx is present, then we are a client and need to use it */
*priority = 100;
*priority = 80;
} else {
/* we could be a server, so we still need to be considered */
*priority = 5;

Просмотреть файл

@ -41,7 +41,7 @@ AC_DEFUN([MCA_opal_pmix_pmix120_CONFIG],[
opal_pmix_pmix120_save_LDFLAGS=$LDFLAGS
opal_pmix_pmix120_save_LIBS=$LIBS
opal_pmix_pmix120_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix120_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
opal_pmix_pmix120_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix120_ --disable-visibility --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\""
AS_IF([test "$enable_debug" = "yes"],
[opal_pmix_pmix120_args="--enable-debug $opal_pmix_pmix120_args"
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],

Просмотреть файл

@ -38,16 +38,6 @@
BEGIN_C_DECLS
/* internally used object for transferring data
* to/from the server and for storing in the
* hash tables */
typedef struct {
pmix_list_item_t super;
char *key;
pmix_value_t *value;
} pmix_kval_t;
PMIX_CLASS_DECLARATION(pmix_kval_t);
/* A non-API function for something that happens in a number
* of places throughout the code base - transferring a value to
* another pmix_value_t structure

Просмотреть файл

@ -62,6 +62,7 @@ static const char pmix_version_string[] = PMIX_VERSION;
#include "src/util/progress_threads.h"
#include "src/usock/usock.h"
#include "src/sec/pmix_sec.h"
#include "src/include/pmix_globals.h"
#include "pmix_client_ops.h"
@ -279,6 +280,7 @@ int PMIx_Init(pmix_proc_t *proc)
}
/* default to our internal errhandler */
errhandler_ref = 0;
pmix_add_errhandler(myerrhandler, NULL, 0, &errhandler_ref);
/* see if debug is requested */
if (NULL != (evar = getenv("PMIX_DEBUG"))) {
@ -631,7 +633,7 @@ pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val)
cb->value = val;
/* pass this into the event library for thread protection */
PMIX_THREAD_SHIFT(cb, _putfn);
PMIX_THREADSHIFT(cb, _putfn);
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
@ -715,7 +717,7 @@ pmix_status_t PMIx_Commit(void)
cb->active = true;
/* pass this into the event library for thread protection */
PMIX_THREAD_SHIFT(cb, _commitfn);
PMIX_THREADSHIFT(cb, _commitfn);
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
@ -792,7 +794,7 @@ pmix_status_t PMIx_Resolve_peers(const char *nodename, const char *nspace,
}
/* pass this into the event library for thread protection */
PMIX_THREAD_SHIFT(cb, _peersfn);
PMIX_THREADSHIFT(cb, _peersfn);
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
@ -850,7 +852,7 @@ pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist)
}
/* pass this into the event library for thread protection */
PMIX_THREAD_SHIFT(cb, _nodesfn);
PMIX_THREADSHIFT(cb, _nodesfn);
/* wait for the result */
PMIX_WAIT_FOR_COMPLETION(cb->active);
@ -1297,51 +1299,50 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
PMIX_RELEASE(cb);
}
void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
pmix_notification_fn_t errhandler,
pmix_errhandler_reg_cbfunc_t cbfunc,
void *cbdata)
static void reg_errhandler(int sd, short args, void *cbdata)
{
/* add err handler, process info keys and register for events and call the callback */
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
int index = 0;
pmix_buffer_t *msg;
pmix_cb_t *cb;
pmix_status_t rc;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: register errhandler with %d infos", (int)ninfo);
"pmix: register errhandler with %d infos", (int)cd->ninfo);
/* check if this handler is already registered if so return error */
if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(info, ninfo, &index))) {
if (PMIX_EXISTS == (rc = pmix_lookup_errhandler(cd->info, cd->ninfo, &index))) {
/* complete request with error status and return its original reference */
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: register errhandler - already registered");
cbfunc(PMIX_EXISTS, index, cbdata);
cd->cbfunc.errregcbfn(PMIX_EXISTS, index, cd->cbdata);
} else if (PMIX_ERR_GRP_FOUND == rc) {
/* just acknowledge it */
cbfunc(PMIX_SUCCESS, index, cbdata);
} else if (PMIX_ERR_DFLT_FOUND == rc && NULL == info) {
cd->cbfunc.errregcbfn(PMIX_SUCCESS, index, cd->cbdata);
} else if (PMIX_ERR_DFLT_FOUND == rc && NULL == cd->info) {
/* if they are registering a default errhandler, then
* overwrite the existing one with it - the index will
* contain its location */
pmix_add_errhandler(errhandler, info, ninfo, &index);
rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index);
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
} else {
/* need to add this errhandler */
if (PMIX_SUCCESS != (rc = pmix_add_errhandler(errhandler, info, ninfo, &index))) {
if (PMIX_SUCCESS != (rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: register errhandler - error status rc=%d", rc);
/* complete request with error*/
cbfunc(rc, index, cbdata);
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
} else {
pmix_output_verbose(10, pmix_globals.debug_output,
"pmix: register errhandler - added index=%d, ninfo =%lu", index, ninfo);
"pmix: register errhandler - added index=%d, ninfo =%lu", index, cd->ninfo);
msg = PMIX_NEW(pmix_buffer_t);
if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_REGEVENTS_CMD, info, ninfo))) {
if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_REGEVENTS_CMD, cd->info, cd->ninfo))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: register errhandler - pack events failed status=%d", rc);
PMIX_RELEASE(msg);
pmix_remove_errhandler(index);
cbfunc(PMIX_ERR_PACK_FAILURE, -1, cbdata);
cd->cbfunc.errregcbfn(PMIX_ERR_PACK_FAILURE, -1, cd->cbdata);
} else {
/* create a callback object as we need to pass it to the
* recv routine so we know which callback to use when
@ -1349,8 +1350,8 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
pmix_output_verbose(10, pmix_globals.debug_output,
"pmix: register errhandler - pack events success status=%d", rc);
cb = PMIX_NEW(pmix_cb_t);
cb->errreg_cbfunc = cbfunc;
cb->cbdata = cbdata;
cb->errreg_cbfunc = cd->cbfunc.errregcbfn;
cb->cbdata = cd->cbdata;
cb->errhandler_ref = index;
/* push the message into our event base to send to the server */
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, regevents_cbfunc, cb);
@ -1359,6 +1360,28 @@ void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
}
}
/* Client-side entry point for registering an error handler.
 *
 * Nothing is registered on the calling thread: the arguments are packed
 * into a pmix_shift_caddy_t and reg_errhandler is scheduled through
 * PMIX_THREADSHIFT, where the registration itself is carried out.
 *
 * info/ninfo  - info keys for the handler; the array is stored by
 *               pointer, not copied
 * errhandler  - notification function to register
 * cbfunc      - registration callback, carried in the caddy's
 *               cbfunc.errregcbfn slot
 * cbdata      - opaque pointer carried alongside cbfunc
 */
void pmix_client_register_errhandler(pmix_info_t info[], size_t ninfo,
                                     pmix_notification_fn_t errhandler,
                                     pmix_errhandler_reg_cbfunc_t cbfunc,
                                     void *cbdata)
{
    /* the caddy carries the request across the thread shift */
    pmix_shift_caddy_t *caddy = PMIX_NEW(pmix_shift_caddy_t);

    caddy->cbdata = cbdata;
    caddy->cbfunc.errregcbfn = cbfunc;
    caddy->err = errhandler;
    caddy->ninfo = ninfo;
    caddy->info = info;

    pmix_output_verbose(2, pmix_globals.debug_output,
                        "pmix_client_register_errhandler shifting to server thread");

    PMIX_THREADSHIFT(caddy, reg_errhandler);
}
static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata)
{
@ -1388,41 +1411,55 @@ static void deregevents_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
PMIX_RELEASE(cb);
}
void pmix_client_deregister_errhandler(int errhandler_ref,
pmix_op_cbfunc_t cbfunc,
void *cbdata)
static void dereg_errhandler(int sd, short args, void *cbdata)
{
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
pmix_status_t rc;
pmix_error_reg_info_t *errreg;
pmix_buffer_t *msg;
pmix_cb_t *cb;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix_client_deregister_errhandler errhandler_ref = %d", errhandler_ref);
"pmix_client_deregister_errhandler errhandler_ref = %d", cd->ref);
errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, errhandler_ref);
errreg = (pmix_error_reg_info_t *)pmix_pointer_array_get_item(&pmix_globals.errregs, cd->ref);
if (NULL != errreg ) {
msg = PMIX_NEW(pmix_buffer_t);
if (PMIX_SUCCESS != (rc = pack_regevents(msg, PMIX_DEREGEVENTS_CMD, errreg->info, errreg->ninfo))) {
PMIX_RELEASE(msg);
pmix_remove_errhandler(errhandler_ref);
cbfunc(PMIX_ERR_PACK_FAILURE, cbdata);
pmix_remove_errhandler(cd->ref);
cd->cbfunc.opcbfn(PMIX_ERR_PACK_FAILURE, cd->cbdata);
} else {
/* create a callback object as we need to pass it to the
* recv routine so we know which callback to use when
* the server acks/nacks the register events request*/
cb = PMIX_NEW(pmix_cb_t);
cb->op_cbfunc = cbfunc;
cb->cbdata = cbdata;
cb->errhandler_ref = errhandler_ref;
cb->op_cbfunc = cd->cbfunc.opcbfn;
cb->cbdata = cd->cbdata;
cb->errhandler_ref = cd->ref;
/* push the message into our event base to send to the server */
PMIX_ACTIVATE_SEND_RECV(&pmix_client_globals.myserver, msg, deregevents_cbfunc, cb);
}
} else {
cbfunc(PMIX_ERR_NOT_FOUND, cbdata);
cd->cbfunc.opcbfn(PMIX_ERR_NOT_FOUND, cd->cbdata);
}
OBJ_RELEASE(cd);
}
/* Client-side entry point for removing a previously registered error
 * handler.
 *
 * The request is packed into a pmix_shift_caddy_t and dereg_errhandler is
 * scheduled through PMIX_THREADSHIFT; the deregistration itself happens
 * in that callback.
 *
 * errhandler_ref - reference of the handler to remove
 * cbfunc         - completion callback, carried in the caddy's
 *                  cbfunc.opcbfn slot
 * cbdata         - opaque pointer carried alongside cbfunc
 */
void pmix_client_deregister_errhandler(int errhandler_ref,
                                       pmix_op_cbfunc_t cbfunc,
                                       void *cbdata)
{
    /* the caddy carries the request across the thread shift */
    pmix_shift_caddy_t *caddy = PMIX_NEW(pmix_shift_caddy_t);

    caddy->ref = errhandler_ref;
    caddy->cbdata = cbdata;
    caddy->cbfunc.opcbfn = cbfunc;

    PMIX_THREADSHIFT(caddy, dereg_errhandler);
}
static void notifyerror_cbfunc(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata)
{

Просмотреть файл

@ -164,7 +164,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
cb->ninfo = ninfo;
cb->value_cbfunc = cbfunc;
cb->cbdata = cbdata;
PMIX_THREAD_SHIFT(cb, _getnbfn);
PMIX_THREADSHIFT(cb, _getnbfn);
return PMIX_SUCCESS;
}

Просмотреть файл

@ -175,3 +175,22 @@ static void errdes(pmix_error_reg_info_t *p)
PMIX_CLASS_INSTANCE(pmix_error_reg_info_t,
pmix_object_t,
errcon, errdes);
/* Constructor for pmix_shift_caddy_t: marks the caddy inactive and clears
 * the key-value pointer, the callback union and the callback data. Other
 * fields are left for the code that fills in the request. */
static void scon(pmix_shift_caddy_t *caddy)
{
    caddy->active = false;
    caddy->kv = NULL;

    /* every member of the callback union is cleared explicitly */
    caddy->cbfunc.relfn = NULL;
    caddy->cbfunc.errregcbfn = NULL;
    caddy->cbfunc.opcbfn = NULL;

    caddy->cbdata = NULL;
}
/* Destructor for pmix_shift_caddy_t: drops the caddy's reference to its
 * key-value object, if it holds one. */
static void scdes(pmix_shift_caddy_t *caddy)
{
    if (NULL == caddy->kv) {
        /* nothing attached - nothing to release */
        return;
    }
    PMIX_RELEASE(caddy->kv);
}
PMIX_CLASS_INSTANCE(pmix_shift_caddy_t,
pmix_object_t,
scon, scdes);

Просмотреть файл

@ -38,38 +38,96 @@
BEGIN_C_DECLS
#define PMIX_MAX_CRED_SIZE 131072 // set max at 128kbytes
#define PMIX_MAX_ERROR_REGISTRATIONS 5 // maximum number of error handlers that can be registered
#define PMIX_MAX_CRED_SIZE 131072 // set max at 128kbytes
#define PMIX_MAX_ERROR_REGISTRATIONS 128 // maximum number of error handlers that can be registered
/* define a structure for tracking error registrations */
typedef struct {
pmix_object_t super;
bool sglhdlr; // registers a specific error status handler
pmix_notification_fn_t errhandler; /* registered err handler callback fn */
pmix_info_t *info; /* error info keys registered with the handler */
size_t ninfo; /* size of info */
} pmix_error_reg_info_t;
PMIX_CLASS_DECLARATION(pmix_error_reg_info_t);
/**** ENUM DEFINITIONS ****/
/* define a command type for communicating to the
* pmix server */
#define PMIX_CMD PMIX_UINT32
/* define a global construct that includes values that must be shared
* between various parts of the code library. Both the client
* and server libraries must instance this structure */
/* define some commands */
typedef enum {
PMIX_REQ_CMD,
PMIX_ABORT_CMD,
PMIX_COMMIT_CMD,
PMIX_FENCENB_CMD,
PMIX_GETNB_CMD,
PMIX_FINALIZE_CMD,
PMIX_PUBLISHNB_CMD,
PMIX_LOOKUPNB_CMD,
PMIX_UNPUBLISHNB_CMD,
PMIX_SPAWNNB_CMD,
PMIX_CONNECTNB_CMD,
PMIX_DISCONNECTNB_CMD,
PMIX_NOTIFY_CMD,
PMIX_REGEVENTS_CMD,
PMIX_DEREGEVENTS_CMD,
} pmix_cmd_t;
/* define a set of flags to direct collection
* of data during operations */
typedef enum {
PMIX_COLLECT_INVALID = -1,
PMIX_COLLECT_NO,
PMIX_COLLECT_YES,
PMIX_COLLECT_MAX
} pmix_collect_t;
/**** MESSAGING STRUCTURES ****/
/* header for messages */
typedef struct {
int init_cntr; // #times someone called Init - #times called Finalize
pmix_proc_t myid;
uid_t uid; // my effective uid
gid_t gid; // my effective gid
int pindex;
pmix_event_base_t *evbase;
int debug_output;
pmix_pointer_array_t errregs; // my error handler registrations.
bool server;
bool connected;
pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about
pmix_buffer_t *cache_local; // data PUT by me to local scope
pmix_buffer_t *cache_remote; // data PUT by me to remote scope
} pmix_globals_t;
uint32_t tag;
size_t nbytes;
} pmix_usock_hdr_t;
/* internally used object for transferring data
* to/from the server and for storing in the
* hash tables */
typedef struct {
pmix_list_item_t super;
char *key;
pmix_value_t *value;
} pmix_kval_t;
PMIX_CLASS_DECLARATION(pmix_kval_t);
// forward declaration
struct pmix_peer_t;
/* internally used cbfunc */
typedef void (*pmix_usock_cbfunc_t)(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata);
/* usock structure for sending a message */
typedef struct {
pmix_list_item_t super;
pmix_event_t ev;
pmix_usock_hdr_t hdr;
pmix_buffer_t *data;
bool hdr_sent;
char *sdptr;
size_t sdbytes;
} pmix_usock_send_t;
PMIX_CLASS_DECLARATION(pmix_usock_send_t);
/* usock structure for recving a message */
typedef struct {
pmix_list_item_t super;
pmix_event_t ev;
struct pmix_peer_t *peer;
int sd;
pmix_usock_hdr_t hdr;
char *data;
bool hdr_recvd;
char *rdptr;
size_t rdbytes;
} pmix_usock_recv_t;
PMIX_CLASS_DECLARATION(pmix_usock_recv_t);
/**** PEER STRUCTURES ****/
/* objects for tracking active nspaces */
typedef struct {
pmix_object_t super;
@ -105,6 +163,39 @@ typedef struct pmix_rank_info_t {
} pmix_rank_info_t;
PMIX_CLASS_DECLARATION(pmix_rank_info_t);
/* object for tracking peers - each peer can have multiple
* connections. This can occur if the initial app executes
* a fork/exec, and the child initiates its own connection
* back to the PMIx server. Thus, the trackers should be "indexed"
* by the socket, not the process nspace/rank */
typedef struct pmix_peer_t {
pmix_object_t super;
pmix_rank_info_t *info;
int proc_cnt;
void *server_object;
int index;
int sd;
pmix_event_t send_event; /**< registration with event thread for send events */
bool send_ev_active;
pmix_event_t recv_event; /**< registration with event thread for recv events */
bool recv_ev_active;
pmix_list_t send_queue; /**< list of messages to send */
pmix_usock_send_t *send_msg; /**< current send in progress */
pmix_usock_recv_t *recv_msg; /**< current recv in progress */
} pmix_peer_t;
PMIX_CLASS_DECLARATION(pmix_peer_t);
/* define a structure for tracking error registrations */
typedef struct {
pmix_object_t super;
bool sglhdlr; // registers a specific error status handler
pmix_notification_fn_t errhandler; /* registered err handler callback fn */
pmix_info_t *info; /* error info keys registered with the handler */
size_t ninfo; /* size of info */
} pmix_error_reg_info_t;
PMIX_CLASS_DECLARATION(pmix_error_reg_info_t);
typedef struct {
pmix_list_item_t super;
char *name; // name of the node
@ -112,6 +203,112 @@ typedef struct {
} pmix_nrec_t;
PMIX_CLASS_DECLARATION(pmix_nrec_t);
/* define an object for moving a send
* request into the server's event base */
typedef struct {
pmix_object_t super;
int sd;
} pmix_snd_caddy_t;
PMIX_CLASS_DECLARATION(pmix_snd_caddy_t);
/* define an object for moving a send
* request into the server's event base */
typedef struct {
pmix_list_item_t super;
pmix_usock_hdr_t hdr;
pmix_peer_t *peer;
pmix_snd_caddy_t snd;
} pmix_server_caddy_t;
PMIX_CLASS_DECLARATION(pmix_server_caddy_t);
/* define a tracker for collective operations */
typedef struct {
pmix_list_item_t super;
pmix_cmd_t type;
pmix_proc_t *pcs; // copy of the original array of participants
size_t npcs; // number of procs in the array
volatile bool active; // flag for waiting for completion
bool def_complete; // all local procs have been registered and the trk definition is complete
pmix_list_t ranks; // list of pmix_rank_info_t of the local participants
pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants
uint32_t nlocal; // number of local participants
uint32_t local_cnt; // number of local participants who have contributed
pmix_info_t *info; // array of info structs
size_t ninfo; // number of info structs in array
pmix_collect_t collect_type; // whether or not data is to be returned at completion
pmix_modex_cbfunc_t modexcbfunc;
pmix_op_cbfunc_t op_cbfunc;
} pmix_server_trkr_t;
PMIX_CLASS_DECLARATION(pmix_server_trkr_t);
/**** THREAD-RELATED ****/
/* define a caddy for thread-shifting operations - it carries the
 * arguments of a request into the callback that PMIX_THREADSHIFT
 * schedules on the event base */
typedef struct {
pmix_object_t super;
pmix_event_t ev; // event that PMIX_THREADSHIFT assigns and activates
volatile bool active; // cleared by the constructor, set true by PMIX_THREADSHIFT
pmix_status_t status;
const char *nspace;
int rank;
const char *data;
size_t ndata;
const char *key;
pmix_info_t *info; // info array of the request - the errhandler registration path stores the caller's pointer here
size_t ninfo; // number of elements in info
pmix_notification_fn_t err; // errhandler being registered
pmix_kval_t *kv; // released by the destructor when non-NULL
pmix_value_t *vptr;
pmix_server_caddy_t *cd;
pmix_server_trkr_t *tracker;
/* completion callback - this is a union, so only the member that
 * matches the shifted operation is meaningful */
union {
pmix_release_cbfunc_t relfn;
pmix_errhandler_reg_cbfunc_t errregcbfn; // used by errhandler registration
pmix_op_cbfunc_t opcbfn; // used by errhandler deregistration
}cbfunc;
void *cbdata; // caller's opaque pointer, handed back through cbfunc
int ref; // errhandler reference used by deregistration
} pmix_shift_caddy_t;
PMIX_CLASS_DECLARATION(pmix_shift_caddy_t);
/* Schedule callback (c) on the library's event base with request object
 * (r) as its argument.
 *
 * (r) must be a pointer to an object with an `ev` event member and an
 * `active` flag; the flag is raised before the event is activated so a
 * caller may wait on it afterwards.
 *
 * The expansion is a single statement with no trailing semicolon - the
 * caller supplies it - so the macro stays safe in an unbraced if/else. */
#define PMIX_THREADSHIFT(r, c)                              \
    do {                                                    \
        (r)->active = true;                                 \
        event_assign(&((r)->ev), pmix_globals.evbase,       \
                     -1, EV_WRITE, (c), (r));               \
        event_active(&((r)->ev), EV_WRITE, 1);              \
    } while (0)
/* Block the calling thread until flag (a) becomes false, polling it and
 * sleeping 10 microseconds between checks.
 *
 * The expansion is a single statement with no trailing semicolon - the
 * caller supplies it - so the macro stays safe in an unbraced if/else. */
#define PMIX_WAIT_FOR_COMPLETION(a)             \
    do {                                        \
        while ((a)) {                           \
            usleep(10);                         \
        }                                       \
    } while (0)
/**** GLOBAL STORAGE ****/
/* define a global construct that includes values that must be shared
* between various parts of the code library. Both the client
* and server libraries must instance this structure */
typedef struct {
int init_cntr; // #times someone called Init - #times called Finalize
pmix_proc_t myid;
uid_t uid; // my effective uid
gid_t gid; // my effective gid
int pindex;
pmix_event_base_t *evbase;
int debug_output;
pmix_pointer_array_t errregs; // my error handler registrations.
bool server;
bool connected;
pmix_list_t nspaces; // list of pmix_nspace_t for the nspaces we know about
pmix_buffer_t *cache_local; // data PUT by me to local scope
pmix_buffer_t *cache_remote; // data PUT by me to remote scope
} pmix_globals_t;
/* initialize the pmix_global structure */
void pmix_globals_init(void);

Просмотреть файл

@ -80,63 +80,6 @@ PMIX_CLASS_INSTANCE(pmix_usock_queue_t,
pmix_object_t,
NULL, NULL);
/* define a caddy for thread-shifting operations when
* the host server executes a callback to us */
typedef struct {
pmix_object_t super;
pmix_event_t ev;
volatile bool active;
pmix_status_t status;
const char *nspace;
int rank;
const char *data;
size_t ndata;
const char *key;
pmix_info_t *info;
size_t ninfo;
pmix_notification_fn_t err;
pmix_kval_t *kv;
pmix_value_t *vptr;
pmix_server_caddy_t *cd;
pmix_server_trkr_t *tracker;
union {
pmix_release_cbfunc_t relfn;
pmix_errhandler_reg_cbfunc_t errregcbfn;
pmix_op_cbfunc_t opcbfn;
}cbfunc;
void *cbdata;
int ref;
} pmix_shift_caddy_t;
static void scon(pmix_shift_caddy_t *p)
{
p->active = false;
p->kv = NULL;
p->cbfunc.relfn = NULL;
p->cbfunc.errregcbfn = NULL;
p->cbfunc.opcbfn = NULL;
p->cbdata = NULL;
}
static void scdes(pmix_shift_caddy_t *p)
{
if (NULL != p->kv) {
PMIX_RELEASE(p->kv);
}
}
PMIX_CLASS_INSTANCE(pmix_shift_caddy_t,
pmix_object_t,
scon, scdes);
#define PMIX_THREADSHIFT(r, c) \
do { \
(r)->active = true; \
event_assign(&((r)->ev), pmix_globals.evbase, \
-1, EV_WRITE, (c), (r)); \
event_priority_set(&((r)->ev), 0); \
event_active(&((r)->ev), EV_WRITE, 1); \
} while(0);
/* queue a message to be sent to one of our procs - must
* provide the following params:
*
@ -1336,12 +1279,12 @@ static void dereg_errhandler(int sd, short args, void *cbdata)
if (NULL != cd->cbfunc.opcbfn) {
cd->cbfunc.opcbfn(rc, cd->cbdata);
}
cd->active = false;
OBJ_RELEASE(cd);
}
void pmix_server_deregister_errhandler(int errhandler_ref,
pmix_op_cbfunc_t cbfunc,
void *cbdata)
pmix_op_cbfunc_t cbfunc,
void *cbdata)
{
pmix_shift_caddy_t *cd;
@ -1351,9 +1294,6 @@ void pmix_server_deregister_errhandler(int errhandler_ref,
cd->cbdata = cbdata;
cd->ref = errhandler_ref;
PMIX_THREADSHIFT(cd, dereg_errhandler);
PMIX_WAIT_FOR_COMPLETION(cd->active);
PMIX_RELEASE(cd);
}
static void _store_internal(int sd, short args, void *cbdata)

Просмотреть файл

@ -20,52 +20,6 @@
#include "src/usock/usock.h"
#include "src/util/hash.h"
/* define an object for moving a send
* request into the server's event base */
typedef struct {
pmix_object_t super;
int sd;
} pmix_snd_caddy_t;
PMIX_CLASS_DECLARATION(pmix_snd_caddy_t);
/* define an object for moving a send
* request into the server's event base */
typedef struct {
pmix_list_item_t super;
pmix_usock_hdr_t hdr;
pmix_peer_t *peer;
pmix_snd_caddy_t snd;
} pmix_server_caddy_t;
PMIX_CLASS_DECLARATION(pmix_server_caddy_t);
typedef enum {
PMIX_COLLECT_INVALID = -1,
PMIX_COLLECT_NO,
PMIX_COLLECT_YES,
PMIX_COLLECT_MAX
} pmix_collect_t;
/* define a tracker for collective operations */
typedef struct {
pmix_list_item_t super;
pmix_cmd_t type;
pmix_proc_t *pcs; // copy of the original array of participants
size_t npcs; // number of procs in the array
volatile bool active; // flag for waiting for completion
bool def_complete; // all local procs have been registered and the trk definition is complete
pmix_list_t ranks; // list of pmix_rank_info_t of the local participants
pmix_list_t local_cbs; // list of pmix_server_caddy_t for sending result to the local participants
uint32_t nlocal; // number of local participants
uint32_t local_cnt; // number of local participants who have contributed
pmix_info_t *info; // array of info structs
size_t ninfo; // number of info structs in array
pmix_collect_t collect_type; // whether or not data is to be returned at completion
pmix_modex_cbfunc_t modexcbfunc;
pmix_op_cbfunc_t op_cbfunc;
} pmix_server_trkr_t;
PMIX_CLASS_DECLARATION(pmix_server_trkr_t);
typedef struct {
pmix_object_t super;
pmix_event_t ev;
@ -135,7 +89,7 @@ typedef struct {
pmix_object_t super;
pmix_event_t ev;
int sd;
struct sockaddr addr;
struct sockaddr_storage addr;
} pmix_pending_connection_t;
PMIX_CLASS_DECLARATION(pmix_pending_connection_t);

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Artem Y. Polyakov <artpol84@gmail.com>.
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
@ -54,73 +54,12 @@
#endif
#include PMIX_EVENT_HEADER
#include "src/include/pmix_globals.h"
#include "src/buffer_ops/buffer_ops.h"
#include "src/class/pmix_hash_table.h"
#include "src/class/pmix_list.h"
/* define a command type for communicating to the
* pmix server */
#define PMIX_CMD PMIX_UINT32
/* define some commands */
typedef enum {
PMIX_REQ_CMD,
PMIX_ABORT_CMD,
PMIX_COMMIT_CMD,
PMIX_FENCENB_CMD,
PMIX_GETNB_CMD,
PMIX_FINALIZE_CMD,
PMIX_PUBLISHNB_CMD,
PMIX_LOOKUPNB_CMD,
PMIX_UNPUBLISHNB_CMD,
PMIX_SPAWNNB_CMD,
PMIX_CONNECTNB_CMD,
PMIX_DISCONNECTNB_CMD,
PMIX_NOTIFY_CMD,
PMIX_REGEVENTS_CMD,
PMIX_DEREGEVENTS_CMD,
} pmix_cmd_t;
/* header for messages */
typedef struct {
int pindex;
uint32_t tag;
size_t nbytes;
} pmix_usock_hdr_t;
// forward declaration
struct pmix_peer_t;
/* internally used cbfunc */
typedef void (*pmix_usock_cbfunc_t)(struct pmix_peer_t *peer, pmix_usock_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata);
/* usock structure for sending a message */
typedef struct {
pmix_list_item_t super;
pmix_event_t ev;
pmix_usock_hdr_t hdr;
pmix_buffer_t *data;
bool hdr_sent;
char *sdptr;
size_t sdbytes;
} pmix_usock_send_t;
PMIX_CLASS_DECLARATION(pmix_usock_send_t);
/* usock structure for recving a message */
typedef struct {
pmix_list_item_t super;
pmix_event_t ev;
struct pmix_peer_t *peer;
int sd;
pmix_usock_hdr_t hdr;
char *data;
bool hdr_recvd;
char *rdptr;
size_t rdbytes;
} pmix_usock_recv_t;
PMIX_CLASS_DECLARATION(pmix_usock_recv_t);
/* usock structure for tracking posted recvs */
typedef struct {
@ -132,28 +71,6 @@ typedef struct {
} pmix_usock_posted_recv_t;
PMIX_CLASS_DECLARATION(pmix_usock_posted_recv_t);
/* object for tracking peers - each peer can have multiple
* connections. This can occur if the initial app executes
* a fork/exec, and the child initiates its own connection
* back to the PMIx server. Thus, the trackers should be "indexed"
* by the socket, not the process nspace/rank */
typedef struct pmix_peer_t {
pmix_object_t super;
pmix_rank_info_t *info;
int proc_cnt;
void *server_object;
int index;
int sd;
pmix_event_t send_event; /**< registration with event thread for send events */
bool send_ev_active;
pmix_event_t recv_event; /**< registration with event thread for recv events */
bool recv_ev_active;
pmix_list_t send_queue; /**< list of messages to send */
pmix_usock_send_t *send_msg; /**< current send in progress */
pmix_usock_recv_t *recv_msg; /**< current recv in progress */
} pmix_peer_t;
PMIX_CLASS_DECLARATION(pmix_peer_t);
/* usock struct for posting send/recv request */
typedef struct {
pmix_object_t super;
@ -193,15 +110,6 @@ typedef struct {
} pmix_cb_t;
PMIX_CLASS_DECLARATION(pmix_cb_t);
/* an internal macro for shifting incoming requests
* to the internal event thread */
#define PMIX_THREAD_SHIFT(c, f) \
do { \
event_assign(&((c)->ev), pmix_globals.evbase, -1, \
EV_WRITE, (f), (c)); \
event_active(&((c)->ev), EV_WRITE, 1); \
} while(0);
typedef struct {
pmix_object_t super;
pmix_event_t ev;
@ -249,13 +157,6 @@ PMIX_CLASS_DECLARATION(pmix_timer_t);
} while(0)
#define PMIX_WAIT_FOR_COMPLETION(a) \
do { \
while ((a)) { \
usleep(10); \
} \
} while (0);
#define PMIX_TIMER_EVENT(s, f, d) \
do { \
pmix_timer_t *tm; \

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Artem Y. Polyakov <artpol84@gmail.com>.
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
@ -37,12 +37,44 @@
#endif
#include "src/include/pmix_globals.h"
#include "src/server/pmix_server_ops.h"
#include "src/util/error.h"
#include "usock.h"
static uint32_t current_tag = 1; // 0 is reserved for system purposes
/* Common teardown for a peer whose socket has failed.
 *
 * Removes any still-active recv/send events, releases a partially
 * received message, closes the socket, and then either drops the peer
 * from the server's client array (server side) or marks the single
 * client connection as down (client side). The error is reported last.
 *
 * peer - the peer whose connection failed; released when running as a
 *        server, so it must not be used after this call in that case
 * err  - status handed to PMIX_REPORT_ERROR
 */
static void lost_connection(pmix_peer_t *peer, pmix_status_t err)
{
    /* halt event processing on this peer */
    if (peer->recv_ev_active) {
        event_del(&peer->recv_event);
        peer->recv_ev_active = false;
    }
    if (peer->send_ev_active) {
        event_del(&peer->send_event);
        peer->send_ev_active = false;
    }

    /* discard a message that was only partially received */
    if (peer->recv_msg != NULL) {
        PMIX_RELEASE(peer->recv_msg);
        peer->recv_msg = NULL;
    }

    CLOSE_THE_SOCKET(peer->sd);

    if (!pmix_globals.server) {
        /* a client has exactly one connection, so it is now disconnected */
        pmix_globals.connected = false;
    } else {
        /* the client has gone away: forget it and drop our reference */
        pmix_pointer_array_set_item(&pmix_server_globals.clients,
                                    peer->index, NULL);
        PMIX_RELEASE(peer);
    }

    PMIX_REPORT_ERROR(err);
}
static pmix_status_t send_bytes(int sd, char **buf, size_t *remain)
{
pmix_status_t ret = PMIX_SUCCESS;
@ -183,8 +215,7 @@ void pmix_usock_send_handler(int sd, short flags, void *cbdata)
peer->send_ev_active = false;
PMIX_RELEASE(msg);
peer->send_msg = NULL;
CLOSE_THE_SOCKET(peer->sd);
PMIX_REPORT_ERROR(rc);
lost_connection(peer, rc);
return;
}
}
@ -212,8 +243,7 @@ void pmix_usock_send_handler(int sd, short flags, void *cbdata)
peer->send_ev_active = false;
PMIX_RELEASE(msg);
peer->send_msg = NULL;
CLOSE_THE_SOCKET(peer->sd);
PMIX_REPORT_ERROR(rc);
lost_connection(peer, rc);
return;
}
}
@ -357,8 +387,7 @@ void pmix_usock_recv_handler(int sd, short flags, void *cbdata)
PMIX_RELEASE(peer->recv_msg);
peer->recv_msg = NULL;
}
CLOSE_THE_SOCKET(peer->sd);
PMIX_REPORT_ERROR(PMIX_ERR_UNREACH);
lost_connection(peer, PMIX_ERR_UNREACH);
}
void pmix_usock_send_recv(int fd, short args, void *cbdata)

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -97,12 +97,12 @@ static int pmix120_component_query(mca_base_module_t **module, int *priority)
if (NULL != (t = getenv("PMIX_SERVER_URI")) ||
NULL != (id = getenv("PMIX_ID"))) {
/* if PMIx is present, then we are a client and need to use it,
* but only if we are requested */
*priority = 5;
* and we are now the default */
*priority = 100;
} else {
/* we could be a server, so we still need to be considered,
* but only if requested */
*priority = 2;
* and we are now the default */
*priority = 15;
}
*module = (mca_base_module_t *)&opal_pmix_pmix120_module;
return OPAL_SUCCESS;

Просмотреть файл

@ -66,6 +66,9 @@ int orte_ess_base_proc_binding(void)
goto error;
}
}
} else if (NULL != getenv(OPAL_MCA_PREFIX"orte_externally_bound")) {
orte_proc_is_bound = true;
/* the topology system will pickup the binding pattern */
}
/* see if we were bound when launched */

Просмотреть файл

@ -62,7 +62,6 @@
#include "orte/mca/snapc/base/base.h"
#include "orte/mca/sstore/base/base.h"
#endif
#include "orte/mca/schizo/base/base.h"
#include "orte/mca/filem/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
@ -512,7 +511,7 @@ int orte_ess_base_orted_setup(char **hosts)
/* setup the PMIx framework - ensure it skips all non-PMIx components,
* but do not override anything we were given */
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray", false, &environ);
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_pmix_base_open";
@ -612,17 +611,7 @@ int orte_ess_base_orted_setup(char **hosts)
error = "orte_dfs_select";
goto error;
}
/* setup the SCHIZO framework */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_schizo_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_schizo_select";
goto error;
}
return ORTE_SUCCESS;
error:
orte_show_help("help-orte-runtime.txt",
@ -654,7 +643,6 @@ int orte_ess_base_orted_finalize(void)
(void) mca_base_framework_close(&opal_pmix_base_framework);
/* close frameworks */
(void) mca_base_framework_close(&orte_schizo_base_framework);
(void) mca_base_framework_close(&orte_filem_base_framework);
(void) mca_base_framework_close(&orte_grpcomm_base_framework);
(void) mca_base_framework_close(&orte_iof_base_framework);
@ -674,6 +662,8 @@ int orte_ess_base_orted_finalize(void)
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
/* ensure we scrub the session directory tree */
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
/* release the job hash table */
OBJ_RELEASE(orte_job_data);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
*
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
@ -52,7 +52,6 @@
#include "orte/mca/snapc/base/base.h"
#include "orte/mca/sstore/base/base.h"
#endif
#include "orte/mca/schizo/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
#include "orte/util/show_help.h"
@ -230,18 +229,6 @@ int orte_ess_base_tool_setup(void)
opal_cr_set_enabled(false);
#endif
/* setup schizo in case we are parsing cmd lines */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_schizo_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_schizo_base_select";
goto error;
}
return ORTE_SUCCESS;
error:
@ -270,7 +257,6 @@ int orte_ess_base_tool_finalize(void)
}
(void) mca_base_framework_close(&orte_routed_base_framework);
(void) mca_base_framework_close(&orte_rml_base_framework);
(void) mca_base_framework_close(&orte_schizo_base_framework);
(void) mca_base_framework_close(&orte_errmgr_base_framework);
return ORTE_SUCCESS;

Просмотреть файл

@ -74,7 +74,6 @@
#include "orte/mca/sstore/base/base.h"
#endif
#include "orte/mca/filem/base/base.h"
#include "orte/mca/schizo/base/base.h"
#include "orte/mca/state/base/base.h"
#include "orte/mca/state/state.h"
@ -620,7 +619,7 @@ static int rte_init(void)
/* setup the PMIx framework - ensure it skips all non-PMIx components, but
* do not override anything we were given */
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray", false, &environ);
opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_pmix_base_open";
@ -721,17 +720,7 @@ static int rte_init(void)
error = "orte_dfs_select";
goto error;
}
/* setup the schizo framework */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_schizo_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_schizo_select";
goto error;
}
/* if a tool has launched us and is requesting event reports,
* then set its contact info into the comm system
*/
@ -808,7 +797,6 @@ static int rte_finalize(void)
/* cleanup our data server */
orte_data_server_finalize();
(void) mca_base_framework_close(&orte_schizo_base_framework);
(void) mca_base_framework_close(&orte_dfs_base_framework);
(void) mca_base_framework_close(&orte_filem_base_framework);
/* output any lingering stdout/err data */
@ -854,6 +842,9 @@ static int rte_finalize(void)
fclose(orte_xml_fp);
}
}
/* release the job hash table */
OBJ_RELEASE(orte_job_data);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All
* rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -25,7 +25,7 @@
#include "opal/mca/pmix/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/schizo/schizo.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/pmi/ess_pmi.h"
@ -67,41 +67,27 @@ static int pmi_component_open(void)
static int pmi_component_query(mca_base_module_t **module, int *priority)
{
int ret;
orte_schizo_launch_environ_t ret;
/* all APPS must use pmix */
if (ORTE_PROC_IS_APP) {
if (NULL == opal_pmix.initialized) {
/* open and setup pmix */
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
*priority = -1;
*module = NULL;
return ret;
}
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
/* don't error log this as it might not be an error at all */
*priority = -1;
*module = NULL;
(void) mca_base_framework_close(&opal_pmix_base_framework);
return ret;
}
}
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
/* we cannot be in a PMI environment */
*priority = -1;
*module = NULL;
return ORTE_ERROR;
}
*priority = 35;
*module = (mca_base_module_t *)&orte_ess_pmi_module;
return ORTE_SUCCESS;
if (!ORTE_PROC_IS_APP) {
*module = NULL;
*priority = 0;
return ORTE_ERROR;
}
/* we can't run */
*priority = -1;
*module = NULL;
return ORTE_ERROR;
/* find out what our environment looks like */
ret = orte_schizo.check_launch_environment();
if (ORTE_SCHIZO_UNMANAGED_SINGLETON == ret ||
ORTE_SCHIZO_MANAGED_SINGLETON == ret) {
/* not us */
*module = NULL;
*priority = 0;
return ORTE_ERROR;
}
*priority = 35;
*module = (mca_base_module_t *)&orte_ess_pmi_module;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -97,6 +97,18 @@ static int rte_init(void)
goto error;
}
/* open and setup pmix */
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
/* we cannot run */
error = "pmix init";
goto error;
}
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
/* we cannot run */
error = "pmix init";
goto error;
}
/* initialize the selected module */
if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) {
/* we cannot run */

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,14 +22,13 @@
BEGIN_C_DECLS
/*
* Module open / close
*/
int orte_ess_singleton_component_open(void);
int orte_ess_singleton_component_close(void);
int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority);
ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_singleton_component;
typedef struct {
orte_ess_base_component_t super;
char *server_uri;
bool isolated;
} orte_ess_singleton_component_t;
ORTE_MODULE_DECLSPEC extern orte_ess_singleton_component_t mca_ess_singleton_component;
END_C_DECLS

Просмотреть файл

@ -12,6 +12,7 @@
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -32,72 +33,82 @@
#include "opal/mca/pmix/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/schizo/schizo.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/singleton/ess_singleton.h"
extern orte_ess_base_module_t orte_ess_singleton_module;
char *orte_ess_singleton_server_uri = NULL;
static int
orte_ess_singleton_component_register(void);
static int component_open(void);
static int component_close(void);
static int component_query(mca_base_module_t **module, int *priority);
static int component_register(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
orte_ess_base_component_t mca_ess_singleton_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
.base_version = {
ORTE_ESS_BASE_VERSION_3_0_0,
orte_ess_singleton_component_t mca_ess_singleton_component = {
{
/* First, the mca_component_t struct containing meta information
about the component itself */
.base_version = {
ORTE_ESS_BASE_VERSION_3_0_0,
/* Component name and version */
.mca_component_name = "singleton",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
/* Component name and version */
.mca_component_name = "singleton",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
/* Component open and close functions */
.mca_open_component = orte_ess_singleton_component_open,
.mca_close_component = orte_ess_singleton_component_close,
.mca_query_component = orte_ess_singleton_component_query,
.mca_register_component_params = orte_ess_singleton_component_register,
},
.base_data = {
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
/* Component open and close functions */
.mca_open_component = component_open,
.mca_close_component = component_close,
.mca_query_component = component_query,
.mca_register_component_params = component_register,
},
.base_data = {
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
},
.server_uri = NULL,
.isolated = false
};
static int
orte_ess_singleton_component_register(void)
static int component_register(void)
{
int ret;
orte_ess_singleton_server_uri = NULL;
ret = mca_base_component_var_register(&mca_ess_singleton_component.base_version,
mca_ess_singleton_component.server_uri = NULL;
ret = mca_base_component_var_register(&mca_ess_singleton_component.super.base_version,
"server",
"Server to be used as HNP - [file|FILE]:<filename> or just uri",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&orte_ess_singleton_server_uri);
&mca_ess_singleton_component.server_uri);
(void) mca_base_var_register_synonym(ret, "orte", "orte", NULL, "server", 0);
ret = mca_base_component_var_register(&mca_ess_singleton_component.super.base_version,
"isolated",
"Do not start a supporting daemon as this process will never attempt to spawn",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_ess_singleton_component.isolated);
return ORTE_SUCCESS;
}
int
orte_ess_singleton_component_open(void)
static int component_open(void)
{
return ORTE_SUCCESS;
}
int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority)
static int component_query(mca_base_module_t **module, int *priority)
{
int ret;
orte_schizo_launch_environ_t ret;
/* if we are an HNP, daemon, or tool, then we
* are definitely not a singleton!
@ -106,55 +117,28 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority
ORTE_PROC_IS_DAEMON ||
ORTE_PROC_IS_TOOL) {
*module = NULL;
*priority = 0;
return ORTE_ERROR;
}
/* okay, we still could be a singleton or
* an application process. If we have been
* given an HNP URI, then we are definitely
* not a singleton
*/
if (NULL != orte_process_info.my_hnp_uri) {
/* find out what our environment looks like */
ret = orte_schizo.check_launch_environment();
if (ORTE_SCHIZO_UNMANAGED_SINGLETON != ret &&
ORTE_SCHIZO_MANAGED_SINGLETON != ret) {
/* not us */
*module = NULL;
*priority = 0;
return ORTE_ERROR;
}
/* open and setup pmix */
if (NULL == opal_pmix.initialized) {
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
/* if PMIx is not available, then we are indeed a singleton */
goto single;
}
if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) {
/* if PMIx is not available, then we are indeed a singleton */
(void) mca_base_framework_close(&opal_pmix_base_framework);
goto single;
}
}
if (opal_pmix.initialized()) {
/* we are in a PMI environment and are therefore
* not a singleton */
*priority = -1;
*module = NULL;
return ORTE_ERROR;
}
single:
/* okay, we could still be an application process,
* but launched in "standalone" mode - i.e., directly
* launched by an environment instead of via mpirun.
* We need to set our priority low so that any enviro
* component will override us. If they don't, then we
* want to be selected as we must be a singleton
*/
*priority = 25;
/* okay, we want to be selected as we must be a singleton */
*priority = 100;
*module = (mca_base_module_t *)&orte_ess_singleton_module;
return ORTE_SUCCESS;
}
int
orte_ess_singleton_component_close(void)
static int component_close(void)
{
return ORTE_SUCCESS;
}

Просмотреть файл

@ -97,19 +97,19 @@ static int rte_init(void)
u32ptr = &u32;
u16ptr = &u16;
if (NULL != orte_ess_singleton_server_uri) {
if (NULL != mca_ess_singleton_component.server_uri) {
/* we are going to connect to a server HNP */
if (0 == strncmp(orte_ess_singleton_server_uri, "file", strlen("file")) ||
0 == strncmp(orte_ess_singleton_server_uri, "FILE", strlen("FILE"))) {
if (0 == strncmp(mca_ess_singleton_component.server_uri, "file", strlen("file")) ||
0 == strncmp(mca_ess_singleton_component.server_uri, "FILE", strlen("FILE"))) {
char input[1024], *filename;
FILE *fp;
/* it is a file - get the filename */
filename = strchr(orte_ess_singleton_server_uri, ':');
filename = strchr(mca_ess_singleton_component.server_uri, ':');
if (NULL == filename) {
/* filename is not correctly formatted */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
"singleton", orte_ess_singleton_server_uri);
"singleton", mca_ess_singleton_component.server_uri);
return ORTE_ERROR;
}
++filename; /* space past the : */
@ -117,7 +117,7 @@ static int rte_init(void)
if (0 >= strlen(filename)) {
/* they forgot to give us the name! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
"singleton", orte_ess_singleton_server_uri);
"singleton", mca_ess_singleton_component.server_uri);
return ORTE_ERROR;
}
@ -125,7 +125,7 @@ static int rte_init(void)
fp = fopen(filename, "r");
if (NULL == fp) { /* can't find or read file! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
"singleton", orte_ess_singleton_server_uri);
"singleton", mca_ess_singleton_component.server_uri);
return ORTE_ERROR;
}
memset(input, 0, 1024); // initialize the array to ensure a NULL termination
@ -133,14 +133,14 @@ static int rte_init(void)
/* something malformed about file */
fclose(fp);
orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
"singleton", orte_ess_singleton_server_uri, "singleton");
"singleton", mca_ess_singleton_component.server_uri, "singleton");
return ORTE_ERROR;
}
fclose(fp);
input[strlen(input)-1] = '\0'; /* remove newline */
orte_process_info.my_hnp_uri = strdup(input);
} else {
orte_process_info.my_hnp_uri = strdup(orte_ess_singleton_server_uri);
orte_process_info.my_hnp_uri = strdup(mca_ess_singleton_component.server_uri);
}
/* save the daemon uri - we will process it later */
orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri);
@ -154,17 +154,21 @@ static int rte_init(void)
ORTE_PROC_MY_NAME->vpid = 0;
/* for convenience, push the pubsub version of this param into the environ */
opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, 1, &environ);
opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ);
} else if (NULL != getenv("SINGULARITY_CONTAINER")) {
/* mark that we are in a container */
opal_setenv("OPAL_PROC_CONTAINER", "1", true, &environ);
} else if (NULL != getenv("OPAL_ISOLATED")) {
} else if (mca_ess_singleton_component.isolated) {
/* ensure we use the isolated pmix component */
opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
} else {
/* spawn our very own HNP to support us */
if (ORTE_SUCCESS != (rc = fork_hnp())) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* our name was given to us by the HNP */
opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ);
}
/* open and setup pmix */
@ -485,6 +489,16 @@ static int fork_hnp(void)
opal_argv_append(&argc, &argv, "state_novm_select");
opal_argv_append(&argc, &argv, "1");
/* direct the selection of the ess component */
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(&argc, &argv, "ess");
opal_argv_append(&argc, &argv, "hnp");
/* direct the selection of the pmix component */
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(&argc, &argv, "pmix");
opal_argv_append(&argc, &argv, "^s1,s2,cray,isolated");
/* Fork off the child */
orte_process_info.hnp_pid = fork();
if(orte_process_info.hnp_pid < 0) {

35
orte/mca/schizo/alps/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
#
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
schizo_alps_component.c \
schizo_alps.h \
schizo_alps.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_schizo_alps_DSO
component_noinst =
component_install = mca_schizo_alps.la
else
component_noinst = libmca_schizo_alps.la
component_install =
endif
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_schizo_alps_la_SOURCES = $(sources)
mca_schizo_alps_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_schizo_alps_la_SOURCES = $(sources)
libmca_schizo_alps_la_LDFLAGS = -module -avoid-version

47
orte/mca/schizo/alps/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,47 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 UT-Battelle, LLC
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_schizo_alps_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_orte_schizo_alps_CONFIG],[
AC_CONFIG_FILES([orte/mca/schizo/alps/Makefile])
ORTE_CHECK_ALPS([schizo_alps], [schizo_alps_happy="yes"], [schizo_alps_happy="no"])
# check for alps/apInfo.h
# save current CPPFLAGS
MCA_orte_schizo_save_CPPFLAGS="$CPPFLAGS"
# add flags obtained from ORTE_CHECK_ALPS
CPPFLAGS="$CPPFLAGS $schizo_alps_CPPFLAGS"
AC_CHECK_HEADERS([alps/apInfo.h], [], [schizo_alps_happy="no"])
# restore CPPFLAGS
CPPFLAGS="$MCA_orte_schizo_save_CPPFLAGS"
AC_SUBST([schizo_alps_CPPFLAGS])
AS_IF([test "$schizo_alps_happy" = "yes"], [$1], [$2])
])dnl

7
orte/mca/schizo/alps/owner.txt Обычный файл
Просмотреть файл

@ -0,0 +1,7 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: INTEL
status: active

114
orte/mca/schizo/alps/schizo_alps.c Обычный файл
Просмотреть файл

@ -0,0 +1,114 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
#include <sys/syscall.h>
#include "opal/util/basename.h"
#include "opal/util/opal_environ.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/mca/schizo/base/base.h"
#include "schizo_alps.h"
/* module entry points - both are defined later in this file */
static orte_schizo_launch_environ_t check_launch_environment(void);
static void finalize(void);

/* The ALPS schizo module. Only the launch-environment check and the
 * matching finalize are provided; all other entry points are left NULL. */
orte_schizo_base_module_t orte_schizo_alps_module = {
    .check_launch_environment = check_launch_environment,
    /* register finalize so that the envars pushed by
     * check_launch_environment are unset again and the
     * arrays tracking them are released */
    .finalize = finalize
};
static char **pushed_envs = NULL;
static char **pushed_vals = NULL;
static orte_schizo_launch_environ_t myenv;
static bool myenvdefined = false;
/* Determine how this application process was started and push the
 * matching ess/pmix selection directives into the environment.
 *
 * The result is computed once and cached in myenv; later calls
 * return the cached value without touching the environment again.
 *
 * Returns:
 *   ORTE_SCHIZO_NATIVE_LAUNCHED   - a daemon URI was given to us
 *   ORTE_SCHIZO_MANAGED_SINGLETON - no daemon URI and /proc/job
 *                                   could not be opened
 *   ORTE_SCHIZO_DIRECT_LAUNCHED   - no daemon URI and /proc/job exists
 *
 * Every envar set here is recorded in pushed_envs/pushed_vals so
 * that finalize() can remove it again.
 */
static orte_schizo_launch_environ_t check_launch_environment(void)
{
    int i;
    const char proc_job_file[]="/proc/job";
    FILE *fd = NULL, *fd_task_is_app = NULL;
    char task_is_app_fname[PATH_MAX];

    if (myenvdefined) {
        return myenv;
    }
    myenvdefined = true;

    /* we were only selected because we are an app,
     * so no need to further check that here. Instead,
     * see if we were direct launched vs launched via mpirun */
    if (NULL != orte_process_info.my_daemon_uri) {
        /* we have a daemon, so we were not direct launched */
        myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
        opal_argv_append_nosize(&pushed_vals, "pmi");
        goto setup;
    }

    /* see if we are running in a Cray PAGG container */
    fd = fopen(proc_job_file, "r");
    if (NULL == fd) {
        /* we are a singleton */
        myenv = ORTE_SCHIZO_MANAGED_SINGLETON;
        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
        opal_argv_append_nosize(&pushed_vals, "singleton");
    } else {
        /* the daemon URI is known to be NULL at this point (the
         * non-NULL case jumped to setup above), so we can only
         * have been direct launched */
        myenv = ORTE_SCHIZO_DIRECT_LAUNCHED;
        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
        opal_argv_append_nosize(&pushed_vals, "pmi");
        snprintf(task_is_app_fname,sizeof(task_is_app_fname),
                 "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid));
        fd_task_is_app = fopen(task_is_app_fname, "r");
        if (NULL != fd_task_is_app) {
            /* okay we're in a PAGG container, and we are an app
             * task (not just a process running on a mom node,
             * for example) */
            opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"pmix");
            opal_argv_append_nosize(&pushed_vals, "cray");
            /* the file was only opened to test for its existence,
             * so close it again instead of leaking the stream */
            fclose(fd_task_is_app);
        }
        fclose(fd);
    }

  setup:
    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                        "schizo:alps DECLARED AS %s", orte_schizo_base_print_env(myenv));
    /* apply every directive we recorded above */
    if (NULL != pushed_envs) {
        for (i=0; NULL != pushed_envs[i]; i++) {
            opal_setenv(pushed_envs[i], pushed_vals[i], true, &environ);
        }
    }
    return myenv;
}
/* Undo the environment changes made by check_launch_environment:
 * unset every envar recorded in pushed_envs and release both
 * tracking arrays. The pointers are reset to NULL afterwards so
 * that a second call is a harmless no-op instead of a double free.
 */
static void finalize(void)
{
    int i;

    if (NULL != pushed_envs) {
        for (i=0; NULL != pushed_envs[i]; i++) {
            opal_unsetenv(pushed_envs[i], &environ);
        }
        opal_argv_free(pushed_envs);
        pushed_envs = NULL;
        opal_argv_free(pushed_vals);
        pushed_vals = NULL;
    }
}

29
orte/mca/schizo/alps/schizo_alps.h Обычный файл
Просмотреть файл

@ -0,0 +1,29 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _MCA_SCHIZO_ALPS_H_
#define _MCA_SCHIZO_ALPS_H_
#include "orte_config.h"
#include "orte/types.h"
#include "opal/mca/base/base.h"
#include "orte/mca/schizo/schizo.h"
BEGIN_C_DECLS
ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_alps_component;
extern orte_schizo_base_module_t orte_schizo_alps_module;
END_C_DECLS
#endif /* MCA_SCHIZO_ALPS_H_ */

Просмотреть файл

@ -0,0 +1,53 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#include "opal/util/show_help.h"
#include "orte/mca/schizo/schizo.h"
#include "schizo_alps.h"
static int component_query(mca_base_module_t **module, int *priority);
/*
* Public component descriptor for the "alps" schizo component.
* It carries the MCA version information, the component name and
* the query function used to decide whether this component's
* module should be considered.
*/
orte_schizo_base_component_t mca_schizo_alps_component = {
.base_version = {
MCA_SCHIZO_BASE_VERSION_1_0_0,
.mca_component_name = "alps",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
/* only a query function is provided - no open, close
* or register functions are set for this component */
.mca_query_component = component_query,
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
};
/* Component query: offer the ALPS module to application processes only.
 *
 * module   - set to the ALPS schizo module, or NULL when declining
 * priority - set to 90 when offering the module, 0 when declining
 *
 * Returns ORTE_SUCCESS when the module is offered, ORTE_ERROR otherwise.
 */
static int component_query(mca_base_module_t **module, int *priority)
{
    if (ORTE_PROC_IS_APP) {
        /* since we were built, assume we are on an alps system */
        *module = (mca_base_module_t *)&orte_schizo_alps_module;
        *priority = 90;
        return ORTE_SUCCESS;
    }

    /* anything that is not an app has no use for us */
    *module = NULL;
    *priority = 0;
    return ORTE_ERROR;
}

Просмотреть файл

@ -60,6 +60,7 @@ typedef struct {
OBJ_CLASS_DECLARATION(orte_schizo_base_active_module_t);
/* the base stub functions */
ORTE_DECLSPEC const char* orte_schizo_base_print_env(orte_schizo_launch_environ_t env);
ORTE_DECLSPEC int orte_schizo_base_parse_cli(char **personality,
int argc, int start, char **argv);
ORTE_DECLSPEC int orte_schizo_base_parse_env(char **personality,
@ -72,6 +73,8 @@ ORTE_DECLSPEC int orte_schizo_base_setup_fork(orte_job_t *jdata,
ORTE_DECLSPEC int orte_schizo_base_setup_child(orte_job_t *jobdat,
orte_proc_t *child,
orte_app_context_t *app);
ORTE_DECLSPEC orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void);
ORTE_DECLSPEC void orte_schizo_base_finalize(void);
END_C_DECLS

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -37,10 +37,12 @@
*/
orte_schizo_base_t orte_schizo_base = {{{0}}};
orte_schizo_base_module_t orte_schizo = {
orte_schizo_base_parse_cli,
orte_schizo_base_parse_env,
orte_schizo_base_setup_fork,
orte_schizo_base_setup_child
.parse_cli = orte_schizo_base_parse_cli,
.parse_env = orte_schizo_base_parse_env,
.setup_fork = orte_schizo_base_setup_fork,
.setup_child = orte_schizo_base_setup_child,
.check_launch_environment = orte_schizo_base_check_launch_environment,
.finalize = orte_schizo_base_finalize
};
static int orte_schizo_base_close(void)

Просмотреть файл

@ -100,7 +100,7 @@ int orte_schizo_base_select(void)
}
if (4 < opal_output_get_verbosity(orte_schizo_base_framework.framework_output)) {
opal_output(0, "%s: Final schizo priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
opal_output(0, "Final schizo priorities");
/* show the prioritized list */
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
opal_output(0, "\tSchizo: %s Priority: %d", mod->component->mca_component_name, mod->pri);

Просмотреть файл

@ -19,6 +19,24 @@
#include "orte/util/name_fns.h"
#include "orte/mca/schizo/base/base.h"
/* Translate a launch-environment code into a printable string.
 *
 * env - the code to translate
 *
 * Returns a pointer to a constant string naming the code, or
 * "INVALID_CODE" when the value is not one of the known codes.
 * The returned string must not be freed or modified.
 */
const char* orte_schizo_base_print_env(orte_schizo_launch_environ_t env)
{
    const char *label;

    switch(env) {
    case ORTE_SCHIZO_UNDETERMINED:
        label = "UNDETERMINED";
        break;
    case ORTE_SCHIZO_NATIVE_LAUNCHED:
        label = "NATIVE_LAUNCHED";
        break;
    case ORTE_SCHIZO_UNMANAGED_SINGLETON:
        label = "UNMANAGED_SINGLETON";
        break;
    case ORTE_SCHIZO_DIRECT_LAUNCHED:
        label = "DIRECT_LAUNCHED";
        break;
    case ORTE_SCHIZO_MANAGED_SINGLETON:
        label = "MANAGED_SINGLETON";
        break;
    default:
        label = "INVALID_CODE";
        break;
    }
    return label;
}
int orte_schizo_base_parse_cli(char **personality,
int argc, int start, char **argv)
{
@ -98,3 +116,30 @@ int orte_schizo_base_setup_child(orte_job_t *jdata,
}
return ORTE_SUCCESS;
}
/* Ask the active schizo modules, in list order, how we were launched.
 *
 * Modules that do not implement check_launch_environment are skipped.
 * The first answer other than ORTE_SCHIZO_UNDETERMINED is returned;
 * if no module provides one, ORTE_SCHIZO_UNDETERMINED is returned.
 */
orte_schizo_launch_environ_t orte_schizo_base_check_launch_environment(void)
{
    orte_schizo_base_active_module_t *active;
    orte_schizo_launch_environ_t answer;

    OPAL_LIST_FOREACH(active, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
        if (NULL == active->module->check_launch_environment) {
            /* this module has nothing to say on the matter */
            continue;
        }
        answer = active->module->check_launch_environment();
        if (ORTE_SCHIZO_UNDETERMINED == answer) {
            /* no opinion - keep asking */
            continue;
        }
        return answer;
    }

    return ORTE_SCHIZO_UNDETERMINED;
}
/* Invoke the finalize entry point of every active schizo module
 * that provides one; modules without it are skipped.
 */
void orte_schizo_base_finalize(void)
{
    orte_schizo_base_active_module_t *active;

    OPAL_LIST_FOREACH(active, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
        if (NULL == active->module->finalize) {
            continue;
        }
        active->module->finalize();
    }
}

Просмотреть файл

@ -63,10 +63,10 @@ static int setup_child(orte_job_t *jobdat,
orte_app_context_t *app);
orte_schizo_base_module_t orte_schizo_ompi_module = {
parse_cli,
parse_env,
setup_fork,
setup_child
.parse_cli = parse_cli,
.parse_env = parse_env,
.setup_fork = setup_fork,
.setup_child = setup_child
};
static int parse_cli(char **personality,

Просмотреть файл

@ -16,6 +16,8 @@
#include "opal/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/schizo/schizo.h"
#include "schizo_ompi.h"
@ -40,6 +42,12 @@ orte_schizo_base_component_t mca_schizo_ompi_component = {
static int component_query(mca_base_module_t **module, int *priority)
{
/* if we are an app, ignore us */
if (ORTE_PROC_IS_APP) {
*module = NULL;
*priority = 0;
return ORTE_ERROR;
}
*module = (mca_base_module_t*)&orte_schizo_ompi_module;
*priority = 10;
return ORTE_SUCCESS;

35
orte/mca/schizo/orte/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
#
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
schizo_orte_component.c \
schizo_orte.h \
schizo_orte.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_schizo_orte_DSO
component_noinst =
component_install = mca_schizo_orte.la
else
component_noinst = libmca_schizo_orte.la
component_install =
endif
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_schizo_orte_la_SOURCES = $(sources)
mca_schizo_orte_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_schizo_orte_la_SOURCES = $(sources)
libmca_schizo_orte_la_LDFLAGS = -module -avoid-version

90
orte/mca/schizo/orte/schizo_orte.c Обычный файл
Просмотреть файл

@ -0,0 +1,90 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
#include "opal/util/basename.h"
#include "opal/util/opal_environ.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/mca/schizo/base/base.h"
#include "schizo_orte.h"
/* module entry points - defined below */
static orte_schizo_launch_environ_t check_launch_environment(void);
static void finalize(void);
/* The schizo/orte module: it only provides the launch-environment
 * check and the matching cleanup; every other module entry is left
 * unset (NULL) by the designated initializer */
orte_schizo_base_module_t orte_schizo_orte_module = {
.check_launch_environment = check_launch_environment,
.finalize = finalize
};
/* names of the envars pushed into the local environment by
 * check_launch_environment; finalize unsets each of them and
 * frees the array */
static char **pushed_envs = NULL;
/* values for the envars above - entry i is the value that was
 * set for pushed_envs[i] */
static char **pushed_vals = NULL;
/* cached result of the launch-environment check; only meaningful
 * once myenvdefined is true */
static orte_schizo_launch_environ_t myenv;
/* set on the first call to check_launch_environment so that later
 * calls return the cached myenv without repeating the work */
static bool myenvdefined = false;
/*
 * Decide whether this application process was started via mpirun or
 * is a singleton, and push the matching ess selection into the local
 * environment.
 *
 * The decision is made once; subsequent calls return the cached
 * answer without touching the environment again.
 *
 * Returns ORTE_SCHIZO_NATIVE_LAUNCHED when a daemon URI is present in
 * orte_process_info, otherwise ORTE_SCHIZO_UNMANAGED_SINGLETON.
 */
static orte_schizo_launch_environ_t check_launch_environment(void)
{
    const char *ess_choice;
    int idx;

    if (myenvdefined) {
        return myenv;
    }
    myenvdefined = true;

    /* we were only selected because we are an app, so the only open
     * question is whether we were launched via mpirun (a daemon URI
     * is present) or not */
    if (NULL != orte_process_info.my_daemon_uri) {
        myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
        ess_choice = "pmi";
    } else {
        /* if nobody else has laid claim to this process,
         * then it must be a singleton */
        myenv = ORTE_SCHIZO_UNMANAGED_SINGLETON;
        ess_choice = "singleton";
    }

    /* remember what we are about to push so finalize can undo it */
    opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
    opal_argv_append_nosize(&pushed_vals, ess_choice);

    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                        "schizo:orte DECLARED AS %s", orte_schizo_base_print_env(myenv));

    if (NULL != pushed_envs) {
        idx = 0;
        while (NULL != pushed_envs[idx]) {
            opal_setenv(pushed_envs[idx], pushed_vals[idx], true, &environ);
            ++idx;
        }
    }

    return myenv;
}
static void finalize(void)
{
int i;
if (NULL != pushed_envs) {
for (i=0; NULL != pushed_envs[i]; i++) {
opal_unsetenv(pushed_envs[i], &environ);
}
opal_argv_free(pushed_envs);
opal_argv_free(pushed_vals);
}
}

29
orte/mca/schizo/orte/schizo_orte.h Обычный файл
Просмотреть файл

@ -0,0 +1,29 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _MCA_SCHIZO_ORTE_H_
#define _MCA_SCHIZO_ORTE_H_
#include "orte_config.h"
#include "orte/types.h"
#include "opal/mca/base/base.h"
#include "orte/mca/schizo/schizo.h"
BEGIN_C_DECLS
ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_orte_component;
extern orte_schizo_base_module_t orte_schizo_orte_module;
END_C_DECLS
#endif /* MCA_SCHIZO_ORTE_H_ */

Просмотреть файл

@ -0,0 +1,52 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#include "opal/util/show_help.h"
#include "orte/mca/schizo/schizo.h"
#include "schizo_orte.h"
/* selection callback referenced by the component struct - defined below */
static int component_query(mca_base_module_t **module, int *priority);
/*
 * Public component descriptor for schizo/orte: it carries the
 * component name ("orte"), the ORTE version numbers this component
 * was built with, and the query function used when selecting modules.
 */
orte_schizo_base_component_t mca_schizo_orte_component = {
.base_version = {
MCA_SCHIZO_BASE_VERSION_1_0_0,
.mca_component_name = "orte",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
.mca_query_component = component_query,
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
};
/*
 * Selection callback for the schizo/orte component.
 *
 * module   - out: set to the orte module when we are usable, NULL otherwise
 * priority - out: 1 when usable, 0 otherwise
 *
 * Returns ORTE_SUCCESS when the module is offered, ORTE_ERROR when the
 * component disqualifies itself. The ORTE error code is used (rather
 * than the OPAL one) to match the ORTE_SUCCESS returned below and the
 * other schizo components.
 */
static int component_query(mca_base_module_t **module, int *priority)
{
    /* disqualify ourselves if we are not an app */
    if (!ORTE_PROC_IS_APP) {
        *priority = 0;
        *module = NULL;
        return ORTE_ERROR;
    }

    *module = (mca_base_module_t*)&orte_schizo_orte_module;
    *priority = 1;
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -39,6 +39,9 @@ BEGIN_C_DECLS
* SCHIZO module functions - the modules are accessed via
* the base stub functions
*/
typedef int (*orte_schizo_base_module_init_fn_t)(void);
typedef int (*orte_schizo_base_module_parse_cli_fn_t)(char **personality,
int argc, int start,
char **argv);
@ -56,14 +59,37 @@ typedef int (*orte_schizo_base_module_setup_child_fn_t)(orte_job_t *jdata,
orte_proc_t *child,
orte_app_context_t *app);
/* Classification of how a process was launched, as reported by a
 * module's check_launch_environment function */
typedef enum {
/* no verdict - a module returns this when it cannot classify the
 * process, and the base returns it when no active module could */
ORTE_SCHIZO_UNDETERMINED,
/* a daemon URI was provided to the process, i.e. it was started
 * via mpirun rather than direct launched */
ORTE_SCHIZO_NATIVE_LAUNCHED,
/* no daemon URI and no managed environment claimed the process,
 * so it is running as a singleton */
ORTE_SCHIZO_UNMANAGED_SINGLETON,
/* launched directly by the managed environment's own launcher
 * (e.g., inside a SLURM job step) */
ORTE_SCHIZO_DIRECT_LAUNCHED,
/* inside a managed allocation but not started by its launcher
 * (e.g., in a SLURM allocation without a job step) */
ORTE_SCHIZO_MANAGED_SINGLETON
} orte_schizo_launch_environ_t;
/* check if this process was directly launched by a managed environment, and
* do whatever the module wants to do under those conditions. The module
* can push any required envars into the local environment, but must remember
* to "unset" them during finalize. The module then returns a flag indicating
* the launch environment of the process */
typedef orte_schizo_launch_environ_t (*orte_schizo_base_module_ck_launch_environ_fn_t)(void);
/* give the component a chance to cleanup */
typedef void (*orte_schizo_base_module_finalize_fn_t)(void);
/*
* schizo module version 1.3.0
*/
typedef struct {
orte_schizo_base_module_parse_cli_fn_t parse_cli;
orte_schizo_base_module_parse_env_fn_t parse_env;
orte_schizo_base_module_setup_fork_fn_t setup_fork;
orte_schizo_base_module_setup_child_fn_t setup_child;
orte_schizo_base_module_init_fn_t init;
orte_schizo_base_module_parse_cli_fn_t parse_cli;
orte_schizo_base_module_parse_env_fn_t parse_env;
orte_schizo_base_module_setup_fork_fn_t setup_fork;
orte_schizo_base_module_setup_child_fn_t setup_child;
orte_schizo_base_module_ck_launch_environ_fn_t check_launch_environment;
orte_schizo_base_module_finalize_fn_t finalize;
} orte_schizo_base_module_t;
ORTE_DECLSPEC extern orte_schizo_base_module_t orte_schizo;

Просмотреть файл

@ -30,10 +30,7 @@ static int setup_fork(orte_job_t *jdata,
orte_app_context_t *context);
orte_schizo_base_module_t orte_schizo_singularity_module = {
NULL,
NULL,
setup_fork,
NULL
.setup_fork = setup_fork
};
static int setup_fork(orte_job_t *jdata,

Просмотреть файл

@ -14,6 +14,8 @@
#include "opal/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/schizo/schizo.h"
#include "schizo_singularity.h"
@ -38,6 +40,12 @@ orte_schizo_base_component_t mca_schizo_singularity_component = {
static int component_query(mca_base_module_t **module, int *priority)
{
/* if we are an app, ignore us */
if (ORTE_PROC_IS_APP) {
*module = NULL;
*priority = 0;
return ORTE_ERROR;
}
*module = (mca_base_module_t*)&orte_schizo_singularity_module;
*priority = 5;
return ORTE_SUCCESS;

35
orte/mca/schizo/slurm/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
#
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
schizo_slurm_component.c \
schizo_slurm.h \
schizo_slurm.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_schizo_slurm_DSO
component_noinst =
component_install = mca_schizo_slurm.la
else
component_noinst = libmca_schizo_slurm.la
component_install =
endif
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_schizo_slurm_la_SOURCES = $(sources)
mca_schizo_slurm_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_schizo_slurm_la_SOURCES = $(sources)
libmca_schizo_slurm_la_LDFLAGS = -module -avoid-version

41
orte/mca/schizo/slurm/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,41 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2016 Intel, Inc. All rights reserved
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_orte_schizo_slurm_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_orte_schizo_slurm_CONFIG],[
AC_CONFIG_FILES([orte/mca/schizo/slurm/Makefile])
ORTE_CHECK_SLURM([schizo_slurm], [schizo_slurm_good=1], [schizo_slurm_good=0])
# Evaluate success / failure of the SLURM check and run the
# corresponding action supplied by the caller
AS_IF([test "$schizo_slurm_good" = "1"],
[$1],
[$2])
# set build flags to use in makefile
AC_SUBST([schizo_slurm_CPPFLAGS])
AC_SUBST([schizo_slurm_LDFLAGS])
AC_SUBST([schizo_slurm_LIBS])
])dnl

7
orte/mca/schizo/slurm/owner.txt Обычный файл
Просмотреть файл

@ -0,0 +1,7 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: INTEL
status: active

135
orte/mca/schizo/slurm/schizo_slurm.c Обычный файл
Просмотреть файл

@ -0,0 +1,135 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
#include "opal/util/basename.h"
#include "opal/util/opal_environ.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/mca/schizo/base/base.h"
#include "schizo_slurm.h"
/* module entry points - defined below */
static orte_schizo_launch_environ_t check_launch_environment(void);
static void finalize(void);
/* The schizo/slurm module: it only provides the launch-environment
 * check and the matching cleanup; every other module entry is left
 * unset (NULL) by the designated initializer */
orte_schizo_base_module_t orte_schizo_slurm_module = {
.check_launch_environment = check_launch_environment,
.finalize = finalize
};
/* names of the envars pushed into the local environment by
 * check_launch_environment; finalize unsets each of them and
 * frees the array */
static char **pushed_envs = NULL;
/* values for the envars above - entry i is the value that was
 * set for pushed_envs[i] */
static char **pushed_vals = NULL;
/* cached result of the launch-environment check; only meaningful
 * once myenvdefined is true */
static orte_schizo_launch_environ_t myenv;
/* set on the first call to check_launch_environment so that later
 * calls return the cached myenv without repeating the work */
static bool myenvdefined = false;
/*
 * Classify how this application process was started with respect to
 * SLURM and push the matching MCA directives into the local
 * environment.
 *
 * The decision is made once; subsequent calls return the cached
 * answer without touching the environment again.
 *
 * Returns:
 *   ORTE_SCHIZO_NATIVE_LAUNCHED   - a daemon URI is present in orte_process_info
 *   ORTE_SCHIZO_UNDETERMINED      - SLURM_NODELIST is not set (nothing is pushed)
 *   ORTE_SCHIZO_MANAGED_SINGLETON - SLURM_NODELIST set, SLURM_STEP_ID not set
 *   ORTE_SCHIZO_DIRECT_LAUNCHED   - both SLURM_NODELIST and SLURM_STEP_ID set
 */
static orte_schizo_launch_environ_t check_launch_environment(void)
{
    const char *bind_type, *bind_list, *digit;
    int idx;

    if (myenvdefined) {
        return myenv;
    }
    myenvdefined = true;

    if (NULL != orte_process_info.my_daemon_uri) {
        /* a daemon URI was handed to us, so we were launched via
         * mpirun and not directly by SLURM */
        myenv = ORTE_SCHIZO_NATIVE_LAUNCHED;
        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
        opal_argv_append_nosize(&pushed_vals, "pmi");
    } else if (NULL == getenv("SLURM_NODELIST")) {
        /* not inside a SLURM allocation - this is not ours to call,
         * so report that without pushing anything */
        myenv = ORTE_SCHIZO_UNDETERMINED;
        return myenv;
    } else if (NULL == getenv("SLURM_STEP_ID")) {
        /* in an allocation but not in a job step - ensure we
         * select the correct things for a singleton */
        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
        opal_argv_append_nosize(&pushed_vals, "singleton");
        myenv = ORTE_SCHIZO_MANAGED_SINGLETON;
    } else {
        /* in a job step, so SLURM launched us directly */
        myenv = ORTE_SCHIZO_DIRECT_LAUNCHED;
        opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"ess");
        opal_argv_append_nosize(&pushed_vals, "pmi");

        /* we do not want to override SLURM's binding options, so
         * translate what it told us into the matching envars */
        bind_type = getenv("SLURM_CPU_BIND_TYPE");
        if (NULL != bind_type && 0 == strcmp(bind_type, "none")) {
            opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"hwloc_base_binding_policy");
            opal_argv_append_nosize(&pushed_vals, "none");
            /* indicate we are externally bound so we won't try to do it ourselves */
            opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"orte_externally_bound");
            opal_argv_append_nosize(&pushed_vals, "1");
        } else if (NULL != bind_type && 0 == strcmp(bind_type, "mask_cpu")) {
            /* if the bind list is all F's, then the
             * user didn't specify anything */
            bind_list = getenv("SLURM_CPU_BIND_LIST");
            digit = (NULL != bind_list) ? strchr(bind_list, 'x') : NULL;
            if (NULL != digit) {
                ++digit;  /* step over the 'x' */
                while ('F' == *digit) {
                    ++digit;
                }
                /* stopping before the end of the string means we hit
                 * something other than 'F' */
                if ('\0' != *digit) {
                    /* indicate we are externally bound */
                    opal_argv_append_nosize(&pushed_envs, OPAL_MCA_PREFIX"orte_externally_bound");
                    opal_argv_append_nosize(&pushed_vals, "1");
                }
            }
        }
    }

    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                        "schizo:slurm DECLARED AS %s", orte_schizo_base_print_env(myenv));

    if (NULL != pushed_envs) {
        idx = 0;
        while (NULL != pushed_envs[idx]) {
            opal_setenv(pushed_envs[idx], pushed_vals[idx], true, &environ);
            ++idx;
        }
    }

    return myenv;
}
static void finalize(void)
{
int i;
if (NULL != pushed_envs) {
for (i=0; NULL != pushed_envs[i]; i++) {
opal_unsetenv(pushed_envs[i], &environ);
}
opal_argv_free(pushed_envs);
opal_argv_free(pushed_vals);
}
}

29
orte/mca/schizo/slurm/schizo_slurm.h Обычный файл
Просмотреть файл

@ -0,0 +1,29 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _MCA_SCHIZO_SLURM_H_
#define _MCA_SCHIZO_SLURM_H_
#include "orte_config.h"
#include "orte/types.h"
#include "opal/mca/base/base.h"
#include "orte/mca/schizo/schizo.h"
BEGIN_C_DECLS
ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_slurm_component;
extern orte_schizo_base_module_t orte_schizo_slurm_module;
END_C_DECLS
#endif /* MCA_SCHIZO_SLURM_H_ */

Просмотреть файл

@ -0,0 +1,52 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#include "opal/util/show_help.h"
#include "orte/mca/schizo/schizo.h"
#include "schizo_slurm.h"
/* selection callback referenced by the component struct - defined below */
static int component_query(mca_base_module_t **module, int *priority);
/*
 * Public component descriptor for schizo/slurm: it carries the
 * component name ("slurm"), the ORTE version numbers this component
 * was built with, and the query function used when selecting modules.
 */
orte_schizo_base_component_t mca_schizo_slurm_component = {
.base_version = {
MCA_SCHIZO_BASE_VERSION_1_0_0,
.mca_component_name = "slurm",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
.mca_query_component = component_query,
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
};
/*
 * Selection callback for the schizo/slurm component.
 *
 * module   - out: set to the slurm module when we are usable, NULL otherwise
 * priority - out: 50 when usable, 0 otherwise
 *
 * Returns ORTE_SUCCESS when the module is offered, ORTE_ERROR when the
 * component disqualifies itself. The ORTE error code is used (rather
 * than the OPAL one) to match the ORTE_SUCCESS returned below and the
 * other schizo components.
 */
static int component_query(mca_base_module_t **module, int *priority)
{
    /* disqualify ourselves if we are not an app. Note that no check
     * for a SLURM allocation is made here - the module itself looks
     * at the SLURM envars when asked to check the launch environment */
    if (!ORTE_PROC_IS_APP) {
        *priority = 0;
        *module = NULL;
        return ORTE_ERROR;
    }

    *module = (mca_base_module_t*)&orte_schizo_slurm_module;
    *priority = 50;
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -553,6 +553,7 @@ int orte_daemon(int argc, char *argv[])
app->app = strdup("singleton");
app->num_procs = 1;
opal_pointer_array_add(jdata->apps, app);
jdata->num_apps = 1;
/* setup a proc object for the singleton - since we
* -must- be the HNP, and therefore we stored our

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -30,6 +30,7 @@
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/schizo/base/base.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/runtime.h"
#include "orte/runtime/orte_locks.h"
@ -76,6 +77,10 @@ int orte_finalize(void)
/* close the ess itself */
(void) mca_base_framework_close(&orte_ess_base_framework);
/* finalize and close schizo */
orte_schizo.finalize();
(void) mca_base_framework_close(&orte_schizo_base_framework);
/* cleanup the process info */
orte_proc_info_finalize();

Просмотреть файл

@ -13,7 +13,7 @@
* reserved.
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
@ -45,6 +45,7 @@
#include "orte/mca/ess/base/base.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/schizo/base/base.h"
#include "orte/util/listener.h"
#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
@ -202,6 +203,22 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
pmix_server_register_params();
}
/* open the SCHIZO framework as everyone needs it, and the
* ess will use it to help select its component */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_schizo_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
error = "orte_schizo_base_select";
goto error;
}
/* if we are an app, let SCHIZO help us determine our environment */
if (ORTE_PROC_IS_APP) {
(void)orte_schizo.check_launch_environment();
}
/* open the ESS and select the correct module for this environment */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -822,6 +822,8 @@ int orterun(int argc, char *argv[])
* orterun
*/
orte_launch_environ = opal_argv_copy(environ);
opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ);
/* Initialize our Open RTE environment
* Set the flag telling orte_init that I am NOT a
@ -1106,6 +1108,9 @@ int orterun(int argc, char *argv[])
/* cleanup and leave */
orte_finalize();
if (NULL != orte_launch_environ) {
opal_argv_free(orte_launch_environ);
}
if (orte_debug_flag) {
fprintf(stderr, "exiting with status %d\n", orte_exit_status);
}