1
1

Further enhance the support for Singularity containers. Extend the "personality" command-line option to allow specifying both model (e.g., "ompi") and container (e.g., "singularity"), and add the necessary logic to support multiple options. Add a new pmix "isolated" component to handle singletons where no HNP is available since containers cannot launch the HNP.

Этот коммит содержится в:
Ralph Castain 2016-02-17 08:32:17 -08:00
родитель d544e0e6e0
Коммит 8f9508cace
22 изменённых файлов: 769 добавлений и 62 удалений

34
opal/mca/pmix/isolated/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,34 @@
#
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files that make up the pmix "isolated" component; both library
# variants defined below are built from this one list.
sources = \
pmix_isolated.h \
pmix_isolated_component.c \
pmix_isolated.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty,
# chosen by the MCA_BUILD_opal_pmix_isolated_DSO conditional.
if MCA_BUILD_opal_pmix_isolated_DSO
component_noinst =
component_install = mca_pmix_isolated.la
else
component_noinst = libmca_pmix_isolated.la
component_install =
endif
# DSO variant: listed for installation into $(opallibdir).
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_pmix_isolated_la_SOURCES = $(sources)
mca_pmix_isolated_la_LDFLAGS = -module -avoid-version
# Static variant: listed under noinst, so it is built but not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_pmix_isolated_la_SOURCES =$(sources)
libmca_pmix_isolated_la_LDFLAGS = -module -avoid-version

438
opal/mca/pmix/isolated/pmix_isolated.c Обычный файл
Просмотреть файл

@ -0,0 +1,438 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All
* rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/types.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "opal/dss/dss.h"
#include "opal/mca/event/event.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_progress_threads.h"
#include "opal/util/argv.h"
#include "opal/util/error.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "pmix_isolated.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/mca/pmix/base/pmix_base_hash.h"
/*
 * Forward declarations of the module entry points.  They are all file
 * local and are only exported through the opal_pmix_isolated_module
 * function table defined below.
 */

/* lifecycle */
static int isolated_init(void);
static int isolated_fini(void);
static int isolated_initialized(void);
static int isolated_abort(int flag, const char *msg,
                          opal_list_t *procs);

/* job control */
static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid);
static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps,
                             opal_pmix_spawn_cbfunc_t cbfunc,
                             void *cbdata);
static int isolated_job_connect(opal_list_t *procs);
static int isolated_job_disconnect(opal_list_t *procs);
static int isolated_job_disconnect_nb(opal_list_t *procs,
                                      opal_pmix_op_cbfunc_t cbfunc,
                                      void *cbdata);
static int isolated_resolve_peers(const char *nodename,
                                  opal_jobid_t jobid,
                                  opal_list_t *procs);
static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist);

/* key/value exchange */
static int isolated_put(opal_pmix_scope_t scope, opal_value_t *kv);
static int isolated_fence(opal_list_t *procs, int collect_data);
static int isolated_fence_nb(opal_list_t *procs, int collect_data,
                             opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
static int isolated_commit(void);
static int isolated_get(const opal_process_name_t *id,
                        const char *key, opal_list_t *info,
                        opal_value_t **kv);
static int isolated_get_nb(const opal_process_name_t *id, const char *key,
                           opal_list_t *info,
                           opal_pmix_value_cbfunc_t cbfunc, void *cbdata);

/* publish / lookup */
static int isolated_publish(opal_list_t *info);
static int isolated_publish_nb(opal_list_t *info,
                               opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
static int isolated_lookup(opal_list_t *data, opal_list_t *info);
static int isolated_lookup_nb(char **keys, opal_list_t *info,
                              opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
static int isolated_unpublish(char **keys, opal_list_t *info);
static int isolated_unpublish_nb(char **keys, opal_list_t *info,
                                 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);

/* miscellaneous */
static const char *isolated_get_version(void);
static int isolated_store_local(const opal_process_name_t *proc,
                                opal_value_t *val);
static const char *isolated_get_nspace(opal_jobid_t jobid);
static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace);
const opal_pmix_base_module_t opal_pmix_isolated_module = {
.init = isolated_init,
.finalize = isolated_fini,
.initialized = isolated_initialized,
.abort = isolated_abort,
.commit = isolated_commit,
.fence = isolated_fence,
.fence_nb = isolated_fence_nb,
.put = isolated_put,
.get = isolated_get,
.get_nb = isolated_get_nb,
.publish = isolated_publish,
.publish_nb = isolated_publish_nb,
.lookup = isolated_lookup,
.lookup_nb = isolated_lookup_nb,
.unpublish = isolated_unpublish,
.unpublish_nb = isolated_unpublish_nb,
.spawn = isolated_spawn,
.spawn_nb = isolated_spawn_nb,
.connect = isolated_job_connect,
.disconnect = isolated_job_disconnect,
.disconnect_nb = isolated_job_disconnect_nb,
.resolve_peers = isolated_resolve_peers,
.resolve_nodes = isolated_resolve_nodes,
.get_version = isolated_get_version,
.register_errhandler = opal_pmix_base_register_handler,
.deregister_errhandler = opal_pmix_base_deregister_handler,
.store_local = isolated_store_local,
.get_nspace = isolated_get_nspace,
.register_jobid = isolated_register_jobid
};
/* temporary process name assigned in isolated_init() and used as the
 * owner of values stored through isolated_put() */
static opal_process_name_t pmix_pname;
/* number of outstanding isolated_init() calls; objects with static
 * storage duration start out zeroed, so no explicit initializer is needed */
static int pmix_init_count;
/*
 * Initialize the isolated (singleton) pmix module.
 *
 * Calls are reference counted to pair with isolated_fini(): only the
 * first call does the real work - assigning a temporary process name,
 * setting up the base hash table and seeding it with the values a lone
 * process needs (sizes of 1, ranks of 0).  Later calls only bump the
 * count.
 *
 * Returns OPAL_SUCCESS, or the error reported by opal_pmix_base_store().
 * On error the hash table is finalized again and the reference count is
 * restored, so a failed init leaves no state behind.
 */
static int isolated_init(void)
{
    int rc;
    opal_value_t kv;

    /* already set up by an earlier caller - just record the reference */
    if (0 < pmix_init_count) {
        ++pmix_init_count;
        return OPAL_SUCCESS;
    }
    ++pmix_init_count;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    pmix_pname.jobid = 1;
    pmix_pname.vpid = 0;
    opal_proc_set_name(&pmix_pname);
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated: assigned tmp name %d %d",
                        OPAL_NAME_PRINT(pmix_pname),pmix_pname.jobid,pmix_pname.vpid);

    /* setup hash table */
    opal_pmix_base_hash_init();

    /* save the job size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the appnum */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 0;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the universe size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the jobid - same value as the temporary name set above */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the list of local peers - just ourselves */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
    kv.type = OPAL_STRING;
    kv.data.string = strdup("0");
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = 0;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    /* release the key before kv is constructed again below */
    OBJ_DESTRUCT(&kv);

    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = 0;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    /* release the key before kv is constructed again below */
    OBJ_DESTRUCT(&kv);

    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = 0;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    return OPAL_SUCCESS;

 err_exit:
    /* undo the partial setup so that a failed init leaves nothing behind
     * and a later isolated_init() starts from scratch */
    opal_pmix_base_hash_finalize();
    --pmix_init_count;
    return rc;
}
/*
 * Drop one reference taken by isolated_init().  The base hash table is
 * finalized only when the last outstanding reference goes away; calling
 * this with no reference outstanding is a harmless no-op.
 * Always returns OPAL_SUCCESS.
 */
static int isolated_fini(void)
{
    /* the decrement is only evaluated when the count is non-zero */
    if (0 != pmix_init_count && 0 == --pmix_init_count) {
        opal_pmix_base_hash_finalize();
    }
    return OPAL_SUCCESS;
}
/* Report whether the module is currently initialized: 1 while at least
 * one isolated_init() reference is outstanding, 0 otherwise. */
static int isolated_initialized(void)
{
    return (0 < pmix_init_count) ? 1 : 0;
}
/* Abort request: the arguments are ignored, nothing is done, and
 * OPAL_SUCCESS is returned. */
static int isolated_abort(int flag, const char *msg,
                          opal_list_t *procs)
{
    (void)flag;
    (void)msg;
    (void)procs;
    return OPAL_SUCCESS;
}
/* Blocking spawn: always returns OPAL_ERR_NOT_SUPPORTED; the arguments
 * are ignored and *jobid is left untouched. */
static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid)
{
    (void)jobinfo;
    (void)apps;
    (void)jobid;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Non-blocking spawn: always returns OPAL_ERR_NOT_SUPPORTED; the
 * callback is never invoked. */
static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps,
                             opal_pmix_spawn_cbfunc_t cbfunc,
                             void *cbdata)
{
    (void)jobinfo;
    (void)apps;
    (void)cbfunc;
    (void)cbdata;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Connect: always returns OPAL_ERR_NOT_SUPPORTED; the list of procs is
 * ignored. */
static int isolated_job_connect(opal_list_t *procs)
{
    (void)procs;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Blocking disconnect: always returns OPAL_ERR_NOT_SUPPORTED; the list
 * of procs is ignored. */
static int isolated_job_disconnect(opal_list_t *procs)
{
    (void)procs;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Non-blocking disconnect: always returns OPAL_ERR_NOT_SUPPORTED; the
 * callback is never invoked. */
static int isolated_job_disconnect_nb(opal_list_t *procs,
                                      opal_pmix_op_cbfunc_t cbfunc,
                                      void *cbdata)
{
    (void)procs;
    (void)cbfunc;
    (void)cbdata;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Peer resolution: always returns OPAL_ERR_NOT_IMPLEMENTED; nothing is
 * added to procs. */
static int isolated_resolve_peers(const char *nodename,
                                  opal_jobid_t jobid,
                                  opal_list_t *procs)
{
    (void)nodename;
    (void)jobid;
    (void)procs;
    return OPAL_ERR_NOT_IMPLEMENTED;
}
/* Node resolution: always returns OPAL_ERR_NOT_IMPLEMENTED; *nodelist
 * is left untouched. */
static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist)
{
    (void)jobid;
    (void)nodelist;
    return OPAL_ERR_NOT_IMPLEMENTED;
}
/*
 * Store a key/value pair under this process' temporary name.
 *
 * scope is only reported in the verbose output.  Returns OPAL_ERROR when
 * the module has not been initialized, otherwise whatever
 * opal_pmix_base_store() returns.
 */
static int isolated_put(opal_pmix_scope_t scope,
                        opal_value_t *kv)
{
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated_put key %s scope %d\n",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope);

    if (0 == pmix_init_count) {
        return OPAL_ERROR;
    }

    return opal_pmix_base_store(&pmix_pname, kv);
}
/* Commit: does nothing and returns OPAL_SUCCESS. */
static int isolated_commit(void)
{
    const int status = OPAL_SUCCESS;

    return status;
}
/* Blocking fence: the arguments are ignored and OPAL_SUCCESS is
 * returned immediately. */
static int isolated_fence(opal_list_t *procs, int collect_data)
{
    (void)procs;
    (void)collect_data;
    return OPAL_SUCCESS;
}
/* Non-blocking fence: always returns OPAL_ERR_NOT_IMPLEMENTED; the
 * callback is never invoked. */
static int isolated_fence_nb(opal_list_t *procs, int collect_data,
                             opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
    (void)procs;
    (void)collect_data;
    (void)cbfunc;
    (void)cbdata;
    return OPAL_ERR_NOT_IMPLEMENTED;
}
/*
 * Fetch the value stored for (id, key) from the base hash table.
 *
 * id    process whose data is wanted
 * key   key to look up
 * info  unused
 * kv    on success receives the first value returned by the fetch; the
 *       item is removed from the local list before being handed over.
 *       Not written on failure.
 *
 * Returns the status of opal_pmix_base_fetch().
 */
static int isolated_get(const opal_process_name_t *id,
                        const char *key, opal_list_t *info,
                        opal_value_t **kv)
{
    int rc;
    opal_list_t vals;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated getting value for proc %s key %s",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        OPAL_NAME_PRINT(*id), key);

    OBJ_CONSTRUCT(&vals, opal_list_t);
    rc = opal_pmix_base_fetch(id, key, &vals);
    if (OPAL_SUCCESS == rc) {
        *kv = (opal_value_t*)opal_list_remove_first(&vals);
    } else {
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s pmix:isolated fetch from dstore failed: %d",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc);
    }

    /* tear the list down on every path - on success this also releases
     * any values beyond the first one, which would otherwise be lost */
    OPAL_LIST_DESTRUCT(&vals);
    return rc;
}
/* Non-blocking get: always returns OPAL_ERR_NOT_IMPLEMENTED; the
 * callback is never invoked. */
static int isolated_get_nb(const opal_process_name_t *id, const char *key,
                           opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
{
    (void)id;
    (void)key;
    (void)info;
    (void)cbfunc;
    (void)cbdata;
    return OPAL_ERR_NOT_IMPLEMENTED;
}
/* Blocking publish: always returns OPAL_ERR_NOT_SUPPORTED; info is
 * ignored. */
static int isolated_publish(opal_list_t *info)
{
    (void)info;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Non-blocking publish: always returns OPAL_ERR_NOT_SUPPORTED; the
 * callback is never invoked. */
static int isolated_publish_nb(opal_list_t *info,
                               opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
    (void)info;
    (void)cbfunc;
    (void)cbdata;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Blocking lookup: always returns OPAL_ERR_NOT_SUPPORTED; neither list
 * is touched. */
static int isolated_lookup(opal_list_t *data, opal_list_t *info)
{
    (void)data;
    (void)info;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Non-blocking lookup: always returns OPAL_ERR_NOT_SUPPORTED; the
 * callback is never invoked. */
static int isolated_lookup_nb(char **keys, opal_list_t *info,
                              opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
    (void)keys;
    (void)info;
    (void)cbfunc;
    (void)cbdata;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Blocking unpublish: always returns OPAL_ERR_NOT_SUPPORTED; the
 * arguments are ignored. */
static int isolated_unpublish(char **keys, opal_list_t *info)
{
    (void)keys;
    (void)info;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Non-blocking unpublish: always returns OPAL_ERR_NOT_SUPPORTED; the
 * callback is never invoked. */
static int isolated_unpublish_nb(char **keys, opal_list_t *info,
                                 opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
    (void)keys;
    (void)info;
    (void)cbfunc;
    (void)cbdata;
    return OPAL_ERR_NOT_SUPPORTED;
}
/* Version query: this module has no version of its own to report, so
 * the fixed placeholder string "N/A" is returned.  The string has static
 * storage and must not be freed or modified by the caller. */
static const char *isolated_get_version(void)
{
    static const char placeholder[] = "N/A";

    return placeholder;
}
/*
 * Store a value for the given process in the base hash table.
 *
 * Returns the status of opal_pmix_base_store() so that a failed store is
 * reported to the caller, in the same way isolated_put() reports it,
 * instead of being masked as success.
 */
static int isolated_store_local(const opal_process_name_t *proc,
                                opal_value_t *val)
{
    return opal_pmix_base_store(proc, val);
}
/* Namespace query: jobid is ignored and the fixed placeholder string
 * "N/A" is returned. */
static const char *isolated_get_nspace(opal_jobid_t jobid)
{
    (void)jobid;
    return "N/A";
}
/* Jobid registration: nothing is recorded; both arguments are ignored. */
static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace)
{
    (void)jobid;
    (void)nspace;
}

28
opal/mca/pmix/isolated/pmix_isolated.h Обычный файл
Просмотреть файл

@ -0,0 +1,28 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PMIX_ISOLATED_H
#define MCA_PMIX_ISOLATED_H

#include "opal_config.h"

#include "opal/mca/mca.h"
#include "opal/mca/pmix/pmix.h"

BEGIN_C_DECLS

/* function table of the "isolated" pmix module */
OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_isolated_module;

/* component descriptor of the "isolated" pmix component */
OPAL_DECLSPEC extern opal_pmix_base_component_t mca_pmix_isolated_component;

END_C_DECLS

#endif /* MCA_PMIX_ISOLATED_H */

Просмотреть файл

@ -0,0 +1,94 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/class/opal_list.h"
#include "opal/util/proc.h"
#include "opal/mca/pmix/pmix.h"
#include "pmix_isolated.h"
/* Public, human-readable version banner of the pmix isolated component;
 * OPAL_VERSION is appended by string-literal concatenation. */
const char *opal_pmix_isolated_component_version_string = "OPAL isolated pmix MCA component version " OPAL_VERSION;
/*
 * Local functions, referenced only from the component structure below
 */
static int isolated_component_query(mca_base_module_t **module, int *priority);
static int isolated_open(void);
static int isolated_close(void);
/*
 * Instantiate the public struct with all of our public information
 * and pointers to our public functions in it
 */
opal_pmix_base_component_t mca_pmix_isolated_component = {
.base_version = {
/* Indicate that we are a pmix v2.0.0 component (which also
implies a specific MCA version) */
OPAL_PMIX_BASE_VERSION_2_0_0,
/* Component name and version */
.mca_component_name = "isolated",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
/* Component open, close and query functions */
.mca_open_component = isolated_open,
.mca_close_component = isolated_close,
.mca_query_component = isolated_component_query,
},
/* Next the MCA component meta data */
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/* Component open hook: there is nothing to set up, so this only
 * reports OPAL_SUCCESS. */
static int isolated_open(void)
{
    const int status = OPAL_SUCCESS;

    return status;
}
/* Component close hook: there is nothing to tear down, so this only
 * reports OPAL_SUCCESS. */
static int isolated_close(void)
{
    const int status = OPAL_SUCCESS;

    return status;
}
/*
 * Component selection hook.
 *
 * The component offers itself only when the SINGULARITY_CONTAINER
 * environment variable is set: inside a Singularity container we cannot
 * spawn an HNP, are truly on our own, and cannot call comm_spawn or any
 * of its friends.
 *
 * module    receives the isolated module, or NULL when not selected
 * priority  receives 100 when selected, 0 otherwise
 *
 * Returns OPAL_SUCCESS when selected, OPAL_ERR_TAKE_NEXT_OPTION otherwise.
 */
static int isolated_component_query(mca_base_module_t **module, int *priority)
{
    const char *container = getenv("SINGULARITY_CONTAINER");

    if (NULL == container) {
        /* not in a container - ignore us */
        *priority = 0;
        *module = NULL;
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    *priority = 100;
    *module = (mca_base_module_t *)&opal_pmix_isolated_module;
    return OPAL_SUCCESS;
}

Просмотреть файл

@ -102,7 +102,7 @@ static int pmix120_component_query(mca_base_module_t **module, int *priority)
} else {
/* we could be a server, so we still need to be considered,
* but only if requested */
*priority = 0;
*priority = 2;
}
*module = (mca_base_module_t *)&opal_pmix_pmix120_module;
return OPAL_SUCCESS;

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
@ -155,6 +155,9 @@ static int rte_init(void)
/* for convenience, push the pubsub version of this param into the environ */
opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, 1, &environ);
} else if (NULL != getenv("SINGULARITY_CONTAINER")) {
/* mark that we are in a container */
opal_setenv("OPAL_PROC_CONTAINER", "1", true, &environ);
} else {
/* spawn our very own HNP to support us */
if (ORTE_SUCCESS != (rc = fork_hnp())) {

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -60,9 +60,9 @@ typedef struct {
OBJ_CLASS_DECLARATION(orte_schizo_base_active_module_t);
/* the base stub functions */
ORTE_DECLSPEC int orte_schizo_base_parse_cli(char *personality,
ORTE_DECLSPEC int orte_schizo_base_parse_cli(char **personality,
int argc, int start, char **argv);
ORTE_DECLSPEC int orte_schizo_base_parse_env(char *personality,
ORTE_DECLSPEC int orte_schizo_base_parse_env(char **personality,
char *path,
opal_cmd_line_t *cmd_line,
char **srcenv,

Просмотреть файл

@ -19,7 +19,7 @@
#include "orte/util/name_fns.h"
#include "orte/mca/schizo/base/base.h"
int orte_schizo_base_parse_cli(char *personality,
int orte_schizo_base_parse_cli(char **personality,
int argc, int start, char **argv)
{
int rc;
@ -41,7 +41,7 @@ int orte_schizo_base_parse_cli(char *personality,
return ORTE_SUCCESS;
}
int orte_schizo_base_parse_env(char *personality,
int orte_schizo_base_parse_env(char **personality,
char *path,
opal_cmd_line_t *cmd_line,
char **srcenv,

Просмотреть файл

@ -49,9 +49,9 @@
#include "orte/mca/schizo/schizo.h"
static int parse_cli(char *personality,
static int parse_cli(char **personality,
int argc, int start, char **argv);
static int parse_env(char *personality,
static int parse_env(char **personality,
char *path,
opal_cmd_line_t *cmd_line,
char **srcenv,
@ -69,7 +69,7 @@ orte_schizo_base_module_t orte_schizo_ompi_module = {
setup_child
};
static int parse_cli(char *personality,
static int parse_cli(char **personality,
int argc, int start, char **argv)
{
int i, j, k;
@ -81,8 +81,16 @@ static int parse_cli(char *personality,
"routed",
NULL
};
bool takeus = false;
if (0 != strcmp(personality, "ompi")) {
/* see if we are included */
for (i=0; NULL != personality[i]; i++) {
if (0 == strcmp(personality[i], "ompi")) {
takeus = true;
break;
}
}
if (!takeus) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
@ -154,7 +162,7 @@ static int parse_cli(char *personality,
return ORTE_SUCCESS;
}
static int parse_env(char *personality,
static int parse_env(char **personality,
char *path,
opal_cmd_line_t *cmd_line,
char **srcenv,
@ -165,8 +173,16 @@ static int parse_env(char *personality,
char *value;
char *env_set_flag;
char **vars;
bool takeus = false;
if (0 != strcmp(personality, "ompi")) {
/* see if we are included */
for (i=0; NULL != personality[i]; i++) {
if (0 == strcmp(personality[i], "ompi")) {
takeus = true;
break;
}
}
if (!takeus) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
@ -289,8 +305,16 @@ static int setup_fork(orte_job_t *jdata,
char **envcpy, **nps, **firstranks;
char *npstring, *firstrankstring;
char *num_app_ctx;
bool takeus = false;
if (0 != strcmp(jdata->personality, "ompi")) {
/* see if we are included */
for (i=0; NULL != jdata->personality[i]; i++) {
if (0 == strcmp(jdata->personality[i], "ompi")) {
takeus = true;
break;
}
}
if (!takeus) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
@ -511,10 +535,18 @@ static int setup_child(orte_job_t *jdata,
orte_app_context_t *app)
{
char *param, *value;
int rc;
int rc, i;
int32_t nrestarts=0, *nrptr;
bool takeus = false;
if (0 != strcmp(jdata->personality, "ompi")) {
/* see if we are included */
for (i=0; NULL != jdata->personality[i]; i++) {
if (0 == strcmp(jdata->personality[i], "ompi")) {
takeus = true;
break;
}
}
if (!takeus) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2015 Intel, Inc. All rights reserved
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
@ -39,11 +39,11 @@ BEGIN_C_DECLS
* SCHIZO module functions - the modules are accessed via
* the base stub functions
*/
typedef int (*orte_schizo_base_module_parse_cli_fn_t)(char *personality,
typedef int (*orte_schizo_base_module_parse_cli_fn_t)(char **personality,
int argc, int start,
char **argv);
typedef int (*orte_schizo_base_module_parse_env_fn_t)(char *personality,
typedef int (*orte_schizo_base_module_parse_env_fn_t)(char **personality,
char *path,
opal_cmd_line_t *cmd_line,
char **srcenv,

Просмотреть файл

@ -17,6 +17,7 @@
#endif
#include <ctype.h>
#include "opal/util/basename.h"
#include "opal/util/opal_environ.h"
#include "orte/runtime/orte_globals.h"
@ -40,28 +41,80 @@ static int setup_fork(orte_job_t *jdata,
{
int i;
char *newenv;
bool takeus = false;
char *cmd, *tmp = NULL, *p, *t2;
/* see if we are included */
for (i=0; NULL != jdata->personality[i]; i++) {
if (0 == strcmp(jdata->personality[i], "singularity")) {
takeus = true;
break;
}
}
if (!takeus) {
/* even if they didn't specify, check to see if
* this involves a singularity container */
if (0 != strcmp(app->argv[0],"singularity") &&
0 != strcmp(app->argv[0],"sapprun") &&
NULL == strstr(app->argv[0], ".sapp")) {
/* guess not! */
return ORTE_ERR_TAKE_NEXT_OPTION;
}
}
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
"%s schizo:singularity: checking app %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->argv[0]);
/* see if this executable is a Singularity container */
if (0 == strcmp(app->argv[0],"singularity") ||
0 == strcmp(app->argv[0],"sapprun") ||
NULL != strstr(app->argv[0], ".sapp")) {
/* find the path and prepend it with the path to Singularity */
for (i = 0; NULL != app->env && NULL != app->env[i]; ++i) {
/* add to PATH */
if (0 == strncmp("PATH=", app->env[i], 5)) {
asprintf(&newenv, "%s:%s", OPAL_SINGULARITY_PATH, app->env[i] + 5);
opal_setenv("PATH", newenv, true, &app->env);
free(newenv);
break;
}
/* find the path and prepend it with the path to Singularity */
for (i = 0; NULL != app->env && NULL != app->env[i]; ++i) {
/* add to PATH */
if (0 == strncmp("PATH=", app->env[i], 5)) {
asprintf(&newenv, "%s:%s", OPAL_SINGULARITY_PATH, app->env[i] + 5);
opal_setenv("PATH", newenv, true, &app->env);
free(newenv);
break;
}
}
/* flag that the app is in a container */
opal_setenv("OPAL_PROC_CONTAINER", "1", true, &app->env);
/* ensure that we use "singularity run" to execute this app */
if (0 != strcmp(app->app, "singularity")) {
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
"%s schizo:singularity: adding singularity cmds at %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), OPAL_SINGULARITY_PATH);
/* change the app to the "singularity" command */
free(app->app);
if (0 < strlen(OPAL_SINGULARITY_PATH)) {
asprintf(&app->app, "%s/singularity", OPAL_SINGULARITY_PATH);
} else {
app->app = strdup("singularity");
}
/* if the app contains .sapp, then we need to strip that
* extension so singularity doesn't bark at us */
if (NULL != (p = strstr(app->argv[0], ".sapp"))) {
tmp = strdup(app->argv[0]);
t2 = opal_basename(app->argv[0]);
p = strstr(t2, ".sapp");
*p = '\0'; // strip the extension
free(app->argv[0]);
app->argv[0] = t2;
}
opal_argv_prepend_nosize(&app->argv, "run");
opal_argv_prepend_nosize(&app->argv, "singularity");
}
/* ensure this application has been "installed" */
if (NULL != tmp) {
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
"%s schizo:singularity: installing container %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
(void)asprintf(&cmd, "singularity install %s >> /dev/null", tmp);
system(cmd);
free(cmd);
free(tmp);
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -15,7 +15,7 @@
* Copyright (c) 2009 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -538,7 +538,7 @@ int orte_daemon(int argc, char *argv[])
/* setup the singleton's job */
jdata = OBJ_NEW(orte_job_t);
/* default to ompi for now */
jdata->personality = strdup("ompi");
opal_argv_append_nosize(&jdata->personality, "ompi");
orte_plm_base_create_jobid(jdata);
ljob = ORTE_LOCAL_JOBID(jdata->jobid);
opal_pointer_array_set_item(orte_job_data, ljob, jdata);

Просмотреть файл

@ -279,7 +279,7 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ NULL, '\0', "personality", "personality", 1,
&orte_cmd_line.personality, OPAL_CMD_LINE_TYPE_STRING,
"Programming model/language being used (default=\"ompi\")" },
"Comma-separated list of programming model, languages, and containers being used (default=\"ompi\")" },
{ NULL, 'd', "debug-devel", "debug-devel", 0,
&orte_cmd_line.debug, OPAL_CMD_LINE_TYPE_BOOL,
@ -658,7 +658,9 @@ int orte_submit_job(char *argv[], int *index,
/* default our personality to OMPI */
if (NULL == orte_cmd_line.personality) {
orte_cmd_line.personality = strdup("ompi");
opal_argv_append_nosize(&orte_cmd_line.personalities, "ompi");
} else {
orte_cmd_line.personalities = opal_argv_split(orte_cmd_line.personality, ',');
}
/* create a new job object to hold the info for this one - the
@ -672,7 +674,7 @@ int orte_submit_job(char *argv[], int *index,
*/
return ORTE_ERR_OUT_OF_RESOURCE;
}
jdata->personality = strdup(orte_cmd_line.personality);
jdata->personality = opal_argv_copy(orte_cmd_line.personalities);
trk = OBJ_NEW(trackr_t);
trk->jdata = jdata;
trk->launch_cb = launch_cb;
@ -1091,7 +1093,7 @@ static int create_app(int argc, char* argv[],
* Only pick up '-mca foo bar' on this pass.
*/
if (NULL != orte_cmd_line.appfile) {
if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personality, argc, 0, argv))) {
if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personalities, argc, 0, argv))) {
goto cleanup;
}
}
@ -1132,15 +1134,14 @@ static int create_app(int argc, char* argv[],
* mpirun -np 2 -mca foo bar ./my-app -mca bip bop
* We want to pick up '-mca foo bar' but not '-mca bip bop'
*/
if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personality,
argc, count, argv))) {
if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personalities, argc, count, argv))) {
goto cleanup;
}
/* Grab all OMPI_* environment variables */
app->env = opal_argv_copy(*app_env);
if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orte_cmd_line.personality,
if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orte_cmd_line.personalities,
orte_cmd_line.path,
&cmd_line,
environ, &app->env))) {

Просмотреть файл

@ -59,6 +59,7 @@ struct orte_cmd_line_t {
bool index_argv;
bool run_as_root;
char *personality;
char **personalities;
bool create_dvm;
bool terminate_dvm;
bool nolocal;

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
@ -173,7 +173,7 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
/* transfer the job info across */
OPAL_LIST_FOREACH(info, job_info, opal_value_t) {
if (0 == strcmp(info->key, OPAL_PMIX_PERSONALITY)) {
jdata->personality = strdup(info->data.string);
jdata->personality = opal_argv_split(info->data.string, ',');
} else if (0 == strcmp(info->key, OPAL_PMIX_MAPPER)) {
if (NULL == jdata->map) {
jdata->map = OBJ_NEW(orte_job_map_t);
@ -265,7 +265,7 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor,
}
/* if the job is missing a personality setting, add it */
if (NULL == jdata->personality) {
jdata->personality = strdup("ompi");
opal_argv_append_nosize(&jdata->personality, "ompi");
}
/* transfer the apps across */

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -81,10 +81,18 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
return rc;
}
/* pack the personality */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &jobs[i]->personality, 1, OPAL_STRING))) {
count = opal_argv_count(jobs[i]->personality);
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &count, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
for (j=0; j < count; j++) {
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &jobs[i]->personality[j], 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* pack the number of apps */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->num_apps)), 1, ORTE_APP_IDX))) {

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -186,14 +186,16 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
asprintf(&pfx2, "%s", prefix);
}
tmp2 = opal_argv_join(src->personality, ',');
asprintf(&tmp, "\n%sData for job: %s\tPersonality: %s\tRecovery: %s(%s)\n%s\tNum apps: %ld\tMPI allowed: %s\tStdin target: %s\tState: %s\tAbort: %s", pfx2,
ORTE_JOBID_PRINT(src->jobid), src->personality,
ORTE_JOBID_PRINT(src->jobid), tmp2,
(ORTE_FLAG_TEST(src, ORTE_JOB_FLAG_RECOVERABLE)) ? "ENABLED" : "DISABLED",
(orte_get_attribute(&src->attributes, ORTE_JOB_RECOVER_DEFINED, NULL, OPAL_BOOL)) ? "DEFINED" : "DEFAULT",
pfx2,
(long)src->num_apps,
(ORTE_FLAG_TEST(src, ORTE_JOB_FLAG_GANG_LAUNCHED)) ? "YES" : "NO", ORTE_VPID_PRINT(src->stdin_target),
orte_job_state_to_str(src->state), (ORTE_FLAG_TEST(src, ORTE_JOB_FLAG_ABORTED)) ? "True" : "False");
free(tmp2);
asprintf(&pfx, "%s\t", pfx2);
free(pfx2);

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -64,6 +64,7 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
orte_job_t **jobs;
orte_app_idx_t j;
orte_attribute_t *kv;
char *tmp;
/* unpack into array of orte_job_t objects */
jobs = (orte_job_t**) dest;
@ -85,10 +86,20 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
}
/* unpack the personality */
n=1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &jobs[i]->personality, &n, OPAL_STRING))) {
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &count, &n, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
for (k=0; k < count; k++) {
n=1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &tmp, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
opal_argv_append_nosize(&jobs[i]->personality, tmp);
free(tmp);
}
/* unpack the num apps */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -682,7 +682,7 @@ static void orte_job_destruct(orte_job_t* job)
}
if (NULL != job->personality) {
free(job->personality);
opal_argv_free(job->personality);
}
for (n=0; n < job->apps->size; n++) {
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(job->apps, n))) {

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -303,7 +303,7 @@ typedef struct {
/** Base object so this can be put on a list */
opal_list_item_t super;
/* personality for this job */
char *personality;
char **personality;
/* jobid for this job */
orte_jobid_t jobid;
/* offset to the total number of procs so shared memory

Просмотреть файл

@ -1,3 +1,3 @@
Name: hello
Name: myhello
Exec: ./hello

Просмотреть файл

@ -517,7 +517,7 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ NULL, '\0', "personality", "personality", 1,
&orte_cmd_line.personality, OPAL_CMD_LINE_TYPE_STRING,
"Programming model/language being used (default=\"ompi\")" },
"Comma-separated list of programming model, languages, and containers being used (default=\"ompi\")" },
{ NULL, '\0', "dvm", "dvm", 0,
&orte_cmd_line.create_dvm, OPAL_CMD_LINE_TYPE_BOOL,
@ -842,9 +842,10 @@ int orterun(int argc, char *argv[])
/* default our personality to OMPI */
if (NULL == orte_cmd_line.personality) {
orte_cmd_line.personality = strdup("ompi");
opal_argv_append_nosize(&orte_cmd_line.personalities, "ompi");
} else {
orte_cmd_line.personalities = opal_argv_split(orte_cmd_line.personality, ',');
}
/* Check for some "global" command line params */
parse_globals(argc, argv, &cmd_line);
OBJ_DESTRUCT(&cmd_line);
@ -860,7 +861,7 @@ int orterun(int argc, char *argv[])
*/
return ORTE_ERR_OUT_OF_RESOURCE;
}
jdata->personality = strdup(orte_cmd_line.personality);
jdata->personality = opal_argv_copy(orte_cmd_line.personalities);
/* check what user wants us to do with stdin */
if (0 == strcmp(orte_cmd_line.stdin_target, "all")) {
@ -1126,6 +1127,7 @@ static int init_globals(void)
orte_cmd_line.index_argv = false;
orte_cmd_line.run_as_root = false;
orte_cmd_line.personality = NULL;
orte_cmd_line.personalities = NULL;
orte_cmd_line.create_dvm = false;
}
@ -1401,7 +1403,7 @@ static int create_app(int argc, char* argv[],
* Only pick up '-mca foo bar' on this pass.
*/
if (NULL != orte_cmd_line.appfile) {
if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personality, argc, 0, argv))) {
if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personalities, argc, 0, argv))) {
goto cleanup;
}
}
@ -1446,7 +1448,7 @@ static int create_app(int argc, char* argv[],
* mpirun -np 2 -mca foo bar ./my-app -mca bip bop
* We want to pick up '-mca foo bar' but not '-mca bip bop'
*/
if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personality,
if (ORTE_SUCCESS != (rc = orte_schizo.parse_cli(orte_cmd_line.personalities,
argc, count, argv))) {
goto cleanup;
}
@ -1454,7 +1456,7 @@ static int create_app(int argc, char* argv[],
/* Grab all OMPI_* environment variables */
app->env = opal_argv_copy(*app_env);
if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orte_cmd_line.personality,
if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orte_cmd_line.personalities,
orte_cmd_line.path,
&cmd_line,
environ, &app->env))) {