1
1

Add support for Singularity containers, including a .m4 file for checking if Singularity is available and an orte/schizo component for setting the proper support if a container was given as the executable

Cleanup the configury so we properly check for Singularity under the various typical use-cases

Bring the Singularity support online. We have to turn "off" the sm BTL as it segfaults from inside the container - root cause remains unclear. Also turned "off" the various OPAL shmem components in case they are involved and someone else tries to use them. Happily, the vader BTL works just fine!
This commit is contained in:
Ralph Castain 2016-02-10 23:43:13 -08:00
parent d98616b9ed
commit aa9e5a1a27
18 changed files with 347 additions and 24 deletions

1
.gitignore vendored
View File

@ -37,6 +37,7 @@
*.exe
*.log
*.trs
*.sapp
*~
*\\#

View File

@ -0,0 +1,60 @@
# -*- shell-script ; indent-tabs-mode:nil -*-
#
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OPAL_CHECK_SINGULARITY(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
AC_DEFUN([OPAL_CHECK_SINGULARITY],[
    OPAL_VAR_SCOPE_PUSH([spath have_singularity])

    AC_ARG_WITH([singularity],
                [AS_HELP_STRING([--with-singularity(=DIR)],
                                [Build support for the Singularity container, optionally adding DIR to the search path])])

    # spath stays empty unless the executable is found under a user-supplied DIR
    spath=
    AC_MSG_CHECKING([if Singularity support is to be built])
    AS_IF([test "$with_singularity" = "no"],
          [AC_MSG_RESULT([no])
           have_singularity=no],
          [AC_MSG_RESULT([yes])
           AS_IF([test -z "$with_singularity" || test "$with_singularity" = "yes"],
                 [ # look for the singularity command in the default path
                  AC_CHECK_PROG([SINGULARITY], [singularity], [singularity])
                  AS_IF([test "$SINGULARITY" != ""],
                        [have_singularity=yes],
                        [ # a missing executable is fatal only when support was explicitly requested
                         AS_IF([test "$with_singularity" = "yes"],
                               [AC_MSG_WARN([Singularity support requested, but required executable])
                                AC_MSG_WARN(["singularity" not found in default locations])
                                AC_MSG_ERROR([Cannot continue])])
                         have_singularity=no])],
                 [ AC_MSG_CHECKING([for existence of $with_singularity/bin])
                   # look for the singularity command in the bin subdirectory
                   AS_IF([test ! -d "$with_singularity/bin"],
                         [AC_MSG_RESULT([not found])
                          AC_MSG_WARN([Directory $with_singularity/bin not found])
                          AC_MSG_ERROR([Cannot continue])],
                         [AC_MSG_RESULT([found])])
                   # temporarily extend PATH so the program check also searches DIR/bin
                   save_path=$PATH
                   PATH=$with_singularity/bin:$PATH
                   AC_CHECK_PROG([SINGULARITY], [singularity], [singularity])
                   AS_IF([test "$SINGULARITY" != ""],
                         [have_singularity=yes
                          spath=$with_singularity/bin],
                         [have_singularity=no
                          AC_MSG_WARN([Singularity support requested, but required executable])
                          AC_MSG_WARN(["singularity" not found in either default or specified path])
                          AC_MSG_ERROR([Cannot continue])])
                   PATH=$save_path
                 ]
           )])

    # export the directory that was found so the runtime can prepend it to PATH
    AC_DEFINE_UNQUOTED(OPAL_SINGULARITY_PATH, "$spath", [Path to Singularity binaries])

    AS_IF([test "$have_singularity" = "yes"],
          [$2], [$3])

    OPAL_VAR_SCOPE_POP
])

View File

@ -16,7 +16,7 @@
* All rights reserved.
* Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -764,6 +764,11 @@ mca_btl_sm_component_init(int *num_btls,
int rc;
#endif /* OPAL_BTL_SM_HAVE_KNEM | OPAL_BTL_SM_HAVE_CMA */
/* if we are in a container, then we must disqualify ourselves */
if (NULL != getenv("OPAL_PROC_CONTAINER")) {
return NULL;
}
*num_btls = 0;
/* lookup/create shared memory pool only when used */
mca_btl_sm_component.sm_mpool = NULL;

View File

@ -13,6 +13,7 @@
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -175,6 +176,13 @@ mmap_open(void)
static int
mmap_query(mca_base_module_t **module, int *priority)
{
/* if we are in a container, then we must disqualify ourselves */
if (NULL != getenv("OPAL_PROC_CONTAINER")) {
*priority = 0;
*module = NULL;
return OPAL_ERROR;
}
*priority = mca_shmem_mmap_component.priority;
*module = (mca_base_module_t *)&opal_shmem_mmap_module.super;
return OPAL_SUCCESS;

View File

@ -14,6 +14,7 @@
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -200,6 +201,13 @@ posix_runtime_query(mca_base_module_t **module,
static int
posix_query(mca_base_module_t **module, int *priority)
{
/* if we are in a container, then we must disqualify ourselves */
if (NULL != getenv("OPAL_PROC_CONTAINER")) {
*priority = 0;
*module = NULL;
return OPAL_ERROR;
}
*priority = mca_shmem_posix_component.priority;
*module = (mca_base_module_t *)&opal_shmem_posix_module.super;
return OPAL_SUCCESS;

View File

@ -16,6 +16,7 @@
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -209,6 +210,13 @@ out:
static int
sysv_query(mca_base_module_t **module, int *priority)
{
/* if we are in a container, then we must disqualify ourselves */
if (NULL != getenv("OPAL_PROC_CONTAINER")) {
*priority = 0;
*module = NULL;
return OPAL_ERROR;
}
*priority = mca_shmem_sysv_component.priority;
*module = (mca_base_module_t *)&opal_shmem_sysv_module.super;
return OPAL_SUCCESS;

View File

@ -100,10 +100,10 @@ int orte_schizo_base_select(void)
}
if (4 < opal_output_get_verbosity(orte_schizo_base_framework.framework_output)) {
opal_output(0, "%s: Final mapper priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
opal_output(0, "%s: Final schizo priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* show the prioritized list */
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
opal_output(0, "\tMapper: %s Priority: %d", mod->component->mca_component_name, mod->pri);
opal_output(0, "\tSchizo: %s Priority: %d", mod->component->mca_component_name, mod->pri);
}
}

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -13,6 +13,10 @@
#include "orte/constants.h"
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/mca/schizo/base/base.h"
int orte_schizo_base_parse_cli(char *personality,
@ -22,19 +26,19 @@ int orte_schizo_base_parse_cli(char *personality,
orte_schizo_base_active_module_t *mod;
if (NULL == personality) {
opal_output(0, "NULL PERSONALITY");
return ORTE_ERR_NOT_SUPPORTED;
}
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
if (0 == strcmp(personality, mod->component->mca_component_name)) {
if (NULL != mod->module->parse_cli) {
rc = mod->module->parse_cli(personality, argc, start, argv);
if (NULL != mod->module->parse_cli) {
rc = mod->module->parse_cli(personality, argc, start, argv);
if (ORTE_SUCCESS != rc && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_ERR_NOT_SUPPORTED;
return ORTE_SUCCESS;
}
int orte_schizo_base_parse_env(char *personality,
@ -47,14 +51,15 @@ int orte_schizo_base_parse_env(char *personality,
orte_schizo_base_active_module_t *mod;
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
if (0 == strcmp(personality, mod->component->mca_component_name)) {
if (NULL != mod->module->parse_env) {
rc = mod->module->parse_env(personality, path, cmd_line, srcenv, dstenv);
if (NULL != mod->module->parse_env) {
rc = mod->module->parse_env(personality, path, cmd_line, srcenv, dstenv);
if (ORTE_SUCCESS != rc && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_ERR_NOT_SUPPORTED;
return ORTE_SUCCESS;
}
int orte_schizo_base_setup_fork(orte_job_t *jdata,
@ -64,14 +69,15 @@ int orte_schizo_base_setup_fork(orte_job_t *jdata,
orte_schizo_base_active_module_t *mod;
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
if (0 == strcmp(jdata->personality, mod->component->mca_component_name)) {
if (NULL != mod->module->setup_fork) {
rc = mod->module->setup_fork(jdata, context);
if (NULL != mod->module->setup_fork) {
rc = mod->module->setup_fork(jdata, context);
if (ORTE_SUCCESS != rc && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_ERR_NOT_SUPPORTED;
return ORTE_SUCCESS;
}
int orte_schizo_base_setup_child(orte_job_t *jdata,
@ -82,12 +88,13 @@ int orte_schizo_base_setup_child(orte_job_t *jdata,
orte_schizo_base_active_module_t *mod;
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
if (0 == strcmp(jdata->personality, mod->component->mca_component_name)) {
if (NULL != mod->module->setup_child) {
rc = mod->module->setup_child(jdata, child, app);
if (NULL != mod->module->setup_child) {
rc = mod->module->setup_child(jdata, child, app);
if (ORTE_SUCCESS != rc && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_ERR_NOT_SUPPORTED;
return ORTE_SUCCESS;
}

View File

@ -82,6 +82,10 @@ static int parse_cli(char *personality,
NULL
};
if (0 != strcmp(personality, "ompi")) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
for (i = 0; i < (argc-start); ++i) {
if (0 == strcmp("-mca", argv[i]) ||
0 == strcmp("--mca", argv[i]) ) {
@ -162,6 +166,10 @@ static int parse_env(char *personality,
char *env_set_flag;
char **vars;
if (0 != strcmp(personality, "ompi")) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
for (i = 0; NULL != srcenv[i]; ++i) {
if (0 == strncmp("OMPI_", srcenv[i], 5)) {
/* check for duplicate in app->env - this
@ -282,6 +290,10 @@ static int setup_fork(orte_job_t *jdata,
char *npstring, *firstrankstring;
char *num_app_ctx;
if (0 != strcmp(jdata->personality, "ompi")) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
/* see if the mapper thinks we are oversubscribed */
oversubscribed = false;
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid))) {
@ -502,6 +514,10 @@ static int setup_child(orte_job_t *jdata,
int rc;
int32_t nrestarts=0, *nrptr;
if (0 != strcmp(jdata->personality, "ompi")) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
/* setup the jobid */
if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&value, child->name.jobid))) {
ORTE_ERROR_LOG(rc);

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
@ -41,7 +41,7 @@ orte_schizo_base_component_t mca_schizo_ompi_component = {
static int component_query(mca_base_module_t **module, int *priority)
{
*module = (mca_base_module_t*)&orte_schizo_ompi_module;
*priority = 1;
*priority = 10;
return ORTE_SUCCESS;
}

View File

@ -0,0 +1,35 @@
#
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Source files of the schizo/singularity component; the same list feeds
# both the DSO and the static library targets below.
sources = \
schizo_singularity_component.c \
schizo_singularity.h \
schizo_singularity.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_schizo_singularity_DSO
component_noinst =
component_install = mca_schizo_singularity.la
else
component_noinst = libmca_schizo_singularity.la
component_install =
endif
# DSO build: the component library is installed into $(ortelibdir)
mcacomponentdir = $(ortelibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_schizo_singularity_la_SOURCES = $(sources)
mca_schizo_singularity_la_LDFLAGS = -module -avoid-version
# Static build: the library is built but not installed
noinst_LTLIBRARIES = $(component_noinst)
libmca_schizo_singularity_la_SOURCES = $(sources)
libmca_schizo_singularity_la_LDFLAGS = -module -avoid-version

View File

@ -0,0 +1,19 @@
# -*- shell-script -*-
#
# Copyright (c) 2016 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_orte_schizo_singularity_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_orte_schizo_singularity_CONFIG],[
# register the component Makefile with configure
AC_CONFIG_FILES([orte/mca/schizo/singularity/Makefile])
# probe for Singularity and record the outcome in schizo_singularity_happy
OPAL_CHECK_SINGULARITY([schizo_singularity], [schizo_singularity_happy="yes"], [schizo_singularity_happy="no"])
# run the first macro argument when Singularity was found, otherwise the second
AS_IF([test "$schizo_singularity_happy" = "yes"], [$1], [$2])
])dnl

View File

@ -0,0 +1,7 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: INTEL
status: active

View File

@ -0,0 +1,67 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
#include "opal/util/opal_environ.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/mca/schizo/base/base.h"
#include "schizo_singularity.h"
/* Forward declaration so the module table below can reference it. */
static int setup_fork(orte_job_t *jdata,
orte_app_context_t *context);
/*
 * Module table for the Singularity schizo component.  Only the
 * setup_fork entry point is provided; the remaining slots are NULL.
 * NOTE(review): the initializer is positional - presumably the slots are
 * parse_cli, parse_env, setup_fork and setup_child in that order; confirm
 * against the orte_schizo_base_module_t declaration in schizo.h.
 */
orte_schizo_base_module_t orte_schizo_singularity_module = {
NULL,
NULL,
setup_fork,
NULL
};
/*
 * Prepare the environment of an application that is launched through
 * Singularity.
 *
 * The app is treated as a container when its command is "singularity" or
 * "sapprun", or when the command contains ".sapp".  For such apps the
 * configured Singularity bin directory (if any) is prepended to the PATH
 * entry of the app environment, and OPAL_PROC_CONTAINER is set so that
 * components which cannot operate inside a container can disqualify
 * themselves.  Apps that are not containers are left untouched.
 *
 * @param jdata  job the app belongs to (not used here)
 * @param app    app context whose argv is inspected and whose env is updated
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE if the new PATH value
 *         could not be allocated
 */
static int setup_fork(orte_job_t *jdata,
                      orte_app_context_t *app)
{
    int i;
    char *newenv;

    /* without a command there is nothing to examine */
    if (NULL == app->argv || NULL == app->argv[0]) {
        return ORTE_SUCCESS;
    }

    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                        "%s schizo:singularity: checking app %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->argv[0]);

    /* see if this executable is a Singularity container */
    if (0 != strcmp(app->argv[0], "singularity") &&
        0 != strcmp(app->argv[0], "sapprun") &&
        NULL == strstr(app->argv[0], ".sapp")) {
        /* ordinary executable - must not be flagged as containerized */
        return ORTE_SUCCESS;
    }

    /* Only prepend when configure recorded a directory: an empty leading
     * PATH element would make the current working directory searchable. */
    if ('\0' != OPAL_SINGULARITY_PATH[0]) {
        /* find the path and prepend it with the path to Singularity */
        for (i = 0; NULL != app->env && NULL != app->env[i]; ++i) {
            if (0 == strncmp("PATH=", app->env[i], 5)) {
                if (0 > asprintf(&newenv, "%s:%s", OPAL_SINGULARITY_PATH, app->env[i] + 5)) {
                    return ORTE_ERR_OUT_OF_RESOURCE;
                }
                opal_setenv("PATH", newenv, true, &app->env);
                free(newenv);
                break;
            }
        }
    }

    /* flag that the app is in a container */
    opal_setenv("OPAL_PROC_CONTAINER", "1", true, &app->env);

    return ORTE_SUCCESS;
}

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* Public declarations of the Singularity schizo component and module. */
#ifndef _MCA_SCHIZO_SINGULARITY_H_
#define _MCA_SCHIZO_SINGULARITY_H_
#include "orte_config.h"
#include "orte/types.h"
#include "opal/mca/base/base.h"
#include "orte/mca/schizo/schizo.h"
BEGIN_C_DECLS
/* component descriptor for the "singularity" schizo component */
ORTE_MODULE_DECLSPEC extern orte_schizo_base_component_t mca_schizo_singularity_component;
/* module table handed out by the component query function */
extern orte_schizo_base_module_t orte_schizo_singularity_module;
END_C_DECLS
#endif /* _MCA_SCHIZO_SINGULARITY_H_ */

View File

@ -0,0 +1,45 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/types.h"
#include "opal/types.h"
#include "opal/util/show_help.h"
#include "orte/mca/schizo/schizo.h"
#include "schizo_singularity.h"
static int component_query(mca_base_module_t **module, int *priority);
/*
* Struct of function pointers and all that to let us be initialized
*/
/*
 * Component descriptor for the Singularity schizo component: it carries
 * the component name, its version information and the query function
 * that hands out the module.
 */
orte_schizo_base_component_t mca_schizo_singularity_component = {
.base_version = {
MCA_SCHIZO_BASE_VERSION_1_0_0,
/* name under which this component is registered */
.mca_component_name = "singularity",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
/* called to obtain this component's module and priority */
.mca_query_component = component_query,
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
};
/*
 * Component query: always offers the Singularity module, at priority 5.
 *
 * @param module    receives a pointer to the Singularity schizo module
 * @param priority  receives the priority of this component
 *
 * @return ORTE_SUCCESS in all cases
 */
static int component_query(mca_base_module_t **module, int *priority)
{
    *priority = 5;
    *module = (mca_base_module_t *) &orte_schizo_singularity_module;

    return ORTE_SUCCESS;
}

View File

@ -1,4 +1,4 @@
PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info info_spawn server client paccept pconnect ring
PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info info_spawn server client paccept pconnect ring hello.sapp
all: $(PROGS)
@ -10,6 +10,11 @@ hello_output: hello_output.c
hello_show_help: hello_show_help.c
$(CC) $(CFLAGS) $(CFLAGS_INTERNAL) $^ -o $@
# Compile the hello test program, then run the Singularity build and
# install steps on it using the myhello.spec description.
hello.sapp: hello.c myhello.spec
	$(CC) $(CFLAGS) $(CFLAGS_INTERNAL) hello.c -o hello
	singularity build myhello.spec
	singularity install hello.sapp
CC = mpicc
CFLAGS = -g --openmpi:linkall
CFLAGS_INTERNAL = -I../../.. -I../../../orte/include -I../../../opal/include

View File

@ -0,0 +1,3 @@
Name: hello
Exec: ./hello