1
1

Revert "Modify singularity support per patch from Greg Kurtzer"

This reverts commit open-mpi/ompi@f7257a8310.

Ensure that we properly clean up the session directory tree. Prior code had issues with symlinks, especially if the file that the link points to was already removed as we traverse the tree. Also found that the dirent checks for directory type weren't fully portable, so fall back to the stat-based approach, which is known to be portable.

Fix singularity singletons by detecting that we are in a container and properly setting the pmix selection to pick the isolated component. Remove a stale restriction blocking use of the sm btl.
Этот коммит содержится в:
Ralph Castain 2016-03-22 10:24:03 -07:00
родитель dec23f3d39
Коммит 8c14df2328
6 изменённых файлов: 114 добавлений и 37 удалений

Просмотреть файл

@ -735,11 +735,6 @@ mca_btl_sm_component_init(int *num_btls,
int rc; int rc;
#endif /* OPAL_BTL_SM_HAVE_KNEM | OPAL_BTL_SM_HAVE_CMA */ #endif /* OPAL_BTL_SM_HAVE_KNEM | OPAL_BTL_SM_HAVE_CMA */
/* if we are in a container, then we must disqualify ourselves */
if (NULL != getenv("OPAL_PROC_CONTAINER")) {
return NULL;
}
*num_btls = 0; *num_btls = 0;
/* lookup/create shared memory pool only when used */ /* lookup/create shared memory pool only when used */
mca_btl_sm_component.sm_mpool = NULL; mca_btl_sm_component.sm_mpool = NULL;

Просмотреть файл

@ -11,6 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science * Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -150,9 +151,7 @@ int opal_os_dirpath_destroy(const char *path,
DIR *dp; DIR *dp;
struct dirent *ep; struct dirent *ep;
char *filenm; char *filenm;
#ifndef HAVE_STRUCT_DIRENT_D_TYPE
struct stat buf; struct stat buf;
#endif
if (NULL == path) { /* protect against error */ if (NULL == path) { /* protect against error */
return OPAL_ERROR; return OPAL_ERROR;
@ -189,16 +188,11 @@ int opal_os_dirpath_destroy(const char *path,
* allocating memory here, so we need to free it later on. * allocating memory here, so we need to free it later on.
*/ */
filenm = opal_os_path(false, path, ep->d_name, NULL); filenm = opal_os_path(false, path, ep->d_name, NULL);
#ifdef HAVE_STRUCT_DIRENT_D_TYPE
if (DT_DIR == ep->d_type) {
is_dir = true;
}
#else /* have dirent.d_type */
rc = stat(filenm, &buf); rc = stat(filenm, &buf);
if (rc < 0 || S_ISDIR(buf.st_mode)) { if (S_ISDIR(buf.st_mode)) {
is_dir = true; is_dir = true;
} }
#endif /* have dirent.d_type */
/* /*
* If not recursively decending, then if we find a directory then fail * If not recursively decending, then if we find a directory then fail
@ -233,9 +227,8 @@ int opal_os_dirpath_destroy(const char *path,
closedir(dp); closedir(dp);
goto cleanup; goto cleanup;
} }
} } else {
/* Files are removed right here */ /* Files are removed right here */
else {
if (0 != (rc = unlink(filenm))) { if (0 != (rc = unlink(filenm))) {
exit_status = OPAL_ERROR; exit_status = OPAL_ERROR;
} }
@ -295,13 +288,11 @@ int opal_os_dirpath_access(const char *path, const mode_t in_mode ) {
if (0 == stat(path, &buf)) { /* exists - check access */ if (0 == stat(path, &buf)) { /* exists - check access */
if ((buf.st_mode & loc_mode) == loc_mode) { /* okay, I can work here */ if ((buf.st_mode & loc_mode) == loc_mode) { /* okay, I can work here */
return(OPAL_SUCCESS); return(OPAL_SUCCESS);
} } else {
else {
/* Don't have access rights to the existing path */ /* Don't have access rights to the existing path */
return(OPAL_ERROR); return(OPAL_ERROR);
} }
} } else {
else {
/* We could not find the path */ /* We could not find the path */
return( OPAL_ERR_NOT_FOUND ); return( OPAL_ERR_NOT_FOUND );
} }

Просмотреть файл

@ -157,10 +157,8 @@ static int rte_init(void)
/* for convenience, push the pubsub version of this param into the environ */ /* for convenience, push the pubsub version of this param into the environ */
opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ); opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, true, &environ);
} else if (NULL != getenv("SINGULARITY_CONTAINER")) { } else if (NULL != getenv("SINGULARITY_CONTAINER") ||
/* mark that we are in a container */ mca_ess_singleton_component.isolated) {
opal_setenv("OPAL_PROC_CONTAINER", "1", true, &environ);
} else if (mca_ess_singleton_component.isolated) {
/* ensure we use the isolated pmix component */ /* ensure we use the isolated pmix component */
opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ); opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
} else { } else {

Просмотреть файл

@ -31,18 +31,20 @@
static int setup_app(char **personality, static int setup_app(char **personality,
orte_app_context_t *context); orte_app_context_t *context);
static int setup_fork(orte_job_t *jdata,
orte_app_context_t *context);
orte_schizo_base_module_t orte_schizo_singularity_module = { orte_schizo_base_module_t orte_schizo_singularity_module = {
.setup_app = setup_app .setup_app = setup_app,
.setup_fork = setup_fork
}; };
static int setup_app(char **personality, static int setup_app(char **personality,
orte_app_context_t *app) orte_app_context_t *app)
{ {
int i; int i;
char *newenv, *pth; char *newenv, *pth, *t2;
bool takeus = false; bool takeus = false;
char *t2;
/* see if we are included */ /* see if we are included */
for (i=0; NULL != personality[i]; i++) { for (i=0; NULL != personality[i]; i++) {
@ -90,6 +92,12 @@ static int setup_app(char **personality,
break; break;
} }
} }
free(pth);
if (0 == strcmp(app->argv[0], "singularity")) {
/* we don't want the backend to setup a cache dir */
orte_set_attribute(&app->attributes, ORTE_APP_NO_CACHEDIR, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
}
/* export an envar to permit shared memory operations */ /* export an envar to permit shared memory operations */
opal_setenv("SINGULARITY_NO_NAMESPACE_PID", "1", true, &app->env); opal_setenv("SINGULARITY_NO_NAMESPACE_PID", "1", true, &app->env);
@ -97,3 +105,85 @@ static int setup_app(char **personality,
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/**
 * Adjust an app context so that it is launched through Singularity.
 *
 * The function acts when "singularity" is listed in jdata->personality, or
 * when app->argv[0] is "singularity", is "sapprun", or contains ".sapp".
 * When it acts, it:
 *   - exports SINGULARITY_CACHEDIR (set to the job session directory) into
 *     both the app environment and our own environment, unless the
 *     ORTE_APP_NO_CACHEDIR attribute is set on the app;
 *   - if argv[0] contains ".sapp", runs "singularity install" on it from the
 *     app's working directory and replaces argv[0] with its basename
 *     truncated at ".sapp";
 *   - if app->app is not already "singularity", replaces app->app with
 *     "singularity" and prepends "singularity", "-vv" or "--quiet", and
 *     "run" to argv.
 *
 * Failures of getcwd/chdir/asprintf/system are reported through
 * opal_output_verbose and handled on a best-effort basis; they do not
 * change the return value.
 *
 * @param jdata  job whose personality list is inspected
 * @param app    app context that is modified in place
 *
 * @return ORTE_ERR_TAKE_NEXT_OPTION if this app does not involve
 *         singularity, ORTE_SUCCESS otherwise
 */
static int setup_fork(orte_job_t *jdata,
                      orte_app_context_t *app)
{
    int i;
    int rc;
    bool takeus = false;
    bool verbose;
    bool changed_dir = false;
    char *p, *t2, *cmd;
    char dir[MAXPATHLEN];

    /* see if we are included */
    for (i=0; NULL != jdata->personality[i]; i++) {
        if (0 == strcmp(jdata->personality[i], "singularity")) {
            takeus = true;
            break;
        }
    }
    if (!takeus) {
        /* even if they didn't specify, check to see if
         * this involves a singularity container */
        if (0 != strcmp(app->argv[0],"singularity") &&
            0 != strcmp(app->argv[0],"sapprun") &&
            NULL == strstr(app->argv[0], ".sapp")) {
            /* guess not! */
            return ORTE_ERR_TAKE_NEXT_OPTION;
        }
    }

    /* set the singularity cache dir, unless asked not to do so. It is
     * placed in the app's environment and in our own environment as well */
    if (!orte_get_attribute(&app->attributes, ORTE_APP_NO_CACHEDIR, NULL, OPAL_BOOL)) {
        opal_setenv("SINGULARITY_CACHEDIR", orte_process_info.job_session_dir, true, &app->env);
        opal_setenv("SINGULARITY_CACHEDIR", orte_process_info.job_session_dir, true, &environ);
    }

    /* the verbosity decides which flags we hand to singularity below */
    verbose = (0 < opal_output_get_verbosity(orte_schizo_base_framework.framework_output));

    /* save our current directory, then change to the working directory
     * for this context. If we cannot learn where we are, we do not move:
     * there would be no way to get back afterwards */
    if (NULL == getcwd(dir, sizeof(dir))) {
        opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                            "%s schizo:singularity: unable to get current directory - staying put",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
    } else if (NULL == app->cwd || 0 != chdir(app->cwd)) {
        opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                            "%s schizo:singularity: unable to change to working directory %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            (NULL == app->cwd) ? "NULL" : app->cwd);
    } else {
        changed_dir = true;
    }

    /* if the app contains .sapp, then we need to strip that
     * extension so singularity doesn't bark at us */
    if (NULL != strstr(app->argv[0], ".sapp")) {
        /* ensure the app is installed */
        opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                            "%s schizo:singularity: installing app %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->argv[0]);
        t2 = opal_basename(app->argv[0]);
        /* ".sapp" may sit in a directory component rather than in the
         * basename, in which case there is no extension to strip */
        p = (NULL == t2) ? NULL : strstr(t2, ".sapp");
        if (NULL == p) {
            opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                                "%s schizo:singularity: no .sapp extension in basename of %s - not installing",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->argv[0]);
            free(t2);
        } else {
            *p = '\0'; // strip the extension
            /* NOTE(review): argv[0] is interpolated into a shell command
             * without quoting - names containing shell metacharacters
             * will be interpreted by the shell */
            cmd = NULL;
            if (verbose) {
                rc = asprintf(&cmd, "singularity -vv install --runkey %s %s", t2, app->argv[0]);
            } else {
                rc = asprintf(&cmd, "singularity --quiet install --runkey %s %s", t2, app->argv[0]);
            }
            if (0 > rc) {
                /* the buffer is indeterminate on failure - do not use it */
                opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                                    "%s schizo:singularity: unable to build install command for %s",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->argv[0]);
            } else {
                if (0 != system(cmd)) {
                    opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                                        "%s schizo:singularity: install command failed: %s",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cmd);
                }
                free(cmd);
            }
            /* the stripped basename becomes the new argv[0] */
            free(app->argv[0]);
            app->argv[0] = t2;
        }
    }

    /* ensure that we use "singularity run" to execute this app */
    if (0 != strcmp(app->app, "singularity")) {
        opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                            "%s schizo:singularity: adding singularity cmd",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        /* change the app to the "singularity" command */
        free(app->app);
        app->app = strdup("singularity");
        /* prepending in reverse yields: singularity <-vv|--quiet> run ... */
        opal_argv_prepend_nosize(&app->argv, "run");
        if (verbose) {
            opal_argv_prepend_nosize(&app->argv, "-vv");
        } else {
            opal_argv_prepend_nosize(&app->argv, "--quiet");
        }
        opal_argv_prepend_nosize(&app->argv, "singularity");
    }

    /* return to the original directory, but only if we actually left it */
    if (changed_dir && 0 != chdir(dir)) {
        opal_output_verbose(1, orte_schizo_base_framework.framework_output,
                            "%s schizo:singularity: unable to return to directory %s",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), dir);
    }

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -167,6 +167,8 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key)
return "APP-MAX-PPN"; return "APP-MAX-PPN";
case ORTE_APP_PREFIX_DIR: case ORTE_APP_PREFIX_DIR:
return "APP-PREFIX-DIR"; return "APP-PREFIX-DIR";
case ORTE_APP_NO_CACHEDIR:
return "ORTE_APP_NO_CACHEDIR";
case ORTE_NODE_USERNAME: case ORTE_NODE_USERNAME:
return "NODE-USERNAME"; return "NODE-USERNAME";

Просмотреть файл

@ -44,6 +44,7 @@ typedef uint8_t orte_app_context_flags_t;
#define ORTE_APP_MANDATORY 13 // bool - flag if nodes requested in -host are "mandatory" vs "optional" #define ORTE_APP_MANDATORY 13 // bool - flag if nodes requested in -host are "mandatory" vs "optional"
#define ORTE_APP_MAX_PPN 14 // uint32 - maximum number of procs/node for this app #define ORTE_APP_MAX_PPN 14 // uint32 - maximum number of procs/node for this app
#define ORTE_APP_PREFIX_DIR 15 // string - prefix directory for this app, if override necessary #define ORTE_APP_PREFIX_DIR 15 // string - prefix directory for this app, if override necessary
#define ORTE_APP_NO_CACHEDIR 16 // bool - flag that a cache dir is not to be specified for a Singularity container
#define ORTE_APP_MAX_KEY 100 #define ORTE_APP_MAX_KEY 100