Per discussion with Josh, use the --preload-xxx command-line options to broadcast files to all nodes. Add a --set-cwd-to-session-dir option to start processes in their session directories. Add an OMPI_FILE_LOCATION environment variable to tell processes where their prepositioned files were placed.
This commit was SVN r27125.
This commit is contained in:
parent
81cd3e77cf
commit
b4a544ad2a
@ -112,7 +112,7 @@ int orte_filem_base_none_rm( orte_filem_base_request_t *request);
|
||||
int orte_filem_base_none_rm_nb( orte_filem_base_request_t *request);
|
||||
int orte_filem_base_none_wait( orte_filem_base_request_t *request);
|
||||
int orte_filem_base_none_wait_all( opal_list_t *request_list);
|
||||
int orte_filem_base_none_preposition_files(opal_list_t *file_set,
|
||||
int orte_filem_base_none_preposition_files(orte_job_t *jdata,
|
||||
orte_filem_completion_cbfunc_t cbfunc,
|
||||
void *cbdata);
|
||||
int orte_filem_base_none_link_local_files(orte_job_t *jdata);
|
||||
|
@ -222,7 +222,7 @@ int orte_filem_base_none_wait_all(opal_list_t *request_list)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_filem_base_none_preposition_files(opal_list_t *file_set,
|
||||
int orte_filem_base_none_preposition_files(orte_job_t *jdata,
|
||||
orte_filem_completion_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
|
@ -330,7 +330,7 @@ typedef void (*orte_filem_completion_cbfunc_t)(int status, void *cbdata);
|
||||
|
||||
/* Pre-position files
|
||||
*/
|
||||
typedef int (*orte_filem_base_preposition_files_fn_t)(opal_list_t *file_set,
|
||||
typedef int (*orte_filem_base_preposition_files_fn_t)(orte_job_t *jdata,
|
||||
orte_filem_completion_cbfunc_t cbfunc,
|
||||
void *cbdata);
|
||||
|
||||
|
@ -23,7 +23,6 @@ BEGIN_C_DECLS
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_filem_base_component_t mca_filem_raw_component;
|
||||
ORTE_DECLSPEC extern orte_filem_base_module_t mca_filem_raw_module;
|
||||
ORTE_DECLSPEC extern char *orte_filem_raw_files;
|
||||
|
||||
#define ORTE_FILEM_RAW_CHUNK_MAX 16384
|
||||
|
||||
|
@ -24,8 +24,6 @@
|
||||
const char *orte_filem_raw_component_version_string =
|
||||
"ORTE FILEM raw MCA component version " ORTE_VERSION;
|
||||
|
||||
char *orte_filem_raw_files = NULL;
|
||||
|
||||
/*
|
||||
* Local functionality
|
||||
*/
|
||||
@ -55,12 +53,6 @@ orte_filem_base_component_t mca_filem_raw_component = {
|
||||
|
||||
static int filem_raw_open(void)
|
||||
{
|
||||
mca_base_component_t *c = &mca_filem_raw_component.base_version;
|
||||
|
||||
mca_base_param_reg_string(c, "files",
|
||||
"Comma-separated list of files to preposition",
|
||||
false, false, NULL, &orte_filem_raw_files);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ static int raw_rm(orte_filem_base_request_t *req);
|
||||
static int raw_rm_nb(orte_filem_base_request_t *req);
|
||||
static int raw_wait(orte_filem_base_request_t *req);
|
||||
static int raw_wait_all(opal_list_t *reqs);
|
||||
static int raw_preposition_files(opal_list_t *file_set,
|
||||
static int raw_preposition_files(orte_job_t *jdata,
|
||||
orte_filem_completion_cbfunc_t cbfunc,
|
||||
void *cbdata);
|
||||
static int raw_link_local_files(orte_job_t *jdata);
|
||||
@ -278,71 +278,88 @@ static void recv_ack(int status, orte_process_name_t* sender,
|
||||
}
|
||||
}
|
||||
|
||||
static int raw_preposition_files(opal_list_t *fset,
|
||||
static int raw_preposition_files(orte_job_t *jdata,
|
||||
orte_filem_completion_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
#ifdef __WINDOWS__
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
#else
|
||||
orte_app_context_t *app;
|
||||
opal_list_item_t *item;
|
||||
orte_filem_base_file_set_t *fs;
|
||||
int fd, rc=ORTE_SUCCESS;
|
||||
orte_filem_raw_xfer_t *xfer;
|
||||
int flags, i;
|
||||
char **files=NULL;
|
||||
opal_list_t *file_set;
|
||||
orte_filem_raw_outbound_t *outbound;
|
||||
char *cptr;
|
||||
opal_list_t fsets;
|
||||
|
||||
if (NULL == fset) {
|
||||
/* see if any were provided via MCA param */
|
||||
if (NULL == orte_filem_raw_files) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
|
||||
"%s filem:raw: no files to position",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
/* just fire the callback */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(ORTE_SUCCESS, cbdata);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
/* cycle across the app_contexts looking for files or
|
||||
* binaries to be prepositioned
|
||||
*/
|
||||
OBJ_CONSTRUCT(&fsets, opal_list_t);
|
||||
for (i=0; i < jdata->apps->size; i++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
||||
continue;
|
||||
}
|
||||
/* otherwise, use the provided files */
|
||||
files = opal_argv_split(orte_filem_raw_files, ',');
|
||||
file_set = OBJ_NEW(opal_list_t);
|
||||
for (i=0; NULL != files[i]; i++) {
|
||||
if (app->preload_binary) {
|
||||
/* add the executable to our list */
|
||||
fs = OBJ_NEW(orte_filem_base_file_set_t);
|
||||
fs->local_target = strdup(files[i]);
|
||||
/* check any suffix for file type */
|
||||
if (NULL != (cptr = strchr(files[i], '.'))) {
|
||||
if (0 == strncmp(cptr, ".tar", 4)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
|
||||
"%s filem:raw: marking file %s as TAR",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
files[i]));
|
||||
fs->target_flag = ORTE_FILEM_TYPE_TAR;
|
||||
} else if (0 == strncmp(cptr, ".bz", 3)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
|
||||
"%s filem:raw: marking file %s as BZIP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
files[i]));
|
||||
fs->target_flag = ORTE_FILEM_TYPE_BZIP;
|
||||
} else if (0 == strncmp(cptr, ".gz", 3)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
|
||||
"%s filem:raw: marking file %s as GZIP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
files[i]));
|
||||
fs->target_flag = ORTE_FILEM_TYPE_GZIP;
|
||||
fs->local_target = strdup(app->app);
|
||||
fs->target_flag = ORTE_FILEM_TYPE_FILE;
|
||||
opal_list_append(&fsets, &fs->super);
|
||||
/* if we are preloading the binary, then the app must be in relative
|
||||
* syntax or we won't find it - the binary will be positioned in the
|
||||
* session dir
|
||||
*/
|
||||
if (opal_path_is_absolute(app->app)) {
|
||||
cptr = opal_basename(app->app);
|
||||
free(app->app);
|
||||
app->app = cptr;
|
||||
free(app->argv[0]);
|
||||
app->argv[0] = strdup(cptr);
|
||||
}
|
||||
}
|
||||
if (NULL != app->preload_files) {
|
||||
files = opal_argv_split(app->preload_files, ',');
|
||||
for (i=0; NULL != files[i]; i++) {
|
||||
fs = OBJ_NEW(orte_filem_base_file_set_t);
|
||||
fs->local_target = strdup(files[i]);
|
||||
/* check any suffix for file type */
|
||||
if (NULL != (cptr = strchr(files[i], '.'))) {
|
||||
if (0 == strncmp(cptr, ".tar", 4)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
|
||||
"%s filem:raw: marking file %s as TAR",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
files[i]));
|
||||
fs->target_flag = ORTE_FILEM_TYPE_TAR;
|
||||
} else if (0 == strncmp(cptr, ".bz", 3)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
|
||||
"%s filem:raw: marking file %s as BZIP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
files[i]));
|
||||
fs->target_flag = ORTE_FILEM_TYPE_BZIP;
|
||||
} else if (0 == strncmp(cptr, ".gz", 3)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
|
||||
"%s filem:raw: marking file %s as GZIP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
files[i]));
|
||||
fs->target_flag = ORTE_FILEM_TYPE_GZIP;
|
||||
} else {
|
||||
fs->target_flag = ORTE_FILEM_TYPE_FILE;
|
||||
}
|
||||
} else {
|
||||
fs->target_flag = ORTE_FILEM_TYPE_FILE;
|
||||
}
|
||||
} else {
|
||||
fs->target_flag = ORTE_FILEM_TYPE_FILE;
|
||||
if (NULL != app->preload_files_dest_dir) {
|
||||
fs->remote_target = opal_os_path(false, app->preload_files_dest_dir, files[i], NULL);
|
||||
}
|
||||
opal_list_append(&fsets, &fs->super);
|
||||
}
|
||||
opal_list_append(file_set, &fs->super);
|
||||
opal_argv_free(files);
|
||||
}
|
||||
} else {
|
||||
file_set = fset;
|
||||
}
|
||||
|
||||
/* track the outbound file sets */
|
||||
@ -355,14 +372,13 @@ static int raw_preposition_files(opal_list_t *fset,
|
||||
* fileset and initiate xcast transfer of each file to every
|
||||
* daemon
|
||||
*/
|
||||
for (item = opal_list_get_first(file_set);
|
||||
item != opal_list_get_end(file_set);
|
||||
item = opal_list_get_next(item)) {
|
||||
while (NULL != (item = opal_list_remove_first(&fsets))) {
|
||||
fs = (orte_filem_base_file_set_t*)item;
|
||||
/* attempt to open the specified file */
|
||||
if (0 >= (fd = open(fs->local_target, O_RDONLY))) {
|
||||
opal_output(0, "%s CANNOT ACCESS FILE %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fs->local_target);
|
||||
OBJ_RELEASE(item);
|
||||
rc = ORTE_ERROR;
|
||||
continue;
|
||||
}
|
||||
@ -389,7 +405,9 @@ static int raw_preposition_files(opal_list_t *fset,
|
||||
opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
|
||||
opal_event_add(&xfer->ev, 0);
|
||||
xfer->pending = true;
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&fsets);
|
||||
|
||||
return rc;
|
||||
#endif
|
||||
@ -798,8 +816,18 @@ static void recv_files(int status, orte_process_name_t* sender,
|
||||
incoming->type = type;
|
||||
/* define the full filename to point to the absolute location */
|
||||
if (NULL == target) {
|
||||
/* if it starts with "./", then we need to remove
|
||||
* that prefix
|
||||
*/
|
||||
if (0 == strncmp(file, "./", 2) ||
|
||||
0 == strncmp(file, "../", 3)) {
|
||||
cptr = strchr(file, '/');
|
||||
++cptr; /* step over the '/' */
|
||||
tmp = strdup(cptr);
|
||||
} else {
|
||||
tmp = strdup(file);
|
||||
}
|
||||
/* separate out the top-level directory of the target */
|
||||
tmp = strdup(file);
|
||||
if (NULL != (cptr = strchr(tmp, '/'))) {
|
||||
*cptr = '\0';
|
||||
}
|
||||
@ -814,8 +842,18 @@ static void recv_files(int status, orte_process_name_t* sender,
|
||||
incoming->top = strdup(target);
|
||||
incoming->fullpath = strdup(target);
|
||||
} else {
|
||||
/* if it starts with "./", then we need to remove
|
||||
* that prefix
|
||||
*/
|
||||
if (0 == strncmp(target, "./", 2) ||
|
||||
0 == strncmp(target, "../", 3)) {
|
||||
cptr = strchr(target, '/');
|
||||
++cptr; /* step over the '/' */
|
||||
tmp = strdup(cptr);
|
||||
} else {
|
||||
tmp = strdup(target);
|
||||
}
|
||||
/* separate out the top-level directory of the target */
|
||||
tmp = strdup(target);
|
||||
if (NULL != (cptr = strchr(tmp, '/'))) {
|
||||
*cptr = '\0';
|
||||
}
|
||||
|
@ -198,7 +198,7 @@ void orte_filem_rsh_work_pool_destruct( orte_filem_rsh_work_pool_item_t *obj) {
|
||||
}
|
||||
|
||||
/* placeholders */
|
||||
static int rsh_preposition_files(opal_list_t *file_set,
|
||||
static int rsh_preposition_files(orte_job_t *jdata,
|
||||
orte_filem_completion_cbfunc_t cbfunc,
|
||||
void *cbdata)
|
||||
{
|
||||
|
@ -44,6 +44,7 @@
|
||||
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/util/sys_limits.h"
|
||||
@ -808,11 +809,16 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int setup_child(orte_proc_t *child, orte_job_t *jobdat, char ***env)
|
||||
static int setup_child(orte_proc_t *child,
|
||||
orte_job_t *jobdat,
|
||||
orte_app_context_t *app)
|
||||
{
|
||||
char *param, *value;
|
||||
char *param, *value, ***env;
|
||||
int rc;
|
||||
|
||||
|
||||
/* for convenience */
|
||||
env = &app->env;
|
||||
|
||||
/* setup the jobid */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&value, child->name.jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -923,6 +929,57 @@ static int setup_child(orte_proc_t *child, orte_job_t *jobdat, char ***env)
|
||||
child->iof_complete = true;
|
||||
}
|
||||
|
||||
/* construct the proc's session dir name */
|
||||
if (NULL != orte_process_info.tmpdir_base) {
|
||||
value = strdup(orte_process_info.tmpdir_base);
|
||||
} else {
|
||||
value = NULL;
|
||||
}
|
||||
param = NULL;
|
||||
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(¶m, &value, NULL,
|
||||
orte_process_info.nodename,
|
||||
NULL, &child->name))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
free(value);
|
||||
/* pass an envar so the proc can find any files it had prepositioned */
|
||||
opal_setenv("OMPI_FILE_LOCATION", param, true, env);
|
||||
|
||||
/* if the user wanted the cwd to be the proc's session dir, then
|
||||
* switch to that location now
|
||||
*/
|
||||
if (app->set_cwd_to_session_dir) {
|
||||
/* create the session dir - we know it doesn't
|
||||
* already exist!
|
||||
*/
|
||||
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(param, S_IRWXU))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
/* doesn't exist with correct permissions, and/or we can't
|
||||
* create it - either way, we are done
|
||||
*/
|
||||
free(param);
|
||||
return rc;
|
||||
}
|
||||
/* change to it */
|
||||
if (0 != chdir(param)) {
|
||||
free(param);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
/* It seems that chdir doesn't
|
||||
* adjust the $PWD enviro variable when it changes the directory. This
|
||||
* can cause a user to get a different response when doing getcwd vs
|
||||
* looking at the enviro variable. To keep this consistent, we explicitly
|
||||
* ensure that the PWD enviro variable matches the CWD we moved to.
|
||||
*
|
||||
* NOTE: if a user's program does a chdir(), then $PWD will once
|
||||
* again not match getcwd! This is beyond our control - we are only
|
||||
* ensuring they start out matching.
|
||||
*/
|
||||
opal_setenv("PWD", param, true, env);
|
||||
}
|
||||
free(param);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -934,31 +991,33 @@ static int setup_path(orte_app_context_t *app)
|
||||
char *pathenv = NULL, *mpiexec_pathenv = NULL;
|
||||
char *full_search;
|
||||
|
||||
/* Try to change to the app's cwd and check that the app
|
||||
exists and is executable The function will
|
||||
take care of outputting a pretty error message, if required
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_util_check_context_cwd(app, true))) {
|
||||
/* do not ERROR_LOG - it will be reported elsewhere */
|
||||
goto CLEANUP;
|
||||
if (!app->set_cwd_to_session_dir) {
|
||||
/* Try to change to the app's cwd and check that the app
|
||||
exists and is executable The function will
|
||||
take care of outputting a pretty error message, if required
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_util_check_context_cwd(app, true))) {
|
||||
/* do not ERROR_LOG - it will be reported elsewhere */
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* The prior function will have done a chdir() to jump us to
|
||||
* wherever the app is to be executed. This could be either where
|
||||
* the user specified (via -wdir), or to the user's home directory
|
||||
* on this node if nothing was provided. It seems that chdir doesn't
|
||||
* adjust the $PWD enviro variable when it changes the directory. This
|
||||
* can cause a user to get a different response when doing getcwd vs
|
||||
* looking at the enviro variable. To keep this consistent, we explicitly
|
||||
* ensure that the PWD enviro variable matches the CWD we moved to.
|
||||
*
|
||||
* NOTE: if a user's program does a chdir(), then $PWD will once
|
||||
* again not match getcwd! This is beyond our control - we are only
|
||||
* ensuring they start out matching.
|
||||
*/
|
||||
getcwd(dir, sizeof(dir));
|
||||
opal_setenv("PWD", dir, true, &app->env);
|
||||
}
|
||||
|
||||
/* The prior function will have done a chdir() to jump us to
|
||||
* wherever the app is to be executed. This could be either where
|
||||
* the user specified (via -wdir), or to the user's home directory
|
||||
* on this node if nothing was provided. It seems that chdir doesn't
|
||||
* adjust the $PWD enviro variable when it changes the directory. This
|
||||
* can cause a user to get a different response when doing getcwd vs
|
||||
* looking at the enviro variable. To keep this consistent, we explicitly
|
||||
* ensure that the PWD enviro variable matches the CWD we moved to.
|
||||
*
|
||||
* NOTE: if a user's program does a chdir(), then $PWD will once
|
||||
* again not match getcwd! This is beyond our control - we are only
|
||||
* ensuring they start out matching.
|
||||
*/
|
||||
getcwd(dir, sizeof(dir));
|
||||
opal_setenv("PWD", dir, true, &app->env);
|
||||
|
||||
|
||||
/* Search for the OMPI_exec_path and PATH settings in the environment. */
|
||||
for (argvptr = app->env; *argvptr != NULL; argvptr++) {
|
||||
if (0 == strncmp("OMPI_exec_path=", *argvptr, 15)) {
|
||||
@ -1111,23 +1170,13 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
|
||||
*/
|
||||
asprintf(&num_app_ctx, "%lu", (unsigned long)jobdat->num_apps);
|
||||
|
||||
/* Now we preload any files that are needed. This is done on a per
|
||||
* app context basis, so let's take the opportunity to build
|
||||
* some common envars we need to pass for MPI-3 compatibility
|
||||
*/
|
||||
/* build some common envars we need to pass for MPI-3 compatibility */
|
||||
nps = NULL;
|
||||
firstranks = NULL;
|
||||
for (j=0; j < jobdat->apps->size; j++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, j))) {
|
||||
continue;
|
||||
}
|
||||
if(app->used_on_node &&
|
||||
(app->preload_binary || NULL != app->preload_files)) {
|
||||
if( ORTE_SUCCESS != (rc = orte_odls_base_preload_files_app_context(app)) ) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
/* JJH: Do not fail here, instead try to execute without the preloaded options*/
|
||||
}
|
||||
}
|
||||
opal_argv_append_nosize(&nps, ORTE_VPID_PRINT(app->num_procs));
|
||||
opal_argv_append_nosize(&firstranks, ORTE_VPID_PRINT(app->first_rank));
|
||||
}
|
||||
@ -1423,7 +1472,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
|
||||
/* setup the rest of the environment with the proc-specific items - these
|
||||
* will be overwritten for each child
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, &app->env))) {
|
||||
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, app))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
|
||||
continue;
|
||||
@ -2315,7 +2364,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child,
|
||||
app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, child->app_idx);
|
||||
|
||||
/* reset envars to match this child */
|
||||
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, &app->env))) {
|
||||
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, app))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
@ -160,11 +160,8 @@ void orte_plm_base_vm_ready(int fd, short args, void *cbdata)
|
||||
/* progress the job */
|
||||
caddy->jdata->state = ORTE_JOB_STATE_VM_READY;
|
||||
|
||||
/* position any required files - these would have been
|
||||
* specified via MCA parameter, so we don't have to
|
||||
* pass them here
|
||||
*/
|
||||
if (ORTE_SUCCESS != orte_filem.preposition_files(NULL, files_ready, caddy->jdata)) {
|
||||
/* position any required files */
|
||||
if (ORTE_SUCCESS != orte_filem.preposition_files(caddy->jdata, files_ready, caddy->jdata)) {
|
||||
ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
}
|
||||
|
||||
|
@ -9,8 +9,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -581,6 +581,18 @@ int orte_dt_pack_app_context(opal_buffer_t *buffer, const void *src,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the use-session-dir cwd flag */
|
||||
if (app_context[i]->set_cwd_to_session_dir) {
|
||||
user_specified = 1;
|
||||
} else {
|
||||
user_specified = 0;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
|
||||
(void*)(&user_specified), 1, OPAL_INT8))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the hostfile name */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
|
||||
(void*)(&(app_context[i]->hostfile)), 1, OPAL_STRING))) {
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -559,8 +559,8 @@ int orte_dt_print_app_context(char **output, char *prefix, orte_app_context_t *s
|
||||
tmp = tmp2;
|
||||
}
|
||||
|
||||
asprintf(&tmp2, "%s\n%s\tWorking dir: %s (user: %d)\n%s\tPrefix: %s\n%s\tHostfile: %s\tAdd-Hostfile: %s", tmp,
|
||||
pfx2, (NULL == src->cwd) ? "NULL" : src->cwd, (int) src->user_specified_cwd,
|
||||
asprintf(&tmp2, "%s\n%s\tWorking dir: %s (user: %d session-dir: %d)\n%s\tPrefix: %s\n%s\tHostfile: %s\tAdd-Hostfile: %s", tmp,
|
||||
pfx2, (NULL == src->cwd) ? "NULL" : src->cwd, (int) src->user_specified_cwd, (int) src->set_cwd_to_session_dir,
|
||||
pfx2, (NULL == src->prefix_dir) ? "NULL" : src->prefix_dir,
|
||||
pfx2, (NULL == src->hostfile) ? "NULL" : src->hostfile,
|
||||
(NULL == src->add_hostfile) ? "NULL" : src->add_hostfile);
|
||||
|
@ -639,6 +639,7 @@ int orte_dt_unpack_app_context(opal_buffer_t *buffer, void *dest,
|
||||
}
|
||||
|
||||
/* unpack the user-specified cwd flag */
|
||||
max_n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &user_specified,
|
||||
&max_n, OPAL_INT8))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -650,6 +651,19 @@ int orte_dt_unpack_app_context(opal_buffer_t *buffer, void *dest,
|
||||
app_context[i]->user_specified_cwd = false;
|
||||
}
|
||||
|
||||
/* unpack the use-session-dir cwd flag */
|
||||
max_n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &user_specified,
|
||||
&max_n, OPAL_INT8))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (user_specified) {
|
||||
app_context[i]->set_cwd_to_session_dir = true;
|
||||
} else {
|
||||
app_context[i]->set_cwd_to_session_dir = false;
|
||||
}
|
||||
|
||||
/* unpack the hostfile name */
|
||||
max_n = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &(app_context[i]->hostfile),
|
||||
|
@ -175,9 +175,6 @@ int orte_stat_history_size;
|
||||
/* envars to forward */
|
||||
char *orte_forward_envars = NULL;
|
||||
|
||||
/* preload binaries */
|
||||
bool orte_preload_binaries = false;
|
||||
|
||||
/* map-reduce mode */
|
||||
bool orte_map_reduce = false;
|
||||
|
||||
@ -576,6 +573,7 @@ static void orte_app_context_construct(orte_app_context_t* app_context)
|
||||
app_context->env=NULL;
|
||||
app_context->cwd=NULL;
|
||||
app_context->user_specified_cwd=false;
|
||||
app_context->set_cwd_to_session_dir = false;
|
||||
app_context->hostfile=NULL;
|
||||
app_context->add_hostfile=NULL;
|
||||
app_context->add_host = NULL;
|
||||
@ -1035,5 +1033,4 @@ OBJ_CLASS_INSTANCE(orte_job_map_t,
|
||||
opal_object_t,
|
||||
orte_job_map_construct,
|
||||
orte_job_map_destruct);
|
||||
|
||||
#endif
|
||||
|
@ -255,6 +255,8 @@ typedef struct {
|
||||
char *cwd;
|
||||
/** Whether the cwd was set by the user or by the system */
|
||||
bool user_specified_cwd;
|
||||
/** Whether to set the current working directory to the proc session dir */
|
||||
bool set_cwd_to_session_dir;
|
||||
/* Any hostfile that was specified */
|
||||
char *hostfile;
|
||||
/* Hostfile for adding hosts to an existing allocation */
|
||||
@ -676,9 +678,6 @@ ORTE_DECLSPEC extern int orte_stat_history_size;
|
||||
/* envars to forward */
|
||||
ORTE_DECLSPEC extern char *orte_forward_envars;
|
||||
|
||||
/* preload binaries */
|
||||
ORTE_DECLSPEC extern bool orte_preload_binaries;
|
||||
|
||||
/* map-reduce mode */
|
||||
ORTE_DECLSPEC extern bool orte_map_reduce;
|
||||
|
||||
|
@ -11,6 +11,8 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -538,11 +540,6 @@ int orte_register_params(void)
|
||||
false, false, NULL, &orte_forward_envars);
|
||||
|
||||
|
||||
mca_base_param_reg_int_name("orte", "preload_binaries",
|
||||
"Preload the binaries on remote machines before starting remote proceses",
|
||||
false, false, (int)false, &value);
|
||||
orte_preload_binaries = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
mca_base_param_reg_int_name("orte", "max_vm_size",
|
||||
"Maximum size of virtual machine - used to subdivide allocation",
|
||||
false, false, -1, &orte_max_vm_size);
|
||||
|
@ -142,6 +142,8 @@ struct mapreduce_globals_t {
|
||||
bool combiner;
|
||||
bool single_job;
|
||||
orte_job_t *combiner_job;
|
||||
bool preload_binaries;
|
||||
bool set_cwd_to_session_dir;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -246,8 +248,8 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
"Command used to start processes on remote nodes (default: orted)" },
|
||||
|
||||
/* Preload the binary on the remote machine */
|
||||
{ "orte", "preload", "binaries", 's', NULL, "preload-binary", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
{ NULL, NULL, NULL, 's', NULL, "preload-binary", 0,
|
||||
&mapreduce_globals.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Preload the binary on the remote machine before starting the remote process." },
|
||||
|
||||
/* Preload files on the remote machine */
|
||||
@ -404,6 +406,9 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
{ NULL, NULL, NULL, '\0', "wd", "wd", 1,
|
||||
&mapreduce_globals.wdir, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Synonym for --wdir" },
|
||||
{ NULL, NULL, NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0,
|
||||
&mapreduce_globals.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Set the working directory of the started processes to their session directory" },
|
||||
{ NULL, NULL, NULL, '\0', "path", "path", 1,
|
||||
&mapreduce_globals.path, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"PATH to be used to look for executables to start processes" },
|
||||
@ -946,13 +951,14 @@ static int init_globals(void)
|
||||
free( mapreduce_globals.path );
|
||||
mapreduce_globals.path = NULL;
|
||||
|
||||
mapreduce_globals.preload_binaries = false;
|
||||
mapreduce_globals.preload_files = NULL;
|
||||
mapreduce_globals.preload_files_dest_dir = NULL;
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
mapreduce_globals.sstore_load = NULL;
|
||||
#endif
|
||||
|
||||
mapreduce_globals.set_cwd_to_session_dir = false;
|
||||
mapreduce_globals.mapper = false;
|
||||
mapreduce_globals.reducer = false;
|
||||
mapreduce_globals.combiner = false;
|
||||
@ -1630,7 +1636,10 @@ static int create_app(int argc, char* argv[],
|
||||
app->cwd = opal_os_path(false, cwd, mapreduce_globals.wdir, NULL);
|
||||
}
|
||||
app->user_specified_cwd = true;
|
||||
} else {
|
||||
} else if (mapreduce_globals.set_cwd_to_session_dir) {
|
||||
app->set_cwd_to_session_dir = true;
|
||||
app->user_specified_cwd = true;
|
||||
} else {
|
||||
if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
|
||||
orte_show_help("help-orterun.txt", "orterun:init-failure",
|
||||
true, "get the cwd", rc);
|
||||
@ -1743,7 +1752,14 @@ static int create_app(int argc, char* argv[],
|
||||
total_num_apps++;
|
||||
|
||||
/* Preserve if we are to preload the binary */
|
||||
app->preload_binary = orte_preload_binaries;
|
||||
app->preload_binary = mapreduce_globals.preload_binaries;
|
||||
/* if we were told to cwd to the session dir and the app was given in
|
||||
* relative syntax, then we need to preload the binary to
|
||||
* find the app
|
||||
*/
|
||||
if (app->set_cwd_to_session_dir && !opal_path_is_absolute(app->app)) {
|
||||
app->preload_binary = true;
|
||||
}
|
||||
if( NULL != mapreduce_globals.preload_files)
|
||||
app->preload_files = strdup(mapreduce_globals.preload_files);
|
||||
else
|
||||
|
@ -221,8 +221,8 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
"Command used to start processes on remote nodes (default: orted)" },
|
||||
|
||||
/* Preload the binary on the remote machine */
|
||||
{ "orte", "preload", "binaries", 's', NULL, "preload-binary", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
{ NULL, NULL, NULL, 's', NULL, "preload-binary", 0,
|
||||
&orterun_globals.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Preload the binary on the remote machine before starting the remote process." },
|
||||
|
||||
/* Preload files on the remote machine */
|
||||
@ -437,6 +437,9 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
{ NULL, NULL, NULL, '\0', "wd", "wd", 1,
|
||||
&orterun_globals.wdir, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Synonym for --wdir" },
|
||||
{ NULL, NULL, NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0,
|
||||
&orterun_globals.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Set the working directory of the started processes to their session directory" },
|
||||
{ NULL, NULL, NULL, '\0', "path", "path", 1,
|
||||
&orterun_globals.path, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"PATH to be used to look for executables to start processes" },
|
||||
@ -983,11 +986,13 @@ static int init_globals(void)
|
||||
orterun_globals.appfile = NULL;
|
||||
if( NULL != orterun_globals.wdir )
|
||||
free( orterun_globals.wdir );
|
||||
orterun_globals.set_cwd_to_session_dir = false;
|
||||
orterun_globals.wdir = NULL;
|
||||
if( NULL != orterun_globals.path )
|
||||
free( orterun_globals.path );
|
||||
orterun_globals.path = NULL;
|
||||
|
||||
orterun_globals.preload_binaries = false;
|
||||
orterun_globals.preload_files = NULL;
|
||||
orterun_globals.preload_files_dest_dir = NULL;
|
||||
|
||||
@ -1636,6 +1641,9 @@ static int create_app(int argc, char* argv[],
|
||||
app->cwd = opal_os_path(false, cwd, orterun_globals.wdir, NULL);
|
||||
}
|
||||
app->user_specified_cwd = true;
|
||||
} else if (orterun_globals.set_cwd_to_session_dir) {
|
||||
app->set_cwd_to_session_dir = true;
|
||||
app->user_specified_cwd = true;
|
||||
} else {
|
||||
if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
|
||||
orte_show_help("help-orterun.txt", "orterun:init-failure",
|
||||
@ -1750,16 +1758,25 @@ static int create_app(int argc, char* argv[],
|
||||
|
||||
total_num_apps++;
|
||||
|
||||
/* Preserve if we are to preload the binary */
|
||||
app->preload_binary = orte_preload_binaries;
|
||||
if( NULL != orterun_globals.preload_files)
|
||||
/* Capture any preload flags */
|
||||
app->preload_binary = orterun_globals.preload_binaries;
|
||||
/* if we were told to cwd to the session dir and the app was given in
|
||||
* relative syntax, then we need to preload the binary to
|
||||
* find the app
|
||||
*/
|
||||
if (app->set_cwd_to_session_dir && !opal_path_is_absolute(app->argv[0])) {
|
||||
app->preload_binary = true;
|
||||
}
|
||||
if (NULL != orterun_globals.preload_files) {
|
||||
app->preload_files = strdup(orterun_globals.preload_files);
|
||||
else
|
||||
} else {
|
||||
app->preload_files = NULL;
|
||||
if( NULL != orterun_globals.preload_files_dest_dir)
|
||||
}
|
||||
if (NULL != orterun_globals.preload_files_dest_dir) {
|
||||
app->preload_files_dest_dir = strdup(orterun_globals.preload_files_dest_dir);
|
||||
else
|
||||
} else {
|
||||
app->preload_files_dest_dir = NULL;
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
if( NULL != orterun_globals.sstore_load ) {
|
||||
|
@ -10,6 +10,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -45,6 +47,7 @@ struct orterun_globals_t {
|
||||
char *env_val;
|
||||
char *appfile;
|
||||
char *wdir;
|
||||
bool set_cwd_to_session_dir;
|
||||
char *path;
|
||||
char *preload_files;
|
||||
char *preload_files_dest_dir;
|
||||
@ -60,6 +63,7 @@ struct orterun_globals_t {
|
||||
char *sstore_load;
|
||||
#endif
|
||||
bool disable_recovery;
|
||||
bool preload_binaries;
|
||||
};
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user