1
1

Per discussion with Josh, use the --preload-xxx command-line options to broadcast files to all nodes. Add a --set-cwd-to-session-dir option to start procs in their session directories. Add an OMPI_FILE_LOCATION environment variable to tell procs where their prepositioned files went.

This commit was SVN r27125.
This commit is contained in:
Ralph Castain 2012-08-23 21:28:05 +00:00
parent 81cd3e77cf
commit b4a544ad2a
18 changed files with 268 additions and 137 deletions

View File

@ -112,7 +112,7 @@ int orte_filem_base_none_rm( orte_filem_base_request_t *request);
int orte_filem_base_none_rm_nb( orte_filem_base_request_t *request);
int orte_filem_base_none_wait( orte_filem_base_request_t *request);
int orte_filem_base_none_wait_all( opal_list_t *request_list);
int orte_filem_base_none_preposition_files(opal_list_t *file_set,
int orte_filem_base_none_preposition_files(orte_job_t *jdata,
orte_filem_completion_cbfunc_t cbfunc,
void *cbdata);
int orte_filem_base_none_link_local_files(orte_job_t *jdata);

View File

@ -222,7 +222,7 @@ int orte_filem_base_none_wait_all(opal_list_t *request_list)
return ORTE_SUCCESS;
}
int orte_filem_base_none_preposition_files(opal_list_t *file_set,
int orte_filem_base_none_preposition_files(orte_job_t *jdata,
orte_filem_completion_cbfunc_t cbfunc,
void *cbdata)
{

View File

@ -330,7 +330,7 @@ typedef void (*orte_filem_completion_cbfunc_t)(int status, void *cbdata);
/* Pre-position files
*/
typedef int (*orte_filem_base_preposition_files_fn_t)(opal_list_t *file_set,
typedef int (*orte_filem_base_preposition_files_fn_t)(orte_job_t *jdata,
orte_filem_completion_cbfunc_t cbfunc,
void *cbdata);

View File

@ -23,7 +23,6 @@ BEGIN_C_DECLS
ORTE_MODULE_DECLSPEC extern orte_filem_base_component_t mca_filem_raw_component;
ORTE_DECLSPEC extern orte_filem_base_module_t mca_filem_raw_module;
ORTE_DECLSPEC extern char *orte_filem_raw_files;
#define ORTE_FILEM_RAW_CHUNK_MAX 16384

View File

@ -24,8 +24,6 @@
const char *orte_filem_raw_component_version_string =
"ORTE FILEM raw MCA component version " ORTE_VERSION;
char *orte_filem_raw_files = NULL;
/*
* Local functionality
*/
@ -55,12 +53,6 @@ orte_filem_base_component_t mca_filem_raw_component = {
static int filem_raw_open(void)
{
mca_base_component_t *c = &mca_filem_raw_component.base_version;
mca_base_param_reg_string(c, "files",
"Comma-separated list of files to preposition",
false, false, NULL, &orte_filem_raw_files);
return ORTE_SUCCESS;
}

View File

@ -66,7 +66,7 @@ static int raw_rm(orte_filem_base_request_t *req);
static int raw_rm_nb(orte_filem_base_request_t *req);
static int raw_wait(orte_filem_base_request_t *req);
static int raw_wait_all(opal_list_t *reqs);
static int raw_preposition_files(opal_list_t *file_set,
static int raw_preposition_files(orte_job_t *jdata,
orte_filem_completion_cbfunc_t cbfunc,
void *cbdata);
static int raw_link_local_files(orte_job_t *jdata);
@ -278,71 +278,88 @@ static void recv_ack(int status, orte_process_name_t* sender,
}
}
static int raw_preposition_files(opal_list_t *fset,
static int raw_preposition_files(orte_job_t *jdata,
orte_filem_completion_cbfunc_t cbfunc,
void *cbdata)
{
#ifdef __WINDOWS__
return ORTE_ERR_NOT_SUPPORTED;
#else
orte_app_context_t *app;
opal_list_item_t *item;
orte_filem_base_file_set_t *fs;
int fd, rc=ORTE_SUCCESS;
orte_filem_raw_xfer_t *xfer;
int flags, i;
char **files=NULL;
opal_list_t *file_set;
orte_filem_raw_outbound_t *outbound;
char *cptr;
opal_list_t fsets;
if (NULL == fset) {
/* see if any were provided via MCA param */
if (NULL == orte_filem_raw_files) {
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
"%s filem:raw: no files to position",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* just fire the callback */
if (NULL != cbfunc) {
cbfunc(ORTE_SUCCESS, cbdata);
}
return ORTE_SUCCESS;
/* cycle across the app_contexts looking for files or
* binaries to be prepositioned
*/
OBJ_CONSTRUCT(&fsets, opal_list_t);
for (i=0; i < jdata->apps->size; i++) {
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
continue;
}
/* otherwise, use the provided files */
files = opal_argv_split(orte_filem_raw_files, ',');
file_set = OBJ_NEW(opal_list_t);
for (i=0; NULL != files[i]; i++) {
if (app->preload_binary) {
/* add the executable to our list */
fs = OBJ_NEW(orte_filem_base_file_set_t);
fs->local_target = strdup(files[i]);
/* check any suffix for file type */
if (NULL != (cptr = strchr(files[i], '.'))) {
if (0 == strncmp(cptr, ".tar", 4)) {
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
"%s filem:raw: marking file %s as TAR",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
files[i]));
fs->target_flag = ORTE_FILEM_TYPE_TAR;
} else if (0 == strncmp(cptr, ".bz", 3)) {
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
"%s filem:raw: marking file %s as BZIP",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
files[i]));
fs->target_flag = ORTE_FILEM_TYPE_BZIP;
} else if (0 == strncmp(cptr, ".gz", 3)) {
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
"%s filem:raw: marking file %s as GZIP",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
files[i]));
fs->target_flag = ORTE_FILEM_TYPE_GZIP;
fs->local_target = strdup(app->app);
fs->target_flag = ORTE_FILEM_TYPE_FILE;
opal_list_append(&fsets, &fs->super);
/* if we are preloading the binary, then the app must be in relative
* syntax or we won't find it - the binary will be positioned in the
* session dir
*/
if (opal_path_is_absolute(app->app)) {
cptr = opal_basename(app->app);
free(app->app);
app->app = cptr;
free(app->argv[0]);
app->argv[0] = strdup(cptr);
}
}
if (NULL != app->preload_files) {
files = opal_argv_split(app->preload_files, ',');
for (i=0; NULL != files[i]; i++) {
fs = OBJ_NEW(orte_filem_base_file_set_t);
fs->local_target = strdup(files[i]);
/* check any suffix for file type */
if (NULL != (cptr = strchr(files[i], '.'))) {
if (0 == strncmp(cptr, ".tar", 4)) {
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
"%s filem:raw: marking file %s as TAR",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
files[i]));
fs->target_flag = ORTE_FILEM_TYPE_TAR;
} else if (0 == strncmp(cptr, ".bz", 3)) {
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
"%s filem:raw: marking file %s as BZIP",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
files[i]));
fs->target_flag = ORTE_FILEM_TYPE_BZIP;
} else if (0 == strncmp(cptr, ".gz", 3)) {
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_output,
"%s filem:raw: marking file %s as GZIP",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
files[i]));
fs->target_flag = ORTE_FILEM_TYPE_GZIP;
} else {
fs->target_flag = ORTE_FILEM_TYPE_FILE;
}
} else {
fs->target_flag = ORTE_FILEM_TYPE_FILE;
}
} else {
fs->target_flag = ORTE_FILEM_TYPE_FILE;
if (NULL != app->preload_files_dest_dir) {
fs->remote_target = opal_os_path(false, app->preload_files_dest_dir, files[i], NULL);
}
opal_list_append(&fsets, &fs->super);
}
opal_list_append(file_set, &fs->super);
opal_argv_free(files);
}
} else {
file_set = fset;
}
/* track the outbound file sets */
@ -355,14 +372,13 @@ static int raw_preposition_files(opal_list_t *fset,
* fileset and initiate xcast transfer of each file to every
* daemon
*/
for (item = opal_list_get_first(file_set);
item != opal_list_get_end(file_set);
item = opal_list_get_next(item)) {
while (NULL != (item = opal_list_remove_first(&fsets))) {
fs = (orte_filem_base_file_set_t*)item;
/* attempt to open the specified file */
if (0 >= (fd = open(fs->local_target, O_RDONLY))) {
opal_output(0, "%s CANNOT ACCESS FILE %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), fs->local_target);
OBJ_RELEASE(item);
rc = ORTE_ERROR;
continue;
}
@ -389,7 +405,9 @@ static int raw_preposition_files(opal_list_t *fset,
opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
opal_event_add(&xfer->ev, 0);
xfer->pending = true;
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&fsets);
return rc;
#endif
@ -798,8 +816,18 @@ static void recv_files(int status, orte_process_name_t* sender,
incoming->type = type;
/* define the full filename to point to the absolute location */
if (NULL == target) {
/* if it starts with "./" or "../", then we need to remove
* that prefix
*/
if (0 == strncmp(file, "./", 2) ||
0 == strncmp(file, "../", 3)) {
cptr = strchr(file, '/');
++cptr; /* step over the '/' */
tmp = strdup(cptr);
} else {
tmp = strdup(file);
}
/* separate out the top-level directory of the target */
tmp = strdup(file);
if (NULL != (cptr = strchr(tmp, '/'))) {
*cptr = '\0';
}
@ -814,8 +842,18 @@ static void recv_files(int status, orte_process_name_t* sender,
incoming->top = strdup(target);
incoming->fullpath = strdup(target);
} else {
/* if it starts with "./" or "../", then we need to remove
* that prefix
*/
if (0 == strncmp(target, "./", 2) ||
0 == strncmp(target, "../", 3)) {
cptr = strchr(target, '/');
++cptr; /* step over the '/' */
tmp = strdup(cptr);
} else {
tmp = strdup(target);
}
/* separate out the top-level directory of the target */
tmp = strdup(target);
if (NULL != (cptr = strchr(tmp, '/'))) {
*cptr = '\0';
}

View File

@ -198,7 +198,7 @@ void orte_filem_rsh_work_pool_destruct( orte_filem_rsh_work_pool_item_t *obj) {
}
/* placeholders */
static int rsh_preposition_files(opal_list_t *file_set,
static int rsh_preposition_files(orte_job_t *jdata,
orte_filem_completion_cbfunc_t cbfunc,
void *cbdata)
{

View File

@ -44,6 +44,7 @@
#include "opal/util/opal_environ.h"
#include "opal/util/argv.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/os_path.h"
#include "opal/util/path.h"
#include "opal/util/sys_limits.h"
@ -808,11 +809,16 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,
return ORTE_SUCCESS;
}
static int setup_child(orte_proc_t *child, orte_job_t *jobdat, char ***env)
static int setup_child(orte_proc_t *child,
orte_job_t *jobdat,
orte_app_context_t *app)
{
char *param, *value;
char *param, *value, ***env;
int rc;
/* for convenience */
env = &app->env;
/* setup the jobid */
if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&value, child->name.jobid))) {
ORTE_ERROR_LOG(rc);
@ -923,6 +929,57 @@ static int setup_child(orte_proc_t *child, orte_job_t *jobdat, char ***env)
child->iof_complete = true;
}
/* construct the proc's session dir name */
if (NULL != orte_process_info.tmpdir_base) {
value = strdup(orte_process_info.tmpdir_base);
} else {
value = NULL;
}
param = NULL;
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&param, &value, NULL,
orte_process_info.nodename,
NULL, &child->name))) {
ORTE_ERROR_LOG(rc);
return rc;
}
free(value);
/* pass an envar so the proc can find any files it had prepositioned */
opal_setenv("OMPI_FILE_LOCATION", param, true, env);
/* if the user wanted the cwd to be the proc's session dir, then
* switch to that location now
*/
if (app->set_cwd_to_session_dir) {
/* create the session dir - we know it doesn't
* already exist!
*/
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(param, S_IRWXU))) {
ORTE_ERROR_LOG(rc);
/* doesn't exist with correct permissions, and/or we can't
* create it - either way, we are done
*/
free(param);
return rc;
}
/* change to it */
if (0 != chdir(param)) {
free(param);
return ORTE_ERROR;
}
/* It seems that chdir doesn't
* adjust the $PWD enviro variable when it changes the directory. This
* can cause a user to get a different response when doing getcwd vs
* looking at the enviro variable. To keep this consistent, we explicitly
* ensure that the PWD enviro variable matches the CWD we moved to.
*
* NOTE: if a user's program does a chdir(), then $PWD will once
* again not match getcwd! This is beyond our control - we are only
* ensuring they start out matching.
*/
opal_setenv("PWD", param, true, env);
}
free(param);
return ORTE_SUCCESS;
}
@ -934,31 +991,33 @@ static int setup_path(orte_app_context_t *app)
char *pathenv = NULL, *mpiexec_pathenv = NULL;
char *full_search;
/* Try to change to the app's cwd and check that the app
exists and is executable The function will
take care of outputting a pretty error message, if required
*/
if (ORTE_SUCCESS != (rc = orte_util_check_context_cwd(app, true))) {
/* do not ERROR_LOG - it will be reported elsewhere */
goto CLEANUP;
if (!app->set_cwd_to_session_dir) {
/* Try to change to the app's cwd and check that the app
exists and is executable. The function will
take care of outputting a pretty error message, if required
*/
if (ORTE_SUCCESS != (rc = orte_util_check_context_cwd(app, true))) {
/* do not ERROR_LOG - it will be reported elsewhere */
goto CLEANUP;
}
/* The prior function will have done a chdir() to jump us to
* wherever the app is to be executed. This could be either where
* the user specified (via -wdir), or to the user's home directory
* on this node if nothing was provided. It seems that chdir doesn't
* adjust the $PWD enviro variable when it changes the directory. This
* can cause a user to get a different response when doing getcwd vs
* looking at the enviro variable. To keep this consistent, we explicitly
* ensure that the PWD enviro variable matches the CWD we moved to.
*
* NOTE: if a user's program does a chdir(), then $PWD will once
* again not match getcwd! This is beyond our control - we are only
* ensuring they start out matching.
*/
getcwd(dir, sizeof(dir));
opal_setenv("PWD", dir, true, &app->env);
}
/* The prior function will have done a chdir() to jump us to
* wherever the app is to be executed. This could be either where
* the user specified (via -wdir), or to the user's home directory
* on this node if nothing was provided. It seems that chdir doesn't
* adjust the $PWD enviro variable when it changes the directory. This
* can cause a user to get a different response when doing getcwd vs
* looking at the enviro variable. To keep this consistent, we explicitly
* ensure that the PWD enviro variable matches the CWD we moved to.
*
* NOTE: if a user's program does a chdir(), then $PWD will once
* again not match getcwd! This is beyond our control - we are only
* ensuring they start out matching.
*/
getcwd(dir, sizeof(dir));
opal_setenv("PWD", dir, true, &app->env);
/* Search for the OMPI_exec_path and PATH settings in the environment. */
for (argvptr = app->env; *argvptr != NULL; argvptr++) {
if (0 == strncmp("OMPI_exec_path=", *argvptr, 15)) {
@ -1111,23 +1170,13 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
*/
asprintf(&num_app_ctx, "%lu", (unsigned long)jobdat->num_apps);
/* Now we preload any files that are needed. This is done on a per
* app context basis, so let's take the opportunity to build
* some common envars we need to pass for MPI-3 compatibility
*/
/* build some common envars we need to pass for MPI-3 compatibility */
nps = NULL;
firstranks = NULL;
for (j=0; j < jobdat->apps->size; j++) {
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, j))) {
continue;
}
if(app->used_on_node &&
(app->preload_binary || NULL != app->preload_files)) {
if( ORTE_SUCCESS != (rc = orte_odls_base_preload_files_app_context(app)) ) {
ORTE_ERROR_LOG(rc);
/* JJH: Do not fail here, instead try to execute without the preloaded options*/
}
}
opal_argv_append_nosize(&nps, ORTE_VPID_PRINT(app->num_procs));
opal_argv_append_nosize(&firstranks, ORTE_VPID_PRINT(app->first_rank));
}
@ -1423,7 +1472,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
/* setup the rest of the environment with the proc-specific items - these
* will be overwritten for each child
*/
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, &app->env))) {
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, app))) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
continue;
@ -2315,7 +2364,7 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child,
app = (orte_app_context_t*)opal_pointer_array_get_item(jobdat->apps, child->app_idx);
/* reset envars to match this child */
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, &app->env))) {
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, app))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}

View File

@ -160,11 +160,8 @@ void orte_plm_base_vm_ready(int fd, short args, void *cbdata)
/* progress the job */
caddy->jdata->state = ORTE_JOB_STATE_VM_READY;
/* position any required files - these would have been
* specified via MCA parameter, so we don't have to
* pass them here
*/
if (ORTE_SUCCESS != orte_filem.preposition_files(NULL, files_ready, caddy->jdata)) {
/* position any required files */
if (ORTE_SUCCESS != orte_filem.preposition_files(caddy->jdata, files_ready, caddy->jdata)) {
ORTE_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
}

View File

@ -9,8 +9,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
@ -581,6 +581,18 @@ int orte_dt_pack_app_context(opal_buffer_t *buffer, const void *src,
return rc;
}
/* pack the use-session-dir cwd flag */
if (app_context[i]->set_cwd_to_session_dir) {
user_specified = 1;
} else {
user_specified = 0;
}
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&user_specified), 1, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the hostfile name */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(app_context[i]->hostfile)), 1, OPAL_STRING))) {

View File

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
@ -559,8 +559,8 @@ int orte_dt_print_app_context(char **output, char *prefix, orte_app_context_t *s
tmp = tmp2;
}
asprintf(&tmp2, "%s\n%s\tWorking dir: %s (user: %d)\n%s\tPrefix: %s\n%s\tHostfile: %s\tAdd-Hostfile: %s", tmp,
pfx2, (NULL == src->cwd) ? "NULL" : src->cwd, (int) src->user_specified_cwd,
asprintf(&tmp2, "%s\n%s\tWorking dir: %s (user: %d session-dir: %d)\n%s\tPrefix: %s\n%s\tHostfile: %s\tAdd-Hostfile: %s", tmp,
pfx2, (NULL == src->cwd) ? "NULL" : src->cwd, (int) src->user_specified_cwd, (int) src->set_cwd_to_session_dir,
pfx2, (NULL == src->prefix_dir) ? "NULL" : src->prefix_dir,
pfx2, (NULL == src->hostfile) ? "NULL" : src->hostfile,
(NULL == src->add_hostfile) ? "NULL" : src->add_hostfile);

View File

@ -639,6 +639,7 @@ int orte_dt_unpack_app_context(opal_buffer_t *buffer, void *dest,
}
/* unpack the user-specified cwd flag */
max_n=1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &user_specified,
&max_n, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
@ -650,6 +651,19 @@ int orte_dt_unpack_app_context(opal_buffer_t *buffer, void *dest,
app_context[i]->user_specified_cwd = false;
}
/* unpack the use-session-dir cwd flag */
max_n=1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &user_specified,
&max_n, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (user_specified) {
app_context[i]->set_cwd_to_session_dir = true;
} else {
app_context[i]->set_cwd_to_session_dir = false;
}
/* unpack the hostfile name */
max_n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &(app_context[i]->hostfile),

View File

@ -175,9 +175,6 @@ int orte_stat_history_size;
/* envars to forward */
char *orte_forward_envars = NULL;
/* preload binaries */
bool orte_preload_binaries = false;
/* map-reduce mode */
bool orte_map_reduce = false;
@ -576,6 +573,7 @@ static void orte_app_context_construct(orte_app_context_t* app_context)
app_context->env=NULL;
app_context->cwd=NULL;
app_context->user_specified_cwd=false;
app_context->set_cwd_to_session_dir = false;
app_context->hostfile=NULL;
app_context->add_hostfile=NULL;
app_context->add_host = NULL;
@ -1035,5 +1033,4 @@ OBJ_CLASS_INSTANCE(orte_job_map_t,
opal_object_t,
orte_job_map_construct,
orte_job_map_destruct);
#endif

View File

@ -255,6 +255,8 @@ typedef struct {
char *cwd;
/** Whether the cwd was set by the user or by the system */
bool user_specified_cwd;
/** Whether to set the current working directory to the proc session dir */
bool set_cwd_to_session_dir;
/* Any hostfile that was specified */
char *hostfile;
/* Hostfile for adding hosts to an existing allocation */
@ -676,9 +678,6 @@ ORTE_DECLSPEC extern int orte_stat_history_size;
/* envars to forward */
ORTE_DECLSPEC extern char *orte_forward_envars;
/* preload binaries */
ORTE_DECLSPEC extern bool orte_preload_binaries;
/* map-reduce mode */
ORTE_DECLSPEC extern bool orte_map_reduce;

View File

@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -538,11 +540,6 @@ int orte_register_params(void)
false, false, NULL, &orte_forward_envars);
mca_base_param_reg_int_name("orte", "preload_binaries",
"Preload the binaries on remote machines before starting remote proceses",
false, false, (int)false, &value);
orte_preload_binaries = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int_name("orte", "max_vm_size",
"Maximum size of virtual machine - used to subdivide allocation",
false, false, -1, &orte_max_vm_size);

View File

@ -142,6 +142,8 @@ struct mapreduce_globals_t {
bool combiner;
bool single_job;
orte_job_t *combiner_job;
bool preload_binaries;
bool set_cwd_to_session_dir;
};
/*
@ -246,8 +248,8 @@ static opal_cmd_line_init_t cmd_line_init[] = {
"Command used to start processes on remote nodes (default: orted)" },
/* Preload the binary on the remote machine */
{ "orte", "preload", "binaries", 's', NULL, "preload-binary", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
{ NULL, NULL, NULL, 's', NULL, "preload-binary", 0,
&mapreduce_globals.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL,
"Preload the binary on the remote machine before starting the remote process." },
/* Preload files on the remote machine */
@ -404,6 +406,9 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ NULL, NULL, NULL, '\0', "wd", "wd", 1,
&mapreduce_globals.wdir, OPAL_CMD_LINE_TYPE_STRING,
"Synonym for --wdir" },
{ NULL, NULL, NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0,
&mapreduce_globals.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL,
"Set the working directory of the started processes to their session directory" },
{ NULL, NULL, NULL, '\0', "path", "path", 1,
&mapreduce_globals.path, OPAL_CMD_LINE_TYPE_STRING,
"PATH to be used to look for executables to start processes" },
@ -946,13 +951,14 @@ static int init_globals(void)
free( mapreduce_globals.path );
mapreduce_globals.path = NULL;
mapreduce_globals.preload_binaries = false;
mapreduce_globals.preload_files = NULL;
mapreduce_globals.preload_files_dest_dir = NULL;
#if OPAL_ENABLE_FT_CR == 1
mapreduce_globals.sstore_load = NULL;
#endif
mapreduce_globals.set_cwd_to_session_dir = false;
mapreduce_globals.mapper = false;
mapreduce_globals.reducer = false;
mapreduce_globals.combiner = false;
@ -1630,7 +1636,10 @@ static int create_app(int argc, char* argv[],
app->cwd = opal_os_path(false, cwd, mapreduce_globals.wdir, NULL);
}
app->user_specified_cwd = true;
} else {
} else if (mapreduce_globals.set_cwd_to_session_dir) {
app->set_cwd_to_session_dir = true;
app->user_specified_cwd = true;
} else {
if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
orte_show_help("help-orterun.txt", "orterun:init-failure",
true, "get the cwd", rc);
@ -1743,7 +1752,14 @@ static int create_app(int argc, char* argv[],
total_num_apps++;
/* Preserve if we are to preload the binary */
app->preload_binary = orte_preload_binaries;
app->preload_binary = mapreduce_globals.preload_binaries;
/* if we were told to cwd to the session dir and the app was given in
* relative syntax, then we need to preload the binary to
* find the app
*/
if (app->set_cwd_to_session_dir && !opal_path_is_absolute(app->app)) {
app->preload_binary = true;
}
if( NULL != mapreduce_globals.preload_files)
app->preload_files = strdup(mapreduce_globals.preload_files);
else

View File

@ -221,8 +221,8 @@ static opal_cmd_line_init_t cmd_line_init[] = {
"Command used to start processes on remote nodes (default: orted)" },
/* Preload the binary on the remote machine */
{ "orte", "preload", "binaries", 's', NULL, "preload-binary", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
{ NULL, NULL, NULL, 's', NULL, "preload-binary", 0,
&orterun_globals.preload_binaries, OPAL_CMD_LINE_TYPE_BOOL,
"Preload the binary on the remote machine before starting the remote process." },
/* Preload files on the remote machine */
@ -437,6 +437,9 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ NULL, NULL, NULL, '\0', "wd", "wd", 1,
&orterun_globals.wdir, OPAL_CMD_LINE_TYPE_STRING,
"Synonym for --wdir" },
{ NULL, NULL, NULL, '\0', "set-cwd-to-session-dir", "set-cwd-to-session-dir", 0,
&orterun_globals.set_cwd_to_session_dir, OPAL_CMD_LINE_TYPE_BOOL,
"Set the working directory of the started processes to their session directory" },
{ NULL, NULL, NULL, '\0', "path", "path", 1,
&orterun_globals.path, OPAL_CMD_LINE_TYPE_STRING,
"PATH to be used to look for executables to start processes" },
@ -983,11 +986,13 @@ static int init_globals(void)
orterun_globals.appfile = NULL;
if( NULL != orterun_globals.wdir )
free( orterun_globals.wdir );
orterun_globals.set_cwd_to_session_dir = false;
orterun_globals.wdir = NULL;
if( NULL != orterun_globals.path )
free( orterun_globals.path );
orterun_globals.path = NULL;
orterun_globals.preload_binaries = false;
orterun_globals.preload_files = NULL;
orterun_globals.preload_files_dest_dir = NULL;
@ -1636,6 +1641,9 @@ static int create_app(int argc, char* argv[],
app->cwd = opal_os_path(false, cwd, orterun_globals.wdir, NULL);
}
app->user_specified_cwd = true;
} else if (orterun_globals.set_cwd_to_session_dir) {
app->set_cwd_to_session_dir = true;
app->user_specified_cwd = true;
} else {
if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
orte_show_help("help-orterun.txt", "orterun:init-failure",
@ -1750,16 +1758,25 @@ static int create_app(int argc, char* argv[],
total_num_apps++;
/* Preserve if we are to preload the binary */
app->preload_binary = orte_preload_binaries;
if( NULL != orterun_globals.preload_files)
/* Capture any preload flags */
app->preload_binary = orterun_globals.preload_binaries;
/* if we were told to cwd to the session dir and the app was given in
* relative syntax, then we need to preload the binary to
* find the app
*/
if (app->set_cwd_to_session_dir && !opal_path_is_absolute(app->argv[0])) {
app->preload_binary = true;
}
if (NULL != orterun_globals.preload_files) {
app->preload_files = strdup(orterun_globals.preload_files);
else
} else {
app->preload_files = NULL;
if( NULL != orterun_globals.preload_files_dest_dir)
}
if (NULL != orterun_globals.preload_files_dest_dir) {
app->preload_files_dest_dir = strdup(orterun_globals.preload_files_dest_dir);
else
} else {
app->preload_files_dest_dir = NULL;
}
#if OPAL_ENABLE_FT_CR == 1
if( NULL != orterun_globals.sstore_load ) {

View File

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -45,6 +47,7 @@ struct orterun_globals_t {
char *env_val;
char *appfile;
char *wdir;
bool set_cwd_to_session_dir;
char *path;
char *preload_files;
char *preload_files_dest_dir;
@ -60,6 +63,7 @@ struct orterun_globals_t {
char *sstore_load;
#endif
bool disable_recovery;
bool preload_binaries;
};
/**