1
1

Improve the efficiency of the launch system by changing the outer loop to being over app_context, and adding a flag to the app_context so the daemon can record that "this app is on my node" when decoding the launch msg.

If the --wdir option is given, check to see if the user provided a relative path. If so, convert it to an absolute path. This is needed to maintain consistent behavior across environements. Some environments automatically chdir to your current working directory when launching the remote orted, while others (e.g., ssh) don't. This levels the playing field and reduces user surprise.

This commit was SVN r20342.
Этот коммит содержится в:
Ralph Castain 2009-01-25 12:39:24 +00:00
родитель 40b6ed4a40
Коммит 0435108834
4 изменённых файлов: 190 добавлений и 170 удалений

Просмотреть файл

@ -620,6 +620,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
if (NULL != slot_str && NULL != slot_str[j]) { if (NULL != slot_str && NULL != slot_str[j]) {
child->slot_list = strdup(slot_str[j]); child->slot_list = strdup(slot_str[j]);
} }
/* mark that this app_context is being used on this node */
jobdat->apps[app_idx[j]]->used_on_node = true;
/* protect operation on the global list of children */ /* protect operation on the global list of children */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex); OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
opal_list_append(&orte_local_children, &child->super); opal_list_append(&orte_local_children, &child->super);
@ -1030,14 +1032,21 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
/* setup to report the proc state to the HNP */ /* setup to report the proc state to the HNP */
OBJ_CONSTRUCT(&alert, opal_buffer_t); OBJ_CONSTRUCT(&alert, opal_buffer_t);
/* setup the environment for each context */
for (i=0; i < num_apps; i++) { for (i=0; i < num_apps; i++) {
if (ORTE_SUCCESS != (rc = odls_base_default_setup_fork(apps[i], app = apps[i];
/* if this app isn't being used on our node, skip it */
if (!app->used_on_node) {
continue;
}
/* setup the environment for this app */
if (ORTE_SUCCESS != (rc = odls_base_default_setup_fork(app,
jobdat->num_local_procs, jobdat->num_local_procs,
jobdat->num_procs, jobdat->num_procs,
jobdat->total_slots_alloc, jobdat->total_slots_alloc,
oversubscribed, oversubscribed,
&apps[i]->env))) { &app->env))) {
OPAL_OUTPUT_VERBOSE((10, orte_odls_globals.output, OPAL_OUTPUT_VERBOSE((10, orte_odls_globals.output,
"%s odls:launch:setup_fork failed with error %s", "%s odls:launch:setup_fork failed with error %s",
@ -1068,53 +1077,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
/* okay, now tell the HNP we couldn't do it */ /* okay, now tell the HNP we couldn't do it */
goto CLEANUP; goto CLEANUP;
} }
}
/* okay, now let's launch our local procs using the provided fork_local fn */
for (proc_rank = 0, item = opal_list_get_first(&orte_local_children);
item != opal_list_get_end(&orte_local_children);
item = opal_list_get_next(item)) {
child = (orte_odls_child_t*)item;
/* is this child already alive? This can happen if
* we are asked to launch additional processes.
* If it has been launched, then do nothing
*/
if (child->alive) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch child %s is already alive",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(child->name)));
continue;
}
/* do we have a child from the specified job. Because the
* job could be given as a WILDCARD value, we must use
* the dss.compare function to check for equality.
*/
if (OPAL_EQUAL != opal_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch child %s is not in job %s being launched",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(child->name),
ORTE_JOBID_PRINT(job)));
continue;
}
/* find the app context for this child */
if (child->app_idx > num_apps ||
NULL == apps[child->app_idx]) {
/* get here if we couldn't find the app_context */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto CLEANUP;
}
app = apps[child->app_idx];
/* Try to change to the app's cwd and check that the app /* Try to change to the app's cwd and check that the app
exists and is executable The function will exists and is executable The function will
@ -1182,129 +1145,172 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
opal_argv_free(argvptr); opal_argv_free(argvptr);
} }
/* setup the rest of the environment with the proc-specific items - these /* okay, now let's launch all the local procs for this app using the provided fork_local fn */
* will be overwritten for each child for (proc_rank = 0, item = opal_list_get_first(&orte_local_children);
*/ item != opal_list_get_end(&orte_local_children);
if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&job_str, child->name->jobid))) { item = opal_list_get_next(item)) {
ORTE_ERROR_LOG(rc); child = (orte_odls_child_t*)item;
goto CLEANUP;
} /* does this child belong to this app? */
if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid_str, child->name->vpid))) { if (i != child->app_idx) {
ORTE_ERROR_LOG(rc); continue;
goto CLEANUP; }
}
if(NULL == (param = mca_base_param_environ_variable("orte","ess","jobid"))) { /* is this child already alive? This can happen if
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); * we are asked to launch additional processes.
rc = ORTE_ERR_OUT_OF_RESOURCE; * If it has been launched, then do nothing
goto CLEANUP; */
} if (child->alive) {
opal_setenv(param, job_str, true, &app->env);
free(param); OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
free(job_str); "%s odls:launch child %s is already alive",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
if(NULL == (param = mca_base_param_environ_variable("orte","ess","vpid"))) { ORTE_NAME_PRINT(child->name)));
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE; continue;
goto CLEANUP; }
}
opal_setenv(param, vpid_str, true, &app->env); /* do we have a child from the specified job. Because the
free(param); * job could be given as a WILDCARD value, we must use
/* although the vpid IS the process' rank within the job, users * the dss.compare function to check for equality.
* would appreciate being given a public environmental variable */
* that also represents this value - something MPI specific - so if (OPAL_EQUAL != opal_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) {
* do that here.
* OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT. "%s odls:launch child %s is not in job %s being launched",
* We know - just live with it ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
*/ ORTE_NAME_PRINT(child->name),
opal_setenv("OMPI_COMM_WORLD_RANK", vpid_str, true, &app->env); ORTE_JOBID_PRINT(job)));
free(vpid_str); /* done with this now */
continue;
/* users would appreciate being given a public environmental variable }
* that also represents the local rank value - something MPI specific - so
* do that here. /* setup the rest of the environment with the proc-specific items - these
* * will be overwritten for each child
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT. */
* We know - just live with it if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&job_str, child->name->jobid))) {
*/
if (ORTE_LOCAL_RANK_INVALID == (local_rank = orte_ess.get_local_rank(child->name))) {
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
rc = ORTE_ERR_VALUE_OUT_OF_BOUNDS;
goto CLEANUP;
}
asprintf(&value, "%lu", (unsigned long) local_rank);
opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, &app->env);
free(value);
param = mca_base_param_environ_variable("opal", NULL, "paffinity_base_slot_list");
if ( NULL != child->slot_list ) {
asprintf(&value, "%s", child->slot_list);
opal_setenv(param, value, true, &app->env);
free(value);
} else {
opal_unsetenv(param, &app->env);
}
free(param);
/* if we are timing things, record when we are going to launch this proc */
if (orte_timing) {
gettimeofday(&child->starttime, NULL);
}
/* must unlock prior to fork to keep things clean in the
* event library
*/
opal_condition_signal(&orte_odls_globals.cond);
OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
#if OPAL_ENABLE_FT == 1
#if OPAL_ENABLE_FT_CR == 1
/*
* OPAL CRS components need the opportunity to take action before a process
* is forked.
* Needs access to:
* - Environment
* - Rank/ORTE Name
* - Binary to exec
*/
if( NULL != opal_crs.crs_prelaunch ) {
if( OPAL_SUCCESS != (rc = opal_crs.crs_prelaunch(child->name->vpid,
orte_snapc_base_global_snapshot_loc,
&(app->app),
&(app->cwd),
&(app->argv),
&(app->env) ) ) ) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
} if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid_str, child->name->vpid))) {
#endif ORTE_ERROR_LOG(rc);
#endif goto CLEANUP;
if (5 < opal_output_get_verbosity(orte_odls_globals.output)) {
opal_output(orte_odls_globals.output, "%s odls:launch: spawning child %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(child->name));
/* dump what is going to be exec'd */
if (7 < opal_output_get_verbosity(orte_odls_globals.output)) {
opal_dss.dump(orte_odls_globals.output, app, ORTE_APP_CONTEXT);
} }
} if(NULL == (param = mca_base_param_environ_variable("orte","ess","jobid"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = fork_local(app, child, app->env, jobdat->controls, jobdat->stdin_target); rc = ORTE_ERR_OUT_OF_RESOURCE;
/* reaquire lock so we don't double unlock... */ goto CLEANUP;
OPAL_THREAD_LOCK(&orte_odls_globals.mutex); }
if (ORTE_SUCCESS != rc) { opal_setenv(param, job_str, true, &app->env);
/* do NOT ERROR_LOG this error - it generates free(param);
* a message/node as most errors will be common free(job_str);
* across the entire cluster. Instead, we let orterun
* output a consolidated error message for us if(NULL == (param = mca_base_param_environ_variable("orte","ess","vpid"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
opal_setenv(param, vpid_str, true, &app->env);
free(param);
/* although the vpid IS the process' rank within the job, users
* would appreciate being given a public environmental variable
* that also represents this value - something MPI specific - so
* do that here.
*
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
* We know - just live with it
*/ */
goto CLEANUP; opal_setenv("OMPI_COMM_WORLD_RANK", vpid_str, true, &app->env);
} else { free(vpid_str); /* done with this now */
child->alive = true;
child->state = ORTE_PROC_STATE_LAUNCHED; /* users would appreciate being given a public environmental variable
} * that also represents the local rank value - something MPI specific - so
* do that here.
*
* AND YES - THIS BREAKS THE ABSTRACTION BARRIER TO SOME EXTENT.
* We know - just live with it
*/
if (ORTE_LOCAL_RANK_INVALID == (local_rank = orte_ess.get_local_rank(child->name))) {
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
rc = ORTE_ERR_VALUE_OUT_OF_BOUNDS;
goto CLEANUP;
}
asprintf(&value, "%lu", (unsigned long) local_rank);
opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, &app->env);
free(value);
param = mca_base_param_environ_variable("opal", NULL, "paffinity_base_slot_list");
if ( NULL != child->slot_list ) {
asprintf(&value, "%s", child->slot_list);
opal_setenv(param, value, true, &app->env);
free(value);
} else {
opal_unsetenv(param, &app->env);
}
free(param);
/* if we are timing things, record when we are going to launch this proc */
if (orte_timing) {
gettimeofday(&child->starttime, NULL);
}
/* must unlock prior to fork to keep things clean in the
* event library
*/
opal_condition_signal(&orte_odls_globals.cond);
OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
#if OPAL_ENABLE_FT == 1
#if OPAL_ENABLE_FT_CR == 1
/*
* OPAL CRS components need the opportunity to take action before a process
* is forked.
* Needs access to:
* - Environment
* - Rank/ORTE Name
* - Binary to exec
*/
if( NULL != opal_crs.crs_prelaunch ) {
if( OPAL_SUCCESS != (rc = opal_crs.crs_prelaunch(child->name->vpid,
orte_snapc_base_global_snapshot_loc,
&(app->app),
&(app->cwd),
&(app->argv),
&(app->env) ) ) ) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
}
#endif
#endif
if (5 < opal_output_get_verbosity(orte_odls_globals.output)) {
opal_output(orte_odls_globals.output, "%s odls:launch: spawning child %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(child->name));
/* dump what is going to be exec'd */
if (7 < opal_output_get_verbosity(orte_odls_globals.output)) {
opal_dss.dump(orte_odls_globals.output, app, ORTE_APP_CONTEXT);
}
}
rc = fork_local(app, child, app->env, jobdat->controls, jobdat->stdin_target);
/* reaquire lock so we don't double unlock... */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
if (ORTE_SUCCESS != rc) {
/* do NOT ERROR_LOG this error - it generates
* a message/node as most errors will be common
* across the entire cluster. Instead, we let orterun
* output a consolidated error message for us
*/
goto CLEANUP;
} else {
child->alive = true;
child->state = ORTE_PROC_STATE_LAUNCHED;
}
/* move to next processor */
proc_rank++;
} /* complete launching all children for this app */
/* reset our working directory back to our default location - if we /* reset our working directory back to our default location - if we
* don't do this, then we will be looking for relative paths starting * don't do this, then we will be looking for relative paths starting
* from the last wdir option specified by the user. Thus, we would * from the last wdir option specified by the user. Thus, we would
@ -1313,8 +1319,6 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
* to their current location * to their current location
*/ */
chdir(basedir); chdir(basedir);
/* move to next processor */
proc_rank++;
} }
launch_failed = false; launch_failed = false;

Просмотреть файл

@ -461,6 +461,7 @@ static void orte_app_context_construct(orte_app_context_t* app_context)
app_context->preload_binary = false; app_context->preload_binary = false;
app_context->preload_files = NULL; app_context->preload_files = NULL;
app_context->preload_files_dest_dir = NULL; app_context->preload_files_dest_dir = NULL;
app_context->used_on_node = false;
} }
static void orte_app_context_destructor(orte_app_context_t* app_context) static void orte_app_context_destructor(orte_app_context_t* app_context)

Просмотреть файл

@ -177,6 +177,8 @@ typedef struct {
/** Destination directory for the preloaded files /** Destination directory for the preloaded files
* If NULL then the absolute and relative paths are obeyed */ * If NULL then the absolute and relative paths are obeyed */
char * preload_files_dest_dir; char * preload_files_dest_dir;
/* is being used on the local node */
bool used_on_node;
} orte_app_context_t; } orte_app_context_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_app_context_t); ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_app_context_t);

Просмотреть файл

@ -64,6 +64,7 @@
#include "opal/version.h" #include "opal/version.h"
#include "opal/runtime/opal.h" #include "opal/runtime/opal.h"
#include "opal/util/os_path.h" #include "opal/util/os_path.h"
#include "opal/util/path.h"
#include "opal/class/opal_pointer_array.h" #include "opal/class/opal_pointer_array.h"
#include "opal/dss/dss.h" #include "opal/dss/dss.h"
@ -1830,7 +1831,19 @@ static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
/* Did the user request a specific wdir? */ /* Did the user request a specific wdir? */
if (NULL != orterun_globals.wdir) { if (NULL != orterun_globals.wdir) {
app->cwd = strdup(orterun_globals.wdir); /* if this is a relative path, convert it to an absolute path */
if (opal_path_is_absolute(orterun_globals.wdir)) {
app->cwd = strdup(orterun_globals.wdir);
} else {
/* get the cwd */
if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {
orte_show_help("help-orterun.txt", "orterun:init-failure",
true, "get the cwd", rc);
goto cleanup;
}
/* construct the absolute path */
app->cwd = opal_os_path(false, cwd, orterun_globals.wdir, NULL);
}
app->user_specified_cwd = true; app->user_specified_cwd = true;
} else { } else {
if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) { if (OPAL_SUCCESS != (rc = opal_getcwd(cwd, sizeof(cwd)))) {