1
1

Add ability for local slave spawns to pre-position supporting files. Update comm_spawn and comm_spawn_multiple man pages to cover new info_keys.

This commit was SVN r20527.
Этот коммит содержится в:
Ralph Castain 2009-02-12 15:56:45 +00:00
родитель 62e08e7212
Коммит 62dd763a8f
10 изменённых файлов: 273 добавлений и 60 удалений

Просмотреть файл

@ -670,6 +670,12 @@ static int spawn(int count, char **array_of_commands,
app->preload_files_dest_dir = strdup(cwd);
}
/* check for 'preload_files_src_dir' */
ompi_info_get (array_of_info[i], "ompi_preload_files_src_dir", valuelen, cwd, &flag);
if ( flag ) {
app->preload_files_src_dir = strdup(cwd);
}
/* see if this is a non-mpi job - if so, then set the flag so ORTE
* knows what to do
*/
@ -694,15 +700,21 @@ static int spawn(int count, char **array_of_commands,
}
/* default value: If the user did not tell us where to look for the
executable, we assume the current working directory */
* executable, we assume the current working directory, or the preload destination
* if it was given
*/
if ( !have_wdir ) {
if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OMPI_PATH_MAX))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(jdata);
opal_progress_event_users_decrement();
return rc;
if (NULL != app->preload_files_dest_dir) {
app->cwd = strdup(app->preload_files_dest_dir);
} else {
if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OMPI_PATH_MAX))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(jdata);
opal_progress_event_users_decrement();
return rc;
}
app->cwd = strdup(cwd);
}
app->cwd = strdup(cwd);
}
/* leave the map info alone - the launcher will

Просмотреть файл

@ -98,25 +98,48 @@ For the SPAWN calls, \fIinfo\fP provides additional, implementation-dependent in
The following keys for \fIinfo\fP are recognized in Open MPI. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.)
.sp
.nf
Key Type Description
--- ---- -----------
Key Type Description
--- ---- -----------
host char * Host on which the process should be spawned.
See the \fIorte_host\fP man page for an
explanation of how this will be used.
hostfile char * Hostfile containing the hosts on which
the processes are to be spawned. See
the \fIorte_hostfile\fP man page for an
explanation of how this will be used.
wdir char * Directory where the executable is located.
ompi_prefix char * Same as the --prefix command line argument
to mpirun.
ompi_non_mpi bool If set to true, launching a non-MPI
application; the returned communicator
will be MPI_COMM_NULL. Failure to set
this flag when launching a non-MPI
application will cause both the child
and parent jobs to "hang".
host char * Host on which the process should be spawned.
See the \fIorte_host\fP man page for an
explanation of how this will be used.
hostfile char * Hostfile containing the hosts on which
the processes are to be spawned. See
the \fIorte_hostfile\fP man page for an
explanation of how this will be used.
wdir char * Directory where the executable is located. If
files are to be pre-positioned, then this
location is the desired working directory
at time of execution - if not specified,
then it will automatically be set to
\fIompi_preload_files_dest_dir\fP.
ompi_prefix char * Same as the --prefix command line argument
to mpirun.
ompi_local_slave bool If set to true, launch the specified process
as a local \fIslave\fP to the calling process.
The new process will only be known to the caller,
and will only be able to communicate with the caller.
ompi_preload_binary bool If set to true, pre-position the specified
executable onto the remote host. A destination
directory must also be provided.
ompi_preload_files_dest_dir char * Target directory where pre-positioned files
are to be placed.
ompi_preload_files char * A comma-separated list of files that are to
be pre-positioned in addition to the executable.
Note that this option does not depend upon
\fIompi_preload_binary\fP - files can be moved
to the target even if an executable is not moved.
ompi_preload_files_src_dir char * Source directory where files and executables
that are to be pre-positioned can be found. If
not specified, the current working directory
will be used.
ompi_non_mpi bool If set to true, launching a non-MPI
application; the returned communicator
will be MPI_COMM_NULL. Failure to set
this flag when launching a non-MPI
application will cause both the child
and parent jobs to "hang".
.fi
\fIbool\fP info keys are actually strings but are evaluated as

Просмотреть файл

@ -109,16 +109,48 @@ The following keys for \fIinfo\fP are recognized in "#PACKAGE_NAME#". (The reser
.sp
.sp
.nf
Key Type Description
--- ---- -----------
Key Type Description
--- ---- -----------
host char * Host on which the process should be spawned.
wdir char * Directory where the executable is located.
ompi_prefix char * Same as the --prefix command line argument
to mpirun.
ompi_non_mpi bool If set to true, launching a non-MPI
application; the returned communicator
will be MPI_COMM_NULL.
host char * Host on which the process should be spawned.
See the \fIorte_host\fP man page for an
explanation of how this will be used.
hostfile char * Hostfile containing the hosts on which
the processes are to be spawned. See
the \fIorte_hostfile\fP man page for an
explanation of how this will be used.
wdir char * Directory where the executable is located. If
files are to be pre-positioned, then this
location is the desired working directory
at time of execution - if not specified,
then it will automatically be set to
\fIompi_preload_files_dest_dir\fP.
ompi_prefix char * Same as the --prefix command line argument
to mpirun.
ompi_local_slave bool If set to true, launch the specified process
as a local \fIslave\fP to the calling process.
The new process will only be known to the caller,
and will only be able to communicate with the caller.
ompi_preload_binary bool If set to true, pre-position the specified
executable onto the remote host. A destination
directory must also be provided.
ompi_preload_files_dest_dir char * Target directory where pre-positioned files
are to be placed.
ompi_preload_files char * A comma-separated list of files that are to
be pre-positioned in addition to the executable.
Note that this option does not depend upon
\fIompi_preload_binary\fP - files can be moved
to the target even if an executable is not moved.
ompi_preload_files_src_dir char * Source directory where files and executables
that are to be pre-positioned can be found. If
not specified, the current working directory
will be used.
ompi_non_mpi bool If set to true, launching a non-MPI
application; the returned communicator
will be MPI_COMM_NULL. Failure to set
this flag when launching a non-MPI
application will cause both the child
and parent jobs to "hang".
.fi
.sp

Просмотреть файл

@ -126,3 +126,16 @@ binaries be pre-positioned on the remote host. However, we could not
find the %s command in your path.
Please check that %s is in your PATH and try again.
#
[file-not-found]
A call was made to launch a local slave process that requested pre-positioning
of one or more files, but the specified files could not be found:
File: %s
Source directory: %s
Please either specify an absolute path to the file, or check that
the file is in the source directory (or in your current working
directory if no source directory was given), and ensure that you
have read permission on the file.

Просмотреть файл

@ -31,6 +31,7 @@
#include "opal/dss/dss.h"
#include "opal/util/os_path.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/path.h"
#include "opal/util/argv.h"
#include "opal/util/basename.h"
@ -148,6 +149,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
char *exefile=NULL, *basename, *param, *path=NULL, *bppath=NULL;
char *exec_path=NULL;
char *tmp;
char **files;
bool flag;
orte_app_context_t **apps, *app;
int i;
@ -156,6 +158,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
long fd, fdmax = sysconf(_SC_OPEN_MAX);
sigset_t sigs;
bool local_op = false;
char cwd[OMPI_PATH_MAX];
/* increment the local slave jobid */
orte_plm_globals.local_slaves++;
@ -201,6 +204,14 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
* then convert it to one
*/
if (!opal_path_is_absolute(app->app)) {
/* see if a source directory was given */
if (NULL!= app->preload_files_src_dir) {
/* prepend the src dir to the executable name */
path = opal_os_path(false, app->preload_files_src_dir, app->app, NULL);
free(app->app);
app->app = path;
}
/* now check for absolute path */
exefile = opal_find_absolute_path(app->app);
if (NULL == exefile) {
orte_show_help("help-plm-base.txt", "exec-not-found", true, app->app);
@ -213,6 +224,11 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
basename = opal_basename(exefile);
path = opal_os_path(false, app->preload_files_dest_dir, basename, NULL);
free(basename);
/* ensure the path exists */
if (ORTE_SUCCESS != (rc = opal_os_dirpath_create(app->preload_files_dest_dir, S_IRWXU))) {
orte_show_help("help-plm-base.txt", "path-not-created", true, path);
return rc;
}
/* we are going to use the "bootproxy" script to launch
* this job - so move it over to the target host as well
*/
@ -259,6 +275,10 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
/* add the bootproxy cmd */
opal_argv_append_nosize(&argv, bppath);
}
free(exefile);
free(path);
free(bppath);
free(scp);
} else {
/* if we are not preloading the binaries, just setup
* the path to the bootproxy script
@ -294,23 +314,85 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
}
}
if (NULL != exefile) {
free(exefile);
}
if (NULL != path) {
free(path);
}
if (NULL != bppath) {
free(bppath);
}
/* release the scp command */
if (NULL != scp) {
free(scp);
}
/* done with bootproxy */
free(bootproxy);
/* do we need to pre-position supporting files? */
if (NULL != app->preload_files) {
/* the target location -must- be an absolute path */
if (NULL == app->preload_files_dest_dir ||
!opal_path_is_absolute(app->preload_files_dest_dir)) {
orte_show_help("help-plm-base.txt", "abs-path-reqd", true, "files", "target", "target",
(NULL == app->preload_files_dest_dir) ? "NULL" : app->preload_files_dest_dir);
return ORTE_ERROR;
}
if (local_op) {
scp = opal_find_absolute_path("cp");
if (NULL == scp) {
orte_show_help("help-plm-base.txt", "cp-not-found", true, "cp", "cp");
return ORTE_ERROR;
}
} else {
/* find the scp command */
scp = opal_find_absolute_path("scp");
if (NULL == scp) {
orte_show_help("help-plm-base.txt", "cp-not-found", true, "scp", "scp");
return ORTE_ERROR;
}
}
/* break apart the comma-separated list of files */
files = opal_argv_split(app->preload_files, ',');
/* setup the path to the destination */
path = opal_os_path(false, app->preload_files_dest_dir, NULL);
/* ensure the path exists */
if (ORTE_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) {
orte_show_help("help-plm-base.txt", "path-not-created", true, path);
return rc;
}
/* copy each file across */
for (i=0; i < opal_argv_count(files); i++) {
/* if the file is not given in absolute path form,
* then convert it to one
*/
if (!opal_path_is_absolute(files[i])) {
/* see if a source directory was given */
if (NULL!= app->preload_files_src_dir) {
/* look for the file there */
exefile = opal_path_access(files[i], app->preload_files_src_dir, R_OK);
} else {
/* look for it in the cwd */
getcwd(cwd, OMPI_PATH_MAX);
exefile = opal_path_access(files[i], cwd, R_OK);
}
} else {
exefile = opal_path_access(files[i], NULL, R_OK);
}
if (NULL == exefile) {
getcwd(cwd, OMPI_PATH_MAX);
orte_show_help("help-plm-base.txt", "file-not-found", true, files[i],
(NULL == app->preload_files_dest_dir) ? cwd : app->preload_files_dest_dir);
return ORTE_ERROR;
}
if (local_op) {
/* form and execute the cp command */
asprintf(&cmd, "%s %s %s/%s", scp, files[i], path, files[i]);
system(cmd);
free(cmd);
} else {
/* form and execute the scp commands */
asprintf(&cmd, "%s %s %s:%s/%s", scp, files[i], nodename, path, files[i]);
system(cmd);
free(cmd);
}
}
free(path);
opal_argv_free(files);
free(scp);
}
/* done with nodename */
free(nodename);
/* if there is a prefix, add it in a special way so the bootproxy
* can deal with it
*/

Просмотреть файл

@ -645,24 +645,43 @@ int orte_dt_pack_app_context(opal_buffer_t *buffer, const void *src,
} else {
have_preload_files_dest_dir = 0;
}
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&have_preload_files_dest_dir), 1, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if( have_preload_files_dest_dir) {
if( NULL != app_context[i]->preload_files_dest_dir) {
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(app_context[i]->preload_files_dest_dir)), 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(app_context[i]->preload_files_dest_dir)), 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* Pack the preload_files_src_dir if we have one */
if (NULL != app_context[i]->preload_files_src_dir) {
have_preload_files_dest_dir = 1;
} else {
have_preload_files_dest_dir = 0;
}
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&have_preload_files_dest_dir), 1, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if( have_preload_files_dest_dir) {
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(app_context[i]->preload_files_src_dir)), 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -726,6 +726,23 @@ int orte_dt_unpack_app_context(opal_buffer_t *buffer, void *dest,
app_context[i]->preload_files_dest_dir = NULL;
}
/* Unpack the preload_files_src_dir set */
max_n=1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &have_preload_files_dest_dir,
&max_n, OPAL_INT8))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (have_preload_files_dest_dir) {
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &app_context[i]->preload_files_src_dir,
&max_n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
} else {
app_context[i]->preload_files_src_dir = NULL;
}
}
return ORTE_SUCCESS;

Просмотреть файл

@ -474,6 +474,7 @@ static void orte_app_context_construct(orte_app_context_t* app_context)
app_context->preload_binary = false;
app_context->preload_files = NULL;
app_context->preload_files_dest_dir = NULL;
app_context->preload_files_src_dir = NULL;
app_context->used_on_node = false;
}
@ -521,6 +522,10 @@ static void orte_app_context_destructor(orte_app_context_t* app_context)
if(NULL != app_context->preload_files_dest_dir) {
free(app_context->preload_files_dest_dir);
}
if(NULL != app_context->preload_files_src_dir) {
free(app_context->preload_files_src_dir);
}
}
OBJ_CLASS_INSTANCE(orte_app_context_t,

Просмотреть файл

@ -176,7 +176,10 @@ typedef struct {
char * preload_files;
/** Destination directory for the preloaded files
* If NULL then the absolute and relative paths are obeyed */
char * preload_files_dest_dir;
char *preload_files_dest_dir;
/** Source directory for the preloaded files
* If NULL then relative file names are looked up in the current working directory */
char *preload_files_src_dir;
/* is being used on the local node */
bool used_on_node;
} orte_app_context_t;

Просмотреть файл

@ -12,9 +12,10 @@ int main(int argc, char* argv[])
int rank, size;
pid_t pid;
char *host, *app, *rdir, *prefix;
char cwd[256];
if (argc != 5) {
printf("Usage: slave_spawn host prefix-for-host abs-path-to-exe remote-tmp-dir\n");
if (argc < 5) {
printf("Usage: slave_spawn host prefix-for-host abs-path-to-exe remote-tmp-dir <files-to-move>\n");
return 1;
}
@ -36,6 +37,12 @@ int main(int argc, char* argv[])
MPI_Info_set(info, "ompi_preload_binary", "true");
MPI_Info_set(info, "ompi_preload_files_dest_dir", rdir);
if (argc == 6) {
/* files were specified */
MPI_Info_set(info, "ompi_preload_files", argv[5]);
MPI_Info_set(info, "ompi_preload_files_src_dir", getcwd(cwd, 256));
}
pid = getpid();
printf("Slave_spawn [pid %ld] about to spawn!\n", (long)pid);
if (MPI_SUCCESS != (rc = MPI_Comm_spawn(app, MPI_ARGV_NULL, 1, info,