Add ability for local slave spawns to pre-position supporting files. Update comm_spawn and comm_spawn_multiple man pages to cover new info_keys.
This commit was SVN r20527.
Этот коммит содержится в:
родитель
62e08e7212
Коммит
62dd763a8f
@ -670,6 +670,12 @@ static int spawn(int count, char **array_of_commands,
|
||||
app->preload_files_dest_dir = strdup(cwd);
|
||||
}
|
||||
|
||||
/* check for 'preload_files_src_dir' */
|
||||
ompi_info_get (array_of_info[i], "ompi_preload_files_src_dir", valuelen, cwd, &flag);
|
||||
if ( flag ) {
|
||||
app->preload_files_src_dir = strdup(cwd);
|
||||
}
|
||||
|
||||
/* see if this is a non-mpi job - if so, then set the flag so ORTE
|
||||
* knows what to do
|
||||
*/
|
||||
@ -694,15 +700,21 @@ static int spawn(int count, char **array_of_commands,
|
||||
}
|
||||
|
||||
/* default value: If the user did not tell us where to look for the
|
||||
executable, we assume the current working directory */
|
||||
* executable, we assume the current working directory, or the preload destination
|
||||
* if it was given
|
||||
*/
|
||||
if ( !have_wdir ) {
|
||||
if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OMPI_PATH_MAX))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(jdata);
|
||||
opal_progress_event_users_decrement();
|
||||
return rc;
|
||||
if (NULL != app->preload_files_dest_dir) {
|
||||
app->cwd = strdup(app->preload_files_dest_dir);
|
||||
} else {
|
||||
if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OMPI_PATH_MAX))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(jdata);
|
||||
opal_progress_event_users_decrement();
|
||||
return rc;
|
||||
}
|
||||
app->cwd = strdup(cwd);
|
||||
}
|
||||
app->cwd = strdup(cwd);
|
||||
}
|
||||
|
||||
/* leave the map info alone - the launcher will
|
||||
|
@ -98,25 +98,48 @@ For the SPAWN calls, \fIinfo\fP provides additional, implementation-dependent in
|
||||
The following keys for \fIinfo\fP are recognized in Open MPI. (The reserved values mentioned in Section 5.3.4 of the MPI-2 standard are not implemented.)
|
||||
.sp
|
||||
.nf
|
||||
Key Type Description
|
||||
--- ---- -----------
|
||||
Key Type Description
|
||||
--- ---- -----------
|
||||
|
||||
host char * Host on which the process should be spawned.
|
||||
See the \fIorte_host\fP man page for an
|
||||
explanation of how this will be used.
|
||||
hostfile char * Hostfile containing the hosts on which
|
||||
the processes are to be spawned. See
|
||||
the \fIorte_hostfile\fP man page for an
|
||||
explanation of how this will be used.
|
||||
wdir char * Directory where the executable is located.
|
||||
ompi_prefix char * Same as the --prefix command line argument
|
||||
to mpirun.
|
||||
ompi_non_mpi bool If set to true, launching a non-MPI
|
||||
application; the returned communicator
|
||||
will be MPI_COMM_NULL. Failure to set
|
||||
this flag when launching a non-MPI
|
||||
application will cause both the child
|
||||
and parent jobs to "hang".
|
||||
host char * Host on which the process should be spawned.
|
||||
See the \fIorte_host\fP man page for an
|
||||
explanation of how this will be used.
|
||||
hostfile char * Hostfile containing the hosts on which
|
||||
the processes are to be spawned. See
|
||||
the \fIorte_hostfile\fP man page for an
|
||||
explanation of how this will be used.
|
||||
wdir char * Directory where the executable is located. If
|
||||
files are to be pre-positioned, then this
|
||||
location is the desired working directory
|
||||
at time of execution - if not specified,
|
||||
then it will automatically be set to
|
||||
\fIompi_preload_files_dest_dir\fP.
|
||||
ompi_prefix char * Same as the --prefix command line argument
|
||||
to mpirun.
|
||||
ompi_local_slave bool If set to true, launch the specified process
|
||||
as a local \fIslave\fP to the calling process.
|
||||
The new process will only be known to the caller,
|
||||
and will only be able to communicate with the caller.
|
||||
ompi_preload_binary bool If set to true, pre-position the specified
|
||||
executable onto the remote host. A destination
|
||||
directory must also be provided.
|
||||
ompi_preload_files_dest_dir char * Target directory where pre-positioned files
|
||||
are to be placed.
|
||||
ompi_preload_files char * A comma-separated list of files that are to
|
||||
be pre-positioned in addition to the executable.
|
||||
Note that this option does not depend upon
|
||||
\fIompi_preload_binary\fP - files can be moved
|
||||
to the target even if an executable is not moved.
|
||||
ompi_preload_files_src_dir char * Source directory where files and executables
|
||||
that are to be pre-positioned can be found. If
|
||||
not specified, the current working directory
|
||||
will be used.
|
||||
ompi_non_mpi bool If set to true, launching a non-MPI
|
||||
application; the returned communicator
|
||||
will be MPI_COMM_NULL. Failure to set
|
||||
this flag when launching a non-MPI
|
||||
application will cause both the child
|
||||
and parent jobs to "hang".
|
||||
.fi
|
||||
|
||||
\fIbool\fP info keys are actually strings but are evaluated as
|
||||
|
@ -109,16 +109,48 @@ The following keys for \fIinfo\fP are recognized in "#PACKAGE_NAME#". (The reser
|
||||
.sp
|
||||
.sp
|
||||
.nf
|
||||
Key Type Description
|
||||
--- ---- -----------
|
||||
Key Type Description
|
||||
--- ---- -----------
|
||||
|
||||
host char * Host on which the process should be spawned.
|
||||
wdir char * Directory where the executable is located.
|
||||
ompi_prefix char * Same as the --prefix command line argument
|
||||
to mpirun.
|
||||
ompi_non_mpi bool If set to true, launching a non-MPI
|
||||
application; the returned communicator
|
||||
will be MPI_COMM_NULL.
|
||||
host char * Host on which the process should be spawned.
|
||||
See the \fIorte_host\fP man page for an
|
||||
explanation of how this will be used.
|
||||
hostfile char * Hostfile containing the hosts on which
|
||||
the processes are to be spawned. See
|
||||
the \fIorte_hostfile\fP man page for an
|
||||
explanation of how this will be used.
|
||||
wdir char * Directory where the executable is located. If
|
||||
files are to be pre-positioned, then this
|
||||
location is the desired working directory
|
||||
at time of execution - if not specified,
|
||||
then it will automatically be set to
|
||||
\fIompi_preload_files_dest_dir\fP.
|
||||
ompi_prefix char * Same as the --prefix command line argument
|
||||
to mpirun.
|
||||
ompi_local_slave bool If set to true, launch the specified process
|
||||
as a local \fIslave\fP to the calling process.
|
||||
The new process will only be known to the caller,
|
||||
and will only be able to communicate with the caller.
|
||||
ompi_preload_binary bool If set to true, pre-position the specified
|
||||
executable onto the remote host. A destination
|
||||
directory must also be provided.
|
||||
ompi_preload_files_dest_dir char * Target directory where pre-positioned files
|
||||
are to be placed.
|
||||
ompi_preload_files char * A comma-separated list of files that are to
|
||||
be pre-positioned in addition to the executable.
|
||||
Note that this option does not depend upon
|
||||
\fIompi_preload_binary\fP - files can be moved
|
||||
to the target even if an executable is not moved.
|
||||
ompi_preload_files_src_dir char * Source directory where files and executables
|
||||
that are to be pre-positioned can be found. If
|
||||
not specified, the current working directory
|
||||
will be used.
|
||||
ompi_non_mpi bool If set to true, launching a non-MPI
|
||||
application; the returned communicator
|
||||
will be MPI_COMM_NULL. Failure to set
|
||||
this flag when launching a non-MPI
|
||||
application will cause both the child
|
||||
and parent jobs to "hang".
|
||||
.fi
|
||||
|
||||
.sp
|
||||
|
@ -126,3 +126,16 @@ binaries be pre-positioned on the remote host. However, we could not
|
||||
find the %s command in your path.
|
||||
|
||||
Please check that %s is in your PATH and try again.
|
||||
|
||||
#
|
||||
[file-not-found]
|
||||
A call was made to launch a local slave process that requested pre-positioning
|
||||
of one or more files, but the specified files could not be found:
|
||||
|
||||
File: %s
|
||||
Source directory: %s
|
||||
|
||||
Please either specify an absolute path to the file, or check
|
||||
that the file is in your current working directory, and ensure
|
||||
that you have read permissions on the file.
|
||||
|
||||
|
@ -31,6 +31,7 @@
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/basename.h"
|
||||
@ -148,6 +149,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
|
||||
char *exefile=NULL, *basename, *param, *path=NULL, *bppath=NULL;
|
||||
char *exec_path=NULL;
|
||||
char *tmp;
|
||||
char **files;
|
||||
bool flag;
|
||||
orte_app_context_t **apps, *app;
|
||||
int i;
|
||||
@ -156,6 +158,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
|
||||
long fd, fdmax = sysconf(_SC_OPEN_MAX);
|
||||
sigset_t sigs;
|
||||
bool local_op = false;
|
||||
char cwd[OMPI_PATH_MAX];
|
||||
|
||||
/* increment the local slave jobid */
|
||||
orte_plm_globals.local_slaves++;
|
||||
@ -201,6 +204,14 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
|
||||
* then convert it to one
|
||||
*/
|
||||
if (!opal_path_is_absolute(app->app)) {
|
||||
/* see if a source directory was given */
|
||||
if (NULL!= app->preload_files_src_dir) {
|
||||
/* prepend the src dir to the executable name */
|
||||
path = opal_os_path(false, app->preload_files_src_dir, app->app, NULL);
|
||||
free(app->app);
|
||||
app->app = path;
|
||||
}
|
||||
/* now check for absolute path */
|
||||
exefile = opal_find_absolute_path(app->app);
|
||||
if (NULL == exefile) {
|
||||
orte_show_help("help-plm-base.txt", "exec-not-found", true, app->app);
|
||||
@ -213,6 +224,11 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
|
||||
basename = opal_basename(exefile);
|
||||
path = opal_os_path(false, app->preload_files_dest_dir, basename, NULL);
|
||||
free(basename);
|
||||
/* ensure the path exists */
|
||||
if (ORTE_SUCCESS != (rc = opal_os_dirpath_create(app->preload_files_dest_dir, S_IRWXU))) {
|
||||
orte_show_help("help-plm-base.txt", "path-not-created", true, path);
|
||||
return rc;
|
||||
}
|
||||
/* we are going to use the "bootproxy" script to launch
|
||||
* this job - so move it over to the target host as well
|
||||
*/
|
||||
@ -259,6 +275,10 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
|
||||
/* add the bootproxy cmd */
|
||||
opal_argv_append_nosize(&argv, bppath);
|
||||
}
|
||||
free(exefile);
|
||||
free(path);
|
||||
free(bppath);
|
||||
free(scp);
|
||||
} else {
|
||||
/* if we are not preloading the binaries, just setup
|
||||
* the path to the bootproxy script
|
||||
@ -294,23 +314,85 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL != exefile) {
|
||||
free(exefile);
|
||||
}
|
||||
if (NULL != path) {
|
||||
free(path);
|
||||
}
|
||||
if (NULL != bppath) {
|
||||
free(bppath);
|
||||
}
|
||||
/* release the scp command */
|
||||
if (NULL != scp) {
|
||||
free(scp);
|
||||
}
|
||||
|
||||
/* done with bootproxy */
|
||||
free(bootproxy);
|
||||
|
||||
/* do we need to pre-position supporting files? */
|
||||
if (NULL != app->preload_files) {
|
||||
/* the target location -must- be an absolute path */
|
||||
if (NULL == app->preload_files_dest_dir ||
|
||||
!opal_path_is_absolute(app->preload_files_dest_dir)) {
|
||||
orte_show_help("help-plm-base.txt", "abs-path-reqd", true, "files", "target", "target",
|
||||
(NULL == app->preload_files_dest_dir) ? "NULL" : app->preload_files_dest_dir);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
if (local_op) {
|
||||
scp = opal_find_absolute_path("cp");
|
||||
if (NULL == scp) {
|
||||
orte_show_help("help-plm-base.txt", "cp-not-found", true, "cp", "cp");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
} else {
|
||||
/* find the scp command */
|
||||
scp = opal_find_absolute_path("scp");
|
||||
if (NULL == scp) {
|
||||
orte_show_help("help-plm-base.txt", "cp-not-found", true, "scp", "scp");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
}
|
||||
/* break apart the comma-separated list of files */
|
||||
files = opal_argv_split(app->preload_files, ',');
|
||||
/* setup the path to the destination */
|
||||
path = opal_os_path(false, app->preload_files_dest_dir, NULL);
|
||||
/* ensure the path exists */
|
||||
if (ORTE_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) {
|
||||
orte_show_help("help-plm-base.txt", "path-not-created", true, path);
|
||||
return rc;
|
||||
}
|
||||
/* copy each file across */
|
||||
for (i=0; i < opal_argv_count(files); i++) {
|
||||
/* if the file is not given in absolute path form,
|
||||
* then convert it to one
|
||||
*/
|
||||
if (!opal_path_is_absolute(files[i])) {
|
||||
/* see if a source directory was given */
|
||||
if (NULL!= app->preload_files_src_dir) {
|
||||
/* look for the file there */
|
||||
exefile = opal_path_access(files[i], app->preload_files_src_dir, R_OK);
|
||||
} else {
|
||||
/* look for it in the cwd */
|
||||
getcwd(cwd, OMPI_PATH_MAX);
|
||||
exefile = opal_path_access(files[i], cwd, R_OK);
|
||||
}
|
||||
} else {
|
||||
exefile = opal_path_access(files[i], NULL, R_OK);
|
||||
}
|
||||
if (NULL == exefile) {
|
||||
getcwd(cwd, OMPI_PATH_MAX);
|
||||
orte_show_help("help-plm-base.txt", "file-not-found", true, files[i],
|
||||
(NULL == app->preload_files_dest_dir) ? cwd : app->preload_files_dest_dir);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
if (local_op) {
|
||||
/* form and execute the cp command */
|
||||
asprintf(&cmd, "%s %s %s/%s", scp, files[i], path, files[i]);
|
||||
system(cmd);
|
||||
free(cmd);
|
||||
} else {
|
||||
/* form and execute the scp commands */
|
||||
asprintf(&cmd, "%s %s %s:%s/%s", scp, files[i], nodename, path, files[i]);
|
||||
system(cmd);
|
||||
free(cmd);
|
||||
}
|
||||
}
|
||||
free(path);
|
||||
opal_argv_free(files);
|
||||
free(scp);
|
||||
}
|
||||
|
||||
/* done with nodename */
|
||||
free(nodename);
|
||||
|
||||
/* if there is a prefix, add it in a special way so the bootproxy
|
||||
* can deal with it
|
||||
*/
|
||||
|
@ -645,24 +645,43 @@ int orte_dt_pack_app_context(opal_buffer_t *buffer, const void *src,
|
||||
} else {
|
||||
have_preload_files_dest_dir = 0;
|
||||
}
|
||||
|
||||
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
|
||||
(void*)(&have_preload_files_dest_dir), 1, OPAL_INT8))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
if( have_preload_files_dest_dir) {
|
||||
if( NULL != app_context[i]->preload_files_dest_dir) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
|
||||
(void*)(&(app_context[i]->preload_files_dest_dir)), 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
|
||||
(void*)(&(app_context[i]->preload_files_dest_dir)), 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pack the preload_files_src_dir if we have one */
|
||||
if (NULL != app_context[i]->preload_files_src_dir) {
|
||||
have_preload_files_dest_dir = 1;
|
||||
} else {
|
||||
have_preload_files_dest_dir = 0;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
|
||||
(void*)(&have_preload_files_dest_dir), 1, OPAL_INT8))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if( have_preload_files_dest_dir) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
|
||||
(void*)(&(app_context[i]->preload_files_src_dir)), 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -726,6 +726,23 @@ int orte_dt_unpack_app_context(opal_buffer_t *buffer, void *dest,
|
||||
app_context[i]->preload_files_dest_dir = NULL;
|
||||
}
|
||||
|
||||
/* Unpack the preload_files_src_dir set */
|
||||
max_n=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &have_preload_files_dest_dir,
|
||||
&max_n, OPAL_INT8))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if (have_preload_files_dest_dir) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, &app_context[i]->preload_files_src_dir,
|
||||
&max_n, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
app_context[i]->preload_files_src_dir = NULL;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -474,6 +474,7 @@ static void orte_app_context_construct(orte_app_context_t* app_context)
|
||||
app_context->preload_binary = false;
|
||||
app_context->preload_files = NULL;
|
||||
app_context->preload_files_dest_dir = NULL;
|
||||
app_context->preload_files_src_dir = NULL;
|
||||
app_context->used_on_node = false;
|
||||
}
|
||||
|
||||
@ -521,6 +522,10 @@ static void orte_app_context_destructor(orte_app_context_t* app_context)
|
||||
if(NULL != app_context->preload_files_dest_dir) {
|
||||
free(app_context->preload_files_dest_dir);
|
||||
}
|
||||
|
||||
if(NULL != app_context->preload_files_src_dir) {
|
||||
free(app_context->preload_files_src_dir);
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_app_context_t,
|
||||
|
@ -176,7 +176,10 @@ typedef struct {
|
||||
char * preload_files;
|
||||
/** Destination directory for the preloaded files
|
||||
* If NULL then the absolute and relative paths are obeyed */
|
||||
char * preload_files_dest_dir;
|
||||
char *preload_files_dest_dir;
|
||||
/** Source directory for the preloaded files
|
||||
* If NULL then the absolute and relative paths are obeyed */
|
||||
char *preload_files_src_dir;
|
||||
/* is being used on the local node */
|
||||
bool used_on_node;
|
||||
} orte_app_context_t;
|
||||
|
@ -12,9 +12,10 @@ int main(int argc, char* argv[])
|
||||
int rank, size;
|
||||
pid_t pid;
|
||||
char *host, *app, *rdir, *prefix;
|
||||
char cwd[256];
|
||||
|
||||
if (argc != 5) {
|
||||
printf("Usage: slave_spawn host prefix-for-host abs-path-to-exe remote-tmp-dir\n");
|
||||
if (argc < 5) {
|
||||
printf("Usage: slave_spawn host prefix-for-host abs-path-to-exe remote-tmp-dir <files-to-move>\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -36,6 +37,12 @@ int main(int argc, char* argv[])
|
||||
MPI_Info_set(info, "ompi_preload_binary", "true");
|
||||
MPI_Info_set(info, "ompi_preload_files_dest_dir", rdir);
|
||||
|
||||
if (argc == 6) {
|
||||
/* files were specified */
|
||||
MPI_Info_set(info, "ompi_preload_files", argv[5]);
|
||||
MPI_Info_set(info, "ompi_preload_files_src_dir", getcwd(cwd, 256));
|
||||
}
|
||||
|
||||
pid = getpid();
|
||||
printf("Slave_spawn [pid %ld] about to spawn!\n", (long)pid);
|
||||
if (MPI_SUCCESS != (rc = MPI_Comm_spawn(app, MPI_ARGV_NULL, 1, info,
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user