1
1

As requested by Aurelien at the July design meeting - long time coming, but finally got around to it.

Enable one mpirun to act as the server for another mpirun when doing MPI_Publish_name and its associated operations. The user is responsible, of course, for ensuring that the mpirun acting as a server outlives any mpiruns using it in that capacity.

Add a command-line option to mpirun, --report-pid, that prints out mpirun's pid. Allow the --ompi-server option to also take the pid (given as pid:# or PID:#) of the mpirun to be used as the server; that pid is then looked up by searching the contact info of the local mpiruns.

This commit was SVN r20102.
Этот коммит содержится в:
Ralph Castain 2008-12-10 17:10:39 +00:00
родитель df75abd6b2
Коммит 7e3ddb09d3
10 изменённых файлов: 182 добавлений и 49 удалений

Просмотреть файл

@ -40,7 +40,7 @@ int main(int argc, char* argv[])
/*
* Get the directory listing
*/
if (ORTE_SUCCESS != (rc = orte_list_local_hnps(&hnp_list) ) ) {
if (ORTE_SUCCESS != (rc = orte_list_local_hnps(&hnp_list, true) ) ) {
fprintf(stderr, "orte_tool: couldn't get list of HNP's on this system - error %s\n",
ORTE_ERROR_NAME(rc));
goto cleanup;

Просмотреть файл

@ -389,9 +389,11 @@ static int find_hnp(void) {
opal_list_item_t *item;
orte_hnp_contact_t *hnpcandidate;
/* get the list of local hnp's available to us */
/* get the list of local hnp's available to us and setup
* contact info for them into the RML
*/
OBJ_CONSTRUCT(&hnp_list, opal_list_t);
if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list) ) ) {
if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;

Просмотреть файл

@ -233,10 +233,11 @@ main(int argc, char *argv[])
opal_signal_add(&int_handler, NULL);
/*
* Get the list of available hnp's
* Get the list of available hnp's and setup contact info
* to them in the RML
*/
OBJ_CONSTRUCT(&hnp_list, opal_list_t);
if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list) ) ) {
if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) {
goto cleanup;
}

Просмотреть файл

@ -231,9 +231,9 @@ main(int argc, char *argv[])
* Get the directory listing
*/
opal_output_verbose(10, orte_ps_globals.output,
"orte_ps: Acquiring list of HNPs...\n");
"orte_ps: Acquiring list of HNPs and setting contact info into RML...\n");
if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list) ) ) {
if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) {
exit_status = ret;
goto cleanup;
}

Просмотреть файл

@ -391,3 +391,33 @@ Error received: %s
Please check to ensure that the requested server matches the actual server
information, and that the server is in operation.
#
[orterun:ompi-server-pid-bad]
%s was unable to parse the PID of the %s to be used as the ompi-server.
The option we were given was:
--ompi-server %s
Please remember that the correct format for this command line option is:
--ompi-server PID:pid-of-%s
where PID can be either "PID" or "pid".
#
[orterun:ompi-server-could-not-get-hnp-list]
%s was unable to search the list of local %s contact files to find the specified pid.
You might check to see if your local session directory is available and
that you have read permissions on the top of that directory tree.
#
[orterun:ompi-server-pid-not-found]
%s was unable to find an %s with the specified pid of %d that was to be used as the ompi-server.
The option we were given was:
--ompi-server %s
Please remember that the correct format for this command line option is:
--ompi-server PID:pid-of-%s
where PID can be either "PID" or "pid".

Просмотреть файл

@ -205,7 +205,7 @@ Synonym for \fI-np\fP.
.TP
.B -nolocal\fR,\fP --nolocal
Do not run any copies of the launched application on the same node as
orterun is running. This option will override listing the localhost
mpirun is running. This option will override listing the localhost
with \fB--host\fR or any other host-specifying mechanism.
.
.
@ -242,22 +242,24 @@ Launch one process on each node of the allocation - equivalent to npernode 1.
.
.TP
.B -ompi-server\fR,\fP --ompi-server <uri, file, or pid>
Specify the URI of the Open MPI server, or the name of the file (specified as file:filename) that
contains that info. The Open MPI server is used to support multi-application data exchange via
Specify the URI of the Open MPI server (or the mpirun to be used as the server), the name
of the file (specified as file:filename) that
contains that info, or the PID (specified as pid:#) of the mpirun to be used as the server.
The Open MPI server is used to support multi-application data exchange via
the MPI-2 MPI_Publish_name and MPI_Lookup_name functions.
.
.
.TP
.B -wait-for-server\fR,\fP --wait-for-server
Pause mpirun before launching the job until ompi-server is detected. This
is useful in scripts where ompi-server may be started in the background, followed immediately by
Pause mpirun before launching the job until the Open MPI server is detected. This
is useful in scripts where the server may be started in the background, followed immediately by
an \fImpirun\fP command that wishes to connect to it. Mpirun will pause until either the specified
ompi-server is contacted or the server-wait-time is exceeded.
server is contacted or the server-wait-time is exceeded.
.
.
.TP
.B -server-wait-time\fR,\fP --server-wait-time <secs>
The max amount of time (in seconds) mpirun should wait for the ompi-server to start. The default
The max amount of time (in seconds) mpirun should wait for the specified server to start. The default
is 10 seconds.
.
.
@ -295,7 +297,17 @@ directory. By default, the absolute and relative paths provided by --preload-fil
.
.TP
.B -q\fR,\fP --quiet
Suppress informative messages from orterun during application execution.
Suppress informative messages from mpirun during application execution.
.
.
.TP
.B -report-uri\fR,\fP --report-uri
Print out mpirun's URI during startup.
.
.
.TP
.B -report-pid\fR,\fP --report-pid
Print out mpirun's PID during startup.
.
.
.TP
@ -339,7 +351,7 @@ Be verbose
.TP
.B -V\fR,\fP --version
Print version number. If no other arguments are given, this will also
cause orterun to exit.
cause mpirun to exit.
.
.
.TP
@ -703,14 +715,14 @@ and distance among them. See the following example of the cartofile:
.
.SS No Local Launch
.
Using the \fB--nolocal\fR option to orterun tells the system to not
launch any of the application processes on the same node that orterun
is running. While orterun typically blocks and consumes few system
Using the \fB--nolocal\fR option to mpirun tells the system to not
launch any of the application processes on the same node that mpirun
is running. While mpirun typically blocks and consumes few system
resources, this option can be helpful for launching very large jobs
where orterun may actually need to use noticable amounts of memory
where mpirun may actually need to use noticeable amounts of memory
and/or processing time. \fB--nolocal\fR allows mpirun to run without
sharing the local node with the launched applications, and likewise
allows the launched applications to run unhindered by orterun's system
allows the launched applications to run unhindered by mpirun's system
usage.
.PP
Note that \fB--nolocal\fR will override any other specification to
@ -721,7 +733,7 @@ application.
.
.TP
shell$ mpirun -np 1 --host localhost --nolocal hostname
This example will result in an error because orterun will not find
This example will result in an error because mpirun will not find
anywhere to launch the application.
.
.
@ -821,14 +833,14 @@ be collected into the \fImy_output\fP file.
.
.SS Signal Propagation
.
When orterun receives a SIGTERM and SIGINT, it will attempt to kill
When mpirun receives a SIGTERM or SIGINT, it will attempt to kill
the entire job by sending all processes in the job a SIGTERM, waiting
a small number of seconds, then sending all processes in the job a
SIGKILL.
.
SIGUSR1 and SIGUSR2 signals received by orterun are propagated to
SIGUSR1 and SIGUSR2 signals received by mpirun are propagated to
all processes in the job. Other signals are not currently propagated
by orterun.
by mpirun.
.
.
.SS Process Termination / Signal Handling

Просмотреть файл

@ -71,6 +71,7 @@
#include "orte/util/pre_condition_transports.h"
#include "orte/util/session_dir.h"
#include "orte/util/name_fns.h"
#include "orte/util/hnp_contact.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/plm/plm.h"
@ -139,7 +140,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ NULL, NULL, NULL, '\0', "report-pid", "report-pid", 0,
&orterun_globals.report_pid, OPAL_CMD_LINE_TYPE_BOOL,
"Printout pid" },
{ NULL, NULL, NULL, '\0', "report-uri", "report-uri", 0,
&orterun_globals.report_uri, OPAL_CMD_LINE_TYPE_BOOL,
"Printout URI" },
/* hetero apps */
{ "orte", "hetero", "apps", '\0', NULL, "hetero", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
@ -490,6 +494,16 @@ int orterun(int argc, char *argv[])
return rc;
}
/* check for request to report uri */
if (orterun_globals.report_uri) {
char *uri;
uri = orte_rml.get_contact_info();
printf("%s uri: %s\n", orterun_basename, (NULL == uri) ? "NULL" : uri);
if (NULL != uri) {
free(uri);
}
}
/* Change the default behavior of libevent such that we want to
continually block rather than blocking for the default timeout
and then looping around the progress engine again. There
@ -1167,6 +1181,7 @@ static int init_globals(void)
orterun_globals.verbose = false;
orterun_globals.quiet = false;
orterun_globals.report_pid = false;
orterun_globals.report_uri = false;
orterun_globals.by_node = false;
orterun_globals.by_slot = false;
orterun_globals.debugger = false;
@ -1333,6 +1348,76 @@ static int parse_locals(int argc, char* argv[])
fclose(fp);
input[strlen(input)-1] = '\0'; /* remove newline */
ompi_server = strdup(input);
} else if (0 == strncmp(orterun_globals.ompi_server, "pid", strlen("pid")) ||
0 == strncmp(orterun_globals.ompi_server, "PID", strlen("PID"))) {
opal_list_t hnp_list;
opal_list_item_t *item;
orte_hnp_contact_t *hnp;
char *ptr;
pid_t pid;
ptr = strchr(orterun_globals.ompi_server, ':');
if (NULL == ptr) {
/* pid is not correctly formatted */
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-bad", true,
orterun_basename, orterun_basename,
orterun_globals.ompi_server, orterun_basename);
exit(1);
}
++ptr; /* space past the : */
if (0 >= strlen(ptr)) {
/* they forgot to give us the pid! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-bad", true,
orterun_basename, orterun_basename,
orterun_globals.ompi_server, orterun_basename);
exit(1);
}
pid = strtoul(ptr, NULL, 10);
/* to search the local mpirun's, we have to partially initialize the
* orte_process_info structure. This won't fully be setup until orte_init,
* but we finagle a little bit of it here
*/
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(NULL, &orte_process_info.tmpdir_base,
&orte_process_info.top_session_dir,
NULL, NULL, NULL))) {
orte_show_help("help-orterun.txt", "orterun:ompi-server-could-not-get-hnp-list", true,
orterun_basename, orterun_basename);
exit(1);
}
OBJ_CONSTRUCT(&hnp_list, opal_list_t);
/* get the list of HNPs, but do -not- setup contact info to them in the RML */
if (ORTE_SUCCESS != (rc = orte_list_local_hnps(&hnp_list, false))) {
orte_show_help("help-orterun.txt", "orterun:ompi-server-could-not-get-hnp-list", true,
orterun_basename, orterun_basename);
exit(1);
}
/* search the list for the desired pid */
while (NULL != (item = opal_list_remove_first(&hnp_list))) {
hnp = (orte_hnp_contact_t*)item;
if (pid == hnp->pid) {
ompi_server = strdup(hnp->rml_uri);
goto hnp_found;
}
OBJ_RELEASE(item);
}
/* if we got here, it wasn't found */
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-not-found", true,
orterun_basename, orterun_basename, pid, orterun_globals.ompi_server,
orterun_basename);
OBJ_DESTRUCT(&hnp_list);
exit(1);
hnp_found:
/* cleanup rest of list */
while (NULL != (item = opal_list_remove_first(&hnp_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&hnp_list);
} else {
ompi_server = strdup(orterun_globals.ompi_server);
}

Просмотреть файл

@ -43,6 +43,7 @@ struct orterun_globals_t {
bool verbose;
bool quiet;
bool report_pid;
bool report_uri;
bool exit;
bool by_node;
bool by_slot;

Просмотреть файл

@ -94,7 +94,7 @@ int orte_write_hnp_contact_file(char *filename)
return ORTE_SUCCESS;
}
int orte_read_hnp_contact_file(char *filename, orte_hnp_contact_t *hnp)
int orte_read_hnp_contact_file(char *filename, orte_hnp_contact_t *hnp, bool connect)
{
char *hnp_uri, *pidstr;
FILE *fp;
@ -125,25 +125,27 @@ int orte_read_hnp_contact_file(char *filename, orte_hnp_contact_t *hnp)
hnp->pid = (pid_t)atol(pidstr);
fclose(fp);
/* set the contact info into the comm hash tables*/
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(hnp_uri))) {
ORTE_ERROR_LOG(rc);
return(rc);
if (connect) {
/* set the contact info into the comm hash tables*/
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(hnp_uri))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
/* extract the HNP's name and store it */
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(hnp_uri, &hnp->name, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* set the route to be direct */
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&hnp->name, &hnp->name))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* extract the HNP's name and store it */
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(hnp_uri, &hnp->name, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* set the route to be direct */
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&hnp->name, &hnp->name))) {
ORTE_ERROR_LOG(rc);
return rc;
}
hnp->rml_uri = hnp_uri;
return ORTE_SUCCESS;
}
@ -163,7 +165,7 @@ static char *orte_getline(FILE *fp)
}
int orte_list_local_hnps(opal_list_t *hnps)
int orte_list_local_hnps(opal_list_t *hnps, bool connect)
{
int ret;
#ifndef __WINDOWS__
@ -220,7 +222,7 @@ int orte_list_local_hnps(opal_list_t *hnps)
dir_entry->d_name, "contact.txt", NULL );
hnp = OBJ_NEW(orte_hnp_contact_t);
if (ORTE_SUCCESS == (ret = orte_read_hnp_contact_file(contact_filename, hnp))) {
if (ORTE_SUCCESS == (ret = orte_read_hnp_contact_file(contact_filename, hnp, connect))) {
opal_list_append(hnps, &(hnp->super));
} else {
OBJ_RELEASE(hnp);
@ -258,7 +260,7 @@ int orte_list_local_hnps(opal_list_t *hnps)
dir_entry->d_name, "contact.txt", NULL );
hnp = OBJ_NEW(orte_hnp_contact_t);
if (ORTE_SUCCESS == (ret = orte_read_hnp_contact_file(contact_filename, hnp))) {
if (ORTE_SUCCESS == (ret = orte_read_hnp_contact_file(contact_filename, hnp, connect))) {
opal_list_append(hnps, &(hnp->super));
} else {
OBJ_RELEASE(hnp);

Просмотреть файл

@ -53,9 +53,9 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_hnp_contact_t);
ORTE_DECLSPEC int orte_write_hnp_contact_file(char *filename);
ORTE_DECLSPEC int orte_read_hnp_contact_file(char *filename, orte_hnp_contact_t *hnp);
ORTE_DECLSPEC int orte_read_hnp_contact_file(char *filename, orte_hnp_contact_t *hnp, bool connect);
ORTE_DECLSPEC int orte_list_local_hnps(opal_list_t *hnps);
ORTE_DECLSPEC int orte_list_local_hnps(opal_list_t *hnps, bool connect);
END_C_DECLS
#endif