1
1

Continuing the cleanup process. A few minor fixes here and there - mostly just NULLing pointers that were freed. The console can now connect to any universe, regardless of scope.

This commit was SVN r2734.
Этот коммит содержится в:
Ralph Castain 2004-09-17 00:59:14 +00:00
родитель 8699fa86b2
Коммит 0d4e6482cd
9 изменённых файлов: 332 добавлений и 70 удалений

Просмотреть файл

@ -107,6 +107,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
*/ */
universe = strdup(ompi_universe_info.name); universe = strdup(ompi_universe_info.name);
free(ompi_universe_info.name); free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
pid = getpid(); pid = getpid();
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) { if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
ompi_output(0, "mpi_init: error creating unique universe name"); ompi_output(0, "mpi_init: error creating unique universe name");
@ -117,15 +118,19 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_process_info.seed = true; ompi_process_info.seed = true;
if (NULL != ompi_universe_info.ns_replica) { if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica); free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
} }
if (NULL != ompi_process_info.ns_replica) { if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica); free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
} }
if (NULL != ompi_universe_info.gpr_replica) { if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica); free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
} }
if (NULL != ompi_process_info.gpr_replica) { if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica); free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
} }
} }
@ -139,6 +144,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/***** SET MY NAME *****/ /***** SET MY NAME *****/
if (NULL != ompi_process_info.name) { /* should NOT have been previously set */ if (NULL != ompi_process_info.name) { /* should NOT have been previously set */
free(ompi_process_info.name); free(ompi_process_info.name);
ompi_process_info.name = NULL;
} }
if (NULL != ompi_rte_get_self()) { /* name set in environment - nonsingleton - record name */ if (NULL != ompi_rte_get_self()) { /* name set in environment - nonsingleton - record name */

Просмотреть файл

@ -15,6 +15,7 @@
#include <string.h> #include <string.h>
#include "mca/oob/base/base.h" #include "mca/oob/base/base.h"
#include "mca/ns/base/base.h"
#include "util/output.h" #include "util/output.h"
#include "util/cmd_line.h" #include "util/cmd_line.h"
@ -30,9 +31,6 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
/* get universe name and store it, if user specified it */ /* get universe name and store it, if user specified it */
/* otherwise, stick with default name */ /* otherwise, stick with default name */
if (NULL != ompi_universe_info.name) {
universe = strdup(ompi_universe_info.name); /* save the current value, if exists */
}
if (ompi_cmd_line_is_taken(cmd_line, "universe") || if (ompi_cmd_line_is_taken(cmd_line, "universe") ||
ompi_cmd_line_is_taken(cmd_line, "u")) { ompi_cmd_line_is_taken(cmd_line, "u")) {
@ -52,23 +50,44 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
if (NULL != (tmp = strchr(universe, '@'))) { /* remote name includes remote uid */ if (NULL != (tmp = strchr(universe, '@'))) { /* remote name includes remote uid */
*tmp = '\0'; *tmp = '\0';
tmp++; tmp++;
if (NULL != ompi_universe_info.host) { /* overwrite it */
free(ompi_universe_info.host);
ompi_universe_info.host = NULL;
}
ompi_universe_info.host = strdup(tmp); ompi_universe_info.host = strdup(tmp);
if (NULL != ompi_universe_info.uid) {
free(ompi_universe_info.uid);
ompi_universe_info.uid = NULL;
}
ompi_universe_info.uid = strdup(universe); ompi_universe_info.uid = strdup(universe);
} else { /* no remote id - just remote host */ } else { /* no remote id - just remote host */
if (NULL != ompi_universe_info.host) {
free(ompi_universe_info.host);
ompi_universe_info.host = NULL;
}
ompi_universe_info.host = strdup(universe); ompi_universe_info.host = strdup(universe);
} }
} else { /* no remote host - just universe name provided */ } else { /* no remote host - just universe name provided */
if (NULL != ompi_universe_info.name) {
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
}
ompi_universe_info.name = strdup(universe); ompi_universe_info.name = strdup(universe);
} }
} }
/* copy the universe name into the process_info structure */ /* copy the universe name into the process_info structure */
if (NULL != ompi_universe_info.name) { if (NULL != ompi_universe_info.name) {
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name); ompi_process_info.my_universe = strdup(ompi_universe_info.name);
} else { /* set it to default value */ } else { /* set it to default value */
ompi_universe_info.name = strdup("default-universe"); ompi_universe_info.name = strdup("default-universe");
if (NULL != ompi_process_info.my_universe) { /* overwrite it */ if (NULL != ompi_process_info.my_universe) { /* overwrite it */
free(ompi_process_info.my_universe); free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
} }
ompi_process_info.my_universe = strdup(ompi_universe_info.name); ompi_process_info.my_universe = strdup(ompi_universe_info.name);
} }
@ -84,6 +103,7 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
} }
if (NULL != ompi_process_info.tmpdir_base) { /* overwrite it */ if (NULL != ompi_process_info.tmpdir_base) { /* overwrite it */
free(ompi_process_info.tmpdir_base); free(ompi_process_info.tmpdir_base);
ompi_process_info.tmpdir_base = NULL;
} }
ompi_process_info.tmpdir_base = strdup(ompi_cmd_line_get_param(cmd_line, "tmpdir", 0, 0)); ompi_process_info.tmpdir_base = strdup(ompi_cmd_line_get_param(cmd_line, "tmpdir", 0, 0));
setenv("OMPI_tmpdir_base", ompi_process_info.tmpdir_base, 1); setenv("OMPI_tmpdir_base", ompi_process_info.tmpdir_base, 1);
@ -96,6 +116,16 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
return; return;
} }
nsreplica = strdup(ompi_cmd_line_get_param(cmd_line, "nsreplica", 0, 0)); nsreplica = strdup(ompi_cmd_line_get_param(cmd_line, "nsreplica", 0, 0));
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
ompi_universe_info.ns_replica = strdup(nsreplica);
if (NULL == ompi_process_info.ns_replica) {
ompi_process_info.ns_replica = ns_base_create_process_name(0,0,0);
}
mca_oob_parse_contact_info(ompi_universe_info.ns_replica,
ompi_process_info.ns_replica, NULL);
setenv("OMPI_MCA_ns_base_replica", nsreplica, 1); /* set the ns_replica enviro variable */ setenv("OMPI_MCA_ns_base_replica", nsreplica, 1); /* set the ns_replica enviro variable */
} /* otherwise, leave it alone */ } /* otherwise, leave it alone */
@ -106,6 +136,16 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
return; return;
} }
gprreplica = strdup(ompi_cmd_line_get_param(cmd_line, "gprreplica", 0, 0)); gprreplica = strdup(ompi_cmd_line_get_param(cmd_line, "gprreplica", 0, 0));
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
ompi_universe_info.gpr_replica = strdup(nsreplica);
if (NULL == ompi_process_info.gpr_replica) {
ompi_process_info.gpr_replica = ns_base_create_process_name(0,0,0);
}
mca_oob_parse_contact_info(ompi_universe_info.gpr_replica,
ompi_process_info.gpr_replica, NULL);
setenv("OMPI_MCA_gpr_base_replica", gprreplica, 1); /* set the gpr_replica enviro variable */ setenv("OMPI_MCA_gpr_base_replica", gprreplica, 1); /* set the gpr_replica enviro variable */
} /* otherwise leave it alone */ } /* otherwise leave it alone */
} }

Просмотреть файл

@ -11,6 +11,8 @@
#include <string.h> #include <string.h>
#include "mca/ns/base/base.h"
#include "util/output.h" #include "util/output.h"
#include "util/cmd_line.h" #include "util/cmd_line.h"
#include "util/sys_info.h" #include "util/sys_info.h"
@ -34,6 +36,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
fprintf(stderr, "error retrieving seed contact info - please report error to bugs@open-mpi.org\n"); fprintf(stderr, "error retrieving seed contact info - please report error to bugs@open-mpi.org\n");
exit(1); exit(1);
} }
if (NULL != ompi_universe_info.seed_contact_info) { /* overwrite it */
free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
}
ompi_universe_info.seed_contact_info = strdup(ompi_cmd_line_get_param(cmd_line, "seedcontact", 0, 0)); ompi_universe_info.seed_contact_info = strdup(ompi_cmd_line_get_param(cmd_line, "seedcontact", 0, 0));
setenv("OMPI_universe_contact", ompi_universe_info.seed_contact_info, 1); setenv("OMPI_universe_contact", ompi_universe_info.seed_contact_info, 1);
} }
@ -51,6 +57,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
fprintf(stderr, "error retrieving universe scope - please report error to bugs@open-mpi.org\n"); fprintf(stderr, "error retrieving universe scope - please report error to bugs@open-mpi.org\n");
exit(1); exit(1);
} }
if (NULL != ompi_universe_info.scope) {
free(ompi_universe_info.scope);
ompi_universe_info.scope = NULL;
}
ompi_universe_info.scope = strdup(ompi_cmd_line_get_param(cmd_line, "scope", 0, 0)); ompi_universe_info.scope = strdup(ompi_cmd_line_get_param(cmd_line, "scope", 0, 0));
setenv("OMPI_universe_scope", ompi_universe_info.scope, 1); setenv("OMPI_universe_scope", ompi_universe_info.scope, 1);
} }
@ -74,6 +84,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
fprintf(stderr, "error retrieving script file name - please report error to bugs@open-mpi.org\n"); fprintf(stderr, "error retrieving script file name - please report error to bugs@open-mpi.org\n");
exit(1); exit(1);
} }
if (NULL != ompi_universe_info.scriptfile) {
free(ompi_universe_info.scriptfile);
ompi_universe_info.scriptfile = NULL;
}
ompi_universe_info.scriptfile = strdup(ompi_cmd_line_get_param(cmd_line, "script", 0, 0)); ompi_universe_info.scriptfile = strdup(ompi_cmd_line_get_param(cmd_line, "script", 0, 0));
setenv("OMPI_universe_script", ompi_universe_info.scriptfile, 1); setenv("OMPI_universe_script", ompi_universe_info.scriptfile, 1);
} }
@ -84,6 +98,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
fprintf(stderr, "error retrieving host file name - please report error to bugs@open-mpi.org\n"); fprintf(stderr, "error retrieving host file name - please report error to bugs@open-mpi.org\n");
exit(1); exit(1);
} }
if (NULL != ompi_universe_info.hostfile) {
free(ompi_universe_info.hostfile);
ompi_universe_info.hostfile = NULL;
}
ompi_universe_info.hostfile = strdup(ompi_cmd_line_get_param(cmd_line, "hostfile", 0, 0)); ompi_universe_info.hostfile = strdup(ompi_cmd_line_get_param(cmd_line, "hostfile", 0, 0));
setenv("OMPI_universe_hostfile", ompi_universe_info.hostfile, 1); setenv("OMPI_universe_hostfile", ompi_universe_info.hostfile, 1);
} }

Просмотреть файл

@ -47,12 +47,14 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* contact info passed */ if (NULL != enviro_val) { /* contact info passed */
if (NULL != ompi_universe_info.seed_contact_info) { /* overwrite */ if (NULL != ompi_universe_info.seed_contact_info) { /* overwrite */
free(ompi_universe_info.seed_contact_info); free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
} }
ompi_universe_info.seed_contact_info = strdup(enviro_val); ompi_universe_info.seed_contact_info = strdup(enviro_val);
mca_oob_set_contact_info(ompi_universe_info.seed_contact_info); mca_oob_set_contact_info(ompi_universe_info.seed_contact_info);
} else { } else {
if (NULL != ompi_universe_info.seed_contact_info) { if (NULL != ompi_universe_info.seed_contact_info) {
free(ompi_universe_info.seed_contact_info); free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
} }
} }
@ -66,6 +68,7 @@ void ompi_rte_parse_environ(void)
} else { } else {
if (NULL != ompi_process_info.gpr_replica) { if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica); free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
} }
} }
@ -79,6 +82,7 @@ void ompi_rte_parse_environ(void)
} else { } else {
if (NULL != ompi_process_info.ns_replica) { if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica); free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
} }
} }
@ -93,21 +97,17 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* scope passed */ if (NULL != enviro_val) { /* scope passed */
if (NULL != ompi_universe_info.scope) { /* overwrite */ if (NULL != ompi_universe_info.scope) { /* overwrite */
free(ompi_universe_info.scope); free(ompi_universe_info.scope);
ompi_universe_info.scope = NULL;
} }
ompi_universe_info.scope = strdup(enviro_val); ompi_universe_info.scope = strdup(enviro_val);
} else { } else {
if (NULL != ompi_universe_info.scope) { if (NULL != ompi_universe_info.scope) {
free(ompi_universe_info.scope); free(ompi_universe_info.scope);
ompi_universe_info.scope = NULL;
} }
ompi_universe_info.scope = strdup("exclusive"); ompi_universe_info.scope = strdup("exclusive");
} }
/*** FOR DEBUGGING PURPOSES IN THIS EARLY STAGE - FORCE PUBLIC */
if (NULL != ompi_universe_info.scope) {
free(ompi_universe_info.scope);
}
ompi_universe_info.scope = strdup("public");
enviro_val = getenv("OMPI_universe_persistent"); enviro_val = getenv("OMPI_universe_persistent");
if (NULL != enviro_val) { /* persistence flag passed */ if (NULL != enviro_val) { /* persistence flag passed */
ompi_universe_info.persistence = true; ompi_universe_info.persistence = true;
@ -115,9 +115,6 @@ void ompi_rte_parse_environ(void)
ompi_universe_info.persistence = false; ompi_universe_info.persistence = false;
} }
/*** FOR DEBUGGING PURPOSES IN THIS EARLY STAGE - FORCE PERSISTENCE */
ompi_universe_info.persistence = true;
enviro_val = getenv("OMPI_universe_console"); enviro_val = getenv("OMPI_universe_console");
if (NULL != enviro_val) { /* console flag passed */ if (NULL != enviro_val) { /* console flag passed */
ompi_universe_info.console = true; ompi_universe_info.console = true;
@ -129,11 +126,13 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* scriptfile passed */ if (NULL != enviro_val) { /* scriptfile passed */
if (NULL != ompi_universe_info.scriptfile) { /* overwrite */ if (NULL != ompi_universe_info.scriptfile) { /* overwrite */
free(ompi_universe_info.scriptfile); free(ompi_universe_info.scriptfile);
ompi_universe_info.scriptfile = NULL;
} }
ompi_universe_info.scriptfile = strdup(enviro_val); ompi_universe_info.scriptfile = strdup(enviro_val);
} else { } else {
if (NULL != ompi_universe_info.scriptfile) { if (NULL != ompi_universe_info.scriptfile) {
free(ompi_universe_info.scriptfile); free(ompi_universe_info.scriptfile);
ompi_universe_info.scriptfile = NULL;
} }
} }
@ -141,28 +140,34 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* hostfile passed */ if (NULL != enviro_val) { /* hostfile passed */
if (NULL != ompi_universe_info.hostfile) { /* overwrite */ if (NULL != ompi_universe_info.hostfile) { /* overwrite */
free(ompi_universe_info.hostfile); free(ompi_universe_info.hostfile);
ompi_universe_info.hostfile = NULL;
} }
ompi_universe_info.hostfile = strdup(enviro_val); ompi_universe_info.hostfile = strdup(enviro_val);
} else { } else {
if (NULL != ompi_universe_info.hostfile) { if (NULL != ompi_universe_info.hostfile) {
free(ompi_universe_info.hostfile); free(ompi_universe_info.hostfile);
ompi_universe_info.hostfile = NULL;
} }
} }
if (NULL != ompi_universe_info.name) { if (NULL != ompi_universe_info.name) {
free(ompi_universe_info.name); free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
} }
ompi_universe_info.name = strdup("default-universe"); ompi_universe_info.name = strdup("default-universe");
if (NULL != ompi_process_info.my_universe) { if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe); free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
} }
ompi_process_info.my_universe = strdup("default-universe"); ompi_process_info.my_universe = strdup("default-universe");
if (NULL != ompi_universe_info.host) { if (NULL != ompi_universe_info.host) {
free(ompi_universe_info.host); free(ompi_universe_info.host);
ompi_universe_info.host = NULL;
} }
ompi_universe_info.host = strdup(ompi_system_info.nodename); ompi_universe_info.host = strdup(ompi_system_info.nodename);
if (NULL != ompi_universe_info.uid) { if (NULL != ompi_universe_info.uid) {
free(ompi_universe_info.uid); free(ompi_universe_info.uid);
ompi_universe_info.uid = NULL;
} }
ompi_universe_info.uid = strdup(ompi_system_info.user); ompi_universe_info.uid = strdup(ompi_system_info.user);
@ -170,10 +175,12 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* universe name passed in environment */ if (NULL != enviro_val) { /* universe name passed in environment */
if (NULL != ompi_universe_info.name) { /* got something in it - overwrite */ if (NULL != ompi_universe_info.name) { /* got something in it - overwrite */
free(ompi_universe_info.name); free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
} }
ompi_universe_info.name = strdup(enviro_val); ompi_universe_info.name = strdup(enviro_val);
if (NULL != ompi_process_info.my_universe) { if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe); free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
} }
ompi_process_info.my_universe = strdup(enviro_val); ompi_process_info.my_universe = strdup(enviro_val);
} }
@ -182,11 +189,13 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* tmpdir base passed in environment */ if (NULL != enviro_val) { /* tmpdir base passed in environment */
if (NULL != ompi_process_info.tmpdir_base) { /* overwrite it */ if (NULL != ompi_process_info.tmpdir_base) { /* overwrite it */
free(ompi_process_info.tmpdir_base); free(ompi_process_info.tmpdir_base);
ompi_process_info.tmpdir_base = NULL;
} }
ompi_process_info.tmpdir_base = strdup(enviro_val); ompi_process_info.tmpdir_base = strdup(enviro_val);
} else { } else {
if (NULL != ompi_process_info.tmpdir_base) { if (NULL != ompi_process_info.tmpdir_base) {
free(ompi_process_info.tmpdir_base); free(ompi_process_info.tmpdir_base);
ompi_process_info.tmpdir_base = NULL;
} }
} }

Просмотреть файл

@ -35,9 +35,9 @@ static struct timeval ompi_rte_ping_wait = {2, 0};
int ompi_rte_universe_exists() int ompi_rte_universe_exists()
{ {
char *contact_file; char *contact_file;
int ret, i; int ret;
ompi_process_name_t proc={0,0,0}; ompi_process_name_t proc={0,0,0};
bool ns_found, gpr_found, ping_success; bool ns_found=false, gpr_found=false, ping_success=false;
/* if both ns_replica and gpr_replica were provided, check for contact with them */ /* if both ns_replica and gpr_replica were provided, check for contact with them */
if (NULL != ompi_universe_info.ns_replica && NULL != ompi_universe_info.gpr_replica) { if (NULL != ompi_universe_info.ns_replica && NULL != ompi_universe_info.gpr_replica) {
@ -50,11 +50,13 @@ int ompi_rte_universe_exists()
free(ompi_universe_info.ns_replica); free(ompi_universe_info.ns_replica);
if (NULL != ompi_process_info.ns_replica) { if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica); free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
} }
} else { /* name server found, now try gpr */ } else { /* name server found, now try gpr */
ns_found = true; ns_found = true;
if (NULL != ompi_process_info.ns_replica) { if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica); free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
} }
ompi_process_info.ns_replica = ns_base_copy_process_name(&proc); ompi_process_info.ns_replica = ns_base_copy_process_name(&proc);
} }
@ -68,10 +70,12 @@ int ompi_rte_universe_exists()
free(ompi_universe_info.gpr_replica); free(ompi_universe_info.gpr_replica);
if (NULL != ompi_process_info.gpr_replica) { if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica); free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
} }
} else { } else {
if (NULL != ompi_process_info.gpr_replica) { if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica); free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
} }
ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc); ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc);
gpr_found = true; gpr_found = true;
@ -129,6 +133,7 @@ int ompi_rte_universe_exists()
ompi_output(0, "contact info read"); ompi_output(0, "contact info read");
} }
if (!ompi_universe_info.console) { /* if we aren't trying to connect a console */
if (!ompi_universe_info.persistence || /* not persistent... */ if (!ompi_universe_info.persistence || /* not persistent... */
(0 == strncmp(ompi_universe_info.scope, "exclusive", strlen("exclusive")))) { /* ...or no connection allowed */ (0 == strncmp(ompi_universe_info.scope, "exclusive", strlen("exclusive")))) { /* ...or no connection allowed */
/* also need to check "local" and that we did not specify the exact /* also need to check "local" and that we did not specify the exact
@ -139,6 +144,7 @@ int ompi_rte_universe_exists()
} }
return OMPI_ERR_NO_CONNECTION_ALLOWED; return OMPI_ERR_NO_CONNECTION_ALLOWED;
} }
}
if (ompi_rte_debug_flag) { if (ompi_rte_debug_flag) {
ompi_output(0, "contact info to set: %s", ompi_universe_info.seed_contact_info); ompi_output(0, "contact info to set: %s", ompi_universe_info.seed_contact_info);
@ -174,11 +180,34 @@ int ompi_rte_universe_exists()
} }
/* set the my_universe field */ /* set the my_universe field */
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name); ompi_process_info.my_universe = strdup(ompi_universe_info.name);
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
ompi_process_info.ns_replica = ns_base_copy_process_name(&proc); ompi_process_info.ns_replica = ns_base_copy_process_name(&proc);
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc); ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc);
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
ompi_universe_info.ns_replica = strdup(ompi_universe_info.seed_contact_info); ompi_universe_info.ns_replica = strdup(ompi_universe_info.seed_contact_info);
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
ompi_universe_info.gpr_replica = strdup(ompi_universe_info.seed_contact_info); ompi_universe_info.gpr_replica = strdup(ompi_universe_info.seed_contact_info);
/* request ns_replica and gpr_replica info for this process /* request ns_replica and gpr_replica info for this process

Просмотреть файл

@ -13,6 +13,10 @@
#include "util/cmd_line.h" #include "util/cmd_line.h"
#include "util/proc_info.h" #include "util/proc_info.h"
#include "util/pack.h" #include "util/pack.h"
#include "util/session_dir.h"
#include "util/output.h"
#include "util/os_path.h"
#include "util/universe_setup_file_io.h"
#include "runtime/runtime.h" #include "runtime/runtime.h"
#include "mca/base/base.h" #include "mca/base/base.h"
@ -28,6 +32,8 @@ static char *ompi_getinputline(void);
static void ompi_console_sendcmd(ompi_daemon_cmd_flag_t usercmd); static void ompi_console_sendcmd(ompi_daemon_cmd_flag_t usercmd);
static struct timeval ompi_rte_ping_wait = {2, 0};
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
@ -43,8 +49,8 @@ int main(int argc, char *argv[])
ompi_cmd_line_t *cmd_line; ompi_cmd_line_t *cmd_line;
bool allow_multi_user_threads = false; bool allow_multi_user_threads = false;
bool have_hidden_threads = false; bool have_hidden_threads = false;
bool exit_cmd; bool exit_cmd, ping_success;
char *usercmd, *str_response; char *usercmd, *str_response, *contact_file;
ompi_buffer_t buffer; ompi_buffer_t buffer;
ompi_process_name_t seed={0,0,0}; ompi_process_name_t seed={0,0,0};
int recv_tag; int recv_tag;
@ -140,10 +146,98 @@ int main(int argc, char *argv[])
fprintf(stderr, "check local univ\n"); fprintf(stderr, "check local univ\n");
if (OMPI_SUCCESS != (ret = ompi_rte_universe_exists())) { /* check to see if local universe already exists */
fprintf(stderr, "could not contact local universe %s\n", ompi_universe_info.name); if (OMPI_SUCCESS != ompi_session_dir(false,
ompi_process_info.tmpdir_base,
ompi_system_info.user,
ompi_system_info.nodename,
NULL,
ompi_universe_info.name,
NULL,
NULL)) { /* not found */
if (ompi_rte_debug_flag) {
ompi_output(0, "could not find universe session dir");
exit(1); exit(1);
} }
}
if (ompi_rte_debug_flag) {
ompi_output(0, "check for contact info file");
}
/* check for "contact-info" file. if present, read it in. */
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL);
if (OMPI_SUCCESS != (ret = ompi_read_universe_setup_file(contact_file))) {
if (ompi_rte_debug_flag) {
ompi_output(0, "could not read contact file %s", contact_file);
}
exit(ret);
}
if (ompi_rte_debug_flag) {
ompi_output(0, "contact info read");
}
/* if persistent, set contact info... */
if (OMPI_SUCCESS != mca_oob_set_contact_info(ompi_universe_info.seed_contact_info)) { /* set contact info */
if (ompi_rte_debug_flag) {
ompi_output(0, "error setting oob contact info - please report error to bugs@open-mpi.org\n");
}
exit(1);
}
mca_oob_parse_contact_info(ompi_universe_info.seed_contact_info, &seed, NULL);
if (ompi_rte_debug_flag) {
ompi_output(0, "contact info set: %s", ompi_universe_info.seed_contact_info);
ompi_output(0, "issuing ping: %d %d %d", seed.cellid, seed.jobid, seed.vpid);
}
/* ...and ping to verify it's alive */
ping_success = false;
if (OMPI_SUCCESS == mca_oob_ping(&seed, &ompi_rte_ping_wait)) {
ping_success = true;
}
if (!ping_success) {
if (ompi_rte_debug_flag) {
ompi_output(0, "ping failed");
}
exit(1);
}
/* set the my_universe field */
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
ompi_process_info.ns_replica = ns_base_copy_process_name(&seed);
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
ompi_process_info.gpr_replica = ns_base_copy_process_name(&seed);
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
ompi_universe_info.ns_replica = strdup(ompi_universe_info.seed_contact_info);
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
ompi_universe_info.gpr_replica = strdup(ompi_universe_info.seed_contact_info);
fprintf(stderr, "init stage 2\n"); fprintf(stderr, "init stage 2\n");
@ -156,6 +250,11 @@ int main(int argc, char *argv[])
} }
/***** SET MY NAME *****/ /***** SET MY NAME *****/
if (NULL != ompi_process_info.name) { /* should not have been previously set */
free(ompi_process_info.name);
ompi_process_info.name = NULL;
}
jobid = ompi_name_server.create_jobid(); jobid = ompi_name_server.create_jobid();
vpid = ompi_name_server.reserve_range(jobid, 1); vpid = ompi_name_server.reserve_range(jobid, 1);
ompi_process_info.name = ompi_name_server.create_process_name(0, jobid, vpid); ompi_process_info.name = ompi_name_server.create_process_name(0, jobid, vpid);
@ -163,6 +262,13 @@ int main(int argc, char *argv[])
fprintf(stderr, "my name: [%d,%d,%d]\n", ompi_process_info.name->cellid, fprintf(stderr, "my name: [%d,%d,%d]\n", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid); ompi_process_info.name->jobid, ompi_process_info.name->vpid);
/* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
&have_hidden_threads))) {
printf("failed to finalize the rte startup\n");
return ret;
}
/* /*
* Register my process info with my replica. * Register my process info with my replica.
*/ */
@ -171,13 +277,6 @@ int main(int argc, char *argv[])
return ret; return ret;
} }
/* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
&have_hidden_threads))) {
printf("failed to finalize the rte startup\n");
return ret;
}
exit_cmd = false; exit_cmd = false;
while (!exit_cmd) { while (!exit_cmd) {

Просмотреть файл

@ -179,6 +179,7 @@ main(int argc, char *argv[])
*/ */
universe = strdup(ompi_universe_info.name); universe = strdup(ompi_universe_info.name);
free(ompi_universe_info.name); free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
pid = getpid(); pid = getpid();
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) { if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
ompi_output(0, "mpi_init: error creating unique universe name"); ompi_output(0, "mpi_init: error creating unique universe name");
@ -189,15 +190,19 @@ main(int argc, char *argv[])
ompi_process_info.seed = true; ompi_process_info.seed = true;
if (NULL != ompi_universe_info.ns_replica) { if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica); free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
} }
if (NULL != ompi_process_info.ns_replica) { if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica); free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
} }
if (NULL != ompi_universe_info.gpr_replica) { if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica); free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
} }
if (NULL != ompi_process_info.gpr_replica) { if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica); free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
} }
} }
@ -209,10 +214,12 @@ main(int argc, char *argv[])
} }
/***** SET MY NAME *****/ /***** SET MY NAME *****/
if (ompi_process_info.seed) { if (NULL != ompi_process_info.name) { /* should NOT have been set yet */
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name); free(ompi_process_info.name);
ompi_process_info.name = NULL;
} }
if (ompi_process_info.seed) {
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0); ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
} else { /* if not seed, then we joined universe - get jobid and name */ } else { /* if not seed, then we joined universe - get jobid and name */
jobid = ompi_name_server.create_jobid(); jobid = ompi_name_server.create_jobid();
@ -256,6 +263,10 @@ main(int argc, char *argv[])
/* if i'm the seed, get my contact info and write my setup file for others to find */ /* if i'm the seed, get my contact info and write my setup file for others to find */
if (ompi_process_info.seed) { if (ompi_process_info.seed) {
if (NULL != ompi_universe_info.seed_contact_info) {
free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
}
ompi_universe_info.seed_contact_info = mca_oob_get_contact_info(); ompi_universe_info.seed_contact_info = mca_oob_get_contact_info();
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir, contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL); "universe-setup.txt", NULL);

Просмотреть файл

@ -52,7 +52,10 @@ int main(int argc, char *argv[])
bool allow_multi_user_threads = false; bool allow_multi_user_threads = false;
bool have_hidden_threads = false; bool have_hidden_threads = false;
char *jobid_str, *procid_str, *enviro_val, *contact_file; char *jobid_str, *procid_str, *enviro_val, *contact_file;
char *filenm; char *filenm, *universe;
pid_t pid;
mca_ns_base_jobid_t jobid;
mca_ns_base_vpid_t vpid;
/* /*
* Initialize the Open MPI environment * Initialize the Open MPI environment
@ -165,6 +168,44 @@ int main(int argc, char *argv[])
*/ */
ompi_rte_parse_daemon_cmd_line(cmd_line); ompi_rte_parse_daemon_cmd_line(cmd_line);
/* check for existing universe to join */
if (OMPI_SUCCESS != (ret = ompi_rte_universe_exists())) {
if (ompi_rte_debug_flag) {
ompi_output(0, "ompi_mpi_init: could not join existing universe");
}
if (OMPI_ERR_NOT_FOUND != ret) {
/* if the universe exists but no contact could be established,
* define a unique name based on the current one
* and start a new universe with me as the seed
*/
universe = strdup(ompi_universe_info.name);
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
pid = getpid();
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
ompi_output(0, "mpi_init: error creating unique universe name");
}
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
ompi_process_info.seed = true;
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
}
/* setup the rest of the rte */ /* setup the rest of the rte */
if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads, if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads,
@ -175,22 +216,19 @@ int main(int argc, char *argv[])
} }
/***** SET MY NAME *****/ /***** SET MY NAME *****/
if (ompi_process_info.seed) { if (NULL != ompi_process_info.name) { /* should not have been previously set */
if (ompi_daemon_debug) {
ompi_output(0, "ompid: seed flag set");
}
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name);
}
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
} else {
if (ompi_daemon_debug) {
ompi_output(0, "ompid: seed flag NOT set");
}
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name); free(ompi_process_info.name);
ompi_process_info.name = NULL;
} }
if (NULL != ompi_rte_get_self()) { /* name set in environment - record name */
ompi_process_info.name = ompi_rte_get_self(); ompi_process_info.name = ompi_rte_get_self();
} else if (NULL == ompi_process_info.ns_replica) { /* couldn't join existing univ */
ompi_process_info.name = ompi_name_server.create_process_name(0,0,0);
} else { /* name server exists elsewhere - get a name for me */
jobid = ompi_name_server.create_jobid();
vpid = ompi_name_server.reserve_range(jobid, 1);
ompi_process_info.name = ompi_name_server.create_process_name(0, jobid, vpid);
} }
/* setup my session directory */ /* setup my session directory */
@ -219,13 +257,6 @@ int main(int argc, char *argv[])
exit(-1); exit(-1);
} }
/*
* Register my process info with my replica.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
ompi_output(0, "ompi_rte_init: failed in ompi_rte_register");
return ret;
}
/* finalize the rte startup */ /* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads, if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
@ -235,8 +266,21 @@ int main(int argc, char *argv[])
return ret; return ret;
} }
/*
* Register my process info with my replica. Note that this must be done
* after the rte init is completed.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
ompi_output(0, "ompid: failed in ompi_rte_register()");
return ret;
}
/* if i'm the seed, get my contact info and write my setup file for others to find */ /* if i'm the seed, get my contact info and write my setup file for others to find */
if (ompi_process_info.seed) { if (ompi_process_info.seed) {
if (NULL != ompi_universe_info.seed_contact_info) {
free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
}
ompi_universe_info.seed_contact_info = mca_oob_get_contact_info(); ompi_universe_info.seed_contact_info = mca_oob_get_contact_info();
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir, contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL); "universe-setup.txt", NULL);
@ -298,9 +342,11 @@ int main(int argc, char *argv[])
ompi_process_info.name->jobid, ompi_process_info.name->vpid); ompi_process_info.name->jobid, ompi_process_info.name->vpid);
} }
/* remove the universe-setup file */ /* if i'm the seed, remove the universe-setup file */
if (ompi_process_info.seed) {
filenm = ompi_os_path(false, ompi_process_info.universe_session_dir, "universe-setup.txt", NULL); filenm = ompi_os_path(false, ompi_process_info.universe_session_dir, "universe-setup.txt", NULL);
unlink(filenm); unlink(filenm);
}
/* finalize the system */ /* finalize the system */
ompi_rte_finalize(); ompi_rte_finalize();
@ -322,7 +368,6 @@ static void ompi_daemon_recv(int status, ompi_process_name_t* sender,
ompi_buffer_t answer; ompi_buffer_t answer;
ompi_daemon_cmd_flag_t command; ompi_daemon_cmd_flag_t command;
int ret; int ret;
int32_t str_len;
char *contact_info; char *contact_info;
OMPI_THREAD_LOCK(&ompi_daemon_mutex); OMPI_THREAD_LOCK(&ompi_daemon_mutex);

Просмотреть файл

@ -156,6 +156,7 @@ int main(int argc, char **argv)
*/ */
universe = strdup(ompi_universe_info.name); universe = strdup(ompi_universe_info.name);
free(ompi_universe_info.name); free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
pid = getpid(); pid = getpid();
if (0 < asprintf(&ompi_universe_info.name, "%s-%d", universe, pid)) { if (0 < asprintf(&ompi_universe_info.name, "%s-%d", universe, pid)) {
fprintf(stderr, "error creating unique universe name - please report error to bugs@open-mpi.org\n"); fprintf(stderr, "error creating unique universe name - please report error to bugs@open-mpi.org\n");
@ -163,6 +164,10 @@ int main(int argc, char **argv)
} }
} }
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name); ompi_process_info.my_universe = strdup(ompi_universe_info.name);
/* ensure the enviro variables do NOT specify any replicas so that seed /* ensure the enviro variables do NOT specify any replicas so that seed