1
1

Continuing the cleanup process. A few minor fixes here and there - mostly just setting pointers to NULL after they have been freed. The console can now connect to any universe, regardless of scope.

This commit was SVN r2734.
Этот коммит содержится в:
Ralph Castain 2004-09-17 00:59:14 +00:00
родитель 8699fa86b2
Коммит 0d4e6482cd
9 изменённых файлов: 332 добавлений и 70 удалений

Просмотреть файл

@ -107,6 +107,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
*/
universe = strdup(ompi_universe_info.name);
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
pid = getpid();
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
ompi_output(0, "mpi_init: error creating unique universe name");
@ -117,15 +118,19 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_process_info.seed = true;
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
}
@ -139,6 +144,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/***** SET MY NAME *****/
if (NULL != ompi_process_info.name) { /* should NOT have been previously set */
free(ompi_process_info.name);
ompi_process_info.name = NULL;
}
if (NULL != ompi_rte_get_self()) { /* name set in environment - nonsingleton - record name */

Просмотреть файл

@ -15,6 +15,7 @@
#include <string.h>
#include "mca/oob/base/base.h"
#include "mca/ns/base/base.h"
#include "util/output.h"
#include "util/cmd_line.h"
@ -30,9 +31,6 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
/* get universe name and store it, if user specified it */
/* otherwise, stick with default name */
if (NULL != ompi_universe_info.name) {
universe = strdup(ompi_universe_info.name); /* save the current value, if exists */
}
if (ompi_cmd_line_is_taken(cmd_line, "universe") ||
ompi_cmd_line_is_taken(cmd_line, "u")) {
@ -52,23 +50,44 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
if (NULL != (tmp = strchr(universe, '@'))) { /* remote name includes remote uid */
*tmp = '\0';
tmp++;
if (NULL != ompi_universe_info.host) { /* overwrite it */
free(ompi_universe_info.host);
ompi_universe_info.host = NULL;
}
ompi_universe_info.host = strdup(tmp);
if (NULL != ompi_universe_info.uid) {
free(ompi_universe_info.uid);
ompi_universe_info.uid = NULL;
}
ompi_universe_info.uid = strdup(universe);
} else { /* no remote id - just remote host */
if (NULL != ompi_universe_info.host) {
free(ompi_universe_info.host);
ompi_universe_info.host = NULL;
}
ompi_universe_info.host = strdup(universe);
}
} else { /* no remote host - just universe name provided */
if (NULL != ompi_universe_info.name) {
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
}
ompi_universe_info.name = strdup(universe);
}
}
/* copy the universe name into the process_info structure */
if (NULL != ompi_universe_info.name) {
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
} else { /* set it to default value */
ompi_universe_info.name = strdup("default-universe");
if (NULL != ompi_process_info.my_universe) { /* overwrite it */
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
}
@ -84,6 +103,7 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
}
if (NULL != ompi_process_info.tmpdir_base) { /* overwrite it */
free(ompi_process_info.tmpdir_base);
ompi_process_info.tmpdir_base = NULL;
}
ompi_process_info.tmpdir_base = strdup(ompi_cmd_line_get_param(cmd_line, "tmpdir", 0, 0));
setenv("OMPI_tmpdir_base", ompi_process_info.tmpdir_base, 1);
@ -96,6 +116,16 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
return;
}
nsreplica = strdup(ompi_cmd_line_get_param(cmd_line, "nsreplica", 0, 0));
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
ompi_universe_info.ns_replica = strdup(nsreplica);
if (NULL == ompi_process_info.ns_replica) {
ompi_process_info.ns_replica = ns_base_create_process_name(0,0,0);
}
mca_oob_parse_contact_info(ompi_universe_info.ns_replica,
ompi_process_info.ns_replica, NULL);
setenv("OMPI_MCA_ns_base_replica", nsreplica, 1); /* set the ns_replica enviro variable */
} /* otherwise, leave it alone */
@ -106,6 +136,16 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
return;
}
gprreplica = strdup(ompi_cmd_line_get_param(cmd_line, "gprreplica", 0, 0));
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
ompi_universe_info.gpr_replica = strdup(nsreplica);
if (NULL == ompi_process_info.gpr_replica) {
ompi_process_info.gpr_replica = ns_base_create_process_name(0,0,0);
}
mca_oob_parse_contact_info(ompi_universe_info.gpr_replica,
ompi_process_info.gpr_replica, NULL);
setenv("OMPI_MCA_gpr_base_replica", gprreplica, 1); /* set the gpr_replica enviro variable */
} /* otherwise leave it alone */
}

Просмотреть файл

@ -11,6 +11,8 @@
#include <string.h>
#include "mca/ns/base/base.h"
#include "util/output.h"
#include "util/cmd_line.h"
#include "util/sys_info.h"
@ -34,6 +36,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
fprintf(stderr, "error retrieving seed contact info - please report error to bugs@open-mpi.org\n");
exit(1);
}
if (NULL != ompi_universe_info.seed_contact_info) { /* overwrite it */
free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
}
ompi_universe_info.seed_contact_info = strdup(ompi_cmd_line_get_param(cmd_line, "seedcontact", 0, 0));
setenv("OMPI_universe_contact", ompi_universe_info.seed_contact_info, 1);
}
@ -51,6 +57,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
fprintf(stderr, "error retrieving universe scope - please report error to bugs@open-mpi.org\n");
exit(1);
}
if (NULL != ompi_universe_info.scope) {
free(ompi_universe_info.scope);
ompi_universe_info.scope = NULL;
}
ompi_universe_info.scope = strdup(ompi_cmd_line_get_param(cmd_line, "scope", 0, 0));
setenv("OMPI_universe_scope", ompi_universe_info.scope, 1);
}
@ -74,6 +84,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
fprintf(stderr, "error retrieving script file name - please report error to bugs@open-mpi.org\n");
exit(1);
}
if (NULL != ompi_universe_info.scriptfile) {
free(ompi_universe_info.scriptfile);
ompi_universe_info.scriptfile = NULL;
}
ompi_universe_info.scriptfile = strdup(ompi_cmd_line_get_param(cmd_line, "script", 0, 0));
setenv("OMPI_universe_script", ompi_universe_info.scriptfile, 1);
}
@ -84,6 +98,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
fprintf(stderr, "error retrieving host file name - please report error to bugs@open-mpi.org\n");
exit(1);
}
if (NULL != ompi_universe_info.hostfile) {
free(ompi_universe_info.hostfile);
ompi_universe_info.hostfile = NULL;
}
ompi_universe_info.hostfile = strdup(ompi_cmd_line_get_param(cmd_line, "hostfile", 0, 0));
setenv("OMPI_universe_hostfile", ompi_universe_info.hostfile, 1);
}

Просмотреть файл

@ -47,12 +47,14 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* contact info passed */
if (NULL != ompi_universe_info.seed_contact_info) { /* overwrite */
free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
}
ompi_universe_info.seed_contact_info = strdup(enviro_val);
mca_oob_set_contact_info(ompi_universe_info.seed_contact_info);
} else {
if (NULL != ompi_universe_info.seed_contact_info) {
free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
}
}
@ -66,6 +68,7 @@ void ompi_rte_parse_environ(void)
} else {
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
}
@ -79,6 +82,7 @@ void ompi_rte_parse_environ(void)
} else {
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
}
@ -93,21 +97,17 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* scope passed */
if (NULL != ompi_universe_info.scope) { /* overwrite */
free(ompi_universe_info.scope);
ompi_universe_info.scope = NULL;
}
ompi_universe_info.scope = strdup(enviro_val);
} else {
if (NULL != ompi_universe_info.scope) {
free(ompi_universe_info.scope);
ompi_universe_info.scope = NULL;
}
ompi_universe_info.scope = strdup("exclusive");
}
/*** FOR DEBUGGING PURPOSES IN THIS EARLY STAGE - FORCE PUBLIC */
if (NULL != ompi_universe_info.scope) {
free(ompi_universe_info.scope);
}
ompi_universe_info.scope = strdup("public");
enviro_val = getenv("OMPI_universe_persistent");
if (NULL != enviro_val) { /* persistence flag passed */
ompi_universe_info.persistence = true;
@ -115,9 +115,6 @@ void ompi_rte_parse_environ(void)
ompi_universe_info.persistence = false;
}
/*** FOR DEBUGGING PURPOSES IN THIS EARLY STAGE - FORCE PERSISTENCE */
ompi_universe_info.persistence = true;
enviro_val = getenv("OMPI_universe_console");
if (NULL != enviro_val) { /* console flag passed */
ompi_universe_info.console = true;
@ -129,11 +126,13 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* scriptfile passed */
if (NULL != ompi_universe_info.scriptfile) { /* overwrite */
free(ompi_universe_info.scriptfile);
ompi_universe_info.scriptfile = NULL;
}
ompi_universe_info.scriptfile = strdup(enviro_val);
} else {
if (NULL != ompi_universe_info.scriptfile) {
free(ompi_universe_info.scriptfile);
ompi_universe_info.scriptfile = NULL;
}
}
@ -141,28 +140,34 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* hostfile passed */
if (NULL != ompi_universe_info.hostfile) { /* overwrite */
free(ompi_universe_info.hostfile);
ompi_universe_info.hostfile = NULL;
}
ompi_universe_info.hostfile = strdup(enviro_val);
} else {
if (NULL != ompi_universe_info.hostfile) {
free(ompi_universe_info.hostfile);
ompi_universe_info.hostfile = NULL;
}
}
if (NULL != ompi_universe_info.name) {
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
}
ompi_universe_info.name = strdup("default-universe");
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup("default-universe");
if (NULL != ompi_universe_info.host) {
free(ompi_universe_info.host);
ompi_universe_info.host = NULL;
}
ompi_universe_info.host = strdup(ompi_system_info.nodename);
if (NULL != ompi_universe_info.uid) {
free(ompi_universe_info.uid);
ompi_universe_info.uid = NULL;
}
ompi_universe_info.uid = strdup(ompi_system_info.user);
@ -170,10 +175,12 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* universe name passed in environment */
if (NULL != ompi_universe_info.name) { /* got something in it - overwrite */
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
}
ompi_universe_info.name = strdup(enviro_val);
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(enviro_val);
}
@ -182,11 +189,13 @@ void ompi_rte_parse_environ(void)
if (NULL != enviro_val) { /* tmpdir base passed in environment */
if (NULL != ompi_process_info.tmpdir_base) { /* overwrite it */
free(ompi_process_info.tmpdir_base);
ompi_process_info.tmpdir_base = NULL;
}
ompi_process_info.tmpdir_base = strdup(enviro_val);
} else {
if (NULL != ompi_process_info.tmpdir_base) {
free(ompi_process_info.tmpdir_base);
ompi_process_info.tmpdir_base = NULL;
}
}

Просмотреть файл

@ -35,9 +35,9 @@ static struct timeval ompi_rte_ping_wait = {2, 0};
int ompi_rte_universe_exists()
{
char *contact_file;
int ret, i;
int ret;
ompi_process_name_t proc={0,0,0};
bool ns_found, gpr_found, ping_success;
bool ns_found=false, gpr_found=false, ping_success=false;
/* if both ns_replica and gpr_replica were provided, check for contact with them */
if (NULL != ompi_universe_info.ns_replica && NULL != ompi_universe_info.gpr_replica) {
@ -50,11 +50,13 @@ int ompi_rte_universe_exists()
free(ompi_universe_info.ns_replica);
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
} else { /* name server found, now try gpr */
ns_found = true;
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
ompi_process_info.ns_replica = ns_base_copy_process_name(&proc);
}
@ -68,10 +70,12 @@ int ompi_rte_universe_exists()
free(ompi_universe_info.gpr_replica);
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
} else {
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc);
gpr_found = true;
@ -129,20 +133,22 @@ int ompi_rte_universe_exists()
ompi_output(0, "contact info read");
}
if (!ompi_universe_info.persistence || /* not persistent... */
(0 == strncmp(ompi_universe_info.scope, "exclusive", strlen("exclusive")))) { /* ...or no connection allowed */
/* also need to check "local" and that we did not specify the exact
* matching universe name
*/
if (ompi_rte_debug_flag) {
ompi_output(0, "connection not allowed");
if (!ompi_universe_info.console) { /* if we aren't trying to connect a console */
if (!ompi_universe_info.persistence || /* not persistent... */
(0 == strncmp(ompi_universe_info.scope, "exclusive", strlen("exclusive")))) { /* ...or no connection allowed */
/* also need to check "local" and that we did not specify the exact
* matching universe name
*/
if (ompi_rte_debug_flag) {
ompi_output(0, "connection not allowed");
}
return OMPI_ERR_NO_CONNECTION_ALLOWED;
}
return OMPI_ERR_NO_CONNECTION_ALLOWED;
}
if (ompi_rte_debug_flag) {
ompi_output(0, "contact info to set: %s", ompi_universe_info.seed_contact_info);
}
if (ompi_rte_debug_flag) {
ompi_output(0, "contact info to set: %s", ompi_universe_info.seed_contact_info);
}
/* if persistent, set contact info... */
@ -174,11 +180,34 @@ int ompi_rte_universe_exists()
}
/* set the my_universe field */
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
ompi_process_info.ns_replica = ns_base_copy_process_name(&proc);
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc);
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
ompi_universe_info.ns_replica = strdup(ompi_universe_info.seed_contact_info);
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
ompi_universe_info.gpr_replica = strdup(ompi_universe_info.seed_contact_info);
/* request ns_replica and gpr_replica info for this process

Просмотреть файл

@ -13,6 +13,10 @@
#include "util/cmd_line.h"
#include "util/proc_info.h"
#include "util/pack.h"
#include "util/session_dir.h"
#include "util/output.h"
#include "util/os_path.h"
#include "util/universe_setup_file_io.h"
#include "runtime/runtime.h"
#include "mca/base/base.h"
@ -28,6 +32,8 @@ static char *ompi_getinputline(void);
static void ompi_console_sendcmd(ompi_daemon_cmd_flag_t usercmd);
static struct timeval ompi_rte_ping_wait = {2, 0};
int main(int argc, char *argv[])
{
@ -43,8 +49,8 @@ int main(int argc, char *argv[])
ompi_cmd_line_t *cmd_line;
bool allow_multi_user_threads = false;
bool have_hidden_threads = false;
bool exit_cmd;
char *usercmd, *str_response;
bool exit_cmd, ping_success;
char *usercmd, *str_response, *contact_file;
ompi_buffer_t buffer;
ompi_process_name_t seed={0,0,0};
int recv_tag;
@ -140,12 +146,100 @@ int main(int argc, char *argv[])
fprintf(stderr, "check local univ\n");
if (OMPI_SUCCESS != (ret = ompi_rte_universe_exists())) {
fprintf(stderr, "could not contact local universe %s\n", ompi_universe_info.name);
/* check to see if local universe already exists */
if (OMPI_SUCCESS != ompi_session_dir(false,
ompi_process_info.tmpdir_base,
ompi_system_info.user,
ompi_system_info.nodename,
NULL,
ompi_universe_info.name,
NULL,
NULL)) { /* not found */
if (ompi_rte_debug_flag) {
ompi_output(0, "could not find universe session dir");
exit(1);
}
}
if (ompi_rte_debug_flag) {
ompi_output(0, "check for contact info file");
}
/* check for "contact-info" file. if present, read it in. */
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL);
if (OMPI_SUCCESS != (ret = ompi_read_universe_setup_file(contact_file))) {
if (ompi_rte_debug_flag) {
ompi_output(0, "could not read contact file %s", contact_file);
}
exit(ret);
}
if (ompi_rte_debug_flag) {
ompi_output(0, "contact info read");
}
/* if persistent, set contact info... */
if (OMPI_SUCCESS != mca_oob_set_contact_info(ompi_universe_info.seed_contact_info)) { /* set contact info */
if (ompi_rte_debug_flag) {
ompi_output(0, "error setting oob contact info - please report error to bugs@open-mpi.org\n");
}
exit(1);
}
fprintf(stderr, "init stage 2\n");
mca_oob_parse_contact_info(ompi_universe_info.seed_contact_info, &seed, NULL);
if (ompi_rte_debug_flag) {
ompi_output(0, "contact info set: %s", ompi_universe_info.seed_contact_info);
ompi_output(0, "issuing ping: %d %d %d", seed.cellid, seed.jobid, seed.vpid);
}
/* ...and ping to verify it's alive */
ping_success = false;
if (OMPI_SUCCESS == mca_oob_ping(&seed, &ompi_rte_ping_wait)) {
ping_success = true;
}
if (!ping_success) {
if (ompi_rte_debug_flag) {
ompi_output(0, "ping failed");
}
exit(1);
}
/* set the my_universe field */
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
ompi_process_info.ns_replica = ns_base_copy_process_name(&seed);
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
ompi_process_info.gpr_replica = ns_base_copy_process_name(&seed);
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
ompi_universe_info.ns_replica = strdup(ompi_universe_info.seed_contact_info);
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
ompi_universe_info.gpr_replica = strdup(ompi_universe_info.seed_contact_info);
fprintf(stderr, "init stage 2\n");
/* setup the rest of the rte */
if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads,
@ -156,6 +250,11 @@ int main(int argc, char *argv[])
}
/***** SET MY NAME *****/
if (NULL != ompi_process_info.name) { /* should not have been previously set */
free(ompi_process_info.name);
ompi_process_info.name = NULL;
}
jobid = ompi_name_server.create_jobid();
vpid = ompi_name_server.reserve_range(jobid, 1);
ompi_process_info.name = ompi_name_server.create_process_name(0, jobid, vpid);
@ -163,14 +262,6 @@ int main(int argc, char *argv[])
fprintf(stderr, "my name: [%d,%d,%d]\n", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
/*
* Register my process info with my replica.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
fprintf(stderr, "ompi_rte_init: failed in ompi_rte_register()\n");
return ret;
}
/* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
&have_hidden_threads))) {
@ -178,6 +269,14 @@ int main(int argc, char *argv[])
return ret;
}
/*
* Register my process info with my replica.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
fprintf(stderr, "ompi_rte_init: failed in ompi_rte_register()\n");
return ret;
}
exit_cmd = false;
while (!exit_cmd) {

Просмотреть файл

@ -179,6 +179,7 @@ main(int argc, char *argv[])
*/
universe = strdup(ompi_universe_info.name);
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
pid = getpid();
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
ompi_output(0, "mpi_init: error creating unique universe name");
@ -189,15 +190,19 @@ main(int argc, char *argv[])
ompi_process_info.seed = true;
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
}
@ -209,10 +214,12 @@ main(int argc, char *argv[])
}
/***** SET MY NAME *****/
if (NULL != ompi_process_info.name) { /* should NOT have been set yet */
free(ompi_process_info.name);
ompi_process_info.name = NULL;
}
if (ompi_process_info.seed) {
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name);
}
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
} else { /* if not seed, then we joined universe - get jobid and name */
jobid = ompi_name_server.create_jobid();
@ -256,6 +263,10 @@ main(int argc, char *argv[])
/* if i'm the seed, get my contact info and write my setup file for others to find */
if (ompi_process_info.seed) {
if (NULL != ompi_universe_info.seed_contact_info) {
free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
}
ompi_universe_info.seed_contact_info = mca_oob_get_contact_info();
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL);

Просмотреть файл

@ -52,7 +52,10 @@ int main(int argc, char *argv[])
bool allow_multi_user_threads = false;
bool have_hidden_threads = false;
char *jobid_str, *procid_str, *enviro_val, *contact_file;
char *filenm;
char *filenm, *universe;
pid_t pid;
mca_ns_base_jobid_t jobid;
mca_ns_base_vpid_t vpid;
/*
* Initialize the Open MPI environment
@ -165,6 +168,44 @@ int main(int argc, char *argv[])
*/
ompi_rte_parse_daemon_cmd_line(cmd_line);
/* check for existing universe to join */
if (OMPI_SUCCESS != (ret = ompi_rte_universe_exists())) {
if (ompi_rte_debug_flag) {
ompi_output(0, "ompi_mpi_init: could not join existing universe");
}
if (OMPI_ERR_NOT_FOUND != ret) {
/* if it exists but no contact could be established,
* define unique name based on current one.
* and start new universe with me as seed
*/
universe = strdup(ompi_universe_info.name);
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
pid = getpid();
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
ompi_output(0, "mpi_init: error creating unique universe name");
}
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
ompi_process_info.seed = true;
if (NULL != ompi_universe_info.ns_replica) {
free(ompi_universe_info.ns_replica);
ompi_universe_info.ns_replica = NULL;
}
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
ompi_process_info.ns_replica = NULL;
}
if (NULL != ompi_universe_info.gpr_replica) {
free(ompi_universe_info.gpr_replica);
ompi_universe_info.gpr_replica = NULL;
}
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
ompi_process_info.gpr_replica = NULL;
}
}
/* setup the rest of the rte */
if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads,
@ -175,23 +216,20 @@ int main(int argc, char *argv[])
}
/***** SET MY NAME *****/
if (ompi_process_info.seed) {
if (ompi_daemon_debug) {
ompi_output(0, "ompid: seed flag set");
}
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name);
}
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
} else {
if (ompi_daemon_debug) {
ompi_output(0, "ompid: seed flag NOT set");
}
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name);
}
ompi_process_info.name = ompi_rte_get_self();
}
if (NULL != ompi_process_info.name) { /* should not have been previously set */
free(ompi_process_info.name);
ompi_process_info.name = NULL;
}
if (NULL != ompi_rte_get_self()) { /* name set in environment - record name */
ompi_process_info.name = ompi_rte_get_self();
} else if (NULL == ompi_process_info.ns_replica) { /* couldn't join existing univ */
ompi_process_info.name = ompi_name_server.create_process_name(0,0,0);
} else { /* name server exists elsewhere - get a name for me */
jobid = ompi_name_server.create_jobid();
vpid = ompi_name_server.reserve_range(jobid, 1);
ompi_process_info.name = ompi_name_server.create_process_name(0, jobid, vpid);
}
/* setup my session directory */
jobid_str = ompi_name_server.get_jobid_string(ompi_process_info.name);
@ -219,13 +257,6 @@ int main(int argc, char *argv[])
exit(-1);
}
/*
* Register my process info with my replica.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
ompi_output(0, "ompi_rte_init: failed in ompi_rte_register");
return ret;
}
/* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
@ -235,8 +266,21 @@ int main(int argc, char *argv[])
return ret;
}
/*
* Register my process info with my replica. Note that this must be done
* after the rte init is completed.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
ompi_output(0, "ompid: failed in ompi_rte_register()");
return ret;
}
/* if i'm the seed, get my contact info and write my setup file for others to find */
if (ompi_process_info.seed) {
if (NULL != ompi_universe_info.seed_contact_info) {
free(ompi_universe_info.seed_contact_info);
ompi_universe_info.seed_contact_info = NULL;
}
ompi_universe_info.seed_contact_info = mca_oob_get_contact_info();
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL);
@ -298,9 +342,11 @@ int main(int argc, char *argv[])
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* remove the universe-setup file */
filenm = ompi_os_path(false, ompi_process_info.universe_session_dir, "universe-setup.txt", NULL);
unlink(filenm);
/* if i'm the seed, remove the universe-setup file */
if (ompi_process_info.seed) {
filenm = ompi_os_path(false, ompi_process_info.universe_session_dir, "universe-setup.txt", NULL);
unlink(filenm);
}
/* finalize the system */
ompi_rte_finalize();
@ -322,7 +368,6 @@ static void ompi_daemon_recv(int status, ompi_process_name_t* sender,
ompi_buffer_t answer;
ompi_daemon_cmd_flag_t command;
int ret;
int32_t str_len;
char *contact_info;
OMPI_THREAD_LOCK(&ompi_daemon_mutex);

Просмотреть файл

@ -156,6 +156,7 @@ int main(int argc, char **argv)
*/
universe = strdup(ompi_universe_info.name);
free(ompi_universe_info.name);
ompi_universe_info.name = NULL;
pid = getpid();
if (0 < asprintf(&ompi_universe_info.name, "%s-%d", universe, pid)) {
fprintf(stderr, "error creating unique universe name - please report error to bugs@open-mpi.org\n");
@ -163,6 +164,10 @@ int main(int argc, char **argv)
}
}
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
ompi_process_info.my_universe = NULL;
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
/* ensure the enviro variables do NOT specify any replicas so that seed