Continuing the cleanup process. A few minor fixes here and there - mostly just NULLing pointers that were freed. The console can now connect to any universe, regardless of scope.
This commit was SVN r2734.
Этот коммит содержится в:
родитель
8699fa86b2
Коммит
0d4e6482cd
@ -107,6 +107,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
*/
|
||||
universe = strdup(ompi_universe_info.name);
|
||||
free(ompi_universe_info.name);
|
||||
ompi_universe_info.name = NULL;
|
||||
pid = getpid();
|
||||
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
|
||||
ompi_output(0, "mpi_init: error creating unique universe name");
|
||||
@ -117,15 +118,19 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
ompi_process_info.seed = true;
|
||||
if (NULL != ompi_universe_info.ns_replica) {
|
||||
free(ompi_universe_info.ns_replica);
|
||||
ompi_universe_info.ns_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_process_info.ns_replica) {
|
||||
free(ompi_process_info.ns_replica);
|
||||
ompi_process_info.ns_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_universe_info.gpr_replica) {
|
||||
free(ompi_universe_info.gpr_replica);
|
||||
ompi_universe_info.gpr_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_process_info.gpr_replica) {
|
||||
free(ompi_process_info.gpr_replica);
|
||||
ompi_process_info.gpr_replica = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -139,6 +144,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
/***** SET MY NAME *****/
|
||||
if (NULL != ompi_process_info.name) { /* should NOT have been previously set */
|
||||
free(ompi_process_info.name);
|
||||
ompi_process_info.name = NULL;
|
||||
}
|
||||
|
||||
if (NULL != ompi_rte_get_self()) { /* name set in environment - nonsingleton - record name */
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "mca/oob/base/base.h"
|
||||
#include "mca/ns/base/base.h"
|
||||
|
||||
#include "util/output.h"
|
||||
#include "util/cmd_line.h"
|
||||
@ -30,9 +31,6 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
|
||||
/* get universe name and store it, if user specified it */
|
||||
/* otherwise, stick with default name */
|
||||
if (NULL != ompi_universe_info.name) {
|
||||
universe = strdup(ompi_universe_info.name); /* save the current value, if exists */
|
||||
}
|
||||
|
||||
if (ompi_cmd_line_is_taken(cmd_line, "universe") ||
|
||||
ompi_cmd_line_is_taken(cmd_line, "u")) {
|
||||
@ -52,23 +50,44 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
if (NULL != (tmp = strchr(universe, '@'))) { /* remote name includes remote uid */
|
||||
*tmp = '\0';
|
||||
tmp++;
|
||||
if (NULL != ompi_universe_info.host) { /* overwrite it */
|
||||
free(ompi_universe_info.host);
|
||||
ompi_universe_info.host = NULL;
|
||||
}
|
||||
ompi_universe_info.host = strdup(tmp);
|
||||
if (NULL != ompi_universe_info.uid) {
|
||||
free(ompi_universe_info.uid);
|
||||
ompi_universe_info.uid = NULL;
|
||||
}
|
||||
ompi_universe_info.uid = strdup(universe);
|
||||
} else { /* no remote id - just remote host */
|
||||
if (NULL != ompi_universe_info.host) {
|
||||
free(ompi_universe_info.host);
|
||||
ompi_universe_info.host = NULL;
|
||||
}
|
||||
ompi_universe_info.host = strdup(universe);
|
||||
}
|
||||
} else { /* no remote host - just universe name provided */
|
||||
if (NULL != ompi_universe_info.name) {
|
||||
free(ompi_universe_info.name);
|
||||
ompi_universe_info.name = NULL;
|
||||
}
|
||||
ompi_universe_info.name = strdup(universe);
|
||||
}
|
||||
}
|
||||
|
||||
/* copy the universe name into the process_info structure */
|
||||
if (NULL != ompi_universe_info.name) {
|
||||
if (NULL != ompi_process_info.my_universe) {
|
||||
free(ompi_process_info.my_universe);
|
||||
ompi_process_info.my_universe = NULL;
|
||||
}
|
||||
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
|
||||
} else { /* set it to default value */
|
||||
ompi_universe_info.name = strdup("default-universe");
|
||||
if (NULL != ompi_process_info.my_universe) { /* overwrite it */
|
||||
free(ompi_process_info.my_universe);
|
||||
ompi_process_info.my_universe = NULL;
|
||||
}
|
||||
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
|
||||
}
|
||||
@ -84,6 +103,7 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
}
|
||||
if (NULL != ompi_process_info.tmpdir_base) { /* overwrite it */
|
||||
free(ompi_process_info.tmpdir_base);
|
||||
ompi_process_info.tmpdir_base = NULL;
|
||||
}
|
||||
ompi_process_info.tmpdir_base = strdup(ompi_cmd_line_get_param(cmd_line, "tmpdir", 0, 0));
|
||||
setenv("OMPI_tmpdir_base", ompi_process_info.tmpdir_base, 1);
|
||||
@ -96,6 +116,16 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
return;
|
||||
}
|
||||
nsreplica = strdup(ompi_cmd_line_get_param(cmd_line, "nsreplica", 0, 0));
|
||||
if (NULL != ompi_universe_info.ns_replica) {
|
||||
free(ompi_universe_info.ns_replica);
|
||||
ompi_universe_info.ns_replica = NULL;
|
||||
}
|
||||
ompi_universe_info.ns_replica = strdup(nsreplica);
|
||||
if (NULL == ompi_process_info.ns_replica) {
|
||||
ompi_process_info.ns_replica = ns_base_create_process_name(0,0,0);
|
||||
}
|
||||
mca_oob_parse_contact_info(ompi_universe_info.ns_replica,
|
||||
ompi_process_info.ns_replica, NULL);
|
||||
setenv("OMPI_MCA_ns_base_replica", nsreplica, 1); /* set the ns_replica enviro variable */
|
||||
} /* otherwise, leave it alone */
|
||||
|
||||
@ -106,6 +136,16 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
return;
|
||||
}
|
||||
gprreplica = strdup(ompi_cmd_line_get_param(cmd_line, "gprreplica", 0, 0));
|
||||
if (NULL != ompi_universe_info.gpr_replica) {
|
||||
free(ompi_universe_info.gpr_replica);
|
||||
ompi_universe_info.gpr_replica = NULL;
|
||||
}
|
||||
ompi_universe_info.gpr_replica = strdup(nsreplica);
|
||||
if (NULL == ompi_process_info.gpr_replica) {
|
||||
ompi_process_info.gpr_replica = ns_base_create_process_name(0,0,0);
|
||||
}
|
||||
mca_oob_parse_contact_info(ompi_universe_info.gpr_replica,
|
||||
ompi_process_info.gpr_replica, NULL);
|
||||
setenv("OMPI_MCA_gpr_base_replica", gprreplica, 1); /* set the gpr_replica enviro variable */
|
||||
} /* otherwise leave it alone */
|
||||
}
|
||||
|
@ -11,6 +11,8 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "mca/ns/base/base.h"
|
||||
|
||||
#include "util/output.h"
|
||||
#include "util/cmd_line.h"
|
||||
#include "util/sys_info.h"
|
||||
@ -34,6 +36,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
fprintf(stderr, "error retrieving seed contact info - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
if (NULL != ompi_universe_info.seed_contact_info) { /* overwrite it */
|
||||
free(ompi_universe_info.seed_contact_info);
|
||||
ompi_universe_info.seed_contact_info = NULL;
|
||||
}
|
||||
ompi_universe_info.seed_contact_info = strdup(ompi_cmd_line_get_param(cmd_line, "seedcontact", 0, 0));
|
||||
setenv("OMPI_universe_contact", ompi_universe_info.seed_contact_info, 1);
|
||||
}
|
||||
@ -51,6 +57,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
fprintf(stderr, "error retrieving universe scope - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
if (NULL != ompi_universe_info.scope) {
|
||||
free(ompi_universe_info.scope);
|
||||
ompi_universe_info.scope = NULL;
|
||||
}
|
||||
ompi_universe_info.scope = strdup(ompi_cmd_line_get_param(cmd_line, "scope", 0, 0));
|
||||
setenv("OMPI_universe_scope", ompi_universe_info.scope, 1);
|
||||
}
|
||||
@ -74,6 +84,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
fprintf(stderr, "error retrieving script file name - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
if (NULL != ompi_universe_info.scriptfile) {
|
||||
free(ompi_universe_info.scriptfile);
|
||||
ompi_universe_info.scriptfile = NULL;
|
||||
}
|
||||
ompi_universe_info.scriptfile = strdup(ompi_cmd_line_get_param(cmd_line, "script", 0, 0));
|
||||
setenv("OMPI_universe_script", ompi_universe_info.scriptfile, 1);
|
||||
}
|
||||
@ -84,6 +98,10 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
|
||||
fprintf(stderr, "error retrieving host file name - please report error to bugs@open-mpi.org\n");
|
||||
exit(1);
|
||||
}
|
||||
if (NULL != ompi_universe_info.hostfile) {
|
||||
free(ompi_universe_info.hostfile);
|
||||
ompi_universe_info.hostfile = NULL;
|
||||
}
|
||||
ompi_universe_info.hostfile = strdup(ompi_cmd_line_get_param(cmd_line, "hostfile", 0, 0));
|
||||
setenv("OMPI_universe_hostfile", ompi_universe_info.hostfile, 1);
|
||||
}
|
||||
|
@ -47,12 +47,14 @@ void ompi_rte_parse_environ(void)
|
||||
if (NULL != enviro_val) { /* contact info passed */
|
||||
if (NULL != ompi_universe_info.seed_contact_info) { /* overwrite */
|
||||
free(ompi_universe_info.seed_contact_info);
|
||||
ompi_universe_info.seed_contact_info = NULL;
|
||||
}
|
||||
ompi_universe_info.seed_contact_info = strdup(enviro_val);
|
||||
mca_oob_set_contact_info(ompi_universe_info.seed_contact_info);
|
||||
} else {
|
||||
if (NULL != ompi_universe_info.seed_contact_info) {
|
||||
free(ompi_universe_info.seed_contact_info);
|
||||
ompi_universe_info.seed_contact_info = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -66,6 +68,7 @@ void ompi_rte_parse_environ(void)
|
||||
} else {
|
||||
if (NULL != ompi_process_info.gpr_replica) {
|
||||
free(ompi_process_info.gpr_replica);
|
||||
ompi_process_info.gpr_replica = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -79,6 +82,7 @@ void ompi_rte_parse_environ(void)
|
||||
} else {
|
||||
if (NULL != ompi_process_info.ns_replica) {
|
||||
free(ompi_process_info.ns_replica);
|
||||
ompi_process_info.ns_replica = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -93,21 +97,17 @@ void ompi_rte_parse_environ(void)
|
||||
if (NULL != enviro_val) { /* scope passed */
|
||||
if (NULL != ompi_universe_info.scope) { /* overwrite */
|
||||
free(ompi_universe_info.scope);
|
||||
ompi_universe_info.scope = NULL;
|
||||
}
|
||||
ompi_universe_info.scope = strdup(enviro_val);
|
||||
} else {
|
||||
if (NULL != ompi_universe_info.scope) {
|
||||
free(ompi_universe_info.scope);
|
||||
ompi_universe_info.scope = NULL;
|
||||
}
|
||||
ompi_universe_info.scope = strdup("exclusive");
|
||||
}
|
||||
|
||||
/*** FOR DEBUGGING PURPOSES IN THIS EARLY STAGE - FORCE PUBLIC */
|
||||
if (NULL != ompi_universe_info.scope) {
|
||||
free(ompi_universe_info.scope);
|
||||
}
|
||||
ompi_universe_info.scope = strdup("public");
|
||||
|
||||
enviro_val = getenv("OMPI_universe_persistent");
|
||||
if (NULL != enviro_val) { /* persistence flag passed */
|
||||
ompi_universe_info.persistence = true;
|
||||
@ -115,9 +115,6 @@ void ompi_rte_parse_environ(void)
|
||||
ompi_universe_info.persistence = false;
|
||||
}
|
||||
|
||||
/*** FOR DEBUGGING PURPOSES IN THIS EARLY STAGE - FORCE PERSISTENCE */
|
||||
ompi_universe_info.persistence = true;
|
||||
|
||||
enviro_val = getenv("OMPI_universe_console");
|
||||
if (NULL != enviro_val) { /* console flag passed */
|
||||
ompi_universe_info.console = true;
|
||||
@ -129,11 +126,13 @@ void ompi_rte_parse_environ(void)
|
||||
if (NULL != enviro_val) { /* scriptfile passed */
|
||||
if (NULL != ompi_universe_info.scriptfile) { /* overwrite */
|
||||
free(ompi_universe_info.scriptfile);
|
||||
ompi_universe_info.scriptfile = NULL;
|
||||
}
|
||||
ompi_universe_info.scriptfile = strdup(enviro_val);
|
||||
} else {
|
||||
if (NULL != ompi_universe_info.scriptfile) {
|
||||
free(ompi_universe_info.scriptfile);
|
||||
ompi_universe_info.scriptfile = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -141,28 +140,34 @@ void ompi_rte_parse_environ(void)
|
||||
if (NULL != enviro_val) { /* hostfile passed */
|
||||
if (NULL != ompi_universe_info.hostfile) { /* overwrite */
|
||||
free(ompi_universe_info.hostfile);
|
||||
ompi_universe_info.hostfile = NULL;
|
||||
}
|
||||
ompi_universe_info.hostfile = strdup(enviro_val);
|
||||
} else {
|
||||
if (NULL != ompi_universe_info.hostfile) {
|
||||
free(ompi_universe_info.hostfile);
|
||||
ompi_universe_info.hostfile = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL != ompi_universe_info.name) {
|
||||
free(ompi_universe_info.name);
|
||||
ompi_universe_info.name = NULL;
|
||||
}
|
||||
ompi_universe_info.name = strdup("default-universe");
|
||||
if (NULL != ompi_process_info.my_universe) {
|
||||
free(ompi_process_info.my_universe);
|
||||
ompi_process_info.my_universe = NULL;
|
||||
}
|
||||
ompi_process_info.my_universe = strdup("default-universe");
|
||||
if (NULL != ompi_universe_info.host) {
|
||||
free(ompi_universe_info.host);
|
||||
ompi_universe_info.host = NULL;
|
||||
}
|
||||
ompi_universe_info.host = strdup(ompi_system_info.nodename);
|
||||
if (NULL != ompi_universe_info.uid) {
|
||||
free(ompi_universe_info.uid);
|
||||
ompi_universe_info.uid = NULL;
|
||||
}
|
||||
ompi_universe_info.uid = strdup(ompi_system_info.user);
|
||||
|
||||
@ -170,10 +175,12 @@ void ompi_rte_parse_environ(void)
|
||||
if (NULL != enviro_val) { /* universe name passed in environment */
|
||||
if (NULL != ompi_universe_info.name) { /* got something in it - overwrite */
|
||||
free(ompi_universe_info.name);
|
||||
ompi_universe_info.name = NULL;
|
||||
}
|
||||
ompi_universe_info.name = strdup(enviro_val);
|
||||
if (NULL != ompi_process_info.my_universe) {
|
||||
free(ompi_process_info.my_universe);
|
||||
ompi_process_info.my_universe = NULL;
|
||||
}
|
||||
ompi_process_info.my_universe = strdup(enviro_val);
|
||||
}
|
||||
@ -182,11 +189,13 @@ void ompi_rte_parse_environ(void)
|
||||
if (NULL != enviro_val) { /* tmpdir base passed in environment */
|
||||
if (NULL != ompi_process_info.tmpdir_base) { /* overwrite it */
|
||||
free(ompi_process_info.tmpdir_base);
|
||||
ompi_process_info.tmpdir_base = NULL;
|
||||
}
|
||||
ompi_process_info.tmpdir_base = strdup(enviro_val);
|
||||
} else {
|
||||
if (NULL != ompi_process_info.tmpdir_base) {
|
||||
free(ompi_process_info.tmpdir_base);
|
||||
ompi_process_info.tmpdir_base = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -35,9 +35,9 @@ static struct timeval ompi_rte_ping_wait = {2, 0};
|
||||
int ompi_rte_universe_exists()
|
||||
{
|
||||
char *contact_file;
|
||||
int ret, i;
|
||||
int ret;
|
||||
ompi_process_name_t proc={0,0,0};
|
||||
bool ns_found, gpr_found, ping_success;
|
||||
bool ns_found=false, gpr_found=false, ping_success=false;
|
||||
|
||||
/* if both ns_replica and gpr_replica were provided, check for contact with them */
|
||||
if (NULL != ompi_universe_info.ns_replica && NULL != ompi_universe_info.gpr_replica) {
|
||||
@ -50,11 +50,13 @@ int ompi_rte_universe_exists()
|
||||
free(ompi_universe_info.ns_replica);
|
||||
if (NULL != ompi_process_info.ns_replica) {
|
||||
free(ompi_process_info.ns_replica);
|
||||
ompi_process_info.ns_replica = NULL;
|
||||
}
|
||||
} else { /* name server found, now try gpr */
|
||||
ns_found = true;
|
||||
if (NULL != ompi_process_info.ns_replica) {
|
||||
free(ompi_process_info.ns_replica);
|
||||
ompi_process_info.ns_replica = NULL;
|
||||
}
|
||||
ompi_process_info.ns_replica = ns_base_copy_process_name(&proc);
|
||||
}
|
||||
@ -68,10 +70,12 @@ int ompi_rte_universe_exists()
|
||||
free(ompi_universe_info.gpr_replica);
|
||||
if (NULL != ompi_process_info.gpr_replica) {
|
||||
free(ompi_process_info.gpr_replica);
|
||||
ompi_process_info.gpr_replica = NULL;
|
||||
}
|
||||
} else {
|
||||
if (NULL != ompi_process_info.gpr_replica) {
|
||||
free(ompi_process_info.gpr_replica);
|
||||
ompi_process_info.gpr_replica = NULL;
|
||||
}
|
||||
ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc);
|
||||
gpr_found = true;
|
||||
@ -129,20 +133,22 @@ int ompi_rte_universe_exists()
|
||||
ompi_output(0, "contact info read");
|
||||
}
|
||||
|
||||
if (!ompi_universe_info.persistence || /* not persistent... */
|
||||
(0 == strncmp(ompi_universe_info.scope, "exclusive", strlen("exclusive")))) { /* ...or no connection allowed */
|
||||
/* also need to check "local" and that we did not specify the exact
|
||||
* matching universe name
|
||||
*/
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "connection not allowed");
|
||||
if (!ompi_universe_info.console) { /* if we aren't trying to connect a console */
|
||||
if (!ompi_universe_info.persistence || /* not persistent... */
|
||||
(0 == strncmp(ompi_universe_info.scope, "exclusive", strlen("exclusive")))) { /* ...or no connection allowed */
|
||||
/* also need to check "local" and that we did not specify the exact
|
||||
* matching universe name
|
||||
*/
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "connection not allowed");
|
||||
}
|
||||
return OMPI_ERR_NO_CONNECTION_ALLOWED;
|
||||
}
|
||||
return OMPI_ERR_NO_CONNECTION_ALLOWED;
|
||||
}
|
||||
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "contact info to set: %s", ompi_universe_info.seed_contact_info);
|
||||
}
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "contact info to set: %s", ompi_universe_info.seed_contact_info);
|
||||
}
|
||||
|
||||
|
||||
/* if persistent, set contact info... */
|
||||
@ -174,11 +180,34 @@ int ompi_rte_universe_exists()
|
||||
}
|
||||
|
||||
/* set the my_universe field */
|
||||
if (NULL != ompi_process_info.my_universe) {
|
||||
free(ompi_process_info.my_universe);
|
||||
ompi_process_info.my_universe = NULL;
|
||||
}
|
||||
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
|
||||
|
||||
if (NULL != ompi_process_info.ns_replica) {
|
||||
free(ompi_process_info.ns_replica);
|
||||
ompi_process_info.ns_replica = NULL;
|
||||
}
|
||||
ompi_process_info.ns_replica = ns_base_copy_process_name(&proc);
|
||||
|
||||
if (NULL != ompi_process_info.gpr_replica) {
|
||||
free(ompi_process_info.gpr_replica);
|
||||
ompi_process_info.gpr_replica = NULL;
|
||||
}
|
||||
ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc);
|
||||
|
||||
if (NULL != ompi_universe_info.ns_replica) {
|
||||
free(ompi_universe_info.ns_replica);
|
||||
ompi_universe_info.ns_replica = NULL;
|
||||
}
|
||||
ompi_universe_info.ns_replica = strdup(ompi_universe_info.seed_contact_info);
|
||||
|
||||
if (NULL != ompi_universe_info.gpr_replica) {
|
||||
free(ompi_universe_info.gpr_replica);
|
||||
ompi_universe_info.gpr_replica = NULL;
|
||||
}
|
||||
ompi_universe_info.gpr_replica = strdup(ompi_universe_info.seed_contact_info);
|
||||
|
||||
/* request ns_replica and gpr_replica info for this process
|
||||
|
@ -13,6 +13,10 @@
|
||||
#include "util/cmd_line.h"
|
||||
#include "util/proc_info.h"
|
||||
#include "util/pack.h"
|
||||
#include "util/session_dir.h"
|
||||
#include "util/output.h"
|
||||
#include "util/os_path.h"
|
||||
#include "util/universe_setup_file_io.h"
|
||||
#include "runtime/runtime.h"
|
||||
|
||||
#include "mca/base/base.h"
|
||||
@ -28,6 +32,8 @@ static char *ompi_getinputline(void);
|
||||
|
||||
static void ompi_console_sendcmd(ompi_daemon_cmd_flag_t usercmd);
|
||||
|
||||
static struct timeval ompi_rte_ping_wait = {2, 0};
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
@ -43,8 +49,8 @@ int main(int argc, char *argv[])
|
||||
ompi_cmd_line_t *cmd_line;
|
||||
bool allow_multi_user_threads = false;
|
||||
bool have_hidden_threads = false;
|
||||
bool exit_cmd;
|
||||
char *usercmd, *str_response;
|
||||
bool exit_cmd, ping_success;
|
||||
char *usercmd, *str_response, *contact_file;
|
||||
ompi_buffer_t buffer;
|
||||
ompi_process_name_t seed={0,0,0};
|
||||
int recv_tag;
|
||||
@ -140,12 +146,100 @@ int main(int argc, char *argv[])
|
||||
|
||||
fprintf(stderr, "check local univ\n");
|
||||
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_universe_exists())) {
|
||||
fprintf(stderr, "could not contact local universe %s\n", ompi_universe_info.name);
|
||||
/* check to see if local universe already exists */
|
||||
if (OMPI_SUCCESS != ompi_session_dir(false,
|
||||
ompi_process_info.tmpdir_base,
|
||||
ompi_system_info.user,
|
||||
ompi_system_info.nodename,
|
||||
NULL,
|
||||
ompi_universe_info.name,
|
||||
NULL,
|
||||
NULL)) { /* not found */
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "could not find universe session dir");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "check for contact info file");
|
||||
}
|
||||
|
||||
/* check for "contact-info" file. if present, read it in. */
|
||||
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
|
||||
"universe-setup.txt", NULL);
|
||||
|
||||
if (OMPI_SUCCESS != (ret = ompi_read_universe_setup_file(contact_file))) {
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "could not read contact file %s", contact_file);
|
||||
}
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "contact info read");
|
||||
}
|
||||
|
||||
/* if persistent, set contact info... */
|
||||
if (OMPI_SUCCESS != mca_oob_set_contact_info(ompi_universe_info.seed_contact_info)) { /* set contact info */
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "error setting oob contact info - please report error to bugs@open-mpi.org\n");
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fprintf(stderr, "init stage 2\n");
|
||||
mca_oob_parse_contact_info(ompi_universe_info.seed_contact_info, &seed, NULL);
|
||||
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "contact info set: %s", ompi_universe_info.seed_contact_info);
|
||||
ompi_output(0, "issuing ping: %d %d %d", seed.cellid, seed.jobid, seed.vpid);
|
||||
}
|
||||
|
||||
|
||||
/* ...and ping to verify it's alive */
|
||||
ping_success = false;
|
||||
if (OMPI_SUCCESS == mca_oob_ping(&seed, &ompi_rte_ping_wait)) {
|
||||
ping_success = true;
|
||||
}
|
||||
if (!ping_success) {
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "ping failed");
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* set the my_universe field */
|
||||
if (NULL != ompi_process_info.my_universe) {
|
||||
free(ompi_process_info.my_universe);
|
||||
ompi_process_info.my_universe = NULL;
|
||||
}
|
||||
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
|
||||
|
||||
if (NULL != ompi_process_info.ns_replica) {
|
||||
free(ompi_process_info.ns_replica);
|
||||
ompi_process_info.ns_replica = NULL;
|
||||
}
|
||||
ompi_process_info.ns_replica = ns_base_copy_process_name(&seed);
|
||||
|
||||
if (NULL != ompi_process_info.gpr_replica) {
|
||||
free(ompi_process_info.gpr_replica);
|
||||
ompi_process_info.gpr_replica = NULL;
|
||||
}
|
||||
ompi_process_info.gpr_replica = ns_base_copy_process_name(&seed);
|
||||
|
||||
if (NULL != ompi_universe_info.ns_replica) {
|
||||
free(ompi_universe_info.ns_replica);
|
||||
ompi_universe_info.ns_replica = NULL;
|
||||
}
|
||||
ompi_universe_info.ns_replica = strdup(ompi_universe_info.seed_contact_info);
|
||||
|
||||
if (NULL != ompi_universe_info.gpr_replica) {
|
||||
free(ompi_universe_info.gpr_replica);
|
||||
ompi_universe_info.gpr_replica = NULL;
|
||||
}
|
||||
ompi_universe_info.gpr_replica = strdup(ompi_universe_info.seed_contact_info);
|
||||
|
||||
fprintf(stderr, "init stage 2\n");
|
||||
|
||||
/* setup the rest of the rte */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads,
|
||||
@ -156,6 +250,11 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
/***** SET MY NAME *****/
|
||||
if (NULL != ompi_process_info.name) { /* should not have been previously set */
|
||||
free(ompi_process_info.name);
|
||||
ompi_process_info.name = NULL;
|
||||
}
|
||||
|
||||
jobid = ompi_name_server.create_jobid();
|
||||
vpid = ompi_name_server.reserve_range(jobid, 1);
|
||||
ompi_process_info.name = ompi_name_server.create_process_name(0, jobid, vpid);
|
||||
@ -163,6 +262,13 @@ int main(int argc, char *argv[])
|
||||
fprintf(stderr, "my name: [%d,%d,%d]\n", ompi_process_info.name->cellid,
|
||||
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
|
||||
|
||||
/* finalize the rte startup */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
|
||||
&have_hidden_threads))) {
|
||||
printf("failed to finalize the rte startup\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register my process info with my replica.
|
||||
*/
|
||||
@ -171,13 +277,6 @@ int main(int argc, char *argv[])
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* finalize the rte startup */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
|
||||
&have_hidden_threads))) {
|
||||
printf("failed to finalize the rte startup\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
exit_cmd = false;
|
||||
while (!exit_cmd) {
|
||||
|
@ -179,6 +179,7 @@ main(int argc, char *argv[])
|
||||
*/
|
||||
universe = strdup(ompi_universe_info.name);
|
||||
free(ompi_universe_info.name);
|
||||
ompi_universe_info.name = NULL;
|
||||
pid = getpid();
|
||||
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
|
||||
ompi_output(0, "mpi_init: error creating unique universe name");
|
||||
@ -189,15 +190,19 @@ main(int argc, char *argv[])
|
||||
ompi_process_info.seed = true;
|
||||
if (NULL != ompi_universe_info.ns_replica) {
|
||||
free(ompi_universe_info.ns_replica);
|
||||
ompi_universe_info.ns_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_process_info.ns_replica) {
|
||||
free(ompi_process_info.ns_replica);
|
||||
ompi_process_info.ns_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_universe_info.gpr_replica) {
|
||||
free(ompi_universe_info.gpr_replica);
|
||||
ompi_universe_info.gpr_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_process_info.gpr_replica) {
|
||||
free(ompi_process_info.gpr_replica);
|
||||
ompi_process_info.gpr_replica = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -209,10 +214,12 @@ main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
/***** SET MY NAME *****/
|
||||
if (NULL != ompi_process_info.name) { /* should NOT have been set yet */
|
||||
free(ompi_process_info.name);
|
||||
ompi_process_info.name = NULL;
|
||||
}
|
||||
|
||||
if (ompi_process_info.seed) {
|
||||
if (NULL != ompi_process_info.name) { /* overwrite it */
|
||||
free(ompi_process_info.name);
|
||||
}
|
||||
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
|
||||
} else { /* if not seed, then we joined universe - get jobid and name */
|
||||
jobid = ompi_name_server.create_jobid();
|
||||
@ -256,6 +263,10 @@ main(int argc, char *argv[])
|
||||
|
||||
/* if i'm the seed, get my contact info and write my setup file for others to find */
|
||||
if (ompi_process_info.seed) {
|
||||
if (NULL != ompi_universe_info.seed_contact_info) {
|
||||
free(ompi_universe_info.seed_contact_info);
|
||||
ompi_universe_info.seed_contact_info = NULL;
|
||||
}
|
||||
ompi_universe_info.seed_contact_info = mca_oob_get_contact_info();
|
||||
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
|
||||
"universe-setup.txt", NULL);
|
||||
|
@ -52,7 +52,10 @@ int main(int argc, char *argv[])
|
||||
bool allow_multi_user_threads = false;
|
||||
bool have_hidden_threads = false;
|
||||
char *jobid_str, *procid_str, *enviro_val, *contact_file;
|
||||
char *filenm;
|
||||
char *filenm, *universe;
|
||||
pid_t pid;
|
||||
mca_ns_base_jobid_t jobid;
|
||||
mca_ns_base_vpid_t vpid;
|
||||
|
||||
/*
|
||||
* Intialize the Open MPI environment
|
||||
@ -165,6 +168,44 @@ int main(int argc, char *argv[])
|
||||
*/
|
||||
ompi_rte_parse_daemon_cmd_line(cmd_line);
|
||||
|
||||
/* check for existing universe to join */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_universe_exists())) {
|
||||
if (ompi_rte_debug_flag) {
|
||||
ompi_output(0, "ompi_mpi_init: could not join existing universe");
|
||||
}
|
||||
if (OMPI_ERR_NOT_FOUND != ret) {
|
||||
/* if it exists but no contact could be established,
|
||||
* define unique name based on current one.
|
||||
* and start new universe with me as seed
|
||||
*/
|
||||
universe = strdup(ompi_universe_info.name);
|
||||
free(ompi_universe_info.name);
|
||||
ompi_universe_info.name = NULL;
|
||||
pid = getpid();
|
||||
if (0 > asprintf(&ompi_universe_info.name, "%s-%d", universe, pid) && ompi_rte_debug_flag) {
|
||||
ompi_output(0, "mpi_init: error creating unique universe name");
|
||||
}
|
||||
}
|
||||
|
||||
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
|
||||
ompi_process_info.seed = true;
|
||||
if (NULL != ompi_universe_info.ns_replica) {
|
||||
free(ompi_universe_info.ns_replica);
|
||||
ompi_universe_info.ns_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_process_info.ns_replica) {
|
||||
free(ompi_process_info.ns_replica);
|
||||
ompi_process_info.ns_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_universe_info.gpr_replica) {
|
||||
free(ompi_universe_info.gpr_replica);
|
||||
ompi_universe_info.gpr_replica = NULL;
|
||||
}
|
||||
if (NULL != ompi_process_info.gpr_replica) {
|
||||
free(ompi_process_info.gpr_replica);
|
||||
ompi_process_info.gpr_replica = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup the rest of the rte */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads,
|
||||
@ -175,23 +216,20 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
/***** SET MY NAME *****/
|
||||
if (ompi_process_info.seed) {
|
||||
if (ompi_daemon_debug) {
|
||||
ompi_output(0, "ompid: seed flag set");
|
||||
}
|
||||
if (NULL != ompi_process_info.name) { /* overwrite it */
|
||||
free(ompi_process_info.name);
|
||||
}
|
||||
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
|
||||
} else {
|
||||
if (ompi_daemon_debug) {
|
||||
ompi_output(0, "ompid: seed flag NOT set");
|
||||
}
|
||||
if (NULL != ompi_process_info.name) { /* overwrite it */
|
||||
free(ompi_process_info.name);
|
||||
}
|
||||
ompi_process_info.name = ompi_rte_get_self();
|
||||
}
|
||||
if (NULL != ompi_process_info.name) { /* should not have been previously set */
|
||||
free(ompi_process_info.name);
|
||||
ompi_process_info.name = NULL;
|
||||
}
|
||||
|
||||
if (NULL != ompi_rte_get_self()) { /* name set in environment - record name */
|
||||
ompi_process_info.name = ompi_rte_get_self();
|
||||
} else if (NULL == ompi_process_info.ns_replica) { /* couldn't join existing univ */
|
||||
ompi_process_info.name = ompi_name_server.create_process_name(0,0,0);
|
||||
} else { /* name server exists elsewhere - get a name for me */
|
||||
jobid = ompi_name_server.create_jobid();
|
||||
vpid = ompi_name_server.reserve_range(jobid, 1);
|
||||
ompi_process_info.name = ompi_name_server.create_process_name(0, jobid, vpid);
|
||||
}
|
||||
|
||||
/* setup my session directory */
|
||||
jobid_str = ompi_name_server.get_jobid_string(ompi_process_info.name);
|
||||
@ -219,13 +257,6 @@ int main(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Register my process info with my replica.
|
||||
*/
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
|
||||
ompi_output(0, "ompi_rte_init: failed in ompi_rte_register");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* finalize the rte startup */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
|
||||
@ -235,8 +266,21 @@ int main(int argc, char *argv[])
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register my process info with my replica. Note that this must be done
|
||||
* after the rte init is completed.
|
||||
*/
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
|
||||
ompi_output(0, "ompid: failed in ompi_rte_register()");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* if i'm the seed, get my contact info and write my setup file for others to find */
|
||||
if (ompi_process_info.seed) {
|
||||
if (NULL != ompi_universe_info.seed_contact_info) {
|
||||
free(ompi_universe_info.seed_contact_info);
|
||||
ompi_universe_info.seed_contact_info = NULL;
|
||||
}
|
||||
ompi_universe_info.seed_contact_info = mca_oob_get_contact_info();
|
||||
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
|
||||
"universe-setup.txt", NULL);
|
||||
@ -298,9 +342,11 @@ int main(int argc, char *argv[])
|
||||
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
|
||||
}
|
||||
|
||||
/* remove the universe-setup file */
|
||||
filenm = ompi_os_path(false, ompi_process_info.universe_session_dir, "universe-setup.txt", NULL);
|
||||
unlink(filenm);
|
||||
/* if i'm the seed, remove the universe-setup file */
|
||||
if (ompi_process_info.seed) {
|
||||
filenm = ompi_os_path(false, ompi_process_info.universe_session_dir, "universe-setup.txt", NULL);
|
||||
unlink(filenm);
|
||||
}
|
||||
|
||||
/* finalize the system */
|
||||
ompi_rte_finalize();
|
||||
@ -322,7 +368,6 @@ static void ompi_daemon_recv(int status, ompi_process_name_t* sender,
|
||||
ompi_buffer_t answer;
|
||||
ompi_daemon_cmd_flag_t command;
|
||||
int ret;
|
||||
int32_t str_len;
|
||||
char *contact_info;
|
||||
|
||||
OMPI_THREAD_LOCK(&ompi_daemon_mutex);
|
||||
|
@ -156,6 +156,7 @@ int main(int argc, char **argv)
|
||||
*/
|
||||
universe = strdup(ompi_universe_info.name);
|
||||
free(ompi_universe_info.name);
|
||||
ompi_universe_info.name = NULL;
|
||||
pid = getpid();
|
||||
if (0 < asprintf(&ompi_universe_info.name, "%s-%d", universe, pid)) {
|
||||
fprintf(stderr, "error creating unique universe name - please report error to bugs@open-mpi.org\n");
|
||||
@ -163,6 +164,10 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL != ompi_process_info.my_universe) {
|
||||
free(ompi_process_info.my_universe);
|
||||
ompi_process_info.my_universe = NULL;
|
||||
}
|
||||
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
|
||||
|
||||
/* ensure the enviro variables do NOT specify any replicas so that seed
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user