diff --git a/src/runtime/orte_finalize.c b/src/runtime/orte_finalize.c index 2cb81355e9..3515a19212 100644 --- a/src/runtime/orte_finalize.c +++ b/src/runtime/orte_finalize.c @@ -35,6 +35,7 @@ #include "util/sys_info.h" #include "util/proc_info.h" #include "util/univ_info.h" +#include "util/os_path.h" /** * Leave ORTE. @@ -46,6 +47,16 @@ */ int orte_finalize(void) { + char *contact_path; + + /* if I'm the seed, remove the universe contact info file */ + if (orte_process_info.seed) { + contact_path = orte_os_path(false, orte_process_info.universe_session_dir, + "universe-setup.txt", NULL); + unlink(contact_path); + free(contact_path); + } + /* rmgr close depends on wait/iof */ orte_rmgr_base_close(); orte_wait_finalize(); diff --git a/src/runtime/orte_init_stage1.c b/src/runtime/orte_init_stage1.c index 797dcff6d3..7ba1628521 100644 --- a/src/runtime/orte_init_stage1.c +++ b/src/runtime/orte_init_stage1.c @@ -46,6 +46,8 @@ #include "util/session_dir.h" #include "util/sys_info.h" #include "util/cmd_line.h" +#include "util/universe_setup_file_io.h" +#include "util/os_path.h" #include "runtime/runtime.h" #include "runtime/runtime_internal.h" @@ -57,6 +59,7 @@ int orte_init_stage1(void) char *universe; char *jobid_str = NULL; char *procid_str = NULL; + char *contact_path = NULL; pid_t pid; orte_universe_t univ; @@ -204,9 +207,6 @@ int orte_init_stage1(void) orte_process_info.ns_replica_uri = strdup(univ.seed_uri); orte_process_info.gpr_replica_uri = strdup(univ.seed_uri); } else { - if (orte_debug_flag) { - ompi_output(0, "orte_init: could not join existing universe"); - } if (ORTE_ERR_NOT_FOUND != ret) { /* if it exists but no contact could be established, * define unique name based on current one. @@ -221,6 +221,9 @@ int orte_init_stage1(void) return ret; } } + ompi_output(0, "Could not join an existing universe"); + ompi_output(0, "Establishing a new one named: %s", + orte_universe_info.name); orte_process_info.seed = true; /* since we are seed, ensure that all replica info is NULL'd */ @@ -266,6 +269,11 @@ int orte_init_stage1(void) return ret; } + /* if I'm the seed, set the seed uri to be me! */ + if (orte_process_info.seed) { + orte_universe_info.seed_uri = orte_rml.get_uri(); + } + /* setup my session directory */ if (ORTE_SUCCESS != (ret = orte_ns.get_jobid_string(&jobid_str, orte_process_info.my_name))) { ORTE_ERROR_LOG(ret); @@ -306,6 +314,33 @@ int orte_init_stage1(void) free(procid_str); } + /* if i'm the seed, get my contact info and write my setup file for others to find */ + if (orte_process_info.seed) { + if (NULL != orte_universe_info.seed_uri) { + free(orte_universe_info.seed_uri); + orte_universe_info.seed_uri = NULL; + } + if (NULL == (orte_universe_info.seed_uri = orte_rml.get_uri())) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + contact_path = orte_os_path(false, orte_process_info.universe_session_dir, + "universe-setup.txt", NULL); + if (orte_debug_flag) { + ompi_output(0, "[%lu,%lu,%lu] contact_file %s", + ORTE_NAME_ARGS(orte_process_info.my_name), contact_path); + } + + if (OMPI_SUCCESS != (ret = orte_write_universe_setup_file(contact_path, &orte_universe_info))) { + if (orte_debug_flag) { + ompi_output(0, "[%lu,%lu,%lu] couldn't write setup file", ORTE_NAME_ARGS(orte_process_info.my_name)); + } + } else if (orte_debug_flag) { + ompi_output(0, "[%lu,%lu,%lu] wrote setup file", ORTE_NAME_ARGS(orte_process_info.my_name)); + } + free(contact_path); + } + /* set contact info for ns/gpr */ if(NULL != orte_process_info.ns_replica_uri) { orte_rml.set_uri(orte_process_info.ns_replica_uri); diff --git a/src/runtime/orte_setup_hnp.c b/src/runtime/orte_setup_hnp.c index bfa77d668f..e05fd56b0c 100644 --- a/src/runtime/orte_setup_hnp.c +++ b/src/runtime/orte_setup_hnp.c @@ -455,7 +455,6 @@ static void orte_setup_hnp_recv(int status, orte_process_name_t* sender, static void orte_setup_hnp_wait(pid_t wpid, int status, void *cbdata) { - int rc; orte_setup_hnp_cb_data_t *data; OMPI_THREAD_LOCK(&orte_setup_hnp_mutex); diff --git a/src/tools/orted/orted.c b/src/tools/orted/orted.c index 6e0fe3aac6..32156aa05a 100644 --- a/src/tools/orted/orted.c +++ b/src/tools/orted/orted.c @@ -148,7 +148,6 @@ int main(int argc, char *argv[]) int ret = 0; int fd; ompi_cmd_line_t *cmd_line = NULL; - char *contact_path = NULL; char *log_path = NULL; char log_file[PATH_MAX]; char *jobidstring; @@ -276,29 +275,6 @@ int main(int argc, char *argv[]) return ret; } - /* if i'm the seed, get my contact info and write my setup file for others to find */ - if (orte_process_info.seed) { - if (NULL != orte_universe_info.seed_uri) { - free(orte_universe_info.seed_uri); - orte_universe_info.seed_uri = NULL; - } - orte_universe_info.seed_uri = orte_rml.get_uri(); - contact_path = orte_os_path(false, orte_process_info.universe_session_dir, - "universe-setup.txt", NULL); - if (orted_globals.debug_daemons) { - ompi_output(0, "ompid: contact_file %s", contact_path); - } - - if (OMPI_SUCCESS != (ret = orte_write_universe_setup_file(contact_path, &orte_universe_info))) { - if (orted_globals.debug_daemons) { - ompi_output(0, "[%lu,%lu,%lu] ompid: couldn't write setup file", ORTE_NAME_ARGS(orte_process_info.my_name)); - } - } else if (orted_globals.debug_daemons) { - ompi_output(0, "[%lu,%lu,%lu] ompid: wrote setup file", ORTE_NAME_ARGS(orte_process_info.my_name)); - } - } - - if (orted_globals.debug_daemons) { ompi_output(0, "[%lu,%lu,%lu] ompid: issuing callback", ORTE_NAME_ARGS(orte_process_info.my_name)); } @@ -332,9 +308,6 @@ int main(int argc, char *argv[]) } /* cleanup */ - if (NULL != contact_path) { - unlink(contact_path); - } if (NULL != log_path) { unlink(log_path); } diff --git a/src/tools/orteprobe/orteprobe.c b/src/tools/orteprobe/orteprobe.c index 4d1d844c3c..041aa5a019 100644 --- a/src/tools/orteprobe/orteprobe.c +++ b/src/tools/orteprobe/orteprobe.c @@ -286,14 +286,13 @@ int main(int argc, char *argv[]) /* see if a universe already exists on this machine */ if (ORTE_SUCCESS == (ret = orte_universe_exists(&univ))) { /* universe is here! send info back and die */ + } else { + /* existing universe is not here or does not allow contact. + * ensure we have a unique universe name, fork/exec an appropriate + * daemon, and then tell whomever spawned us how to talk to the new + * daemon + */ } - - /* existing universe is not here or does not allow contact. - * ensure we have a unique universe name, fork/exec an appropriate - * daemon, and then tell whomever spawned us how to talk to the new - * daemon - */ - /* cleanup */ if (NULL != contact_path) {