1
1

Only orted was saving the universe contact info; it should actually be saved by whoever believes they are the "seed". Modified files to ensure this happens. Also includes a checkpoint of the probe and remote launch functions.

This commit was SVN r5746.
Этот коммит содержится в:
Ralph Castain 2005-05-18 16:31:03 +00:00
родитель c71f3f7152
Коммит 1b42e973d5
5 изменённых файлов: 55 добавлений и 38 удалений

Просмотреть файл

@ -35,6 +35,7 @@
#include "util/sys_info.h"
#include "util/proc_info.h"
#include "util/univ_info.h"
#include "util/os_path.h"
/**
* Leave ORTE.
@ -46,6 +47,16 @@
*/
int orte_finalize(void)
{
char *contact_path;
/* if I'm the seed, remove the universe contact info file */
if (orte_process_info.seed) {
contact_path = orte_os_path(false, orte_process_info.universe_session_dir,
"universe-setup.txt", NULL);
unlink(contact_path);
free(contact_path);
}
/* rmgr close depends on wait/iof */
orte_rmgr_base_close();
orte_wait_finalize();

Просмотреть файл

@ -46,6 +46,8 @@
#include "util/session_dir.h"
#include "util/sys_info.h"
#include "util/cmd_line.h"
#include "util/universe_setup_file_io.h"
#include "util/os_path.h"
#include "runtime/runtime.h"
#include "runtime/runtime_internal.h"
@ -57,6 +59,7 @@ int orte_init_stage1(void)
char *universe;
char *jobid_str = NULL;
char *procid_str = NULL;
char *contact_path = NULL;
pid_t pid;
orte_universe_t univ;
@ -204,9 +207,6 @@ int orte_init_stage1(void)
orte_process_info.ns_replica_uri = strdup(univ.seed_uri);
orte_process_info.gpr_replica_uri = strdup(univ.seed_uri);
} else {
if (orte_debug_flag) {
ompi_output(0, "orte_init: could not join existing universe");
}
if (ORTE_ERR_NOT_FOUND != ret) {
/* if it exists but no contact could be established,
* define unique name based on current one.
@ -221,6 +221,9 @@ int orte_init_stage1(void)
return ret;
}
}
ompi_output(0, "Could not join an existing universe");
ompi_output(0, "Establishing a new one named: %s",
orte_universe_info.name);
orte_process_info.seed = true;
/* since we are seed, ensure that all replica info is NULL'd */
@ -266,6 +269,11 @@ int orte_init_stage1(void)
return ret;
}
/* if I'm the seed, set the seed uri to be me! */
if (orte_process_info.seed) {
orte_universe_info.seed_uri = orte_rml.get_uri();
}
/* setup my session directory */
if (ORTE_SUCCESS != (ret = orte_ns.get_jobid_string(&jobid_str, orte_process_info.my_name))) {
ORTE_ERROR_LOG(ret);
@ -306,6 +314,33 @@ int orte_init_stage1(void)
free(procid_str);
}
/* if i'm the seed, get my contact info and write my setup file for others to find */
if (orte_process_info.seed) {
if (NULL != orte_universe_info.seed_uri) {
free(orte_universe_info.seed_uri);
orte_universe_info.seed_uri = NULL;
}
if (NULL == (orte_universe_info.seed_uri = orte_rml.get_uri())) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
contact_path = orte_os_path(false, orte_process_info.universe_session_dir,
"universe-setup.txt", NULL);
if (orte_debug_flag) {
ompi_output(0, "[%lu,%lu,%lu] contact_file %s",
ORTE_NAME_ARGS(orte_process_info.my_name), contact_path);
}
if (OMPI_SUCCESS != (ret = orte_write_universe_setup_file(contact_path, &orte_universe_info))) {
if (orte_debug_flag) {
ompi_output(0, "[%lu,%lu,%lu] couldn't write setup file", ORTE_NAME_ARGS(orte_process_info.my_name));
}
} else if (orte_debug_flag) {
ompi_output(0, "[%lu,%lu,%lu] wrote setup file", ORTE_NAME_ARGS(orte_process_info.my_name));
}
free(contact_path);
}
/* set contact info for ns/gpr */
if(NULL != orte_process_info.ns_replica_uri) {
orte_rml.set_uri(orte_process_info.ns_replica_uri);

Просмотреть файл

@ -455,7 +455,6 @@ static void orte_setup_hnp_recv(int status, orte_process_name_t* sender,
static void orte_setup_hnp_wait(pid_t wpid, int status, void *cbdata)
{
int rc;
orte_setup_hnp_cb_data_t *data;
OMPI_THREAD_LOCK(&orte_setup_hnp_mutex);

Просмотреть файл

@ -148,7 +148,6 @@ int main(int argc, char *argv[])
int ret = 0;
int fd;
ompi_cmd_line_t *cmd_line = NULL;
char *contact_path = NULL;
char *log_path = NULL;
char log_file[PATH_MAX];
char *jobidstring;
@ -276,29 +275,6 @@ int main(int argc, char *argv[])
return ret;
}
/* if i'm the seed, get my contact info and write my setup file for others to find */
if (orte_process_info.seed) {
if (NULL != orte_universe_info.seed_uri) {
free(orte_universe_info.seed_uri);
orte_universe_info.seed_uri = NULL;
}
orte_universe_info.seed_uri = orte_rml.get_uri();
contact_path = orte_os_path(false, orte_process_info.universe_session_dir,
"universe-setup.txt", NULL);
if (orted_globals.debug_daemons) {
ompi_output(0, "ompid: contact_file %s", contact_path);
}
if (OMPI_SUCCESS != (ret = orte_write_universe_setup_file(contact_path, &orte_universe_info))) {
if (orted_globals.debug_daemons) {
ompi_output(0, "[%lu,%lu,%lu] ompid: couldn't write setup file", ORTE_NAME_ARGS(orte_process_info.my_name));
}
} else if (orted_globals.debug_daemons) {
ompi_output(0, "[%lu,%lu,%lu] ompid: wrote setup file", ORTE_NAME_ARGS(orte_process_info.my_name));
}
}
if (orted_globals.debug_daemons) {
ompi_output(0, "[%lu,%lu,%lu] ompid: issuing callback", ORTE_NAME_ARGS(orte_process_info.my_name));
}
@ -332,9 +308,6 @@ int main(int argc, char *argv[])
}
/* cleanup */
if (NULL != contact_path) {
unlink(contact_path);
}
if (NULL != log_path) {
unlink(log_path);
}

Просмотреть файл

@ -286,14 +286,13 @@ int main(int argc, char *argv[])
/* see if a universe already exists on this machine */
if (ORTE_SUCCESS == (ret = orte_universe_exists(&univ))) {
/* universe is here! send info back and die */
} else {
/* existing universe is not here or does not allow contact.
* ensure we have a unique universe name, fork/exec an appropriate
* daemon, and then tell whomever spawned us how to talk to the new
* daemon
*/
}
/* existing universe is not here or does not allow contact.
* ensure we have a unique universe name, fork/exec an appropriate
* daemon, and then tell whomever spawned us how to talk to the new
* daemon
*/
/* cleanup */
if (NULL != contact_path) {