1
1

Update to the rte that ensures the defaults are properly set - in some cases, they weren't being set, leading to unexpected behavior when certain environmental variables weren't set. Added some more diagnostic messages to the registry.

Succeeded in contacting an existing persistent universe, and connecting!!! Thanks to Tim for the "ping" function.

First cut at a console - all it does right now is tell the universe to "die", but at least comm is being established.

This commit was SVN r2607.
Этот коммит содержится в:
Ralph Castain 2004-09-11 02:51:32 +00:00
родитель 3f11641c67
Коммит 0071f032fe
10 изменённых файлов: 300 добавлений и 105 удалений

Просмотреть файл

@ -184,6 +184,10 @@ int gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment,
if (NULL != keylist) {
OBJ_RELEASE(keylist);
}
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-put: complete", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
return return_code;
}
@ -567,6 +571,11 @@ ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode,
if (NULL == seg) { /* segment not found */
return answer;
}
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-get: segment found", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
if (NULL == tokens) { /* wildcard case - return everything */
keylist = NULL;
keys = NULL;
@ -592,6 +601,10 @@ ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode,
key2++;
}
}
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-get: got keylist", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* traverse the segment's registry, looking for matching tokens per the specified mode */
for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries);
@ -608,6 +621,10 @@ ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode,
ompi_list_append(answer, &ans->item);
}
}
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-get: finished search", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
CLEANUP:
/* release list of keys */
@ -618,6 +635,11 @@ ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode,
free(keys);
}
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-get: leaving", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
return answer;
}

Просмотреть файл

@ -22,6 +22,7 @@
#include "include/constants.h"
#include "util/output.h"
#include "util/printf.h"
#include "util/proc_info.h"
#include "mca/mca.h"
#include "mca/oob/base/base.h"
#include "mca/gpr/base/base.h"
@ -654,6 +655,10 @@ ompi_registry_notify_message_t *gpr_replica_construct_notify_message(ompi_regist
}
OBJ_RELEASE(reg_entries);
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-construct_notify: msg built", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
return msg;
}
@ -669,6 +674,11 @@ void gpr_replica_process_triggers(char *segment,
int i;
bool found;
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica: process_trig entered", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* protect against errors */
if (NULL == message || NULL == segment) {
return;
@ -679,6 +689,11 @@ void gpr_replica_process_triggers(char *segment,
return;
}
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-process_trig: segment found", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* find corresponding notify request */
found = false;
for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker);
@ -697,10 +712,15 @@ void gpr_replica_process_triggers(char *segment,
/* process request */
if (NULL == trackptr->requestor) { /* local request - callback fn with their tag */
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-process_trig: local callback", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
trackptr->callback(message, trackptr->user_tag);
/* dismantle message and free memory */
while (NULL != (data = (ompi_registry_object_t*)ompi_list_remove_first(&message->data))) {
OBJ_RELEASE(data);
}
for (i=0, tokptr=message->tokens; i < message->num_tokens; i++, tokptr++) {
free(*tokptr);
@ -709,9 +729,18 @@ void gpr_replica_process_triggers(char *segment,
free(message->tokens);
}
free(message);
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-process_trig: data released", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
} else { /* remote request - send message back */
gpr_replica_remote_notify(trackptr->requestor, trackptr->req_tag, message);
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-process_trig: remote message sent", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
}
/* if one-shot, remove request from tracking system */
@ -723,6 +752,11 @@ void gpr_replica_process_triggers(char *segment,
ompi_list_remove_item(&seg->triggers, &trig->item);
OBJ_RELEASE(trig);
}
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica-process_trig: complete", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
}
ompi_list_t *gpr_replica_test_internals(int level)

Просмотреть файл

@ -12,6 +12,7 @@
#include "ompi_config.h"
#include <string.h>
#include <sys/time.h>
#include "include/constants.h"
#include "util/output.h"
@ -28,13 +29,19 @@
#include "runtime/runtime.h"
static struct timeval ompi_rte_ping_wait = {30, 0};
int ompi_rte_local_universe_exists()
{
char *contact_file;
int ret;
ompi_process_name_t seed={0,0,0};
/* does universe already exist on local host? Check session directory to see */
if (ompi_rte_debug_flag) {
ompi_output(0, "checking local universe existence: universe %s", ompi_universe_info.name);
}
if (0 != strncmp(ompi_universe_info.host, ompi_system_info.nodename, strlen(ompi_system_info.nodename))) { /* remote host specified */
ompi_output(0, "remote hosts not supported");
@ -50,11 +57,13 @@ int ompi_rte_local_universe_exists()
ompi_universe_info.name,
NULL,
NULL)) { /* found */
/* check for "contact-info" file. if present, read it in. */
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL);
if (OMPI_SUCCESS != (ret = ompi_read_universe_setup_file(contact_file))) {
ompi_output(0, "could not read contact file %s", contact_file);
return ret;
}
@ -63,6 +72,7 @@ int ompi_rte_local_universe_exists()
/* also need to check "local" and that we did not specify the exact
* matching universe name
*/
ompi_output(0, "connection not allowed");
return OMPI_ERR_NO_CONNECTION_ALLOWED;
}
@ -72,13 +82,16 @@ int ompi_rte_local_universe_exists()
return OMPI_ERR_FATAL;
}
/* /\* ...and ping to verify it's alive *\/ */
/* if (OMPI_SUCCESS != mca_oob_ping(&seed)) { */
/* return OMPI_ERR_CONNECTION_FAILED; */
/* } */
mca_oob_parse_contact_info(ompi_universe_info.oob_contact_info, &seed, NULL);
/* ...and ping to verify it's alive */
if (OMPI_SUCCESS != mca_oob_ping(&seed, &ompi_rte_ping_wait)) {
ompi_output(0, "ping failed");
return OMPI_ERR_CONNECTION_FAILED;
}
/* set the my_universe field */
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -22,21 +22,17 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
{
/* see if I'm the seed */
if (ompi_cmd_line_is_taken(cmd_line, "seed")) {
if (ompi_cmd_line_is_taken(cmd_line, "seed") &&
false == ompi_process_info.seed) {
ompi_process_info.seed = true;
setenv("OMPI_universe_seed", "1", 1);
} else {
unsetenv("OMPI_universe_seed");
ompi_process_info.seed = false;
}
/* see if I'm a probe */
if (ompi_cmd_line_is_taken(cmd_line, "probe")) {
if (ompi_cmd_line_is_taken(cmd_line, "probe") &&
false == ompi_universe_info.probe) {
setenv("OMPI_universe_probe", "1", 1);
ompi_universe_info.probe = true;
} else {
unsetenv("OMPI_universe_probe");
ompi_universe_info.probe = false;
}
/* get desired universe scope, if specified */
@ -47,36 +43,24 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
}
ompi_universe_info.scope = strdup(ompi_cmd_line_get_param(cmd_line, "scope", 0, 0));
setenv("OMPI_universe_scope", ompi_universe_info.scope, 1);
} else {
unsetenv("OMPI_universe_scope");
ompi_universe_info.scope = NULL;
}
/* find out if persistent */
if (ompi_cmd_line_is_taken(cmd_line, "persistent")) {
setenv("OMPI_universe_persistent", "1", 1);
ompi_universe_info.persistence = true;
} else {
unsetenv("OMPI_universe_persistent");
ompi_universe_info.persistence = false;
}
/* find out if silent */
if (ompi_cmd_line_is_taken(cmd_line, "silent")) {
setenv("OMPI_universe_silent", "1", 1);
ompi_universe_info.silent_mode = true;
} else {
unsetenv("OMPI_universe_silent");
ompi_universe_info.silent_mode = false;
}
/* find out if web interface is desired */
if (ompi_cmd_line_is_taken(cmd_line, "webserver")) {
setenv("OMPI_universe_webserver", "1", 1);
ompi_universe_info.web_server = true;
} else {
unsetenv("OMPI_universe_webserver");
ompi_universe_info.web_server = false;
}
/* find out if script is to be executed */
@ -87,9 +71,6 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
}
ompi_universe_info.scriptfile = strdup(ompi_cmd_line_get_param(cmd_line, "script", 0, 0));
setenv("OMPI_universe_script", ompi_universe_info.scriptfile, 1);
} else {
unsetenv("OMPI_universe_script");
ompi_universe_info.scriptfile = NULL;
}
/* Find out if hostfile specified */
@ -100,8 +81,5 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
}
ompi_universe_info.hostfile = strdup(ompi_cmd_line_get_param(cmd_line, "hostfile", 0, 0));
setenv("OMPI_universe_hostfile", ompi_universe_info.hostfile, 1);
} else {
unsetenv("OMPI_universe_hostfile");
ompi_universe_info.hostfile = NULL;
}
}

Просмотреть файл

@ -8,6 +8,8 @@
* Parse environmental parameter options for the Open MPI Run Time Environment. This function
* MUST be called BEFORE calling any of the rte command line parsers.
*
* NOTE: Sets all key structure values to defaults if no environ value provided!!
*
*/
#include "ompi_config.h"
@ -23,7 +25,12 @@ void ompi_rte_parse_environ(void)
{
char *enviro_val;
/* ensure that sys_info and proc_info have been run */
ompi_sys_info();
ompi_proc_info();
enviro_val = getenv("OMPI_universe_seed");
ompi_output(0, "parse_env: seed %s", enviro_val);
if (NULL != enviro_val) { /* seed flag passed */
ompi_process_info.seed = true;
} else {
@ -47,6 +54,7 @@ void ompi_rte_parse_environ(void)
if (NULL != ompi_universe_info.scope) {
free(ompi_universe_info.scope);
}
ompi_universe_info.scope = strdup("exclusive");
}
enviro_val = getenv("OMPI_universe_persistent");
@ -94,6 +102,23 @@ void ompi_rte_parse_environ(void)
}
}
if (NULL != ompi_universe_info.name) {
free(ompi_universe_info.name);
}
ompi_universe_info.name = strdup("default-universe");
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
}
ompi_process_info.my_universe = strdup("default-universe");
if (NULL != ompi_universe_info.host) {
free(ompi_universe_info.host);
}
ompi_universe_info.host = strdup(ompi_system_info.nodename);
if (NULL != ompi_universe_info.uid) {
free(ompi_universe_info.uid);
}
ompi_universe_info.uid = strdup(ompi_system_info.user);
enviro_val = getenv("OMPI_universe_name");
if (NULL != enviro_val) { /* universe name passed in environment */
if (NULL != ompi_universe_info.name) { /* got something in it - overwrite */
@ -104,15 +129,6 @@ void ompi_rte_parse_environ(void)
free(ompi_process_info.my_universe);
}
ompi_process_info.my_universe = strdup(enviro_val);
} else {
if (NULL != ompi_universe_info.name) {
free(ompi_universe_info.name);
}
ompi_universe_info.name = strdup("default-universe");
if (NULL != ompi_process_info.my_universe) {
free(ompi_process_info.my_universe);
}
ompi_process_info.my_universe = strdup("default-universe");
}
enviro_val = getenv("OMPI_tmpdir_base");
@ -126,4 +142,7 @@ void ompi_rte_parse_environ(void)
free(ompi_process_info.tmpdir_base);
}
}
ompi_universe_info.pid = ompi_process_info.pid;
}

Просмотреть файл

@ -5,7 +5,5 @@
include $(top_srcdir)/config/Makefile.options
SUBDIRS = ompi_info mpirun wrappers ompid bootproxy
DIST_SUBDIRS = $(SUBDIRS) openmpi
SUBDIRS = ompi_info mpirun wrappers ompid bootproxy openmpi

Просмотреть файл

@ -28,10 +28,13 @@ static void ompi_console_recv(int status, ompi_process_name_t* sender,
int main(int argc, char *argv[])
{
int ret;
int ret, recv_tag;
ompi_cmd_line_t *cmd_line;
bool allow_multi_user_threads = false;
bool have_hidden_threads = false;
ompi_buffer_t cmd;
ompi_daemon_cmd_flag_t command;
ompi_process_name_t seed={0,0,0};
/*
* Intialize the Open MPI environment
@ -42,6 +45,11 @@ int main(int argc, char *argv[])
return ret;
}
/* get the system info and setup defaults */
ompi_sys_info();
ompi_universe_info.host = strdup(ompi_system_info.nodename);
ompi_universe_info.uid = strdup(ompi_system_info.user);
/* setup to read common command line options that span all Open MPI programs */
cmd_line = OBJ_NEW(ompi_cmd_line_t);
@ -51,6 +59,7 @@ int main(int argc, char *argv[])
ompi_cmd_line_make_opt(cmd_line, 'h', "help", 0,
"Show help for this function");
fprintf(stderr, "setting up cmd_line\n");
/* setup rte command line arguments */
ompi_rte_cmd_line_setup(cmd_line);
@ -70,6 +79,8 @@ int main(int argc, char *argv[])
return ret;
}
fprintf(stderr, "parse commands\n");
/* parse the local commands */
if (OMPI_SUCCESS != ompi_cmd_line_parse(cmd_line, true, argc, argv)) {
exit(ret);
@ -87,19 +98,28 @@ int main(int argc, char *argv[])
exit(1);
}
fprintf(stderr, "parse environ\n");
/* parse the environment */
ompi_rte_parse_environ();
fprintf(stderr, "parse rte cmds\n");
/* parse the cmd_line for rte options - override settings from enviro, where necessary
* copy everything into enviro variables for passing later on
*/
ompi_rte_parse_cmd_line(cmd_line);
/* Open up the MCA */
fprintf(stderr, "open mca\n");
if (OMPI_SUCCESS != (ret = mca_base_open())) {
/* JMS show_help */
printf("show_help: ompid failed in mca_base_open\n");
return ret;
}
fprintf(stderr, "join runtime\n");
/* Join the run-time environment */
allow_multi_user_threads = true;
have_hidden_threads = false;
@ -110,17 +130,23 @@ int main(int argc, char *argv[])
return ret;
}
fprintf(stderr, "check hostname for local host: univ %s sys %s\n", ompi_universe_info.host, ompi_system_info.nodename);
/* check for local universe existence */
if (0 != strncmp(ompi_universe_info.host, ompi_system_info.nodename, strlen(ompi_system_info.nodename))) {
fprintf(stderr, "remote universe operations not supported at this time\n");
exit(1);
}
fprintf(stderr, "check local univ\n");
if (OMPI_SUCCESS != (ret = ompi_rte_local_universe_exists())) {
fprintf(stderr, "could not contact local universe %s\n", ompi_universe_info.name);
exit(1);
}
fprintf(stderr, "init stage 2\n");
/* setup the rest of the rte */
if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads,
&have_hidden_threads))) {
@ -152,6 +178,12 @@ int main(int argc, char *argv[])
return ret;
}
ompi_buffer_init(&cmd, 0);
command = OMPI_DAEMON_EXIT_CMD;
recv_tag = MCA_OOB_TAG_DAEMON;
ompi_pack(cmd, &command, 1, OMPI_DAEMON_OOB_PACK_CMD);
mca_oob_send_packed(&seed, cmd, MCA_OOB_TAG_DAEMON, 0);
ompi_rte_finalize();
mca_base_close();
ompi_finalize();

Просмотреть файл

@ -12,42 +12,46 @@
#include <sys/param.h>
#include <errno.h>
#include "runtime/runtime.h"
#include "include/constants.h"
#include "threads/mutex.h"
#include "threads/condition.h"
#include "util/output.h"
#include "util/sys_info.h"
#include "util/os_path.h"
#include "util/cmd_line.h"
#include "util/proc_info.h"
#include "util/session_dir.h"
#include "util/printf.h"
#include "util/daemon_init.h"
#include "util/universe_setup_file_io.h"
#include "mca/base/base.h"
#include "mca/ns/base/base.h"
#include "mca/gpr/base/base.h"
#include "runtime/runtime.h"
#include "tools/ompid/ompid.h"
/*
* Public variables
*/
static bool ompi_daemon_debug;
static ompi_mutex_t ompi_daemon_mutex;
static ompi_condition_t ompi_daemon_condition;
static bool ompi_daemon_exit_condition = false;
bool pretty = true;
ompi_cmd_line_t *cmd_line = NULL;
static void ompi_daemon_recv(int status, ompi_process_name_t* sender,
ompi_buffer_t buffer, int tag,
void* cbdata);
const char *type_all = "all";
const char *type_ompi = "ompi";
const char *type_base = "base";
int main(int argc, char *argv[])
{
int ret = 0;
ompi_cmd_line_t *cmd_line = NULL;
bool allow_multi_user_threads = false;
bool have_hidden_threads = false;
char *jobid_str, *procid_str;
/* daemonize myself */
ompi_daemon_init(NULL);
char *jobid_str, *procid_str, *enviro_val, *contact_file;
/*
* Intialize the Open MPI environment
@ -58,6 +62,28 @@ int main(int argc, char *argv[])
return ret;
}
/* check for debug flag */
enviro_val = getenv("OMPI_daemon_debug");
if (NULL != enviro_val) { /* flag was set */
ompi_daemon_debug = true;
ompi_output(0, "ompid: entered daemon");
} else {
ompi_daemon_debug = false;
}
ompi_daemon_debug = true; /**** DEBUGGING PURPOSES */
if (ompi_daemon_debug) {
ompi_output(0, "ompid: daemonizing");
}
/* daemonize myself */
/* ompi_daemon_init(NULL); */
/* setup the thread lock and condition variable */
OBJ_CONSTRUCT(&ompi_daemon_mutex, ompi_mutex_t);
OBJ_CONSTRUCT(&ompi_daemon_condition, ompi_condition_t);
/* get the system info and setup defaults */
ompi_sys_info();
ompi_universe_info.host = strdup(ompi_system_info.nodename);
@ -159,13 +185,23 @@ int main(int argc, char *argv[])
}
/***** SET MY NAME *****/
if (NULL == ompi_process_info.name) { /* don't overwrite an existing name */
if (ompi_process_info.seed) {
if (ompi_daemon_debug) {
ompi_output(0, "ompid: seed flag set");
}
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name);
}
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
} else {
if (ompi_daemon_debug) {
ompi_output(0, "ompid: seed flag NOT set");
}
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name);
}
ompi_process_info.name = ompi_rte_get_self();
}
}
/* get my process info */
ompi_proc_info();
@ -174,7 +210,7 @@ int main(int argc, char *argv[])
jobid_str = ompi_name_server.get_jobid_string(ompi_process_info.name);
procid_str = ompi_name_server.get_vpid_string(ompi_process_info.name);
if (ompi_rte_debug_flag) {
if (ompi_daemon_debug) {
ompi_output(0, "[%d,%d,%d] setting up session dir with", ompi_process_info.name->cellid, ompi_process_info.name->jobid, ompi_process_info.name->vpid);
if (NULL != ompi_process_info.tmpdir_base) {
ompi_output(0, "\ttmpdir %s", ompi_process_info.tmpdir_base);
@ -200,24 +236,77 @@ int main(int argc, char *argv[])
* Register my process info with my replica.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
ompi_output(0, "ompi_rte_init: failed in ompi_rte_register()\n");
ompi_output(0, "ompi_rte_init: failed in ompi_rte_register");
return ret;
}
/* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&allow_multi_user_threads,
&have_hidden_threads))) {
&have_hidden_threads))) {
/* JMS show_help */
printf("show_help: ompid failed in ompi_rte_init\n");
ompi_output(0, "show_help: ompid failed in ompi_rte_init");
return ret;
}
/* if i'm the seed, get my contact info and write my setup file for others to find */
if (ompi_process_info.seed) {
ompi_universe_info.oob_contact_info = mca_oob_get_contact_info();
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL);
if (OMPI_SUCCESS != (ret = ompi_write_universe_setup_file(contact_file))) {
if (ompi_daemon_debug) {
ompi_output(0, "[%d,%d,%d] ompid: couldn't write setup file", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
}
}
if (ompi_daemon_debug) {
ompi_output(0, "[%d,%d,%d] ompid: registering", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* register this node on the virtual machine */
/* ompi_vm_register(); */
/* register the daemon callback function */
if (ompi_daemon_debug) {
ompi_output(0, "[%d,%d,%d] ompid: issuing callback", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* register the daemon callback function */
ret = mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_DAEMON, 0, ompi_daemon_recv, NULL);
if(ret != OMPI_SUCCESS && ret != OMPI_ERR_NOT_IMPLEMENTED) {
ompi_output(0, "daemon callback not registered: error code %d", ret);
return ret;
}
/* go through the universe fields and see what else I need to do
* - could be setup a virtual machine, spawn a console, etc.
*/
if (ompi_daemon_debug) {
ompi_output(0, "[%d,%d,%d] ompid: setting up event monitor", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* setup and enter the event monitor */
OMPI_THREAD_LOCK(&ompi_daemon_mutex);
while (false == ompi_daemon_exit_condition) {
ompi_condition_wait(&ompi_daemon_condition, &ompi_daemon_mutex);
}
OMPI_THREAD_UNLOCK(&ompi_daemon_mutex);
if (ompi_daemon_debug) {
ompi_output(0, "[%d,%d,%d] ompid: mutex cleared - finalizing", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* setup and enter the event monitor */
ompi_rte_finalize();
mca_base_close();
@ -225,23 +314,39 @@ int main(int argc, char *argv[])
return 0;
}
/*
 * Receive callback for daemon command messages.
 *
 * Unpacks a single ompi_daemon_cmd_flag_t from the incoming buffer and acts
 * on it while holding ompi_daemon_mutex:
 *   - OMPI_DAEMON_EXIT_CMD:      sets ompi_daemon_exit_condition and signals
 *                                ompi_daemon_condition.
 *   - OMPI_DAEMON_HEARTBEAT_CMD: placeholder, currently does nothing.
 * Any other command value is ignored.
 *
 * The status, sender, tag and cbdata arguments are not used in this body.
 */
static void ompi_daemon_recv(int status, ompi_process_name_t* sender,
ompi_buffer_t buffer, int tag,
void* cbdata)
{
ompi_buffer_t answer;
ompi_daemon_cmd_flag_t command;
/* /\* convert myself to be a daemon *\/ */
/* if (OMPI_SUCCESS != ompi_daemon_init(ompi_process_info.universe_session_dir)) { */
/* fprintf(stderr, "could not convert to daemon - please report error to bugs@open-mpi.org\n"); */
/* exit(1); */
/* } */
/* hold the daemon mutex for the whole callback; every exit path goes through RETURN_ERROR to release it */
OMPI_THREAD_LOCK(&ompi_daemon_mutex);
/* * as file "contact-info" so others can find us. */
/* *\/ */
if (ompi_daemon_debug) {
ompi_output(0, "[%d,%d,%d] ompid: received message", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
/* /\* Add in the calls to initialize the services *\/ */
/* NOTE(review): answer is initialized here but never packed, sent or released in this function - confirm whether it should be freed */
if (OMPI_SUCCESS != ompi_buffer_init(&answer, 0)) {
/* RHC -- not sure what to do if this fails */
}
/* /\* Add the section for the event loop... *\/ */
/* pull the command flag out of the message; bail out (still unlocking) if it cannot be unpacked */
if (OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, OMPI_DAEMON_OOB_PACK_CMD)) {
goto RETURN_ERROR;
}
/* /\* All done *\/ */
/**** EXIT COMMAND ****/
if (OMPI_DAEMON_EXIT_CMD == command) {
/* set the exit flag before signalling the condition variable */
ompi_daemon_exit_condition = true;
ompi_condition_signal(&ompi_daemon_condition);
/* /\* Close services *\/ */
} else if (OMPI_DAEMON_HEARTBEAT_CMD == command) {
/* send back an "i'm alive" message */
/* not implemented yet: no reply is sent */
}
/* OBJ_RELEASE(cmd_line); */
/* mca_base_close(); */
/* common exit path for both success and unpack failure: release the mutex */
RETURN_ERROR:
OMPI_THREAD_UNLOCK(&ompi_daemon_mutex);
return;
}

Просмотреть файл

@ -18,6 +18,8 @@
#define OMPI_DAEMON_HOSTFILE_CMD 0x01
#define OMPI_DAEMON_SCRIPTFILE_CMD 0x02
#define OMPI_DAEMON_HEARTBEAT_CMD 0xfe
#define OMPI_DAEMON_EXIT_CMD 0xff
/*
@ -26,43 +28,34 @@
typedef uint16_t ompi_daemon_cmd_flag_t;
typedef char *type_vector_t;
extern bool pretty;
extern ompi_cmd_line_t *cmd_line;
extern const char *type_all;
extern const char *type_ompi;
extern const char *type_base;
extern type_vector_t mca_types;
/*
* Version-related strings and functions
*/
extern const char *ver_full;
extern const char *ver_major;
extern const char *ver_minor;
extern const char *ver_release;
extern const char *ver_alpha;
extern const char *ver_beta;
extern const char *ver_svn;
/* extern const char *ver_full; */
/* extern const char *ver_major; */
/* extern const char *ver_minor; */
/* extern const char *ver_release; */
/* extern const char *ver_alpha; */
/* extern const char *ver_beta; */
/* extern const char *ver_svn; */
void do_version(bool want_all, ompi_cmd_line_t *cmd_line);
void show_ompi_version(const char *scope);
/* void do_version(bool want_all, ompi_cmd_line_t *cmd_line); */
/* void show_ompi_version(const char *scope); */
/*
* Parameter/configuration-related functions
*/
extern char *param_all;
/* extern char *param_all; */
extern char *path_prefix;
extern char *path_bindir;
extern char *path_libdir;
extern char *path_incdir;
extern char *path_pkglibdir;
extern char *path_sysconfdir;
/* extern char *path_prefix; */
/* extern char *path_bindir; */
/* extern char *path_libdir; */
/* extern char *path_incdir; */
/* extern char *path_pkglibdir; */
/* extern char *path_sysconfdir; */
#endif /* OMPID_H */

Просмотреть файл

@ -160,8 +160,9 @@ int main(int argc, char **argv)
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
/* ensure the enviro variables do NOT specify any replicas so that seed
* will start them up
* will start them up. set seed flag
*/
setenv("OMPI_universe_seed", "1", 1);
unsetenv("OMPI_MCA_ns_base_replica");
unsetenv("OMPI_MCA_gpr_base_replica");