1
1

Some of these didn't really change - I was just in/out of them for diagnostics while chasing a bug. Got caught by my good buddy Tim again :) on his parse_contact_info function, which requires that the space for the answer be allocated in advance. Sigh. Anyway, mpirun2 now works again. My apologies if you tried it in the last few hours and found it didn't.

Also removed the mpirun3 directory since we are basically dragging mpirun2 along with us - no need to create a new version after all.

Made a few changes to the universe info structure, eliminating the "webserver" and "socket" fields since we will do those contacts through the oob channel. Also changed the "silent_mode" field to "console" since silent mode is the default - the flag needs to tell you to turn the console on, not off.

Parse environ function now gets the ns and gpr replica contact info and loads it in the proper places to hand it off to the respective components, thus allowing me to check connection to them as part of determining if the named universe already exists. Changed the local_universe_exists function accordingly and gave it a new name (since the replicas may not be local). This name will shortly be changed to "ompi_rte_join_universe" as I complete the logic for doing that function.

Please let me know if you see any problems. I successfully ran some trivial multi-process functions in both mpirun2 and singleton modes, and ran the seed daemon as well, so I think it should all be okay.

This commit was SVN r2611.
Этот коммит содержится в:
Ralph Castain 2004-09-11 12:56:52 +00:00
родитель b72ecd12d5
Коммит c6cbe33d50
16 изменённых файлов: 431 добавлений и 337 удалений

Просмотреть файл

@ -13,6 +13,7 @@
#include "mca/gpr/base/base.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
@ -196,23 +197,6 @@ mca_gpr_base_component_t mca_gpr_base_selected_component;
*/
int mca_gpr_base_open(void)
{
int id;
char *replica;
/* check the environment for replica information */
id = mca_base_param_register_string("gpr", "base", "replica", NULL, NULL);
mca_base_param_lookup_string(id, &replica);
if (NULL != replica) {
mca_oob_set_contact_info(replica);
ompi_process_info.gpr_replica = ns_base_create_process_name(0,0,0);
mca_oob_parse_contact_info(replica, ompi_process_info.gpr_replica, NULL);
} else {
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
}
}
/* Open up all available components */
if (OMPI_SUCCESS !=

Просмотреть файл

@ -41,23 +41,6 @@ mca_ns_base_component_t mca_ns_base_selected_component;
*/
int mca_ns_base_open(void)
{
int id;
char *replica;
/* check the environment for replica information */
id = mca_base_param_register_string("ns", "base", "replica", NULL, NULL);
mca_base_param_lookup_string(id, &replica);
if (NULL != replica) {
mca_oob_set_contact_info(replica);
ompi_process_info.ns_replica = ns_base_create_process_name(0,0,0);
mca_oob_parse_contact_info(replica, ompi_process_info.ns_replica, NULL);
} else {
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
}
}
/* Open up all available components */
if (OMPI_SUCCESS !=

Просмотреть файл

@ -73,14 +73,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}
/* get system info and setup defaults*/
ompi_sys_info();
ompi_universe_info.host = strdup(ompi_system_info.nodename);
ompi_universe_info.uid = strdup(ompi_system_info.user);
/* parse environmental variables and fill corresponding info structures */
ompi_rte_parse_environ();
/* Open up the MCA */
if (OMPI_SUCCESS != (ret = mca_base_open())) {
@ -96,6 +88,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}
/* parse environmental variables and fill corresponding info structures */
ompi_rte_parse_environ();
/* start the rest of the rte */
if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads,
&have_hidden_threads))) {
@ -104,16 +99,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}
/***** SET MY NAME *****/
if (NULL == ompi_process_info.name) { /* don't overwrite an existing name */
if (ompi_process_info.seed) {
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
} else {
ompi_process_info.name = ompi_rte_get_self();
}
if (NULL != ompi_process_info.name) { /* should NOT have been previously set */
free(ompi_process_info.name);
}
/* get my process info */
ompi_proc_info();
ompi_process_info.name = ompi_rte_get_self();
/* setup my session directory */
jobid_str = ompi_name_server.get_jobid_string(ompi_process_info.name);

Просмотреть файл

@ -29,7 +29,7 @@ libruntime_la_SOURCES = \
ompi_rte_llm.c \
ompi_rte_monitor.c \
ompi_rte_cmd_line_setup.c \
ompi_rte_local_universe_exists.c \
universe_exists.c \
ompi_rte_parse_environ.c \
ompi_rte_parse_cmd_line.c \
ompi_rte_parse_daemon_cmd_line.c \

Просмотреть файл

@ -38,12 +38,8 @@ void ompi_rte_cmd_line_setup(ompi_cmd_line_t *cmd_line)
"Universe is to be persistent");
ompi_cmd_line_make_opt3(cmd_line, /* read in ompi_rte_parse_daemon_cmd_line */
'w', "webserver", "webserver", 0,
"Web server available");
ompi_cmd_line_make_opt3(cmd_line, /* read in ompi_rte_parse_daemon_cmd_line */
's', "silent", "silent", 0,
"No console prompt - operate silently");
's', "console", "console", 0,
"Provide a console for user interaction");
ompi_cmd_line_make_opt3(cmd_line, /* read in ompi_rte_parse_daemon_cmd_line */
'f', "script", "script", 1,
@ -64,4 +60,8 @@ void ompi_rte_cmd_line_setup(ompi_cmd_line_t *cmd_line)
ompi_cmd_line_make_opt3(cmd_line, /* read in ompi_rte_parse_cmd_line */
'\0', "gprreplica", "gprreplica", 1,
"OOB contact info for GPR replica assigned to this process");
ompi_cmd_line_make_opt3(cmd_line, /* read in ompi_rte_parse_cmd_line */
'\0', "seedcontact", "seedcontact", 1,
"OOB contact info for seed of this universe");
}

Просмотреть файл

@ -99,12 +99,10 @@ ompi_universe_t ompi_universe_info = {
/* .persistence = */ false,
/* .scope = */ NULL,
/* .probe = */ false,
/* .silent_mode = */ true,
/* .ns_replica = */ false,
/* .gpr_replica = */ false,
/* .web_server = */ false,
/* .socket_contact_info = */ NULL,
/* .oob_contact_info = */ NULL,
/* .console = */ false,
/* .ns_replica = */ NULL,
/* .gpr_replica = */ NULL,
/* .seed_contact_info = */ NULL,
/* .console_connected = */ false,
/* .scriptfile = */ NULL,
/* .hostfile = */ NULL
@ -123,16 +121,6 @@ int ompi_rte_init_stage1(bool *allow_multi_user_threads, bool *have_hidden_threa
mca_base_param_lookup_int(ret, &ompi_rte_debug_flag);
/* check to ensure at least default values in key fields */
if (NULL == ompi_universe_info.name) {
ompi_universe_info.name = strdup("default-universe");
if (NULL != ompi_process_info.my_universe) { /* overwrite to match */
free(ompi_process_info.my_universe);
}
ompi_process_info.my_universe = strdup(ompi_universe_info.name);
}
/*
* Initialize the event library
*/

Просмотреть файл

@ -1,99 +0,0 @@
/*
* $HEADER$
*/
/**
* @file
*
* Check whether the named universe already exists on the local host and accepts connections
*/
#include "ompi_config.h"
#include <string.h>
#include <sys/time.h>
#include "include/constants.h"
#include "util/output.h"
#include "util/sys_info.h"
#include "util/proc_info.h"
#include "util/os_path.h"
#include "util/pack.h"
#include "util/session_dir.h"
#include "util/universe_setup_file_io.h"
#include "mca/oob/base/base.h"
#include "mca/ns/base/base.h"
#include "runtime/runtime.h"
/* Maximum time to wait for the seed to answer a ping: 30 seconds. */
static struct timeval ompi_rte_ping_wait = {30, 0};

/**
 * Determine whether the universe named in ompi_universe_info exists on the
 * local host and is accepting connections.
 *
 * The universe's session directory is looked up; if found, the
 * "universe-setup.txt" file inside it is read, the universe's persistence
 * and scope are checked, and the seed recorded in the file is pinged.
 * On success ompi_process_info.my_universe is set to a copy of the
 * universe name.
 *
 * @retval OMPI_SUCCESS                   Universe found and seed answered the ping.
 * @retval OMPI_ERR_NOT_IMPLEMENTED       Universe host does not match the local nodename.
 * @retval OMPI_ERR_NOT_FOUND             No session directory for the universe.
 * @retval OMPI_ERR_NO_CONNECTION_ALLOWED Universe is not persistent or has exclusive scope.
 * @retval OMPI_ERR_FATAL                 The OOB contact info could not be set.
 * @retval OMPI_ERR_CONNECTION_FAILED     The seed did not answer the ping.
 * @retval other                          Error code returned while reading the setup file.
 */
int ompi_rte_local_universe_exists()
{
    ompi_process_name_t seed_name={0,0,0};
    char *setup_path;
    int rc;

    if (ompi_rte_debug_flag) {
        ompi_output(0, "checking local universe existence: universe %s", ompi_universe_info.name);
    }

    /* only a universe hosted on this node can be examined */
    if (0 != strncmp(ompi_universe_info.host, ompi_system_info.nodename,
                     strlen(ompi_system_info.nodename))) {
        ompi_output(0, "remote hosts not supported");
        return OMPI_ERR_NOT_IMPLEMENTED;
    }

    /* no session directory means no local universe of that name */
    if (OMPI_SUCCESS != ompi_session_dir(false,
                                         ompi_process_info.tmpdir_base,
                                         ompi_system_info.user,
                                         ompi_system_info.nodename,
                                         NULL,
                                         ompi_universe_info.name,
                                         NULL,
                                         NULL)) {
        return OMPI_ERR_NOT_FOUND;
    }

    /* load the universe description left behind by the seed */
    setup_path = ompi_os_path(false, ompi_process_info.universe_session_dir,
                              "universe-setup.txt", NULL);
    rc = ompi_read_universe_setup_file(setup_path);
    if (OMPI_SUCCESS != rc) {
        ompi_output(0, "could not read contact file %s", setup_path);
        return rc;
    }

    /* a non-persistent or exclusive universe refuses newcomers
     * (still to do: handle "local" scope and an exact universe-name match)
     */
    if (!ompi_universe_info.persistence ||
        (0 == strncmp(ompi_universe_info.scope, "exclusive", strlen("exclusive")))) {
        ompi_output(0, "connection not allowed");
        return OMPI_ERR_NO_CONNECTION_ALLOWED;
    }

    /* hand the seed's contact info to the oob... */
    if (OMPI_SUCCESS != mca_oob_set_contact_info(ompi_universe_info.oob_contact_info)) {
        ompi_output(0, "error setting oob contact info - please report error to bugs@open-mpi.org\n");
        return OMPI_ERR_FATAL;
    }
    mca_oob_parse_contact_info(ompi_universe_info.oob_contact_info, &seed_name, NULL);

    /* ...and make sure the seed is actually alive */
    if (OMPI_SUCCESS != mca_oob_ping(&seed_name, &ompi_rte_ping_wait)) {
        ompi_output(0, "ping failed");
        return OMPI_ERR_CONNECTION_FAILED;
    }

    /* record which universe this process now belongs to */
    ompi_process_info.my_universe = strdup(ompi_universe_info.name);
    return OMPI_SUCCESS;
}

Просмотреть файл

@ -28,6 +28,16 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
setenv("OMPI_universe_seed", "1", 1);
}
/* see if seed contact info is provided */
if (ompi_cmd_line_is_taken(cmd_line, "seedcontact")) {
if (NULL == ompi_cmd_line_get_param(cmd_line, "seedcontact", 0, 0)) {
fprintf(stderr, "error retrieving seed contact info - please report error to bugs@open-mpi.org\n");
exit(1);
}
ompi_universe_info.seed_contact_info = strdup(ompi_cmd_line_get_param(cmd_line, "seedcontact", 0, 0));
setenv("OMPI_universe_contact", ompi_universe_info.seed_contact_info, 1);
}
/* see if I'm a probe */
if (ompi_cmd_line_is_taken(cmd_line, "probe") &&
false == ompi_universe_info.probe) {
@ -51,16 +61,11 @@ void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line)
ompi_universe_info.persistence = true;
}
/* find out if silent */
if (ompi_cmd_line_is_taken(cmd_line, "silent")) {
setenv("OMPI_universe_silent", "1", 1);
ompi_universe_info.silent_mode = true;
}
/* find out if web interface is desired */
if (ompi_cmd_line_is_taken(cmd_line, "webserver")) {
setenv("OMPI_universe_webserver", "1", 1);
ompi_universe_info.web_server = true;
/* find out if we desire a console */
if (ompi_cmd_line_is_taken(cmd_line, "console")) {
setenv("OMPI_universe_console", "1", 1);
ompi_universe_info.console = true;
ompi_universe_info.console_connected = false;
}
/* find out if script is to be executed */

Просмотреть файл

@ -6,7 +6,8 @@
* @file
*
* Parse environmental parameter options for the Open MPI Run Time Environment. This function
* MUST be called BEFORE calling any of the rte command line parsers.
* MUST be called BEFORE calling any of the rte command line parsers, AFTER calling
* rte_init_stage1, and BEFORE calling rte_init_stage2.
*
* NOTE: Sets all key structure values to defaults if no environ value provided!!
*
@ -19,24 +20,68 @@
#include "util/sys_info.h"
#include "util/proc_info.h"
#include "mca/base/base.h"
#include "mca/base/mca_base_param.h"
#include "mca/oob/base/base.h"
#include "mca/ns/base/base.h"
#include "runtime/runtime.h"
void ompi_rte_parse_environ(void)
{
char *enviro_val;
int id;
/* ensure that sys_info and proc_info have been run */
ompi_sys_info();
ompi_proc_info();
enviro_val = getenv("OMPI_universe_seed");
ompi_output(0, "parse_env: seed %s", enviro_val);
if (NULL != enviro_val) { /* seed flag passed */
ompi_process_info.seed = true;
} else {
ompi_process_info.seed = false;
}
enviro_val = getenv("OMPI_universe_contact");
if (NULL != enviro_val) { /* contact info passed */
if (NULL != ompi_universe_info.seed_contact_info) { /* overwrite */
free(ompi_universe_info.seed_contact_info);
}
ompi_universe_info.seed_contact_info = strdup(enviro_val);
mca_oob_set_contact_info(ompi_universe_info.seed_contact_info);
} else {
if (NULL != ompi_universe_info.seed_contact_info) {
free(ompi_universe_info.seed_contact_info);
}
}
id = mca_base_param_register_string("gpr", "base", "replica", NULL, NULL);
mca_base_param_lookup_string(id, &ompi_universe_info.gpr_replica);
if (NULL != ompi_universe_info.gpr_replica) {
mca_oob_set_contact_info(ompi_universe_info.gpr_replica);
ompi_process_info.gpr_replica = ns_base_create_process_name(0,0,0);
mca_oob_parse_contact_info(ompi_universe_info.gpr_replica,
ompi_process_info.gpr_replica, NULL);
} else {
if (NULL != ompi_process_info.gpr_replica) {
free(ompi_process_info.gpr_replica);
}
}
id = mca_base_param_register_string("ns", "base", "replica", NULL, NULL);
mca_base_param_lookup_string(id, &ompi_universe_info.ns_replica);
if (NULL != ompi_universe_info.ns_replica) {
mca_oob_set_contact_info(ompi_universe_info.ns_replica);
ompi_process_info.ns_replica = ns_base_create_process_name(0,0,0);
mca_oob_parse_contact_info(ompi_universe_info.ns_replica,
ompi_process_info.ns_replica, NULL);
} else {
if (NULL != ompi_process_info.ns_replica) {
free(ompi_process_info.ns_replica);
}
}
enviro_val = getenv("OMPI_universe_probe");
if (NULL != enviro_val) { /* probe flag passed */
ompi_universe_info.probe = true;
@ -57,6 +102,12 @@ void ompi_rte_parse_environ(void)
ompi_universe_info.scope = strdup("exclusive");
}
/*** FOR DEBUGGING PURPOSES IN THIS EARLY STAGE - FORCE PUBLIC */
if (NULL != ompi_universe_info.scope) {
free(ompi_universe_info.scope);
}
ompi_universe_info.scope = strdup("public");
enviro_val = getenv("OMPI_universe_persistent");
if (NULL != enviro_val) { /* persistence flag passed */
ompi_universe_info.persistence = true;
@ -64,18 +115,14 @@ void ompi_rte_parse_environ(void)
ompi_universe_info.persistence = false;
}
enviro_val = getenv("OMPI_universe_silent");
if (NULL != enviro_val) { /* silent flag passed */
ompi_universe_info.silent_mode = true;
} else {
ompi_universe_info.silent_mode = false;
}
/*** FOR DEBUGGING PURPOSES IN THIS EARLY STAGE - FORCE PERSISTENCE */
ompi_universe_info.persistence = true;
enviro_val = getenv("OMPI_universe_webserver");
if (NULL != enviro_val) { /* webserver flag passed */
ompi_universe_info.web_server = true;
enviro_val = getenv("OMPI_universe_console");
if (NULL != enviro_val) { /* console flag passed */
ompi_universe_info.console = true;
} else {
ompi_universe_info.web_server = false;
ompi_universe_info.console = false;
}
enviro_val = getenv("OMPI_universe_script");

Просмотреть файл

@ -43,15 +43,13 @@ extern "C" {
bool persistence;
char *scope;
bool probe;
bool silent_mode;
bool ns_replica;
bool gpr_replica;
bool web_server;
char *socket_contact_info;
char *oob_contact_info;
bool console_connected;
char *scriptfile;
char *hostfile;
bool console;
char *ns_replica; /**< OOB contact info for name server */
char *gpr_replica; /**< OOB contact info for GPR */
char *seed_contact_info; /**< OOB contact info for universe seed */
bool console_connected; /**< Indicates if console is connected */
char *scriptfile; /**< Name of file containing commands to be executed */
char *hostfile; /**< Name of file containing list of hosts to be built into virtual machine */
};
typedef struct ompi_universe_t ompi_universe_t;
@ -274,8 +272,10 @@ extern "C" {
/**
* Check for universe existence
*
* Checks to see if a specified universe exists on the local host. If so, attempts
* Checks to see if a specified universe exists. If so, attempts
* to connect to verify that the universe is accepting connections.
* If both ns and gpr replicas provided, first checks for those
* connections. Gets any missing info from the universe contact.
*
* @param None Reads everything from the process_info and system_info
* structures
@ -289,13 +289,14 @@ extern "C" {
* @retval OMPI_CONNECTION_REFUSED Universe found and contact made, but
* universe refused to allow connection.
*/
int ompi_rte_local_universe_exists(void);
int ompi_rte_universe_exists(void);
/**
* Parse the RTE environmental variables
*
* Checks the environmental variables and passes their info (where
* set) into the respective info structures.
* set) into the respective info structures. Sets ALL Open MPI
* default values in universe, process, and system structures.
*
* @param None
*

151
src/runtime/universe_exists.c Обычный файл
Просмотреть файл

@ -0,0 +1,151 @@
/*
* $HEADER$
*/
/**
* @file
*
* Check whether the named universe already exists and is accepting connections
*/
#include "ompi_config.h"
#include <string.h>
#include <sys/time.h>
#include "include/constants.h"
#include "util/output.h"
#include "util/sys_info.h"
#include "util/proc_info.h"
#include "util/os_path.h"
#include "util/pack.h"
#include "util/session_dir.h"
#include "util/universe_setup_file_io.h"
#include "mca/oob/base/base.h"
#include "mca/ns/base/base.h"
#include "runtime/runtime.h"
/* Maximum time to wait for a replica or seed to answer a ping: 30 seconds. */
static struct timeval ompi_rte_ping_wait = {30, 0};

/**
 * Check whether the universe described by ompi_universe_info exists and can
 * be contacted.
 *
 * If contact strings for BOTH the name server and GPR replicas are set, each
 * replica is pinged. When both answer, the function returns immediately.
 * A replica that does not answer has its contact string (and the matching
 * process name in ompi_process_info) released and reset to NULL.
 *
 * Otherwise the function falls back to a local lookup: it finds the
 * universe's session directory, reads "universe-setup.txt" from it, checks
 * persistence and scope, and pings the seed recorded in that file. On success
 * ompi_process_info.my_universe is replaced with a copy of the universe name.
 *
 * @retval OMPI_SUCCESS                   Both replicas answered, or the seed answered.
 * @retval OMPI_ERR_NOT_IMPLEMENTED       Universe host does not match the local nodename.
 * @retval OMPI_ERR_NOT_FOUND             No session directory for the universe.
 * @retval OMPI_ERR_NO_CONNECTION_ALLOWED Universe is not persistent or has exclusive scope.
 * @retval OMPI_ERR_FATAL                 The seed's OOB contact info could not be set.
 * @retval OMPI_ERR_CONNECTION_FAILED     The seed did not answer the ping.
 * @retval other                          Error code returned while reading the setup file.
 */
int ompi_rte_universe_exists(void)
{
    char *contact_file;
    int ret;
    ompi_process_name_t proc={0,0,0};
    /* must start out false: each flag is only ever set by a successful ping,
     * and both are read after the pings regardless of their outcome
     */
    bool ns_found = false, gpr_found = false;

    /* if both ns_replica and gpr_replica were provided, check for contact with them */
    if (NULL != ompi_universe_info.ns_replica && NULL != ompi_universe_info.gpr_replica) {
        mca_oob_parse_contact_info(ompi_universe_info.ns_replica, &proc, NULL);

        /* ping to verify ns_replica alive */
        if (OMPI_SUCCESS != mca_oob_ping(&proc, &ompi_rte_ping_wait)) {
            if (ompi_rte_debug_flag) {
                ompi_output(0, "univ_exists: ns_replica ping failed");
            }
            /* drop the stale info and clear the pointers so nobody
             * dereferences or frees them again
             */
            free(ompi_universe_info.ns_replica);
            ompi_universe_info.ns_replica = NULL;
            free(ompi_process_info.ns_replica);
            ompi_process_info.ns_replica = NULL;
        } else { /* name server found, now try gpr */
            ns_found = true;
        }

        mca_oob_parse_contact_info(ompi_universe_info.gpr_replica, &proc, NULL);

        /* ping to verify gpr_replica alive */
        if (OMPI_SUCCESS != mca_oob_ping(&proc, &ompi_rte_ping_wait)) {
            if (ompi_rte_debug_flag) {
                ompi_output(0, "univ_exists: gpr_replica ping failed");
            }
            free(ompi_universe_info.gpr_replica);
            ompi_universe_info.gpr_replica = NULL;
            free(ompi_process_info.gpr_replica);
            ompi_process_info.gpr_replica = NULL;
        } else {
            gpr_found = true;
        }

        if (ns_found && gpr_found) { /* success on both counts - report it */
            return OMPI_SUCCESS;
        }
    }

    /* if we are missing one or both, we need to get the missing info. first check
     * to see if seed_contact_info already provided. if so, then contact that daemon
     * to get missing info.
     */

    /* otherwise, need to find an initial "seed" contact point so we can get the info.
     * check if local or remote host specified
     */
    if (0 != strncmp(ompi_universe_info.host, ompi_system_info.nodename,
                     strlen(ompi_system_info.nodename))) { /* remote host specified */
        ompi_output(0, "remote hosts not currently supported");
        return OMPI_ERR_NOT_IMPLEMENTED;
    }

    /* check to see if local universe already exists */
    if (OMPI_SUCCESS == ompi_session_dir(false,
                                         ompi_process_info.tmpdir_base,
                                         ompi_system_info.user,
                                         ompi_system_info.nodename,
                                         NULL,
                                         ompi_universe_info.name,
                                         NULL,
                                         NULL)) { /* found */

        /* check for "contact-info" file. if present, read it in. */
        /* NOTE(review): contact_file is never released; if ompi_os_path hands
         * back a heap string owned by the caller it should be freed on every
         * exit path below - confirm against ompi_os_path's contract
         */
        contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
                                    "universe-setup.txt", NULL);

        if (OMPI_SUCCESS != (ret = ompi_read_universe_setup_file(contact_file))) {
            if (ompi_rte_debug_flag) {
                ompi_output(0, "could not read contact file %s", contact_file);
            }
            return ret;
        }

        if (!ompi_universe_info.persistence ||   /* not persistent... */
            (0 == strncmp(ompi_universe_info.scope, "exclusive", strlen("exclusive")))) {  /* ...or no connection allowed */
            /* also need to check "local" and that we did not specify the exact
             * matching universe name
             */
            if (ompi_rte_debug_flag) {
                ompi_output(0, "connection not allowed");
            }
            return OMPI_ERR_NO_CONNECTION_ALLOWED;
        }

        /* if persistent, set contact info... */
        if (OMPI_SUCCESS != mca_oob_set_contact_info(ompi_universe_info.seed_contact_info)) { /* set contact info */
            if (ompi_rte_debug_flag) {
                ompi_output(0, "error setting oob contact info - please report error to bugs@open-mpi.org\n");
            }
            return OMPI_ERR_FATAL;
        }

        mca_oob_parse_contact_info(ompi_universe_info.seed_contact_info, &proc, NULL);

        /* ...and ping to verify it's alive */
        if (OMPI_SUCCESS != mca_oob_ping(&proc, &ompi_rte_ping_wait)) {
            if (ompi_rte_debug_flag) {
                ompi_output(0, "ping failed");
            }
            return OMPI_ERR_CONNECTION_FAILED;
        }

        /* set the my_universe field, releasing any earlier value so the
         * previous copy of the name is not leaked
         */
        free(ompi_process_info.my_universe);
        ompi_process_info.my_universe = strdup(ompi_universe_info.name);

        /* request ns_replica and gpr_replica info for this process
         * only request info required - check ns_found/gpr_found
         */

        return OMPI_SUCCESS;
    }

    return OMPI_ERR_NOT_FOUND;
}

Просмотреть файл

@ -130,17 +130,9 @@ int main(int argc, char *argv[])
return ret;
}
fprintf(stderr, "check hostname for local host: univ %s sys %s\n", ompi_universe_info.host, ompi_system_info.nodename);
/* check for local universe existence */
if (0 != strncmp(ompi_universe_info.host, ompi_system_info.nodename, strlen(ompi_system_info.nodename))) {
fprintf(stderr, "remote universe operations not supported at this time\n");
exit(1);
}
fprintf(stderr, "check local univ\n");
if (OMPI_SUCCESS != (ret = ompi_rte_local_universe_exists())) {
if (OMPI_SUCCESS != (ret = ompi_rte_universe_exists())) {
fprintf(stderr, "could not contact local universe %s\n", ompi_universe_info.name);
exit(1);
}

Просмотреть файл

@ -5,25 +5,29 @@
#include "ompi_config.h"
#include "util/proc_info.h"
#include "mca/ns/ns.h"
#include "mca/ns/base/base.h"
#include "mca/pcm/base/base.h"
#include "runtime/runtime.h"
#include "mca/base/base.h"
#include "util/argv.h"
#include "mca/oob/base/base.h"
#include "util/cmd_line.h"
#include "util/sys_info.h"
#include "runtime/runtime.h"
#include "util/session_dir.h"
#include "include/constants.h"
#include "util/output.h"
#include <stdio.h>
#include <unistd.h>
#include <sys/param.h>
#include "include/constants.h"
#include "util/proc_info.h"
#include "util/argv.h"
#include "util/cmd_line.h"
#include "util/sys_info.h"
#include "util/session_dir.h"
#include "util/output.h"
#include "util/os_path.h"
#include "util/universe_setup_file_io.h"
#include "mca/base/base.h"
#include "mca/ns/ns.h"
#include "mca/ns/base/base.h"
#include "mca/pcm/base/base.h"
#include "mca/oob/base/base.h"
#include "runtime/runtime.h"
extern char** environ;
@ -41,6 +45,7 @@ main(int argc, char *argv[])
ompi_rte_node_schedule_t *sched;
char cwd[MAXPATHLEN];
char *my_contact_info, *tmp, *jobid_str, *procid_str;
char *contact_file;
/*
* Intialize our Open MPI environment
@ -53,17 +58,54 @@ main(int argc, char *argv[])
return ret;
}
/* setup to read common command line options that span all Open MPI programs */
cmd_line = OBJ_NEW(ompi_cmd_line_t);
ompi_cmd_line_make_opt(cmd_line, 'v', "version", 0,
"Show version of Open MPI and this program");
ompi_cmd_line_make_opt(cmd_line, 'h', "help", 0,
"Show help for this function");
/* setup rte command line arguments */
ompi_rte_cmd_line_setup(cmd_line);
/*
* Start command line arguments
* setup mca command line arguments
*/
if (OMPI_SUCCESS != (ret = mca_base_cmd_line_setup(cmd_line))) {
/* BWB show_help */
printf("show_help: mca_base_cmd_line_setup failed\n");
return ret;
/* BWB show_help */
printf("show_help: mca_base_cmd_line_setup failed\n");
return ret;
}
ompi_cmd_line_make_opt(cmd_line, 'h', "help", 0,
"Show this help message");
if (OMPI_SUCCESS != mca_base_cmd_line_process_args(cmd_line)) {
/* BWB show_help */
printf("show_help: mca_base_cmd_line_process_args\n");
return ret;
}
/* parse the local commands */
if (OMPI_SUCCESS != ompi_cmd_line_parse(cmd_line, true, argc, argv)) {
exit(ret);
}
if (ompi_cmd_line_is_taken(cmd_line, "help") ||
ompi_cmd_line_is_taken(cmd_line, "h")) {
printf("...showing ompi_info help message...\n");
exit(1);
}
if (ompi_cmd_line_is_taken(cmd_line, "version") ||
ompi_cmd_line_is_taken(cmd_line, "v")) {
printf("...showing off my version!\n");
exit(1);
}
/*
* Setup mpirun-specific command line arguments
*/
ompi_cmd_line_make_opt3(cmd_line, 'n', "np", "np", 1,
"Number of processes to start");
ompi_cmd_line_make_opt3(cmd_line, '\0', "hostfile", "hostfile", 1,
@ -91,14 +133,6 @@ main(int argc, char *argv[])
/* get the rte command line options */
ompi_rte_parse_cmd_line(cmd_line);
/*
* TSW - temporarily force to be a seed - and to use tcp oob.
*
*/
ompi_process_info.seed = true;
ompi_process_info.ns_replica = NULL;
ompi_process_info.gpr_replica = NULL;
/*
* Start the Open MPI Run Time Environment
*/
@ -108,26 +142,61 @@ main(int argc, char *argv[])
return ret;
}
if (OMPI_SUCCESS != ompi_rte_init_stage1(&multi_thread, &hidden_thread) ||
OMPI_SUCCESS != ompi_rte_init_stage2(&multi_thread, &hidden_thread)) {
/* BWB show_help */
printf("show_help: ompi_rte_init failed\n");
return ret;
multi_thread = true;
hidden_thread=false;
if (OMPI_SUCCESS != ompi_rte_init_stage1(&multi_thread, &hidden_thread)) {
/* JMS show_help */
printf("show_help: mpirun failed in ompi_rte_init\n");
return ret;
}
/* parse environmental variables and fill corresponding info structures
* need the oob to be open so we can pass the contact info we extract
*/
ompi_rte_parse_environ();
/* parse the cmd_line for rte options - override settings from enviro, where necessary
* copy everything into enviro variables for passing later on
*/
ompi_rte_parse_cmd_line(cmd_line);
/* parse the cmd_line for daemon options - gets all the options relating
* specifically to seed behavior, in case i'm a seed, but also gets
* options about scripts and hostfiles that might be of use to me
* override enviro variables where necessary
*/
ompi_rte_parse_daemon_cmd_line(cmd_line);
/* eventually, this is where we will check for existing universe and
* spin one up if it isn't there. for now, though
* temporarily force to be a seed.
*
*/
ompi_process_info.seed = true;
ompi_process_info.ns_replica = NULL;
ompi_process_info.gpr_replica = NULL;
/* setup rest of rte */
if (OMPI_SUCCESS != ompi_rte_init_stage2(&multi_thread, &hidden_thread)) {
/* BWB show_help */
printf("show_help: ompi_rte_init failed\n");
return ret;
}
/***** SET MY NAME *****/
if (NULL == ompi_process_info.name) { /* don't overwrite an existing name */
if (ompi_process_info.seed) {
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
} else {
ompi_process_info.name = ompi_rte_get_self();
if (ompi_process_info.seed) {
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name);
}
ompi_process_info.name = ompi_name_server.create_process_name(0, 0, 0);
} else { /* if not seed, then someone spawned me - must have provided name info */
if (NULL != ompi_process_info.name) { /* overwrite it */
free(ompi_process_info.name);
}
ompi_process_info.name = ompi_rte_get_self();
}
/* get my process info */
ompi_proc_info();
/* setup my session directory */
/* setup my session directory */
jobid_str = ompi_name_server.get_jobid_string(ompi_process_info.name);
procid_str = ompi_name_server.get_vpid_string(ompi_process_info.name);
@ -142,7 +211,7 @@ main(int argc, char *argv[])
ompi_output(0, "\tjobid %s", jobid_str);
ompi_output(0, "\tprocid %s", procid_str);
}
if (OMPI_ERROR == ompi_session_dir(true,
if (OMPI_ERROR == ompi_session_dir(true,
ompi_process_info.tmpdir_base,
ompi_system_info.user,
ompi_system_info.nodename, NULL,
@ -153,23 +222,37 @@ main(int argc, char *argv[])
exit(-1);
}
/*
* Register my process info with my replica.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
ompi_output(0, "ompi_rte_init: failed in ompi_rte_register()\n");
return ret;
}
/*
* Register my process info with my replica.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_register())) {
ompi_output(0, "ompi_rte_init: failed in ompi_rte_register()\n");
return ret;
}
/* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&multi_thread,
&hidden_thread))) {
/* JMS show_help */
printf("show_help: ompid failed in ompi_rte_init\n");
return ret;
/* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&multi_thread,
&hidden_thread))) {
/* JMS show_help */
printf("show_help: ompid failed in ompi_rte_init\n");
return ret;
}
/***** PREP TO START THE APPLICATION *****/
/* if i'm the seed, get my contact info and write my setup file for others to find */
if (ompi_process_info.seed) {
ompi_universe_info.seed_contact_info = mca_oob_get_contact_info();
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL);
if (OMPI_SUCCESS != (ret = ompi_write_universe_setup_file(contact_file))) {
if (ompi_rte_debug_flag) {
ompi_output(0, "[%d,%d,%d] ompid: couldn't write setup file", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid);
}
}
}
/***** PREP TO START THE APPLICATION *****/
/* get the jobid for the application */
new_jobid = ompi_name_server.create_jobid();
@ -177,9 +260,9 @@ main(int argc, char *argv[])
/* BWB - fix jobid, procs, and nodes */
nodelist = ompi_rte_allocate_resources(new_jobid, 0, num_procs);
if (NULL == nodelist) {
/* BWB show_help */
printf("show_help: ompi_rte_allocate_resources failed\n");
return -1;
/* BWB show_help */
printf("show_help: ompi_rte_allocate_resources failed\n");
return -1;
}
/*
@ -214,8 +297,8 @@ main(int argc, char *argv[])
sched->nodelist = nodelist;
if (sched->argc == 0) {
printf("no app to start\n");
return 1;
printf("no app to start\n");
return 1;
}
@ -229,8 +312,8 @@ main(int argc, char *argv[])
* spawn procs
*/
if (OMPI_SUCCESS != ompi_rte_spawn_procs(new_jobid, &schedlist)) {
printf("show_help: woops! we didn't spawn :( \n");
return -1;
printf("show_help: woops! we didn't spawn :( \n");
return -1;
}
/*

Просмотреть файл

@ -84,11 +84,6 @@ int main(int argc, char *argv[])
OBJ_CONSTRUCT(&ompi_daemon_mutex, ompi_mutex_t);
OBJ_CONSTRUCT(&ompi_daemon_condition, ompi_condition_t);
/* get the system info and setup defaults */
ompi_sys_info();
ompi_universe_info.host = strdup(ompi_system_info.nodename);
ompi_universe_info.uid = strdup(ompi_system_info.user);
/* setup to read common command line options that span all Open MPI programs */
cmd_line = OBJ_NEW(ompi_cmd_line_t);
@ -134,22 +129,6 @@ int main(int argc, char *argv[])
exit(1);
}
/* parse environmental variables and fill corresponding info structures */
ompi_rte_parse_environ();
/* parse the cmd_line for rte options - override settings from enviro, where necessary
* copy everything into enviro variables for passing later on
*/
ompi_rte_parse_cmd_line(cmd_line);
/* parse the cmd_line for daemon options - gets all the options relating
* specifically to seed behavior, in case i'm a seed, but also gets
* options about scripts and hostfiles that might be of use to me
* override enviro variables where necessary
*/
ompi_rte_parse_daemon_cmd_line(cmd_line);
/* Open up the MCA */
if (OMPI_SUCCESS != (ret = mca_base_open())) {
@ -168,13 +147,23 @@ int main(int argc, char *argv[])
return ret;
}
/* if I'm not the seed and don't have my replica info, look for them in the
* named universe
/* parse environmental variables and fill corresponding info structures
* need the oob to be open so we can pass the contact info we extract
*/
if (!ompi_process_info.seed &&
NULL == ompi_process_info.gpr_replica &&
NULL == ompi_process_info.ns_replica) {
}
ompi_rte_parse_environ();
/* parse the cmd_line for rte options - override settings from enviro, where necessary
* copy everything into enviro variables for passing later on
*/
ompi_rte_parse_cmd_line(cmd_line);
/* parse the cmd_line for daemon options - gets all the options relating
* specifically to seed behavior, in case i'm a seed, but also gets
* options about scripts and hostfiles that might be of use to me
* override enviro variables where necessary
*/
ompi_rte_parse_daemon_cmd_line(cmd_line);
/* setup the rest of the rte */
if (OMPI_SUCCESS != (ret = ompi_rte_init_stage2(&allow_multi_user_threads,
@ -203,9 +192,6 @@ int main(int argc, char *argv[])
ompi_process_info.name = ompi_rte_get_self();
}
/* get my process info */
ompi_proc_info();
/* setup my session directory */
jobid_str = ompi_name_server.get_jobid_string(ompi_process_info.name);
procid_str = ompi_name_server.get_vpid_string(ompi_process_info.name);
@ -250,7 +236,7 @@ int main(int argc, char *argv[])
/* if i'm the seed, get my contact info and write my setup file for others to find */
if (ompi_process_info.seed) {
ompi_universe_info.oob_contact_info = mca_oob_get_contact_info();
ompi_universe_info.seed_contact_info = mca_oob_get_contact_info();
contact_file = ompi_os_path(false, ompi_process_info.universe_session_dir,
"universe-setup.txt", NULL);

Просмотреть файл

@ -140,7 +140,7 @@ int main(int argc, char **argv)
exit(1);
}
if (OMPI_SUCCESS != (ret = ompi_rte_local_universe_exists()) &&
if (OMPI_SUCCESS != (ret = ompi_rte_universe_exists()) &&
(OMPI_ERR_NOT_IMPLEMENTED != ret)) {
if (OMPI_ERR_NOT_FOUND != ret) {

Просмотреть файл

@ -60,22 +60,16 @@ int ompi_write_universe_setup_file(char *filename)
}
fprintf(fp, "%s\n", ompi_universe_info.scope);
if (ompi_universe_info.silent_mode) {
fprintf(fp, "silent\n");
} else {
if (ompi_universe_info.console) {
fprintf(fp, "console\n");
}
if (ompi_universe_info.web_server && NULL != ompi_universe_info.socket_contact_info) {
fprintf(fp, "%s\n", ompi_universe_info.socket_contact_info);
} else {
fprintf(fp, "none\n");
fprintf(fp, "silent\n");
}
if (NULL == ompi_universe_info.oob_contact_info) {
if (NULL == ompi_universe_info.seed_contact_info) {
goto CLEANUP;
}
fprintf(fp, "%s\n", ompi_universe_info.oob_contact_info);
fprintf(fp, "%s\n", ompi_universe_info.seed_contact_info);
fclose(fp);
return OMPI_SUCCESS;
@ -144,27 +138,17 @@ int ompi_read_universe_setup_file(char *filename)
goto CLEANUP;
}
if (0 == strncmp(input, "silent", strlen("silent"))) {
ompi_universe_info.silent_mode = true;
ompi_universe_info.console = false;
} else if (0 == strncmp(input, "console", strlen("console"))) {
ompi_universe_info.silent_mode = false;
ompi_universe_info.console = true;
} else {
free(input);
goto CLEANUP;
}
free(input);
ompi_universe_info.socket_contact_info = ompi_getline(fp);
if (NULL == ompi_universe_info.socket_contact_info) {
goto CLEANUP;
}
if (0 == strncmp(ompi_universe_info.socket_contact_info, "none", strlen("none"))) {
ompi_universe_info.web_server = false;
} else {
ompi_universe_info.web_server = true;
}
ompi_universe_info.oob_contact_info = ompi_getline(fp);
if (NULL == ompi_universe_info.oob_contact_info) {
ompi_universe_info.seed_contact_info = ompi_getline(fp);
if (NULL == ompi_universe_info.seed_contact_info) {
goto CLEANUP;
}