1
1

Fix a race condition: ensure we don't call terminate in orterun more than once, even if the timeout fires while termination is already in progress

This commit was SVN r17766.
Этот коммит содержится в:
Ralph Castain 2008-03-06 19:35:57 +00:00
родитель 4eace9d020
Коммит 097cc83be2
3 изменённых файла: 16 добавлений и 26 удалений

Просмотреть файл

@ -147,8 +147,8 @@ int orte_register_params(void)
orte_max_timeout = 1000000.0 * value; /* convert to usec */ orte_max_timeout = 1000000.0 * value; /* convert to usec */
mca_base_param_reg_int_name("orte", "timeout_step", mca_base_param_reg_int_name("orte", "timeout_step",
"Time to wait [in usecs/proc] before aborting an ORTE operation (default: 10 usec/proc)", "Time to wait [in usecs/proc] before aborting an ORTE operation (default: 100 usec/proc)",
false, false, 10, &orte_timeout_usec_per_proc); false, false, 100, &orte_timeout_usec_per_proc);
/* default hostfile */ /* default hostfile */
mca_base_param_reg_string_name("default", "hostfile", mca_base_param_reg_string_name("default", "hostfile",

Просмотреть файл

@ -112,14 +112,15 @@ static orte_std_cntr_t total_num_apps = 0;
static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT; static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
static opal_event_t *orterun_event, *orteds_exit_event; static opal_event_t *orterun_event, *orteds_exit_event;
static char *ompi_server=NULL; static char *ompi_server=NULL;
static bool terminating=false;
/* /*
* Globals * Globals
*/ */
struct globals_t orterun_globals; struct orterun_globals_t orterun_globals;
bool globals_init = false; static bool globals_init = false;
opal_cmd_line_init_t cmd_line_init[] = { static opal_cmd_line_init_t cmd_line_init[] = {
/* Various "obvious" options */ /* Various "obvious" options */
{ NULL, NULL, NULL, 'h', NULL, "help", 0, { NULL, NULL, NULL, 'h', NULL, "help", 0,
&orterun_globals.help, OPAL_CMD_LINE_TYPE_BOOL, &orterun_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
@ -183,13 +184,6 @@ opal_cmd_line_init_t cmd_line_init[] = {
NULL, OPAL_CMD_LINE_TYPE_STRING, NULL, OPAL_CMD_LINE_TYPE_STRING,
"Provide a cartography file" }, "Provide a cartography file" },
/* Don't wait for the process to finish before exiting */
#if 0
{ NULL, NULL, NULL, '\0', "nw", "nw", 0,
&orterun_globals.no_wait_for_job_completion, OPAL_CMD_LINE_TYPE_BOOL,
"Launch the processes and do not wait for their completion (i.e., let orterun complete as soon a successful launch occurs)" },
#endif
/* Export environment variables; potentially used multiple times, /* Export environment variables; potentially used multiple times,
so it does not make sense to set into a variable */ so it does not make sense to set into a variable */
{ NULL, NULL, NULL, 'x', NULL, NULL, 1, { NULL, NULL, NULL, 'x', NULL, NULL, 1,
@ -644,6 +638,12 @@ static void terminated(int trigpipe, short event, void *arg)
orte_proc_t **procs; orte_proc_t **procs;
orte_vpid_t i; orte_vpid_t i;
/* flag that we are here to avoid doing it twice */
if (terminating) {
return;
}
terminating = true;
/* close the trigger pipe so it cannot be called again */ /* close the trigger pipe so it cannot be called again */
if (0 <= trigpipe) { if (0 <= trigpipe) {
close(trigpipe); close(trigpipe);

Просмотреть файл

@ -37,13 +37,12 @@ int orterun(int argc, char *argv[]);
/** /**
* Global struct for catching orterun command line options. * Global struct for catching orterun command line options.
*/ */
struct globals_t { struct orterun_globals_t {
bool help; bool help;
bool version; bool version;
bool verbose; bool verbose;
bool quiet; bool quiet;
bool exit; bool exit;
bool no_wait_for_job_completion;
bool by_node; bool by_node;
bool by_slot; bool by_slot;
bool do_not_launch; bool do_not_launch;
@ -62,19 +61,10 @@ struct globals_t {
}; };
/** /**
* Struct holding values gleaned from the orterun command line * Struct holding values gleaned from the orterun command line -
* needed by debugger init
*/ */
ORTE_DECLSPEC extern struct globals_t orterun_globals; ORTE_DECLSPEC extern struct orterun_globals_t orterun_globals;
/**
* Whether orterun_globals has been initialized yet or not
*/
ORTE_DECLSPEC extern bool globals_init;
/**
* Struct holding list of allowable command line parameters
*/
ORTE_DECLSPEC extern opal_cmd_line_init_t cmd_line_init[];
END_C_DECLS END_C_DECLS