Fix a race condition: ensure that orterun does not run its termination handling more than once, even if the timeout fires while termination is already in progress.
This commit was SVN r17766.
Этот коммит содержится в:
родитель
4eace9d020
Коммит
097cc83be2
@ -147,8 +147,8 @@ int orte_register_params(void)
|
||||
orte_max_timeout = 1000000.0 * value; /* convert to usec */
|
||||
|
||||
mca_base_param_reg_int_name("orte", "timeout_step",
|
||||
"Time to wait [in usecs/proc] before aborting an ORTE operation (default: 10 usec/proc)",
|
||||
false, false, 10, &orte_timeout_usec_per_proc);
|
||||
"Time to wait [in usecs/proc] before aborting an ORTE operation (default: 100 usec/proc)",
|
||||
false, false, 100, &orte_timeout_usec_per_proc);
|
||||
|
||||
/* default hostfile */
|
||||
mca_base_param_reg_string_name("default", "hostfile",
|
||||
|
@ -112,14 +112,15 @@ static orte_std_cntr_t total_num_apps = 0;
|
||||
static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
|
||||
static opal_event_t *orterun_event, *orteds_exit_event;
|
||||
static char *ompi_server=NULL;
|
||||
static bool terminating=false;
|
||||
|
||||
/*
|
||||
* Globals
|
||||
*/
|
||||
struct globals_t orterun_globals;
|
||||
bool globals_init = false;
|
||||
struct orterun_globals_t orterun_globals;
|
||||
static bool globals_init = false;
|
||||
|
||||
opal_cmd_line_init_t cmd_line_init[] = {
|
||||
static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
/* Various "obvious" options */
|
||||
{ NULL, NULL, NULL, 'h', NULL, "help", 0,
|
||||
&orterun_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
@ -183,13 +184,6 @@ opal_cmd_line_init_t cmd_line_init[] = {
|
||||
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Provide a cartography file" },
|
||||
|
||||
/* Don't wait for the process to finish before exiting */
|
||||
#if 0
|
||||
{ NULL, NULL, NULL, '\0', "nw", "nw", 0,
|
||||
&orterun_globals.no_wait_for_job_completion, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Launch the processes and do not wait for their completion (i.e., let orterun complete as soon a successful launch occurs)" },
|
||||
#endif
|
||||
|
||||
/* Export environment variables; potentially used multiple times,
|
||||
so it does not make sense to set into a variable */
|
||||
{ NULL, NULL, NULL, 'x', NULL, NULL, 1,
|
||||
@ -644,6 +638,12 @@ static void terminated(int trigpipe, short event, void *arg)
|
||||
orte_proc_t **procs;
|
||||
orte_vpid_t i;
|
||||
|
||||
/* flag that we are here to avoid doing it twice */
|
||||
if (terminating) {
|
||||
return;
|
||||
}
|
||||
terminating = true;
|
||||
|
||||
/* close the trigger pipe so it cannot be called again */
|
||||
if (0 <= trigpipe) {
|
||||
close(trigpipe);
|
||||
|
@ -37,13 +37,12 @@ int orterun(int argc, char *argv[]);
|
||||
/**
|
||||
* Global struct for catching orterun command line options.
|
||||
*/
|
||||
struct globals_t {
|
||||
struct orterun_globals_t {
|
||||
bool help;
|
||||
bool version;
|
||||
bool verbose;
|
||||
bool quiet;
|
||||
bool exit;
|
||||
bool no_wait_for_job_completion;
|
||||
bool by_node;
|
||||
bool by_slot;
|
||||
bool do_not_launch;
|
||||
@ -62,19 +61,10 @@ struct globals_t {
|
||||
};
|
||||
|
||||
/**
|
||||
* Struct holding values gleaned from the orterun command line
|
||||
* Struct holding values gleaned from the orterun command line -
|
||||
* needed by debugger init
|
||||
*/
|
||||
ORTE_DECLSPEC extern struct globals_t orterun_globals;
|
||||
|
||||
/**
|
||||
* Whether orterun_globals has been initialized yet or not
|
||||
*/
|
||||
ORTE_DECLSPEC extern bool globals_init;
|
||||
|
||||
/**
|
||||
* Struct holding list of allowable command line parameters
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_cmd_line_init_t cmd_line_init[];
|
||||
ORTE_DECLSPEC extern struct orterun_globals_t orterun_globals;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user