1
1

Shift the signal forwarding code to ess/base so it can be available to more than just the hnp component. Extend the slurm component to use it so that any signals given directly to the daemons by their slurmstepd get forwarded to their local clients

Check for NULL

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-06-01 23:36:02 -07:00
родитель e45a358bf0
Коммит 066d5eedce
11 изменённых файлов: 335 добавлений и 235 удалений

Просмотреть файл

@ -10,7 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved.
# Copyright (c) 2015 Intel, Inc. All rights reserved.
# Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -26,10 +26,9 @@ headers += \
libmca_ess_la_SOURCES += \
base/ess_base_frame.c \
base/ess_base_select.c \
base/ess_base_get.c \
base/ess_base_std_tool.c \
base/ess_base_std_app.c \
base/ess_base_std_orted.c \
base/ess_base_get.c \
base/ess_base_std_tool.c \
base/ess_base_std_app.c \
base/ess_base_std_orted.c \
base/ess_base_std_prolog.c \
base/ess_base_fns.c

Просмотреть файл

@ -52,6 +52,7 @@ ORTE_DECLSPEC extern int orte_ess_base_std_buffering;
ORTE_DECLSPEC extern int orte_ess_base_num_procs;
ORTE_DECLSPEC extern char *orte_ess_base_jobid;
ORTE_DECLSPEC extern char *orte_ess_base_vpid;
ORTE_DECLSPEC extern opal_list_t orte_ess_base_signals;
/*
* Internal helper functions used by components
@ -82,6 +83,13 @@ ORTE_DECLSPEC int orte_ess_env_put(orte_std_cntr_t num_procs,
orte_std_cntr_t num_local_procs,
char ***env);
/*
 * One entry of the orte_ess_base_signals list: a signal that should be
 * trapped and forwarded.
 */
typedef struct {
/* list linkage - must be first so the object can live on an opal_list_t */
opal_list_item_t super;
/* textual name of the signal (or the user-supplied token) */
char *signame;
/* signal number */
int signal;
} orte_ess_base_signal_t;
OBJ_CLASS_DECLARATION(orte_ess_base_signal_t);
END_C_DECLS
#endif

Просмотреть файл

@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -23,8 +24,10 @@
#include "orte/constants.h"
#include "orte/mca/mca.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "orte/util/show_help.h"
#include "orte/mca/ess/base/base.h"
@ -46,6 +49,7 @@ int orte_ess_base_std_buffering = -1;
int orte_ess_base_num_procs = -1;
char *orte_ess_base_jobid = NULL;
char *orte_ess_base_vpid = NULL;
opal_list_t orte_ess_base_signals = {0};
static mca_base_var_enum_value_t stream_buffering_values[] = {
{-1, "default"},
@ -55,6 +59,9 @@ static mca_base_var_enum_value_t stream_buffering_values[] = {
{0, NULL}
};
static int setup_signals(void);
static char *forwarded_signals = NULL;
static int orte_ess_base_register(mca_base_register_flag_t flags)
{
mca_base_var_enum_t *new_enum;
@ -96,16 +103,38 @@ static int orte_ess_base_register(mca_base_register_flag_t flags)
MCA_BASE_VAR_SCOPE_READONLY, &orte_ess_base_num_procs);
mca_base_var_register_synonym(ret, "orte", "orte", "ess", "num_procs", 0);
forwarded_signals = NULL;
ret = mca_base_var_register ("orte", "ess", "base", "forward_signals",
"Comma-delimited list of additional signals (names or integers) to forward to "
"application processes [\"none\" => forward nothing]. Signals provided by "
"default include SIGTSTP, SIGUSR1, SIGUSR2, SIGABRT, SIGALRM, and SIGCONT",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY,
&forwarded_signals);
mca_base_var_register_synonym(ret, "orte", "ess", "hnp", "forward_signals", 0);
return ORTE_SUCCESS;
}
/*
 * Framework close hook: tear down the list of forwarded signals, then
 * close the framework's components and report that result.
 */
static int orte_ess_base_close(void)
{
    int status;

    /* drop every signal entry built at open time */
    OPAL_LIST_DESTRUCT(&orte_ess_base_signals);

    status = mca_base_framework_components_close(&orte_ess_base_framework, NULL);
    return status;
}
/*
 * Framework open hook.
 *
 * Constructs the global list of forwarded signals. When running as the
 * HNP or as a daemon the list is populated by setup_signals(); a failure
 * there is returned to the caller without opening any component.
 * Otherwise the result of opening the framework components is returned.
 */
static int orte_ess_base_open(mca_base_open_flag_t flags)
{
    OBJ_CONSTRUCT(&orte_ess_base_signals, opal_list_t);

    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
        int status = setup_signals();

        if (ORTE_SUCCESS != status) {
            return status;
        }
    }

    return mca_base_framework_components_open(&orte_ess_base_framework, flags);
}
@ -113,4 +142,161 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, ess, "ORTE Environmenal System Setup",
orte_ess_base_register, orte_ess_base_open, orte_ess_base_close,
mca_ess_base_static_components, 0);
/* signal forwarding */
/* setup signal forwarding list */
/**
 * Description of one signal the ess base knows by name.
 */
struct known_signal {
    /** signal number */
    int signal;
    /** signal name */
    char *signame;
    /** can this signal be forwarded */
    bool can_forward;
};

/*
 * Table of the signals recognized by name. Entries with can_forward set
 * are forwarded by default; entries without it are rejected when a user
 * asks for them. The table ends with a {0, NULL, false} sentinel: name
 * lookups stop at the NULL signame, and the sentinel is never treated as
 * forwardable.
 *
 * SIGTSTP, SIGABRT, SIGALRM and SIGCONT are listed because the
 * forward_signals parameter documents them as forwarded by default;
 * without table entries they would neither be forwarded nor be accepted
 * by name.
 */
static struct known_signal known_signals[] = {
    {SIGTERM, "SIGTERM", false},
    {SIGHUP, "SIGHUP", false},
    {SIGINT, "SIGINT", false},
    {SIGKILL, "SIGKILL", false},
#ifdef SIGSYS
    {SIGSYS, "SIGSYS", true},
#endif
#ifdef SIGXCPU
    {SIGXCPU, "SIGXCPU", true},
#endif
    {SIGXFSZ, "SIGXFSZ", true},
#ifdef SIGVTALRM
    {SIGVTALRM, "SIGVTALRM", true},
#endif
#ifdef SIGPROF
    {SIGPROF, "SIGPROF", true},
#endif
#ifdef SIGINFO
    {SIGINFO, "SIGINFO", true},
#endif
#ifdef SIGPWR
    {SIGPWR, "SIGPWR", true},
#endif
#ifdef SIGURG
    {SIGURG, "SIGURG", true},
#endif
#ifdef SIGUSR1
    {SIGUSR1, "SIGUSR1", true},
#endif
#ifdef SIGUSR2
    {SIGUSR2, "SIGUSR2", true},
#endif
#ifdef SIGTSTP
    {SIGTSTP, "SIGTSTP", true},
#endif
#ifdef SIGABRT
    {SIGABRT, "SIGABRT", true},
#endif
#ifdef SIGALRM
    {SIGALRM, "SIGALRM", true},
#endif
#ifdef SIGCONT
    {SIGCONT, "SIGCONT", true},
#endif
    {0, NULL, false},
};
/*
 * Append a new orte_ess_base_signal_t to orte_ess_base_signals.
 *   x - signal number stored in the entry
 *   s - signal name; it is duplicated with strdup(), so the entry owns
 *       its own copy
 * Note that neither the OBJ_NEW nor the strdup result is checked here.
 */
#define ESS_ADDSIGNAL(x, s) \
do { \
orte_ess_base_signal_t *_sig; \
_sig = OBJ_NEW(orte_ess_base_signal_t); \
_sig->signal = (x); \
_sig->signame = strdup((s)); \
opal_list_append(&orte_ess_base_signals, &_sig->super); \
} while(0)
/**
 * Build orte_ess_base_signals, the list of signals to trap and forward.
 *
 * The list starts with every known_signals entry marked can_forward and
 * is then extended by the comma-delimited forward_signals parameter.
 * Each parameter token is either a signal name (anything starting with
 * "SIG", compared case-insensitively) or a positive decimal signal
 * number. Tokens that duplicate an entry already on the list are
 * ignored. A parameter value of exactly "none" leaves the list empty.
 *
 * @return ORTE_SUCCESS when the list has been built (it may be empty).
 *         OPAL_ERR_SILENT, after a show_help message, when a token is
 *         not a recognized signal or names a signal that cannot be
 *         forwarded.
 */
static int setup_signals(void)
{
    int i, sval, nsigs;
    unsigned long uval;
    char **signals, *tmp;
    orte_ess_base_signal_t *sig;
    bool ignore, found, is_name;

    /* if they told us "none", then nothing to do - the list stays empty */
    if (NULL != forwarded_signals &&
        0 == strcmp(forwarded_signals, "none")) {
        return ORTE_SUCCESS;
    }

    /* we know that some signals are (nearly) always defined, regardless
     * of environment, so add them here. The table's sentinel entry has
     * can_forward == false and is therefore skipped */
    nsigs = sizeof(known_signals) / sizeof(struct known_signal);
    for (i=0; i < nsigs; i++) {
        if (known_signals[i].can_forward) {
            ESS_ADDSIGNAL(known_signals[i].signal, known_signals[i].signame);
        }
    }

    /* see if they asked for anything beyond those - note that they may
     * have asked for some we already cover, and so we ignore any duplicates */
    if (NULL == forwarded_signals) {
        return ORTE_SUCCESS;
    }

    signals = opal_argv_split(forwarded_signals, ',');
    if (NULL == signals) {
        /* the split produced no tokens (e.g., an empty parameter value),
         * so there is nothing to add */
        return ORTE_SUCCESS;
    }

    for (i=0; NULL != signals[i]; i++) {
        sval = 0;
        /* names are matched case-insensitively below, so recognize the
         * "SIG" prefix the same way */
        is_name = (0 == strncasecmp(signals[i], "SIG", 3));
        if (!is_name) {
            /* treat it like a number: it must be fully numeric, non-empty,
             * strictly positive and representable as an int */
            errno = 0;
            uval = strtoul(signals[i], &tmp, 10);
            sval = (int)uval;
            if (0 != errno || tmp == signals[i] || '\0' != *tmp ||
                sval <= 0 || (unsigned long)sval != uval) {
                orte_show_help("help-ess-base.txt", "ess-base:unknown-signal",
                               true, signals[i], forwarded_signals);
                opal_argv_free(signals);
                return OPAL_ERR_SILENT;
            }
        }

        /* see if it is one we already covered */
        ignore = false;
        OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
            if ((NULL != sig->signame && 0 == strcasecmp(signals[i], sig->signame)) ||
                (!is_name && sval == sig->signal)) {
                /* got it - we will ignore */
                ignore = true;
                break;
            }
        }
        if (ignore) {
            continue;
        }

        /* see if it matches a signal we know, by name or by number */
        found = false;
        for (int j = 0 ; known_signals[j].signame ; ++j) {
            if (0 == strcasecmp (signals[i], known_signals[j].signame) ||
                (!is_name && sval == known_signals[j].signal)) {
                if (!known_signals[j].can_forward) {
                    orte_show_help("help-ess-base.txt", "ess-base:cannot-forward",
                                   true, known_signals[j].signame, forwarded_signals);
                    opal_argv_free(signals);
                    return OPAL_ERR_SILENT;
                }
                found = true;
                ESS_ADDSIGNAL(known_signals[j].signal, known_signals[j].signame);
                break;
            }
        }

        if (!found) {
            if (is_name) {
                /* a name we have never heard of */
                orte_show_help("help-ess-base.txt", "ess-base:unknown-signal",
                               true, signals[i], forwarded_signals);
                opal_argv_free(signals);
                return OPAL_ERR_SILENT;
            }
            /* an unnamed but valid number: record it under the token
             * the user gave us */
            ESS_ADDSIGNAL(sval, signals[i]);
        }
    }
    opal_argv_free (signals);

    return ORTE_SUCCESS;
}
/* instantiate the class */
/* Constructor for orte_ess_base_signal_t: start with no name attached. */
static void scon(orte_ess_base_signal_t *entry)
{
    entry->signame = NULL;
}
/* Destructor for orte_ess_base_signal_t: release the entry's name. */
static void sdes(orte_ess_base_signal_t *entry)
{
    /* free(NULL) is a no-op, so an entry without a name needs no guard */
    free(entry->signame);
}
/* orte_ess_base_signal_t derives from opal_list_item_t; scon and sdes are
 * its constructor and destructor */
OBJ_CLASS_INSTANCE(orte_ess_base_signal_t,
opal_list_item_t,
scon, sdes);

Просмотреть файл

@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -32,3 +33,19 @@ and got the error %s.
This could mean that your PATH or executable name is wrong, or that you do not
have the necessary permissions. Please ensure that the executable is able to be
found and executed as it is required for singleton operations.
[ess-base:cannot-forward]
The system does not support trapping and forwarding of the
specified signal:
signal: %s
param: %s
Please remove that signal from the ess_base_forward_signals MCA parameter.
[ess-base:unknown-signal]
The following signal was included in the ess_base_forward_signals
MCA parameter:
signal: %s
param: %s
This is not a recognized signal value. Please fix or remove it.

Просмотреть файл

@ -20,8 +20,6 @@
# $HEADER$
#
dist_ortedata_DATA = help-ess-hnp.txt
sources = \
ess_hnp.h \
ess_hnp_component.c \

Просмотреть файл

@ -28,19 +28,8 @@ BEGIN_C_DECLS
/*
* Module open / close
*/
/*
 * One entry of the hnp component's list of signals to forward.
 */
typedef struct {
/* list linkage so the object can be placed on an opal_list_t */
opal_list_item_t super;
/* textual name of the signal (or the user-supplied token) */
char *signame;
/* signal number */
int signal;
} ess_hnp_signal_t;
OBJ_CLASS_DECLARATION(ess_hnp_signal_t);
/*
 * hnp component type: the generic ess component extended with the list
 * of signals to be forwarded.
 */
typedef struct {
/* generic ess component data */
orte_ess_base_component_t base;
/* list of ess_hnp_signal_t entries */
opal_list_t signals;
} orte_ess_hnp_component_t;
ORTE_MODULE_DECLSPEC extern orte_ess_hnp_component_t mca_ess_hnp_component;
ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_hnp_component;
END_C_DECLS

Просмотреть файл

@ -41,187 +41,36 @@
#include "orte/runtime/orte_globals.h"
extern orte_ess_base_module_t orte_ess_hnp_module;
static int hnp_component_register (void);
static int hnp_component_open(void);
static int hnp_component_close(void);
static int hnp_component_query(mca_base_module_t **module, int *priority);
/* Description of one signal the hnp component knows by name. */
struct known_signal {
/** signal number */
int signal;
/** signal name */
char *signame;
/** can this signal be forwarded */
bool can_forward;
};
/*
 * Signals that may be requested by name through the forward_signals
 * parameter. Entries whose last field is false are rejected with a
 * "cannot-forward" message when requested. Lookups walk the table until
 * they reach the entry with a NULL signame.
 */
static struct known_signal known_signals[] = {
{SIGTERM, "SIGTERM", false},
{SIGHUP, "SIGHUP", false},
{SIGINT, "SIGINT", false},
{SIGKILL, "SIGKILL", false},
#ifdef SIGSYS
{SIGSYS, "SIGSYS", true},
#endif
#ifdef SIGXCPU
{SIGXCPU, "SIGXCPU", true},
#endif
{SIGXFSZ, "SIGXFSZ", true},
#ifdef SIGVTALRM
{SIGVTALRM, "SIGVTALRM", true},
#endif
#ifdef SIGPROF
{SIGPROF, "SIGPROF", true},
#endif
#ifdef SIGINFO
{SIGINFO, "SIGINFO", true},
#endif
#ifdef SIGPWR
{SIGPWR, "SIGPWR", true},
#endif
/* sentinel: NULL name terminates the table (can_forward defaults to false) */
{0, NULL},
};
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
orte_ess_hnp_component_t mca_ess_hnp_component = {
.base = {
.base_version = {
ORTE_ESS_BASE_VERSION_3_0_0,
orte_ess_base_component_t mca_ess_hnp_component = {
.base_version = {
ORTE_ESS_BASE_VERSION_3_0_0,
/* Component name and version */
.mca_component_name = "hnp",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
/* Component name and version */
.mca_component_name = "hnp",
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION),
/* Component open and close functions */
.mca_open_component = hnp_component_open,
.mca_close_component = hnp_component_close,
.mca_query_component = hnp_component_query,
.mca_register_component_params = hnp_component_register,
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
/* Component open and close functions */
.mca_open_component = hnp_component_open,
.mca_close_component = hnp_component_close,
.mca_query_component = hnp_component_query
},
.base_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/* value of the component's "forward_signals" parameter; NULL when unset */
static char *additional_signals;
/*
 * Register the component's MCA parameters. Only "forward_signals" is
 * registered; its value is stored in additional_signals. The return
 * value of the registration call is deliberately discarded and
 * ORTE_SUCCESS is always returned.
 */
static int hnp_component_register (void)
{
/* reset the storage so the parameter has no default value */
additional_signals = NULL;
(void) mca_base_component_var_register (&mca_ess_hnp_component.base.base_version,
"forward_signals", "Comma-delimited list "
"of additional signals (names or integers) to forward to "
"application processes [\"none\" => forward nothing]", MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_4, MCA_BASE_VAR_SCOPE_READONLY,
&additional_signals);
return ORTE_SUCCESS;
}
/*
 * Append a new ess_hnp_signal_t to mca_ess_hnp_component.signals.
 *   x - signal number stored in the entry
 *   s - signal name; it is duplicated with strdup(), so the entry owns
 *       its own copy
 * Note that neither the OBJ_NEW nor the strdup result is checked here.
 */
#define ESS_ADDSIGNAL(x, s) \
do { \
ess_hnp_signal_t *_sig; \
_sig = OBJ_NEW(ess_hnp_signal_t); \
_sig->signal = (x); \
_sig->signame = strdup((s)); \
opal_list_append(&mca_ess_hnp_component.signals, &_sig->super); \
} while(0)
/**
 * Open the hnp component and build mca_ess_hnp_component.signals, the
 * list of signals to trap and forward.
 *
 * The list starts with a fixed set of default signals and is then
 * extended by the comma-delimited forward_signals parameter. Each token
 * is either a signal name (anything starting with "SIG", compared
 * case-insensitively) or a positive decimal signal number. Tokens that
 * duplicate an entry already on the list are ignored. A parameter value
 * of exactly "none" leaves the (constructed) list empty.
 *
 * @return ORTE_SUCCESS when the list has been built (it may be empty).
 *         OPAL_ERR_SILENT, after a show_help message, when a token is
 *         not a recognized signal or names a signal that cannot be
 *         forwarded.
 */
static int hnp_component_open(void)
{
    int i, sval;
    unsigned long uval;
    char **signals, *tmp;
    ess_hnp_signal_t *sig;
    bool ignore, found, is_name;

    OBJ_CONSTRUCT(&mca_ess_hnp_component.signals, opal_list_t);

    /* if they told us "none", forward nothing: the list stays constructed
     * (it is still needed at close) but empty */
    if (NULL != additional_signals &&
        0 == strcmp(additional_signals, "none")) {
        return ORTE_SUCCESS;
    }

    /* we know that some signals are (nearly) always defined, regardless
     * of environment, so add them here */
    ESS_ADDSIGNAL(SIGTSTP, "SIGTSTP");
    ESS_ADDSIGNAL(SIGUSR1, "SIGUSR1");
    ESS_ADDSIGNAL(SIGUSR2, "SIGUSR2");
    ESS_ADDSIGNAL(SIGABRT, "SIGABRT");
    ESS_ADDSIGNAL(SIGALRM, "SIGALRM");
    ESS_ADDSIGNAL(SIGCONT, "SIGCONT");
#ifdef SIGURG
    ESS_ADDSIGNAL(SIGURG, "SIGURG");
#endif

    /* see if they asked for anything beyond those - note that they may
     * have asked for some we already cover, and so we ignore any duplicates */
    if (NULL == additional_signals) {
        return ORTE_SUCCESS;
    }

    signals = opal_argv_split(additional_signals, ',');
    if (NULL == signals) {
        /* the split produced no tokens (e.g., an empty parameter value),
         * so there is nothing to add */
        return ORTE_SUCCESS;
    }

    for (i=0; NULL != signals[i]; i++) {
        sval = 0;
        /* names are matched case-insensitively below, so recognize the
         * "SIG" prefix the same way */
        is_name = (0 == strncasecmp(signals[i], "SIG", 3));
        if (!is_name) {
            /* treat it like a number: it must be fully numeric, non-empty,
             * strictly positive and representable as an int */
            errno = 0;
            uval = strtoul(signals[i], &tmp, 10);
            sval = (int)uval;
            if (0 != errno || tmp == signals[i] || '\0' != *tmp ||
                sval <= 0 || (unsigned long)sval != uval) {
                orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal",
                               true, signals[i], additional_signals);
                opal_argv_free(signals);
                return OPAL_ERR_SILENT;
            }
        }

        /* see if it is one we already covered */
        ignore = false;
        OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) {
            if ((NULL != sig->signame && 0 == strcasecmp(signals[i], sig->signame)) ||
                (!is_name && sval == sig->signal)) {
                /* got it - we will ignore */
                ignore = true;
                break;
            }
        }
        if (ignore) {
            continue;
        }

        /* see if it matches a signal we know, by name or by number */
        found = false;
        for (int j = 0 ; known_signals[j].signame ; ++j) {
            if (0 == strcasecmp (signals[i], known_signals[j].signame) ||
                (!is_name && sval == known_signals[j].signal)) {
                if (!known_signals[j].can_forward) {
                    orte_show_help("help-ess-hnp.txt", "ess-hnp:cannot-forward",
                                   true, known_signals[j].signame, additional_signals);
                    opal_argv_free(signals);
                    return OPAL_ERR_SILENT;
                }
                found = true;
                ESS_ADDSIGNAL(known_signals[j].signal, known_signals[j].signame);
                break;
            }
        }

        if (!found) {
            if (is_name) {
                /* a name we have never heard of */
                orte_show_help("help-ess-hnp.txt", "ess-hnp:unknown-signal",
                               true, signals[i], additional_signals);
                opal_argv_free(signals);
                return OPAL_ERR_SILENT;
            }
            /* an unnamed but valid number: record it under the token
             * the user gave us */
            ESS_ADDSIGNAL(sval, signals[i]);
        }
    }
    opal_argv_free (signals);

    return ORTE_SUCCESS;
}
@ -250,18 +99,3 @@ static int hnp_component_close(void)
{
return ORTE_SUCCESS;
}
/* instantiate the class */
/* Constructor for ess_hnp_signal_t: start with no name attached. */
static void scon(ess_hnp_signal_t *entry)
{
    entry->signame = NULL;
}
/* Destructor for ess_hnp_signal_t: release the entry's name. */
static void sdes(ess_hnp_signal_t *entry)
{
    /* free(NULL) is a no-op, so an entry without a name needs no guard */
    free(entry->signame);
}
/* ess_hnp_signal_t derives from opal_list_item_t; scon and sdes are its
 * constructor and destructor */
OBJ_CLASS_INSTANCE(ess_hnp_signal_t,
opal_list_item_t,
scon, sdes);

Просмотреть файл

@ -149,7 +149,7 @@ static int rte_init(void)
int idx;
orte_topology_t *t;
opal_list_t transports;
ess_hnp_signal_t *sig;
orte_ess_base_signal_t *sig;
/* run the prolog */
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
@ -193,7 +193,7 @@ static int rte_init(void)
signal(SIGHUP, abort_signal_callback);
/** setup callbacks for signals we should forward */
if (0 < (idx = opal_list_get_size(&mca_ess_hnp_component.signals))) {
if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) {
forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx);
if (NULL == forward_signals_events) {
ret = ORTE_ERR_OUT_OF_RESOURCE;
@ -201,7 +201,7 @@ static int rte_init(void)
goto error;
}
idx = 0;
OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) {
OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback);
++idx;
}
@ -789,7 +789,7 @@ static int rte_finalize(void)
char *contact_path;
orte_job_t *jdata;
uint32_t key;
ess_hnp_signal_t *sig;
orte_ess_base_signal_t *sig;
unsigned int i;
if (signals_set) {
@ -799,7 +799,7 @@ static int rte_finalize(void)
opal_event_del(&term_handler);
/** Remove the USR signal handlers */
i = 0;
OPAL_LIST_FOREACH(sig, &mca_ess_hnp_component.signals, ess_hnp_signal_t) {
OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
opal_event_signal_del(forward_signals_events + i);
++i;
}

Просмотреть файл

@ -1,27 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for the SDS base.
#
[ess-hnp:cannot-forward]
The system does not support trapping and forwarding of the
specified signal:
signal: %s
param: %s
Please remove that signal from the ess_hnp_forward_signals MCA parameter.
[ess-hnp:unknown-signal]
The following signal was included in the ess_hnp_forward_signals
MCA parameter:
signal: %s
param: %s
This is not a recognized signal value. Please fix or remove it.

Просмотреть файл

@ -39,6 +39,7 @@
#include "orte/util/regex.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
@ -58,10 +59,24 @@ orte_ess_base_module_t orte_ess_slurm_module = {
NULL /* ft_event */
};
/* event callback that relays a trapped signal to the local procs */
static void signal_forward_callback(int fd, short event, void *arg);
/* array of signal events, allocated in rte_init with one slot per entry
 * of orte_ess_base_signals and freed in rte_finalize */
static opal_event_t *forward_signals_events = NULL;
/* set in rte_init once the forwarding handlers are set up; tells
 * rte_finalize that there are handlers to remove */
static bool signals_set=false;
/*
 * Arm one signal event on orte_event_base.
 *   signal - signal number to trap
 *   ev     - caller-provided event storage to initialize
 *   cbfunc - callback invoked when the signal is delivered
 */
static void setup_sighandler(int signal, opal_event_t *ev,
opal_event_cbfunc_t cbfunc)
{
/* the event itself is handed to the callback as its argument, so the
 * callback can read the signal number back out of it */
opal_event_signal_set(orte_event_base, ev, signal, cbfunc, ev);
opal_event_set_priority(ev, ORTE_ERROR_PRI);
opal_event_signal_add(ev, NULL);
}
static int rte_init(void)
{
int ret;
char *error = NULL;
orte_ess_base_signal_t *sig;
int idx;
/* run the prolog */
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
@ -76,11 +91,29 @@ static int rte_init(void)
* default procedure
*/
if (ORTE_PROC_IS_DAEMON) {
/** setup callbacks for signals we should forward */
if (0 < (idx = opal_list_get_size(&orte_ess_base_signals))) {
forward_signals_events = (opal_event_t*)malloc(sizeof(opal_event_t) * idx);
if (NULL == forward_signals_events) {
ret = ORTE_ERR_OUT_OF_RESOURCE;
error = "unable to malloc";
goto error;
}
idx = 0;
OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
setup_sighandler(sig->signal, forward_signals_events + idx, signal_forward_callback);
++idx;
}
}
signals_set = true;
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_orted_setup";
goto error;
}
/* setup the signal handlers */
return ORTE_SUCCESS;
}
@ -112,9 +145,23 @@ error:
static int rte_finalize(void)
{
int ret;
orte_ess_base_signal_t *sig;
unsigned int i;
/* if I am a daemon, finalize using the default procedure */
if (ORTE_PROC_IS_DAEMON) {
if (signals_set) {
/** Remove the USR signal handlers */
i = 0;
OPAL_LIST_FOREACH(sig, &orte_ess_base_signals, orte_ess_base_signal_t) {
opal_event_signal_del(forward_signals_events + i);
++i;
}
free (forward_signals_events);
forward_signals_events = NULL;
signals_set = false;
}
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
ORTE_ERROR_LOG(ret);
return ret;
@ -199,3 +246,52 @@ static int slurm_set_name(void)
return ORTE_SUCCESS;
}
/* Pass user signals to the local application processes */
/*
 * Event callback for a trapped signal.
 *
 * Reads the signal number from the event passed in arg, builds a
 * buffer holding the ORTE_DAEMON_SIGNAL_LOCAL_PROCS command, the
 * wildcard jobid and the signal number, and sends that buffer to this
 * process's own name on the daemon tag. Any pack or send failure is
 * logged and the buffer is released. The fd and event arguments are
 * not used.
 */
static void signal_forward_callback(int fd, short event, void *arg)
{
    opal_event_t *sigev = (opal_event_t*)arg;
    orte_daemon_cmd_flag_t command = ORTE_DAEMON_SIGNAL_LOCAL_PROCS;
    orte_jobid_t job = ORTE_JOBID_WILDCARD;
    opal_buffer_t *msg;
    int32_t signum, rc;

    signum = OPAL_EVENT_SIGNAL(sigev);
    if (!orte_execute_quiet) {
        fprintf(stderr, "%s: Forwarding signal %d to job\n",
                orte_basename, signum);
    }

    msg = OBJ_NEW(opal_buffer_t);

    /* message layout: command, jobid, signal */
    rc = opal_dss.pack(msg, &command, 1, ORTE_DAEMON_CMD);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    rc = opal_dss.pack(msg, &job, 1, ORTE_JOBID);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    rc = opal_dss.pack(msg, &signum, 1, OPAL_INT32);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    /* deliver the command to ourselves; only a negative return is an error */
    rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
                                 ORTE_PROC_MY_NAME, msg,
                                 ORTE_RML_TAG_DAEMON,
                                 NULL, NULL);
    if (0 > rc) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }
    return;

release:
    /* failure path only: the buffer was not handed off, so drop it here */
    OBJ_RELEASE(msg);
}

Просмотреть файл

@ -777,7 +777,8 @@ int orte_register_params(void)
/* Amount of time to wait for a stack trace to return from the daemons */
orte_stack_trace_wait_timeout = 30;
(void) mca_base_var_register ("orte", "orte", NULL, "timeout_for_stack_trace",
"Seconds to wait for stack traces to return before terminating the job (<= 0 wait forever)",
"Seconds to wait for stack traces to return before terminating "
"the job (<= 0 wait forever)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_stack_trace_wait_timeout);
@ -796,6 +797,5 @@ int orte_register_params(void)
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL,
&orte_data_server_uri);
return ORTE_SUCCESS;
}