Add support for detecting rapid failures so the errmgr can respond accordingly
This commit was SVN r24796.
Этот коммит содержится в:
родитель
81f38b258a
Коммит
afceaaa8e4
@ -898,6 +898,9 @@ static void orte_proc_construct(orte_proc_t* proc)
|
||||
proc->exit_code = 0; /* Assume we won't fail unless otherwise notified */
|
||||
proc->rml_uri = NULL;
|
||||
proc->restarts = 0;
|
||||
proc->fast_failures = 0;
|
||||
proc->last_failure.tv_sec = 0;
|
||||
proc->last_failure.tv_usec = 0;
|
||||
proc->reported = false;
|
||||
proc->beat = 0;
|
||||
OBJ_CONSTRUCT(&proc->stats, opal_pstats_t);
|
||||
|
@ -488,6 +488,10 @@ struct orte_proc_t {
|
||||
char *rml_uri;
|
||||
/* number of times this process has been restarted */
|
||||
int32_t restarts;
|
||||
/* time of last failure */
|
||||
struct timeval last_failure;
|
||||
/* number of failures in "fast" window */
|
||||
int32_t fast_failures;
|
||||
/* flag to indicate proc has reported in */
|
||||
bool reported;
|
||||
/* whether a heartbeat was received during the last time period */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user