1
1

Record the number of heartbeats received in each period for diagnostic purposes

This commit was SVN r24714.
Этот коммит содержится в:
Ralph Castain 2011-05-20 00:21:33 +00:00
родитель 69dce0ec10
Коммит dc0bb0571b
3 изменённых файла: 7 добавлений и 7 удалений

Просмотреть файл

@@ -492,7 +492,7 @@ static void check_heartbeat(int fd, short dummy, void *arg)
continue;
}
if (!proc->beat) {
if (0 == proc->beat) {
/* no heartbeat recvd in last window */
OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
"%s sensor:check_heartbeat FAILED for daemon %s",
@@ -503,12 +503,12 @@ static void check_heartbeat(int fd, short dummy, void *arg)
0, ORTE_ERR_HEARTBEAT_LOST);
} else {
OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output,
"%s HEARTBEAT DETECTED FOR %s",
"%s HEARTBEAT DETECTED FOR %s: NUM BEATS %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc->name)));
ORTE_NAME_PRINT(&proc->name), proc->beat));
}
/* reset for next period */
proc->beat = false;
proc->beat = 0;
}
reset:
@@ -545,7 +545,7 @@ static void recv_beats(int status,
"%s marked beat from %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender)));
proc->beat = true;
proc->beat++;
/* if this daemon has reappeared, reset things */
if (ORTE_PROC_STATE_HEARTBEAT_FAILED == proc->state) {
proc->state = ORTE_PROC_STATE_RUNNING;

Просмотреть файл

@@ -890,7 +890,7 @@ static void orte_proc_construct(orte_proc_t* proc)
proc->rml_uri = NULL;
proc->restarts = 0;
proc->reported = false;
proc->beat = false;
proc->beat = 0;
OBJ_CONSTRUCT(&proc->stats, opal_pstats_t);
#if OPAL_ENABLE_FT_CR == 1
proc->ckpt_state = 0;

Просмотреть файл

@@ -482,7 +482,7 @@ struct orte_proc_t {
/* flag to indicate proc has reported in */
bool reported;
/* if heartbeat recvd during last time period */
bool beat;
int beat;
/* process stats at last sampling */
opal_pstats_t stats;
#if OPAL_ENABLE_FT_CR == 1