From 871a9e0df48d58ad46a543bdc48195c99476a820 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 31 Mar 2010 19:20:06 +0000 Subject: [PATCH] Track process heartbeats with time_t, be a little less restrictive on who can retrieve an orte_job_t object This commit was SVN r22921. --- orte/runtime/orte_globals.c | 7 ++----- orte/runtime/orte_globals.h | 8 +------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 6f00edc581..e5d96f732b 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -457,8 +457,8 @@ orte_job_t* orte_get_job_data_object(orte_jobid_t job) { int32_t ljob; - /* if I am not an HNP, I cannot provide this object */ - if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_CM) { + /* if the job data wasn't setup, we cannot provide the data */ + if (NULL == orte_job_data) { return NULL; } @@ -602,9 +602,6 @@ static void orte_job_construct(orte_job_t* job) job->abort = false; job->aborted_proc = NULL; - job->err_cbfunc = NULL; - job->err_cbstates = ORTE_PROC_STATE_UNDEF; - job->err_cbdata = NULL; job->max_restarts = INT32_MAX; #if OPAL_ENABLE_FT_CR == 1 diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 90f1785dde..c0aa0bb64f 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -381,12 +381,6 @@ typedef struct { bool abort; /* proc that caused that to happen */ struct orte_proc_t *aborted_proc; - /* errmgr callback function for this job, if any */ - orte_err_cb_fn_t err_cbfunc; - /* states that will trigger callback */ - orte_proc_state_t err_cbstates; - /* errmgr callback data */ - void *err_cbdata; /* max number of times a process can be restarted */ int32_t max_restarts; #if OPAL_ENABLE_FT_CR == 1 @@ -440,7 +434,7 @@ struct orte_proc_t { /* RML contact info */ char *rml_uri; /* seconds when last heartbeat was detected */ - int beat; + time_t beat; /* number of times this process has been restarted */ int32_t restarts; #if OPAL_ENABLE_FT_CR == 1