From b586f2952e8dbbd0ec032f0516ff47426f1ef11f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 28 Apr 2011 15:07:00 +0000 Subject: [PATCH] Arggg...revert r24645. I knew those fields were there for a reason...sigh. This commit was SVN r24647. The following SVN revision numbers were found above: r24645 --> open-mpi/ompi@e4732110da2d0d082cc0cc8631159fff16e0048d --- orte/mca/errmgr/hnp/errmgr_hnp.c | 10 ++++++++-- .../data_type_support/orte_dt_packing_fns.c | 14 ++++++++++++++ .../data_type_support/orte_dt_print_fns.c | 4 +++- .../data_type_support/orte_dt_unpacking_fns.c | 16 ++++++++++++++++ orte/runtime/orte_globals.c | 10 ++++++++++ orte/runtime/orte_globals.h | 4 ++++ 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/orte/mca/errmgr/hnp/errmgr_hnp.c b/orte/mca/errmgr/hnp/errmgr_hnp.c index 03c82291ad..f75bf614d0 100644 --- a/orte/mca/errmgr/hnp/errmgr_hnp.c +++ b/orte/mca/errmgr/hnp/errmgr_hnp.c @@ -1443,11 +1443,17 @@ static void check_job_complete(orte_job_t *jdata) ORTE_UPDATE_EXIT_STATUS(0); /* provide a notifier message if that framework is active - ignored otherwise */ if (NULL != (job = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, 1))) { + if (NULL == job->name) { + job->name = strdup(orte_process_info.nodename); + } + if (NULL == job->instance) { + asprintf(&job->instance, "%d", orte_process_info.pid); + } if (0 == orte_exit_status) { - asprintf(&msg, "Job %s complete", ORTE_JOBID_PRINT(job->jobid)); + asprintf(&msg, "Job %s:%s complete", job->name, job->instance); orte_notifier.log(ORTE_NOTIFIER_INFO, 0, msg); } else { - asprintf(&msg, "Job %s terminated abnormally", ORTE_JOBID_PRINT(job->jobid)); + asprintf(&msg, "Job %s:%s terminated abnormally", job->name, job->instance); orte_notifier.log(ORTE_NOTIFIER_ALERT, orte_exit_status, msg); } free(msg); diff --git a/orte/runtime/data_type_support/orte_dt_packing_fns.c b/orte/runtime/data_type_support/orte_dt_packing_fns.c index 6a89d733ec..12f88f3958 100644 --- a/orte/runtime/data_type_support/orte_dt_packing_fns.c +++ b/orte/runtime/data_type_support/orte_dt_packing_fns.c @@ -156,6 +156,20 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src, jobs = (orte_job_t**) src; for (i=0; i < num_vals; i++) { + /* pack the name of this job - may be null */ + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, + (void*)(&(jobs[i]->name)), 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* pack the name of the instance of the job - may be null */ + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, + (void*)(&(jobs[i]->instance)), 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* pack the jobid */ if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, (void*)(&(jobs[i]->jobid)), 1, ORTE_JOBID))) { diff --git a/orte/runtime/data_type_support/orte_dt_print_fns.c b/orte/runtime/data_type_support/orte_dt_print_fns.c index 18d48bc961..1320339458 100644 --- a/orte/runtime/data_type_support/orte_dt_print_fns.c +++ b/orte/runtime/data_type_support/orte_dt_print_fns.c @@ -218,8 +218,10 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty asprintf(&pfx2, "%s", prefix); } - asprintf(&tmp, "\n%sData for job: %s\tRecovery: %s(%s)\n%s\tNum apps: %ld\tControls: %0x\tStdin target: %s\tState: %s\tAbort: %s", pfx2, + asprintf(&tmp, "\n%sData for job: %s\tName: %s\tInstance: %s\tRecovery: %s(%s)\n%s\tNum apps: %ld\tControls: %0x\tStdin target: %s\tState: %s\tAbort: %s", pfx2, ORTE_JOBID_PRINT(src->jobid), + (NULL != src->name) ? src->name : "NULL", + (NULL != src->instance) ? src->instance : "NULL", (src->enable_recovery) ? "ENABLED" : "DISABLED", (src->recovery_defined) ? "DEFINED" : "DEFAULT", pfx2, diff --git a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c index a0cff87338..394f754da6 100644 --- a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c +++ b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c @@ -166,6 +166,22 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest, return ORTE_ERR_OUT_OF_RESOURCE; } + /* unpack the name of this job - may be null */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, + &(jobs[i]->name), &n, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* unpack the instance name of this job - may be null */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, + &(jobs[i]->instance), &n, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* unpack the jobid */ n = 1; if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 35828ddf0e..d86cbe9940 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -652,6 +652,8 @@ OBJ_CLASS_INSTANCE(orte_app_context_t, static void orte_job_construct(orte_job_t* job) { + job->name = NULL; + job->instance = NULL; job->jobid = ORTE_JOBID_INVALID; job->apps = OBJ_NEW(opal_pointer_array_t); opal_pointer_array_init(job->apps, @@ -717,6 +719,14 @@ static void orte_job_destruct(orte_job_t* job) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job->jobid)); } + if (NULL != job->name) { + free(job->name); + } + + if (NULL != job->instance) { + free(job->instance); + } + for (n=0; n < job->apps->size; n++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(job->apps, n))) { continue; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 4543dbc1a8..87593e80fb 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -364,6 +364,10 @@ typedef uint16_t orte_job_controls_t; typedef struct { /** Base object so this can be put on a list */ opal_list_item_t super; + /* a name for this job */ + char *name; + /* a name for this instance of the job */ + char *instance; /* jobid for this job */ orte_jobid_t jobid; /* app_context array for this job */