From 229ec82cf0d7ab03b8ebe25ad28ca89895b51f93 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 12 Sep 2018 17:03:55 +0900 Subject: [PATCH] orte: send error messages to stderr. When a job terminates normally but with a non zero exit code, display the error message to stderr. Thanks Emre Brookes for the bug report. Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@893270caeeedd986d7d3bbef8dc7a638b27792d3) --- orte/mca/errmgr/base/help-errmgr-base.txt | 5 +++++ orte/mca/errmgr/default_hnp/errmgr_default_hnp.c | 15 +++++++-------- orte/mca/state/base/Makefile.am | 4 ++++ orte/mca/state/base/help-state-base.txt | 13 +++++++++++++ orte/mca/state/base/state_base_fns.c | 8 ++++---- 5 files changed, 33 insertions(+), 12 deletions(-) create mode 100644 orte/mca/state/base/help-state-base.txt diff --git a/orte/mca/errmgr/base/help-errmgr-base.txt b/orte/mca/errmgr/base/help-errmgr-base.txt index 07a9f71909..30ff0f882f 100644 --- a/orte/mca/errmgr/base/help-errmgr-base.txt +++ b/orte/mca/errmgr/base/help-errmgr-base.txt @@ -12,6 +12,8 @@ # All rights reserved. # Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -105,3 +107,6 @@ An internal error has occurred in ORTE: %s This is something that should be reported to the developers. +# +[normal-termination-but] +%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted. diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index 9c807dd54b..05a2a83713 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -11,6 +11,8 @@ * All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -839,14 +841,11 @@ static void default_hnp_abort(orte_job_t *jdata) i32ptr = &i32; if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32)) { /* warn user */ - opal_output(orte_clean_output, - "-------------------------------------------------------\n" - "%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.\n" - "-------------------------------------------------------", - (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child", - (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), - i32, (1 == i32) ? "process returned\na non-zero exit code" : - "processes returned\nnon-zero exit codes"); + orte_show_help("help-errmgr-base.txt", "normal-termination-but", true, + (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child", + (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), + i32, (1 == i32) ? "process returned\na non-zero exit code" : + "processes returned\nnon-zero exit codes"); } OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output, diff --git a/orte/mca/state/base/Makefile.am b/orte/mca/state/base/Makefile.am index e8db0481f9..623a490439 100644 --- a/orte/mca/state/base/Makefile.am +++ b/orte/mca/state/base/Makefile.am @@ -1,6 +1,8 @@ # # Copyright (c) 2011-2013 Los Alamos National Security, LLC. # All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -8,6 +10,8 @@ # $HEADER$ # +dist_ortedata_DATA += base/help-state-base.txt + headers += \ base/state_private.h \ base/base.h diff --git a/orte/mca/state/base/help-state-base.txt b/orte/mca/state/base/help-state-base.txt new file mode 100644 index 0000000000..06c4c31004 --- /dev/null +++ b/orte/mca/state/base/help-state-base.txt @@ -0,0 +1,13 @@ +# -*- text -*- +# +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# +[normal-termination-but] +While %s job %s terminated normally, %d %s. Further examination may be required. diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index 3a2f6e0b83..e3c5682cfd 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +40,7 @@ #include "orte/mca/routed/routed.h" #include "orte/util/session_dir.h" #include "orte/util/threads.h" +#include "orte/util/show_help.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/base/state_private.h" @@ -847,10 +850,7 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) } /* warn user */ - opal_output(orte_clean_output, - "-------------------------------------------------------\n" - "While %s job %s terminated normally, %d %s. Further examination may be required.\n" - "-------------------------------------------------------", + orte_show_help("help-state-base.txt", "normal-termination-but", true, (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child", (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), i32, (1 == i32) ? "process returned\na non-zero exit code." :