1
1

orte: send error messages to stderr.

When a job terminates normally but with a non-zero exit code,
display the error message to stderr.

Thanks to Emre Brookes for the bug report.

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>

(cherry picked from commit open-mpi/ompi@893270caee)
Этот коммит содержится в:
Gilles Gouaillardet 2018-09-12 17:03:55 +09:00
родитель 3bf2220b3e
Коммит 229ec82cf0
5 изменённых файлов: 33 добавлений и 12 удалений

Просмотреть файл

@ -12,6 +12,8 @@
# All rights reserved.
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2018 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -105,3 +107,6 @@ An internal error has occurred in ORTE:
%s
This is something that should be reported to the developers.
#
[normal-termination-but]
%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.

Просмотреть файл

@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -839,14 +841,11 @@ static void default_hnp_abort(orte_job_t *jdata)
i32ptr = &i32;
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32)) {
/* warn user */
opal_output(orte_clean_output,
"-------------------------------------------------------\n"
"%s job %s terminated normally, but %d %s. Per user-direction, the job has been aborted.\n"
"-------------------------------------------------------",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
i32, (1 == i32) ? "process returned\na non-zero exit code" :
"processes returned\nnon-zero exit codes");
orte_show_help("help-errmgr-base.txt", "normal-termination-but", true,
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
i32, (1 == i32) ? "process returned\na non-zero exit code" :
"processes returned\nnon-zero exit codes");
}
OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,

Просмотреть файл

@ -1,6 +1,8 @@
#
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2018 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -8,6 +10,8 @@
# $HEADER$
#
dist_ortedata_DATA += base/help-state-base.txt
headers += \
base/state_private.h \
base/base.h

13
orte/mca/state/base/help-state-base.txt Обычный файл
Просмотреть файл

@ -0,0 +1,13 @@
# -*- text -*-
#
# Copyright (c) 2018 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
#
[normal-termination-but]
While %s job %s terminated normally, %d %s. Further examination may be required.

Просмотреть файл

@ -1,6 +1,8 @@
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -38,6 +40,7 @@
#include "orte/mca/routed/routed.h"
#include "orte/util/session_dir.h"
#include "orte/util/threads.h"
#include "orte/util/show_help.h"
#include "orte/mca/state/base/base.h"
#include "orte/mca/state/base/state_private.h"
@ -847,10 +850,7 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
}
/* warn user */
opal_output(orte_clean_output,
"-------------------------------------------------------\n"
"While %s job %s terminated normally, %d %s. Further examination may be required.\n"
"-------------------------------------------------------",
orte_show_help("help-state-base.txt", "normal-termination-but", true,
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
i32, (1 == i32) ? "process returned\na non-zero exit code." :