1
1
openmpi/orte/mca/errmgr/base/errmgr_base_fns.c
Jeff Squyres b3bd549331 - Change a few calls from exit() to orte_abort() so that we get
session directory cleanup (among other things)
- When we get an abnormal exit in orterun (i.e., timeout expires and
  we haven't gotten termination notices from all processes), print a
  better message an exit in a better way (which includes session
  directory cleanup)
- Fix tm and poe pls's to not exit() but rather propagate the error up
  the stack (where relevant)

This commit was SVN r7058.
2005-08-26 20:36:11 +00:00

89 строки
2.2 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
#include "include/orte_constants.h"
#include "mca/schema/schema.h"
#include "runtime/runtime.h"
#include "runtime/orte_wait.h"
#include "opal/util/output.h"
#include "util/proc_info.h"
#include "mca/ns/ns.h"
#include "mca/rmgr/rmgr.h"
#include "mca/errmgr/base/base.h"
void orte_errmgr_base_log(int error_code, char *filename, int line)
{
if (NULL == orte_process_info.my_name) {
opal_output(0, "[NO-NAME] ORTE_ERROR_LOG: %s in file %s at line %d",
ORTE_ERROR_NAME(error_code), filename, line);
} else {
opal_output(0, "[%lu,%lu,%lu] ORTE_ERROR_LOG: %s in file %s at line %d",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_ERROR_NAME(error_code), filename, line);
}
/* orte_errmgr_base_error_detected(error_code); */
}
void orte_errmgr_base_proc_aborted(orte_process_name_t *proc)
{
orte_jobid_t job;
int rc;
if (ORTE_SUCCESS != (rc = orte_ns.get_jobid(&job, proc))) {
ORTE_ERROR_LOG(rc);
return;
}
orte_rmgr.terminate_job(job);
}
void orte_errmgr_base_incomplete_start(orte_jobid_t job)
{
orte_rmgr.terminate_job(job);
}
void orte_errmgr_base_error_detected(int error_code)
{
}
void orte_errmgr_base_abort()
{
/* kill and reap all children */
orte_wait_kill(9);
/* abnormal exit */
orte_abort(-1, NULL);
}
int orte_errmgr_base_register_job(orte_jobid_t job)
{
/* register subscription for process_status values
* changing to abnormal termination codes
*/
return ORTE_SUCCESS;
}