2004-01-15 07:47:20 +03:00
|
|
|
/*
|
2007-03-17 02:11:45 +03:00
|
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
2005-11-05 22:57:48 +03:00
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 23:09:25 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-01-15 07:47:20 +03:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte_config.h"
|
2006-09-15 01:29:51 +04:00
|
|
|
#include "orte/orte_constants.h"
|
2004-01-15 07:47:20 +03:00
|
|
|
|
|
|
|
#include <stdio.h>
|
2006-09-15 01:29:51 +04:00
|
|
|
#ifdef HAVE_FCNTL_H
|
|
|
|
#include <fcntl.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
2004-01-15 07:47:20 +03:00
|
|
|
|
2005-07-04 03:31:27 +04:00
|
|
|
#include "opal/util/output.h"
|
2005-09-15 21:13:13 +04:00
|
|
|
#include "opal/runtime/opal_progress.h"
|
|
|
|
#include "opal/event/event.h"
|
2006-09-15 01:29:51 +04:00
|
|
|
#include "opal/util/os_path.h"
|
|
|
|
|
|
|
|
#include "orte/runtime/runtime.h"
|
2005-09-15 21:13:13 +04:00
|
|
|
#include "orte/util/session_dir.h"
|
|
|
|
#include "orte/util/sys_info.h"
|
2007-03-17 02:11:45 +03:00
|
|
|
|
|
|
|
#include "orte/runtime/orte_cr.h"
|
|
|
|
|
2006-09-15 01:29:51 +04:00
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
2004-01-15 07:47:20 +03:00
|
|
|
|
|
|
|
|
2006-09-15 01:29:51 +04:00
|
|
|
/*
|
|
|
|
* We do NOT call the regular C-library "abort" function, even
|
|
|
|
* though that would have alerted us to the fact that this is
|
|
|
|
* an abnormal termination, because it would automatically cause
|
|
|
|
* a core file to be generated. On large systems, that can be
|
|
|
|
* overwhelming (imagine a few thousand Gbyte-sized files hitting
|
|
|
|
* a shared file system simultaneously...ouch!).
|
|
|
|
*
|
|
|
|
* However, this causes a problem for OpenRTE as the system truly
|
|
|
|
* needs to know that this actually IS an abnormal termination.
|
|
|
|
* To get around the problem, we create a file in the session
|
|
|
|
* directory - we don't need to put anything in it, though, as its
|
|
|
|
* very existence simply alerts us that this was an abnormal
|
|
|
|
* termination.
|
|
|
|
*
|
|
|
|
* The session directory finalize system will clean this file up
|
|
|
|
* for us automagically. However, it needs to stick around long
|
|
|
|
* enough for our local daemon to find it! So, we do NOT call
|
|
|
|
* session_dir_finalize here!!! Someone will clean up for us.
|
|
|
|
*
|
|
|
|
* In some cases, however, we DON'T want to create that alert. For
|
|
|
|
* example, if an orted detects that the HNP has died, then there
|
|
|
|
* is truly nobody to alert! In these cases, we pass report=false
|
|
|
|
* to prevent the abort file from being created. This allows the
|
|
|
|
* session directory tree to cleanly be eliminated.
|
|
|
|
*/
|
|
|
|
int orte_abort(int status, bool report)
|
2004-01-15 07:47:20 +03:00
|
|
|
{
|
2006-09-15 01:29:51 +04:00
|
|
|
char *abort_file;
|
|
|
|
int fd;
|
2004-01-19 20:46:34 +03:00
|
|
|
|
2006-09-15 01:29:51 +04:00
|
|
|
/* Exit - do NOT do a normal finalize as this will very likely
|
2005-09-15 18:06:03 +04:00
|
|
|
* hang the process. We are aborting due to an abnormal condition
|
2005-09-15 21:13:13 +04:00
|
|
|
* that precludes normal cleanup
|
|
|
|
*
|
|
|
|
* We do need to do the following bits to make sure we leave a
|
|
|
|
* clean environment. Taken from orte_finalize():
|
|
|
|
* - Assume errmgr cleans up child processes before we exit.
|
|
|
|
*/
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
/* CRS cleanup since it may have a named pipe and thread active */
|
|
|
|
orte_cr_finalize();
|
|
|
|
|
2006-09-15 01:29:51 +04:00
|
|
|
/* If we were asked to report this termination,
|
|
|
|
* write an "abort" file into our session directory
|
|
|
|
*/
|
|
|
|
if (report) {
|
|
|
|
abort_file = opal_os_path(false, orte_process_info.proc_session_dir, "abort", NULL);
|
|
|
|
if (NULL == abort_file) {
|
|
|
|
/* got a problem */
|
|
|
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
|
|
goto CLEANUP;
|
|
|
|
}
|
|
|
|
fd = open(abort_file, O_CREAT);
|
|
|
|
if (0 < fd) close(fd);
|
|
|
|
}
|
|
|
|
|
|
|
|
CLEANUP:
|
2005-09-15 21:18:35 +04:00
|
|
|
/* - Clean out the global structures
|
|
|
|
* (not really necessary, but good practice) */
|
2005-09-15 21:13:13 +04:00
|
|
|
orte_sys_info_finalize();
|
|
|
|
orte_proc_info_finalize();
|
|
|
|
orte_univ_info_finalize();
|
|
|
|
|
|
|
|
/* Now Exit */
|
2005-07-12 23:33:37 +04:00
|
|
|
exit(status);
|
2004-01-15 07:47:20 +03:00
|
|
|
}
|