b9893aacc5
1. file activity - can monitor file size, access and modification times. If these fail to change over a specified number of sampling iterations (rate is an mca param), then the errmgr is notified. 2. memory usage - checks amount of memory used by a process. Limit and sampling rate can be set. This support must be enabled by configuring --enable-sensors. ompi_info and orte-info have been updated to include the new framework. Also includes some initial steps toward restoring the recovery capability. Most notably, the ODLS API has been extended to include a "restart_proc" entry for restarting a local process, and organizes the various ERRMGR framework globals into a single struct as we do in the other ORTE frameworks. Fix an oversight in the ERRMGR framework where a pointer array was constructed, but not initialized. Implementation continues. This commit was SVN r23043.
63 строки
1.9 KiB
C
63 строки
1.9 KiB
C
/*
|
|
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/constants.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
#include "opal/util/trace.h"
|
|
#include "opal/mca/mca.h"
|
|
#include "opal/mca/base/base.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
#include "orte/mca/errmgr/base/base.h"
|
|
#include "orte/mca/errmgr/base/errmgr_private.h"
|
|
|
|
|
|
int orte_errmgr_base_close(void)
|
|
{
|
|
orte_errmgr_base_module_t *module = NULL;
|
|
int i;
|
|
|
|
OPAL_TRACE(5);
|
|
|
|
/* Close all selected components */
|
|
for(i = 0; i < orte_errmgr_base.modules.size; ++i) {
|
|
module = (orte_errmgr_base_module_t*)opal_pointer_array_get_item(&orte_errmgr_base.modules, i);
|
|
if( NULL == module ) {
|
|
continue;
|
|
}
|
|
if( NULL != module->finalize ) {
|
|
module->finalize();
|
|
}
|
|
}
|
|
|
|
/* Close all remaining available components (may be one if this is a
|
|
OMPI RTE program, or [possibly] multiple if this is ompi_info) */
|
|
mca_base_components_close(orte_errmgr_base.output,
|
|
&orte_errmgr_base_components_available,
|
|
NULL);
|
|
|
|
OBJ_DESTRUCT(&orte_errmgr_base.modules);
|
|
|
|
orte_errmgr_base.initialized = false;
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|