4e79a51395
This required a little fiddling with a number of areas. Biggest problem was that it uncovered a potential for an infinite loop to be created in the registry. If a callback function modified the registry, the registry checked the triggers to see if anything had fired. Well, if the original callback was due to a trigger firing, that condition hadn't changed - so the trigger fired again....which caused the callback to be called, which modified the registry, which checked the triggers, etc. etc. Triggers are now checked and then "flagged" as being "in process" so that the registry will NOT recheck that trigger until all callbacks have been processed. Tried doing this with subscriptions as well, but that caused a problem - when we release processes from a stagegate, they (at the moment) immediately place data on the registry that should cause a subscription to fire. Unfortunately, the system will just hang if that subscription doesn't get processed. So, I have left the subscription system alone - any callback function that modifies the registry in a fashion that will fire a subscription will indeed fire that subscription. We'll have to see if this causes problems - it shouldn't, but a careless user could lock things up if the callback generates a callback to itself. Also fixed the code that placed a process' RML contact info on the registry to eliminate the leading '/' from the string. This commit was SVN r6684.
137 строки
3.8 KiB
C
137 строки
3.8 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include "include/orte_constants.h"
|
|
|
|
#include "dps/dps.h"
|
|
#include "mca/mca.h"
|
|
#include "mca/base/base.h"
|
|
#include "mca/base/mca_base_param.h"
|
|
#include "mca/errmgr/errmgr.h"
|
|
#include "opal/util/output.h"
|
|
#include "util/proc_info.h"
|
|
#include "mca/oob/base/base.h"
|
|
|
|
#include "mca/soh/base/base.h"
|
|
|
|
#include "stdio.h" /* just for gef debug */
|
|
|
|
|
|
/*
|
|
* The following file was created by configure. It contains extern
|
|
* statements and the definition of an array of pointers to each
|
|
* component's public mca_base_component_t struct.
|
|
*/
|
|
|
|
#include "orte/mca/soh/base/static-components.h"
|
|
|
|
/*
|
|
* globals
|
|
*/
|
|
|
|
/*
|
|
* Global variables
|
|
*/
|
|
/* State shared by the soh base code.  orte_soh_base_open() stores the debug
 * output handle in its soh_output member and passes its soh_components
 * member to mca_base_components_open(). */
orte_soh_base_t orte_soh_base;
|
|
|
|
orte_soh_base_module_t orte_soh = {
|
|
|
|
orte_soh_base_get_proc_soh,
|
|
orte_soh_base_set_proc_soh,
|
|
orte_soh_base_get_node_soh_not_available,
|
|
orte_soh_base_set_node_soh_not_available,
|
|
orte_soh_base_get_job_soh,
|
|
orte_soh_base_set_job_soh,
|
|
orte_soh_base_begin_monitoring_not_available,
|
|
orte_soh_base_module_finalize_not_available
|
|
};
|
|
|
|
/**
|
|
* Function for finding and opening either all MCA components, or the one
|
|
* that was specifically requested via a MCA parameter.
|
|
*/
|
|
int orte_soh_base_open(void)
|
|
{
|
|
|
|
int param, value, rc;
|
|
orte_data_type_t tmp;
|
|
|
|
/* fprintf(stderr,"orte_soh_base_open:enter\n"); */
|
|
|
|
/* setup output for debug messages */
|
|
|
|
orte_soh_base.soh_output = opal_output_open(NULL);
|
|
param = mca_base_param_register_int("soh", "base", "verbose", NULL, 0);
|
|
mca_base_param_lookup_int(param, &value);
|
|
if (value != 0) {
|
|
orte_soh_base.soh_output = opal_output_open(NULL);
|
|
} else {
|
|
orte_soh_base.soh_output = -1;
|
|
}
|
|
|
|
|
|
/* register the base system types with the DPS */
|
|
tmp = ORTE_NODE_STATE;
|
|
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_soh_base_pack_node_state,
|
|
orte_soh_base_unpack_node_state,
|
|
"ORTE_NODE_STATE", &tmp))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
tmp = ORTE_PROC_STATE;
|
|
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_soh_base_pack_proc_state,
|
|
orte_soh_base_unpack_proc_state,
|
|
"ORTE_PROC_STATE", &tmp))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
tmp = ORTE_JOB_STATE;
|
|
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_soh_base_pack_job_state,
|
|
orte_soh_base_unpack_job_state,
|
|
"ORTE_JOB_STATE", &tmp))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
tmp = ORTE_EXIT_CODE;
|
|
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_soh_base_pack_exit_code,
|
|
orte_soh_base_unpack_exit_code,
|
|
"ORTE_EXIT_CODE", &tmp))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
/* Open up all available components */
|
|
|
|
if (OMPI_SUCCESS !=
|
|
mca_base_components_open("soh", 0, mca_soh_base_static_components,
|
|
&orte_soh_base.soh_components, true)) {
|
|
|
|
/* fprintf(stderr,"orte_soh_base_open:failed\n"); */
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
/* All done */
|
|
/* fprintf(stderr,"orte_soh_base_open:success\n"); */
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|