Cleanup the sensor code:
* use the global flags for Linux and Apple being found instead of re-doing the case statements
* update the select procedure to ignore components that measure the same thing (e.g., resusage and sigar), taking the higher-priority module

cmr=v1.7.5:reviewer=jsquyres:subject=Cleanup the sensor code

This commit was SVN r30368.
Этот коммит содержится в:
родитель
7ba8bd81fa
Коммит
de07a64599
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -117,7 +118,7 @@ void orte_sensor_base_sample(int fd, short args, void *cbdata)
|
|||||||
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||||
"%s sensor:base: sampling component %s",
|
"%s sensor:base: sampling component %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
i_module->component->mca_component_name);
|
i_module->component->base_version.mca_component_name);
|
||||||
i_module->module->sample();
|
i_module->module->sample();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -147,7 +148,7 @@ void orte_sensor_base_log(char *comp, opal_buffer_t *data)
|
|||||||
if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) {
|
if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (0 == strcmp(comp, i_module->component->mca_component_name)) {
|
if (0 == strcmp(comp, i_module->component->base_version.mca_component_name)) {
|
||||||
if (NULL != i_module->module->log) {
|
if (NULL != i_module->module->log) {
|
||||||
i_module->module->log(data);
|
i_module->module->log(data);
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,7 @@ static bool selected = false;
|
|||||||
int orte_sensor_base_select(void)
|
int orte_sensor_base_select(void)
|
||||||
{
|
{
|
||||||
mca_base_component_list_item_t *cli = NULL;
|
mca_base_component_list_item_t *cli = NULL;
|
||||||
mca_base_component_t *component = NULL;
|
orte_sensor_base_component_t *component = NULL;
|
||||||
mca_base_module_t *module = NULL;
|
mca_base_module_t *module = NULL;
|
||||||
orte_sensor_active_module_t *i_module;
|
orte_sensor_active_module_t *i_module;
|
||||||
int priority = 0, i, j, low_i;
|
int priority = 0, i, j, low_i;
|
||||||
@ -49,6 +49,7 @@ int orte_sensor_base_select(void)
|
|||||||
bool none_found;
|
bool none_found;
|
||||||
orte_sensor_active_module_t *tmp_module = NULL, *tmp_module_sw = NULL;
|
orte_sensor_active_module_t *tmp_module = NULL, *tmp_module_sw = NULL;
|
||||||
orte_job_t *jdata;
|
orte_job_t *jdata;
|
||||||
|
bool duplicate;
|
||||||
|
|
||||||
if (selected) {
|
if (selected) {
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
@ -66,15 +67,15 @@ int orte_sensor_base_select(void)
|
|||||||
*/
|
*/
|
||||||
none_found = true;
|
none_found = true;
|
||||||
OPAL_LIST_FOREACH(cli, &orte_sensor_base_framework.framework_components, mca_base_component_list_item_t) {
|
OPAL_LIST_FOREACH(cli, &orte_sensor_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||||
component = (mca_base_component_t *) cli->cli_component;
|
component = (orte_sensor_base_component_t *) cli->cli_component;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there is a query function then use it.
|
* If there is a query function then use it.
|
||||||
*/
|
*/
|
||||||
if (NULL == component->mca_query_component) {
|
if (NULL == component->base_version.mca_query_component) {
|
||||||
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||||
"sensor:base:select Skipping component [%s]. It does not implement a query function",
|
"sensor:base:select Skipping component [%s]. It does not implement a query function",
|
||||||
component->mca_component_name );
|
component->base_version.mca_component_name );
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,9 +84,9 @@ int orte_sensor_base_select(void)
|
|||||||
*/
|
*/
|
||||||
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||||
"sensor:base:select Querying component [%s]",
|
"sensor:base:select Querying component [%s]",
|
||||||
component->mca_component_name);
|
component->base_version.mca_component_name);
|
||||||
|
|
||||||
component->mca_query_component(&module, &priority);
|
component->base_version.mca_query_component(&module, &priority);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If no module was returned or negative priority, then skip component
|
* If no module was returned or negative priority, then skip component
|
||||||
@ -93,7 +94,40 @@ int orte_sensor_base_select(void)
|
|||||||
if (NULL == module || priority < 0) {
|
if (NULL == module || priority < 0) {
|
||||||
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||||
"sensor:base:select Skipping component [%s]. Query failed to return a module",
|
"sensor:base:select Skipping component [%s]. Query failed to return a module",
|
||||||
component->mca_component_name );
|
component->base_version.mca_component_name );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check to see if we already have someone who senses the
|
||||||
|
* same things - if so, take the higher priority one
|
||||||
|
*/
|
||||||
|
duplicate = false;
|
||||||
|
for (i=0; i < tmp_array.size; i++) {
|
||||||
|
tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, i);
|
||||||
|
if (NULL == tmp_module) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (0 == strcmp(component->data_measured, tmp_module->component->data_measured)) {
|
||||||
|
if (tmp_module->priority < priority) {
|
||||||
|
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||||
|
"sensor:base:select Replacing component %s with %s - both measure %s",
|
||||||
|
tmp_module->component->base_version.mca_component_name,
|
||||||
|
component->base_version.mca_component_name,
|
||||||
|
component->data_measured);
|
||||||
|
OBJ_RELEASE(tmp_module);
|
||||||
|
opal_pointer_array_set_item(&tmp_array, i, NULL);
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
duplicate = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (duplicate) {
|
||||||
|
/* ignore this component */
|
||||||
|
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||||
|
"sensor:base:select Ignoring component %s - duplicate with higher priority measures %s",
|
||||||
|
component->base_version.mca_component_name,
|
||||||
|
component->data_measured);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,7 +136,7 @@ int orte_sensor_base_select(void)
|
|||||||
*/
|
*/
|
||||||
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||||
"sensor:base:select Query of component [%s] set priority to %d",
|
"sensor:base:select Query of component [%s] set priority to %d",
|
||||||
component->mca_component_name, priority);
|
component->base_version.mca_component_name, priority);
|
||||||
tmp_module = OBJ_NEW(orte_sensor_active_module_t);
|
tmp_module = OBJ_NEW(orte_sensor_active_module_t);
|
||||||
tmp_module->component = component;
|
tmp_module->component = component;
|
||||||
tmp_module->module = (orte_sensor_base_module_t*)module;
|
tmp_module->module = (orte_sensor_base_module_t*)module;
|
||||||
@ -169,7 +203,7 @@ int orte_sensor_base_select(void)
|
|||||||
}
|
}
|
||||||
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||||
"sensor:base:select Add module with priority [%s] %d",
|
"sensor:base:select Add module with priority [%s] %d",
|
||||||
tmp_module->component->mca_component_name, tmp_module->priority);
|
tmp_module->component->base_version.mca_component_name, tmp_module->priority);
|
||||||
opal_pointer_array_add(&orte_sensor_base.modules, tmp_module);
|
opal_pointer_array_add(&orte_sensor_base.modules, tmp_module);
|
||||||
}
|
}
|
||||||
OBJ_DESTRUCT(&tmp_array);
|
OBJ_DESTRUCT(&tmp_array);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -50,7 +51,7 @@ typedef struct {
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
opal_object_t super;
|
opal_object_t super;
|
||||||
mca_base_component_t *component;
|
orte_sensor_base_component_t *component;
|
||||||
orte_sensor_base_module_t *module;
|
orte_sensor_base_module_t *module;
|
||||||
int priority;
|
int priority;
|
||||||
} orte_sensor_active_module_t;
|
} orte_sensor_active_module_t;
|
||||||
|
@ -20,25 +20,20 @@ AC_DEFUN([MCA_orte_sensor_coretemp_CONFIG], [
|
|||||||
|
|
||||||
# do not build if support not requested
|
# do not build if support not requested
|
||||||
AS_IF([test "$with_coretemp" != "no"],
|
AS_IF([test "$with_coretemp" != "no"],
|
||||||
[case "${host}" in
|
[AS_IF([test "$opal_found_linux" = "yes"],
|
||||||
i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|sparc*-*linux*)
|
[AS_IF([test -r "/sys/bus/platform/devices/coretemp.0"],
|
||||||
AS_IF([test -r "/sys/bus/platform/devices/coretemp.0"],
|
[sensor_coretemp_happy=yes],
|
||||||
[sensor_coretemp_happy=yes],
|
[AC_MSG_WARN([Core temperature sensing was requested but the required directory])
|
||||||
[AC_MSG_WARN([Core temperature sensing was requested but the required directory])
|
AC_MSG_WARN([was not found. This usually indicates that the \"coretemp\"])
|
||||||
AC_MSG_WARN([was not found. This usually indicates that the \"coretemp\"])
|
AC_MSG_WARN([kernel module is not installed. Please install the module])
|
||||||
AC_MSG_WARN([kernel module is not installed. Please install the module])
|
AC_MSG_WARN([and try again, or remove the core temperature sensing request.])
|
||||||
AC_MSG_WARN([and try again, or remove the core temperature sensing request.])
|
sensor_coretemp_happy=no])],
|
||||||
sensor_coretemp_happy=no])
|
[AC_MSG_WARN([Core temperature sensing was requested but is only supported on Linux systems])
|
||||||
;;
|
sensor_coretemp_happy=no])
|
||||||
*)
|
AS_IF([test "$sensor_coretemp_happy" = "yes"],
|
||||||
AC_MSG_WARN([Core temperature sensing was requested but is only supported on Linux systems])
|
[$1],
|
||||||
sensor_coretemp_happy=no
|
[AC_MSG_ERROR([Cannot continue])
|
||||||
;;
|
$2])
|
||||||
esac
|
],
|
||||||
AS_IF([test "$sensor_coretemp_happy" = "yes"],
|
[$2])
|
||||||
[$1],
|
|
||||||
[AC_MSG_ERROR([Cannot continue])
|
|
||||||
$2])
|
|
||||||
],
|
|
||||||
[$2])
|
|
||||||
])dnl
|
])dnl
|
||||||
|
@ -40,7 +40,8 @@ orte_sensor_coretemp_component_t mca_sensor_coretemp_component = {
|
|||||||
{
|
{
|
||||||
/* The component is checkpoint ready */
|
/* The component is checkpoint ready */
|
||||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||||
}
|
},
|
||||||
|
"coretemp" // data being sensed
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -44,7 +44,8 @@ orte_sensor_file_component_t mca_sensor_file_component = {
|
|||||||
{
|
{
|
||||||
/* The component is checkpoint ready */
|
/* The component is checkpoint ready */
|
||||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||||
}
|
},
|
||||||
|
"filemods" // data being sensed
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -45,7 +45,8 @@ orte_sensor_ft_tester_component_t mca_sensor_ft_tester_component = {
|
|||||||
{
|
{
|
||||||
/* The component is checkpoint ready */
|
/* The component is checkpoint ready */
|
||||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||||
}
|
},
|
||||||
|
NULL
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -43,7 +43,8 @@ orte_sensor_base_component_t mca_sensor_heartbeat_component = {
|
|||||||
{
|
{
|
||||||
/* The component is checkpoint ready */
|
/* The component is checkpoint ready */
|
||||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||||
}
|
},
|
||||||
|
"heartbeat"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,7 +44,8 @@ orte_sensor_resusage_component_t mca_sensor_resusage_component = {
|
|||||||
{
|
{
|
||||||
/* The component is checkpoint ready */
|
/* The component is checkpoint ready */
|
||||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||||
}
|
},
|
||||||
|
"procresource,noderesource"
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -81,6 +82,7 @@ typedef orte_sensor_base_module_1_0_0_t orte_sensor_base_module_t;
|
|||||||
struct orte_sensor_base_component_1_0_0_t {
|
struct orte_sensor_base_component_1_0_0_t {
|
||||||
mca_base_component_t base_version;
|
mca_base_component_t base_version;
|
||||||
mca_base_component_data_t base_data;
|
mca_base_component_data_t base_data;
|
||||||
|
char *data_measured;
|
||||||
};
|
};
|
||||||
typedef struct orte_sensor_base_component_1_0_0_t orte_sensor_base_component_1_0_0_t;
|
typedef struct orte_sensor_base_component_1_0_0_t orte_sensor_base_component_1_0_0_t;
|
||||||
typedef orte_sensor_base_component_1_0_0_t orte_sensor_base_component_t;
|
typedef orte_sensor_base_component_1_0_0_t orte_sensor_base_component_t;
|
||||||
|
@ -20,38 +20,32 @@ AC_DEFUN([MCA_orte_sensor_sigar_CONFIG], [
|
|||||||
|
|
||||||
# do not build if support not requested
|
# do not build if support not requested
|
||||||
AS_IF([test "$with_sigar" != "no"],
|
AS_IF([test "$with_sigar" != "no"],
|
||||||
[case "${host}" in
|
[AS_IF([test "$opal_found_linux" = "yes" || test "$opal_found_apple" = "yes"],
|
||||||
i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|sparc*-*linux*)
|
[AS_IF([test "$opal_found_apple" = "yes"],
|
||||||
AS_IF([test -r "/proc/cpuinfo"],
|
[libname="sigar-universal-macosx"], [libname="sigar"])
|
||||||
[sensor_linux_happy="yes"],
|
|
||||||
[sensor_linux_happy="no"])
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
sensor_linux_happy="no"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AS_IF([test "$sensor_linux_happy" = "yes"],
|
AS_IF([test ! -z "$with_sigar" -a "$with_sigar" != "yes"],
|
||||||
[libname="sigar"], [libname="sigar-universal-macosx"])
|
[orte_check_sigar_dir="$with_sigar"])
|
||||||
|
|
||||||
AS_IF([test ! -z "$with_sigar" -a "$with_sigar" != "yes"],
|
OMPI_CHECK_PACKAGE([sensor_sigar],
|
||||||
[orte_check_sigar_dir="$with_sigar"])
|
[sigar.h],
|
||||||
|
[$libname],
|
||||||
OMPI_CHECK_PACKAGE([sensor_sigar],
|
[sigar_proc_cpu_get],
|
||||||
[sigar.h],
|
[],
|
||||||
[$libname],
|
[$orte_check_sigar_dir],
|
||||||
[sigar_proc_cpu_get],
|
[],
|
||||||
[],
|
[$1],
|
||||||
[$orte_check_sigar_dir],
|
[AC_MSG_WARN([SIGAR SENSOR SUPPORT REQUESTED])
|
||||||
[],
|
AC_MSG_WARN([BUT REQUIRED LIBRARY OR HEADER NOT FOUND])
|
||||||
[$1],
|
AC_MSG_ERROR([CANNOT CONTINUE])
|
||||||
[AC_MSG_WARN([SIGAR SENSOR SUPPORT REQUESTED])
|
$2])],
|
||||||
AC_MSG_WARN([BUT REQUIRED LIBRARY OR HEADER NOT FOUND])
|
[AC_MSG_WARN([SIGAR SENSOR SUPPORT REQUESTED])
|
||||||
AC_MSG_ERROR([CANNOT CONTINUE])
|
AC_MSG_WARN([BUT ONLY SUPPORTED ON LINUX AND MAC])
|
||||||
$2])],
|
AC_MSG_ERROR([CANNOT CONTINUE])
|
||||||
|
$2])],
|
||||||
[$2])
|
[$2])
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED(ORTE_SIGAR_LINUX, [test "$sensor_linux_happy" = "yes"],
|
AC_DEFINE_UNQUOTED(ORTE_SIGAR_LINUX, [test "$opal_found_linux" = "yes"],
|
||||||
[Which name to use for the sigar library on this OS])
|
[Which name to use for the sigar library on this OS])
|
||||||
AC_SUBST(sensor_sigar_CPPFLAGS)
|
AC_SUBST(sensor_sigar_CPPFLAGS)
|
||||||
AC_SUBST(sensor_sigar_LDFLAGS)
|
AC_SUBST(sensor_sigar_LDFLAGS)
|
||||||
|
@ -40,7 +40,8 @@ orte_sensor_sigar_component_t mca_sensor_sigar_component = {
|
|||||||
{
|
{
|
||||||
/* The component is checkpoint ready */
|
/* The component is checkpoint ready */
|
||||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||||
}
|
},
|
||||||
|
"procresource,noderesource"
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -58,7 +59,7 @@ static int orte_sensor_sigar_query(mca_base_module_t **module, int *priority)
|
|||||||
* even if we aren't going to sample as we have to be
|
* even if we aren't going to sample as we have to be
|
||||||
* present in order to log any received results
|
* present in order to log any received results
|
||||||
*/
|
*/
|
||||||
*priority = 50; /* ahead of heartbeat */
|
*priority = 150; /* ahead of heartbeat and resusage */
|
||||||
*module = (mca_base_module_t *)&orte_sensor_sigar_module;
|
*module = (mca_base_module_t *)&orte_sensor_sigar_module;
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user