1
1
* Use the global flags that record whether Linux or Apple was detected instead of repeating the host case statements

* Update the select procedure to skip components that measure the same thing (e.g., resusage and sigar), keeping only the higher-priority module

cmr=v1.7.5:reviewer=jsquyres:subject=Cleanup the sensor code

This commit was SVN r30368.
Этот коммит содержится в:
Ralph Castain 2014-01-22 21:01:09 +00:00
родитель 7ba8bd81fa
Коммит de07a64599
12 изменённых файлов: 101 добавлений и 68 удалений

Просмотреть файл

@ -1,6 +1,7 @@
/*
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -117,7 +118,7 @@ void orte_sensor_base_sample(int fd, short args, void *cbdata)
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
"%s sensor:base: sampling component %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
i_module->component->mca_component_name);
i_module->component->base_version.mca_component_name);
i_module->module->sample();
}
}
@ -147,7 +148,7 @@ void orte_sensor_base_log(char *comp, opal_buffer_t *data)
if (NULL == (i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i))) {
continue;
}
if (0 == strcmp(comp, i_module->component->mca_component_name)) {
if (0 == strcmp(comp, i_module->component->base_version.mca_component_name)) {
if (NULL != i_module->module->log) {
i_module->module->log(data);
}

Просмотреть файл

@ -41,7 +41,7 @@ static bool selected = false;
int orte_sensor_base_select(void)
{
mca_base_component_list_item_t *cli = NULL;
mca_base_component_t *component = NULL;
orte_sensor_base_component_t *component = NULL;
mca_base_module_t *module = NULL;
orte_sensor_active_module_t *i_module;
int priority = 0, i, j, low_i;
@ -49,6 +49,7 @@ int orte_sensor_base_select(void)
bool none_found;
orte_sensor_active_module_t *tmp_module = NULL, *tmp_module_sw = NULL;
orte_job_t *jdata;
bool duplicate;
if (selected) {
return ORTE_SUCCESS;
@ -66,15 +67,15 @@ int orte_sensor_base_select(void)
*/
none_found = true;
OPAL_LIST_FOREACH(cli, &orte_sensor_base_framework.framework_components, mca_base_component_list_item_t) {
component = (mca_base_component_t *) cli->cli_component;
component = (orte_sensor_base_component_t *) cli->cli_component;
/*
* If there is a query function then use it.
*/
if (NULL == component->mca_query_component) {
if (NULL == component->base_version.mca_query_component) {
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
"sensor:base:select Skipping component [%s]. It does not implement a query function",
component->mca_component_name );
component->base_version.mca_component_name );
continue;
}
@ -83,9 +84,9 @@ int orte_sensor_base_select(void)
*/
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
"sensor:base:select Querying component [%s]",
component->mca_component_name);
component->base_version.mca_component_name);
component->mca_query_component(&module, &priority);
component->base_version.mca_query_component(&module, &priority);
/*
* If no module was returned or negative priority, then skip component
@ -93,7 +94,40 @@ int orte_sensor_base_select(void)
if (NULL == module || priority < 0) {
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
"sensor:base:select Skipping component [%s]. Query failed to return a module",
component->mca_component_name );
component->base_version.mca_component_name );
continue;
}
/* check to see if we already have someone who senses the
* same things - if so, take the higher priority one
*/
duplicate = false;
for (i=0; i < tmp_array.size; i++) {
tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, i);
if (NULL == tmp_module) {
continue;
}
if (0 == strcmp(component->data_measured, tmp_module->component->data_measured)) {
if (tmp_module->priority < priority) {
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
"sensor:base:select Replacing component %s with %s - both measure %s",
tmp_module->component->base_version.mca_component_name,
component->base_version.mca_component_name,
component->data_measured);
OBJ_RELEASE(tmp_module);
opal_pointer_array_set_item(&tmp_array, i, NULL);
break;
} else {
duplicate = true;
}
}
}
if (duplicate) {
/* ignore this component */
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
"sensor:base:select Ignoring component %s - duplicate with higher priority measures %s",
component->base_version.mca_component_name,
component->data_measured);
continue;
}
@ -102,7 +136,7 @@ int orte_sensor_base_select(void)
*/
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
"sensor:base:select Query of component [%s] set priority to %d",
component->mca_component_name, priority);
component->base_version.mca_component_name, priority);
tmp_module = OBJ_NEW(orte_sensor_active_module_t);
tmp_module->component = component;
tmp_module->module = (orte_sensor_base_module_t*)module;
@ -169,7 +203,7 @@ int orte_sensor_base_select(void)
}
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
"sensor:base:select Add module with priority [%s] %d",
tmp_module->component->mca_component_name, tmp_module->priority);
tmp_module->component->base_version.mca_component_name, tmp_module->priority);
opal_pointer_array_add(&orte_sensor_base.modules, tmp_module);
}
OBJ_DESTRUCT(&tmp_array);

Просмотреть файл

@ -1,6 +1,7 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -50,7 +51,7 @@ typedef struct {
/*
 * Bookkeeping record for one candidate/selected sensor module.  The select
 * code shown in this commit creates instances with OBJ_NEW, fills in the
 * component and module pointers, and adds them to orte_sensor_base.modules.
 */
typedef struct {
/* object base; instances are created with OBJ_NEW and dropped with OBJ_RELEASE */
opal_object_t super;
/*
 * NOTE(review): this diff view has no +/- markers, so both the old and the
 * new declaration of "component" appear below.  The select code in the same
 * commit reads component->base_version.* and component->data_measured, which
 * only the orte_sensor_base_component_t declaration can satisfy.
 */
mca_base_component_t *component;
orte_sensor_base_component_t *component;
/* module pointer returned through the component's query function */
orte_sensor_base_module_t *module;
/* compared against a newly queried component's priority when both measure the same data */
int priority;
} orte_sensor_active_module_t;

Просмотреть файл

@ -20,25 +20,20 @@ AC_DEFUN([MCA_orte_sensor_coretemp_CONFIG], [
# do not build if support not requested
AS_IF([test "$with_coretemp" != "no"],
[case "${host}" in
i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|sparc*-*linux*)
AS_IF([test -r "/sys/bus/platform/devices/coretemp.0"],
[sensor_coretemp_happy=yes],
[AC_MSG_WARN([Core temperature sensing was requested but the required directory])
AC_MSG_WARN([was not found. This usually indicates that the \"coretemp\"])
AC_MSG_WARN([kernel module is not installed. Please install the module])
AC_MSG_WARN([and try again, or remove the core temperature sensing request.])
sensor_coretemp_happy=no])
;;
*)
AC_MSG_WARN([Core temperature sensing was requested but is only supported on Linux systems])
sensor_coretemp_happy=no
;;
esac
AS_IF([test "$sensor_coretemp_happy" = "yes"],
[$1],
[AC_MSG_ERROR([Cannot continue])
$2])
],
[$2])
[AS_IF([test "$opal_found_linux" = "yes"],
[AS_IF([test -r "/sys/bus/platform/devices/coretemp.0"],
[sensor_coretemp_happy=yes],
[AC_MSG_WARN([Core temperature sensing was requested but the required directory])
AC_MSG_WARN([was not found. This usually indicates that the \"coretemp\"])
AC_MSG_WARN([kernel module is not installed. Please install the module])
AC_MSG_WARN([and try again, or remove the core temperature sensing request.])
sensor_coretemp_happy=no])],
[AC_MSG_WARN([Core temperature sensing was requested but is only supported on Linux systems])
sensor_coretemp_happy=no])
AS_IF([test "$sensor_coretemp_happy" = "yes"],
[$1],
[AC_MSG_ERROR([Cannot continue])
$2])
],
[$2])
])dnl

Просмотреть файл

@ -40,7 +40,8 @@ orte_sensor_coretemp_component_t mca_sensor_coretemp_component = {
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
},
"coretemp" // data being sensed
}
};

Просмотреть файл

@ -44,7 +44,8 @@ orte_sensor_file_component_t mca_sensor_file_component = {
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
},
"filemods" // data being sensed
}
};

Просмотреть файл

@ -45,7 +45,8 @@ orte_sensor_ft_tester_component_t mca_sensor_ft_tester_component = {
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
},
NULL
}
};

Просмотреть файл

@ -43,7 +43,8 @@ orte_sensor_base_component_t mca_sensor_heartbeat_component = {
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
},
"heartbeat"
};

Просмотреть файл

@ -44,7 +44,8 @@ orte_sensor_resusage_component_t mca_sensor_resusage_component = {
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
},
"procresource,noderesource"
}
};

Просмотреть файл

@ -1,6 +1,7 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -81,6 +82,7 @@ typedef orte_sensor_base_module_1_0_0_t orte_sensor_base_module_t;
/*
 * Sensor framework component descriptor, version 1.0.0.
 */
struct orte_sensor_base_component_1_0_0_t {
/* generic MCA component header; the select code reads the component name
 * and the query function through this member */
mca_base_component_t base_version;
/* presumably the MCA metadata block (the component initializers place
 * MCA_BASE_METADATA_PARAM_CHECKPOINT here) - TODO confirm */
mca_base_component_data_t base_data;
/*
 * Label describing what the component measures, e.g. "coretemp" or
 * "procresource,noderesource".  The select code compares the whole string
 * with strcmp() to detect two components that sense the same data, so
 * comma-separated lists match only when the full strings are identical.
 *
 * NOTE(review): the ft_tester component initializes this field to NULL,
 * while the comparison in the select code passes it to strcmp() with no
 * visible NULL check; strcmp() on a null pointer is undefined behavior.
 * Confirm a guard exists or use a non-NULL label.
 */
char *data_measured;
};
/* versioned name, plus the unversioned alias used by the rest of the framework */
typedef struct orte_sensor_base_component_1_0_0_t orte_sensor_base_component_1_0_0_t;
typedef orte_sensor_base_component_1_0_0_t orte_sensor_base_component_t;

Просмотреть файл

@ -20,38 +20,32 @@ AC_DEFUN([MCA_orte_sensor_sigar_CONFIG], [
# do not build if support not requested
AS_IF([test "$with_sigar" != "no"],
[case "${host}" in
i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|sparc*-*linux*)
AS_IF([test -r "/proc/cpuinfo"],
[sensor_linux_happy="yes"],
[sensor_linux_happy="no"])
;;
*)
sensor_linux_happy="no"
;;
esac
[AS_IF([test "$opal_found_linux" = "yes" || test "$opal_found_apple" = "yes"],
[AS_IF([test "$opal_found_apple" = "yes"],
[libname="sigar-universal-macosx"], [libname="sigar"])
AS_IF([test "$sensor_linux_happy" = "yes"],
[libname="sigar"], [libname="sigar-universal-macosx"])
AS_IF([test ! -z "$with_sigar" -a "$with_sigar" != "yes"],
[orte_check_sigar_dir="$with_sigar"])
AS_IF([test ! -z "$with_sigar" -a "$with_sigar" != "yes"],
[orte_check_sigar_dir="$with_sigar"])
OMPI_CHECK_PACKAGE([sensor_sigar],
[sigar.h],
[$libname],
[sigar_proc_cpu_get],
[],
[$orte_check_sigar_dir],
[],
[$1],
[AC_MSG_WARN([SIGAR SENSOR SUPPORT REQUESTED])
AC_MSG_WARN([BUT REQUIRED LIBRARY OR HEADER NOT FOUND])
AC_MSG_ERROR([CANNOT CONTINUE])
$2])],
OMPI_CHECK_PACKAGE([sensor_sigar],
[sigar.h],
[$libname],
[sigar_proc_cpu_get],
[],
[$orte_check_sigar_dir],
[],
[$1],
[AC_MSG_WARN([SIGAR SENSOR SUPPORT REQUESTED])
AC_MSG_WARN([BUT REQUIRED LIBRARY OR HEADER NOT FOUND])
AC_MSG_ERROR([CANNOT CONTINUE])
$2])],
[AC_MSG_WARN([SIGAR SENSOR SUPPORT REQUESTED])
AC_MSG_WARN([BUT ONLY SUPPORTED ON LINUX AND MAC])
AC_MSG_ERROR([CANNOT CONTINUE])
$2])],
[$2])
AC_DEFINE_UNQUOTED(ORTE_SIGAR_LINUX, [test "$sensor_linux_happy" = "yes"],
AC_DEFINE_UNQUOTED(ORTE_SIGAR_LINUX, [test "$opal_found_linux" = "yes"],
[Which name to use for the sigar library on this OS])
AC_SUBST(sensor_sigar_CPPFLAGS)
AC_SUBST(sensor_sigar_LDFLAGS)

Просмотреть файл

@ -40,7 +40,8 @@ orte_sensor_sigar_component_t mca_sensor_sigar_component = {
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
},
"procresource,noderesource"
}
};
@ -58,7 +59,7 @@ static int orte_sensor_sigar_query(mca_base_module_t **module, int *priority)
* even if we aren't going to sample as we have to be
* present in order to log any received results
*/
*priority = 50; /* ahead of heartbeat */
*priority = 150; /* ahead of heartbeat and resusage */
*module = (mca_base_module_t *)&orte_sensor_sigar_module;
return ORTE_SUCCESS;
}