Ensure we build the sensor components even if the local system lacks the required directories and/or access permissions. Backend nodes that receive the binary may have them, and aggregators need to load the component so they can log data even if they aren't monitoring locally. If the required files turn out to be inaccessible when we first try to sample, turn off the sampling portion of the plugin at that time.
Refs trac:4172. This commit was SVN r30426. The following Trac ticket was referenced above: Ticket 4172 --> https://svn.open-mpi.org/trac/ompi/ticket/4172
Этот коммит содержится в:
родитель
967550b3ac
Коммит
11562ab7cb
@ -124,6 +124,10 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, sensor, "ORTE Monitoring Sensors",
|
||||
orte_sensor_base_open, orte_sensor_base_close,
|
||||
mca_sensor_base_static_components, 0);
|
||||
|
||||
/* Constructor for orte_sensor_active_module_t (registered via the
 * OBJ_CLASS_INSTANCE that follows): a newly created active-module
 * entry starts out with sampling enabled.
 */
static void cons(orte_sensor_active_module_t *t)
|
||||
{
|
||||
/* default to sampling; this flag is cleared elsewhere when a module cannot sample */
t->sampling = true;
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(orte_sensor_active_module_t,
|
||||
opal_object_t,
|
||||
NULL, NULL);
|
||||
cons, NULL);
|
||||
|
@ -219,8 +219,16 @@ int orte_sensor_base_select(void)
|
||||
}
|
||||
if( NULL != i_module->module->init ) {
|
||||
if (ORTE_SUCCESS != i_module->module->init()) {
|
||||
/* can't run after all */
|
||||
opal_pointer_array_set_item(&orte_sensor_base.modules, i, NULL);
|
||||
/* can't sample - however, if we are the HNP
|
||||
* or an aggregator, then we need this module
|
||||
* anyway so we can log incoming data
|
||||
*/
|
||||
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_AGGREGATOR) {
|
||||
i_module->sampling = false;
|
||||
} else {
|
||||
opal_pointer_array_set_item(&orte_sensor_base.modules, i, NULL);
|
||||
OBJ_RELEASE(i_module);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -54,6 +54,7 @@ typedef struct {
|
||||
orte_sensor_base_component_t *component;
|
||||
orte_sensor_base_module_t *module;
|
||||
int priority;
|
||||
bool sampling;
|
||||
} orte_sensor_active_module_t;
|
||||
OBJ_CLASS_DECLARATION(orte_sensor_active_module_t);
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
@ -8,13 +8,26 @@
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English general help file for the memory usage sensor
|
||||
# This is the US/English general help file
|
||||
#
|
||||
[mem-limit-exceeded]
|
||||
A process has exceeded the specified limit on memory usage:
|
||||
[req-dir-not-found]
|
||||
Core temperature monitoring was requested, but this node
|
||||
lacks the required directory:
|
||||
|
||||
Node: %s
|
||||
Process rank: %s
|
||||
Memory used: %luGbytes
|
||||
Memory limit: %luGbytes
|
||||
Node: %s
|
||||
Directory: %s
|
||||
|
||||
This usually indicates that the "coretemp" kernel module
|
||||
has not been loaded. Operation will continue, but core
|
||||
temperatures will not be monitored.
|
||||
#
|
||||
[no-cores-found]
|
||||
Core temperature monitoring was requested, but this node
|
||||
does not appear to have the required core-level files, or
|
||||
you lack authority to access them:
|
||||
|
||||
Node: %s
|
||||
|
||||
This usually indicates that the "coretemp" kernel module
|
||||
has not been loaded. Operation will continue, but core
|
||||
temperatures will not be monitored.
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "opal/mca/db/db.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
@ -120,6 +121,7 @@ static int init(void)
|
||||
coretemp_tracker_t *trk;
|
||||
int socket;
|
||||
|
||||
/* always construct this so we don't segfault in finalize */
|
||||
OBJ_CONSTRUCT(&tracking, opal_list_t);
|
||||
|
||||
/*
|
||||
@ -127,6 +129,9 @@ static int init(void)
|
||||
*/
|
||||
if (NULL == (cur_dirp = opendir("/sys/bus/platform/devices"))) {
|
||||
OBJ_DESTRUCT(&tracking);
|
||||
orte_show_help("help-orte-sensor-coretemp.txt", "req-dir-not-found",
|
||||
true, orte_process_info.nodename,
|
||||
"/sys/bus/platform/devices");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -217,6 +222,8 @@ static int init(void)
|
||||
|
||||
if (0 == opal_list_get_size(&tracking)) {
|
||||
/* nothing to read */
|
||||
orte_show_help("help-orte-sensor-coretemp.txt", "no-cores-found",
|
||||
true, orte_process_info.nodename);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -245,7 +252,7 @@ static void stop(orte_jobid_t jobid)
|
||||
static void coretemp_sample(void)
|
||||
{
|
||||
int ret;
|
||||
coretemp_tracker_t *trk;
|
||||
coretemp_tracker_t *trk, *nxt;
|
||||
FILE *fp;
|
||||
char *temp;
|
||||
float degc;
|
||||
@ -256,6 +263,10 @@ static void coretemp_sample(void)
|
||||
char *timestamp_str;
|
||||
bool packed;
|
||||
|
||||
if (0 == opal_list_get_size(&tracking)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* prep to store the results */
|
||||
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
||||
packed = false;
|
||||
@ -297,9 +308,18 @@ static void coretemp_sample(void)
|
||||
}
|
||||
free(timestamp_str);
|
||||
|
||||
OPAL_LIST_FOREACH(trk, &tracking, coretemp_tracker_t) {
|
||||
OPAL_LIST_FOREACH_SAFE(trk, nxt, &tracking, coretemp_tracker_t) {
|
||||
/* read the temp */
|
||||
fp = fopen(trk->file, "r");
|
||||
if (NULL == (fp = fopen(trk->file, "r"))) {
|
||||
/* the file can't be read, so remove its tracker from the list */
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s access denied to coretemp file %s - removing it",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
trk->file);
|
||||
opal_list_remove_item(&tracking, &trk->super);
|
||||
OBJ_RELEASE(trk);
|
||||
continue;
|
||||
}
|
||||
while (NULL != (temp = orte_getline(fp))) {
|
||||
degc = strtoul(temp, NULL, 10) / 100.0;
|
||||
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||
|
@ -21,16 +21,9 @@ AC_DEFUN([MCA_orte_sensor_freq_CONFIG], [
|
||||
# do not build if support not requested
|
||||
AS_IF([test "$with_freq" != "no"],
|
||||
[AS_IF([test "$opal_found_linux" = "yes"],
|
||||
[AS_IF([test -r "/sys/devices/system/cpu/cpu0/cpufreq/"],
|
||||
[sensor_freq_happy=yes],
|
||||
[AC_MSG_WARN([Core frequency sensing was requested but the required directory])
|
||||
AC_MSG_WARN([was not found])
|
||||
sensor_freq_happy=no])],
|
||||
[AC_MSG_WARN([Core frequency sensing was requested but is only supported on Linux systems])
|
||||
sensor_freq_happy=no])
|
||||
AS_IF([test "$sensor_freq_happy" = "yes"],
|
||||
[$1],
|
||||
[AC_MSG_ERROR([Cannot continue])
|
||||
[AC_MSG_WARN([Core frequency sensing was requested but is only supported on Linux systems])
|
||||
AC_MSG_ERROR([Cannot continue])
|
||||
$2])
|
||||
],
|
||||
[$2])
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
@ -10,11 +10,20 @@
|
||||
#
|
||||
# This is the US/English general help file for the frequency sensor
|
||||
#
|
||||
[mem-limit-exceeded]
|
||||
A process has exceeded the specified limit on memory usage:
|
||||
[req-dir-not-found]
|
||||
Frequency monitoring was requested, but this node
|
||||
lacks the required directory:
|
||||
|
||||
Node: %s
|
||||
Process rank: %s
|
||||
Memory used: %luGbytes
|
||||
Memory limit: %luGbytes
|
||||
Node: %s
|
||||
Directory: %s
|
||||
|
||||
Operation will continue, but frequencies will not be monitored.
|
||||
#
|
||||
[no-cores-found]
|
||||
Frequency monitoring was requested, but this node
|
||||
does not appear to have the required core-level files, or
|
||||
you lack authority to access them:
|
||||
|
||||
Node: %s
|
||||
|
||||
Operation will continue, but frequencies will not be monitored.
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "opal/mca/db/db.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
@ -114,6 +115,7 @@ static int init(void)
|
||||
FILE *fp;
|
||||
corefreq_tracker_t *trk;
|
||||
|
||||
/* always construct this so we don't segfault in finalize */
|
||||
OBJ_CONSTRUCT(&tracking, opal_list_t);
|
||||
|
||||
/*
|
||||
@ -121,6 +123,9 @@ static int init(void)
|
||||
*/
|
||||
if (NULL == (cur_dirp = opendir("/sys/devices/system/cpu"))) {
|
||||
OBJ_DESTRUCT(&tracking);
|
||||
orte_show_help("help-orte-sensor-freq.txt", "req-dir-not-found",
|
||||
true, orte_process_info.nodename,
|
||||
"/sys/devices/system/cpu");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -182,6 +187,8 @@ static int init(void)
|
||||
|
||||
if (0 == opal_list_get_size(&tracking)) {
|
||||
/* nothing to read */
|
||||
orte_show_help("help-orte-sensor-freq.txt", "no-cores-found",
|
||||
true, orte_process_info.nodename);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -210,7 +217,7 @@ static void stop(orte_jobid_t jobid)
|
||||
static void freq_sample(void)
|
||||
{
|
||||
int ret;
|
||||
corefreq_tracker_t *trk;
|
||||
corefreq_tracker_t *trk, *nxt;
|
||||
FILE *fp;
|
||||
char *freq;
|
||||
float ghz;
|
||||
@ -221,6 +228,10 @@ static void freq_sample(void)
|
||||
char *timestamp_str;
|
||||
bool packed;
|
||||
|
||||
if (0 == opal_list_get_size(&tracking)) {
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s sampling freq",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
@ -266,13 +277,20 @@ static void freq_sample(void)
|
||||
}
|
||||
free(timestamp_str);
|
||||
|
||||
OPAL_LIST_FOREACH(trk, &tracking, corefreq_tracker_t) {
|
||||
OPAL_LIST_FOREACH_SAFE(trk, nxt, &tracking, corefreq_tracker_t) {
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s processing freq file %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
trk->file);
|
||||
/* read the temp */
|
||||
/* read the freq */
|
||||
if (NULL == (fp = fopen(trk->file, "r"))) {
|
||||
/* the file can't be read, so remove its tracker from the list */
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s access denied to freq file %s - removing it",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
trk->file);
|
||||
opal_list_remove_item(&tracking, &trk->super);
|
||||
OBJ_RELEASE(trk);
|
||||
continue;
|
||||
}
|
||||
while (NULL != (freq = orte_getline(fp))) {
|
||||
|
@ -21,17 +21,9 @@ AC_DEFUN([MCA_orte_sensor_pwr_CONFIG], [
|
||||
# do not build if support not requested
|
||||
AS_IF([test "$with_pwr" != "no"],
|
||||
[AS_IF([test "$opal_found_linux" = "yes"],
|
||||
[AS_IF([test -e "/dev/cpu/0/msr"],
|
||||
[sensor_pwr_happy=yes],
|
||||
[AC_MSG_WARN([Core power sensing was requested but the required directory])
|
||||
AC_MSG_WARN([was not found])
|
||||
sensor_pwr_happy=no])],
|
||||
[AC_MSG_WARN([Core power sensing was requested but is only supported on Intel-based Linux systems])
|
||||
sensor_pwr_happy=no])
|
||||
AS_IF([test "$sensor_pwr_happy" = "yes"],
|
||||
[$1],
|
||||
[AC_MSG_ERROR([Cannot continue])
|
||||
$2])
|
||||
],
|
||||
[AC_MSG_WARN([Core power sensing was requested but is only supported on Intel-based Linux systems])
|
||||
AC_MSG_ERROR([Cannot continue])
|
||||
$2])],
|
||||
[$2])
|
||||
])dnl
|
||||
|
@ -11,8 +11,9 @@
|
||||
#
|
||||
[no-access]
|
||||
Power sensing was requested, but you lack access authority
|
||||
to the required path:
|
||||
to the required path on this node:
|
||||
|
||||
Node: %s
|
||||
Path: %s
|
||||
|
||||
We will continue to operate, but will not monitor power.
|
||||
@ -36,3 +37,12 @@ available. This usually means that your system lacks
|
||||
the required revision level for hwloc.
|
||||
|
||||
We will continue to operate, but will not monitor power.
|
||||
#
|
||||
[no-cores-found]
|
||||
Power monitoring was requested, but this node
|
||||
does not appear to have the required core-level files,
|
||||
or you lack access authority to them:
|
||||
|
||||
Node: %s
|
||||
|
||||
We will continue to operate, but will not monitor power.
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include "opal/mca/db/db.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
@ -110,6 +111,7 @@ static int read_msr(int fd, long long *value, int offset)
|
||||
*value = (long long)data;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
static int check_cpu_type(void);
|
||||
|
||||
|
||||
static int init(void)
|
||||
@ -120,13 +122,25 @@ static int init(void)
|
||||
corepwr_tracker_t *trk;
|
||||
long long units;
|
||||
|
||||
/* always construct this so we don't segfault in finalize */
|
||||
OBJ_CONSTRUCT(&tracking, opal_list_t);
|
||||
|
||||
/* we only handle certain cpu types as we have to know the binary
|
||||
* layout of the msr file
|
||||
*/
|
||||
if (ORTE_SUCCESS != check_cpu_type()) {
|
||||
/* we provided a show help down below */
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Open up the base directory so we can get a listing
|
||||
*/
|
||||
if (NULL == (cur_dirp = opendir("/dev/cpu"))) {
|
||||
OBJ_DESTRUCT(&tracking);
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "no-access",
|
||||
true, orte_process_info.nodename,
|
||||
"/dev/cpu");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -173,6 +187,8 @@ static int init(void)
|
||||
|
||||
if (0 == opal_list_get_size(&tracking)) {
|
||||
/* nothing to read */
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "no-cores-found",
|
||||
true, orte_process_info.nodename);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -212,6 +228,10 @@ static void pwr_sample(void)
|
||||
char *temp;
|
||||
bool packed;
|
||||
|
||||
if (0 == opal_list_get_size(&tracking)) {
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s sampling power",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
@ -260,12 +280,20 @@ static void pwr_sample(void)
|
||||
OPAL_LIST_FOREACH_SAFE(trk, nxt, &tracking, corepwr_tracker_t) {
|
||||
if (0 >= (fd = open(trk->file, O_RDONLY))) {
|
||||
/* disable this one - cannot read the file */
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s access denied to pwr file %s - removing it",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
trk->file);
|
||||
opal_list_remove_item(&tracking, &trk->super);
|
||||
OBJ_RELEASE(trk);
|
||||
continue;
|
||||
}
|
||||
if (ORTE_SUCCESS != read_msr(fd, &value, MSR_PKG_POWER_INFO)) {
|
||||
/* disable this one - cannot read the file */
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s failed to read pwr file %s - removing it",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
trk->file);
|
||||
opal_list_remove_item(&tracking, &trk->super);
|
||||
OBJ_RELEASE(trk);
|
||||
close(fd);
|
||||
@ -381,3 +409,69 @@ static void pwr_log(opal_buffer_t *sample)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* list of supported chipsets */
|
||||
#define CPU_SANDYBRIDGE 42
|
||||
#define CPU_SANDYBRIDGE_EP 45
|
||||
#define CPU_IVYBRIDGE 58
|
||||
#define CPU_IVYBRIDGE_EP 62
|
||||
#define CPU_HASWELL 60
|
||||
|
||||
|
||||
/* go thru our topology and check the sockets
|
||||
* to see if they contain a match - at this time,
|
||||
* we don't support hetero sockets, so any mismatch
|
||||
* will disqualify us
|
||||
*/
|
||||
/* Decide whether this node's CPU model is one of the supported chipsets.
 *
 * Starts at the first HWLOC_OBJ_SOCKET object in opal_hwloc_topology and
 * walks its siblings looking for an info entry named "model". The first
 * such entry found is parsed as a base-10 integer, stored in
 * mca_sensor_pwr_component.model, and decides the result immediately:
 * sockets after that one are not examined.
 *
 * Returns ORTE_SUCCESS when the model matches one of the CPU_* values
 * handled in the switch below. Returns ORTE_ERROR (after emitting a
 * show_help message) when no socket object exists, when the model is
 * not in the supported list, or when no socket carries a "model" entry.
 */
static int check_cpu_type(void)
|
||||
{
|
||||
hwloc_obj_t obj;
|
||||
unsigned k;
|
||||
|
||||
if (NULL == (obj = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_SOCKET, 0))) {
|
||||
/* there are no sockets identified in this machine */
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "no-sockets", true);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* walk the socket and its siblings until one provides a "model" entry */
while (NULL != obj) {
|
||||
for (k=0; k < obj->infos_count; k++) {
|
||||
if (0 == strcmp(obj->infos[k].name, "model") &&
|
||||
NULL != obj->infos[k].value) {
|
||||
/* record the model on the component so it is available after this check */
mca_sensor_pwr_component.model = strtoul(obj->infos[k].value, NULL, 10);
|
||||
|
||||
/* every case returns, so the first "model" entry found is decisive */
switch (mca_sensor_pwr_component.model) {
|
||||
case CPU_SANDYBRIDGE:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Sandybridge CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_SANDYBRIDGE_EP:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Sandybridge-EP CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_IVYBRIDGE:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Ivybridge CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_IVYBRIDGE_EP:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Ivybridge-EP CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_HASWELL:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Haswell CPU");
|
||||
return ORTE_SUCCESS;
|
||||
default:
|
||||
/* model was read but is not in the supported list */
orte_show_help("help-orte-sensor-pwr.txt", "unsupported-model",
|
||||
true, mca_sensor_pwr_component.model);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
obj = obj->next_sibling;
|
||||
}
|
||||
/* no socket carried a "model" entry.
 * NOTE(review): model is not assigned by this function on this path, so the
 * value passed here is whatever the component held before - confirm whether
 * the "no-topo-info" help text actually uses this argument.
 */
orte_show_help("help-orte-sensor-pwr.txt", "no-topo-info",
|
||||
true, mca_sensor_pwr_component.model);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
@ -26,7 +26,6 @@ static int orte_sensor_pwr_open(void);
|
||||
static int orte_sensor_pwr_close(void);
|
||||
static int orte_sensor_pwr_query(mca_base_module_t **module, int *priority);
|
||||
static int pwr_component_register(void);
|
||||
static int check_cpu_type(void);
|
||||
|
||||
orte_sensor_pwr_component_t mca_sensor_pwr_component = {
|
||||
{
|
||||
@ -60,15 +59,6 @@ static int orte_sensor_pwr_open(void)
|
||||
|
||||
static int orte_sensor_pwr_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* we only handle certain cpu types as we have to know the binary
|
||||
* layout of the msr file
|
||||
*/
|
||||
if (ORTE_SUCCESS != check_cpu_type()) {
|
||||
*priority = 0;
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
*priority = 50; /* ahead of heartbeat */
|
||||
*module = (mca_base_module_t *)&orte_sensor_pwr_module;
|
||||
return ORTE_SUCCESS;
|
||||
@ -96,68 +86,3 @@ static int pwr_component_register(void)
|
||||
& mca_sensor_pwr_component.test);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* list of supported chipsets */
|
||||
#define CPU_SANDYBRIDGE 42
|
||||
#define CPU_SANDYBRIDGE_EP 45
|
||||
#define CPU_IVYBRIDGE 58
|
||||
#define CPU_IVYBRIDGE_EP 62
|
||||
#define CPU_HASWELL 60
|
||||
|
||||
|
||||
/* go thru our topology and check the sockets
|
||||
* to see if they contain a match - at this time,
|
||||
* we don't support hetero sockets, so any mismatch
|
||||
* will disqualify us
|
||||
*/
|
||||
static int check_cpu_type(void)
|
||||
{
|
||||
hwloc_obj_t obj;
|
||||
unsigned k;
|
||||
|
||||
if (NULL == (obj = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_SOCKET, 0))) {
|
||||
/* there are no sockets identified in this machine */
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "no-sockets", true);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
while (NULL != obj) {
|
||||
for (k=0; k < obj->infos_count; k++) {
|
||||
if (0 == strcmp(obj->infos[k].name, "model") &&
|
||||
NULL != obj->infos[k].value) {
|
||||
mca_sensor_pwr_component.model = strtoul(obj->infos[k].value, NULL, 10);
|
||||
|
||||
switch (mca_sensor_pwr_component.model) {
|
||||
case CPU_SANDYBRIDGE:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Sandybridge CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_SANDYBRIDGE_EP:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Sandybridge-EP CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_IVYBRIDGE:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Ivybridge CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_IVYBRIDGE_EP:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Ivybridge-EP CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_HASWELL:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Haswell CPU");
|
||||
return ORTE_SUCCESS;
|
||||
default:
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "unsupported-model",
|
||||
true, mca_sensor_pwr_component.model);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
obj = obj->next_sibling;
|
||||
}
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "no-topo-info",
|
||||
true, mca_sensor_pwr_component.model);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user