Add new sensors for chip frequency and power (when permissions allow). Note that we don't support all chipsets at this time, but others are welcome to extend as desired.
cmr=v1.7.5:reviewer=rhc This commit was SVN r30399.
Этот коммит содержится в:
родитель
2435057a57
Коммит
32996cd705
37
orte/mca/sensor/freq/Makefile.am
Обычный файл
37
orte/mca/sensor/freq/Makefile.am
Обычный файл
@ -0,0 +1,37 @@
|
||||
#
|
||||
# Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Help text shipped with the component.
dist_pkgdata_DATA = help-orte-sensor-freq.txt

# Files that make up the component, whichever way it is built.
sources = \
        sensor_freq.c \
        sensor_freq.h \
        sensor_freq_component.c

# The component is built in this directory as either
#   mca_<type>_<name>.la     (DSO build, installed), or
#   libmca_<type>_<name>.la  (static build, not installed).

if MCA_BUILD_orte_sensor_freq_DSO
component_noinst =
component_install = mca_sensor_freq.la
else
component_noinst = libmca_sensor_freq.la
component_install =
endif

# DSO flavor
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_sensor_freq_la_SOURCES = $(sources)
mca_sensor_freq_la_LDFLAGS = -module -avoid-version

# Static flavor
noinst_LTLIBRARIES = $(component_noinst)
libmca_sensor_freq_la_SOURCES = $(sources)
libmca_sensor_freq_la_LDFLAGS = -module -avoid-version
|
37
orte/mca/sensor/freq/configure.m4
Обычный файл
37
orte/mca/sensor/freq/configure.m4
Обычный файл
@ -0,0 +1,37 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
# MCA_orte_sensor_freq_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Build the freq sensor only when --with-freq is given.  When it is
# requested, configure aborts unless the host is Linux and the cpu0
# cpufreq directory is readable.
AC_DEFUN([MCA_orte_sensor_freq_CONFIG], [
    AC_CONFIG_FILES([orte/mca/sensor/freq/Makefile])

    AC_ARG_WITH([freq],
                [AS_HELP_STRING([--with-freq],
                                [Build freq support (default: no)])],
                [], [with_freq=no])

    # do not build if support not requested
    AS_IF([test "$with_freq" != "no"],
          [AS_IF([test "$opal_found_linux" = "yes"],
                 [AS_IF([test -r "/sys/devices/system/cpu/cpu0/cpufreq/"],
                        [sensor_freq_happy=yes],
                        [AC_MSG_WARN([Core frequency sensing was requested but the required directory])
                         AC_MSG_WARN([was not found])
                         sensor_freq_happy=no])],
                 [AC_MSG_WARN([Core frequency sensing was requested but is only supported on Linux systems])
                  sensor_freq_happy=no])
           AS_IF([test "$sensor_freq_happy" = "yes"],
                 [$1],
                 [AC_MSG_ERROR([Cannot continue])
                  $2])
          ],
          [$2])
])dnl
|
20
orte/mca/sensor/freq/help-orte-sensor-freq.txt
Обычный файл
20
orte/mca/sensor/freq/help-orte-sensor-freq.txt
Обычный файл
@ -0,0 +1,20 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English general help file for the core frequency sensor
|
||||
#
|
||||
[mem-limit-exceeded]
|
||||
A process has exceeded the specified limit on memory usage:
|
||||
|
||||
Node: %s
|
||||
Process rank: %s
|
||||
Memory used: %luGbytes
|
||||
Memory limit: %luGbytes
|
||||
|
366
orte/mca/sensor/freq/sensor_freq.c
Обычный файл
366
orte/mca/sensor/freq/sensor_freq.c
Обычный файл
@ -0,0 +1,366 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
#include "orte/types.h"
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
#include <stdio.h>
|
||||
#ifdef HAVE_TIME_H
|
||||
#include <time.h>
|
||||
#endif
|
||||
#ifdef HAVE_DIRENT_H
|
||||
#include <dirent.h>
|
||||
#endif /* HAVE_DIRENT_H */
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/mca/db/db.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/mca/sensor/base/base.h"
|
||||
#include "orte/mca/sensor/base/sensor_private.h"
|
||||
#include "sensor_freq.h"
|
||||
|
||||
/* declare the API functions */
|
||||
static int init(void);
|
||||
static void finalize(void);
|
||||
static void start(orte_jobid_t job);
|
||||
static void stop(orte_jobid_t job);
|
||||
static void freq_sample(void);
|
||||
static void freq_log(opal_buffer_t *buf);
|
||||
|
||||
/* instantiate the module */
|
||||
orte_sensor_base_module_t orte_sensor_freq_module = {
|
||||
init,
|
||||
finalize,
|
||||
start,
|
||||
stop,
|
||||
freq_sample,
|
||||
freq_log
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
char *file;
|
||||
int core;
|
||||
float max_freq;
|
||||
float min_freq;
|
||||
} corefreq_tracker_t;
|
||||
static void ctr_con(corefreq_tracker_t *trk)
|
||||
{
|
||||
trk->file = NULL;
|
||||
}
|
||||
static void ctr_des(corefreq_tracker_t *trk)
|
||||
{
|
||||
if (NULL != trk->file) {
|
||||
free(trk->file);
|
||||
}
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(corefreq_tracker_t,
|
||||
opal_list_item_t,
|
||||
ctr_con, ctr_des);
|
||||
|
||||
static bool log_enabled = true;
|
||||
static opal_list_t tracking;
|
||||
|
||||
/*
 * Read the next line (at most 1023 characters) from fp.
 *
 * Returns a newly allocated copy of the line with the trailing newline,
 * if there is one, removed.  The caller owns the result and must free()
 * it.  Returns NULL when fp is NULL, on end-of-file or read error, or
 * when memory cannot be allocated.
 */
static char *orte_getline(FILE *fp)
{
    char input[1024];
    char *buff;
    size_t len;

    if (NULL == fp || NULL == fgets(input, sizeof(input), fp)) {
        return NULL;
    }

    /* fgets keeps the newline only when the whole line fit; cut the
     * string at the newline if present and leave it untouched otherwise
     * so that an unterminated last line does not lose a character */
    len = strcspn(input, "\n");

    buff = malloc(len + 1);
    if (NULL == buff) {
        return NULL;
    }
    memcpy(buff, input, len);
    buff[len] = '\0';

    return buff;
}
|
||||
|
||||
/* FOR FUTURE: extend to read cooling device speeds in
|
||||
* current speed: /sys/class/thermal/cooling_deviceN/cur_state
|
||||
* max speed: /sys/class/thermal/cooling_deviceN/max_state
|
||||
* type: /sys/class/thermal/cooling_deviceN/type
|
||||
*/
|
||||
static int init(void)
|
||||
{
|
||||
int k;
|
||||
DIR *cur_dirp = NULL;
|
||||
struct dirent *entry;
|
||||
char *filename, *tmp;
|
||||
FILE *fp;
|
||||
corefreq_tracker_t *trk;
|
||||
|
||||
OBJ_CONSTRUCT(&tracking, opal_list_t);
|
||||
|
||||
/*
|
||||
* Open up the base directory so we can get a listing
|
||||
*/
|
||||
if (NULL == (cur_dirp = opendir("/sys/devices/system/cpu"))) {
|
||||
OBJ_DESTRUCT(&tracking);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* For each directory
|
||||
*/
|
||||
while (NULL != (entry = readdir(cur_dirp))) {
|
||||
|
||||
/*
|
||||
* Skip the obvious
|
||||
*/
|
||||
if (0 == strncmp(entry->d_name, ".", strlen(".")) ||
|
||||
0 == strncmp(entry->d_name, "..", strlen(".."))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* look for cpu directories */
|
||||
if (0 != strncmp(entry->d_name, "cpu", strlen("cpu"))) {
|
||||
/* cannot be a cpu directory */
|
||||
continue;
|
||||
}
|
||||
/* if it ends in other than a digit, then it isn't a cpu directory */
|
||||
if (!isdigit(entry->d_name[strlen(entry->d_name)-1])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* track the info for this core */
|
||||
trk = OBJ_NEW(corefreq_tracker_t);
|
||||
/* trailing digits are the core id */
|
||||
for (k=strlen(entry->d_name)-1; 0 <= k; k--) {
|
||||
if (!isdigit(entry->d_name[k])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
trk->core = strtoul(&entry->d_name[k], NULL, 10);
|
||||
trk->file = opal_os_path(false, "/sys/devices/system/cpu", entry->d_name, "cpufreq", "cpuinfo_cur_freq", NULL);
|
||||
|
||||
/* read the static info */
|
||||
filename = opal_os_path(false, "/sys/devices/system/cpu", entry->d_name, "cpufreq", "cpuinfo_max_freq", NULL);
|
||||
fp = fopen(filename, "r");
|
||||
tmp = orte_getline(fp);
|
||||
fclose(fp);
|
||||
trk->max_freq = strtoul(tmp, NULL, 10) / 1000000.0;
|
||||
free(filename);
|
||||
|
||||
filename = opal_os_path(false, "/sys/devices/system/cpu", entry->d_name, "cpufreq", "cpuinfo_min_freq", NULL);
|
||||
fp = fopen(filename, "r");
|
||||
tmp = orte_getline(fp);
|
||||
fclose(fp);
|
||||
trk->min_freq = strtoul(tmp, NULL, 10) / 1000000.0;
|
||||
free(filename);
|
||||
|
||||
/* add to our list */
|
||||
opal_list_append(&tracking, &trk->super);
|
||||
/* cleanup */
|
||||
free(tmp);
|
||||
}
|
||||
closedir(cur_dirp);
|
||||
|
||||
if (0 == opal_list_get_size(&tracking)) {
|
||||
/* nothing to read */
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void finalize(void)
|
||||
{
|
||||
OPAL_LIST_DESTRUCT(&tracking);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start monitoring of local temps
|
||||
*/
|
||||
static void start(orte_jobid_t jobid)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Stop monitoring core frequencies for a job.
 * The function body is empty: it performs no work.
 */
static void stop(orte_jobid_t jobid)
{
    (void)jobid;
}
|
||||
|
||||
static void freq_sample(void)
|
||||
{
|
||||
int ret;
|
||||
corefreq_tracker_t *trk;
|
||||
FILE *fp;
|
||||
char *freq;
|
||||
float ghz;
|
||||
opal_buffer_t data, *bptr;
|
||||
int32_t ncores;
|
||||
time_t now;
|
||||
char time_str[40];
|
||||
char *timestamp_str;
|
||||
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s sampling freq",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* prep to store the results */
|
||||
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
||||
|
||||
/* store our hostname */
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &orte_process_info.nodename, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* store the number of cores */
|
||||
ncores = (int32_t)opal_list_get_size(&tracking);
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &ncores, 1, OPAL_INT32))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* get the sample time */
|
||||
now = time(NULL);
|
||||
/* pass the time along as a simple string */
|
||||
strftime(time_str, sizeof(time_str), "%F %T%z", localtime(&now));
|
||||
asprintf(×tamp_str, "%s", time_str);
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, ×tamp_str, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
free(timestamp_str);
|
||||
return;
|
||||
}
|
||||
free(timestamp_str);
|
||||
|
||||
OPAL_LIST_FOREACH(trk, &tracking, corefreq_tracker_t) {
|
||||
/* read the temp */
|
||||
if (NULL == (fp = fopen(trk->file, "r"))) {
|
||||
continue;
|
||||
}
|
||||
while (NULL != (freq = orte_getline(fp))) {
|
||||
ghz = strtoul(freq, NULL, 10) / 1000000.0;
|
||||
opal_output_verbose(5, orte_sensor_base_framework.framework_output,
|
||||
"%s sensor:freq: Core %d freq %f max %f min %f",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
trk->core, ghz, trk->max_freq, trk->min_freq);
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &ghz, 1, OPAL_FLOAT))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
free(freq);
|
||||
return;
|
||||
}
|
||||
free(freq);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
/* xfer the data for transmission */
|
||||
bptr = &data;
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
return;
|
||||
}
|
||||
OBJ_DESTRUCT(&data);
|
||||
}
|
||||
|
||||
/*
 * Unpack one sample produced by freq_sample() and hand it to the database
 * as a "freq" log entry.
 *
 * The entry holds the sample time under key "ctime" followed by one float
 * per core under keys "core0", "core1", ...  If the database rejects the
 * entry, logging is switched off for the rest of the run.
 *
 * Every exit path releases the unpacked strings and the value array, and
 * only elements that were actually constructed are destructed.
 */
static void freq_log(opal_buffer_t *sample)
{
    char *hostname = NULL;
    char *sampletime = NULL;
    int rc;
    int32_t n, ncores;
    opal_value_t *kv = NULL;
    opal_value_t *cur;
    float fval;
    int i;
    int nkv = 0;    /* number of kv[] elements constructed so far */

    if (!log_enabled) {
        return;
    }

    /* unpack the host this came from */
    n = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &hostname, &n, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    /* and the number of cores on that host */
    n = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &ncores, &n, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    /* sample time */
    n = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &sampletime, &n, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    /* the count came off the wire - refuse nonsense before sizing with it */
    if (0 > ncores) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        goto cleanup;
    }

    opal_output_verbose(3, orte_sensor_base_framework.framework_output,
                        "%s Received log from host %s with %d cores",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == hostname) ? "NULL" : hostname, ncores);

    /* xfr to storage */
    kv = malloc(((size_t)ncores + 1) * sizeof(opal_value_t));
    if (NULL == kv) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        goto cleanup;
    }

    /* load the sample time at the start; the value takes over the string */
    OBJ_CONSTRUCT(&kv[0], opal_value_t);
    nkv = 1;
    kv[0].key = strdup("ctime");
    kv[0].type = OPAL_STRING;
    kv[0].data.string = sampletime;
    sampletime = NULL;

    for (i = 0; i < ncores; i++) {
        n = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &fval, &n, OPAL_FLOAT))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        cur = &kv[nkv];
        OBJ_CONSTRUCT(cur, opal_value_t);
        nkv++;
        cur->type = OPAL_FLOAT;
        cur->data.fval = fval;
        if (0 > asprintf(&cur->key, "core%d", i)) {
            /* key contents are undefined after a failed asprintf */
            cur->key = NULL;
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            goto cleanup;
        }
    }

    /* store it */
    if (ORTE_SUCCESS != (rc = opal_db.add_log("freq", kv, ncores+1))) {
        /* don't bark about it - just quietly disable the log */
        log_enabled = false;
    }

 cleanup:
    /* cleanup the xfr storage */
    for (i = 0; i < nkv; i++) {
        OBJ_DESTRUCT(&kv[i]);
    }
    free(kv);
    free(sampletime);
    free(hostname);
}
|
35
orte/mca/sensor/freq/sensor_freq.h
Обычный файл
35
orte/mca/sensor/freq/sensor_freq.h
Обычный файл
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* FREQ resource manager sensor
|
||||
*/
|
||||
#ifndef ORTE_SENSOR_FREQ_H
|
||||
#define ORTE_SENSOR_FREQ_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/mca/sensor/sensor.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
orte_sensor_base_component_t super;
|
||||
bool test;
|
||||
} orte_sensor_freq_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_sensor_freq_component_t mca_sensor_freq_component;
|
||||
extern orte_sensor_base_module_t orte_sensor_freq_module;
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
91
orte/mca/sensor/freq/sensor_freq_component.c
Обычный файл
91
orte/mca/sensor/freq/sensor_freq_component.c
Обычный файл
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_var.h"
|
||||
|
||||
#include "orte/mca/sensor/base/sensor_private.h"
|
||||
#include "sensor_freq.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int orte_sensor_freq_open(void);
|
||||
static int orte_sensor_freq_close(void);
|
||||
static int orte_sensor_freq_query(mca_base_module_t **module, int *priority);
|
||||
static int freq_component_register(void);
|
||||
|
||||
orte_sensor_freq_component_t mca_sensor_freq_component = {
|
||||
{
|
||||
{
|
||||
ORTE_SENSOR_BASE_VERSION_1_0_0,
|
||||
|
||||
"freq", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_sensor_freq_open, /* component open */
|
||||
orte_sensor_freq_close, /* component close */
|
||||
orte_sensor_freq_query, /* component query */
|
||||
freq_component_register
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
"freq" // data being sensed
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int orte_sensor_freq_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Component query: always offer the freq module.
 *
 * The component volunteers whenever it was built, even on a node that will
 * not sample, because it must be present to log results received from
 * elsewhere.  No runtime check is made here; the component relies on the
 * check done at configure time.
 *
 * @param module    set to the freq sensor module
 * @param priority  set to 50 (ahead of heartbeat)
 * @return ORTE_SUCCESS
 */
static int orte_sensor_freq_query(mca_base_module_t **module, int *priority)
{
    *module = (mca_base_module_t *)&orte_sensor_freq_module;
    *priority = 50;  /* ahead of heartbeat */

    return ORTE_SUCCESS;
}
|
||||
|
||||
/**
|
||||
* Close all subsystems.
|
||||
*/
|
||||
|
||||
static int orte_sensor_freq_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int freq_component_register(void)
|
||||
{
|
||||
mca_base_component_t *c = &mca_sensor_freq_component.super.base_version;
|
||||
|
||||
mca_sensor_freq_component.test = false;
|
||||
(void) mca_base_component_var_register (c, "test",
|
||||
"Generate and pass test vector",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
& mca_sensor_freq_component.test);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
39
orte/mca/sensor/pwr/Makefile.am
Обычный файл
39
orte/mca/sensor/pwr/Makefile.am
Обычный файл
@ -0,0 +1,39 @@
|
||||
#
|
||||
# Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Help text shipped with the component.
dist_ompidata_DATA = help-orte-sensor-pwr.txt

sources = \
        sensor_pwr.h \
        sensor_pwr.c \
        sensor_pwr_component.c

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

if MCA_BUILD_orte_sensor_pwr_DSO
component_noinst =
component_install = mca_sensor_pwr.la
else
component_noinst = libmca_sensor_pwr.la
component_install =
endif

# sensor_pwr.c calls pow(), so the math library has to be linked.
# Automake only honors <lib>_LIBADD for libtool libraries; a <lib>_LIBS
# variable is silently ignored.

mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_sensor_pwr_la_SOURCES = $(sources)
mca_sensor_pwr_la_LDFLAGS = -module -avoid-version
mca_sensor_pwr_la_LIBADD = -lm

noinst_LTLIBRARIES = $(component_noinst)
libmca_sensor_pwr_la_SOURCES = $(sources)
libmca_sensor_pwr_la_LDFLAGS = -module -avoid-version
libmca_sensor_pwr_la_LIBADD = -lm
|
37
orte/mca/sensor/pwr/configure.m4
Обычный файл
37
orte/mca/sensor/pwr/configure.m4
Обычный файл
@ -0,0 +1,37 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
# MCA_orte_sensor_pwr_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Build the pwr sensor only when --with-pwr is given.  When it is
# requested, configure aborts unless the host is Linux and the
# /dev/cpu/0/msr device file exists.
AC_DEFUN([MCA_orte_sensor_pwr_CONFIG], [
    AC_CONFIG_FILES([orte/mca/sensor/pwr/Makefile])

    AC_ARG_WITH([pwr],
                [AS_HELP_STRING([--with-pwr],
                                [Build pwr support (default: no)])],
                [], [with_pwr=no])

    # do not build if support not requested
    AS_IF([test "$with_pwr" != "no"],
          [AS_IF([test "$opal_found_linux" = "yes"],
                 [AS_IF([test -e "/dev/cpu/0/msr"],
                        [sensor_pwr_happy=yes],
                        [AC_MSG_WARN([Core power sensing was requested but the required device file])
                         AC_MSG_WARN([/dev/cpu/0/msr was not found])
                         sensor_pwr_happy=no])],
                 [AC_MSG_WARN([Core power sensing was requested but is only supported on Intel-based Linux systems])
                  sensor_pwr_happy=no])
           AS_IF([test "$sensor_pwr_happy" = "yes"],
                 [$1],
                 [AC_MSG_ERROR([Cannot continue])
                  $2])
          ],
          [$2])
])dnl
|
38
orte/mca/sensor/pwr/help-orte-sensor-pwr.txt
Обычный файл
38
orte/mca/sensor/pwr/help-orte-sensor-pwr.txt
Обычный файл
@ -0,0 +1,38 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
#
|
||||
[no-access]
|
||||
Power sensing was requested, but you lack access authority
|
||||
to the required path:
|
||||
|
||||
Path: %s
|
||||
|
||||
We will continue to operate, but will not monitor power.
|
||||
[no-sockets]
|
||||
Power sensing was requested, but your topology doesn't
|
||||
identify sockets and we are therefore unable to verify
|
||||
the processor type as supported.
|
||||
|
||||
We will continue to operate, but will not monitor power.
|
||||
[unsupported-model]
|
||||
Power sensing was requested, but your processor type
|
||||
is not currently supported.
|
||||
|
||||
Detected model: %d
|
||||
|
||||
We will continue to operate, but will not monitor power.
|
||||
[no-topo-info]
|
||||
Power sensing was requested, but the topology info
|
||||
required to verify processor-level support was not
|
||||
available. This usually means that your system lacks
|
||||
the required revision level for hwloc.
|
||||
|
||||
We will continue to operate, but will not monitor power.
|
358
orte/mca/sensor/pwr/sensor_pwr.c
Обычный файл
358
orte/mca/sensor/pwr/sensor_pwr.c
Обычный файл
@ -0,0 +1,358 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
#include "orte/types.h"
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
#include <stdio.h>
|
||||
#ifdef HAVE_TIME_H
|
||||
#include <time.h>
|
||||
#endif
|
||||
#ifdef HAVE_DIRENT_H
|
||||
#include <dirent.h>
|
||||
#endif /* HAVE_DIRENT_H */
|
||||
#include <ctype.h>
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/db/db.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/mca/sensor/base/base.h"
|
||||
#include "orte/mca/sensor/base/sensor_private.h"
|
||||
#include "sensor_pwr.h"
|
||||
|
||||
/* declare the API functions */
|
||||
static int init(void);
|
||||
static void finalize(void);
|
||||
static void start(orte_jobid_t job);
|
||||
static void stop(orte_jobid_t job);
|
||||
static void pwr_sample(void);
|
||||
static void pwr_log(opal_buffer_t *buf);
|
||||
|
||||
/* instantiate the module */
|
||||
orte_sensor_base_module_t orte_sensor_pwr_module = {
|
||||
init,
|
||||
finalize,
|
||||
start,
|
||||
stop,
|
||||
pwr_sample,
|
||||
pwr_log
|
||||
};
|
||||
|
||||
/* MSR offset read once per core by init() to obtain the power-unit exponent */
#define MSR_RAPL_POWER_UNIT 0x606

/*
 * Platform specific RAPL bitmasks.
 */
/* MSR offset read on every sample by pwr_sample() */
#define MSR_PKG_POWER_INFO 0x614
/* position and width of the power-unit field within MSR_RAPL_POWER_UNIT */
#define POWER_UNIT_OFFSET 0
#define POWER_UNIT_MASK 0x0F
|
||||
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
char *file;
|
||||
int core;
|
||||
double units;
|
||||
} corepwr_tracker_t;
|
||||
static void ctr_con(corepwr_tracker_t *trk)
|
||||
{
|
||||
trk->file = NULL;
|
||||
}
|
||||
static void ctr_des(corepwr_tracker_t *trk)
|
||||
{
|
||||
if (NULL != trk->file) {
|
||||
free(trk->file);
|
||||
}
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(corepwr_tracker_t,
|
||||
opal_list_item_t,
|
||||
ctr_con, ctr_des);
|
||||
|
||||
static bool log_enabled = true;
|
||||
static opal_list_t tracking;
|
||||
|
||||
static int read_msr(int fd, long long *value, int offset)
|
||||
{
|
||||
uint64_t data;
|
||||
|
||||
if (pread(fd, &data, sizeof data, offset) != sizeof(data)) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
*value = (long long)data;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int init(void)
|
||||
{
|
||||
int fd;
|
||||
DIR *cur_dirp = NULL;
|
||||
struct dirent *entry;
|
||||
corepwr_tracker_t *trk;
|
||||
long long units;
|
||||
|
||||
OBJ_CONSTRUCT(&tracking, opal_list_t);
|
||||
|
||||
/*
|
||||
* Open up the base directory so we can get a listing
|
||||
*/
|
||||
if (NULL == (cur_dirp = opendir("/dev/cpu"))) {
|
||||
OBJ_DESTRUCT(&tracking);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* For each directory
|
||||
*/
|
||||
while (NULL != (entry = readdir(cur_dirp))) {
|
||||
|
||||
/*
|
||||
* Skip the obvious
|
||||
*/
|
||||
if (0 == strncmp(entry->d_name, ".", strlen(".")) ||
|
||||
0 == strncmp(entry->d_name, "..", strlen(".."))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* if it contains anything other than a digit, then it isn't a cpu directory */
|
||||
if (!isdigit(entry->d_name[strlen(entry->d_name)-1])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* track the info for this core */
|
||||
trk = OBJ_NEW(corepwr_tracker_t);
|
||||
trk->core = strtoul(entry->d_name, NULL, 10);
|
||||
trk->file = opal_os_path(false, "/dev/cpu", entry->d_name, "msr", NULL);
|
||||
|
||||
/* get the power units for this core */
|
||||
if (0 >= (fd = open(trk->file, O_RDONLY))) {
|
||||
/* can't access file */
|
||||
OBJ_RELEASE(trk);
|
||||
continue;
|
||||
}
|
||||
if (ORTE_SUCCESS != read_msr(fd, &units, MSR_RAPL_POWER_UNIT)) {
|
||||
/* can't read required info */
|
||||
OBJ_RELEASE(trk);
|
||||
continue;
|
||||
}
|
||||
trk->units = pow(0.5,(double)(units & POWER_UNIT_MASK));
|
||||
|
||||
/* add to our list */
|
||||
opal_list_append(&tracking, &trk->super);
|
||||
}
|
||||
closedir(cur_dirp);
|
||||
|
||||
if (0 == opal_list_get_size(&tracking)) {
|
||||
/* nothing to read */
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void finalize(void)
|
||||
{
|
||||
OPAL_LIST_DESTRUCT(&tracking);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start monitoring of local temps
|
||||
*/
|
||||
static void start(orte_jobid_t jobid)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Stop monitoring power for a job.
 * The function body is empty: it performs no work.
 */
static void stop(orte_jobid_t jobid)
{
    (void)jobid;
}
|
||||
|
||||
static void pwr_sample(void)
|
||||
{
|
||||
corepwr_tracker_t *trk, *nxt;
|
||||
opal_buffer_t data, *bptr;
|
||||
int32_t ncores;
|
||||
time_t now;
|
||||
char time_str[40];
|
||||
char *timestamp_str;
|
||||
long long value;
|
||||
int fd, ret;
|
||||
float power;
|
||||
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"%s sampling power",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* prep to store the results */
|
||||
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
||||
|
||||
/* store our hostname */
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &orte_process_info.nodename, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* store the number of cores */
|
||||
ncores = (int32_t)opal_list_get_size(&tracking);
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &ncores, 1, OPAL_INT32))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* get the sample time */
|
||||
now = time(NULL);
|
||||
/* pass the time along as a simple string */
|
||||
strftime(time_str, sizeof(time_str), "%F %T%z", localtime(&now));
|
||||
asprintf(×tamp_str, "%s", time_str);
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, ×tamp_str, 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
free(timestamp_str);
|
||||
return;
|
||||
}
|
||||
free(timestamp_str);
|
||||
|
||||
OPAL_LIST_FOREACH_SAFE(trk, nxt, &tracking, corepwr_tracker_t) {
|
||||
if (0 >= (fd = open(trk->file, O_RDONLY))) {
|
||||
/* disable this one - cannot read the file */
|
||||
opal_list_remove_item(&tracking, &trk->super);
|
||||
OBJ_RELEASE(trk);
|
||||
continue;
|
||||
}
|
||||
if (ORTE_SUCCESS != read_msr(fd, &value, MSR_PKG_POWER_INFO)) {
|
||||
/* disable this one - cannot read the file */
|
||||
opal_list_remove_item(&tracking, &trk->super);
|
||||
OBJ_RELEASE(trk);
|
||||
close(fd);
|
||||
continue;
|
||||
}
|
||||
power = trk->units * (double)(value & 0x7fff);
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &power, 1, OPAL_FLOAT))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
close(fd);
|
||||
return;
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/* xfer the data for transmission */
|
||||
bptr = &data;
|
||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_DESTRUCT(&data);
|
||||
return;
|
||||
}
|
||||
OBJ_DESTRUCT(&data);
|
||||
}
|
||||
|
||||
/*
 * Unpack one sample produced by pwr_sample() and hand it to the database
 * as a "pwr" log entry.
 *
 * The entry holds the sample time under key "ctime" followed by one float
 * per core under keys "core0", "core1", ...  If the database rejects the
 * entry, logging is switched off for the rest of the run.
 *
 * Every exit path releases the unpacked strings and the value array, and
 * only elements that were actually constructed are destructed.
 */
static void pwr_log(opal_buffer_t *sample)
{
    char *hostname = NULL;
    char *sampletime = NULL;
    int rc;
    int32_t n, ncores;
    opal_value_t *kv = NULL;
    opal_value_t *cur;
    float fval;
    int i;
    int nkv = 0;    /* number of kv[] elements constructed so far */

    if (!log_enabled) {
        return;
    }

    /* unpack the host this came from */
    n = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &hostname, &n, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    /* and the number of cores on that host */
    n = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &ncores, &n, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    /* sample time */
    n = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &sampletime, &n, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    /* the count came off the wire - refuse nonsense before sizing with it */
    if (0 > ncores) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        goto cleanup;
    }

    opal_output_verbose(3, orte_sensor_base_framework.framework_output,
                        "%s Received log from host %s with %d cores",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == hostname) ? "NULL" : hostname, ncores);

    /* xfr to storage */
    kv = malloc(((size_t)ncores + 1) * sizeof(opal_value_t));
    if (NULL == kv) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        goto cleanup;
    }

    /* load the sample time at the start; the value takes over the string */
    OBJ_CONSTRUCT(&kv[0], opal_value_t);
    nkv = 1;
    kv[0].key = strdup("ctime");
    kv[0].type = OPAL_STRING;
    kv[0].data.string = sampletime;
    sampletime = NULL;

    for (i = 0; i < ncores; i++) {
        n = 1;
        if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &fval, &n, OPAL_FLOAT))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        cur = &kv[nkv];
        OBJ_CONSTRUCT(cur, opal_value_t);
        nkv++;
        cur->type = OPAL_FLOAT;
        cur->data.fval = fval;
        if (0 > asprintf(&cur->key, "core%d", i)) {
            /* key contents are undefined after a failed asprintf */
            cur->key = NULL;
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            goto cleanup;
        }
    }

    /* store it */
    if (ORTE_SUCCESS != (rc = opal_db.add_log("pwr", kv, ncores+1))) {
        /* don't bark about it - just quietly disable the log */
        log_enabled = false;
    }

 cleanup:
    /* cleanup the xfr storage */
    for (i = 0; i < nkv; i++) {
        OBJ_DESTRUCT(&kv[i]);
    }
    free(kv);
    free(sampletime);
    free(hostname);
}
|
36
orte/mca/sensor/pwr/sensor_pwr.h
Обычный файл
36
orte/mca/sensor/pwr/sensor_pwr.h
Обычный файл
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* PWR resource manager sensor
|
||||
*/
|
||||
#ifndef ORTE_SENSOR_PWR_H
|
||||
#define ORTE_SENSOR_PWR_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/mca/sensor/sensor.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
orte_sensor_base_component_t super;
|
||||
int model;
|
||||
bool test;
|
||||
} orte_sensor_pwr_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_sensor_pwr_component_t mca_sensor_pwr_component;
|
||||
extern orte_sensor_base_module_t orte_sensor_pwr_module;
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
163
orte/mca/sensor/pwr/sensor_pwr_component.c
Обычный файл
163
orte/mca/sensor/pwr/sensor_pwr_component.c
Обычный файл
@ -0,0 +1,163 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_var.h"
|
||||
#include "opal/mca/hwloc/hwloc.h"
|
||||
#include "opal/util/os_dirpath.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "orte/mca/sensor/base/base.h"
|
||||
#include "sensor_pwr.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int orte_sensor_pwr_open(void);
|
||||
static int orte_sensor_pwr_close(void);
|
||||
static int orte_sensor_pwr_query(mca_base_module_t **module, int *priority);
|
||||
static int pwr_component_register(void);
|
||||
static int check_cpu_type(void);
|
||||
|
||||
orte_sensor_pwr_component_t mca_sensor_pwr_component = {
|
||||
{
|
||||
{
|
||||
ORTE_SENSOR_BASE_VERSION_1_0_0,
|
||||
|
||||
"pwr", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_sensor_pwr_open, /* component open */
|
||||
orte_sensor_pwr_close, /* component close */
|
||||
orte_sensor_pwr_query, /* component query */
|
||||
pwr_component_register
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
"pwr" // data being sensed
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int orte_sensor_pwr_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Component query hook: offer the pwr module only when
 * check_cpu_type() accepts the local chipset.
 *
 * @param module   (OUT) the pwr sensor module on success, NULL on failure
 * @param priority (OUT) 50 on success, 0 on failure
 *
 * @return ORTE_SUCCESS when the chipset is supported, ORTE_ERROR otherwise
 */
static int orte_sensor_pwr_query(mca_base_module_t **module, int *priority)
{
    /* we have to know the binary layout of the msr file, so only
     * certain cpu types can be handled
     */
    const int supported = (ORTE_SUCCESS == check_cpu_type());

    if (supported) {
        *priority = 50;  /* ahead of heartbeat */
        *module = (mca_base_module_t *)&orte_sensor_pwr_module;
        return ORTE_SUCCESS;
    }

    /* unsupported chipset - take ourselves out of consideration */
    *priority = 0;
    *module = NULL;
    return ORTE_ERROR;
}
|
||||
|
||||
/**
|
||||
* Close all subsystems.
|
||||
*/
|
||||
|
||||
static int orte_sensor_pwr_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int pwr_component_register(void)
|
||||
{
|
||||
mca_base_component_t *c = &mca_sensor_pwr_component.super.base_version;
|
||||
|
||||
mca_sensor_pwr_component.test = false;
|
||||
(void) mca_base_component_var_register (c, "test",
|
||||
"Generate and pass test vector",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
& mca_sensor_pwr_component.test);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* list of supported chipsets */
|
||||
#define CPU_SANDYBRIDGE 42
|
||||
#define CPU_SANDYBRIDGE_EP 45
|
||||
#define CPU_IVYBRIDGE 58
|
||||
#define CPU_IVYBRIDGE_EP 62
|
||||
#define CPU_HASWELL 60
|
||||
|
||||
|
||||
/* go thru our topology and check the sockets
|
||||
* to see if they contain a match - at this time,
|
||||
* we don't support hetero sockets, so any mismatch
|
||||
* will disqualify us
|
||||
*/
|
||||
static int check_cpu_type(void)
|
||||
{
|
||||
hwloc_obj_t obj;
|
||||
unsigned k;
|
||||
|
||||
if (NULL == (obj = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_SOCKET, 0))) {
|
||||
/* there are no sockets identified in this machine */
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "no-sockets", true);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
while (NULL != obj) {
|
||||
for (k=0; k < obj->infos_count; k++) {
|
||||
if (0 == strcmp(obj->infos[k].name, "model") &&
|
||||
NULL != obj->infos[k].value) {
|
||||
mca_sensor_pwr_component.model = strtoul(obj->infos[k].value, NULL, 10);
|
||||
|
||||
switch (mca_sensor_pwr_component.model) {
|
||||
case CPU_SANDYBRIDGE:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Sandybridge CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_SANDYBRIDGE_EP:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Sandybridge-EP CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_IVYBRIDGE:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Ivybridge CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_IVYBRIDGE_EP:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Ivybridge-EP CPU");
|
||||
return ORTE_SUCCESS;
|
||||
case CPU_HASWELL:
|
||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||
"sensor:pwr Found Haswell CPU");
|
||||
return ORTE_SUCCESS;
|
||||
default:
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "unsupported-model",
|
||||
true, mca_sensor_pwr_component.model);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
obj = obj->next_sibling;
|
||||
}
|
||||
orte_show_help("help-orte-sensor-pwr.txt", "no-topo-info",
|
||||
true, mca_sensor_pwr_component.model);
|
||||
return ORTE_ERROR;
|
||||
}
|
Загрузка…
Ссылка в новой задаче
Block a user