Clean up the sensor system so that things go in the right place, segfaults under abnormal conditions are avoided, etc.
cmr=v1.7.5:reviewer=rhc
This commit was SVN r30409.
Этот коммит содержится в:
родитель
31acdb15bc
Коммит
e496e348a4
@ -8,7 +8,7 @@
|
|||||||
# $HEADER$
|
# $HEADER$
|
||||||
#
|
#
|
||||||
|
|
||||||
dist_pkgdata_DATA = help-orte-sensor-coretemp.txt
|
dist_ompidata_DATA = help-orte-sensor-coretemp.txt
|
||||||
|
|
||||||
sources = \
|
sources = \
|
||||||
sensor_coretemp.c \
|
sensor_coretemp.c \
|
||||||
|
@ -67,13 +67,13 @@ typedef struct {
|
|||||||
char *label;
|
char *label;
|
||||||
float critical_temp;
|
float critical_temp;
|
||||||
float max_temp;
|
float max_temp;
|
||||||
} core_tracker_t;
|
} coretemp_tracker_t;
|
||||||
static void ctr_con(core_tracker_t *trk)
|
static void ctr_con(coretemp_tracker_t *trk)
|
||||||
{
|
{
|
||||||
trk->file = NULL;
|
trk->file = NULL;
|
||||||
trk->label = NULL;
|
trk->label = NULL;
|
||||||
}
|
}
|
||||||
static void ctr_des(core_tracker_t *trk)
|
static void ctr_des(coretemp_tracker_t *trk)
|
||||||
{
|
{
|
||||||
if (NULL != trk->file) {
|
if (NULL != trk->file) {
|
||||||
free(trk->file);
|
free(trk->file);
|
||||||
@ -82,7 +82,7 @@ static void ctr_des(core_tracker_t *trk)
|
|||||||
free(trk->label);
|
free(trk->label);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
OBJ_CLASS_INSTANCE(core_tracker_t,
|
OBJ_CLASS_INSTANCE(coretemp_tracker_t,
|
||||||
opal_list_item_t,
|
opal_list_item_t,
|
||||||
ctr_con, ctr_des);
|
ctr_con, ctr_des);
|
||||||
|
|
||||||
@ -111,29 +111,22 @@ static char *orte_getline(FILE *fp)
|
|||||||
*/
|
*/
|
||||||
static int init(void)
|
static int init(void)
|
||||||
{
|
{
|
||||||
int ret;
|
|
||||||
DIR *cur_dirp = NULL, *tdir;
|
DIR *cur_dirp = NULL, *tdir;
|
||||||
struct dirent *dir_entry, *entry;
|
struct dirent *dir_entry, *entry;
|
||||||
char *dirname, *filename, *ptr, *tmp;
|
char *dirname, *filename, *ptr, *tmp;
|
||||||
size_t tlen = strlen("temp");
|
size_t tlen = strlen("temp");
|
||||||
size_t ilen = strlen("_input");
|
size_t ilen = strlen("_input");
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
core_tracker_t *trk;
|
coretemp_tracker_t *trk;
|
||||||
int socket;
|
int socket;
|
||||||
|
|
||||||
OBJ_CONSTRUCT(&tracking, opal_list_t);
|
OBJ_CONSTRUCT(&tracking, opal_list_t);
|
||||||
|
|
||||||
if (ORTE_SUCCESS != (ret = opal_os_dirpath_access("/sys/bus/platform/devices", 0))) {
|
|
||||||
/* if the directory doesn't exist, or we don't have
|
|
||||||
* access to it, then disqualify us
|
|
||||||
*/
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Open up the base directory so we can get a listing
|
* Open up the base directory so we can get a listing
|
||||||
*/
|
*/
|
||||||
if (NULL == (cur_dirp = opendir("/sys/bus/platform/devices"))) {
|
if (NULL == (cur_dirp = opendir("/sys/bus/platform/devices"))) {
|
||||||
|
OBJ_DESTRUCT(&tracking);
|
||||||
return ORTE_ERROR;
|
return ORTE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -175,7 +168,7 @@ static int init(void)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* track the info for this core */
|
/* track the info for this core */
|
||||||
trk = OBJ_NEW(core_tracker_t);
|
trk = OBJ_NEW(coretemp_tracker_t);
|
||||||
trk->socket = socket;
|
trk->socket = socket;
|
||||||
trk->file = opal_os_path(false, dirname, entry->d_name, NULL);
|
trk->file = opal_os_path(false, dirname, entry->d_name, NULL);
|
||||||
/* take the part up to the first underscore as this will
|
/* take the part up to the first underscore as this will
|
||||||
@ -252,7 +245,7 @@ static void stop(orte_jobid_t jobid)
|
|||||||
static void coretemp_sample(void)
|
static void coretemp_sample(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
core_tracker_t *trk;
|
coretemp_tracker_t *trk;
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
char *temp;
|
char *temp;
|
||||||
float degc;
|
float degc;
|
||||||
@ -261,9 +254,20 @@ static void coretemp_sample(void)
|
|||||||
time_t now;
|
time_t now;
|
||||||
char time_str[40];
|
char time_str[40];
|
||||||
char *timestamp_str;
|
char *timestamp_str;
|
||||||
|
bool packed;
|
||||||
|
|
||||||
/* prep to store the results */
|
/* prep to store the results */
|
||||||
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
||||||
|
packed = false;
|
||||||
|
|
||||||
|
/* pack our name */
|
||||||
|
temp = strdup("coretemp");
|
||||||
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &temp, 1, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
OBJ_DESTRUCT(&data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
free(temp);
|
||||||
|
|
||||||
/* store our hostname */
|
/* store our hostname */
|
||||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &orte_process_info.nodename, 1, OPAL_STRING))) {
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &orte_process_info.nodename, 1, OPAL_STRING))) {
|
||||||
@ -293,7 +297,7 @@ static void coretemp_sample(void)
|
|||||||
}
|
}
|
||||||
free(timestamp_str);
|
free(timestamp_str);
|
||||||
|
|
||||||
OPAL_LIST_FOREACH(trk, &tracking, core_tracker_t) {
|
OPAL_LIST_FOREACH(trk, &tracking, coretemp_tracker_t) {
|
||||||
/* read the temp */
|
/* read the temp */
|
||||||
fp = fopen(trk->file, "r");
|
fp = fopen(trk->file, "r");
|
||||||
while (NULL != (temp = orte_getline(fp))) {
|
while (NULL != (temp = orte_getline(fp))) {
|
||||||
@ -309,6 +313,7 @@ static void coretemp_sample(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
free(temp);
|
free(temp);
|
||||||
|
packed = true;
|
||||||
/* check for exceed critical temp */
|
/* check for exceed critical temp */
|
||||||
if (trk->critical_temp < degc) {
|
if (trk->critical_temp < degc) {
|
||||||
/* alert the errmgr - this is a critical problem */
|
/* alert the errmgr - this is a critical problem */
|
||||||
@ -328,11 +333,13 @@ static void coretemp_sample(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* xfer the data for transmission */
|
/* xfer the data for transmission */
|
||||||
bptr = &data;
|
if (packed) {
|
||||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) {
|
bptr = &data;
|
||||||
ORTE_ERROR_LOG(ret);
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) {
|
||||||
OBJ_DESTRUCT(&data);
|
ORTE_ERROR_LOG(ret);
|
||||||
return;
|
OBJ_DESTRUCT(&data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
OBJ_DESTRUCT(&data);
|
OBJ_DESTRUCT(&data);
|
||||||
}
|
}
|
||||||
@ -377,7 +384,7 @@ static void coretemp_log(opal_buffer_t *sample)
|
|||||||
(NULL == hostname) ? "NULL" : hostname, ncores);
|
(NULL == hostname) ? "NULL" : hostname, ncores);
|
||||||
|
|
||||||
/* xfr to storage */
|
/* xfr to storage */
|
||||||
kv = malloc((ncores+1) * sizeof(opal_value_t));
|
kv = malloc((ncores+2) * sizeof(opal_value_t));
|
||||||
|
|
||||||
/* load the sample time at the start */
|
/* load the sample time at the start */
|
||||||
OBJ_CONSTRUCT(&kv[0], opal_value_t);
|
OBJ_CONSTRUCT(&kv[0], opal_value_t);
|
||||||
@ -386,27 +393,37 @@ static void coretemp_log(opal_buffer_t *sample)
|
|||||||
kv[0].data.string = strdup(sampletime);
|
kv[0].data.string = strdup(sampletime);
|
||||||
free(sampletime);
|
free(sampletime);
|
||||||
|
|
||||||
|
/* load the hostname */
|
||||||
|
OBJ_CONSTRUCT(&kv[1], opal_value_t);
|
||||||
|
kv[1].key = strdup("hostname");
|
||||||
|
kv[1].type = OPAL_STRING;
|
||||||
|
kv[1].data.string = strdup(hostname);
|
||||||
|
|
||||||
|
/* protect against segfault if we jump to cleanup */
|
||||||
for (i=0; i < ncores; i++) {
|
for (i=0; i < ncores; i++) {
|
||||||
OBJ_CONSTRUCT(&kv[i+1], opal_value_t);
|
OBJ_CONSTRUCT(&kv[i+2], opal_value_t);
|
||||||
asprintf(&kv[i+1].key, "core%d", i);
|
}
|
||||||
kv[i+1].type = OPAL_FLOAT;
|
|
||||||
|
for (i=0; i < ncores; i++) {
|
||||||
|
asprintf(&kv[i+2].key, "core%d", i);
|
||||||
|
kv[i+2].type = OPAL_FLOAT;
|
||||||
n=1;
|
n=1;
|
||||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &fval, &n, OPAL_FLOAT))) {
|
if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &fval, &n, OPAL_FLOAT))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
kv[i+1].data.fval = fval;
|
kv[i+2].data.fval = fval;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* store it */
|
/* store it */
|
||||||
if (ORTE_SUCCESS != (rc = opal_db.add_log("coretemp", kv, ncores+1))) {
|
if (ORTE_SUCCESS != (rc = opal_db.add_log("coretemp", kv, ncores+2))) {
|
||||||
/* don't bark about it - just quietly disable the log */
|
/* don't bark about it - just quietly disable the log */
|
||||||
log_enabled = false;
|
log_enabled = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
/* cleanup the xfr storage */
|
/* cleanup the xfr storage */
|
||||||
for (i=0; i < ncores+1; i++) {
|
for (i=0; i < ncores+2; i++) {
|
||||||
OBJ_DESTRUCT(&kv[i]);
|
OBJ_DESTRUCT(&kv[i]);
|
||||||
}
|
}
|
||||||
if (NULL != hostname) {
|
if (NULL != hostname) {
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
# $HEADER$
|
# $HEADER$
|
||||||
#
|
#
|
||||||
|
|
||||||
dist_pkgdata_DATA = help-orte-sensor-freq.txt
|
dist_ompidata_DATA = help-orte-sensor-freq.txt
|
||||||
|
|
||||||
sources = \
|
sources = \
|
||||||
sensor_freq.c \
|
sensor_freq.c \
|
||||||
|
@ -219,6 +219,7 @@ static void freq_sample(void)
|
|||||||
time_t now;
|
time_t now;
|
||||||
char time_str[40];
|
char time_str[40];
|
||||||
char *timestamp_str;
|
char *timestamp_str;
|
||||||
|
bool packed;
|
||||||
|
|
||||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||||
"%s sampling freq",
|
"%s sampling freq",
|
||||||
@ -226,6 +227,16 @@ static void freq_sample(void)
|
|||||||
|
|
||||||
/* prep to store the results */
|
/* prep to store the results */
|
||||||
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
||||||
|
packed = false;
|
||||||
|
|
||||||
|
/* pack our name */
|
||||||
|
freq = strdup("freq");
|
||||||
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &freq, 1, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
OBJ_DESTRUCT(&data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
free(freq);
|
||||||
|
|
||||||
/* store our hostname */
|
/* store our hostname */
|
||||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &orte_process_info.nodename, 1, OPAL_STRING))) {
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &orte_process_info.nodename, 1, OPAL_STRING))) {
|
||||||
@ -256,6 +267,10 @@ static void freq_sample(void)
|
|||||||
free(timestamp_str);
|
free(timestamp_str);
|
||||||
|
|
||||||
OPAL_LIST_FOREACH(trk, &tracking, corefreq_tracker_t) {
|
OPAL_LIST_FOREACH(trk, &tracking, corefreq_tracker_t) {
|
||||||
|
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||||
|
"%s processing freq file %s",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
trk->file);
|
||||||
/* read the temp */
|
/* read the temp */
|
||||||
if (NULL == (fp = fopen(trk->file, "r"))) {
|
if (NULL == (fp = fopen(trk->file, "r"))) {
|
||||||
continue;
|
continue;
|
||||||
@ -272,17 +287,20 @@ static void freq_sample(void)
|
|||||||
free(freq);
|
free(freq);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
packed = true;
|
||||||
free(freq);
|
free(freq);
|
||||||
}
|
}
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* xfer the data for transmission */
|
/* xfer the data for transmission */
|
||||||
bptr = &data;
|
if (packed) {
|
||||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) {
|
bptr = &data;
|
||||||
ORTE_ERROR_LOG(ret);
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) {
|
||||||
OBJ_DESTRUCT(&data);
|
ORTE_ERROR_LOG(ret);
|
||||||
return;
|
OBJ_DESTRUCT(&data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
OBJ_DESTRUCT(&data);
|
OBJ_DESTRUCT(&data);
|
||||||
}
|
}
|
||||||
@ -322,12 +340,12 @@ static void freq_log(opal_buffer_t *sample)
|
|||||||
}
|
}
|
||||||
|
|
||||||
opal_output_verbose(3, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(3, orte_sensor_base_framework.framework_output,
|
||||||
"%s Received log from host %s with %d cores",
|
"%s Received freq log from host %s with %d cores",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
(NULL == hostname) ? "NULL" : hostname, ncores);
|
(NULL == hostname) ? "NULL" : hostname, ncores);
|
||||||
|
|
||||||
/* xfr to storage */
|
/* xfr to storage */
|
||||||
kv = malloc((ncores+1) * sizeof(opal_value_t));
|
kv = malloc((ncores+2) * sizeof(opal_value_t));
|
||||||
|
|
||||||
/* load the sample time at the start */
|
/* load the sample time at the start */
|
||||||
OBJ_CONSTRUCT(&kv[0], opal_value_t);
|
OBJ_CONSTRUCT(&kv[0], opal_value_t);
|
||||||
@ -336,27 +354,37 @@ static void freq_log(opal_buffer_t *sample)
|
|||||||
kv[0].data.string = strdup(sampletime);
|
kv[0].data.string = strdup(sampletime);
|
||||||
free(sampletime);
|
free(sampletime);
|
||||||
|
|
||||||
|
/* load the hostname */
|
||||||
|
OBJ_CONSTRUCT(&kv[1], opal_value_t);
|
||||||
|
kv[1].key = strdup("hostname");
|
||||||
|
kv[1].type = OPAL_STRING;
|
||||||
|
kv[1].data.string = strdup(hostname);
|
||||||
|
|
||||||
|
/* protect against segfault if we jump to cleanup */
|
||||||
for (i=0; i < ncores; i++) {
|
for (i=0; i < ncores; i++) {
|
||||||
OBJ_CONSTRUCT(&kv[i+1], opal_value_t);
|
OBJ_CONSTRUCT(&kv[i+2], opal_value_t);
|
||||||
asprintf(&kv[i+1].key, "core%d", i);
|
}
|
||||||
kv[i+1].type = OPAL_FLOAT;
|
|
||||||
|
for (i=0; i < ncores; i++) {
|
||||||
|
asprintf(&kv[i+2].key, "core%d", i);
|
||||||
|
kv[i+2].type = OPAL_FLOAT;
|
||||||
n=1;
|
n=1;
|
||||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &fval, &n, OPAL_FLOAT))) {
|
if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &fval, &n, OPAL_FLOAT))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
kv[i+1].data.fval = fval;
|
kv[i+2].data.fval = fval;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* store it */
|
/* store it */
|
||||||
if (ORTE_SUCCESS != (rc = opal_db.add_log("freq", kv, ncores+1))) {
|
if (ORTE_SUCCESS != (rc = opal_db.add_log("freq", kv, ncores+2))) {
|
||||||
/* don't bark about it - just quietly disable the log */
|
/* don't bark about it - just quietly disable the log */
|
||||||
log_enabled = false;
|
log_enabled = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
/* cleanup the xfr storage */
|
/* cleanup the xfr storage */
|
||||||
for (i=0; i < ncores+1; i++) {
|
for (i=0; i < ncores+2; i++) {
|
||||||
OBJ_DESTRUCT(&kv[i]);
|
OBJ_DESTRUCT(&kv[i]);
|
||||||
}
|
}
|
||||||
if (NULL != hostname) {
|
if (NULL != hostname) {
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
* Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -62,7 +63,7 @@ static void sample(void)
|
|||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
|
||||||
/* are we including ourselves? */
|
/* are we including ourselves? */
|
||||||
if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_CMSLAVE) &&
|
if (ORTE_PROC_IS_DAEMON &&
|
||||||
0 < mca_sensor_ft_tester_component.daemon_fail_prob) {
|
0 < mca_sensor_ft_tester_component.daemon_fail_prob) {
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_sensor_base_framework.framework_output,
|
||||||
"%s sample:ft_tester considering killing me!",
|
"%s sample:ft_tester considering killing me!",
|
||||||
|
@ -76,7 +76,7 @@ static int init(void)
|
|||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
|
||||||
/* setup to receive heartbeats */
|
/* setup to receive heartbeats */
|
||||||
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_CM) {
|
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_AGGREGATOR) {
|
||||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||||
ORTE_RML_TAG_HEARTBEAT,
|
ORTE_RML_TAG_HEARTBEAT,
|
||||||
ORTE_RML_PERSISTENT,
|
ORTE_RML_PERSISTENT,
|
||||||
|
@ -209,6 +209,8 @@ static void pwr_sample(void)
|
|||||||
long long value;
|
long long value;
|
||||||
int fd, ret;
|
int fd, ret;
|
||||||
float power;
|
float power;
|
||||||
|
char *temp;
|
||||||
|
bool packed;
|
||||||
|
|
||||||
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
opal_output_verbose(2, orte_sensor_base_framework.framework_output,
|
||||||
"%s sampling power",
|
"%s sampling power",
|
||||||
@ -216,6 +218,16 @@ static void pwr_sample(void)
|
|||||||
|
|
||||||
/* prep to store the results */
|
/* prep to store the results */
|
||||||
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
OBJ_CONSTRUCT(&data, opal_buffer_t);
|
||||||
|
packed = false;
|
||||||
|
|
||||||
|
/* pack our name */
|
||||||
|
temp = strdup("pwr");
|
||||||
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &temp, 1, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
OBJ_DESTRUCT(&data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
free(temp);
|
||||||
|
|
||||||
/* store our hostname */
|
/* store our hostname */
|
||||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &orte_process_info.nodename, 1, OPAL_STRING))) {
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(&data, &orte_process_info.nodename, 1, OPAL_STRING))) {
|
||||||
@ -266,15 +278,18 @@ static void pwr_sample(void)
|
|||||||
close(fd);
|
close(fd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
packed = true;
|
||||||
close(fd);
|
close(fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* xfer the data for transmission */
|
/* xfer the data for transmission */
|
||||||
bptr = &data;
|
if (packed) {
|
||||||
if (OPAL_SUCCESS != (ret = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) {
|
bptr = &data;
|
||||||
ORTE_ERROR_LOG(ret);
|
if (OPAL_SUCCESS != (ret = opal_dss.pack(orte_sensor_base.samples, &bptr, 1, OPAL_BUFFER))) {
|
||||||
OBJ_DESTRUCT(&data);
|
ORTE_ERROR_LOG(ret);
|
||||||
return;
|
OBJ_DESTRUCT(&data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
OBJ_DESTRUCT(&data);
|
OBJ_DESTRUCT(&data);
|
||||||
}
|
}
|
||||||
@ -319,7 +334,7 @@ static void pwr_log(opal_buffer_t *sample)
|
|||||||
(NULL == hostname) ? "NULL" : hostname, ncores);
|
(NULL == hostname) ? "NULL" : hostname, ncores);
|
||||||
|
|
||||||
/* xfr to storage */
|
/* xfr to storage */
|
||||||
kv = malloc((ncores+1) * sizeof(opal_value_t));
|
kv = malloc((ncores+2) * sizeof(opal_value_t));
|
||||||
|
|
||||||
/* load the sample time at the start */
|
/* load the sample time at the start */
|
||||||
OBJ_CONSTRUCT(&kv[0], opal_value_t);
|
OBJ_CONSTRUCT(&kv[0], opal_value_t);
|
||||||
@ -328,27 +343,37 @@ static void pwr_log(opal_buffer_t *sample)
|
|||||||
kv[0].data.string = strdup(sampletime);
|
kv[0].data.string = strdup(sampletime);
|
||||||
free(sampletime);
|
free(sampletime);
|
||||||
|
|
||||||
|
/* load the hostname */
|
||||||
|
OBJ_CONSTRUCT(&kv[1], opal_value_t);
|
||||||
|
kv[1].key = strdup("hostname");
|
||||||
|
kv[1].type = OPAL_STRING;
|
||||||
|
kv[1].data.string = strdup(hostname);
|
||||||
|
|
||||||
|
/* protect against segfault if we jump to cleanup */
|
||||||
for (i=0; i < ncores; i++) {
|
for (i=0; i < ncores; i++) {
|
||||||
OBJ_CONSTRUCT(&kv[i+1], opal_value_t);
|
OBJ_CONSTRUCT(&kv[i+2], opal_value_t);
|
||||||
asprintf(&kv[i+1].key, "core%d", i);
|
}
|
||||||
kv[i+1].type = OPAL_FLOAT;
|
|
||||||
|
for (i=0; i < ncores; i++) {
|
||||||
|
asprintf(&kv[i+2].key, "core%d", i);
|
||||||
|
kv[i+2].type = OPAL_FLOAT;
|
||||||
n=1;
|
n=1;
|
||||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &fval, &n, OPAL_FLOAT))) {
|
if (OPAL_SUCCESS != (rc = opal_dss.unpack(sample, &fval, &n, OPAL_FLOAT))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
kv[i+1].data.fval = fval;
|
kv[i+2].data.fval = fval;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* store it */
|
/* store it */
|
||||||
if (ORTE_SUCCESS != (rc = opal_db.add_log("pwr", kv, ncores+1))) {
|
if (ORTE_SUCCESS != (rc = opal_db.add_log("pwr", kv, ncores+2))) {
|
||||||
/* don't bark about it - just quietly disable the log */
|
/* don't bark about it - just quietly disable the log */
|
||||||
log_enabled = false;
|
log_enabled = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
/* cleanup the xfr storage */
|
/* cleanup the xfr storage */
|
||||||
for (i=0; i < ncores+1; i++) {
|
for (i=0; i < ncores+2; i++) {
|
||||||
OBJ_DESTRUCT(&kv[i]);
|
OBJ_DESTRUCT(&kv[i]);
|
||||||
}
|
}
|
||||||
if (NULL != hostname) {
|
if (NULL != hostname) {
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
# $HEADER$
|
# $HEADER$
|
||||||
#
|
#
|
||||||
|
|
||||||
dist_pkgdata_DATA = help-orte-sensor-sigar.txt
|
dist_ompidata_DATA = help-orte-sensor-sigar.txt
|
||||||
|
|
||||||
sources = \
|
sources = \
|
||||||
sensor_sigar.c \
|
sensor_sigar.c \
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user