1
1

Clean up the resource usage sensor, letting the db handle any printing requests.

This commit was SVN r27990.
Этот коммит содержится в:
Ralph Castain 2013-01-31 15:20:56 +00:00
родитель 9625757a71
Коммит c87fa68f9b
3 изменённых файлов: 81 добавлений и 117 удалений

Просмотреть файл

@ -61,49 +61,15 @@ orte_sensor_base_module_t orte_sensor_resusage_module = {
res_log
};
#define ORTE_RESUSAGE_LENGTH 16
static int line_count = 0;
static bool log_enabled = true;
static FILE *nstat_fp, *pstat_fp;
/*
 * Select the output streams used for the node-stat and process-stat logs.
 *
 * For each of the component's nstat_log / pstat_log settings that is
 * non-NULL: "-" selects stdout, "+" selects stderr, and any other value
 * is passed to fopen() as a path opened in "w" mode.
 *
 * Always returns ORTE_SUCCESS.
 *
 * NOTE(review): the fopen() return values are not checked here, so
 * nstat_fp / pstat_fp may be NULL if the file could not be opened.
 */
static int init(void)
{
if (NULL != mca_sensor_resusage_component.nstat_log) {
if (0 == strcmp(mca_sensor_resusage_component.nstat_log, "-")) {
nstat_fp = stdout;
} else if (0 == strcmp(mca_sensor_resusage_component.nstat_log, "+")) {
nstat_fp = stderr;
} else {
/* anything else is treated as a file path */
nstat_fp = fopen(mca_sensor_resusage_component.nstat_log, "w");
}
}
if (NULL != mca_sensor_resusage_component.pstat_log) {
if (0 == strcmp(mca_sensor_resusage_component.pstat_log, "-")) {
pstat_fp = stdout;
} else if (0 == strcmp(mca_sensor_resusage_component.pstat_log, "+")) {
pstat_fp = stderr;
} else {
/* anything else is treated as a file path */
pstat_fp = fopen(mca_sensor_resusage_component.pstat_log, "w");
}
}
return ORTE_SUCCESS;
}
/*
 * Close the stat log streams that were configured as file paths.
 *
 * A stream is closed only when its nstat_log / pstat_log setting is
 * non-NULL and is neither "-" nor "+"; those two values map to
 * stdout / stderr in init() and are left open.
 *
 * NOTE(review): fclose() is called without checking the stream pointer;
 * if the corresponding fopen() in init() failed, the pointer would be
 * NULL here -- confirm and guard if this code is still live.
 */
static void finalize(void)
{
if (NULL != mca_sensor_resusage_component.nstat_log &&
0 != strcmp(mca_sensor_resusage_component.nstat_log, "-") &&
0 != strcmp(mca_sensor_resusage_component.nstat_log, "+")) {
fclose(nstat_fp);
}
if (NULL != mca_sensor_resusage_component.pstat_log &&
0 != strcmp(mca_sensor_resusage_component.pstat_log, "-") &&
0 != strcmp(mca_sensor_resusage_component.pstat_log, "+")) {
fclose(pstat_fp);
}
}
static void sample(void)
@ -315,6 +281,12 @@ static void res_log(opal_buffer_t *sample)
int rc, n, i;
opal_value_t kv[14];
char *node;
opal_diskstats_t *dk;
opal_netstats_t *ns;
if (!log_enabled) {
return;
}
/* unpack the node name */
n=1;
@ -330,61 +302,66 @@ static void res_log(opal_buffer_t *sample)
return;
}
if (NULL != mca_sensor_resusage_component.nstat_log) {
if (0 == line_count) {
/* print the column headers */
fprintf(nstat_fp, "Node\tSampleTime\tTotMem\tLdAvg\tLdAvg5\tLdAvg15\tFreeMem\tBuffers\tCached\tSwapCached\tSwapTotal\tSwapFree\tMapped\n");
}
fprintf(nstat_fp, "%s\t%d.%06d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n",
node, (int)nst->sample_time.tv_sec, (int)nst->sample_time.tv_usec,
nst->total_mem, nst->la, nst->la5, nst->la15, nst->free_mem, nst->buffers,
nst->cached, nst->swap_cached, nst->swap_total, nst->swap_free, nst->mapped);
}
if (log_enabled) {
if (mca_sensor_resusage_component.log_node_stats) {
/* convert this into an array of opal_value_t's - no clean way
* to do this, so have to just manually map each field
*/
for (i=0; i < 12; i++) {
for (i=0; i < 13; i++) {
OBJ_CONSTRUCT(&kv[i], opal_value_t);
}
kv[0].key = strdup("la");
kv[0].type = OPAL_FLOAT;
kv[0].data.fval = nst->la;
kv[1].key = strdup("la5");
kv[1].type = OPAL_FLOAT;
kv[1].data.fval = nst->la5;
kv[2].key = strdup("la15");
kv[2].type = OPAL_FLOAT;
kv[2].data.fval = nst->la15;
kv[3].key = strdup("total_mem");
kv[3].type = OPAL_FLOAT;
kv[3].data.fval = nst->total_mem;
kv[4].key = strdup("free_mem");
kv[4].type = OPAL_FLOAT;
kv[4].data.fval = nst->free_mem;
kv[5].key = strdup("buffers");
kv[5].type = OPAL_FLOAT;
kv[5].data.fval = nst->buffers;
kv[6].key = strdup("cached");
kv[6].type = OPAL_FLOAT;
kv[6].data.fval = nst->cached;
kv[7].key = strdup("swap_cached");
kv[7].type = OPAL_FLOAT;
kv[7].data.fval = nst->swap_cached;
kv[8].key = strdup("swap_total");
kv[8].type = OPAL_FLOAT;
kv[8].data.fval = nst->swap_total;
kv[9].key = strdup("swap_free");
kv[9].type = OPAL_FLOAT;
kv[9].data.fval = nst->swap_free;
kv[10].key = strdup("mapped");
kv[10].type = OPAL_FLOAT;
kv[10].data.fval = nst->mapped;
kv[11].key = strdup("sample_time");
kv[11].type = OPAL_TIMEVAL;
kv[11].data.tv.tv_sec = nst->sample_time.tv_sec;
kv[11].data.tv.tv_usec = nst->sample_time.tv_usec;
i=0;
kv[i].key = strdup("ctime");
kv[i].type = OPAL_TIMEVAL;
kv[i].data.tv.tv_sec = nst->sample_time.tv_sec;
kv[i++].data.tv.tv_usec = nst->sample_time.tv_usec;
kv[i].key = "hostname";
kv[i].type = OPAL_STRING;
kv[i++].data.string = strdup(node);
kv[i].key = strdup("total_mem");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->total_mem;
kv[i].key = strdup("free_mem");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->free_mem;
kv[i].key = strdup("buffers");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->buffers;
kv[i].key = strdup("cached");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->cached;
kv[i].key = strdup("swap_total");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->swap_total;
kv[i].key = strdup("swap_free");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->swap_free;
kv[i].key = strdup("mapped");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->mapped;
kv[i].key = strdup("swap_cached");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->swap_cached;
kv[i].key = strdup("la");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->la;
kv[i].key = strdup("la5");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->la5;
kv[i].key = strdup("la15");
kv[i].type = OPAL_FLOAT;
kv[i++].data.fval = nst->la15;
/* store it */
if (ORTE_SUCCESS != (rc = orte_db.add_log("nodestats", kv, 12))) {
@ -398,22 +375,10 @@ static void res_log(opal_buffer_t *sample)
OBJ_RELEASE(nst);
/* unpack all process stats */
n=1;
while (OPAL_SUCCESS == (rc = opal_dss.unpack(sample, &st, &n, OPAL_PSTAT))) {
if (NULL != mca_sensor_resusage_component.pstat_log) {
if (0 == line_count) {
/* print the column headers */
fprintf(pstat_fp, "Node\tSampleTime\tRank\tPid\tCmd\tState\tTime\tCpu\tPri\tNumThreads\tProcessor\tVSIZE\tRSS\tPeakVSIZE\n");
}
fprintf(pstat_fp, "%s\t%d.%06d\t%lu\t%s\t%c\t%d.%06d\t%f\t%d\t%d\t%d\t%f\t%f\t%f\n",
node, (int)st->sample_time.tv_sec, (int)st->sample_time.tv_usec,
(unsigned long)st->pid, st->cmd, st->state[0],
(int)st->time.tv_sec, (int)st->time.tv_usec, st->percent_cpu,
st->priority, (int)st->num_threads, (int)st->processor,
st->vsize, st->rss, st->peak_vsize);
}
if (log_enabled) {
if (mca_sensor_resusage_component.log_process_stats) {
/* unpack all process stats */
n=1;
while (OPAL_SUCCESS == (rc = opal_dss.unpack(sample, &st, &n, OPAL_PSTAT))) {
for (i=0; i < 14; i++) {
OBJ_CONSTRUCT(&kv[i], opal_value_t);
}
@ -471,15 +436,11 @@ static void res_log(opal_buffer_t *sample)
for (i=0; i < 14; i++) {
OBJ_DESTRUCT(&kv[i]);
}
OBJ_RELEASE(st);
n=1;
}
if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
ORTE_ERROR_LOG(rc);
}
OBJ_RELEASE(st);
n=1;
}
if (OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
ORTE_ERROR_LOG(rc);
}
line_count++;
if (30 == line_count) {
line_count = 0;
}
}

Просмотреть файл

@ -26,8 +26,8 @@ struct orte_sensor_resusage_component_t {
int sample_rate;
float node_memory_limit;
float proc_memory_limit;
char *nstat_log;
char *pstat_log;
bool log_node_stats;
bool log_process_stats;
};
typedef struct orte_sensor_resusage_component_t orte_sensor_resusage_component_t;

Просмотреть файл

@ -77,13 +77,16 @@ static int orte_sensor_resusage_open(void)
false, false, 0, &tmp);
mca_sensor_resusage_component.proc_memory_limit = (float)tmp;
mca_base_param_reg_string(c, "node_stat_log",
"Print the node stats to the indicated file (- => stdout, + => stderr)",
false, false, NULL, &mca_sensor_resusage_component.nstat_log);
mca_base_param_reg_int(c, "log_node_stats",
"Log the node stats",
false, false, (int)false, &tmp);
mca_sensor_resusage_component.log_node_stats = OPAL_INT_TO_BOOL(tmp);
mca_base_param_reg_int(c, "log_process_stats",
"Log the process stats",
false, false, (int)false, &tmp);
mca_sensor_resusage_component.log_process_stats = OPAL_INT_TO_BOOL(tmp);
mca_base_param_reg_string(c, "process_stat_log",
"Print the process stats to the indicated file (- => stdout, + => stderr)",
false, false, NULL, &mca_sensor_resusage_component.pstat_log);
return ORTE_SUCCESS;
}