2008-12-22 23:23:05 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
|
|
|
|
*
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "opal_config.h"
|
|
|
|
#include "opal/constants.h"
|
|
|
|
|
|
|
|
/* This component will only be compiled on Linux, where we are
|
|
|
|
guaranteed to have <unistd.h> and friends */
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <ctype.h>
|
2011-04-22 02:55:45 +04:00
|
|
|
#ifdef HAVE_TIME_H
|
|
|
|
#include <time.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_TIME_H
|
|
|
|
#include <sys/time.h>
|
|
|
|
#endif
|
2008-12-22 23:23:05 +03:00
|
|
|
|
|
|
|
#include <sys/param.h> /* for HZ to convert jiffies to actual time */
|
|
|
|
|
|
|
|
#include "opal/mca/base/mca_base_param.h"
|
|
|
|
#include "opal/dss/dss_types.h"
|
|
|
|
#include "opal/util/printf.h"
|
|
|
|
|
|
|
|
#include "pstat_linux.h"
|
|
|
|
|
|
|
|
/*
|
2011-04-22 02:55:45 +04:00
|
|
|
* API functions
|
2008-12-22 23:23:05 +03:00
|
|
|
*/
|
|
|
|
static int linux_module_init(void);
|
2011-04-22 02:55:45 +04:00
|
|
|
static int query(pid_t pid,
|
|
|
|
opal_pstats_t *stats,
|
|
|
|
opal_node_stats_t *nstats);
|
2008-12-22 23:23:05 +03:00
|
|
|
static int linux_module_fini(void);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Linux pstat module
|
|
|
|
*/
|
|
|
|
const opal_pstat_base_module_t opal_pstat_linux_module = {
|
|
|
|
/* Initialization function */
|
|
|
|
linux_module_init,
|
|
|
|
query,
|
|
|
|
linux_module_fini
|
|
|
|
};
|
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* Local functions */

/* Read one line from fp into the shared "input" buffer. */
static char *local_getline(FILE *fp);

/* Split a "key: value" line in place; returns a pointer to the value. */
static char *local_stripper(char *data);

/* Local data */

/*
 * Line buffer filled by local_getline().  It is a single file-scope
 * buffer, so each call overwrites the previous line.
 */
static char input[256];
|
|
|
|
|
2008-12-22 23:23:05 +03:00
|
|
|
static int linux_module_init(void)
|
|
|
|
{
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int linux_module_fini(void)
|
|
|
|
{
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Advance to the start of the next whitespace-separated field.
 *
 * ptr     - position inside a NUL-terminated string, normally on (or at
 *           the end of) the current field
 * barrier - maximum number of characters this call may step over
 *
 * Returns a pointer to the first character of the following field.  If
 * the end of the string or the barrier is reached first, the pointer
 * returned is where the scan stopped (at the terminating NUL when the
 * string ran out).  The scan never moves past the terminating NUL.
 */
static char *next_field(char *ptr, int barrier)
{
    int i = 0;

    /* we are probably pointing to the last char
     * of the current field, so look for whitespace.
     * Stop at the terminator: '\0' is not whitespace, so without
     * this check the scan would run off the end of the string.
     */
    while (i < barrier && '\0' != *ptr && !isspace((unsigned char)*ptr)) {
        ptr++;  /* step over the current char */
        i++;
    }

    /* now look for the next field; isspace('\0') is false, so this
     * loop also stops at the terminator
     */
    while (i < barrier && isspace((unsigned char)*ptr)) {
        ptr++;
        i++;
    }

    return ptr;
}
|
|
|
|
|
2011-05-08 18:45:16 +04:00
|
|
|
/*
 * Convert a textual "<number> [kB]" value into a float.
 *
 * value - NUL-terminated string that starts (after optional leading
 *         whitespace) with an unsigned decimal number
 *
 * Returns the number as a float.  When the number is followed by a
 * "kB" unit on the same line, the value is divided by 1024, i.e.
 * kilobytes are reported as megabytes.  A string with no leading
 * number yields 0.
 */
static float convert_value(char *value)
{
    char *rest = value;
    char *eol;
    char *unit;
    float fval;

    /* compute base value; strtoul always sets "rest" */
    fval = (float)strtoul(value, &rest, 10);

    /* get the unit multiplier - only accept a unit that appears on the
     * same line as the number, since callers may hand us a pointer into
     * a buffer holding many lines
     */
    eol = strchr(rest, '\n');
    unit = strstr(rest, "kB");
    if (NULL != unit && (NULL == eol || unit < eol)) {
        fval /= 1024.0f;
    }

    return fval;
}
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
static int query(pid_t pid,
|
|
|
|
opal_pstats_t *stats,
|
|
|
|
opal_node_stats_t *nstats)
|
2008-12-22 23:23:05 +03:00
|
|
|
{
|
|
|
|
char data[4096];
|
|
|
|
int fd;
|
|
|
|
size_t numchars;
|
|
|
|
char *ptr, *eptr;
|
|
|
|
int i;
|
2011-04-22 02:55:45 +04:00
|
|
|
int len, itime;
|
|
|
|
double dtime;
|
|
|
|
FILE *fp;
|
|
|
|
char *dptr, *value;
|
|
|
|
|
|
|
|
if (NULL != stats) {
|
|
|
|
/* record the time of this sample */
|
|
|
|
gettimeofday(&stats->sample_time, NULL);
|
|
|
|
/* check the nstats - don't do gettimeofday twice
|
|
|
|
* as it is expensive
|
|
|
|
*/
|
|
|
|
if (NULL != nstats) {
|
|
|
|
nstats->sample_time.tv_sec = stats->sample_time.tv_sec;
|
|
|
|
nstats->sample_time.tv_usec = stats->sample_time.tv_usec;
|
|
|
|
}
|
|
|
|
} else if (NULL != nstats) {
|
|
|
|
/* record the time of this sample */
|
|
|
|
gettimeofday(&nstats->sample_time, NULL);
|
2008-12-22 23:23:05 +03:00
|
|
|
}
|
2011-04-22 02:55:45 +04:00
|
|
|
|
|
|
|
if (NULL != stats) {
|
|
|
|
/* create the stat filename for this proc */
|
|
|
|
numchars = snprintf(data, sizeof(data), "/proc/%d/stat", pid);
|
|
|
|
if (numchars >= sizeof(data)) {
|
|
|
|
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
|
|
|
}
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
if (0 > (fd = open(data, O_RDONLY))) {
|
|
|
|
/* can't access this file - most likely, this means we
|
|
|
|
* aren't really on a supported system, or the proc no
|
|
|
|
* longer exists. Just return an error
|
|
|
|
*/
|
|
|
|
return OPAL_ERR_FILE_OPEN_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* absorb all of the file's contents in one gulp - we'll process
|
|
|
|
* it once it is in memory for speed
|
2008-12-22 23:23:05 +03:00
|
|
|
*/
|
2011-04-22 02:55:45 +04:00
|
|
|
memset(data, 0, sizeof(data));
|
|
|
|
len = read(fd, data, sizeof(data)-1);
|
|
|
|
if (len < 0) {
|
|
|
|
/* This shouldn't happen! */
|
|
|
|
return OPAL_ERR_FILE_OPEN_FAILURE;
|
|
|
|
}
|
|
|
|
close(fd);
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* remove newline at end */
|
|
|
|
data[len] = '\0';
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* the stat file consists of a single line in a carefully formatted
|
|
|
|
* form. Parse it field by field as per proc(3) to get the ones we want
|
|
|
|
*/
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* we don't need to read the pid from the file - we already know it! */
|
|
|
|
stats->pid = pid;
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* the cmd is surrounded by parentheses - find the start */
|
|
|
|
if (NULL == (ptr = strchr(data, '('))) {
|
|
|
|
/* no cmd => something wrong with data, return error */
|
|
|
|
return OPAL_ERR_BAD_PARAM;
|
|
|
|
}
|
|
|
|
/* step over the paren */
|
|
|
|
ptr++;
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* find the ending paren */
|
|
|
|
if (NULL == (eptr = strchr(ptr, ')'))) {
|
|
|
|
/* no end to cmd => something wrong with data, return error */
|
|
|
|
return OPAL_ERR_BAD_PARAM;
|
|
|
|
}
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* save the cmd name, up to the limit of the array */
|
|
|
|
i = 0;
|
|
|
|
while (ptr < eptr && i < OPAL_PSTAT_MAX_STRING_LEN) {
|
|
|
|
stats->cmd[i++] = *ptr++;
|
|
|
|
}
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* move to the next field in the data */
|
|
|
|
ptr = next_field(eptr, len);
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* next is the process state - a single character */
|
|
|
|
stats->state[0] = *ptr;
|
|
|
|
/* move to next field */
|
|
|
|
ptr = next_field(ptr, len);
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* skip fields until we get to the times */
|
|
|
|
ptr = next_field(ptr, len); /* ppid */
|
|
|
|
ptr = next_field(ptr, len); /* pgrp */
|
|
|
|
ptr = next_field(ptr, len); /* session */
|
|
|
|
ptr = next_field(ptr, len); /* tty_nr */
|
|
|
|
ptr = next_field(ptr, len); /* tpgid */
|
|
|
|
ptr = next_field(ptr, len); /* flags */
|
|
|
|
ptr = next_field(ptr, len); /* minflt */
|
|
|
|
ptr = next_field(ptr, len); /* cminflt */
|
|
|
|
ptr = next_field(ptr, len); /* majflt */
|
|
|
|
ptr = next_field(ptr, len); /* cmajflt */
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* grab the process time usage fields */
|
|
|
|
itime = strtoul(ptr, &ptr, 10); /* utime */
|
|
|
|
itime += strtoul(ptr, &ptr, 10); /* add the stime */
|
|
|
|
/* convert to time in seconds */
|
|
|
|
dtime = (double)itime / (double)HZ;
|
|
|
|
stats->time.tv_sec = (int)dtime;
|
|
|
|
stats->time.tv_usec = (int)(1000000.0 * (dtime - stats->time.tv_sec));
|
|
|
|
/* move to next field */
|
|
|
|
ptr = next_field(ptr, len);
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* skip fields until we get to priority */
|
|
|
|
ptr = next_field(ptr, len); /* cutime */
|
|
|
|
ptr = next_field(ptr, len); /* cstime */
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* save the priority */
|
|
|
|
stats->priority = strtol(ptr, &ptr, 10);
|
|
|
|
/* move to next field */
|
|
|
|
ptr = next_field(ptr, len);
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* skip nice */
|
|
|
|
ptr = next_field(ptr, len);
|
|
|
|
|
|
|
|
/* get number of threads */
|
|
|
|
stats->num_threads = strtoul(ptr, &ptr, 10);
|
|
|
|
/* move to next field */
|
|
|
|
ptr = next_field(ptr, len);
|
|
|
|
|
|
|
|
/* skip fields until we get to processor id */
|
|
|
|
ptr = next_field(ptr, len); /* itrealvalue */
|
|
|
|
ptr = next_field(ptr, len); /* starttime */
|
|
|
|
ptr = next_field(ptr, len); /* vsize */
|
|
|
|
ptr = next_field(ptr, len); /* rss */
|
|
|
|
ptr = next_field(ptr, len); /* rss limit */
|
|
|
|
ptr = next_field(ptr, len); /* startcode */
|
|
|
|
ptr = next_field(ptr, len); /* endcode */
|
|
|
|
ptr = next_field(ptr, len); /* startstack */
|
|
|
|
ptr = next_field(ptr, len); /* kstkesp */
|
|
|
|
ptr = next_field(ptr, len); /* kstkeip */
|
|
|
|
ptr = next_field(ptr, len); /* signal */
|
|
|
|
ptr = next_field(ptr, len); /* blocked */
|
|
|
|
ptr = next_field(ptr, len); /* sigignore */
|
|
|
|
ptr = next_field(ptr, len); /* sigcatch */
|
|
|
|
ptr = next_field(ptr, len); /* wchan */
|
|
|
|
ptr = next_field(ptr, len); /* nswap */
|
|
|
|
ptr = next_field(ptr, len); /* cnswap */
|
|
|
|
ptr = next_field(ptr, len); /* exit_signal */
|
|
|
|
|
|
|
|
/* finally - get the processor */
|
|
|
|
stats->processor = strtol(ptr, NULL, 10);
|
|
|
|
|
|
|
|
/* that's all we care about from this data - ignore the rest */
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* now create the status filename for this proc */
|
|
|
|
memset(data, 0, sizeof(data));
|
|
|
|
numchars = snprintf(data, sizeof(data), "/proc/%d/status", pid);
|
|
|
|
if (numchars >= sizeof(data)) {
|
|
|
|
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
|
|
|
}
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
if (0 > (fd = open(data, O_RDONLY))) {
|
|
|
|
/* can't access this file - most likely, this means we
|
|
|
|
* aren't really on a supported system, or the proc no
|
|
|
|
* longer exists. Just return an error
|
|
|
|
*/
|
|
|
|
return OPAL_ERR_FILE_OPEN_FAILURE;
|
|
|
|
}
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* absorb all of the file's contents in one gulp - we'll process
|
|
|
|
* it once it is in memory for speed
|
2008-12-22 23:23:05 +03:00
|
|
|
*/
|
2011-04-22 02:55:45 +04:00
|
|
|
memset(data, 0, sizeof(data));
|
|
|
|
len = read(fd, data, sizeof(data)-1);
|
|
|
|
close(fd);
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* remove newline at end */
|
|
|
|
data[len] = '\0';
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* parse it according to proc(3) */
|
|
|
|
/* look for VmPeak */
|
|
|
|
if (NULL != (ptr = strstr(data, "VmPeak:"))) {
|
|
|
|
/* found it - step past colon */
|
|
|
|
ptr += 8;
|
2011-05-08 18:45:16 +04:00
|
|
|
stats->peak_vsize = convert_value(ptr);
|
2011-04-22 02:55:45 +04:00
|
|
|
}
|
|
|
|
/* look for VmSize */
|
2011-05-16 17:32:42 +04:00
|
|
|
if (NULL != (ptr = strstr(data, "VmSize:"))) {
|
2011-04-22 02:55:45 +04:00
|
|
|
/* found it - step past colon */
|
|
|
|
ptr += 8;
|
2011-05-08 18:45:16 +04:00
|
|
|
stats->vsize = convert_value(ptr); /* convert to MBytes*/
|
2011-04-22 02:55:45 +04:00
|
|
|
}
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* look for RSS */
|
2011-05-16 17:32:42 +04:00
|
|
|
if (NULL != (ptr = strstr(data, "VmRSS:"))) {
|
2011-04-22 02:55:45 +04:00
|
|
|
/* found it - step past colon */
|
|
|
|
ptr += 8;
|
2011-05-08 18:45:16 +04:00
|
|
|
stats->rss = convert_value(ptr); /* convert to MBytes */
|
2011-04-22 02:55:45 +04:00
|
|
|
}
|
2008-12-22 23:23:05 +03:00
|
|
|
}
|
2011-04-22 02:55:45 +04:00
|
|
|
|
|
|
|
if (NULL != nstats) {
|
|
|
|
/* get the loadavg data */
|
|
|
|
if (0 > (fd = open("/proc/loadavg", O_RDONLY))) {
|
|
|
|
/* not an error if we don't find this one as it
|
|
|
|
* isn't critical
|
|
|
|
*/
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* absorb all of the file's contents in one gulp - we'll process
|
|
|
|
* it once it is in memory for speed
|
|
|
|
*/
|
|
|
|
memset(data, 0, sizeof(data));
|
|
|
|
len = read(fd, data, sizeof(data)-1);
|
|
|
|
close(fd);
|
|
|
|
|
|
|
|
/* remove newline at end */
|
|
|
|
data[len] = '\0';
|
2008-12-22 23:23:05 +03:00
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
/* we only care about the first three numbers */
|
|
|
|
nstats->la = strtof(data, &ptr);
|
|
|
|
nstats->la5 = strtof(ptr, &eptr);
|
|
|
|
nstats->la15 = strtof(eptr, NULL);
|
|
|
|
|
|
|
|
/* see if we can open the meminfo file */
|
|
|
|
if (NULL == (fp = fopen("/proc/meminfo", "r"))) {
|
|
|
|
/* ignore this */
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* read the file one line at a time */
|
|
|
|
while (NULL != (dptr = local_getline(fp))) {
|
|
|
|
if (NULL == (value = local_stripper(dptr))) {
|
|
|
|
/* cannot process */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (0 == strcmp(dptr, "MemTotal")) {
|
2011-05-08 18:45:16 +04:00
|
|
|
nstats->total_mem = convert_value(value);
|
2011-04-22 02:55:45 +04:00
|
|
|
} else if (0 == strcmp(dptr, "MemFree")) {
|
2011-05-08 18:45:16 +04:00
|
|
|
nstats->free_mem = convert_value(value);
|
|
|
|
} else if (0 == strcmp(dptr, "Buffers")) {
|
|
|
|
nstats->buffers = convert_value(value);
|
|
|
|
} else if (0 == strcmp(dptr, "Cached")) {
|
|
|
|
nstats->cached = convert_value(value);
|
|
|
|
} else if (0 == strcmp(dptr, "SwapCached")) {
|
|
|
|
nstats->swap_cached = convert_value(value);
|
|
|
|
} else if (0 == strcmp(dptr, "SwapTotal")) {
|
|
|
|
nstats->swap_total = convert_value(value);
|
|
|
|
} else if (0 == strcmp(dptr, "SwapFree")) {
|
|
|
|
nstats->swap_free = convert_value(value);
|
|
|
|
} else if (0 == strcmp(dptr, "Mapped")) {
|
|
|
|
nstats->mapped = convert_value(value);
|
2011-04-22 02:55:45 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
fclose(fp);
|
2008-12-22 23:23:05 +03:00
|
|
|
}
|
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *local_getline(FILE *fp)
|
|
|
|
{
|
|
|
|
char *ret;
|
|
|
|
|
|
|
|
ret = fgets(input, 256, fp);
|
|
|
|
if (NULL != ret) {
|
|
|
|
input[strlen(input)-1] = '\0'; /* remove newline */
|
|
|
|
return input;
|
2008-12-22 23:23:05 +03:00
|
|
|
}
|
|
|
|
|
2011-04-22 02:55:45 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Split a "key:   value" line in place.
 *
 * data - NUL-terminated, writable line.  On success it is modified so
 *        that it holds just the key, with any whitespace between the
 *        key and the colon removed.
 *
 * Returns a pointer (into data) to the first alphanumeric character
 * after the colon, or to the terminating NUL if there is none.
 * Returns NULL, leaving data untouched, when the line has no colon.
 */
static char *local_stripper(char *data)
{
    char *colon, *key_end, *value;

    /* find the colon */
    if (NULL == (colon = strchr(data, ':'))) {
        return NULL;
    }

    /* working backwards from the colon, drop trailing whitespace from
     * the key; never step in front of the buffer, even when the colon
     * is the very first character
     */
    key_end = colon;
    while (key_end > data && isspace((unsigned char)key_end[-1])) {
        --key_end;
    }
    *key_end = '\0';

    /* now look for the value, stopping at the end of the string */
    value = colon + 1;
    while ('\0' != *value && !isalnum((unsigned char)*value)) {
        ++value;
    }

    return value;
}
|