1
1
openmpi/opal/mca/mpool/hugepage/mpool_hugepage_component.c
Gilles Gouaillardet 78fffa25f2 mpool/hugepage: plug a memory leak
Refs. open-mpi/ompi#6242

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
2019-01-07 11:50:40 +09:00

403 строки
14 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/mca/base/base.h"
#include "opal/runtime/opal_params.h"
#include "opal/mca/base/mca_base_pvar.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/allocator/base/base.h"
#include "opal/util/argv.h"
#include "mpool_hugepage.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#ifdef HAVE_SYS_VFS_H
#include <sys/vfs.h>
#endif
#ifdef HAVE_SYS_MOUNT_H
#include <sys/mount.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#ifdef HAVE_MNTENT_H
#include <mntent.h>
#endif
#include <fcntl.h>
/*
* Note that some OS's (e.g., NetBSD and Solaris) have statfs(), but
* no struct statfs (!). So check to make sure we have struct statfs
* before allowing the use of statfs().
*/
#if defined(HAVE_STATFS) && (defined(HAVE_STRUCT_STATFS_F_FSTYPENAME) || \
defined(HAVE_STRUCT_STATFS_F_TYPE))
#define USE_STATFS 1
#endif
/*
* Local functions
*/
/* Component lifecycle callbacks; wired into mca_mpool_hugepage_component below. */
static int mca_mpool_hugepage_open (void);
static int mca_mpool_hugepage_close (void);
static int mca_mpool_hugepage_register (void);
/* Picks a module for a comma-separated hint string and reports its priority. */
static int mca_mpool_hugepage_query (const char *hints, int *priority,
mca_mpool_base_module_t **module);
/* Populates mca_mpool_hugepage_component.huge_pages from the hugetlbfs
 * entries listed in /proc/mounts. */
static void mca_mpool_hugepage_find_hugepages (void);
/* Storage for the "priority" MCA variable; default assigned in
 * mca_mpool_hugepage_register(). */
static int mca_mpool_hugepage_priority;
/* Storage for the "page_size" MCA variable; default assigned in
 * mca_mpool_hugepage_register().  Used by the query function as the page
 * size to look for when the hints do not request one. */
static unsigned long mca_mpool_hugepage_page_size;
/*
 * Component descriptor for the hugepage mpool.
 *
 * Only the embedded base component (accessed elsewhere in this file as
 * mca_mpool_hugepage_component.super) is initialized statically.  The other
 * members used in this file (huge_pages, modules, module_count,
 * bytes_allocated) are filled in by the register/open callbacks.
 */
mca_mpool_hugepage_component_t mca_mpool_hugepage_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.mpool_version ={
MCA_MPOOL_BASE_VERSION_3_0_0,
.mca_component_name = "hugepage",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
/* Lifecycle callbacks, all defined later in this file */
.mca_open_component = mca_mpool_hugepage_open,
.mca_close_component = mca_mpool_hugepage_close,
.mca_register_component_params = mca_mpool_hugepage_register,
},
.mpool_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Entry point used to select one of this component's modules from hints */
.mpool_query = mca_mpool_hugepage_query,
},
};
/**
 * Component register/open/close functions
 */
/**
 * Register the MCA variables and the performance variable of this component.
 *
 * Variables registered:
 *   - "priority"        int, default 50
 *   - "page_size"       unsigned long, default 2 MiB (1 << 21)
 *   - "bytes_allocated" read-only, continuous performance variable backed by
 *                       mca_mpool_hugepage_component.bytes_allocated
 *
 * The results of the individual registration calls are deliberately ignored.
 *
 * @return OPAL_SUCCESS always.
 */
static int mca_mpool_hugepage_register(void)
{
    mca_mpool_hugepage_priority = 50;
    (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
                                            "priority", "Default priority of the hugepage mpool component "
                                            "(default: 50)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                            OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                            &mca_mpool_hugepage_priority);

    /* The backing storage is an unsigned long, so the variable has to be
     * registered with the matching type.  Registering it as an int makes the
     * variable system access only sizeof(int) bytes of the storage, which
     * yields a wrong page size wherever int and long differ in size. */
    mca_mpool_hugepage_page_size = 1UL << 21;
    (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
                                            "page_size", "Default huge page size of the hugepage mpool component "
                                            "(default: 2M)", MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0,
                                            OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                            &mca_mpool_hugepage_page_size);

    mca_mpool_hugepage_component.bytes_allocated = 0;
    (void) mca_base_component_pvar_register (&mca_mpool_hugepage_component.super.mpool_version,
                                             "bytes_allocated", "Number of bytes currently allocated in the mpool "
                                             "hugepage component", OPAL_INFO_LVL_3, MCA_BASE_PVAR_CLASS_SIZE,
                                             MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                             MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                             NULL, NULL, NULL, (void *) &mca_mpool_hugepage_component.bytes_allocated);

    return OPAL_SUCCESS;
}
/**
 * Open the component: discover the available huge page mounts and create one
 * module per usable mount.
 *
 * The huge page list is constructed and filled first.  If it stays empty the
 * component opens successfully without any module.  Otherwise a zeroed array
 * with one slot per list entry is allocated and the entries are initialized
 * in list order.
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE if the module array
 *         cannot be allocated.
 */
static int mca_mpool_hugepage_open (void)
{
    mca_mpool_hugepage_hugepage_t *page;
    size_t page_count;
    int initialized = 0;

    OBJ_CONSTRUCT(&mca_mpool_hugepage_component.huge_pages, opal_list_t);
    mca_mpool_hugepage_find_hugepages ();

    page_count = opal_list_get_size (&mca_mpool_hugepage_component.huge_pages);
    if (0 == page_count) {
        /* nothing to manage; the query function will report "not available" */
        return OPAL_SUCCESS;
    }

    mca_mpool_hugepage_component.modules = (mca_mpool_hugepage_module_t *)
        calloc (page_count, sizeof (mca_mpool_hugepage_module_t));
    if (NULL == mca_mpool_hugepage_component.modules) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    OPAL_LIST_FOREACH(page, &mca_mpool_hugepage_component.huge_pages, mca_mpool_hugepage_hugepage_t) {
        mca_mpool_hugepage_module_t *slot = &mca_mpool_hugepage_component.modules[initialized];

        /* A slot is consumed only when its initialization succeeds; after a
         * failure the same slot is offered to the next huge page entry. */
        if (OPAL_SUCCESS == mca_mpool_hugepage_module_init (slot, page)) {
            ++initialized;
        }
    }

    mca_mpool_hugepage_component.module_count = initialized;

    return OPAL_SUCCESS;
}
/**
 * Close the component: finalize every module created by the open callback,
 * free the module array and release the list of discovered huge pages.
 *
 * @return OPAL_SUCCESS always.
 */
static int mca_mpool_hugepage_close (void)
{
    /* Finalize the modules while the huge page list is still alive: each
     * module was initialized from a list entry and keeps referring to it
     * through its huge_page member, so the entries should outlive the
     * modules. */
    for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
        mca_mpool_hugepage_module_t *module = mca_mpool_hugepage_component.modules + i;
        module->super.mpool_finalize (&module->super);
    }

    free (mca_mpool_hugepage_component.modules);
    mca_mpool_hugepage_component.modules = NULL;
    /* Keep the count consistent with the (now NULL) array.  The open callback
     * does not reset it when no huge pages are found, and the query function
     * indexes the array based on this count. */
    mca_mpool_hugepage_component.module_count = 0;

    OPAL_LIST_DESTRUCT(&mca_mpool_hugepage_component.huge_pages);

    return OPAL_SUCCESS;
}
#ifdef HAVE_MNTENT_H
/**
 * Ordering callback for opal_list_sort(): orders huge page entries by
 * ascending page size.
 *
 * @param a  pointer to the first list item pointer
 * @param b  pointer to the second list item pointer
 * @return 1 if *a has the larger page size, -1 if it has the smaller one,
 *         0 if both page sizes are equal.
 */
static int page_compare (opal_list_item_t **a, opal_list_item_t **b) {
    const mca_mpool_hugepage_hugepage_t *lhs = (const mca_mpool_hugepage_hugepage_t *) *a;
    const mca_mpool_hugepage_hugepage_t *rhs = (const mca_mpool_hugepage_hugepage_t *) *b;

    /* each comparison evaluates to 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs->page_size > rhs->page_size) - (lhs->page_size < rhs->page_size);
}
#endif
#ifdef HAVE_MNTENT_H
/**
 * Convert a "pagesize=<number>[K|M|G]" mount option into a size in bytes.
 *
 * hugetlbfs mount options commonly carry a size suffix (e.g. "pagesize=2M"),
 * so the suffix has to be applied; the same suffixes are accepted for the
 * "page_size" hint by the query function.  Any other trailing text is
 * ignored and the number is taken as a byte count.
 *
 * @param option  a single mount option token
 * @return the page size in bytes, or 0 if the token is not of the form
 *         "pagesize=<number>..." or the scaled value does not fit in an
 *         unsigned long.
 */
static unsigned long mca_mpool_hugepage_parse_pagesize_option (const char *option)
{
    static const char prefix[] = "pagesize=";
    unsigned long value;
    unsigned int shift = 0;
    char *end;

    if (0 != strncmp (option, prefix, sizeof (prefix) - 1)) {
        return 0;
    }
    option += sizeof (prefix) - 1;

    value = strtoul (option, &end, 10);
    if (end == option) {
        /* no digits after the '=' */
        return 0;
    }

    switch (*end) {
    case 'k':
    case 'K':
        shift = 10;
        break;
    case 'm':
    case 'M':
        shift = 20;
        break;
    case 'g':
    case 'G':
        shift = 30;
        break;
    default:
        break;
    }

    if (value > (~0UL >> shift)) {
        /* scaling would overflow */
        return 0;
    }

    return value << shift;
}
#endif

/**
 * Scan /proc/mounts for hugetlbfs mounts and append one entry per readable
 * and writable mount to mca_mpool_hugepage_component.huge_pages, then sort
 * the list by ascending page size.
 *
 * The page size is taken from the "pagesize" mount option when present and
 * from statfs()/statvfs() on the mount point otherwise.  Mounts whose page
 * size cannot be determined are skipped.  The scan stops early when memory
 * runs out.  Without <mntent.h> support the function does nothing.
 */
static void mca_mpool_hugepage_find_hugepages (void) {
#ifdef HAVE_MNTENT_H
    mca_mpool_hugepage_hugepage_t *hp;
    FILE *fh;
    struct mntent *mntent;
    char *opts, *tok, *ctx;

    fh = setmntent ("/proc/mounts", "r");
    if (NULL == fh) {
        return;
    }

    while (NULL != (mntent = getmntent(fh))) {
        unsigned long page_size = 0;

        if (0 != strcmp(mntent->mnt_type, "hugetlbfs")) {
            continue;
        }

        /* strtok_r modifies its input, so work on a private copy */
        opts = strdup(mntent->mnt_opts);
        if (NULL == opts) {
            break;
        }

        /* The loop condition also covers an option string without any token,
         * in which case the very first strtok_r call returns NULL. */
        for (tok = strtok_r (opts, ",", &ctx) ; NULL != tok ; tok = strtok_r (NULL, ",", &ctx)) {
            if (0 == strncmp (tok, "pagesize", 8)) {
                break;
            }
        }

        if (NULL != tok) {
            page_size = mca_mpool_hugepage_parse_pagesize_option (tok);
        } else {
            /* No pagesize option: ask the file system for its block size.
             * page_size stays 0 (and the mount is skipped) if the call fails
             * or no suitable interface is available. */
#if defined(USE_STATFS)
            struct statfs info;

            if (0 == statfs (mntent->mnt_dir, &info)) {
                page_size = info.f_bsize;
            }
#elif defined(HAVE_STATVFS)
            struct statvfs info;

            if (0 == statvfs (mntent->mnt_dir, &info)) {
                page_size = info.f_bsize;
            }
#endif
        }

        free(opts);

        if (0 == page_size) {
            /* could not get page size */
            continue;
        }

        hp = OBJ_NEW(mca_mpool_hugepage_hugepage_t);
        if (NULL == hp) {
            break;
        }

        hp->path = strdup (mntent->mnt_dir);
        if (NULL == hp->path) {
            /* out of memory; do not hand a NULL path to access() */
            OBJ_RELEASE(hp);
            break;
        }
        hp->page_size = page_size;

        if (0 == access (hp->path, R_OK | W_OK)) {
            opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                 "found huge page with size = %lu, path = %s, mmap flags = 0x%x, adding to list",
                                 hp->page_size, hp->path, hp->mmap_flags);
            opal_list_append (&mca_mpool_hugepage_component.huge_pages, &hp->super);
        } else {
            opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                 "found huge page with size = %lu, path = %s, mmap flags = 0x%x, with invalid "
                                 "permissions, skipping", hp->page_size, hp->path, hp->mmap_flags);
            OBJ_RELEASE(hp);
        }
    }

    opal_list_sort (&mca_mpool_hugepage_component.huge_pages, page_compare);

    endmntent (fh);
#endif
}
/**
 * Select the hugepage module matching a set of hints.
 *
 * Recognized hints (comma separated "key=value" pairs, keys are compared
 * case-insensitively):
 *   - mpool=hugepage   the component was requested by name: priority 100.
 *                      Any other (or missing) value rejects the component.
 *   - page_size=N[k|m|g]  requested huge page size; an unknown suffix makes
 *                      the request unmatchable.
 *
 * Without a page size request the "page_size" MCA variable is used and the
 * priority drops to 0 unless the component was requested by name.  A module
 * whose page size matches raises the priority by 20, capped at 100.
 *
 * @param hints         hint string, may be NULL
 * @param priority_out  receives the priority on success, may be NULL
 * @param module        receives the matching module on success, may be NULL
 *
 * @return OPAL_SUCCESS if a module matches,
 *         OPAL_ERR_NOT_AVAILABLE if the component has no modules,
 *         OPAL_ERR_OUT_OF_RESOURCE if the hints cannot be split,
 *         OPAL_ERR_NOT_FOUND if another mpool was requested or no module has
 *         the requested page size.
 */
static int mca_mpool_hugepage_query (const char *hints, int *priority_out,
                                     mca_mpool_base_module_t **module)
{
    unsigned long page_size = 0;
    char **hints_array;
    int my_priority = mca_mpool_hugepage_priority;
    char *tmp;
    bool found = false;

    if (0 == mca_mpool_hugepage_component.module_count) {
        return OPAL_ERR_NOT_AVAILABLE;
    }

    if (hints) {
        hints_array = opal_argv_split (hints, ',');
        if (NULL == hints_array) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        for (int i = 0 ; hints_array[i] ; ++i) {
            char *key = hints_array[i];
            char *value = NULL;

            /* split "key=value" in place; value stays NULL for a bare key */
            if (NULL != (tmp = strchr (key, '='))) {
                value = tmp + 1;
                *tmp = '\0';
            }

            if (0 == strcasecmp ("mpool", key)) {
                if (value && 0 == strcasecmp ("hugepage", value)) {
                    /* this mpool was requested by name */
                    my_priority = 100;
                    opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                         "hugepage mpool matches hint: %s=%s", key, value);
                } else {
                    /* different mpool requested; value is NULL for a bare
                     * "mpool" hint and must not be passed to %s as is */
                    opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                         "hugepage mpool does not match hint: %s=%s", key,
                                         value ? value : "(null)");
                    opal_argv_free (hints_array);
                    return OPAL_ERR_NOT_FOUND;
                }
            }

            if (0 == strcasecmp ("page_size", key) && value) {
                page_size = strtoul (value, &tmp, 0);
                if (*tmp) {
                    switch (*tmp) {
                    case 'g':
                    case 'G':
                        page_size *= 1024;
                        /* fall through */
                    case 'm':
                    case 'M':
                        page_size *= 1024;
                        /* fall through */
                    case 'k':
                    case 'K':
                        page_size *= 1024;
                        break;
                    default:
                        /* unknown suffix: use a value no module will match */
                        page_size = (unsigned long) -1;
                    }
                }
                opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                     "hugepage mpool requested page size: %lu", page_size);
            }
        }

        opal_argv_free (hints_array);
    }

    if (0 == page_size) {
        /* use default huge page size */
        page_size = mca_mpool_hugepage_page_size;
        if (my_priority < 100) {
            /* take a priority hit if this mpool was not asked for by name */
            my_priority = 0;
        }
        /* hints may legitimately be NULL here */
        opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
                             "hugepage mpool did not match any hints: %s", hints ? hints : "(null)");
    }

    for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
        mca_mpool_hugepage_module_t *hugepage_module = mca_mpool_hugepage_component.modules + i;

        if (hugepage_module->huge_page->page_size != page_size) {
            continue;
        }

        my_priority = (my_priority < 80) ? my_priority + 20 : 100;
        if (module) {
            *module = &hugepage_module->super;
        }
        opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                             "matches page size hint. page size: %lu, path: %s, mmap flags: "
                             "0x%x", page_size, hugepage_module->huge_page->path,
                             hugepage_module->huge_page->mmap_flags);
        found = true;
        break;
    }

    if (!found) {
        opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
                             "could not find page matching page request: %lu", page_size);
        return OPAL_ERR_NOT_FOUND;
    }

    if (priority_out) {
        *priority_out = my_priority;
    }

    return OPAL_SUCCESS;
}