
Merge pull request #1118 from hjelmn/mpool_rewrite

mpool/rcache rewrite
This commit is contained in:
Nathan Hjelm 2016-03-15 10:46:24 -06:00
parent deae9e52bf eac0b110b8
commit ec9712050b
137 changed files with 5242 additions and 5106 deletions

View file

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -145,7 +148,7 @@ BEGIN_C_DECLS
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor,
size_t reserve,
size_t* size,
@ -154,7 +157,7 @@ BEGIN_C_DECLS
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor,
size_t reserve,
size_t* size,

View file

@ -336,7 +336,7 @@ ompi_crcp_base_none_btl_free( struct mca_btl_base_module_t* btl,
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor,
size_t reserve,
size_t* size,
@ -349,7 +349,7 @@ ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor,
size_t reserve,
size_t* size,

View file

@ -235,7 +235,7 @@ typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_free_fn_t)
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_prepare_fn_t)
( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor,
size_t reserve,
size_t* size,

View file

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -12,6 +13,8 @@
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -55,10 +58,7 @@ extern char *ompi_pml_base_bsend_allocator_name;
/*
* Routine to return pages to sub-allocator as needed
*/
static void* mca_pml_bsend_alloc_segment(
struct mca_mpool_base_module_t* module,
size_t* size_inout,
mca_mpool_base_registration_t** registration)
static void* mca_pml_bsend_alloc_segment(void *ctx, size_t *size_inout)
{
void *addr;
size_t size = *size_inout;
@ -70,7 +70,6 @@ static void* mca_pml_bsend_alloc_segment(
addr = mca_pml_bsend_addr;
mca_pml_bsend_addr += size;
*size_inout = size;
if (NULL != registration) *registration = NULL;
return addr;
}
@ -232,7 +231,7 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request)
/* allocate a buffer to hold packed message */
sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL);
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0);
if(NULL == sendreq->req_addr) {
/* release resources when request is freed */
sendreq->req_base.req_pml_complete = true;
@ -287,7 +286,7 @@ int mca_pml_base_bsend_request_alloc(ompi_request_t* request)
/* allocate a buffer to hold packed message */
sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL);
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0);
if(NULL == sendreq->req_addr) {
/* release resources when request is freed */
sendreq->req_base.req_pml_complete = true;
@ -321,7 +320,7 @@ void* mca_pml_base_bsend_request_alloc_buf( size_t length )
/* allocate a buffer to hold packed message */
buf = mca_pml_bsend_allocator->alc_alloc(
mca_pml_bsend_allocator, length, 0, NULL);
mca_pml_bsend_allocator, length, 0);
if(NULL == buf) {
/* release resources when request is freed */
OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);

View file

@ -79,12 +79,9 @@ mca_pml_base_component_2_0_0_t mca_pml_ob1_component = {
.pmlm_finalize = mca_pml_ob1_component_fini,
};
void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration);
void *mca_pml_ob1_seg_alloc (void *ctx, size_t* size);
void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool,
void* segment );
void mca_pml_ob1_seg_free (void *ctx, void *segment);
static inline int mca_pml_ob1_param_register_int(
const char* param_name,
@ -354,13 +351,12 @@ int mca_pml_ob1_component_fini(void)
return OMPI_SUCCESS;
}
void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration) {
void *mca_pml_ob1_seg_alloc (void *ctx, size_t *size)
{
return malloc(*size);
}
void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool,
void* segment ) {
void mca_pml_ob1_seg_free (void *ctx, void *segment)
{
free(segment);
}

View file

@ -81,7 +81,7 @@ do { \
buffers[0].addr = (char*) \
mca_pml_ob1.allocator->alc_alloc( mca_pml_ob1.allocator, \
buffers[0].len, \
0, NULL); \
0); \
_ptr = (unsigned char*)(buffers[0].addr); \
macro_segments[0].seg_addr.pval = buffers[0].addr; \
} \

View file

@ -42,8 +42,8 @@ int mca_vprotocol_base_request_parasite(void)
pml_fl_save.fl_max_to_alloc,
pml_fl_save.fl_num_per_alloc,
pml_fl_save.fl_mpool,
pml_fl_save.fl_mpool_reg_flags,
0,
pml_fl_save.fl_rcache_reg_flags,
pml_fl_save.fl_rcache,
pml_fl_save.item_init,
pml_fl_save.ctx);
if(OMPI_SUCCESS != ret) return ret;
@ -71,8 +71,8 @@ int mca_vprotocol_base_request_parasite(void)
pml_fl_save.fl_max_to_alloc,
pml_fl_save.fl_num_per_alloc,
pml_fl_save.fl_mpool,
pml_fl_save.fl_mpool_reg_flags,
0,
pml_fl_save.fl_rcache_reg_flags,
pml_fl_save.fl_rcache,
pml_fl_save.item_init,
pml_fl_save.ctx);
if(OMPI_SUCCESS != ret) return ret;

View file

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -12,6 +13,8 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -43,6 +46,8 @@ static const char FUNC_NAME[] = "MPI_Alloc_mem";
int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
{
char info_value[MPI_MAX_INFO_VAL + 1];
char *mpool_hints = NULL;
if (MPI_PARAM_CHECK) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
@ -69,7 +74,16 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
OPAL_CR_ENTER_LIBRARY();
*((void **) baseptr) = mca_mpool_base_alloc((size_t) size, (struct opal_info_t*)info);
if (MPI_INFO_NULL != info) {
int flag;
(void) ompi_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag);
if (flag) {
mpool_hints = info_value;
}
}
*((void **) baseptr) = mca_mpool_base_alloc ((size_t) size, (struct opal_info_t*)info,
mpool_hints);
OPAL_CR_EXIT_LIBRARY();
if (NULL == *((void **) baseptr)) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM,
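For context on the new "mpool_hints" handling above, a minimal sketch of an application passing the hint through an MPI_Info object (the hint string is a placeholder; the keys a given mpool component actually understands are not part of this diff):

/* Sketch: passing "mpool_hints" to MPI_Alloc_mem via MPI_Info.
 * The hint value below is hypothetical, not a documented setting. */
#include <mpi.h>

int main (int argc, char **argv)
{
    MPI_Info info;
    void *buf = NULL;

    MPI_Init (&argc, &argv);
    MPI_Info_create (&info);
    MPI_Info_set (info, "mpool_hints", "example-hint");   /* placeholder value */
    MPI_Alloc_mem ((MPI_Aint) (1 << 20), info, &buf);
    if (NULL != buf) {
        MPI_Free_mem (buf);
    }
    MPI_Info_free (&info);
    MPI_Finalize ();
    return 0;
}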

View file

@ -628,13 +628,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* Select which MPI components to use */
if (OMPI_SUCCESS !=
(ret = mca_mpool_base_init(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
error = "mca_mpool_base_init() failed";
goto error;
}
if (OMPI_SUCCESS !=
(ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {

View file

@ -13,7 +13,7 @@
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -28,6 +28,9 @@
#include "opal/align.h"
#include "opal/util/output.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/util/sys_limits.h"
typedef struct opal_free_list_item_t opal_free_list_memory_t;
@ -49,17 +52,22 @@ static void opal_free_list_construct(opal_free_list_t* fl)
fl->fl_payload_buffer_alignment = 0;
fl->fl_frag_class = OBJ_CLASS(opal_free_list_item_t);
fl->fl_mpool = NULL;
fl->fl_rcache = NULL;
/* default flags */
fl->fl_mpool_reg_flags = MCA_MPOOL_FLAGS_CACHE_BYPASS |
MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM;
fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS |
MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM;
fl->ctx = NULL;
OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
}
static void opal_free_list_allocation_release (opal_free_list_t *fl, opal_free_list_memory_t *fl_mem)
{
if (NULL != fl->fl_rcache) {
fl->fl_rcache->rcache_deregister (fl->fl_rcache, fl_mem->registration);
}
if (NULL != fl->fl_mpool) {
fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr, fl_mem->registration);
fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr);
} else if (fl_mem->ptr) {
free (fl_mem->ptr);
}
@ -108,8 +116,9 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
opal_class_t *frag_class, size_t payload_buffer_size,
size_t payload_buffer_alignment, int num_elements_to_alloc,
int max_elements_to_alloc, int num_elements_per_alloc,
mca_mpool_base_module_t* mpool, int mpool_reg_flags,
void *unused0, opal_free_list_item_init_fn_t item_init, void *ctx)
mca_mpool_base_module_t *mpool, int rcache_reg_flags,
mca_rcache_base_module_t *rcache, opal_free_list_item_init_fn_t item_init,
void *ctx)
{
/* alignment must be more than zero and power of two */
if (frag_alignment <= 1 || (frag_alignment & (frag_alignment - 1))) {
@ -137,11 +146,12 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
flist->fl_max_to_alloc = max_elements_to_alloc;
flist->fl_num_allocated = 0;
flist->fl_num_per_alloc = num_elements_per_alloc;
flist->fl_mpool = mpool;
flist->fl_mpool = mpool ? mpool : mca_mpool_base_default_module;
flist->fl_rcache = rcache;
flist->fl_frag_alignment = frag_alignment;
flist->fl_payload_buffer_alignment = payload_buffer_alignment;
flist->item_init = item_init;
flist->fl_mpool_reg_flags |= mpool_reg_flags;
flist->fl_rcache_reg_flags |= rcache_reg_flags;
flist->ctx = ctx;
if (num_elements_to_alloc) {
@ -153,10 +163,10 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
{
unsigned char *ptr, *mpool_alloc_ptr = NULL, *payload_ptr = NULL;
unsigned char *ptr, *payload_ptr = NULL;
opal_free_list_memory_t *alloc_ptr;
size_t alloc_size, head_size, elem_size = 0;
mca_mpool_base_registration_t *reg = NULL;
size_t alloc_size, head_size, elem_size = 0, buffer_size, align;
mca_rcache_base_registration_t *reg = NULL;
int rc = OPAL_SUCCESS;
if (flist->fl_max_to_alloc && (flist->fl_num_allocated + num_elements) >
@ -170,6 +180,29 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
head_size = OPAL_ALIGN(flist->fl_frag_size, flist->fl_frag_alignment, size_t);
/* NTH: calculate allocation alignment first as it might change the number of elements */
if (0 != flist->fl_payload_buffer_size) {
elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size,
flist->fl_payload_buffer_alignment, size_t);
/* elem_size should not be 0 here */
assert (elem_size > 0);
buffer_size = num_elements * elem_size;
align = flist->fl_payload_buffer_alignment;
if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist->fl_rcache_reg_flags) {
size_t pagesize = opal_getpagesize ();
/* CUDA cannot handle registering overlapping regions, so make
* sure each region is page sized and page aligned. */
align = OPAL_ALIGN(align, pagesize, size_t);
buffer_size = OPAL_ALIGN(buffer_size, pagesize, size_t);
/* avoid wasting space in the buffer */
num_elements = buffer_size / elem_size;
}
}
/* calculate head allocation size */
alloc_size = num_elements * head_size + sizeof(opal_free_list_memory_t) +
flist->fl_frag_alignment;
@ -180,37 +213,27 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
}
if (0 != flist->fl_payload_buffer_size) {
elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size,
flist->fl_payload_buffer_alignment, size_t);
/* elem_size should not be 0 here */
assert (elem_size > 0);
/* allocate the rest from the mpool (or use memalign/malloc) */
if(flist->fl_mpool != NULL) {
payload_ptr = mpool_alloc_ptr =
(unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool,
num_elements * elem_size,
flist->fl_payload_buffer_alignment,
flist->fl_mpool_reg_flags, &reg);
} else {
#ifdef HAVE_POSIX_MEMALIGN
posix_memalign ((void **) &mpool_alloc_ptr, flist->fl_payload_buffer_alignment,
num_elements * elem_size);
payload_ptr = mpool_alloc_ptr;
#else
mpool_alloc_ptr = (unsigned char *) malloc (num_elements * elem_size +
flist->fl_payload_buffer_alignment);
payload_ptr = (unsigned char *) OPAL_ALIGN((uintptr_t)mpool_alloc_ptr,
flist->fl_payload_buffer_alignment,
uintptr_t);
#endif
}
if(NULL == mpool_alloc_ptr) {
payload_ptr = (unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool, buffer_size, align, 0);
if (NULL == payload_ptr) {
free(alloc_ptr);
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
}
if (flist->fl_rcache) {
rc = flist->fl_rcache->rcache_register (flist->fl_rcache, payload_ptr, num_elements * elem_size,
flist->fl_rcache_reg_flags, MCA_RCACHE_ACCESS_ANY, &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
free (alloc_ptr);
if (flist->fl_mpool) {
flist->fl_mpool->mpool_free (flist->fl_mpool, payload_ptr);
} else {
free (payload_ptr);
}
return rc;
}
}
}
/* make the alloc_ptr a list item, save the chunk in the allocations list,
@ -219,7 +242,7 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
opal_list_append(&(flist->fl_allocations), (opal_list_item_t*)alloc_ptr);
alloc_ptr->registration = reg;
alloc_ptr->ptr = mpool_alloc_ptr;
alloc_ptr->ptr = payload_ptr;
ptr = (unsigned char*)alloc_ptr + sizeof(opal_free_list_memory_t);
ptr = OPAL_ALIGN_PTR(ptr, flist->fl_frag_alignment, unsigned char*);

View file

@ -77,6 +77,8 @@ struct opal_free_list_t {
/** mpool to use for free list buffer allocation (posix_memalign/malloc
* are used if this is NULL) */
struct mca_mpool_base_module_t *fl_mpool;
/** registration cache */
struct mca_rcache_base_module_t *fl_rcache;
/** Multi-threaded lock. Used when the free list is empty. */
opal_mutex_t fl_lock;
/** Multi-threaded condition. Used when threads are waiting on free
@ -84,8 +86,8 @@ struct opal_free_list_t {
opal_condition_t fl_condition;
/** List of free list allocation */
opal_list_t fl_allocations;
/** Flags to pass to the mpool register function */
int fl_mpool_reg_flags;
/** Flags to pass to the rcache register function */
int fl_rcache_reg_flags;
/** Free list item initialization function */
opal_free_list_item_init_fn_t item_init;
/** Initialization function context */
@ -98,7 +100,7 @@ struct mca_mpool_base_registration_t;
struct opal_free_list_item_t
{
opal_list_item_t super;
struct mca_mpool_base_registration_t *registration;
struct mca_rcache_base_registration_t *registration;
void *ptr;
};
typedef struct opal_free_list_item_t opal_free_list_item_t;
@ -118,8 +120,8 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_free_list_item_t);
* @param max_elements_to_alloc (IN) Maximum number of elements to allocate.
* @param num_elements_per_alloc (IN) Number of elements to grow by per allocation.
* @param mpool (IN) Optional memory pool for allocations.
* @param mpool_reg_flags (IN) Flags to pass to mpool registration function.
* @param unused0 (IN) Future. Must be NULL.
* @param rcache_reg_flags (IN) Flags to pass to rcache registration function.
* @param rcache (IN) Optional registration cache.
* @param item_init (IN) Optional item initialization function
* @param ctx (IN) Initialization function context.
*/
@ -134,8 +136,8 @@ OPAL_DECLSPEC int opal_free_list_init (opal_free_list_t *free_list,
int max_elements_to_alloc,
int num_elements_per_alloc,
struct mca_mpool_base_module_t *mpool,
int mpool_reg_flags,
void *unused0,
int rcache_reg_flags,
struct mca_rcache_base_module_t *rcache,
opal_free_list_item_init_fn_t item_init,
void *ctx);
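A hedged sketch of a caller using the updated opal_free_list_init signature (my_frag_t, my_item_init and my_rcache are placeholders, not part of this commit). Passing NULL for the mpool now selects the default mpool module, the former unused0 slot carries the optional rcache, and the flag argument takes MCA_RCACHE_* registration flags:

/* Sketch only: the 14-argument form introduced by this commit. */
opal_free_list_t frags;
int rc;

OBJ_CONSTRUCT(&frags, opal_free_list_t);
rc = opal_free_list_init (&frags,
                          sizeof (my_frag_t),        /* frag_size */
                          opal_cache_line_size,      /* frag_alignment */
                          OBJ_CLASS(my_frag_t),      /* frag_class */
                          4096,                      /* payload_buffer_size */
                          opal_cache_line_size,      /* payload_buffer_alignment */
                          16,                        /* num_elements_to_alloc */
                          -1,                        /* max_elements_to_alloc (no limit) */
                          16,                        /* num_elements_per_alloc */
                          NULL,                      /* mpool: NULL -> default module */
                          0,                         /* rcache_reg_flags */
                          my_rcache,                 /* rcache (may be NULL) */
                          my_item_init,              /* item_init */
                          NULL);                     /* ctx */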

View file

@ -22,6 +22,8 @@
#ifndef OPAL_ALIGN_H
#define OPAL_ALIGN_H
#define OPAL_DOWN_ALIGN(x,a,t) ((x) & ~(((t)(a)-1)))
#define OPAL_DOWN_ALIGN_PTR(x,a,t) ((t)OPAL_DOWN_ALIGN((uintptr_t)x, a, uintptr_t))
#define OPAL_ALIGN(x,a,t) (((x)+((t)(a)-1)) & ~(((t)(a)-1)))
#define OPAL_ALIGN_PTR(x,a,t) ((t)OPAL_ALIGN((uintptr_t)x, a, uintptr_t))
#define OPAL_ALIGN_PAD_AMOUNT(x,s) ((~((uintptr_t)(x))+1) & ((uintptr_t)(s)-1))
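A quick illustration of the new down-alignment macros (addresses chosen only for illustration):

/* OPAL_DOWN_ALIGN rounds an integer down to a power-of-two boundary,
 * mirroring the existing round-up OPAL_ALIGN. */
uintptr_t addr = 0x12345;
uintptr_t page = OPAL_DOWN_ALIGN(addr, 4096, uintptr_t);           /* 0x12000 */
char *base = OPAL_DOWN_ALIGN_PTR((char *) addr, 4096, char *);     /* same address as a pointer */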

View file

@ -27,7 +27,6 @@
#include "opal_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/mpool/mpool.h"
BEGIN_C_DECLS
@ -40,16 +39,14 @@ struct mca_allocator_base_module_t;
typedef void* (*mca_allocator_base_module_alloc_fn_t)(
struct mca_allocator_base_module_t*,
size_t size,
size_t align,
mca_mpool_base_registration_t** registration);
size_t align);
/**
* The realloc function typedef
*/
typedef void* (*mca_allocator_base_module_realloc_fn_t)(
struct mca_allocator_base_module_t*,
void*, size_t,
mca_mpool_base_registration_t** registration);
void*, size_t);
/**
* Free function typedef
@ -90,7 +87,7 @@ struct mca_allocator_base_module_t {
mca_allocator_base_module_finalize_fn_t alc_finalize;
/**< Finalize and free everything */
/* memory pool and resources */
struct mca_mpool_base_module_t* alc_mpool;
void *alc_context;
};
/**
* Convenience typedef.
@ -103,18 +100,15 @@ typedef struct mca_allocator_base_module_t mca_allocator_base_module_t;
* provided by the module to the allocator framework.
*/
typedef void* (*mca_allocator_base_component_segment_alloc_fn_t)(
struct mca_mpool_base_module_t* module,
size_t* size,
mca_mpool_base_registration_t** registration);
typedef void* (*mca_allocator_base_component_segment_alloc_fn_t)(void *ctx,
size_t *size);
/**
* A function to free memory from the control of the allocator framework
* back to the system. This function is to be provided by the module to the
* allocator framework.
*/
typedef void (*mca_allocator_base_component_segment_free_fn_t)(
struct mca_mpool_base_module_t* module,
typedef void (*mca_allocator_base_component_segment_free_fn_t)(void *ctx,
void *segment);
@ -126,7 +120,7 @@ typedef struct mca_allocator_base_module_t*
bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* mpool
void *context
);
/**

View file

@ -45,7 +45,7 @@ struct mca_allocator_base_selected_module_t {
typedef struct mca_allocator_base_selected_module_t mca_allocator_base_selected_module_t;
/**
* Declaces mca_mpool_base_selected_module_t as a class.
* Declaces mca_allocator_base_selected_module_t as a class.
*/
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_allocator_base_selected_module_t);

View file

@ -78,7 +78,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* mpool)
void *context)
{
mca_allocator_basic_module_t *module = (mca_allocator_basic_module_t *)
malloc(sizeof(mca_allocator_basic_module_t));
@ -91,7 +91,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
module->super.alc_free = mca_allocator_basic_free;
module->super.alc_compact = mca_allocator_basic_compact;
module->super.alc_finalize = mca_allocator_basic_finalize;
module->super.alc_mpool = mpool;
module->super.alc_context = context;
module->seg_alloc = segment_alloc;
module->seg_free = segment_free;
OBJ_CONSTRUCT(&module->seg_list, opal_list_t);
@ -163,8 +163,7 @@ static void mca_allocator_basic_combine_next(
void *mca_allocator_basic_alloc(
mca_allocator_base_module_t * base,
size_t size,
size_t align,
mca_mpool_base_registration_t** registration)
size_t align)
{
mca_allocator_basic_module_t* module = (mca_allocator_basic_module_t*)base;
mca_allocator_basic_segment_t* seg;
@ -198,7 +197,7 @@ void *mca_allocator_basic_alloc(
/* request additional block */
allocated_size = size;
if(NULL == (addr = (unsigned char *)module->seg_alloc(module->super.alc_mpool, &allocated_size, registration))) {
if(NULL == (addr = (unsigned char *)module->seg_alloc(module->super.alc_context, &allocated_size))) {
OPAL_THREAD_UNLOCK(&module->seg_lock);
return NULL;
}
@ -239,14 +238,13 @@ void *mca_allocator_basic_alloc(
void * mca_allocator_basic_realloc(
mca_allocator_base_module_t * base,
void * ptr,
size_t size,
mca_mpool_base_registration_t** registration)
size_t size)
{
unsigned char* addr = ((unsigned char*)ptr) - sizeof(size_t);
size_t alloc_size = *(size_t*)addr;
if(size <= alloc_size)
return ptr;
addr = (unsigned char *)mca_allocator_basic_alloc(base,size,0,registration);
addr = (unsigned char *)mca_allocator_basic_alloc(base, size, 0);
if(addr == NULL)
return addr;
memcpy(addr,ptr,alloc_size);

View file

@ -77,7 +77,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* module
void *ctx
);
/**
@ -94,8 +94,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
void * mca_allocator_basic_alloc(
mca_allocator_base_module_t * mem,
size_t size,
size_t align,
mca_mpool_base_registration_t** registration);
size_t align);
/**
* Attempts to resize the passed region of memory into a larger or a smaller
@ -114,8 +113,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
void * mca_allocator_basic_realloc(
mca_allocator_base_module_t * mem,
void * ptr,
size_t size,
mca_mpool_base_registration_t** registration);
size_t size);
/**
* Frees the passed region of memory

View file

@ -24,14 +24,13 @@
#include "opal/mca/allocator/allocator.h"
#include "opal/constants.h"
#include "opal/mca/allocator/bucket/allocator_bucket_alloc.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/base/mca_base_var.h"
struct mca_allocator_base_module_t* mca_allocator_bucket_module_init(
bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* mpool
);
void *context);
int mca_allocator_bucket_module_open(void);
@ -39,8 +38,7 @@ int mca_allocator_bucket_module_close(void);
void * mca_allocator_bucket_alloc_wrapper(
struct mca_allocator_base_module_t* allocator,
size_t size, size_t align,
mca_mpool_base_registration_t** registration);
size_t size, size_t align);
static int mca_allocator_num_buckets;
@ -66,7 +64,7 @@ struct mca_allocator_base_module_t* mca_allocator_bucket_module_init(
bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* mpool)
void *context)
{
size_t alloc_size = sizeof(mca_allocator_bucket_t);
mca_allocator_bucket_t * retval;
@ -87,7 +85,7 @@ struct mca_allocator_base_module_t* mca_allocator_bucket_module_init(
allocator->super.alc_free = mca_allocator_bucket_free;
allocator->super.alc_compact = mca_allocator_bucket_cleanup;
allocator->super.alc_finalize = mca_allocator_bucket_finalize;
allocator->super.alc_mpool = mpool;
allocator->super.alc_context = context;
return (mca_allocator_base_module_t *) allocator;
}
@ -111,13 +109,12 @@ int mca_allocator_bucket_module_close(void) {
void * mca_allocator_bucket_alloc_wrapper(
struct mca_allocator_base_module_t* allocator,
size_t size,
size_t align,
mca_mpool_base_registration_t** registration)
size_t align)
{
if(0 == align){
return mca_allocator_bucket_alloc(allocator, size, registration);
return mca_allocator_bucket_alloc(allocator, size);
}
return mca_allocator_bucket_alloc_align(allocator, size, align, registration);
return mca_allocator_bucket_alloc_align(allocator, size, align);
}

View file

@ -71,10 +71,8 @@ mca_allocator_bucket_t * mca_allocator_bucket_init(
* region or NULL if there was an error
*
*/
void * mca_allocator_bucket_alloc(
mca_allocator_base_module_t * mem,
size_t size,
mca_mpool_base_registration_t** registration)
void * mca_allocator_bucket_alloc(mca_allocator_base_module_t * mem,
size_t size)
{
mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem;
/* initialize for the later bit shifts */
@ -113,7 +111,7 @@ void * mca_allocator_bucket_alloc(
allocated_size += sizeof(mca_allocator_bucket_segment_head_t);
/* attempt to get the memory */
segment_header = (mca_allocator_bucket_segment_head_t *)
mem_options->get_mem_fn(mem_options->super.alc_mpool, &allocated_size, registration);
mem_options->get_mem_fn(mem_options->super.alc_context, &allocated_size);
if(NULL == segment_header) {
/* release the lock */
OPAL_THREAD_UNLOCK(&(mem_options->buckets[bucket_num].lock));
@ -153,11 +151,8 @@ void * mca_allocator_bucket_alloc(
/*
* allocates an aligned region of memory
*/
void * mca_allocator_bucket_alloc_align(
mca_allocator_base_module_t * mem,
size_t size,
size_t alignment,
mca_mpool_base_registration_t** registration)
void * mca_allocator_bucket_alloc_align(mca_allocator_base_module_t * mem,
size_t size, size_t alignment)
{
mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem;
int bucket_num = 1;
@ -177,7 +172,7 @@ void * mca_allocator_bucket_alloc_align(
bucket_size = size + sizeof(mca_allocator_bucket_chunk_header_t);
allocated_size = aligned_max_size;
/* get some memory */
ptr = mem_options->get_mem_fn(mem_options->super.alc_mpool, &allocated_size, registration);
ptr = mem_options->get_mem_fn(mem_options->super.alc_context, &allocated_size);
if(NULL == ptr) {
return(NULL);
}
@ -236,11 +231,8 @@ void * mca_allocator_bucket_alloc_align(
/*
* function to reallocate the segment of memory
*/
void * mca_allocator_bucket_realloc(
mca_allocator_base_module_t * mem,
void * ptr,
size_t size,
mca_mpool_base_registration_t** registration)
void * mca_allocator_bucket_realloc(mca_allocator_base_module_t * mem,
void * ptr, size_t size)
{
mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem;
/* initialize for later bit shifts */
@ -261,7 +253,7 @@ void * mca_allocator_bucket_realloc(
return(ptr);
}
/* we need a new space in memory, so let's get it */
ret_ptr = mca_allocator_bucket_alloc((mca_allocator_base_module_t *) mem_options, size, registration);
ret_ptr = mca_allocator_bucket_alloc((mca_allocator_base_module_t *) mem_options, size);
if(NULL == ret_ptr) {
/* we were unable to get a larger area of memory */
return(NULL);
@ -341,7 +333,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem)
next_segment = segment->next_segment;
/* free the memory */
if(mem_options->free_mem_fn)
mem_options->free_mem_fn(mem->alc_mpool, segment);
mem_options->free_mem_fn(mem->alc_context, segment);
segment = next_segment;
}
mem_options->buckets[i].free_chunk = NULL;
@ -378,7 +370,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem)
*segment_header = segment->next_segment;
/* free the memory */
if(mem_options->free_mem_fn)
mem_options->free_mem_fn(mem->alc_mpool, segment);
mem_options->free_mem_fn(mem->alc_context, segment);
} else {
/* go to next segment */
segment_header = &((*segment_header)->next_segment);

View file

@ -1,4 +1,5 @@
/**
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reseved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -131,8 +134,7 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t;
*/
void * mca_allocator_bucket_alloc(
mca_allocator_base_module_t * mem,
size_t size,
mca_mpool_base_registration_t** registration);
size_t size);
/**
* Accepts a request for memory in a specific region defined by the
@ -152,8 +154,7 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t;
void * mca_allocator_bucket_alloc_align(
mca_allocator_base_module_t * mem,
size_t size,
size_t alignment,
mca_mpool_base_registration_t** registration);
size_t alignment);
/**
* Attempts to resize the passed region of memory into a larger or a smaller
@ -172,8 +173,7 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t;
void * mca_allocator_bucket_realloc(
mca_allocator_base_module_t * mem,
void * ptr,
size_t size,
mca_mpool_base_registration_t** registration);
size_t size);
/**
* Frees the passed region of memory

View file

@ -218,6 +218,12 @@ static int group_register (const char *project_name, const char *framework_name,
return -1;
}
/* avoid groups of the form opal_opal, ompi_ompi, etc */
if (NULL != project_name && NULL != framework_name &&
(0 == strcmp (project_name, framework_name))) {
project_name = NULL;
}
group_id = group_find (project_name, framework_name, component_name, true);
if (0 <= group_id) {
ret = mca_base_var_group_get_internal (group_id, &group, true);

Просмотреть файл

@ -122,6 +122,7 @@
#include "opal/datatype/opal_convertor.h"
#include "opal/mca/mca.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
@ -256,23 +257,23 @@ enum {
/** Allow local write on the registered region. If a region is registered
* with this flag the registration can be used as the local handle for a
* btl_get operation. */
MCA_BTL_REG_FLAG_LOCAL_WRITE = MCA_MPOOL_ACCESS_LOCAL_WRITE,
MCA_BTL_REG_FLAG_LOCAL_WRITE = MCA_RCACHE_ACCESS_LOCAL_WRITE,
/** Allow remote read on the registered region. If a region is registered
* with this flag the registration can be used as the remote handle for a
* btl_get operation. */
MCA_BTL_REG_FLAG_REMOTE_READ = MCA_MPOOL_ACCESS_REMOTE_READ,
MCA_BTL_REG_FLAG_REMOTE_READ = MCA_RCACHE_ACCESS_REMOTE_READ,
/** Allow remote write on the registered region. If a region is registered
* with this flag the registration can be used as the remote handle for a
* btl_put operation. */
MCA_BTL_REG_FLAG_REMOTE_WRITE = MCA_MPOOL_ACCESS_REMOTE_WRITE,
MCA_BTL_REG_FLAG_REMOTE_WRITE = MCA_RCACHE_ACCESS_REMOTE_WRITE,
/** Allow remote atomic operations on the registered region. If a region is
* registered with this flag the registration can be used as the remote
* handle for a btl_atomic_op or btl_atomic_fop operation. */
MCA_BTL_REG_FLAG_REMOTE_ATOMIC = MCA_MPOOL_ACCESS_REMOTE_ATOMIC,
MCA_BTL_REG_FLAG_REMOTE_ATOMIC = MCA_RCACHE_ACCESS_REMOTE_ATOMIC,
/** Allow any btl operation on the registered region. If a region is registered
* with this flag the registration can be used as the local or remote handle for
* any btl operation. */
MCA_BTL_REG_FLAG_ACCESS_ANY = MCA_MPOOL_ACCESS_ANY,
MCA_BTL_REG_FLAG_ACCESS_ANY = MCA_RCACHE_ACCESS_ANY,
#if OPAL_CUDA_GDR_SUPPORT
/** Region is in GPU memory */
MCA_BTL_REG_FLAG_CUDA_GPU_MEM = 0x00010000,

View file

@ -57,7 +57,7 @@
#include "opal/datatype/opal_convertor.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h"
#include "opal/mca/rcache/rcache.h"
#if OPAL_CUDA_SUPPORT
#include "opal/datatype/opal_datatype_cuda.h"
@ -733,7 +733,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
mca_btl_openib_component.buffer_alignment,
mca_btl_openib_component.ib_free_list_num, -1,
mca_btl_openib_component.ib_free_list_inc,
device->mpool, 0, NULL, mca_btl_openib_frag_init,
device->mpool, 0, device->rcache, mca_btl_openib_frag_init,
init_data);
if (OPAL_SUCCESS != rc) {
/* If we're "out of memory", this usually means that we ran
@ -774,7 +774,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
device->mpool, 0, NULL, mca_btl_openib_frag_init,
device->mpool, 0, device->rcache, mca_btl_openib_frag_init,
init_data);
if (OPAL_SUCCESS != rc) {
/* If we're "out of memory", this usually means that we
@ -807,7 +807,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
device->mpool, 0, NULL, mca_btl_openib_frag_init,
device->mpool, 0, device->rcache, mca_btl_openib_frag_init,
init_data)) {
rc = OPAL_ERROR;
goto exit;
@ -1903,6 +1903,7 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_
mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags)
{
mca_btl_openib_module_t *openib_module = (mca_btl_openib_module_t *) btl;
mca_btl_openib_reg_t *reg;
uint32_t mflags = 0;
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
@ -1910,12 +1911,12 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_
#if OPAL_CUDA_GDR_SUPPORT
if (flags & MCA_BTL_REG_FLAG_CUDA_GPU_MEM) {
mflags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM;
mflags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM;
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mflags, access_flags,
(mca_mpool_base_registration_t **) &reg);
rc = openib_module->device->rcache->rcache_register (openib_module->device->rcache, base, size, mflags,
access_flags, (mca_rcache_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == reg)) {
return NULL;
}
@ -1925,9 +1926,10 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_
static int mca_btl_openib_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{
mca_btl_openib_module_t *openib_module = (mca_btl_openib_module_t *) btl;
mca_btl_openib_reg_t *reg = (mca_btl_openib_reg_t *)((intptr_t) handle - offsetof (mca_btl_openib_reg_t, btl_handle));
btl->btl_mpool->mpool_deregister (btl->btl_mpool, (mca_mpool_base_registration_t *) reg);
openib_module->device->rcache->rcache_deregister (openib_module->device->rcache, (mca_rcache_base_registration_t *) reg);
return OPAL_SUCCESS;
}

View file

@ -45,6 +45,7 @@
#include "opal/mca/event/event.h"
#include "opal/threads/threads.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/btl/base/base.h"
@ -184,8 +185,11 @@ struct mca_btl_openib_component_t {
opal_mutex_t ib_lock;
/**< lock for accessing module state */
char* ib_mpool_name;
/**< name of ib memory pool */
char* ib_mpool_hints;
/**< hints for selecting an mpool component */
char *ib_rcache_name;
/**< name of ib registration cache */
uint8_t num_pp_qps; /**< number of pp qp's */
uint8_t num_srq_qps; /**< number of srq qp's */
@ -374,6 +378,7 @@ typedef struct mca_btl_openib_device_t {
struct ibv_cq *ib_cq[2];
uint32_t cq_size[2];
mca_mpool_base_module_t *mpool;
mca_rcache_base_module_t *rcache;
/* MTU for this device */
uint32_t mtu;
/* Whether this device supports eager RDMA */
@ -502,7 +507,7 @@ struct mca_btl_base_registration_handle_t {
};
struct mca_btl_openib_reg_t {
mca_mpool_base_registration_t base;
mca_rcache_base_registration_t base;
struct ibv_mr *mr;
mca_btl_base_registration_handle_t btl_handle;
};

View file

@ -67,7 +67,8 @@
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/common/cuda/common_cuda.h"
#include "opal/mca/common/verbs/common_verbs.h"
#include "opal/runtime/opal_params.h"
@ -513,26 +514,26 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
}
static int openib_reg_mr (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg)
mca_rcache_base_registration_t *reg)
{
mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data;
mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;
enum ibv_access_flags access_flag = 0;
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_READ) {
if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_READ) {
access_flag |= IBV_ACCESS_REMOTE_READ;
}
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_WRITE) {
if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_WRITE) {
access_flag |= IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE;
}
if (reg->access_flags & MCA_MPOOL_ACCESS_LOCAL_WRITE) {
if (reg->access_flags & MCA_RCACHE_ACCESS_LOCAL_WRITE) {
access_flag |= IBV_ACCESS_LOCAL_WRITE;
}
#if HAVE_DECL_IBV_ATOMIC_HCA
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_ATOMIC) {
if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_ATOMIC) {
access_flag |= IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_LOCAL_WRITE;
}
#endif
@ -545,7 +546,7 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
device->mem_reg_active += size;
#if HAVE_DECL_IBV_ACCESS_SO
if (reg->flags & MCA_MPOOL_FLAGS_SO_MEM) {
if (reg->flags & MCA_RCACHE_FLAGS_SO_MEM) {
access_flag |= IBV_ACCESS_SO;
}
#endif
@ -567,16 +568,16 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
(int) (reg->bound - reg->base + 1), reg->flags));
#if OPAL_CUDA_SUPPORT
if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) {
if (reg->flags & MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM) {
mca_common_cuda_register (base, size,
openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name);
openib_reg->base.rcache->rcache_component->rcache_version.mca_component_name);
}
#endif
return OPAL_SUCCESS;
}
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
static int openib_dereg_mr(void *reg_data, mca_rcache_base_registration_t *reg)
{
mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data;
mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;
@ -593,9 +594,9 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
}
#if OPAL_CUDA_SUPPORT
if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) {
if (reg->flags & MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM) {
mca_common_cuda_unregister(openib_reg->base.base,
openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name);
openib_reg->base.rcache->rcache_component->rcache_version.mca_component_name);
}
#endif
@ -878,6 +879,7 @@ static void device_construct(mca_btl_openib_device_t *device)
device->ib_dev_context = NULL;
device->ib_pd = NULL;
device->mpool = NULL;
device->rcache = NULL;
#if OPAL_ENABLE_PROGRESS_THREADS == 1
device->ib_channel = NULL;
#endif
@ -960,8 +962,8 @@ static void device_destruct(mca_btl_openib_device_t *device)
}
}
if (OPAL_SUCCESS != mca_mpool_base_module_destroy(device->mpool)) {
BTL_VERBOSE(("Failed to release mpool"));
if (OPAL_SUCCESS != mca_rcache_base_module_destroy (device->rcache)) {
BTL_VERBOSE(("failed to release registration cache"));
goto device_error;
}
@ -1590,7 +1592,7 @@ static uint64_t calculate_max_reg (const char *device_name)
static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
{
struct mca_mpool_base_resources_t mpool_resources;
mca_rcache_base_resources_t rcache_resources;
mca_btl_openib_device_t *device;
uint8_t i, k = 0;
int ret = -1, port_cnt;
@ -1813,20 +1815,25 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
"eager RDMA and progress threads", true);
}
asprintf (&mpool_resources.pool_name, "verbs.%" PRIu64, device->ib_dev_attr.node_guid);
mpool_resources.reg_data = (void*)device;
mpool_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t);
mpool_resources.register_mem = openib_reg_mr;
mpool_resources.deregister_mem = openib_dereg_mr;
device->mpool =
mca_mpool_base_module_create(mca_btl_openib_component.ib_mpool_name,
device, &mpool_resources);
if(NULL == device->mpool){
asprintf (&rcache_resources.cache_name, "verbs.%" PRIu64, device->ib_dev_attr.node_guid);
rcache_resources.reg_data = (void*)device;
rcache_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t);
rcache_resources.register_mem = openib_reg_mr;
rcache_resources.deregister_mem = openib_dereg_mr;
device->rcache =
mca_rcache_base_module_create (mca_btl_openib_component.ib_rcache_name,
device, &rcache_resources);
if (NULL == device->rcache) {
/* Don't print an error message here -- we'll get one from
mpool_create anyway */
goto error;
}
device->mpool = mca_mpool_base_module_lookup (mca_btl_openib_component.ib_mpool_hints);
if (NULL == device->mpool) {
goto error;
}
#if OPAL_ENABLE_PROGRESS_THREADS
device->ib_channel = ibv_create_comp_channel(device->ib_dev_context);
if (NULL == device->ib_channel) {
@ -2223,9 +2230,6 @@ error:
ibv_destroy_comp_channel(device->ib_channel);
}
#endif
if (device->mpool) {
mca_mpool_base_module_destroy(device->mpool);
}
if (device->ib_pd) {
ibv_dealloc_pd(device->ib_pd);

View file

@ -20,6 +20,7 @@ BEGIN_C_DECLS
struct mca_btl_openib_eager_rdma_local_t {
opal_ptr_t base; /**< buffer for RDMAing eager messages */
void *alloc_base; /**< allocated base */
mca_btl_openib_recv_frag_t *frags;
mca_btl_openib_reg_t *reg;
uint16_t head; /**< RDMA buffer to poll */

View file

@ -347,13 +347,16 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
* was not in "connect" or "bad" flow (failed to allocate memory)
* and changed the pointer back to NULL
*/
if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL,
(void*)1)) {
if ((void*)1 != endpoint->eager_rdma_local.base.pval &&
NULL != endpoint->eager_rdma_local.base.pval) {
endpoint->endpoint_btl->super.btl_mpool->mpool_free(endpoint->endpoint_btl->super.btl_mpool,
endpoint->eager_rdma_local.base.pval,
(mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg);
if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) {
if (NULL != endpoint->eager_rdma_local.reg) {
endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache,
&endpoint->eager_rdma_local.reg->base);
endpoint->eager_rdma_local.reg = NULL;
}
void *alloc_base = opal_atomic_swap_ptr (&endpoint->eager_rdma_local.alloc_base, NULL);
if (alloc_base) {
endpoint->endpoint_btl->super.btl_mpool->mpool_free (endpoint->endpoint_btl->super.btl_mpool, alloc_base);
pval_clean = true;
}
} else {
@ -861,10 +864,10 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
mca_btl_openib_endpoint_t* endpoint)
{
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
char *buf;
char *buf, *alloc_base;
mca_btl_openib_recv_frag_t *headers_buf;
int i;
uint32_t flag = MCA_MPOOL_FLAGS_CACHE_BYPASS;
int i, rc;
uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS;
/* Set local rdma pointer to 1 temporarily so other threads will not try
* to enter the function */
@ -890,19 +893,26 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
The following flag will be interpreted and the appropriate
steps will be taken when the memory is registered in
openib_reg_mr(). */
flag |= MCA_MPOOL_FLAGS_SO_MEM;
flag |= MCA_RCACHE_FLAGS_SO_MEM;
#endif
buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
alloc_base = buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
openib_btl->eager_rdma_frag_size *
mca_btl_openib_component.eager_rdma_num,
mca_btl_openib_component.buffer_alignment,
flag,
(mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);
0);
if(!buf)
goto free_headers_buf;
rc = openib_btl->device->rcache->rcache_register (openib_btl->device->rcache, buf, openib_btl->eager_rdma_frag_size *
mca_btl_openib_component.eager_rdma_num, flag, MCA_RCACHE_ACCESS_ANY,
(mca_rcache_base_registration_t**)&endpoint->eager_rdma_local.reg);
if (OPAL_SUCCESS != rc) {
openib_btl->super.btl_mpool->mpool_free (openib_btl->super.btl_mpool, alloc_base);
goto free_headers_buf;
}
buf = buf + openib_btl->eager_rdma_frag_size -
sizeof(mca_btl_openib_footer_t) - openib_btl->super.btl_eager_limit -
sizeof(mca_btl_openib_header_t);
@ -913,7 +923,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
mca_btl_openib_frag_init_data_t init_data;
item = (opal_free_list_item_t*)&headers_buf[i];
item->registration = (mca_mpool_base_registration_t *)endpoint->eager_rdma_local.reg;
item->registration = (mca_rcache_base_registration_t *)endpoint->eager_rdma_local.reg;
item->ptr = buf + i * openib_btl->eager_rdma_frag_size;
OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t);
@ -941,6 +951,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
/* set local rdma pointer to real value */
(void)opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
(void*)1, buf);
endpoint->eager_rdma_local.alloc_base = alloc_base;
if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) {
mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
@ -957,8 +968,9 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
return;
}
openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool,
buf, (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg);
openib_btl->device->rcache->rcache_deregister (openib_btl->device->rcache,
(mca_rcache_base_registration_t*)endpoint->eager_rdma_local.reg);
openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool, buf);
free_headers_buf:
free(headers_buf);
unlock_rdma_local:

View file

@ -316,9 +316,12 @@ int btl_openib_register_mca_params(void)
"(must be >= 1)",
32, &mca_btl_openib_component.ib_free_list_inc,
REGINT_GE_ONE));
CHECK(reg_string("mpool", NULL,
"Name of the memory pool to be used (it is unlikely that you will ever want to change this)",
"grdma", &mca_btl_openib_component.ib_mpool_name,
CHECK(reg_string("mpool_hints", NULL, "hints for selecting a memory pool (default: none)",
NULL, &mca_btl_openib_component.ib_mpool_hints,
0));
CHECK(reg_string("rcache", NULL,
"Name of the registration cache to be used (it is unlikely that you will ever want to change this)",
"grdma", &mca_btl_openib_component.ib_rcache_name,
0));
CHECK(reg_int("reg_mru_len", NULL,
"Length of the registration cache most recently used list "

View file

@ -74,7 +74,6 @@
#include "btl_openib_async.h"
#include "connect/connect.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h"
#include "opal/util/sys_limits.h"
#if (ENABLE_DYNAMIC_SL)
@ -1367,7 +1366,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
&init_attr))) {
/* NTH: this process may be out of registered memory. try evicting an item from
the lru of this btl's mpool */
if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) {
if (false == m->btl->device->rcache->rcache_evict (m->btl->device->rcache)) {
break;
}
}
@ -1378,7 +1377,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
&init_attr))) {
/* NTH: this process may be out of registered memory. try evicting an item from
the lru of this btl's mpool */
if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) {
if (false == m->btl->device->rcache->rcache_evict (m->btl->device->rcache)) {
break;
}
}

View file

@ -56,8 +56,6 @@
#include "opal/datatype/opal_convertor.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/sm/mpool_sm.h"
#include "opal/align.h"
#include "opal/util/sys_limits.h"
@ -111,7 +109,7 @@ static void *mpool_calloc(size_t nmemb, size_t size)
size_t bsize = nmemb * size;
mca_mpool_base_module_t *mpool = mca_btl_sm_component.sm_mpool;
buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0, NULL);
buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0);
if (NULL == buf)
return NULL;
@ -122,7 +120,7 @@ static void *mpool_calloc(size_t nmemb, size_t size)
static int
setup_mpool_base_resources(mca_btl_sm_component_t *comp_ptr,
mca_mpool_base_resources_t *out_res)
mca_common_sm_mpool_resources_t *out_res)
{
int rc = OPAL_SUCCESS;
int fd = -1;
@ -222,7 +220,7 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
size_t length, length_payload;
sm_fifo_t *my_fifos;
int my_mem_node, num_mem_nodes, i, rc;
mca_mpool_base_resources_t *res = NULL;
mca_common_sm_mpool_resources_t *res = NULL;
mca_btl_sm_component_t* m = &mca_btl_sm_component;
/* Assume we don't have hwloc support and fill in dummy info */
@ -291,15 +289,14 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
/* Disable memory binding, because each MPI process will claim pages in the
* mpool for their local NUMA node */
res->mem_node = -1;
res->allocator = mca_btl_sm_component.allocator;
if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) {
free(res);
return rc;
}
/* now that res is fully populated, create the thing */
mca_btl_sm_component.sm_mpools[0] =
mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name,
sm_btl, res);
mca_btl_sm_component.sm_mpools[0] = common_sm_mpool_create (res);
/* Sanity check to ensure that we found it */
if (NULL == mca_btl_sm_component.sm_mpools[0]) {
free(res);
@ -470,7 +467,7 @@ int mca_btl_sm_add_procs(
bool have_connected_peer = false;
char **bases;
/* for easy access to the mpool_sm_module */
mca_mpool_sm_module_t *sm_mpool_modp = NULL;
mca_common_sm_mpool_module_t *sm_mpool_modp = NULL;
/* initializion */
@ -548,7 +545,7 @@ int mca_btl_sm_add_procs(
}
bases = mca_btl_sm_component.shm_bases;
sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_sm_component.sm_mpool;
sm_mpool_modp = (mca_common_sm_mpool_module_t *)mca_btl_sm_component.sm_mpool;
/* initialize own FIFOs */
/*

View file

@ -212,6 +212,12 @@ struct mca_btl_sm_component_t {
char *sm_mpool_rndv_file_name;
char *sm_ctl_file_name;
char *sm_rndv_file_name;
/** minimum size of a btl/sm mpool */
unsigned long mpool_min_size;
/** allocator name to use with the mpool */
char *allocator;
};
typedef struct mca_btl_sm_component_t mca_btl_sm_component_t;
OPAL_MODULE_DECLSPEC extern mca_btl_sm_component_t mca_btl_sm_component;
@ -281,7 +287,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
/* allocate the queue in the receiver's address space */
fifo->queue_recv = (volatile void **)mpool->mpool_alloc(
mpool, sizeof(void *) * qsize, opal_cache_line_size, 0, NULL);
mpool, sizeof(void *) * qsize, opal_cache_line_size, 0);
if(NULL == fifo->queue_recv) {
return OPAL_ERR_OUT_OF_RESOURCE;
}

View file

@ -221,6 +221,19 @@ static int sm_register(void)
0, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_sm_component.knem_max_simultaneous);
mca_btl_sm_component.allocator = "bucket";
(void) mca_base_component_var_register (&mca_btl_sm_component.super.btl_version, "allocator",
"Name of allocator component to use for btl/sm allocations",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_sm_component.allocator);
mca_btl_sm_component.mpool_min_size = 134217728;
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, "min_size",
"Minimum size of the common/sm mpool shared memory file",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_sm_component.mpool_min_size);
/* CMA parameters */
mca_btl_sm_component.use_cma = 0;
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version,
@ -234,9 +247,6 @@ static int sm_register(void)
mca_btl_sm_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_max);
mca_btl_sm_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_inc);
mca_btl_sm_param_register_int("max_procs", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_max_procs);
/* there is no practical use for the mpool name parameter since mpool resources differ
between components */
mca_btl_sm_component.sm_mpool_name = "sm";
mca_btl_sm_param_register_uint("fifo_size", 4096, OPAL_INFO_LVL_4, &mca_btl_sm_component.fifo_size);
mca_btl_sm_param_register_int("num_fifos", 1, OPAL_INFO_LVL_4, &mca_btl_sm_component.nfifos);
@ -456,41 +466,6 @@ create_and_attach(mca_btl_sm_component_t *comp_ptr,
return OPAL_SUCCESS;
}
/*
* SKG - I'm not happy with this, but I can't figure out a better way of
* finding the sm mpool's minimum size 8-|. The way I see it. This BTL only
* uses the sm mpool, so maybe this isn't so bad...
*
* The problem is the we need to size the mpool resources at sm BTL component
* init. That means we need to know the mpool's minimum size at create.
*/
static int
get_min_mpool_size(mca_btl_sm_component_t *comp_ptr,
size_t *out_size)
{
const char *type_name = "mpool";
const char *param_name = "min_size";
const mca_base_var_storage_t *min_size;
int id = 0;
if (0 > (id = mca_base_var_find("ompi", type_name, comp_ptr->sm_mpool_name,
param_name))) {
opal_output(0, "mca_base_var_find: failure looking for %s_%s_%s\n",
type_name, comp_ptr->sm_mpool_name, param_name);
return OPAL_ERR_NOT_FOUND;
}
if (OPAL_SUCCESS != mca_base_var_get_value(id, &min_size, NULL, NULL)) {
opal_output(0, "mca_base_var_get_value failure\n");
return OPAL_ERROR;
}
/* the min_size variable is an unsigned long long */
*out_size = (size_t) min_size->ullval;
return OPAL_SUCCESS;
}
static int
get_mpool_res_size(int32_t max_procs,
size_t *out_res_size)
@ -612,20 +587,16 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr,
mca_common_sm_module_t *tmp_modp = NULL;
if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) {
size_t min_size = 0;
/* get the segment size for the sm mpool. */
if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs,
&size))) {
/* rc is already set */
goto out;
}
/* do we need to update the size based on the sm mpool's min size? */
if (OPAL_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) {
goto out;
}
/* update size if less than required minimum */
if (size < min_size) {
size = min_size;
if (size < mca_btl_sm_component.mpool_min_size) {
size = mca_btl_sm_component.mpool_min_size;
}
/* we only need the shmem_ds info at this point. initilization will be
* completed in the mpool module code. the idea is that we just need this

View file

@ -12,8 +12,8 @@
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -53,11 +53,13 @@
#include "opal/datatype/opal_convertor.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/common/sm/common_sm_mpool.h"
#if OPAL_CUDA_SUPPORT
#include "opal/mca/common/cuda/common_cuda.h"
#endif /* OPAL_CUDA_SUPPORT */
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/sm/mpool_sm.h"
#include "opal/mca/rcache/base/base.h"
#if OPAL_ENABLE_FT_CR == 1
#include "opal/mca/crs/base/base.h"
@ -122,7 +124,7 @@ static void *mpool_calloc(size_t nmemb, size_t size)
size_t bsize = nmemb * size;
mca_mpool_base_module_t *mpool = mca_btl_smcuda_component.sm_mpool;
buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0, NULL);
buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0);
if (NULL == buf)
return NULL;
@ -133,7 +135,7 @@ static void *mpool_calloc(size_t nmemb, size_t size)
static int
setup_mpool_base_resources(mca_btl_smcuda_component_t *comp_ptr,
mca_mpool_base_resources_t *out_res)
mca_common_sm_mpool_resources_t *out_res)
{
int rc = OPAL_SUCCESS;
int fd = -1;
@ -228,7 +230,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
size_t length, length_payload;
sm_fifo_t *my_fifos;
int my_mem_node, num_mem_nodes, i, rc;
mca_mpool_base_resources_t *res = NULL;
mca_common_sm_mpool_resources_t *res = NULL;
mca_btl_smcuda_component_t* m = &mca_btl_smcuda_component;
/* Assume we don't have hwloc support and fill in dummy info */
@ -297,15 +299,14 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
/* Disable memory binding, because each MPI process will claim pages in the
* mpool for their local NUMA node */
res->mem_node = -1;
res->allocator = mca_btl_smcuda_component.allocator;
if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) {
free(res);
return rc;
}
/* now that res is fully populated, create the thing */
mca_btl_smcuda_component.sm_mpools[0] =
mca_mpool_base_module_create(mca_btl_smcuda_component.sm_mpool_name,
smcuda_btl, res);
mca_btl_smcuda_component.sm_mpools[0] = common_sm_mpool_create (res);
/* Sanity check to ensure that we found it */
if (NULL == mca_btl_smcuda_component.sm_mpools[0]) {
free(res);
@ -345,10 +346,9 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
/* Create a local memory pool that sends handles to the remote
* side. Note that the res argument is not really used, but
* needed to satisfy function signature. */
smcuda_btl->super.btl_mpool = mca_mpool_base_module_create("gpusm",
smcuda_btl,
res);
if (NULL == smcuda_btl->super.btl_mpool) {
mca_rcache_base_resources_t rcache_res;
smcuda_btl->rcache = mca_rcache_base_module_create("gpusm", smcuda_btl, &rcache_res);
if (NULL == smcuda_btl->rcache) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
#endif /* OPAL_CUDA_SUPPORT */
@ -479,16 +479,9 @@ create_sm_endpoint(int local_proc, struct opal_proc_t *proc)
}
#endif
#if OPAL_CUDA_SUPPORT
{
mca_mpool_base_resources_t resources; /* unused, but needed */
/* Create a remote memory pool on the endpoint. Note that the resources
* argument is just to satisfy the function signature. The rcuda mpool
* actually takes care of filling in the resources. */
ep->mpool = mca_mpool_base_module_create("rgpusm",
NULL,
&resources);
}
/* Create a remote memory pool on the endpoint. The rgpusm component
* does not take any resources. They are filled in internally. */
ep->rcache = mca_rcache_base_module_create ("rgpusm", NULL, NULL);
#endif /* OPAL_CUDA_SUPPORT */
return ep;
}
@ -507,7 +500,7 @@ int mca_btl_smcuda_add_procs(
bool have_connected_peer = false;
char **bases;
/* for easy access to the mpool_sm_module */
mca_mpool_sm_module_t *sm_mpool_modp = NULL;
mca_common_sm_mpool_module_t *sm_mpool_modp = NULL;
/* initialization */
@ -584,7 +577,7 @@ int mca_btl_smcuda_add_procs(
}
bases = mca_btl_smcuda_component.shm_bases;
sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_smcuda_component.sm_mpool;
sm_mpool_modp = (mca_common_sm_mpool_module_t *)mca_btl_smcuda_component.sm_mpool;
/* initialize own FIFOs */
/*
@ -693,6 +686,13 @@ int mca_btl_smcuda_del_procs(
struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers)
{
for (size_t i = 0 ; i < nprocs ; ++i) {
if (peers[i]->rcache) {
mca_rcache_base_module_destroy (peers[i]->rcache);
peers[i]->rcache = NULL;
}
}
return OPAL_SUCCESS;
}
@ -1009,16 +1009,17 @@ static struct mca_btl_base_registration_handle_t *mca_btl_smcuda_register_mem (
struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, void *base,
size_t size, uint32_t flags)
{
mca_mpool_common_cuda_reg_t *reg;
mca_btl_smcuda_t *smcuda_module = (mca_btl_smcuda_t *) btl;
mca_rcache_common_cuda_reg_t *reg;
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
int mpool_flags = 0;
int rcache_flags = 0;
if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) {
mpool_flags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM;
rcache_flags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM;
}
btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mpool_flags,
access_flags, (mca_mpool_base_registration_t **) &reg);
smcuda_module->rcache->rcache_register (smcuda_module->rcache, base, size, rcache_flags,
access_flags, (mca_rcache_base_registration_t **) &reg);
if (OPAL_UNLIKELY(NULL == reg)) {
return NULL;
}
@ -1029,10 +1030,11 @@ static struct mca_btl_base_registration_handle_t *mca_btl_smcuda_register_mem (
static int mca_btl_smcuda_deregister_mem (struct mca_btl_base_module_t* btl,
struct mca_btl_base_registration_handle_t *handle)
{
mca_mpool_common_cuda_reg_t *reg = (mca_mpool_common_cuda_reg_t *)
((intptr_t) handle - offsetof (mca_mpool_common_cuda_reg_t, data));
mca_btl_smcuda_t *smcuda_module = (mca_btl_smcuda_t *) btl;
mca_rcache_common_cuda_reg_t *reg = (mca_rcache_common_cuda_reg_t *)
((intptr_t) handle - offsetof (mca_rcache_common_cuda_reg_t, data));
btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base);
smcuda_module->rcache->rcache_deregister (smcuda_module->rcache, &reg->base);
return OPAL_SUCCESS;
}
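
The deregistration path above recovers the full rcache registration record from the public handle by stepping back over the enclosing struct with offsetof(). A minimal, self-contained sketch of that container-of pattern (the struct and field names below are illustrative stand-ins, not the actual Open MPI types):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef struct example_handle { int id; } example_handle_t;   /* what the BTL hands out */

typedef struct example_registration {
    void             *base;            /* bookkeeping kept by the registration cache */
    example_handle_t  data;            /* embedded public handle */
} example_registration_t;

/* step back from the embedded member to the enclosing record */
static example_registration_t *reg_from_handle (example_handle_t *handle)
{
    return (example_registration_t *)
        ((intptr_t) handle - offsetof (example_registration_t, data));
}

int main (void)
{
    example_registration_t reg = { .base = NULL, .data = { .id = 42 } };
    assert (reg_from_handle (&reg.data) == &reg);
    return 0;
}
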
@ -1043,8 +1045,8 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_mpool_common_cuda_reg_t rget_reg;
mca_mpool_common_cuda_reg_t *reg_ptr = &rget_reg;
mca_rcache_common_cuda_reg_t rget_reg;
mca_rcache_common_cuda_reg_t *reg_ptr = &rget_reg;
int rc, done;
void *remote_memory_address;
size_t offset;
@ -1087,16 +1089,16 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl,
* remote memory which may lie somewhere in the middle. This is taken care of
* a few lines down. Note that we hand in the peer rank just for debugging
* support. */
rc = ep->mpool->mpool_register(ep->mpool, remote_handle->reg_data.memh_seg_addr.pval,
rc = ep->rcache->rcache_register (ep->rcache, remote_handle->reg_data.memh_seg_addr.pval,
remote_handle->reg_data.memh_seg_len, ep->peer_smp_rank,
MCA_MPOOL_ACCESS_LOCAL_WRITE,
(mca_mpool_base_registration_t **)&reg_ptr);
MCA_RCACHE_ACCESS_LOCAL_WRITE,
(mca_rcache_base_registration_t **)&reg_ptr);
if (OPAL_SUCCESS != rc) {
opal_output(0, "Failed to register remote memory, rc=%d", rc);
return rc;
}
frag->registration = (mca_mpool_base_registration_t *)reg_ptr;
frag->registration = (mca_rcache_base_registration_t *)reg_ptr;
frag->endpoint = ep;
/* The registration has given us back the memory block that this


@ -206,6 +206,8 @@ struct mca_btl_smcuda_component_t {
int use_cuda_ipc;
int use_cuda_ipc_same_gpu;
#endif /* OPAL_CUDA_SUPPORT */
unsigned long mpool_min_size;
char *allocator;
};
typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component;
@ -217,7 +219,7 @@ struct mca_btl_smcuda_t {
mca_btl_base_module_t super; /**< base BTL interface */
bool btl_inited; /**< flag indicating if btl has been inited */
mca_btl_base_module_error_cb_fn_t error_cb;
mca_rcache_base_module_t *rcache;
};
typedef struct mca_btl_smcuda_t mca_btl_smcuda_t;
OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_t mca_btl_smcuda;
@ -254,7 +256,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
/* allocate the queue in the receiver's address space */
fifo->queue_recv = (volatile void **)mpool->mpool_alloc(
mpool, sizeof(void *) * qsize, opal_cache_line_size, 0, NULL);
mpool, sizeof(void *) * qsize, opal_cache_line_size, 0);
if(NULL == fifo->queue_recv) {
return OPAL_ERR_OUT_OF_RESOURCE;
}


@ -12,8 +12,8 @@
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
@ -141,6 +141,13 @@ static int mca_btl_smcuda_component_verify(void) {
static int smcuda_register(void)
{
/* register SM component parameters */
mca_btl_smcuda_component.mpool_min_size = 134217728;
(void) mca_base_component_var_register(&mca_btl_smcuda_component.super.btl_version, "min_size",
"Minimum size of the common/sm mpool shared memory file",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_smcuda_component.mpool_min_size);
mca_btl_smcuda_param_register_int("free_list_num", 8, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_num);
mca_btl_smcuda_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_max);
mca_btl_smcuda_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_inc);
@ -156,6 +163,12 @@ static int smcuda_register(void)
/* default number of extra procs to allow for future growth */
mca_btl_smcuda_param_register_int("sm_extra_procs", 0, OPAL_INFO_LVL_9, &mca_btl_smcuda_component.sm_extra_procs);
mca_btl_smcuda_component.allocator = "bucket";
(void) mca_base_component_var_register (&mca_btl_smcuda_component.super.btl_version, "allocator",
"Name of allocator component to use for btl/smcuda allocations",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_smcuda_component.allocator);
#if OPAL_CUDA_SUPPORT
/* Lower priority when CUDA support is not requested */
if (opal_cuda_support) {
@ -366,41 +379,6 @@ create_and_attach(mca_btl_smcuda_component_t *comp_ptr,
return OPAL_SUCCESS;
}
/*
* SKG - I'm not happy with this, but I can't figure out a better way of
* finding the sm mpool's minimum size 8-|. The way I see it, this BTL only
* uses the sm mpool, so maybe this isn't so bad...
*
* The problem is that we need to size the mpool resources at sm BTL component
* init. That means we need to know the mpool's minimum size at create.
*/
static int
get_min_mpool_size(mca_btl_smcuda_component_t *comp_ptr,
size_t *out_size)
{
const char *type_name = "mpool";
const char *param_name = "min_size";
const mca_base_var_storage_t *min_size;
int id = 0;
if (0 > (id = mca_base_var_find("ompi", type_name, comp_ptr->sm_mpool_name,
param_name))) {
opal_output(0, "mca_base_var_find: failure looking for %s_%s_%s\n",
type_name, comp_ptr->sm_mpool_name, param_name);
return OPAL_ERR_NOT_FOUND;
}
if (OPAL_SUCCESS != mca_base_var_get_value(id, &min_size, NULL, NULL)) {
opal_output(0, "mca_base_var_get_value failure\n");
return OPAL_ERROR;
}
/* the min_size variable is an unsigned long long */
*out_size = (size_t) min_size->ullval;
return OPAL_SUCCESS;
}
static int
get_mpool_res_size(int32_t max_procs,
size_t *out_res_size)
@ -521,21 +499,18 @@ create_rndv_file(mca_btl_smcuda_component_t *comp_ptr,
mca_common_sm_module_t *tmp_modp = NULL;
if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) {
size_t min_size = 0;
/* get the segment size for the sm mpool. */
if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs,
&size))) {
/* rc is already set */
goto out;
}
/* do we need to update the size based on the sm mpool's min size? */
if (OPAL_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) {
goto out;
}
/* update size if less than required minimum */
if (size < min_size) {
size = min_size;
if (size < mca_btl_smcuda_component.mpool_min_size) {
size = mca_btl_smcuda_component.mpool_min_size;
}
/* we only need the shmem_ds info at this point. initialization will be
* completed in the mpool module code. the idea is that we just need this
* info so we can populate the rndv file (or modex when we have it). */
@ -1161,8 +1136,8 @@ int mca_btl_smcuda_component_progress(void)
OPAL_SUCCESS);
if(frag->registration != NULL) {
frag->endpoint->mpool->mpool_deregister(frag->endpoint->mpool,
(mca_mpool_base_registration_t*)frag->registration);
frag->endpoint->rcache->rcache_deregister (frag->endpoint->rcache,
(mca_rcache_base_registration_t*)frag->registration);
frag->registration = NULL;
MCA_BTL_SMCUDA_FRAG_RETURN(frag);
}


@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -11,6 +12,8 @@
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -35,7 +38,7 @@ struct mca_btl_base_endpoint_t {
int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing
* SMP specific data structures. */
#if OPAL_CUDA_SUPPORT
mca_mpool_base_module_t *mpool; /**< mpool for remotely registered memory */
mca_rcache_base_module_t *rcache; /**< rcache for remotely registered memory */
#endif /* OPAL_CUDA_SUPPORT */
#if OPAL_ENABLE_PROGRESS_THREADS == 1
int fifo_fd; /**< pipe/fifo used to signal endpoint that data is queued */


@ -54,7 +54,7 @@ typedef struct mca_btl_smcuda_hdr_t mca_btl_smcuda_hdr_t;
#if OPAL_CUDA_SUPPORT
struct mca_btl_base_registration_handle_t {
mca_mpool_common_cuda_reg_data_t reg_data;
mca_rcache_common_cuda_reg_data_t reg_data;
};
#endif
@ -78,7 +78,7 @@ struct mca_btl_smcuda_frag_t {
mca_btl_base_segment_t segment;
struct mca_btl_base_endpoint_t *endpoint;
#if OPAL_CUDA_SUPPORT
struct mca_mpool_base_registration_t *registration;
struct mca_rcache_base_registration_t *registration;
struct mca_btl_base_registration_handle_t *local_handle;
#endif /* OPAL_CUDA_SUPPORT */
size_t size;


@ -25,7 +25,8 @@
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/udreg/mpool_udreg.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/rcache/udreg/rcache_udreg.h"
#include "opal/util/output.h"
#include "opal_stdint.h"
#include "opal/mca/btl/btl.h"
@ -56,8 +57,8 @@ typedef struct mca_btl_ugni_endpoint_attr_t {
} mca_btl_ugni_endpoint_attr_t;
enum {
MCA_BTL_UGNI_MPOOL_UDREG,
MCA_BTL_UGNI_MPOOL_GRDMA
MCA_BTL_UGNI_RCACHE_UDREG,
MCA_BTL_UGNI_RCACHE_GRDMA
};
typedef struct mca_btl_ugni_module_t {
@ -86,7 +87,7 @@ typedef struct mca_btl_ugni_module_t {
opal_free_list_t post_descriptors;
mca_mpool_base_module_t *smsg_mpool;
mca_mpool_base_module_t *mpool;
opal_free_list_t smsg_mboxes;
gni_ep_handle_t wildcard_ep;
@ -128,6 +129,8 @@ typedef struct mca_btl_ugni_module_t {
int nlocal_procs;
volatile int active_send_count;
mca_rcache_base_module_t *rcache;
} mca_btl_ugni_module_t;
typedef struct mca_btl_ugni_component_t {
@ -177,8 +180,11 @@ typedef struct mca_btl_ugni_component_t {
/* Page size to use for SMSG allocations (udreg mpool) */
unsigned int smsg_page_size;
/* mpool type (grdma or udreg) */
int mpool_type;
/* rcache type (grdma or udreg) */
int rcache_type;
/* memory pool hints */
char *mpool_hints;
/* Number of mailboxes to allocate in each block */
unsigned int mbox_increment;
@ -312,7 +318,7 @@ struct mca_btl_base_registration_handle_t {
};
typedef struct mca_btl_ugni_reg_t {
mca_mpool_base_registration_t base;
mca_rcache_base_registration_t base;
mca_btl_base_registration_handle_t handle;
} mca_btl_ugni_reg_t;


@ -266,11 +266,12 @@ struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_
}
static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg)
static int ugni_reg_mem (void *reg_data, void *base, size_t size,
mca_rcache_base_registration_t *reg)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
gni_cq_handle_t cq = NULL;
gni_return_t rc;
int flags;
@ -278,18 +279,24 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (reg->access_flags & (MCA_MPOOL_ACCESS_REMOTE_WRITE | MCA_MPOOL_ACCESS_LOCAL_WRITE |
MCA_MPOOL_ACCESS_REMOTE_ATOMIC)) {
if (reg->access_flags & (MCA_RCACHE_ACCESS_REMOTE_WRITE | MCA_RCACHE_ACCESS_LOCAL_WRITE |
MCA_RCACHE_ACCESS_REMOTE_ATOMIC)) {
flags = GNI_MEM_READWRITE;
} else {
flags = GNI_MEM_READ_ONLY;
}
if (!(reg->flags & MCA_RCACHE_FLAGS_SO_MEM)) {
flags |= GNI_MEM_RELAXED_PI_ORDERING;
}
if (reg->flags & MCA_RCACHE_FLAGS_RESV0) {
cq = ugni_module->smsg_remote_cq;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, NULL, flags, -1, &(ugni_reg->handle.gni_handle));
size, cq, flags, -1, &(ugni_reg->handle.gni_handle));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
@ -301,24 +308,8 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
return OPAL_SUCCESS;
}
static int ugni_reg_smsg_mem (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
gni_return_t rc;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, ugni_module->smsg_remote_cq, GNI_MEM_READWRITE, -1,
&(ugni_reg->handle.gni_handle));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return opal_common_rc_ugni_to_opal (rc);
}
static int
ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
@ -339,10 +330,10 @@ ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
static int
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
{
struct mca_mpool_base_resources_t mpool_resources;
mca_rcache_udreg_resources_t rcache_resources;
unsigned int mbox_increment;
uint32_t nprocs, *u32;
const char *mpool_name;
char *rcache_name;
int rc;
rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0,
@ -404,43 +395,35 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
return rc;
}
mpool_resources.pool_name = "ompi.ugni";
mpool_resources.reg_data = (void *) ugni_module;
mpool_resources.sizeof_reg = sizeof (mca_btl_ugni_reg_t);
mpool_resources.register_mem = ugni_reg_rdma_mem;
mpool_resources.deregister_mem = ugni_dereg_mem;
if (MCA_BTL_UGNI_MPOOL_UDREG == mca_btl_ugni_component.mpool_type) {
/* additional settings for the udreg mpool */
/* 4k should be large enough for any Gemini/Ares system */
mpool_resources.max_entries = 4096;
mpool_resources.use_kernel_cache = true;
/* request a specific page size. this request may not be honored if the
* page size does not exist. */
mpool_resources.page_size = mca_btl_ugni_component.smsg_page_size;
mpool_resources.use_evict_w_unreg = false;
mpool_name = "udreg";
} else {
mpool_name = "grdma";
}
ugni_module->super.btl_mpool =
mca_mpool_base_module_create(mpool_name, ugni_module->device, &mpool_resources);
mpool_resources.register_mem = ugni_reg_smsg_mem;
ugni_module->smsg_mpool =
mca_mpool_base_module_create(mpool_name, ugni_module->device, &mpool_resources);
ugni_module->super.btl_mpool = mca_mpool_base_module_lookup (mca_btl_ugni_component.mpool_hints);
if (NULL == ugni_module->super.btl_mpool) {
BTL_ERROR(("error creating rdma mpool"));
BTL_ERROR(("could not find mpool matching hints %s", mca_btl_ugni_component.mpool_hints));
return OPAL_ERROR;
}
if (NULL == ugni_module->smsg_mpool) {
BTL_ERROR(("error creating smsg mpool"));
rcache_resources.base.cache_name = "ompi.ugni";
rcache_resources.base.reg_data = (void *) ugni_module;
rcache_resources.base.sizeof_reg = sizeof (mca_btl_ugni_reg_t);
rcache_resources.base.register_mem = ugni_reg_mem;
rcache_resources.base.deregister_mem = ugni_dereg_mem;
if (MCA_BTL_UGNI_RCACHE_UDREG == mca_btl_ugni_component.rcache_type) {
/* additional settings for the udreg mpool */
/* 4k should be large enough for any Gemini/Ares system */
rcache_resources.max_entries = 4096;
rcache_resources.use_kernel_cache = true;
rcache_resources.use_evict_w_unreg = false;
rcache_name = "udreg";
} else {
rcache_name = "grdma";
}
ugni_module->rcache =
mca_rcache_base_module_create (rcache_name, ugni_module->device, &rcache_resources.base);
if (NULL == ugni_module->rcache) {
BTL_ERROR(("error creating registration cache"));
return OPAL_ERROR;
}
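
The hunk above is one instance of the pattern this rewrite introduces throughout: the BTL describes its registrations (cache name, per-entry size, register/deregister callbacks) in an mca_rcache_base_resources_t and lets the named rcache component drive them. A hedged, minimal sketch of that wiring in isolation (the my_* names are placeholders, and the snippet only builds inside the Open MPI tree):

#include <stddef.h>
#include "opal/constants.h"
#include "opal/mca/rcache/base/base.h"

/* placeholder registration record; the rcache allocates sizeof_reg bytes per entry */
typedef struct my_reg_t {
    mca_rcache_base_registration_t base;      /* must be the first member */
    /* device-specific handle would follow here */
} my_reg_t;

static int my_reg_mem (void *reg_data, void *base, size_t size,
                       mca_rcache_base_registration_t *reg)
{
    (void) reg_data; (void) base; (void) size; (void) reg;
    return OPAL_SUCCESS;                       /* pin/register the region here */
}

static int my_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
{
    (void) reg_data; (void) reg;
    return OPAL_SUCCESS;                       /* unpin/deregister the region here */
}

static mca_rcache_base_module_t *my_create_rcache (void *owner)
{
    mca_rcache_base_resources_t res = {
        .cache_name     = "example",
        .reg_data       = owner,
        .sizeof_reg     = sizeof (my_reg_t),
        .register_mem   = my_reg_mem,
        .deregister_mem = my_dereg_mem,
    };

    /* "grdma" is one of the two components the surrounding code selects from */
    return mca_rcache_base_module_create ("grdma", owner, &res);
}
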
@ -451,7 +434,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_eager_num,
mca_btl_ugni_component.ugni_eager_max,
mca_btl_ugni_component.ugni_eager_inc,
ugni_module->super.btl_mpool, 0, NULL,
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@ -466,7 +449,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_eager_num,
mca_btl_ugni_component.ugni_eager_max,
mca_btl_ugni_component.ugni_eager_inc,
ugni_module->super.btl_mpool, 0, NULL,
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@ -487,12 +470,14 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mbox_increment = mca_btl_ugni_component.mbox_increment;
}
/* use the MCA_RCACHE_FLAGS_RESV0 to signal this is smsg memory */
rc = opal_free_list_init (&ugni_module->smsg_mboxes,
sizeof (mca_btl_ugni_smsg_mbox_t), 8,
OBJ_CLASS(mca_btl_ugni_smsg_mbox_t),
mca_btl_ugni_component.smsg_mbox_size, 128,
32, -1, mbox_increment, ugni_module->smsg_mpool,
0, NULL, NULL, NULL);
32, -1, mbox_increment, ugni_module->super.btl_mpool,
MCA_RCACHE_FLAGS_SO_MEM | MCA_RCACHE_FLAGS_RESV0,
ugni_module->rcache, NULL, NULL);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating smsg mailbox free list"));
return rc;


@ -15,6 +15,11 @@
#include "btl_ugni_rdma.h"
#include "btl_ugni_smsg.h"
#include "opal/util/sys_limits.h"
#include <stdlib.h>
#include <fcntl.h>
#include "opal/memoryhooks/memory.h"
#include "opal/runtime/opal_params.h"
@ -25,6 +30,7 @@ static int btl_ugni_component_open(void);
static int btl_ugni_component_close(void);
static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool);
static int mca_btl_ugni_component_progress(void);
static unsigned long mca_btl_ugni_ugni_page_size = 0;
mca_btl_ugni_component_t mca_btl_ugni_component = {
.super = {
@ -44,9 +50,9 @@ mca_btl_ugni_component_t mca_btl_ugni_component = {
}
};
mca_base_var_enum_value_t mpool_values[] = {
{MCA_BTL_UGNI_MPOOL_UDREG, "udreg"},
{MCA_BTL_UGNI_MPOOL_GRDMA, "grdma"},
mca_base_var_enum_value_t rcache_values[] = {
{MCA_BTL_UGNI_RCACHE_UDREG, "udreg"},
{MCA_BTL_UGNI_RCACHE_GRDMA, "grdma"},
{-1, NULL} /* sentinel */
};
@ -55,6 +61,7 @@ btl_ugni_component_register(void)
{
mca_base_var_enum_t *new_enum;
gni_nic_device_t device_type;
char *mpool_hints_tmp = NULL;
int rc;
(void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
@ -174,10 +181,29 @@ btl_ugni_component_register(void)
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);
/* determine if there are get alignment restrictions */
GNI_GetDeviceType (&device_type);
mca_btl_ugni_component.smsg_page_size = 2 << 20;
if (GNI_DEVICE_GEMINI == device_type) {
if (access ("/sys/class/gemini/ghal0/mrt", R_OK)) {
int fd = open ("/sys/class/gemini/ghal0/mrt", O_RDONLY);
char buffer[10];
if (0 <= fd) {
memset (buffer, 0, sizeof (buffer));
read (fd, buffer, sizeof (buffer) - 1);
close (fd);
mca_btl_ugni_ugni_page_size = strtol (buffer, NULL, 10) * 1024;
mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size;
}
}
}
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"smsg_page_size", "Page size to use for SMSG "
"mailbox allocation (default 2M)",
"mailbox allocation (default: detect)",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL,
@ -202,20 +228,38 @@ btl_ugni_component_register(void)
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL,
NULL, NULL, &mca_btl_ugni_progress_thread_wakeups);
/* btl/ugni can only support only a fixed set of mpools (these mpools have compatible resource
/* btl/ugni can only support a fixed set of rcache components (these rcache components have compatible resource
* structures) */
rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum);
rc = mca_base_var_enum_create ("btl_ugni_rcache", rcache_values, &new_enum);
if (OPAL_SUCCESS != rc) {
return rc;
}
mca_btl_ugni_component.mpool_type = MCA_BTL_UGNI_MPOOL_UDREG;
mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_UDREG;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"mpool", "mpool to use", MCA_BASE_VAR_TYPE_INT, new_enum,
"rcache", "registration cache to use", MCA_BASE_VAR_TYPE_INT, new_enum,
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_type);
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type);
OBJ_RELEASE(new_enum);
if (mca_btl_ugni_ugni_page_size) {
rc = asprintf (&mpool_hints_tmp, "page_size=%lu", mca_btl_ugni_ugni_page_size);
if (rc < 0) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
mca_btl_ugni_component.mpool_hints = mpool_hints_tmp;
} else {
mca_btl_ugni_component.mpool_hints = "page_size=2M";
}
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"mpool_hints", "hints to use when selecting a memory pool (default: "
"\"page_size=2M\")", MCA_BASE_VAR_TYPE_STRING, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_hints);
free (mpool_hints_tmp);
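
Because the hint string is registered as an ordinary component-level MCA variable, it can be changed at run time without rebuilding; assuming the usual MCA prefixing rules, an override would look something like the following (the page size value and application name are only examples):

    shell$ mpirun --mca btl_ugni_mpool_hints "page_size=2097152" ./my_app
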
/* ensure we lose send exclusivity to sm and vader if they are enabled */
mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2;
@ -228,9 +272,6 @@ btl_ugni_component_register(void)
mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024;
/* determine if there are get alignment restrictions */
GNI_GetDeviceType (&device_type);
/*
* see def. of ALIGNMENT_MASK to figure this one out
*/
@ -291,7 +332,7 @@ btl_ugni_component_close(void)
}
static void mca_btl_ugni_autoset_leave_pinned (void) {
if (MCA_BTL_UGNI_MPOOL_UDREG != mca_btl_ugni_component.mpool_type) {
if (MCA_BTL_UGNI_RCACHE_UDREG != mca_btl_ugni_component.rcache_type) {
int value = opal_mem_hooks_support_level();
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
@ -362,6 +403,12 @@ mca_btl_ugni_component_init (int *num_btl_modules,
return NULL;
}
if (mca_btl_ugni_component.smsg_page_size != (unsigned long) opal_getpagesize ()) {
if (mca_btl_ugni_ugni_page_size > mca_btl_ugni_component.smsg_page_size) {
mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size;
}
}
mca_btl_ugni_autoset_leave_pinned ();
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;


@ -159,8 +159,8 @@ static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
{
if (frag->registration) {
frag->endpoint->btl->super.btl_mpool->mpool_deregister(frag->endpoint->btl->super.btl_mpool,
(mca_mpool_base_registration_t *) frag->registration);
frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache,
(mca_rcache_base_registration_t *) frag->registration);
frag->registration = NULL;
}


@ -215,17 +215,8 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
OBJ_DESTRUCT(&ugni_module->eager_get_pending);
OBJ_DESTRUCT(&ugni_module->eager_get_pending_lock);
if (ugni_module->initialized) {
/* need to tear down the mpools *after* the free lists */
if (NULL != ugni_module->smsg_mpool) {
(void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool);
ugni_module->smsg_mpool = NULL;
}
if (NULL != ugni_module->super.btl_mpool) {
(void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool);
ugni_module->super.btl_mpool = NULL;
}
if (ugni_module->rcache) {
mca_rcache_base_module_destroy (ugni_module->rcache);
}
ugni_module->initialized = false;
@ -303,12 +294,13 @@ static mca_btl_base_registration_handle_t *
mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base,
size_t size, uint32_t flags)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_reg_t *reg;
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
int rc;
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0, access_flags,
(mca_mpool_base_registration_t **) &reg);
rc = ugni_module->rcache->rcache_register (ugni_module->rcache, base, size, 0, access_flags,
(mca_rcache_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return NULL;
}
@ -318,10 +310,11 @@ mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *
static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_reg_t *reg =
(mca_btl_ugni_reg_t *)((intptr_t) handle - offsetof (mca_btl_ugni_reg_t, handle));
(void) btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base);
(void) ugni_module->rcache->rcache_deregister (ugni_module->rcache, &reg->base);
return OPAL_SUCCESS;
}


@ -57,6 +57,7 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
uint32_t flags)
{
bool use_eager_get = (*size + reserve) > mca_btl_ugni_component.smsg_max_data;
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_base_frag_t *frag = NULL;
mca_btl_ugni_reg_t *registration = NULL;
void *data_ptr;
@ -74,9 +75,9 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
(unsigned int)(*size + reserve)));
if (OPAL_UNLIKELY(true == use_eager_get)) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, data_ptr, *size, 0,
MCA_MPOOL_ACCESS_REMOTE_READ,
(mca_mpool_base_registration_t **)&registration);
rc = ugni_module->rcache->rcache_register (ugni_module->rcache, data_ptr, *size, 0,
MCA_RCACHE_ACCESS_REMOTE_READ,
(mca_rcache_base_registration_t **)&registration);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_frag_return (frag);
return NULL;


@ -16,8 +16,8 @@
static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
struct mca_btl_ugni_reg_t *ugni_reg =
(struct mca_btl_ugni_reg_t *) mbox->super.registration;
struct mca_mpool_base_registration_t *base_reg =
(struct mca_mpool_base_registration_t *) ugni_reg;
mca_rcache_base_registration_t *base_reg =
(mca_rcache_base_registration_t *) ugni_reg;
/* initialize mailbox attributes */
mbox->attr.smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;


@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -12,6 +13,8 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -37,7 +40,13 @@
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "btl_usnic_compat.h"
#if RCACHE_VERSION < 30
#include "opal/mca/mpool/grdma/mpool_grdma.h"
#endif
#else
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/btl_base_error.h"
@ -45,8 +54,6 @@
#include "ompi/mca/mpool/grdma/mpool_grdma.h"
#endif
#include "btl_usnic_compat.h"
BEGIN_C_DECLS
/*
@ -153,8 +160,16 @@ typedef struct opal_btl_usnic_component_t {
/** list of usnic proc structures */
opal_list_t usnic_procs;
#if RCACHE_VERSION == 30
/** memory pool hints */
char* usnic_mpool_hints;
/** registration cache name */
char *usnic_rcache_name;
#else
/** name of memory pool */
char* usnic_mpool_name;
#endif
char *if_include;
char *if_exclude;


@ -1,5 +1,8 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -8,11 +11,13 @@
*/
/* This header contains macros to help minimize usnic BTL differences
* between v1.7/v1.8 and v1.9/v2.0. */
* between v1.7/v1.8, v1.9/v2.0, and v2.0/v2.1. */
#ifndef BTL_USNIC_COMPAT_H
#define BTL_USNIC_COMPAT_H
#include "opal/mca/rcache/rcache.h"
/************************************************************************/
/* v2.0 and beyond */
@ -358,4 +363,18 @@ opal_btl_usnic_put(struct mca_btl_base_module_t *base_module,
#endif /* BTL_VERSION */
#if defined(RCACHE_MAJOR_VERSION) && RCACHE_MAJOR_VERSION >= 3
#define RCACHE_VERSION 30
/* these structures got renamed with the mpool/rcache rewrite */
#define mca_mpool_base_registration_t mca_rcache_base_registration_t
#define mca_mpool_base_resources_t mca_rcache_base_resources_t
#else
#define RCACHE_VERSION 20
#endif
#endif /* BTL_USNIC_COMPAT_H */


@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -12,7 +13,7 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
@ -203,9 +204,19 @@ int opal_btl_usnic_component_register(void)
0, &stats_relative, 0, OPAL_INFO_LVL_4));
mca_btl_usnic_component.stats_relative = (bool) stats_relative;
#if RCACHE_VERSION == 30
CHECK(reg_string("mpool_hints", "Hints to use when selecting mpool",
NULL, &mca_btl_usnic_component.usnic_mpool_hints, 0,
OPAL_INFO_LVL_5));
CHECK(reg_string("rcache", "Name of the registration cache to be used",
"grdma", &mca_btl_usnic_component.usnic_rcache_name, 0,
OPAL_INFO_LVL_5));
#else
CHECK(reg_string("mpool", "Name of the memory pool to be used",
"grdma", &mca_btl_usnic_component.usnic_mpool_name, 0,
OPAL_INFO_LVL_5));
#endif
want_numa_device_assignment = OPAL_HAVE_HWLOC ? 1 : -1;
CHECK(reg_int("want_numa_device_assignment",


@ -13,7 +13,7 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
@ -44,6 +44,8 @@
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/rcache/rcache.h"
#else
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/btl_base_error.h"
@ -884,7 +886,11 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl)
OBJ_DESTRUCT(&module->chunk_segs);
OBJ_DESTRUCT(&module->senders);
#if RCACHE_VERSION == 30
mca_rcache_base_module_destroy(module->rcache);
#else
mca_mpool_base_module_destroy(module->super.btl_mpool);
#endif
if (NULL != module->av) {
fi_close(&module->av->fid);
@ -1736,9 +1742,9 @@ static int init_one_channel(opal_btl_usnic_module_t *module,
rd_num /* num elements to alloc */,
rd_num /* max elements to alloc */,
rd_num /* num elements per alloc */,
module->super.btl_mpool /* mpool for reg */,
module->super.btl_mpool /* mpool for (1.x, 2.0: reg, 2.1+: allocation) */,
0 /* mpool reg flags */,
NULL /* unused0 */,
module->rcache /* registration cache for 2.1+ */,
NULL /* item_init */,
NULL /* item_init_context */);
channel->recv_segs.ctx = module; /* must come after
@ -2054,11 +2060,28 @@ static int init_mpool(opal_btl_usnic_module_t *module)
mpool_resources.sizeof_reg = sizeof(opal_btl_usnic_reg_t);
mpool_resources.register_mem = usnic_reg_mr;
mpool_resources.deregister_mem = usnic_dereg_mr;
#if RCACHE_VERSION == 30
module->rcache =
mca_rcache_base_module_create (mca_btl_usnic_component.usnic_rcache_name,
&module->super, &mpool_resources);
if (NULL == module->rcache) {
opal_show_help("help-mpi-btl-usnic.txt",
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
"create rcache", __FILE__, __LINE__);
return OPAL_ERROR;
}
module->super.btl_mpool =
mca_mpool_base_module_lookup (mca_btl_usnic_component.usnic_mpool_hints);
#else
asprintf(&mpool_resources.pool_name, "%s",
module->fabric_info->fabric_attr->name);
module->super.btl_mpool =
mca_mpool_base_module_create(mca_btl_usnic_component.usnic_mpool_name,
&module->super, &mpool_resources);
#endif
if (NULL == module->super.btl_mpool) {
opal_show_help("help-mpi-btl-usnic.txt",
"internal error during init",
@ -2232,7 +2255,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
module->sd_num / 2,
module->super.btl_mpool,
0 /* mpool reg flags */,
NULL /* unused0 */,
module->rcache,
NULL /* item_init */,
NULL /* item_init_context */);
assert(OPAL_SUCCESS == rc);
@ -2287,7 +2310,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
module->sd_num / 2,
module->super.btl_mpool,
0 /* mpool reg flags */,
NULL /* unused0 */,
module->rcache,
NULL /* item_init */,
NULL /* item_init_context */);
assert(OPAL_SUCCESS == rc);
@ -2309,7 +2332,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
module->sd_num / 2,
module->super.btl_mpool,
0 /* mpool reg flags */,
NULL /* unused0 */,
module->rcache,
NULL /* item_init */,
NULL /* item_init_context */);
assert(OPAL_SUCCESS == rc);
@ -2366,7 +2389,11 @@ int opal_btl_usnic_module_init(opal_btl_usnic_module_t *module)
int ret;
if (OPAL_SUCCESS != (ret = init_mpool(module)) ||
OPAL_SUCCESS != (ret = init_channels(module))) {
#if RCACHE_VERSION == 30
mca_rcache_base_module_destroy (module->rcache);
#else
mca_mpool_base_module_destroy(module->super.btl_mpool);
#endif
return ret;
}


@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -12,6 +13,8 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -197,6 +200,9 @@ typedef struct opal_btl_usnic_module_t {
/* Performance / debugging statistics */
opal_btl_usnic_module_stats_t stats;
/** registration cache module (v2.1+) */
mca_rcache_base_module_t *rcache;
} opal_btl_usnic_module_t;
struct opal_btl_usnic_frag_t;


@ -47,8 +47,7 @@
#include "opal/class/opal_free_list.h"
#include "opal/sys/atomic.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
@ -152,6 +151,9 @@ struct mca_btl_vader_t {
mca_btl_base_module_error_cb_fn_t error_cb;
#if OPAL_BTL_VADER_HAVE_KNEM
int knem_fd;
/* registration cache */
mca_rcache_base_module_t *knem_rcache;
#endif
};
typedef struct mca_btl_vader_t mca_btl_vader_t;


@ -573,11 +573,12 @@ void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_ba
segments[0].seg_len = hdr->len;
if (hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) {
mca_mpool_base_registration_t *xpmem_reg;
mca_rcache_base_registration_t *xpmem_reg;
xpmem_reg = vader_get_registation (endpoint, hdr->sc_iov.iov_base,
hdr->sc_iov.iov_len, 0,
&segments[1].seg_addr.pval);
assert (NULL != xpmem_reg);
segments[1].seg_len = hdr->sc_iov.iov_len;
frag.des_segment_count = 2;


@ -28,6 +28,7 @@
#include "opal_config.h"
#include "btl_vader_xpmem.h"
#include "opal/mca/rcache/base/rcache_base_vma.h"
#define MCA_BTL_VADER_FBOX_ALIGNMENT 32
#define MCA_BTL_VADER_FBOX_ALIGNMENT_MASK (MCA_BTL_VADER_FBOX_ALIGNMENT - 1)
@ -74,7 +75,7 @@ typedef struct mca_btl_base_endpoint_t {
union {
#if OPAL_BTL_VADER_HAVE_XPMEM
struct {
struct mca_rcache_base_module_t *rcache;
mca_rcache_base_vma_module_t *vma_module;
xpmem_apid_t apid; /**< xpmem apid for remote peer */
} xpmem;
#endif


@ -38,7 +38,7 @@ int mca_btl_vader_get_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_mpool_base_registration_t *reg;
mca_rcache_base_registration_t *reg;
void *rem_ptr;
/* silence warning about unused arguments */


@ -19,12 +19,11 @@
#include <unistd.h>
#include "opal/util/show_help.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h"
OBJ_CLASS_INSTANCE(mca_btl_vader_registration_handle_t, mca_mpool_base_registration_t, NULL, NULL);
OBJ_CLASS_INSTANCE(mca_btl_vader_registration_handle_t, mca_rcache_base_registration_t, NULL, NULL);
static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg)
mca_rcache_base_registration_t *reg)
{
mca_btl_vader_registration_handle_t *knem_reg = (mca_btl_vader_registration_handle_t *) reg;
struct knem_cmd_create_region knem_cr;
@ -37,11 +36,11 @@ static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size,
knem_cr.iovec_nr = 1;
knem_cr.protection = 0;
if (reg->access_flags & (MCA_MPOOL_ACCESS_LOCAL_WRITE | MCA_MPOOL_ACCESS_REMOTE_WRITE)) {
if (reg->access_flags & (MCA_RCACHE_ACCESS_LOCAL_WRITE | MCA_RCACHE_ACCESS_REMOTE_WRITE)) {
knem_cr.protection |= PROT_WRITE;
}
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_READ) {
if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_READ) {
knem_cr.protection |= PROT_READ;
}
@ -57,7 +56,7 @@ static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size,
return OPAL_SUCCESS;
}
static int mca_btl_vader_knem_dereg (void *reg_data, mca_mpool_base_registration_t *reg)
static int mca_btl_vader_knem_dereg (void *reg_data, mca_rcache_base_registration_t *reg)
{
mca_btl_vader_registration_handle_t *knem_reg = (mca_btl_vader_registration_handle_t *) reg;
@ -72,12 +71,14 @@ mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags)
{
mca_btl_vader_t *vader_module = (mca_btl_vader_t *) btl;
mca_btl_vader_registration_handle_t *reg = NULL;
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
int rc;
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, 0, access_flags,
(mca_mpool_base_registration_t **) &reg);
rc = vader_module->knem_rcache->rcache_register (vader_module->knem_rcache, base, size, 0,
access_flags,
(mca_rcache_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return NULL;
}
@ -88,18 +89,19 @@ mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl,
static int
mca_btl_vader_deregister_mem_knem (struct mca_btl_base_module_t *btl, struct mca_btl_base_registration_handle_t *handle)
{
mca_btl_vader_t *vader_module = (mca_btl_vader_t *) btl;
mca_btl_vader_registration_handle_t *reg =
(mca_btl_vader_registration_handle_t *)((intptr_t) handle - offsetof (mca_btl_vader_registration_handle_t, btl_handle));
btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base);
vader_module->knem_rcache->rcache_deregister (vader_module->knem_rcache, &reg->base);
return OPAL_SUCCESS;
}
int mca_btl_vader_knem_init (void)
{
mca_mpool_base_resources_t mpool_resources = {
.pool_name = "vader", .reg_data = NULL,
mca_rcache_base_resources_t rcache_resources = {
.cache_name = "vader", .reg_data = NULL,
.sizeof_reg = sizeof (mca_btl_vader_registration_handle_t),
.register_mem = mca_btl_vader_knem_reg,
.deregister_mem = mca_btl_vader_knem_dereg
@ -107,6 +109,7 @@ int mca_btl_vader_knem_init (void)
struct knem_cmd_info knem_info;
int rc;
signal (SIGSEGV, SIG_DFL);
/* Open the knem device. Try to print a helpful message if we
fail to open it. */
mca_btl_vader.knem_fd = open("/dev/knem", O_RDWR);
@ -130,6 +133,7 @@ int mca_btl_vader_knem_init (void)
do {
/* Check that the ABI of the running kernel module is the same
* as what we were compiled against. */
memset (&knem_info, 0, sizeof (knem_info));
rc = ioctl(mca_btl_vader.knem_fd, KNEM_CMD_GET_INFO, &knem_info);
if (rc < 0) {
opal_show_help("help-btl-vader.txt", "knem get ABI fail",
@ -161,9 +165,9 @@ int mca_btl_vader_knem_init (void)
mca_btl_vader.super.btl_deregister_mem = mca_btl_vader_deregister_mem_knem;
mca_btl_vader.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
mca_btl_vader.super.btl_mpool = mca_mpool_base_module_create ("grdma", NULL,
&mpool_resources);
if (NULL == mca_btl_vader.super.btl_mpool) {
mca_btl_vader.knem_rcache = mca_rcache_base_module_create ("grdma", NULL,
&rcache_resources);
if (NULL == mca_btl_vader.knem_rcache) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
@ -182,9 +186,9 @@ int mca_btl_vader_knem_fini (void)
mca_btl_vader.knem_fd = -1;
}
if (mca_btl_vader.super.btl_mpool) {
(void) mca_mpool_base_module_destroy (mca_btl_vader.super.btl_mpool);
mca_btl_vader.super.btl_mpool = NULL;
if (mca_btl_vader.knem_rcache) {
(void) mca_rcache_base_module_destroy (mca_btl_vader.knem_rcache);
mca_btl_vader.knem_rcache = NULL;
}
return OPAL_SUCCESS;


@ -24,7 +24,7 @@ struct mca_btl_base_registration_handle_t {
};
struct mca_btl_vader_registration_handle_t {
mca_mpool_base_registration_t base;
mca_rcache_base_registration_t base;
mca_btl_base_registration_handle_t btl_handle;
};
typedef struct mca_btl_vader_registration_handle_t mca_btl_vader_registration_handle_t;


@ -171,9 +171,9 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
/* always use xpmem if it is available */
ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
ep->segment_data.xpmem.rcache = mca_rcache_base_module_create("vma");
ep->segment_data.xpmem.vma_module = mca_rcache_base_vma_module_alloc ();
(void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size,
MCA_MPOOL_FLAGS_PERSIST, (void **) &ep->segment_base);
MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base);
} else {
#endif
/* store a copy of the segment information for detach */
@ -434,6 +434,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
int rc;
opal_convertor_get_current_pointer (convertor, &data_ptr);
assert (NULL != data_ptr);
/* in place send fragment */
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
@ -545,16 +546,15 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
#if OPAL_BTL_VADER_HAVE_XPMEM
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
if (ep->segment_data.xpmem.rcache) {
if (ep->segment_data.xpmem.vma_module) {
/* clean out the registration cache */
const int nregs = 100;
mca_mpool_base_registration_t *regs[nregs];
mca_rcache_base_registration_t *regs[nregs];
int reg_cnt;
do {
reg_cnt = ep->segment_data.xpmem.rcache->rcache_find_all(ep->segment_data.xpmem.rcache, 0, (size_t)-1,
regs, nregs);
reg_cnt = mca_rcache_base_vma_find_all (ep->segment_data.xpmem.vma_module,
0, (size_t) -1, regs, nregs);
for (int i = 0 ; i < reg_cnt ; ++i) {
/* otherwise dereg will fail on assert */
regs[i]->ref_count = 0;
@ -562,7 +562,7 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
}
} while (reg_cnt == nregs);
ep->segment_data.xpmem.rcache = NULL;
ep->segment_data.xpmem.vma_module = NULL;
}
if (ep->segment_base) {


@ -40,7 +40,7 @@ int mca_btl_vader_put_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_mpool_base_registration_t *reg;
mca_rcache_base_registration_t *reg;
void *rem_ptr;
reg = vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &rem_ptr);


@ -34,13 +34,14 @@ int mca_btl_vader_xpmem_init (void)
/* look up the remote pointer in the peer rcache and attach if
* necessary */
mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
size_t size, int flags, void **local_ptr)
{
struct mca_rcache_base_module_t *rcache = ep->segment_data.xpmem.rcache;
mca_mpool_base_registration_t *regs[10], *reg = NULL;
mca_rcache_base_vma_module_t *vma_module = ep->segment_data.xpmem.vma_module;
mca_rcache_base_registration_t *regs[10], *reg = NULL;
xpmem_addr_t xpmem_addr;
uintptr_t base, bound;
uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align;
int rc, i;
/* protect rcache access */
@ -49,15 +50,14 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
/* use btl/self for self communication */
assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK);
base = (uintptr_t) down_align_addr(rem_ptr, mca_btl_vader_component.log_attach_align);
bound = (uintptr_t) up_align_addr((void *)((uintptr_t) rem_ptr + size - 1),
mca_btl_vader_component.log_attach_align) + 1;
base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t);
bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1;
if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) {
bound = VADER_MAX_ADDRESS;
}
/* several segments may match the base pointer */
rc = rcache->rcache_find_all (rcache, (void *) base, bound - base, regs, 10);
rc = mca_rcache_base_vma_find_all (vma_module, (void *) base, bound - base, regs, 10);
for (i = 0 ; i < rc ; ++i) {
if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) {
(void)opal_atomic_add (&regs[i]->ref_count, 1);
@ -65,13 +65,13 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
goto reg_found;
}
if (regs[i]->flags & MCA_MPOOL_FLAGS_PERSIST) {
if (regs[i]->flags & MCA_RCACHE_FLAGS_PERSIST) {
continue;
}
/* remove this pointer from the rcache and decrement its reference count
(so it is detached later) */
rc = rcache->rcache_delete (rcache, regs[i]);
rc = mca_rcache_base_vma_delete (vma_module, regs[i]);
if (OPAL_UNLIKELY(0 != rc)) {
/* someone beat us to it? */
break;
@ -84,14 +84,14 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
if (OPAL_LIKELY(0 == regs[i]->ref_count)) {
/* this pointer is not in use */
(void) xpmem_detach (regs[i]->alloc_base);
(void) xpmem_detach (regs[i]->rcache_context);
OBJ_RELEASE(regs[i]);
}
break;
}
reg = OBJ_NEW(mca_mpool_base_registration_t);
reg = OBJ_NEW(mca_rcache_base_registration_t);
if (OPAL_LIKELY(NULL != reg)) {
/* stick around for awhile */
reg->ref_count = 2;
@ -106,21 +106,21 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
#endif
xpmem_addr.offset = base;
reg->alloc_base = xpmem_attach (xpmem_addr, bound - base, NULL);
if (OPAL_UNLIKELY((void *)-1 == reg->alloc_base)) {
reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL);
if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) {
OPAL_THREAD_UNLOCK(&ep->lock);
OBJ_RELEASE(reg);
return NULL;
}
opal_memchecker_base_mem_defined (reg->alloc_base, bound - base);
opal_memchecker_base_mem_defined (reg->rcache_context, bound - base);
rcache->rcache_insert (rcache, reg, 0);
mca_rcache_base_vma_insert (vma_module, reg, 0);
}
reg_found:
opal_atomic_wmb ();
*local_ptr = (void *) ((uintptr_t) reg->alloc_base +
*local_ptr = (void *) ((uintptr_t) reg->rcache_context +
(ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base));
OPAL_THREAD_UNLOCK(&ep->lock);
@ -128,20 +128,20 @@ reg_found:
return reg;
}
void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
{
struct mca_rcache_base_module_t *rcache = ep->segment_data.xpmem.rcache;
mca_rcache_base_vma_module_t *vma_module = ep->segment_data.xpmem.vma_module;
int32_t ref_count;
ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_MPOOL_FLAGS_PERSIST))) {
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
/* protect rcache access */
OPAL_THREAD_LOCK(&ep->lock);
rcache->rcache_delete (rcache, reg);
mca_rcache_base_vma_delete (vma_module, reg);
OPAL_THREAD_UNLOCK(&ep->lock);
opal_memchecker_base_mem_noaccess (reg->alloc_base, (uintptr_t)(reg->bound - reg->base));
(void)xpmem_detach (reg->alloc_base);
opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
(void)xpmem_detach (reg->rcache_context);
OBJ_RELEASE (reg);
}
}
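
A hedged usage sketch of the attach/return pair above, modeled on the xpmem put/get paths elsewhere in this commit; copy_from_peer is a made-up helper, memcpy stands in for the real copy logic, and the snippet only builds inside the Open MPI tree:

#include <string.h>
#include "opal/constants.h"
#include "btl_vader_xpmem.h"

static int copy_from_peer (struct mca_btl_base_endpoint_t *endpoint,
                           void *remote_address, void *local_buffer, size_t size)
{
    mca_rcache_base_registration_t *reg;
    void *rem_ptr;

    /* look up (or create) an xpmem attachment that covers the remote range */
    reg = vader_get_registation (endpoint, remote_address, size, 0, &rem_ptr);
    if (NULL == reg) {
        return OPAL_ERROR;
    }

    /* the peer's memory is now directly addressable through rem_ptr */
    memcpy (local_buffer, rem_ptr, size);

    /* drop the reference; the attachment is detached once it is no longer in use */
    vader_return_registration (reg, endpoint);

    return OPAL_SUCCESS;
}
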


@ -25,6 +25,9 @@
typedef int64_t xpmem_apid_t;
#endif
#include <opal/mca/rcache/base/rcache_base_vma.h>
#include <opal/mca/rcache/rcache.h>
/* look up the remote pointer in the peer rcache and attach if
* necessary */
@ -33,14 +36,14 @@
int mca_btl_vader_xpmem_init (void);
mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
size_t size, int flags, void **local_ptr);
void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint);
void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint);
#else
static inline mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
static inline mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
size_t size, int flags, void **local_ptr)
{
(void) endpoint;
@ -51,7 +54,7 @@ static inline mca_mpool_base_registration_t *vader_get_registation (struct mca_b
return NULL;
}
static inline void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint)
static inline void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint)
{
(void) reg;
(void) endpoint;


@ -38,7 +38,7 @@
#include "opal/util/proc.h"
#include "opal/util/argv.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/runtime/opal_params.h"
#include "opal/mca/timer/base/base.h"
#include "opal/mca/dl/base/base.h"
@ -712,7 +712,7 @@ static int mca_common_cuda_stage_three_init(void)
OPAL_PROC_MY_HOSTNAME, res, mem_reg->msg);
} else {
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: cuMemHostRegister OK on mpool %s: "
"CUDA: cuMemHostRegister OK on rcache %s: "
"address=%p, bufsize=%d",
mem_reg->msg, mem_reg->ptr, (int)mem_reg->amount);
}
@ -795,7 +795,7 @@ static int mca_common_cuda_stage_three_init(void)
* Cleanup all CUDA resources.
*
* Note: Still figuring out how to get cuMemHostUnregister called from the smcuda sm
* mpool. Looks like with the memory pool from openib (grdma), the unregistering is
* rcache. Looks like with the memory pool from openib (grdma), the unregistering is
* called as the free list is destructed. Not true for the sm mpool. This means we
* are currently still leaking some host memory we registered with CUDA.
*/
@ -949,7 +949,7 @@ void mca_common_cuda_register(void *ptr, size_t amount, char *msg) {
OPAL_PROC_MY_HOSTNAME, res, msg);
} else {
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: cuMemHostRegister OK on mpool %s: "
"CUDA: cuMemHostRegister OK on rcache %s: "
"address=%p, bufsize=%d",
msg, ptr, (int)amount);
}
@ -984,12 +984,12 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
/* If unregistering the memory fails, just continue. This is during
* shutdown. Only print when running in verbose mode. */
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: cuMemHostUnregister failed: ptr=%p, res=%d, mpool=%s",
"CUDA: cuMemHostUnregister failed: ptr=%p, res=%d, rcache=%s",
ptr, res, msg);
} else {
opal_output_verbose(20, mca_common_cuda_output,
"CUDA: cuMemHostUnregister OK on mpool %s: "
"CUDA: cuMemHostUnregister OK on rcache %s: "
"address=%p",
msg, ptr);
}
@ -1001,8 +1001,8 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
* to the remote size so it can access the memory. This is the
* registration function for the sending side of a message transfer.
*/
int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg)
int cuda_getmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg,
mca_rcache_base_registration_t *hdrreg)
{
CUmemorytype memType;
@ -1011,7 +1011,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
CUdeviceptr pbase;
size_t psize;
mca_mpool_common_cuda_reg_t *cuda_reg = (mca_mpool_common_cuda_reg_t*)newreg;
mca_rcache_common_cuda_reg_t *cuda_reg = (mca_rcache_common_cuda_reg_t*)newreg;
memHandle = (CUipcMemHandle *)cuda_reg->data.memHandle;
/* We should only be there if this is a CUDA device pointer */
@ -1090,11 +1090,11 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
* This function is called by the local side that called the cuda_getmemhandle.
* There is nothing to be done so just return.
*/
int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg)
int cuda_ungetmemhandle(void *reg_data, mca_rcache_base_registration_t *reg)
{
opal_output_verbose(10, mca_common_cuda_output,
"CUDA: cuda_ungetmemhandle (no-op): base=%p", reg->base);
CUDA_DUMP_MEMHANDLE((100, ((mca_mpool_common_cuda_reg_t *)reg)->data.memHandle, "cuda_ungetmemhandle"));
CUDA_DUMP_MEMHANDLE((100, ((mca_rcache_common_cuda_reg_t *)reg)->data.memHandle, "cuda_ungetmemhandle"));
return OPAL_SUCCESS;
}
@ -1105,12 +1105,12 @@ int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg)
* remote side of a transfer. newreg contains the new handle. hddrreg contains
* the memory handle that was received from the remote side.
*/
int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg)
int cuda_openmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg,
mca_rcache_base_registration_t *hdrreg)
{
CUresult result;
CUipcMemHandle *memHandle;
mca_mpool_common_cuda_reg_t *cuda_newreg = (mca_mpool_common_cuda_reg_t*)newreg;
mca_rcache_common_cuda_reg_t *cuda_newreg = (mca_rcache_common_cuda_reg_t*)newreg;
/* Save in local variable to avoid ugly casting */
memHandle = (CUipcMemHandle *)cuda_newreg->data.memHandle;
@ -1147,10 +1147,10 @@ int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *n
/*
* Close a memory handle that refers to remote memory.
*/
int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg)
int cuda_closememhandle(void *reg_data, mca_rcache_base_registration_t *reg)
{
CUresult result;
mca_mpool_common_cuda_reg_t *cuda_reg = (mca_mpool_common_cuda_reg_t*)reg;
mca_rcache_common_cuda_reg_t *cuda_reg = (mca_rcache_common_cuda_reg_t*)reg;
/* Only attempt to close if we have valid context. This can change if a call
* to the fini function is made and we discover context is gone. */
@ -1213,7 +1213,7 @@ void mca_common_cuda_destruct_event(uintptr_t event)
* Put remote event on stream to ensure that the the start of the
* copy does not start until the completion of the event.
*/
void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
void mca_common_wait_stream_synchronize(mca_rcache_common_cuda_reg_t *rget_reg)
{
#if OPAL_CUDA_SYNC_MEMOPS
/* No need for any of this with SYNC_MEMOPS feature */
@ -1643,8 +1643,8 @@ int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **frag) {
* Need to make sure the handle we are retrieving from the cache is still
* valid. Compare the cached handle to the one received.
*/
int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg,
mca_mpool_common_cuda_reg_t *old_reg)
int mca_common_cuda_memhandle_matches(mca_rcache_common_cuda_reg_t *new_reg,
mca_rcache_common_cuda_reg_t *old_reg)
{
if (0 == memcmp(new_reg->data.memHandle, old_reg->data.memHandle, sizeof(new_reg->data.memHandle))) {
@ -2008,7 +2008,7 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
* not matching the BUFFER_ID of the buffer we are checking. Return false
* if the registration is still good.
*/
bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
bool mca_common_cuda_previously_freed_memory(mca_rcache_base_registration_t *reg)
{
int res;
unsigned long long bufID;
@ -2040,7 +2040,7 @@ bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
* Also set SYNC_MEMOPS on any GPU registration to ensure that
* synchronous copies complete before the buffer is accessed.
*/
void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)
void mca_common_cuda_get_buffer_id(mca_rcache_base_registration_t *reg)
{
int res;
unsigned long long bufID = 0;

View file

@ -28,20 +28,20 @@
#define MEMHANDLE_SIZE 8
#define EVTHANDLE_SIZE 8
struct mca_mpool_common_cuda_reg_data_t {
struct mca_rcache_common_cuda_reg_data_t {
uint64_t memHandle[MEMHANDLE_SIZE];
uint64_t evtHandle[EVTHANDLE_SIZE];
uint64_t event;
opal_ptr_t memh_seg_addr;
size_t memh_seg_len;
};
typedef struct mca_mpool_common_cuda_reg_data_t mca_mpool_common_cuda_reg_data_t;
typedef struct mca_rcache_common_cuda_reg_data_t mca_rcache_common_cuda_reg_data_t;
struct mca_mpool_common_cuda_reg_t {
mca_mpool_base_registration_t base;
mca_mpool_common_cuda_reg_data_t data;
struct mca_rcache_common_cuda_reg_t {
mca_rcache_base_registration_t base;
mca_rcache_common_cuda_reg_data_t data;
};
typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t;
typedef struct mca_rcache_common_cuda_reg_t mca_rcache_common_cuda_reg_t;
extern bool mca_common_cuda_enabled;
OPAL_DECLSPEC void mca_common_cuda_register_mca_variables(void);
@ -50,7 +50,7 @@ OPAL_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg)
OPAL_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);
OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg);
OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_rcache_common_cuda_reg_t *rget_reg);
OPAL_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
struct mca_btl_base_descriptor_t *, int *done);
@ -69,26 +69,26 @@ OPAL_DECLSPEC int progress_one_cuda_ipc_event(struct mca_btl_base_descriptor_t *
OPAL_DECLSPEC int progress_one_cuda_dtoh_event(struct mca_btl_base_descriptor_t **);
OPAL_DECLSPEC int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **);
OPAL_DECLSPEC int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg,
mca_mpool_common_cuda_reg_t *old_reg);
OPAL_DECLSPEC int mca_common_cuda_memhandle_matches(mca_rcache_common_cuda_reg_t *new_reg,
mca_rcache_common_cuda_reg_t *old_reg);
OPAL_DECLSPEC void mca_common_cuda_construct_event_and_handle(uintptr_t *event, void *handle);
OPAL_DECLSPEC void mca_common_cuda_destruct_event(uintptr_t event);
OPAL_DECLSPEC int cuda_getmemhandle(void *base, size_t, mca_mpool_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg);
OPAL_DECLSPEC int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg);
OPAL_DECLSPEC int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg);
OPAL_DECLSPEC int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg);
OPAL_DECLSPEC int cuda_getmemhandle(void *base, size_t, mca_rcache_base_registration_t *newreg,
mca_rcache_base_registration_t *hdrreg);
OPAL_DECLSPEC int cuda_ungetmemhandle(void *reg_data, mca_rcache_base_registration_t *reg);
OPAL_DECLSPEC int cuda_openmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg,
mca_rcache_base_registration_t *hdrreg);
OPAL_DECLSPEC int cuda_closememhandle(void *reg_data, mca_rcache_base_registration_t *reg);
OPAL_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
OPAL_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
OPAL_DECLSPEC int mca_common_cuda_stage_one_init(void);
OPAL_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
OPAL_DECLSPEC void mca_common_cuda_fini(void);
#if OPAL_CUDA_GDR_SUPPORT
OPAL_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
OPAL_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
OPAL_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_rcache_base_registration_t *reg);
OPAL_DECLSPEC void mca_common_cuda_get_buffer_id(mca_rcache_base_registration_t *reg);
#endif /* OPAL_CUDA_GDR_SUPPORT */
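As a hedged illustration of the renamed handle path (only the declarations above are authoritative; gpu_buf, size and the NULL hdrreg argument are assumptions of this sketch):

/* sending side: obtain a CUDA IPC handle for a device buffer; the peer
 * is expected to feed the received handle into cuda_openmemhandle() and
 * eventually release it with cuda_closememhandle(). */
mca_rcache_common_cuda_reg_t send_reg;
if (OPAL_SUCCESS == cuda_getmemhandle (gpu_buf, size, &send_reg.base, NULL)) {
    /* send_reg.data.memHandle now holds the handle to ship to the peer */
}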
/**
* Return: 0 if no packing is required for sending (the upper layer

View file

@ -41,13 +41,13 @@ NOTE: You can turn off this warning by setting the MCA parameter
The call to cuMemHostRegister(%p, %d, 0) failed.
Host: %s
cuMemHostRegister return value: %d
Memory Pool: %s
Registration cache: %s
#
[cuMemHostRegister failed]
The call to cuMemHostRegister(%p, %d, 0) failed.
Host: %s
cuMemHostRegister return value: %d
Memory Pool: %s
Registration cache: %s
#
[cuIpcGetMemHandle failed]
The call to cuIpcGetMemHandle failed. This means the GPU RDMA protocol

View file

@ -10,7 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2010-2013 Los Alamos National Security, LLC.
# Copyright (c) 2010-2015 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
@ -22,7 +22,7 @@
# A word of explanation...
#
# This library is linked against various MCA components because all
# shared-memory based components (e.g., mpool, ptl, etc.) need to
# shared-memory based components (e.g., btl/sm, btl/smcuda, etc.) need to
# share some common code and data. There's two cases:
#
# 1. libmca_common_sm.la is a shared library. By linking that shared
@ -44,12 +44,14 @@
# Header files
headers = \
common_sm.h
common_sm.h \
common_sm_mpool.h
# Source files
sources = \
common_sm.c
common_sm.c \
common_sm_mpool.c
# Help file

View file

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -11,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2013 Los Alamos National Security, LLC.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
@ -39,16 +40,13 @@
#if OPAL_ENABLE_FT_CR == 1
#include "opal/runtime/opal_cr.h"
#endif
#include "common_sm.h"
#include "opal/constants.h"
#include "opal/mca/mpool/sm/mpool_sm.h"
OBJ_CLASS_INSTANCE(
mca_common_sm_module_t,
opal_list_item_t,
NULL,
NULL
);
OBJ_CLASS_INSTANCE(mca_common_sm_module_t,opal_list_item_t,
NULL, NULL);
/* ////////////////////////////////////////////////////////////////////////// */
/* static utility functions */
@ -258,13 +256,10 @@ mca_common_sm_local_proc_reorder(opal_proc_t **procs,
*
* @retval addr virtual address
*/
void *
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t *size,
mca_mpool_base_registration_t **registration)
void *mca_common_sm_seg_alloc (void *ctx, size_t *size)
{
mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t *)mpool;
mca_common_sm_seg_header_t *seg = sm_module->sm_common_module->module_seg;
mca_common_sm_module_t *sm_module = (mca_common_sm_module_t *) ctx;
mca_common_sm_seg_header_t *seg = sm_module->module_seg;
void *addr;
opal_atomic_lock(&seg->seg_lock);
@ -275,7 +270,7 @@ mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t fixup;
/* add base address to segment offset */
addr = sm_module->sm_common_module->module_data_addr + seg->seg_offset;
addr = sm_module->module_data_addr + seg->seg_offset;
seg->seg_offset += *size;
/* fix up seg_offset so next allocation is aligned on a
@ -286,9 +281,7 @@ mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
seg->seg_offset += sizeof(long) - fixup;
}
}
if (NULL != registration) {
*registration = NULL;
}
opal_atomic_unlock(&seg->seg_lock);
return addr;
}

View file

@ -32,7 +32,7 @@
#include "opal/mca/btl/base/base.h"
#include "opal/util/proc.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/mpool/mpool.h"
#include "common_sm_mpool.h"
BEGIN_C_DECLS
@ -66,6 +66,8 @@ typedef struct mca_common_sm_module_t {
unsigned char *module_data_addr;
/* shared memory backing facility object that encapsulates shmem info */
opal_shmem_ds_t shmem_ds;
/* memory pool interface to shared-memory region */
mca_mpool_base_module_t *mpool;
} mca_common_sm_module_t;
OBJ_CLASS_DECLARATION(mca_common_sm_module_t);
@ -126,10 +128,7 @@ mca_common_sm_module_unlink(mca_common_sm_module_t *modp);
/**
* callback from the sm mpool
*/
OPAL_DECLSPEC extern void *
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t *size,
mca_mpool_base_registration_t **registration);
OPAL_DECLSPEC extern void *mca_common_sm_seg_alloc (void *ctx, size_t *size);
/**
* This function will release all local resources attached to the
@ -150,6 +149,7 @@ mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module);
*/
OPAL_DECLSPEC extern mca_common_sm_module_t *mca_common_sm_module;
END_C_DECLS
#endif /* _COMMON_SM_H_ */
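The allocator segment callback now receives an opaque context instead of an mpool module and registration pointer; a minimal conforming callback might look like this (the name and the malloc backing store are invented for illustration):

static void *my_seg_alloc (void *ctx, size_t *size)
{
    mca_common_sm_module_t *sm = (mca_common_sm_module_t *) ctx;
    (void) sm;               /* real code carves *size bytes out of the shared segment */
    return malloc (*size);   /* placeholder backing store for the sketch */
}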

View file

@ -23,9 +23,10 @@
#include "opal_config.h"
#include <string.h>
#include "opal/mca/mpool/sm/mpool_sm.h"
#include "common_sm_mpool.h"
#include "opal/mca/common/sm/common_sm.h"
#include "opal/mca/common/cuda/common_cuda.h"
#include "opal/mca/allocator/base/base.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
@ -39,22 +40,42 @@
static void sm_module_finalize(mca_mpool_base_module_t* module);
/*
* Returns base address of shared memory mapping.
*/
static void *mca_common_sm_mpool_base (mca_mpool_base_module_t *mpool);
/**
* Allocate block of shared memory.
*/
static void *mca_common_sm_mpool_alloc (mca_mpool_base_module_t *mpool,
size_t size, size_t align,
uint32_t flags);
/**
* free function typedef
*/
static void mca_common_sm_mpool_free(mca_mpool_base_module_t *mpool,
void *addr);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OPAL_SUCCESS or failure status
*/
static int mca_common_sm_mpool_ft_event (int state);
/*
* Initializes the mpool module.
*/
void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool)
static void mca_common_sm_mpool_module_init(mca_common_sm_mpool_module_t* mpool)
{
mpool->super.mpool_component = &mca_mpool_sm_component.super;
mpool->super.mpool_base = mca_mpool_sm_base;
mpool->super.mpool_alloc = mca_mpool_sm_alloc;
mpool->super.mpool_realloc = mca_mpool_sm_realloc;
mpool->super.mpool_free = mca_mpool_sm_free;
mpool->super.mpool_find = NULL;
mpool->super.mpool_register = NULL;
mpool->super.mpool_deregister = NULL;
mpool->super.mpool_release_memory = NULL;
mpool->super.mpool_base = mca_common_sm_mpool_base;
mpool->super.mpool_alloc = mca_common_sm_mpool_alloc;
mpool->super.mpool_free = mca_common_sm_mpool_free;
mpool->super.mpool_finalize = sm_module_finalize;
mpool->super.mpool_ft_event = mca_mpool_sm_ft_event;
mpool->super.mpool_ft_event = mca_common_sm_mpool_ft_event;
mpool->super.flags = 0;
mpool->sm_size = 0;
@ -64,12 +85,74 @@ void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool)
mpool->mem_node = -1;
}
mca_mpool_base_module_t *common_sm_mpool_create (mca_common_sm_mpool_resources_t *resources)
{
mca_common_sm_mpool_module_t *mpool_module;
mca_allocator_base_component_t* allocator_component;
/* Make a new mpool module */
mpool_module = (mca_common_sm_mpool_module_t *) malloc (sizeof (*mpool_module));
mca_common_sm_mpool_module_init(mpool_module);
/* set sm_size */
mpool_module->sm_size = resources->size;
allocator_component = mca_allocator_component_lookup(resources->allocator);
/* if specified allocator cannot be loaded - look for an alternative */
if (NULL == allocator_component) {
if (opal_list_get_size(&opal_allocator_base_framework.framework_components) == 0) {
mca_base_component_list_item_t *item =
(mca_base_component_list_item_t *)
opal_list_get_first(&opal_allocator_base_framework.framework_components);
allocator_component =
(mca_allocator_base_component_t *)item->cli_component;
opal_output(
0, "mca_common_sm_mpool_init: "
"unable to locate allocator: %s - using %s\n",
resources->allocator,
allocator_component->allocator_version.mca_component_name);
} else {
opal_output(0, "mca_common_sm_mpool_init: "
"unable to locate allocator: %s\n",
resources->allocator);
free(mpool_module);
return NULL;
}
}
mpool_module->mem_node = resources->mem_node;
if (NULL == (mpool_module->sm_common_module =
mca_common_sm_module_attach(&resources->bs_meta_buf,
sizeof(mca_common_sm_module_t), 8))) {
opal_output(0, "mca_common_sm_mpool_init: "
"unable to create shared memory mapping (%s)",
resources->bs_meta_buf.seg_name);
free(mpool_module);
return NULL;
}
/* setup allocator */
mpool_module->sm_allocator =
allocator_component->allocator_init (true, mca_common_sm_seg_alloc,
NULL, mpool_module->sm_common_module);
if (NULL == mpool_module->sm_allocator) {
opal_output(0, "mca_common_sm_mpool_init: unable to initialize allocator");
free(mpool_module);
return NULL;
}
return &mpool_module->super;
}
/*
* base address of shared memory mapping
*/
void* mca_mpool_sm_base(mca_mpool_base_module_t* mpool)
static void *mca_common_sm_mpool_base(mca_mpool_base_module_t *mpool)
{
mca_mpool_sm_module_t *sm_mpool = (mca_mpool_sm_module_t*) mpool;
mca_common_sm_mpool_module_t *sm_mpool = (mca_common_sm_mpool_module_t *) mpool;
return (NULL != sm_mpool->sm_common_module) ?
sm_mpool->sm_common_module->module_seg_addr : NULL;
}
@ -77,18 +160,14 @@ void* mca_mpool_sm_base(mca_mpool_base_module_t* mpool)
/**
* allocate function
*/
void* mca_mpool_sm_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
uint32_t flags,
mca_mpool_base_registration_t** registration)
static void *mca_common_sm_mpool_alloc (mca_mpool_base_module_t* mpool,
size_t size, size_t align, uint32_t flags)
{
mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool;
mca_common_sm_mpool_module_t* mpool_sm = (mca_common_sm_mpool_module_t*)mpool;
opal_hwloc_base_memory_segment_t mseg;
mseg.mbs_start_addr =
mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, align, registration);
mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, align);
if (mpool_sm->mem_node >= 0) {
mseg.mbs_len = size;
@ -98,42 +177,18 @@ void* mca_mpool_sm_alloc(
return mseg.mbs_start_addr;
}
/**
* realloc function
*/
void* mca_mpool_sm_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
mca_mpool_base_registration_t** registration)
{
mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool;
opal_hwloc_base_memory_segment_t mseg;
mseg.mbs_start_addr =
mpool_sm->sm_allocator->alc_realloc(mpool_sm->sm_allocator, addr, size,
registration);
if(mpool_sm->mem_node >= 0) {
mseg.mbs_len = size;
opal_hwloc_base_membind(&mseg, 1, mpool_sm->mem_node);
}
return mseg.mbs_start_addr;
}
/**
* free function
*/
void mca_mpool_sm_free(mca_mpool_base_module_t* mpool, void * addr,
mca_mpool_base_registration_t* registration)
void mca_common_sm_mpool_free(mca_mpool_base_module_t *mpool, void *addr)
{
mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool;
mca_common_sm_mpool_module_t* mpool_sm = (mca_common_sm_mpool_module_t*)mpool;
mpool_sm->sm_allocator->alc_free(mpool_sm->sm_allocator, addr);
}
static void sm_module_finalize(mca_mpool_base_module_t* module)
{
mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*) module;
mca_common_sm_mpool_module_t *sm_module = (mca_common_sm_mpool_module_t*) module;
if (NULL != sm_module->sm_common_module) {
if (OPAL_SUCCESS ==
@ -156,13 +211,13 @@ static void sm_module_finalize(mca_mpool_base_module_t* module)
}
#if OPAL_ENABLE_FT_CR == 0
int mca_mpool_sm_ft_event(int state) {
int mca_common_sm_mpool_ft_event(int state) {
return OPAL_SUCCESS;
}
#else
int mca_mpool_sm_ft_event(int state) {
int mca_common_sm_mpool_ft_event(int state) {
mca_mpool_base_module_t *self_module = NULL;
mca_mpool_sm_module_t *self_sm_module = NULL;
mca_common_sm_mpool_module_t *self_sm_module = NULL;
char * file_name = NULL;
if(OPAL_CRS_CHECKPOINT == state) {
@ -181,7 +236,7 @@ int mca_mpool_sm_ft_event(int state) {
if (opal_cr_continue_like_restart) {
/* Find the sm module */
self_module = mca_mpool_base_module_lookup("sm");
self_sm_module = (mca_mpool_sm_module_t*) self_module;
self_sm_module = (mca_common_sm_mpool_module_t*) self_module;
/* Mark the old sm file for eventual removal via CRS */
if (NULL != self_sm_module->sm_common_module) {
@ -196,7 +251,7 @@ int mca_mpool_sm_ft_event(int state) {
OPAL_CRS_RESTART_PRE == state) {
/* Find the sm module */
self_module = mca_mpool_base_module_lookup("sm");
self_sm_module = (mca_mpool_sm_module_t*) self_module;
self_sm_module = (mca_common_sm_mpool_module_t*) self_module;
/* Mark the old sm file for eventual removal via CRS */
if (NULL != self_sm_module->sm_common_module) {

62
opal/mca/common/sm/common_sm_mpool.h Normal file
View file

@ -0,0 +1,62 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_COMMON_SM_MPOOL_H
#define MCA_COMMON_SM_MPOOL_H
#include "opal_config.h"
#include "opal/mca/event/event.h"
#include "opal/mca/shmem/shmem.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/allocator/allocator.h"
BEGIN_C_DECLS
struct mca_common_sm_module_t;
typedef struct mca_common_sm_mpool_resources_t {
size_t size;
int32_t mem_node;
const char *allocator;
/* backing store metadata */
opal_shmem_ds_t bs_meta_buf;
} mca_common_sm_mpool_resources_t;
typedef struct mca_common_sm_mpool_module_t {
mca_mpool_base_module_t super;
long sm_size;
mca_allocator_base_module_t *sm_allocator;
struct mca_common_sm_mpool_mmap_t *sm_mmap;
struct mca_common_sm_module_t *sm_common_module;
int32_t mem_node;
} mca_common_sm_mpool_module_t;
OPAL_DECLSPEC mca_mpool_base_module_t *common_sm_mpool_create (mca_common_sm_mpool_resources_t *);
END_C_DECLS
#endif
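A sketch, not a definitive recipe, of standing up this mpool through the new entry point; the segment size, the "bucket" allocator choice and an already-populated bs_meta_buf are assumptions:

mca_common_sm_mpool_resources_t res = {
    .size      = 64 * 1024 * 1024,   /* shared segment size */
    .mem_node  = -1,                 /* no NUMA binding */
    .allocator = "bucket",           /* any loadable allocator component */
};
/* res.bs_meta_buf must already describe the shared backing store */
mca_mpool_base_module_t *sm_mpool = common_sm_mpool_create (&res);
if (NULL != sm_mpool) {
    void *buf = sm_mpool->mpool_alloc (sm_mpool, 4096, 8, 0);
    /* ... */
    sm_mpool->mpool_free (sm_mpool, buf);
}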

View file

@ -10,6 +10,8 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2015 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -19,15 +21,14 @@
headers += \
base/base.h \
base/mpool_base_mem_cb.h \
base/mpool_base_tree.h
libmca_mpool_la_SOURCES += \
base/mpool_base_frame.c \
base/mpool_base_init.c \
base/mpool_base_lookup.c \
base/mpool_base_alloc.c \
base/mpool_base_mem_cb.c \
base/mpool_base_tree.c
base/mpool_base_tree.c \
base/mpool_base_default.c
dist_opaldata_DATA += \
base/help-mpool-base.txt

View file

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -30,28 +33,10 @@
BEGIN_C_DECLS
static inline unsigned int my_log2(unsigned long val) {
unsigned int count = 0;
while(val > 0) {
val = val >> 1;
count++;
}
return count > 0 ? count-1: 0;
}
static inline void *down_align_addr(void* addr, unsigned int shift) {
return (void*) (((intptr_t) addr) & (~(intptr_t) 0) << shift);
}
static inline void *up_align_addr(void*addr, unsigned int shift) {
return (void*) ((((intptr_t) addr) | ~((~(intptr_t) 0) << shift)));
}
struct mca_mpool_base_selected_module_t {
opal_list_item_t super;
mca_mpool_base_component_t *mpool_component;
mca_mpool_base_module_t *mpool_module;
void* user_data;
struct mca_mpool_base_resources_t *mpool_resources;
};
typedef struct mca_mpool_base_selected_module_t mca_mpool_base_selected_module_t;
@ -65,24 +50,16 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_selected_module_t);
* Global functions for MCA: overall mpool open and close
*/
OPAL_DECLSPEC int mca_mpool_base_init(bool enable_progress_threads, bool enable_mpi_threads);
OPAL_DECLSPEC mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char* name);
OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_create(
const char* name,
void* user_data,
struct mca_mpool_base_resources_t* mpool_resources);
OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name);
OPAL_DECLSPEC int mca_mpool_base_module_destroy(mca_mpool_base_module_t *module);
/*
* Globals
*/
extern opal_list_t mca_mpool_base_modules;
OPAL_DECLSPEC extern uint32_t mca_mpool_base_page_size;
OPAL_DECLSPEC extern uint32_t mca_mpool_base_page_size_log;
extern mca_mpool_base_module_t *mca_mpool_base_default_module;
extern int mca_mpool_base_default_priority;
/* only used within base -- no need to DECLSPEC */
extern int mca_mpool_base_used_mem_hooks;
OPAL_DECLSPEC extern mca_base_framework_t opal_mpool_base_framework;

View file

@ -30,31 +30,3 @@ PID: %d
%d additional leak%s recorded but %s not displayed here. Set the MCA
parameter mpi_show_mpi_alloc_mem_leaks to a larger number to see that
many leaks, or set it to a negative number to see all leaks.
#
[leave pinned failed]
A process attempted to use the "leave pinned" MPI feature, but no
memory registration hooks were found on the system at run time. This
may be the result of running on a system that does not support memory
hooks or having some other software subvert Open MPI's use of the
memory hooks. You can disable Open MPI's use of memory hooks by
setting both the mpi_leave_pinned and mpi_leave_pinned_pipeline MCA
parameters to 0.
Open MPI will disable any transports that are attempting to use the
leave pinned functionality; your job may still run, but may fall back
to a slower network transport (such as TCP).
Mpool name: %s
Process: %s
Local host: %s
#
[cannot deregister in-use memory]
Open MPI intercepted a call to free memory that is still being used by
an ongoing MPI communication. This usually reflects an error in the
MPI application; it may signify memory corruption. Open MPI will now
abort your job.
Mpool name: %s
Local host: %s
Buffer address: %p
Buffer size: %lu

View file

@ -27,7 +27,6 @@
#include "opal/mca/mpool/mpool.h"
#include "base.h"
#include "mpool_base_tree.h"
#include "mpool_base_mem_cb.h"
#include "opal/threads/mutex.h"
struct opal_info_t {
@ -44,63 +43,24 @@ struct opal_info_t {
};
typedef struct opal_info_t opal_info_t;
/**
* Memory Pool Registration
*/
static void mca_mpool_base_registration_constructor( mca_mpool_base_registration_t * reg )
{
reg->mpool = NULL;
reg->base = NULL;
reg->bound = NULL;
reg->alloc_base = NULL;
reg->ref_count = 0;
reg->flags = 0;
}
static void mca_mpool_base_registration_destructor( mca_mpool_base_registration_t * reg )
{
}
OBJ_CLASS_INSTANCE(
mca_mpool_base_registration_t,
opal_free_list_item_t,
mca_mpool_base_registration_constructor,
mca_mpool_base_registration_destructor);
static void unregister_tree_item(mca_mpool_base_tree_item_t *mpool_tree_item)
{
mca_mpool_base_module_t *mpool;
mca_mpool_base_registration_t *reg;
int i;
for(i = 1; i < mpool_tree_item->count; i++) {
mpool = mpool_tree_item->mpools[i];
reg = mpool_tree_item->regs[i];
if(mpool && mpool->mpool_deregister) {
mpool->mpool_deregister(mpool, reg);
}
}
mpool = mpool_tree_item->mpools[0];
reg = mpool_tree_item->regs[0];
mpool->mpool_free(mpool, mpool_tree_item->key, reg);
mpool = mpool_tree_item->mpool;
mpool->mpool_free(mpool, mpool_tree_item->key);
}
/**
* Function to allocate special memory according to what the user requests in
* the info object.
*
* If the user passes in a valid info structure then the function will
* try to allocate the memory and register it with every mpool that there is a
* key for it in the info struct. If it fails at registering the memory with
* one of the requested mpools, an error will be returned. Also, if there is a
* key in info that does not match any mpool, an error will be returned.
*
* If the info parameter is MPI_INFO_NULL, then this function will try to allocate
* the memory and register it with as many mpools as possible. However,
* if any of the registratons fail the mpool will simply be ignored.
* the memory with the optionally named mpool or malloc and try to register the
* pointer with as many registration caches as possible. Registration caches that
* fail to register the region will be ignored. The mpool name can optionally be
* specified in the info object.
*
* @param size the size of the memory area to allocate
* @param info an info object which tells us what kind of memory to allocate
@ -108,176 +68,37 @@ static void unregister_tree_item(mca_mpool_base_tree_item_t *mpool_tree_item)
* @retval pointer to the allocated memory
* @retval NULL on failure
*/
void *mca_mpool_base_alloc(size_t size, opal_info_t *info)
void *mca_mpool_base_alloc(size_t size, opal_info_t *info, const char *hints)
{
opal_list_item_t * item;
int num_modules = opal_list_get_size(&mca_mpool_base_modules);
int reg_module_num = 0, i;
mca_mpool_base_selected_module_t * current;
mca_mpool_base_selected_module_t * no_reg_function = NULL;
mca_mpool_base_selected_module_t ** has_reg_function = NULL;
mca_mpool_base_registration_t * registration;
mca_mpool_base_tree_item_t *mpool_tree_item = NULL;
mca_mpool_base_module_t *mpool;
void *mem = NULL;
#if defined(TODO_BTL_GB)
int flag = 0;
bool match_found = false;
#endif /* defined(TODO_BTL_GB) */
bool mpool_requested = false;
if(num_modules > 0) {
has_reg_function = (mca_mpool_base_selected_module_t **)
malloc(num_modules * sizeof(mca_mpool_base_module_t *));
if(!has_reg_function)
goto out;
}
mpool_tree_item = mca_mpool_base_tree_item_get ();
if(!mpool_tree_item)
goto out;
if (!mpool_tree_item) {
return NULL;
}
mpool_tree_item->num_bytes = size;
mpool_tree_item->count = 0;
#if defined(TODO_BTL_GB)
if(&ompi_mpi_info_null.info == info)
#endif /* defined(TODO_BTL_GB) */
{
for(item = opal_list_get_first(&mca_mpool_base_modules);
item != opal_list_get_end(&mca_mpool_base_modules);
item = opal_list_get_next(item)) {
current = ((mca_mpool_base_selected_module_t *) item);
if(current->mpool_module->flags & MCA_MPOOL_FLAGS_MPI_ALLOC_MEM) {
if(NULL == current->mpool_module->mpool_register){
no_reg_function = current;
}
else {
has_reg_function[reg_module_num++] = current;
}
}
}
}
#if defined(TODO_BTL_GB)
else
{
int num_keys;
char key[MPI_MAX_INFO_KEY + 1];
char value[MPI_MAX_INFO_VAL + 1];
ompi_info_get_nkeys(info, &num_keys);
for(i = 0; i < num_keys; i++)
{
ompi_info_get_nthkey(info, i, key);
if ( 0 != strcmp(key, "mpool") ) {
continue;
}
mpool_requested = true;
ompi_info_get(info, key, MPI_MAX_INFO_VAL, value, &flag);
if ( !flag ) {
continue;
mpool = mca_mpool_base_module_lookup (hints);
if (NULL != mpool) {
mem = mpool->mpool_alloc (mpool, size, 0, 0);
}
match_found = false;
for(item = opal_list_get_first(&mca_mpool_base_modules);
item != opal_list_get_end(&mca_mpool_base_modules);
item = opal_list_get_next(item))
{
current = ((mca_mpool_base_selected_module_t *)item);
if(0 == strcmp(value,
current->mpool_module->mpool_component->mpool_version.mca_component_name))
{
match_found = true;
if(NULL == current->mpool_module->mpool_register)
{
if(NULL != no_reg_function)
{
/* there was more than one requested mpool that lacks
* a registration function, so return failure */
goto out;
}
no_reg_function = current;
}
else
{
has_reg_function[reg_module_num++] = current;
}
}
}
if(!match_found)
{
/* one of the keys given to us by the user did not match any
* mpools, so return an error */
goto out;
}
}
}
#endif /* defined(TODO_BTL_GB) */
if(NULL == no_reg_function && 0 == reg_module_num)
{
if(!mpool_requested)
{
/* if the info argument was NULL and there were no useable mpools
* or there user provided info object but did not specifiy a "mpool" key,
* just malloc the memory and return it */
if (NULL == mem) {
/* fall back on malloc */
mem = malloc(size);
goto out;
}
/* the user passed info but we were not able to use any of the mpools
* specified */
goto out;
}
for(i = -1; i < reg_module_num; i++) {
if(-1 == i) {
if(NULL != no_reg_function)
mpool = no_reg_function->mpool_module;
else
continue;
} else {
mpool = has_reg_function[i]->mpool_module;
}
if(NULL == mem) {
mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST,
&registration);
if(NULL == mem) {
if(mpool_requested)
goto out;
continue;
}
mpool_tree_item->key = mem;
mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
} else {
if(mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST,
MCA_MPOOL_ACCESS_ANY, &registration) != OPAL_SUCCESS) {
if(mpool_requested) {
unregister_tree_item(mpool_tree_item);
goto out;
}
continue;
}
mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
}
}
if(NULL == mem) {
mem = malloc(size);
goto out;
}
mca_mpool_base_tree_insert(mpool_tree_item);
mpool_tree_item = NULL; /* prevent it to be deleted below */
out:
if(mpool_tree_item)
mca_mpool_base_tree_item_put (mpool_tree_item);
if(has_reg_function)
free(has_reg_function);
} else {
mpool_tree_item->mpool = mpool;
mca_mpool_base_tree_insert (mpool_tree_item);
}
return mem;
}
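An illustrative call against the simplified path above (not from the patch); the NULL info and NULL hints are assumptions, and a hints string naming a particular mpool could be passed instead:

void *buf = mca_mpool_base_alloc (65536, NULL /* info */, NULL /* hints */);
if (NULL != buf) {
    /* served either by the module returned from the hints lookup or by
     * the plain malloc fallback; the tree insert above lets the base
     * find the owning mpool again when the buffer is freed */
}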

85
opal/mca/mpool/base/mpool_base_default.c Normal file
View file

@ -0,0 +1,85 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/constants.h"
#include "opal/util/sys_limits.h"
static void *mca_mpool_default_alloc (mca_mpool_base_module_t *mpool, size_t size,
size_t align, uint32_t flags)
{
#if HAVE_POSIX_MEMALIGN
void *addr = NULL;
(void) posix_memalign (&addr, align, size);
return addr;
#else
void *addr, *ret;
addr = malloc (size + align + sizeof (void *));
ret = OPAL_ALIGN_PTR((intptr_t) addr + 8, align, void *);
*((void **) ret - 1) = addr;
return ret;
#endif
}
static void *mca_mpool_default_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size)
{
#if HAVE_POSIX_MEMALIGN
return realloc (addr, size);
#else
if (NULL != addr) {
void *base = *((void **) addr - 1);
void *ptr = realloc (base, size + (intptr_t) addr - (intptr_t) - size);
void *ret = (void *)((intptr_t) ptr + (intptr_t) addr - (intptr_t) - size);
*((void **) ret - 1) = ptr;
return ret;
} else {
return mca_mpool_default_alloc (mpool, size, 8, 0);
}
#endif
}
static void mca_mpool_default_free (mca_mpool_base_module_t *mpool, void *addr)
{
#if HAVE_POSIX_MEMALIGN
free (addr);
#else
if (NULL != addr) {
void *base = *((void **) addr - 1);
free (base);
}
#endif
}
static void mca_mpool_default_finalize (struct mca_mpool_base_module_t *mpool)
{
}
static mca_mpool_base_module_t mca_mpool_malloc_module = {
.mpool_alloc = mca_mpool_default_alloc,
.mpool_realloc = mca_mpool_default_realloc,
.mpool_free = mca_mpool_default_free,
.mpool_finalize = mca_mpool_default_finalize,
.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM,
};
mca_mpool_base_module_t *mca_mpool_base_default_module = &mca_mpool_malloc_module;
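The non-posix_memalign branch above uses the classic over-allocate-and-stash technique; a standalone sketch of that trick, separate from the patch and assuming a power-of-two align:

#include <stdint.h>
#include <stdlib.h>

static void *aligned_alloc_fallback (size_t size, size_t align)
{
    /* room for the payload, the alignment slack and the saved base pointer */
    void *base = malloc (size + align + sizeof (void *));
    if (NULL == base) {
        return NULL;
    }
    uintptr_t raw = (uintptr_t) base + sizeof (void *);
    void *ret = (void *) ((raw + align - 1) & ~((uintptr_t) align - 1));
    ((void **) ret)[-1] = base;      /* remember what to pass to free() later */
    return ret;
}

static void aligned_free_fallback (void *ptr)
{
    if (NULL != ptr) {
        free (((void **) ptr)[-1]);
    }
}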

View file

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -13,6 +14,8 @@
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -30,9 +33,7 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/memoryhooks/memory.h"
#include "opal/mca/mpool/base/base.h"
#include "mpool_base_mem_cb.h"
#include "opal/constants.h"
#include "opal/util/sys_limits.h"
@ -48,13 +49,33 @@
* Global variables
*/
/* whether we actually used the mem hooks or not */
int mca_mpool_base_used_mem_hooks = 0;
uint32_t mca_mpool_base_page_size = 0;
uint32_t mca_mpool_base_page_size_log = 0;
opal_list_t mca_mpool_base_modules = {{0}};
static char *mca_mpool_base_default_hints;
int mca_mpool_base_default_priority = 50;
OBJ_CLASS_INSTANCE(mca_mpool_base_selected_module_t, opal_list_item_t, NULL, NULL);
static int mca_mpool_base_register (mca_base_register_flag_t flags)
{
mca_mpool_base_default_hints = NULL;
(void) mca_base_var_register ("opal", "mpool", "base", "default_hints",
"Hints to use when selecting the default memory pool",
MCA_BASE_VAR_TYPE_STRING, NULL, 0,
MCA_BASE_VAR_FLAG_INTERNAL,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_mpool_base_default_hints);
mca_mpool_base_default_priority = 50;
(void) mca_base_var_register ("opal", "mpool", "base", "default_priority",
"Priority of the default mpool module",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_INTERNAL,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_mpool_base_default_priority);
return OPAL_SUCCESS;
}
/**
* Function for finding and opening either all MCA components, or the one
@ -69,15 +90,14 @@ static int mca_mpool_base_open(mca_base_open_flag_t flags)
return OPAL_ERROR;
}
if (mca_mpool_base_default_hints) {
mca_mpool_base_default_module = mca_mpool_base_module_lookup (mca_mpool_base_default_hints);
}
/* Initialize the list so that in mca_mpool_base_close(), we can
iterate over it (even if it's empty, as in the case of opal_info) */
OBJ_CONSTRUCT(&mca_mpool_base_modules, opal_list_t);
/* get the page size for this architecture*/
mca_mpool_base_page_size = opal_getpagesize();
mca_mpool_base_page_size_log = my_log2(mca_mpool_base_page_size);
/* setup tree for tracking MPI_Alloc_mem */
mca_mpool_base_tree_init();
@ -88,12 +108,6 @@ static int mca_mpool_base_close(void)
{
opal_list_item_t *item;
mca_mpool_base_selected_module_t *sm;
int32_t modules_length;
/* Need the initial length in order to know if some of the initializations
* are done in the open function.
*/
modules_length = opal_list_get_size(&mca_mpool_base_modules);
/* Finalize all the mpool components and free their list items */
@ -115,15 +129,8 @@ static int mca_mpool_base_close(void)
OMPI RTE program, or [possibly] multiple if this is opal_info) */
(void) mca_base_framework_components_close(&opal_mpool_base_framework, NULL);
/* deregister memory free callback */
if( (modules_length > 0) && mca_mpool_base_used_mem_hooks &&
0 != (OPAL_MEMORY_FREE_SUPPORT & opal_mem_hooks_support_level())) {
opal_mem_hooks_unregister_release(mca_mpool_base_mem_cb);
}
/* All done */
return OPAL_SUCCESS;
}
MCA_BASE_FRAMEWORK_DECLARE(opal, mpool, NULL, NULL, mca_mpool_base_open,
MCA_BASE_FRAMEWORK_DECLARE(opal, mpool, "Memory pools", mca_mpool_base_register, mca_mpool_base_open,
mca_mpool_base_close, mca_mpool_base_static_components, 0);

View file

@ -1,43 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/mpool/base/base.h"
OBJ_CLASS_INSTANCE(mca_mpool_base_selected_module_t, opal_list_item_t, NULL, NULL);
static bool mca_mpool_enable_progress_threads = true;
static bool mca_mpool_enable_mpi_thread_multiple = true;
/**
* Function for weeding out mpool modules that don't want to run.
*
* Call the init function on all available components to find out if they
* want to run. Select all components that don't fail. Failing modules
* will be closed and unloaded. The selected modules will be returned
* to the caller in a opal_list_t.
*/
int mca_mpool_base_init(bool enable_progress_threads, bool enable_mpi_thread_multiple)
{
mca_mpool_enable_progress_threads = enable_progress_threads;
mca_mpool_enable_mpi_thread_multiple = enable_mpi_thread_multiple;
return OPAL_SUCCESS;
}

View file

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved.
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -30,126 +30,47 @@
#include "opal/mca/base/base.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/runtime/opal_params.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/memoryhooks/memory.h"
#include "mpool_base_mem_cb.h"
mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char *name)
{
mca_base_component_list_item_t *cli;
/* Traverse the list of available modules; call their init functions. */
opal_list_item_t* item;
for (item = opal_list_get_first(&opal_mpool_base_framework.framework_components);
item != opal_list_get_end(&opal_mpool_base_framework.framework_components);
item = opal_list_get_next(item)) {
mca_base_component_list_item_t *cli =
(mca_base_component_list_item_t *) item;
mca_mpool_base_component_t* component =
(mca_mpool_base_component_t *) cli->cli_component;
OPAL_LIST_FOREACH(cli, &opal_mpool_base_framework.framework_components, mca_base_component_list_item_t) {
mca_mpool_base_component_t* component = (mca_mpool_base_component_t *) cli->cli_component;
if (strcmp(component->mpool_version.mca_component_name, name) == 0) {
return component;
}
}
return NULL;
}
mca_mpool_base_module_t* mca_mpool_base_module_create(
const char* name,
void* user_data,
struct mca_mpool_base_resources_t* resources)
mca_mpool_base_module_t *mca_mpool_base_module_lookup (const char *hints)
{
mca_mpool_base_component_t* component = NULL;
mca_mpool_base_module_t* module = NULL;
mca_mpool_base_module_t *best_module = mca_mpool_base_default_module;
mca_base_component_list_item_t *cli;
mca_mpool_base_selected_module_t *sm;
int best_priority = mca_mpool_base_default_priority;
int rc;
OPAL_LIST_FOREACH(cli, &opal_mpool_base_framework.framework_components, mca_base_component_list_item_t) {
component = (mca_mpool_base_component_t *) cli->cli_component;
if(0 == strcmp(component->mpool_version.mca_component_name, name)) {
module = component->mpool_init(resources);
break;
mca_mpool_base_component_t *component = (mca_mpool_base_component_t *) cli->cli_component;
mca_mpool_base_module_t *module;
int priority;
rc = component->mpool_query (hints, &priority, &module);
if (OPAL_SUCCESS == rc) {
if (priority > best_priority) {
best_priority = priority;
best_module = module;
}
}
}
if ( NULL == module ) {
return NULL;
}
sm = OBJ_NEW(mca_mpool_base_selected_module_t);
sm->mpool_component = component;
sm->mpool_module = module;
sm->user_data = user_data;
sm->mpool_resources = resources;
opal_list_append(&mca_mpool_base_modules, (opal_list_item_t*) sm);
/* on the very first creation of a module we init the memory
callback */
if (opal_list_get_size(&mca_mpool_base_modules) == 1) {
/* Default to not using memory hooks */
int use_mem_hooks = 0;
/* Use the memory hooks if leave_pinned or
leave_pinned_pipeline is enabled (note that either of these
leave_pinned variables may have been set by a user MCA
param or elsewhere in the code base). Yes, we could have
coded this more succinctly, but this is more clear. Do not
check memory hooks if the mpool explicity asked us not to. */
if ((opal_leave_pinned > 0 || opal_leave_pinned_pipeline) &&
!(module->flags & MCA_MPOOL_FLAGS_NO_HOOKS)) {
use_mem_hooks = 1;
}
if (use_mem_hooks) {
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
opal_mem_hooks_support_level())) {
opal_mem_hooks_register_release(mca_mpool_base_mem_cb, NULL);
} else {
opal_show_help("help-mpool-base.txt", "leave pinned failed",
true, name, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
opal_proc_local_get()->proc_hostname);
return NULL;
}
/* Set this to true so that mpool_base_close knows to
cleanup */
mca_mpool_base_used_mem_hooks = 1;
}
}
return module;
}
mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name)
{
mca_mpool_base_selected_module_t *mli;
OPAL_LIST_FOREACH(mli, &mca_mpool_base_modules, mca_mpool_base_selected_module_t) {
if(0 == strcmp(mli->mpool_component->mpool_version.mca_component_name,
name)) {
return mli->mpool_module;
}
}
return NULL;
}
int mca_mpool_base_module_destroy(mca_mpool_base_module_t *module)
{
mca_mpool_base_selected_module_t *sm, *next;
OPAL_LIST_FOREACH_SAFE(sm, next, &mca_mpool_base_modules, mca_mpool_base_selected_module_t) {
if (module == sm->mpool_module) {
opal_list_remove_item(&mca_mpool_base_modules, (opal_list_item_t*)sm);
if (NULL != sm->mpool_module->mpool_finalize) {
sm->mpool_module->mpool_finalize(sm->mpool_module);
}
OBJ_RELEASE(sm);
return OPAL_SUCCESS;
}
}
return OPAL_ERR_NOT_FOUND;
return best_module;
}
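With the new lookup loop, module selection is driven by each component's mpool_query(); a hypothetical component-side implementation (the "mpool=mine" hint token, the priority values and my_mpool_module are invented):

static int my_mpool_query (const char *hints, int *priority,
                           mca_mpool_base_module_t **module)
{
    if (NULL != hints && NULL != strstr (hints, "mpool=mine")) {
        *priority = 100;             /* beat the default priority of 50 */
    } else {
        *priority = 0;
    }
    *module = &my_mpool_module;      /* module instance assumed to exist */
    return OPAL_SUCCESS;
}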

View file

@ -23,10 +23,6 @@
*
* $HEADER$
*/
/**
* @file
* Description of the Registration Cache framework
*/
#include "opal_config.h"

View file

@ -28,6 +28,7 @@
#define MCA_MPOOL_BASE_TREE_MAX 8
#include "opal/mca/mca.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/rcache/rcache.h"
BEGIN_C_DECLS
@ -46,8 +47,9 @@ struct mca_mpool_base_tree_item_t
size_t num_bytes; /**< the number of bytes in this alloc, only for
debugging reporting with
mpi_show_mpi_alloc_mem_leaks */
mca_mpool_base_module_t* mpools[MCA_MPOOL_BASE_TREE_MAX]; /**< the mpools */
mca_mpool_base_registration_t* regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */
mca_mpool_base_module_t *mpool;
mca_rcache_base_module_t *rcaches[MCA_MPOOL_BASE_TREE_MAX]; /**< the registration caches */
mca_rcache_base_registration_t *regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */
uint8_t count; /**< length of the mpools/regs array */
};
typedef struct mca_mpool_base_tree_item_t mca_mpool_base_tree_item_t;

View file

@ -1,105 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_GPUSM_H
#define MCA_MPOOL_GPUSM_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/mpool/mpool.h"
BEGIN_C_DECLS
#define MEMHANDLE_SIZE 8
#define EVTHANDLE_SIZE 8
struct mca_mpool_gpusm_registration_t {
mca_mpool_base_registration_t base;
uint64_t memHandle[MEMHANDLE_SIZE]; /* CUipcMemHandle */
uint64_t evtHandle[EVTHANDLE_SIZE]; /* CUipcEventHandle */
uintptr_t event; /* CUevent */
};
typedef struct mca_mpool_gpusm_registration_t mca_mpool_gpusm_registration_t;
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_gpusm_registration_t);
struct mca_mpool_gpusm_component_t {
mca_mpool_base_component_t super;
};
typedef struct mca_mpool_gpusm_component_t mca_mpool_gpusm_component_t;
OPAL_DECLSPEC extern mca_mpool_gpusm_component_t mca_mpool_gpusm_component;
struct mca_mpool_base_resources_t {
void *reg_data;
size_t sizeof_reg;
int (*register_mem)(void *base, size_t size, mca_mpool_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg);
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
struct mca_mpool_gpusm_module_t {
mca_mpool_base_module_t super;
struct mca_mpool_base_resources_t resources;
opal_free_list_t reg_list;
}; typedef struct mca_mpool_gpusm_module_t mca_mpool_gpusm_module_t;
/*
* Initializes the mpool module.
*/
void mca_mpool_gpusm_module_init(mca_mpool_gpusm_module_t *mpool);
/**
* register block of memory
*/
int mca_mpool_gpusm_register(mca_mpool_base_module_t* mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
/**
* deregister memory
*/
int mca_mpool_gpusm_deregister(mca_mpool_base_module_t *mpool,
mca_mpool_base_registration_t *reg);
/**
* find registration for a given block of memory
*/
int mca_mpool_gpusm_find(struct mca_mpool_base_module_t* mpool, void* addr,
size_t size, mca_mpool_base_registration_t **reg);
/**
* finalize mpool
*/
void mca_mpool_gpusm_finalize(struct mca_mpool_base_module_t *mpool);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint Stae
* @return OPAL_SUCCESS or failure status
*/
int mca_mpool_gpusm_ft_event(int state);
END_C_DECLS
#endif

View file

@ -1,160 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_OPENIB_H
#define MCA_MPOOL_OPENIB_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/event/event.h"
#include "opal/mca/mpool/mpool.h"
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
BEGIN_C_DECLS
#define MCA_MPOOL_GRDMA_NAME_MAX 256
struct mca_mpool_grdma_pool_t {
opal_list_item_t super;
char *pool_name;
opal_list_t lru_list;
opal_list_t gc_list;
struct mca_rcache_base_module_t *rcache;
};
typedef struct mca_mpool_grdma_pool_t mca_mpool_grdma_pool_t;
OBJ_CLASS_DECLARATION(mca_mpool_grdma_pool_t);
struct mca_mpool_grdma_component_t {
mca_mpool_base_component_t super;
opal_list_t pools;
char *rcache_name;
bool print_stats;
int leave_pinned;
};
typedef struct mca_mpool_grdma_component_t mca_mpool_grdma_component_t;
OPAL_DECLSPEC extern mca_mpool_grdma_component_t mca_mpool_grdma_component;
struct mca_mpool_grdma_module_t;
struct mca_mpool_base_resources_t {
char *pool_name;
void *reg_data;
size_t sizeof_reg;
int (*register_mem)(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
struct mca_mpool_grdma_module_t {
mca_mpool_base_module_t super;
struct mca_mpool_base_resources_t resources;
mca_mpool_grdma_pool_t *pool;
opal_free_list_t reg_list;
uint32_t stat_cache_hit;
uint32_t stat_cache_miss;
uint32_t stat_evicted;
uint32_t stat_cache_found;
uint32_t stat_cache_notfound;
};
typedef struct mca_mpool_grdma_module_t mca_mpool_grdma_module_t;
/*
* Initializes the mpool module.
*/
void mca_mpool_grdma_module_init(mca_mpool_grdma_module_t *mpool, mca_mpool_grdma_pool_t *pool);
/*
* Returns base address of shared memory mapping.
*/
void *mca_mpool_grdma_base(mca_mpool_base_module_t *mpool);
/**
* Allocate block of registered memory.
*/
void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
size_t align, uint32_t flags,
mca_mpool_base_registration_t** registration);
/**
* realloc block of registered memory
*/
void* mca_mpool_grdma_realloc( mca_mpool_base_module_t *mpool, void* addr,
size_t size, mca_mpool_base_registration_t** registration);
/**
* register block of memory
*/
int mca_mpool_grdma_register(mca_mpool_base_module_t* mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
/**
* deregister memory
*/
int mca_mpool_grdma_deregister(mca_mpool_base_module_t *mpool,
mca_mpool_base_registration_t *reg);
/**
* free memory allocated by alloc function
*/
void mca_mpool_grdma_free(mca_mpool_base_module_t *mpool, void * addr,
mca_mpool_base_registration_t *reg);
/**
* find registration for a given block of memory
*/
int mca_mpool_grdma_find(struct mca_mpool_base_module_t* mpool, void* addr,
size_t size, mca_mpool_base_registration_t **reg);
/**
* unregister all registration covering the block of memory
*/
int mca_mpool_grdma_release_memory(mca_mpool_base_module_t* mpool, void *base,
size_t size);
/**
* finalize mpool
*/
void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint Stae
* @return OPAL_SUCCESS or failure status
*/
int mca_mpool_grdma_ft_event(int state);
/**
* evict one unused registration from the mpool's lru.
* @return true on success, false on failure
*/
bool mca_mpool_grdma_evict (struct mca_mpool_base_module_t *mpool);
END_C_DECLS
#endif

View file

@ -1,592 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include "opal/align.h"
#include "opal/util/proc.h"
#if OPAL_CUDA_GDR_SUPPORT
#include "opal/mca/common/cuda/common_cuda.h"
#endif /* OPAL_CUDA_GDR_SUPPORT */
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/mpool/base/base.h"
#include "mpool_grdma.h"
static inline bool registration_is_cacheable(mca_mpool_base_registration_t *reg)
{
return (mca_mpool_grdma_component.leave_pinned &&
!(reg->flags &
(MCA_MPOOL_FLAGS_CACHE_BYPASS |
MCA_MPOOL_FLAGS_PERSIST |
MCA_MPOOL_FLAGS_INVALID)));
}
#if OPAL_CUDA_GDR_SUPPORT
static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size);
#endif /* OPAL_CUDA_GDR_SUPPORT */
static void mca_mpool_grdma_pool_contructor (mca_mpool_grdma_pool_t *pool)
{
memset ((void *)((uintptr_t)pool + sizeof (pool->super)), 0, sizeof (*pool) - sizeof (pool->super));
OBJ_CONSTRUCT(&pool->lru_list, opal_list_t);
OBJ_CONSTRUCT(&pool->gc_list, opal_list_t);
pool->rcache = mca_rcache_base_module_create(mca_mpool_grdma_component.rcache_name);
}
static void mca_mpool_grdma_pool_destructor (mca_mpool_grdma_pool_t *pool)
{
OBJ_DESTRUCT(&pool->lru_list);
OBJ_DESTRUCT(&pool->gc_list);
free (pool->pool_name);
}
OBJ_CLASS_INSTANCE(mca_mpool_grdma_pool_t, opal_list_item_t,
mca_mpool_grdma_pool_contructor,
mca_mpool_grdma_pool_destructor);
/*
* Initializes the mpool module.
*/
void mca_mpool_grdma_module_init(mca_mpool_grdma_module_t* mpool, mca_mpool_grdma_pool_t *pool)
{
OBJ_RETAIN(pool);
mpool->pool = pool;
mpool->super.mpool_component = &mca_mpool_grdma_component.super;
mpool->super.mpool_base = NULL; /* no base .. */
mpool->super.mpool_alloc = mca_mpool_grdma_alloc;
mpool->super.mpool_realloc = mca_mpool_grdma_realloc;
mpool->super.mpool_free = mca_mpool_grdma_free;
mpool->super.mpool_register = mca_mpool_grdma_register;
mpool->super.mpool_find = mca_mpool_grdma_find;
mpool->super.mpool_deregister = mca_mpool_grdma_deregister;
mpool->super.mpool_release_memory = mca_mpool_grdma_release_memory;
mpool->super.mpool_finalize = mca_mpool_grdma_finalize;
mpool->super.mpool_ft_event = mca_mpool_grdma_ft_event;
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
mpool->super.rcache = pool->rcache;
mpool->stat_cache_hit = mpool->stat_cache_miss = mpool->stat_evicted = 0;
mpool->stat_cache_found = mpool->stat_cache_notfound = 0;
OBJ_CONSTRUCT(&mpool->reg_list, opal_free_list_t);
opal_free_list_init (&mpool->reg_list, mpool->resources.sizeof_reg,
opal_cache_line_size,
OBJ_CLASS(mca_mpool_base_registration_t),
0, opal_cache_line_size, 0, -1, 32, NULL, 0,
NULL, NULL, NULL);
}
static inline int dereg_mem(mca_mpool_base_registration_t *reg)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) reg->mpool;
int rc;
if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
reg->mpool->rcache->rcache_delete(reg->mpool->rcache, reg);
/* Drop the rcache lock before deregistering the memory */
OPAL_THREAD_UNLOCK(&reg->mpool->rcache->lock);
rc = mpool_grdma->resources.deregister_mem(mpool_grdma->resources.reg_data,
reg);
OPAL_THREAD_LOCK(&reg->mpool->rcache->lock);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_free_list_return (&mpool_grdma->reg_list,
(opal_free_list_item_t *) reg);
}
return rc;
}
/**
* allocate function
*/
void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
size_t align, uint32_t flags, mca_mpool_base_registration_t **reg)
{
void *base_addr, *addr;
if(0 == align)
align = mca_mpool_base_page_size;
#if OPAL_CUDA_SUPPORT
/* CUDA cannot handle registering overlapping regions, so make
* sure each region is page sized and page aligned. */
align = mca_mpool_base_page_size;
size = OPAL_ALIGN(size, mca_mpool_base_page_size, size_t);
#endif
#ifdef HAVE_POSIX_MEMALIGN
if((errno = posix_memalign(&base_addr, align, size)) != 0)
return NULL;
addr = base_addr;
#else
base_addr = malloc(size + align);
if(NULL == base_addr)
return NULL;
addr = (void*)OPAL_ALIGN((uintptr_t)base_addr, align, uintptr_t);
#endif
if(OPAL_SUCCESS != mca_mpool_grdma_register(mpool, addr, size, flags,
MCA_MPOOL_ACCESS_ANY, reg)) {
free(base_addr);
return NULL;
}
(*reg)->alloc_base = (unsigned char *) base_addr;
return addr;
}
/* This function must be called with the rcache lock held */
static inline void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
opal_list_item_t *item;
/* Remove registration from garbage collection list
before deregistering it */
while (NULL !=
(item = opal_list_remove_first(&mpool_grdma->pool->gc_list))) {
dereg_mem((mca_mpool_base_registration_t *) item);
}
}
static inline bool mca_mpool_grdma_evict_lru_local (mca_mpool_grdma_pool_t *pool)
{
mca_mpool_grdma_module_t *mpool_grdma;
mca_mpool_base_registration_t *old_reg;
old_reg = (mca_mpool_base_registration_t *)
opal_list_remove_first (&pool->lru_list);
if (NULL == old_reg) {
return false;
}
mpool_grdma = (mca_mpool_grdma_module_t *) old_reg->mpool;
(void) dereg_mem (old_reg);
mpool_grdma->stat_evicted++;
return true;
}
enum {
MCA_MPOOL_GRDMA_MSG_EMPTY = 0,
MCA_MPOOL_GRDMA_MSG_NEED_DEREG = 1,
MCA_MPOOL_GRDMA_MSG_BUSY = 2,
MCA_MPOOL_GRDMA_MSG_COMPLETE = 3
};
bool mca_mpool_grdma_evict (struct mca_mpool_base_module_t *mpool)
{
return mca_mpool_grdma_evict_lru_local (((mca_mpool_grdma_module_t *) mpool)->pool);
}
/*
* register memory
*/
int mca_mpool_grdma_register (mca_mpool_base_module_t *mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags,
mca_mpool_base_registration_t **reg)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
const bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
const bool persist = !!(flags & MCA_MPOOL_FLAGS_PERSIST);
mca_mpool_base_registration_t *grdma_reg;
opal_free_list_item_t *item;
unsigned char *base, *bound;
int rc;
OPAL_THREAD_LOCK(&mpool->rcache->lock);
*reg = NULL;
/* if cache bypass is requested don't use the cache */
base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
mca_mpool_base_page_size_log);
if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
do_unregistration_gc(mpool);
#if OPAL_CUDA_GDR_SUPPORT
if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
size_t psize;
mca_common_cuda_get_address_range(&base, &psize, addr);
bound = base + psize - 1;
/* Check to see if this memory is in the cache and if it has been freed. If so,
* this call will boot it out of the cache. */
check_for_cuda_freed_memory(mpool, base, psize);
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
/* look through existing regs if a persistent registration was not requested.
* Persistent registrations are always registered and placed in the cache */
if(!(bypass_cache || persist)) {
/* check to see if memory is registered */
mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, &grdma_reg);
if (grdma_reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) {
if (OPAL_UNLIKELY((access_flags & grdma_reg->access_flags) != access_flags)) {
access_flags |= grdma_reg->access_flags;
if (0 != grdma_reg->ref_count) {
if (!(grdma_reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
grdma_reg->mpool->rcache->rcache_delete(grdma_reg->mpool->rcache, grdma_reg);
}
/* mark the registration to go away when it is deregistered */
grdma_reg->flags |= MCA_MPOOL_FLAGS_INVALID | MCA_MPOOL_FLAGS_CACHE_BYPASS;
} else {
if (registration_is_cacheable (grdma_reg)) {
/* pull the item out of the lru */
opal_list_remove_item (&mpool_grdma->pool->lru_list, (opal_list_item_t *) grdma_reg);
}
(void) dereg_mem (grdma_reg);
}
} else {
*reg = grdma_reg;
if (0 == grdma_reg->ref_count) {
/* Leave pinned must be set for this to still be in the rcache. */
opal_list_remove_item(&mpool_grdma->pool->lru_list,
(opal_list_item_t *) grdma_reg);
}
/* This segment fits fully within an existing segment. */
mpool_grdma->stat_cache_hit++;
grdma_reg->ref_count++;
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
return OPAL_SUCCESS;
}
}
mpool_grdma->stat_cache_miss++;
/* Unless explicitly requested by the caller, always store the
* registration in the rcache. This speeds up the case where
* no leave-pinned protocol is in use but the same segment is in
* use in multiple simultaneous transactions. We used to set bypass_cache
* here if !mca_mpool_grdma_component.leave_pinned. */
}
item = opal_free_list_get (&mpool_grdma->reg_list);
if(NULL == item) {
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
grdma_reg = (mca_mpool_base_registration_t*)item;
grdma_reg->mpool = mpool;
grdma_reg->base = base;
grdma_reg->bound = bound;
grdma_reg->flags = flags;
grdma_reg->access_flags = access_flags;
#if OPAL_CUDA_GDR_SUPPORT
if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
mca_common_cuda_get_buffer_id(grdma_reg);
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
if (false == bypass_cache) {
rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
opal_free_list_return (&mpool_grdma->reg_list, item);
return rc;
}
}
while (OPAL_ERR_OUT_OF_RESOURCE ==
(rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data,
base, bound - base + 1, grdma_reg))) {
/* try to remove one unused reg and retry */
if (!mca_mpool_grdma_evict (mpool)) {
break;
}
}
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
if (false == bypass_cache) {
mpool->rcache->rcache_delete(mpool->rcache, grdma_reg);
}
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
opal_free_list_return (&mpool_grdma->reg_list, item);
return rc;
}
*reg = grdma_reg;
(*reg)->ref_count++;
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
/* Cleanup any vmas that we have deferred deletion on */
mpool->rcache->rcache_clean(mpool->rcache);
return OPAL_SUCCESS;
}
/**
* realloc function
*/
void* mca_mpool_grdma_realloc(mca_mpool_base_module_t *mpool, void *addr,
size_t size, mca_mpool_base_registration_t **reg)
{
mca_mpool_base_registration_t *old_reg = *reg;
size_t old_size = old_reg->bound - old_reg->base + 1;
void *new_mem = mca_mpool_grdma_alloc(mpool, size, 0, old_reg->flags, reg);
if (NULL == new_mem) {
/* allocation failed; leave the original buffer and registration intact */
return NULL;
}
/* copy no more than the new buffer can hold */
memcpy(new_mem, addr, old_size < size ? old_size : size);
mca_mpool_grdma_free(mpool, addr, old_reg);
return new_mem;
}
/**
* free function
*/
void mca_mpool_grdma_free(mca_mpool_base_module_t *mpool, void *addr,
mca_mpool_base_registration_t *registration)
{
void *alloc_base = registration->alloc_base;
mca_mpool_grdma_deregister(mpool, registration);
free(alloc_base);
}
int mca_mpool_grdma_find(struct mca_mpool_base_module_t *mpool, void *addr,
size_t size, mca_mpool_base_registration_t **reg)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
unsigned char *base, *bound;
int rc;
base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
mca_mpool_base_page_size_log);
OPAL_THREAD_LOCK(&mpool->rcache->lock);
rc = mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, reg);
if(NULL != *reg &&
(mca_mpool_grdma_component.leave_pinned ||
((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
((*reg)->base == base && (*reg)->bound == bound))) {
assert(((void*)(*reg)->bound) >= addr);
if(0 == (*reg)->ref_count &&
mca_mpool_grdma_component.leave_pinned) {
opal_list_remove_item(&mpool_grdma->pool->lru_list,
(opal_list_item_t*)(*reg));
}
mpool_grdma->stat_cache_found++;
(*reg)->ref_count++;
} else {
mpool_grdma->stat_cache_notfound++;
}
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
return rc;
}
int mca_mpool_grdma_deregister(struct mca_mpool_base_module_t *mpool,
mca_mpool_base_registration_t *reg)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
int rc = OPAL_SUCCESS;
assert(reg->ref_count > 0);
OPAL_THREAD_LOCK(&mpool->rcache->lock);
reg->ref_count--;
if(reg->ref_count > 0) {
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
return OPAL_SUCCESS;
}
if (registration_is_cacheable(reg)) {
opal_list_append(&mpool_grdma->pool->lru_list, (opal_list_item_t *) reg);
} else {
rc = dereg_mem (reg);
}
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
/* Cleanup any vmas that we have deferred deletion on */
mpool->rcache->rcache_clean(mpool->rcache);
return rc;
}
#define GRDMA_MPOOL_NREGS 100
int mca_mpool_grdma_release_memory(struct mca_mpool_base_module_t *mpool,
void *base, size_t size)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
int reg_cnt, i, rc = OPAL_SUCCESS;
OPAL_THREAD_LOCK(&mpool->rcache->lock);
do {
reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, base, size,
regs, GRDMA_MPOOL_NREGS);
for(i = 0 ; i < reg_cnt ; ++i) {
regs[i]->flags |= MCA_MPOOL_FLAGS_INVALID;
if (regs[i]->ref_count) {
/* memory is being freed, but there are registrations in use that
* cover the memory. This can happen even in a correct program,
* but may also be a user error. We can't tell. Mark the
* registration as invalid. It will not be used any more and
* will be unregistered when ref_count reaches zero */
rc = OPAL_ERROR; /* tell caller that something was wrong */
} else {
opal_list_remove_item(&mpool_grdma->pool->lru_list,(opal_list_item_t *) regs[i]);
opal_list_append(&mpool_grdma->pool->gc_list, (opal_list_item_t *) regs[i]);
}
}
} while(reg_cnt == GRDMA_MPOOL_NREGS);
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
return rc;
}
/* Make sure this registration request is not stale. In other words, ensure
* that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state. If we do,
* kick out the registrations and deregister. This function needs to be called
* with the mpool->rcache->lock held. */
#if OPAL_CUDA_GDR_SUPPORT
static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
int reg_cnt, i, rc = OPAL_SUCCESS;
mca_mpool_base_registration_t *reg;
mpool->rcache->rcache_find(mpool->rcache, addr, size, &reg);
if (NULL == reg) {
return OPAL_SUCCESS;
}
/* If not previously freed memory, just return 0 */
if (!(mca_common_cuda_previously_freed_memory(reg))) {
return OPAL_SUCCESS;
}
/* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "Before free"); */
/* This memory has been freed. Find all registrations and delete */
do {
reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, reg->base, reg->bound - reg->base + 1,
regs, GRDMA_MPOOL_NREGS);
for(i = 0 ; i < reg_cnt ; ++i) {
regs[i]->flags |= MCA_MPOOL_FLAGS_INVALID;
if (regs[i]->ref_count) {
opal_output(0, "Release FAILED: ref_count=%d, base=%p, bound=%p, size=%d",
regs[i]->ref_count, regs[i]->base, regs[i]->bound,
(int) (regs[i]->bound - regs[i]->base + 1));
/* memory is being freed, but there are registrations in use that
* cover the memory. This can happen even in a correct program,
* but may also be a user error. We can't tell. Mark the
* registration as invalid. It will not be used any more and
* will be unregistered when ref_count reaches zero */
rc = OPAL_ERROR; /* tell caller that something was wrong */
} else {
opal_list_remove_item(&mpool_grdma->pool->lru_list,(opal_list_item_t *) regs[i]);
/* Now deregister. Do not use gc_list as we need to kick this out now. */
dereg_mem(regs[i]);
}
}
} while(reg_cnt == GRDMA_MPOOL_NREGS);
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
/* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "After free");*/
return rc;
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
int reg_cnt, i;
/* Statistic */
if (true == mca_mpool_grdma_component.print_stats) {
opal_output(0, "%s grdma: stats "
"(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mpool_grdma->stat_cache_hit, mpool_grdma->stat_cache_miss,
mpool_grdma->stat_cache_found, mpool_grdma->stat_cache_notfound,
mpool_grdma->stat_evicted);
}
OPAL_THREAD_LOCK(&mpool->rcache->lock);
do_unregistration_gc(mpool);
do {
reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
regs, GRDMA_MPOOL_NREGS);
for (i = 0 ; i < reg_cnt ; ++i) {
if (regs[i]->ref_count) {
regs[i]->ref_count = 0; /* otherwise dereg will fail on assert */
} else if (mca_mpool_grdma_component.leave_pinned) {
opal_list_remove_item(&mpool_grdma->pool->lru_list,
(opal_list_item_t *) regs[i]);
}
(void) dereg_mem(regs[i]);
}
} while (reg_cnt == GRDMA_MPOOL_NREGS);
OBJ_RELEASE(mpool_grdma->pool);
OBJ_DESTRUCT(&mpool_grdma->reg_list);
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
/* Cleanup any vmas that we have deferred deletion on */
mpool->rcache->rcache_clean(mpool->rcache);
/* this mpool was allocated by grdma_init in mpool_grdma_component.c */
free(mpool);
}
int mca_mpool_grdma_ft_event(int state) {
return OPAL_SUCCESS;
}
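
The module above implements the leave-pinned protocol: deregistering a cacheable registration only drops it onto the pool's LRU list, and a register call that runs the transport out of registration resources evicts LRU entries one at a time and retries. A toy model of that life cycle (illustration only, not Open MPI code; the two-slot limit and the structure fields are invented for the example):

/* Toy model: refcounted registrations that stay cached on an LRU list and
 * are evicted only when a new registration needs the (simulated) slot. */
#include <stdio.h>
#include <stdbool.h>

#define SLOTS 2                     /* pretend the NIC can pin only 2 regions */

typedef struct toy_reg { int id; int ref_count; bool pinned; bool on_lru; } toy_reg_t;

static int pinned_count = 0;

static bool evict_one (toy_reg_t *regs, int nregs)
{
    for (int i = 0 ; i < nregs ; ++i) {
        if (regs[i].on_lru) {                     /* unused but still pinned */
            regs[i].on_lru = false;
            regs[i].pinned = false;
            --pinned_count;
            printf ("evicted registration %d\n", regs[i].id);
            return true;
        }
    }
    return false;
}

static bool toy_register (toy_reg_t *regs, int nregs, toy_reg_t *reg)
{
    while (pinned_count >= SLOTS) {               /* evict-and-retry loop */
        if (!evict_one (regs, nregs)) {
            return false;
        }
    }
    reg->pinned = true;
    reg->on_lru = false;
    reg->ref_count = 1;
    ++pinned_count;
    printf ("registered %d\n", reg->id);
    return true;
}

static void toy_deregister (toy_reg_t *reg)
{
    if (0 == --reg->ref_count) {
        reg->on_lru = true;                       /* leave pinned, park on LRU */
        printf ("registration %d parked on the lru\n", reg->id);
    }
}

int main (void)
{
    toy_reg_t regs[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

    toy_register (regs, 3, &regs[0]);
    toy_deregister (&regs[0]);                    /* stays pinned, goes to LRU */
    toy_register (regs, 3, &regs[1]);
    toy_register (regs, 3, &regs[2]);             /* forces eviction of reg 0 */
    return 0;
}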

opal/mca/mpool/hugepage/Makefile.am (new file, 52 lines)

@ -0,0 +1,52 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = $(mpool_hugepage_CPPFLAGS)
sources = mpool_hugepage_module.c mpool_hugepage_component.c
if WANT_INSTALL_HEADERS
opaldir = $(opalincludedir)/$(subdir)
opal_HEADERS = mpool_hugepage.h
endif
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_mpool_hugepage_DSO
component_noinst =
component_install = mca_mpool_hugepage.la
else
component_noinst = libmca_mpool_hugepage.la
component_install =
endif
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_hugepage_la_SOURCES = $(sources)
mca_mpool_hugepage_la_LDFLAGS = -module -avoid-version
mca_mpool_hugepage_la_LIBADD = $(mpool_hugepage_LIBS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_hugepage_la_SOURCES = $(sources)
libmca_mpool_hugepage_la_LDFLAGS = -module -avoid-version
libmca_mpool_hugepage_la_LIBADD = $(mpool_hugepage_LIBS)

opal/mca/mpool/hugepage/mpool_hugepage.h (new file, 90 lines)

@ -0,0 +1,90 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_HUGEPAGE_H
#define MCA_MPOOL_HUGEPAGE_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/event/event.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/util/proc.h"
#include "opal/mca/allocator/allocator.h"
#include "opal/util/sys_limits.h"
BEGIN_C_DECLS
struct mca_mpool_hugepage_module_t;
typedef struct mca_mpool_hugepage_module_t mca_mpool_hugepage_module_t;
struct mca_mpool_hugepage_component_t {
mca_mpool_base_component_t super;
bool print_stats;
opal_list_t huge_pages;
mca_mpool_hugepage_module_t *modules;
int module_count;
unsigned long bytes_allocated;
};
typedef struct mca_mpool_hugepage_component_t mca_mpool_hugepage_component_t;
OPAL_DECLSPEC extern mca_mpool_hugepage_component_t mca_mpool_hugepage_component;
struct mca_mpool_hugepage_module_t;
struct mca_mpool_hugepage_hugepage_t {
/** opal list item superclass */
opal_list_item_t super;
/** page size in bytes */
unsigned long page_size;
/** path for mmapped files */
char *path;
/** counter to help ensure unique file names for mmapped files */
volatile int32_t count;
/** some platforms allow allocation of hugepages through mmap flags */
int mmap_flags;
};
typedef struct mca_mpool_hugepage_hugepage_t mca_mpool_hugepage_hugepage_t;
OBJ_CLASS_DECLARATION(mca_mpool_hugepage_hugepage_t);
struct mca_mpool_hugepage_module_t {
mca_mpool_base_module_t super;
mca_mpool_hugepage_hugepage_t *huge_page;
mca_allocator_base_module_t *allocator;
opal_mutex_t lock;
opal_rb_tree_t allocation_tree;
};
/*
* Initializes the mpool module.
*/
int mca_mpool_hugepage_module_init (mca_mpool_hugepage_module_t *mpool,
mca_mpool_hugepage_hugepage_t *huge_page);
void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep);
void mca_mpool_hugepage_seg_free (void *ctx, void *addr);
END_C_DECLS
#endif


@ -0,0 +1,366 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/mca/base/base.h"
#include "opal/runtime/opal_params.h"
#include "opal/mca/base/mca_base_pvar.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/allocator/base/base.h"
#include "mpool_hugepage.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#ifdef HAVE_SYS_VFS_H
#include <sys/vfs.h>
#endif
#ifdef HAVE_SYS_MOUNT_H
#include <sys/mount.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#include <fcntl.h>
/*
* Local functions
*/
static int mca_mpool_hugepage_open (void);
static int mca_mpool_hugepage_close (void);
static int mca_mpool_hugepage_register (void);
static int mca_mpool_hugepage_query (const char *hints, int *priority,
mca_mpool_base_module_t **module);
static void mca_mpool_hugepage_find_hugepages (void);
static int mca_mpool_hugepage_priority;
static unsigned long mca_mpool_hugepage_page_size;
mca_mpool_hugepage_component_t mca_mpool_hugepage_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.mpool_version ={
MCA_MPOOL_BASE_VERSION_3_0_0,
.mca_component_name = "hugepage",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
.mca_open_component = mca_mpool_hugepage_open,
.mca_close_component = mca_mpool_hugepage_close,
.mca_register_component_params = mca_mpool_hugepage_register,
},
.mpool_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.mpool_query = mca_mpool_hugepage_query,
},
};
/**
* component open/close/init function
*/
static int mca_mpool_hugepage_register(void)
{
mca_mpool_hugepage_priority = 50;
(void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
"priority", "Default priority of the hugepage mpool component "
"(default: 50)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_mpool_hugepage_priority);
mca_mpool_hugepage_page_size = 1 << 21;
(void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
"page_size", "Default huge page size of the hugepage mpool component "
"(default: 2M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_mpool_hugepage_page_size);
mca_mpool_hugepage_component.bytes_allocated = 0;
(void) mca_base_component_pvar_register (&mca_mpool_hugepage_component.super.mpool_version,
"bytes_allocated", "Number of bytes currently allocated in the mpool "
"hugepage component", OPAL_INFO_LVL_3, MCA_BASE_PVAR_CLASS_SIZE,
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_mpool_hugepage_component.bytes_allocated);
return OPAL_SUCCESS;
}
static int mca_mpool_hugepage_open (void)
{
mca_mpool_hugepage_module_t *hugepage_module;
mca_mpool_hugepage_hugepage_t *hp;
int module_index, rc;
OBJ_CONSTRUCT(&mca_mpool_hugepage_component.huge_pages, opal_list_t);
mca_mpool_hugepage_find_hugepages ();
if (0 == opal_list_get_size (&mca_mpool_hugepage_component.huge_pages)) {
return OPAL_SUCCESS;
}
mca_mpool_hugepage_component.modules = (mca_mpool_hugepage_module_t *)
calloc (opal_list_get_size (&mca_mpool_hugepage_component.huge_pages),
sizeof (mca_mpool_hugepage_module_t));
if (NULL == mca_mpool_hugepage_component.modules) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
module_index = 0;
OPAL_LIST_FOREACH(hp, &mca_mpool_hugepage_component.huge_pages, mca_mpool_hugepage_hugepage_t) {
hugepage_module = mca_mpool_hugepage_component.modules + module_index;
rc = mca_mpool_hugepage_module_init (hugepage_module, hp);
if (OPAL_SUCCESS != rc) {
continue;
}
module_index++;
}
mca_mpool_hugepage_component.module_count = module_index;
return OPAL_SUCCESS;
}
static int mca_mpool_hugepage_close (void)
{
OPAL_LIST_DESTRUCT(&mca_mpool_hugepage_component.huge_pages);
for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
mca_mpool_hugepage_module_t *module = mca_mpool_hugepage_component.modules + i;
module->super.mpool_finalize (&module->super);
}
free (mca_mpool_hugepage_component.modules);
mca_mpool_hugepage_component.modules = NULL;
return OPAL_SUCCESS;
}
static int page_compare (opal_list_item_t **a, opal_list_item_t **b) {
mca_mpool_hugepage_hugepage_t *pagea = (mca_mpool_hugepage_hugepage_t *) *a;
mca_mpool_hugepage_hugepage_t *pageb = (mca_mpool_hugepage_hugepage_t *) *b;
if (pagea->page_size > pageb->page_size) {
return 1;
} else if (pagea->page_size < pageb->page_size) {
return -1;
}
return 0;
}
static void mca_mpool_hugepage_find_hugepages (void) {
mca_mpool_hugepage_hugepage_t *hp;
FILE *fh;
char *path;
char buffer[1024];
char *ctx, *tok;
fh = fopen ("/proc/mounts", "r");
if (NULL == fh) {
return;
}
while (fgets (buffer, 1024, fh)) {
unsigned long page_size = 0;
(void) strtok_r (buffer, " ", &ctx);
path = strtok_r (NULL, " ", &ctx);
tok = strtok_r (NULL, " ", &ctx);
if (0 != strcmp (tok, "hugetlbfs")) {
continue;
}
tok = strtok_r (NULL, " ", &ctx);
tok = strtok_r (tok, ",", &ctx);
do {
if (0 == strncmp (tok, "pagesize", 8)) {
break;
}
tok = strtok_r (NULL, ",", &ctx);
} while (tok);
if (!tok) {
#if HAVE_STATFS
struct statfs info;
statfs (path, &info);
page_size = info.f_bsize;
#endif
} else {
sscanf (tok, "pagesize=%lu", &page_size);
}
if (0 == page_size) {
/* could not get page size */
continue;
}
hp = OBJ_NEW(mca_mpool_hugepage_hugepage_t);
if (NULL == hp) {
break;
}
hp->path = strdup (path);
hp->page_size = page_size;
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
"found huge page with size = %lu, path = %s, mmap flags = 0x%x",
hp->page_size, hp->path, hp->mmap_flags));
opal_list_append (&mca_mpool_hugepage_component.huge_pages, &hp->super);
}
opal_list_sort (&mca_mpool_hugepage_component.huge_pages, page_compare);
fclose (fh);
}
static int mca_mpool_hugepage_query (const char *hints, int *priority_out,
mca_mpool_base_module_t **module)
{
unsigned long page_size = 0;
char **hints_array;
int my_priority = mca_mpool_hugepage_priority;
int modifier;
char *tmp;
bool found = false;
if (0 == mca_mpool_hugepage_component.module_count) {
return OPAL_ERR_NOT_AVAILABLE;
}
if (hints) {
hints_array = opal_argv_split (hints, ',');
if (NULL == hints_array) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
for (int i = 0 ; hints_array[i] ; ++i) {
char *key = hints_array[i];
char *value = NULL;
if (NULL != (tmp = strchr (key, '='))) {
value = tmp + 1;
*tmp = '\0';
}
if (0 == strcasecmp ("mpool", key)) {
if (value && 0 == strcasecmp ("hugepage", value)) {
/* this mpool was requested by name */
my_priority = 100;
opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
"hugepage mpool matches hint: %s=%s", key, value);
} else {
/* different mpool requested */
my_priority = 0;
opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
"hugepage mpool does not match hint: %s=%s", key, value);
return OPAL_ERR_NOT_FOUND;
}
}
if (0 == strcasecmp ("page_size", key)) {
page_size = strtoul (value, &tmp, 0);
if (*tmp) {
switch (*tmp) {
case 'g':
case 'G':
page_size *= 1024;
/* fall through */
case 'm':
case 'M':
page_size *= 1024;
/* fall through */
case 'k':
case 'K':
page_size *= 1024;
break;
default:
page_size = -1;
}
}
opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
"hugepage mpool requested page size: %lu", page_size);
}
}
opal_argv_free (hints_array);
}
if (0 == page_size) {
/* use default huge page size */
page_size = mca_mpool_hugepage_page_size;
if (my_priority < 100) {
/* take a priority hit if this mpool was not asked for by name */
my_priority = 0;
}
opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
"hugepage mpool did not match any hints: %s", hints);
}
for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
mca_mpool_hugepage_module_t *hugepage_module = mca_mpool_hugepage_component.modules + i;
if (hugepage_module->huge_page->page_size != page_size) {
continue;
}
my_priority = (my_priority < 80) ? my_priority + 20 : 100;
if (module) {
*module = &hugepage_module->super;
}
opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
"matches page size hint. page size: %lu, path: %s, mmap flags: "
"0x%x", page_size, hugepage_module->huge_page->path,
hugepage_module->huge_page->mmap_flags);
found = true;
break;
}
if (!found) {
opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
"could not find page matching page request: %lu", page_size);
return OPAL_ERR_NOT_FOUND;
}
if (priority_out) {
*priority_out = my_priority;
}
return OPAL_SUCCESS;
}
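
mpool_query above accepts a comma-separated hints string such as "mpool=hugepage,page_size=2M", with k/K, m/M and g/G suffixes allowed on the page size. A simplified, self-contained sketch of that parsing (illustration only; it stands in for the opal_argv_split/strcasecmp logic used by the component):

/* Illustration only: parse a hugepage-style hints string and decode the
 * page-size suffix the same way the query function above does. */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

static unsigned long parse_page_size (const char *value)
{
    char *end;
    unsigned long page_size = strtoul (value, &end, 0);
    switch (*end) {
    case 'g': case 'G': page_size *= 1024;        /* fall through */
    case 'm': case 'M': page_size *= 1024;        /* fall through */
    case 'k': case 'K': page_size *= 1024; break;
    default: break;
    }
    return page_size;
}

int main (void)
{
    const char *hints = "mpool=hugepage,page_size=2M";
    char *copy = strdup (hints), *saveptr = NULL;

    for (char *tok = strtok_r (copy, ",", &saveptr) ; tok ;
         tok = strtok_r (NULL, ",", &saveptr)) {
        char *eq = strchr (tok, '=');
        if (NULL == eq) {
            continue;
        }
        *eq = '\0';
        if (0 == strcasecmp (tok, "mpool")) {
            printf ("requested mpool    : %s\n", eq + 1);
        } else if (0 == strcasecmp (tok, "page_size")) {
            printf ("requested page size: %lu bytes\n", parse_page_size (eq + 1));
        }
    }
    free (copy);
    return 0;
}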


@ -0,0 +1,255 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/align.h"
#include "mpool_hugepage.h"
#include <errno.h>
#include <string.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include "opal/mca/mpool/base/base.h"
#include "opal/runtime/opal_params.h"
#include "opal/include/opal_stdint.h"
#include "opal/mca/allocator/base/base.h"
#include <fcntl.h>
#include <sys/mman.h>
static void *mca_mpool_hugepage_alloc (mca_mpool_base_module_t *mpool, size_t size, size_t align,
uint32_t flags);
static void *mca_mpool_hugepage_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size);
static void mca_mpool_hugepage_free (mca_mpool_base_module_t *mpool, void *addr);
static void mca_mpool_hugepage_finalize (mca_mpool_base_module_t *mpool);
static int mca_mpool_hugepage_ft_event (int state);
static void mca_mpool_hugepage_hugepage_constructor (mca_mpool_hugepage_hugepage_t *huge_page)
{
memset ((char *)huge_page + sizeof(huge_page->super), 0, sizeof (*huge_page) - sizeof (huge_page->super));
}
static void mca_mpool_hugepage_hugepage_destructor (mca_mpool_hugepage_hugepage_t *huge_page)
{
free (huge_page->path);
}
OBJ_CLASS_INSTANCE(mca_mpool_hugepage_hugepage_t, opal_list_item_t,
mca_mpool_hugepage_hugepage_constructor,
mca_mpool_hugepage_hugepage_destructor);
static int mca_mpool_rb_hugepage_compare (void *key1, void *key2)
{
if (key1 == key2) {
return 0;
}
return (key1 < key2) ? -1 : 1;
}
/*
* Initializes the mpool module.
*/
int mca_mpool_hugepage_module_init(mca_mpool_hugepage_module_t *mpool,
mca_mpool_hugepage_hugepage_t *huge_page)
{
mca_allocator_base_component_t *allocator_component;
int rc;
mpool->super.mpool_component = &mca_mpool_hugepage_component.super;
mpool->super.mpool_base = NULL; /* no base .. */
mpool->super.mpool_alloc = mca_mpool_hugepage_alloc;
mpool->super.mpool_realloc = mca_mpool_hugepage_realloc;
mpool->super.mpool_free = mca_mpool_hugepage_free;
mpool->super.mpool_finalize = mca_mpool_hugepage_finalize;
mpool->super.mpool_ft_event = mca_mpool_hugepage_ft_event;
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
OBJ_CONSTRUCT(&mpool->lock, opal_mutex_t);
mpool->huge_page = huge_page;
/* use an allocator component to reduce waste when making small allocations */
allocator_component = mca_allocator_component_lookup ("bucket");
if (NULL == allocator_component) {
return OPAL_ERR_NOT_AVAILABLE;
}
mpool->allocator = allocator_component->allocator_init (true, mca_mpool_hugepage_seg_alloc,
mca_mpool_hugepage_seg_free, mpool);
OBJ_CONSTRUCT(&mpool->allocation_tree, opal_rb_tree_t);
rc = opal_rb_tree_init (&mpool->allocation_tree, mca_mpool_rb_hugepage_compare);
if (OPAL_SUCCESS != rc) {
OBJ_DESTRUCT(&mpool->allocation_tree);
return OPAL_ERR_NOT_AVAILABLE;
}
return OPAL_SUCCESS;
}
void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep)
{
mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx;
mca_mpool_hugepage_hugepage_t *huge_page = hugepage_module->huge_page;
size_t size = *sizep;
void *base = NULL;
char *path = NULL;
int flags = MAP_PRIVATE;
int fd = -1;
int rc;
size = OPAL_ALIGN(size, huge_page->page_size, size_t);
if (huge_page->path) {
int32_t count;
count = opal_atomic_add_32 (&huge_page->count, 1);
rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path,
getpid (), count);
if (0 > rc) {
return NULL;
}
fd = open (path, O_RDWR | O_CREAT, 0600);
if (-1 == fd) {
free (path);
return NULL;
}
if (0 != ftruncate (fd, size)) {
close (fd);
unlink (path);
free (path);
return NULL;
}
} else {
#if defined(MAP_ANONYMOUS)
flags |= MAP_ANONYMOUS;
#elif defined(MAP_ANON)
/* older versions of OS X do not define MAP_ANONYMOUS (10.9.x and older) */
flags |= MAP_ANON;
#endif
}
base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags | huge_page->mmap_flags, fd, 0);
if (path) {
close (fd);
unlink (path);
free (path);
}
if (MAP_FAILED == base) {
opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_verbose,
"could not allocate huge page(s). falling back on standard pages");
/* fall back on regular pages */
base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
}
if (MAP_FAILED == base) {
return NULL;
}
opal_mutex_lock (&hugepage_module->lock);
opal_rb_tree_insert (&hugepage_module->allocation_tree, base, (void *) (intptr_t) size);
opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, (int64_t) size);
opal_mutex_unlock (&hugepage_module->lock);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose,
"allocated segment %p of size %lu bytes", base, size));
*sizep = size;
return base;
}
void mca_mpool_hugepage_seg_free (void *ctx, void *addr)
{
mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx;
size_t size;
opal_mutex_lock (&hugepage_module->lock);
size = (size_t) (intptr_t) opal_rb_tree_find (&hugepage_module->allocation_tree, addr);
if (size > 0) {
opal_rb_tree_delete (&hugepage_module->allocation_tree, addr);
OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose,
"freeing segment %p of size %lu bytes", addr, size));
munmap (addr, size);
opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, -(int64_t) size);
}
opal_mutex_unlock (&hugepage_module->lock);
}
/**
* allocate function
*/
static void *mca_mpool_hugepage_alloc (mca_mpool_base_module_t *mpool, size_t size,
size_t align, uint32_t flags)
{
mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
return hugepage_module->allocator->alc_alloc (hugepage_module->allocator, size, align);
}
/**
* realloc function
*/
static void *mca_mpool_hugepage_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size)
{
mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
return hugepage_module->allocator->alc_realloc (hugepage_module->allocator, addr, size);
}
/**
* free function
*/
static void mca_mpool_hugepage_free (mca_mpool_base_module_t *mpool, void *addr)
{
mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
hugepage_module->allocator->alc_free (hugepage_module->allocator, addr);
}
static void mca_mpool_hugepage_finalize (struct mca_mpool_base_module_t *mpool)
{
mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
OBJ_DESTRUCT(&hugepage_module->lock);
if (hugepage_module->allocator) {
(void) hugepage_module->allocator->alc_finalize (hugepage_module->allocator);
hugepage_module->allocator = NULL;
}
}
static int mca_mpool_hugepage_ft_event (int state) {
return OPAL_SUCCESS;
}
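
seg_alloc above obtains huge pages by mmapping a file created on a hugetlbfs mount and falls back to standard pages when that fails. The same fallback pattern can be shown with an anonymous mapping; a minimal sketch, assuming a Linux system (MAP_HUGETLB is Linux-specific, and if no huge pages are configured the fallback path runs, just as in the module):

/* Minimal sketch: try to map one 2 MiB huge page anonymously and fall back
 * on standard pages if the system has no huge pages available. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main (void)
{
    size_t size = 2UL << 20;                      /* one 2 MiB huge page */
    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
    void *base = MAP_FAILED;

#ifdef MAP_HUGETLB
    base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags | MAP_HUGETLB, -1, 0);
#endif
    if (MAP_FAILED == base) {
        /* could not allocate huge page(s); fall back on standard pages */
        base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
    }
    if (MAP_FAILED == base) {
        perror ("mmap");
        return 1;
    }
    printf ("mapped %zu bytes at %p\n", size, base);
    munmap (base, size);
    return 0;
}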

opal/mca/mpool/memkind/Makefile.am (new file, 41 lines)

@ -0,0 +1,41 @@
# -*- indent-tabs-mode:nil -*-
#
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# reserved.
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
AM_CPPFLAGS = $(mpool_memkind_CPPFLAGS)
if MCA_BUILD_opal_mpool_memkind_DSO
component_noinst =
component_install = mca_mpool_memkind.la
else
component_noinst = libmca_mpool_memkind.la
component_install =
endif
memkind_SOURCES = \
mpool_memkind_component.c \
mpool_memkind_module.c \
mpool_memkind.h
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_memkind_la_SOURCES = $(memkind_SOURCES)
nodist_mca_mpool_memkind_la_SOURCES = $(memkind_nodist_SOURCES)
mca_mpool_memkind_la_LIBADD = $(mpool_memkind_LIBS)
mca_mpool_memkind_la_LDFLAGS = -module -avoid-version $(mpool_memkind_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_memkind_la_SOURCES = $(memkind_SOURCES)
nodist_libmca_mpool_memkind_la_SOURCES = $(memkind_nodist_SOURCES)
libmca_mpool_memkind_la_LIBADD = $(mpool_memkind_LIBS)
libmca_mpool_memkind_la_LDFLAGS = -module -avoid-version $(mpool_memkind_LDFLAGS)

opal/mca/mpool/memkind/configure.m4 (new file, 42 lines)

@ -0,0 +1,42 @@
# -*- shell-script -*-
#
# Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AC_DEFUN([MCA_opal_mpool_memkind_CONFIG],[
OPAL_VAR_SCOPE_PUSH([opal_mpool_memkind_happy])
AC_CONFIG_FILES([opal/mca/mpool/memkind/Makefile])
AC_ARG_WITH([memkind], [AC_HELP_STRING([--with-memkind(=DIR)],
[Build with MEMKIND, searching for headers in DIR])])
OPAL_CHECK_WITHDIR([memkind], [$with_memkind], [include/memkind.h])
opal_mpool_memkind_happy="no"
if test "$with_memkind" != "no" ; then
if test -n "$with_memkind" -a "$with_memkind" != "yes" ; then
opal_check_memkind_dir=$with_memkind
fi
OPAL_CHECK_PACKAGE([mpool_memkind], [memkind.h], [memkind], [memkind_malloc], [ -lnuma],
[$opal_check_memkind_dir], [], [opal_mpool_memkind_happy="yes"], [])
if test "$opal_mpool_memkind_happy" != "yes" -a -n "$with_memkind" ; then
AC_MSG_ERROR([MEMKIND support requested but not found. Aborting])
fi
fi
AS_IF([test "$opal_mpool_memkind_happy" = "yes"], [$1], [$2])
# substitute in the things needed to build memkind
AC_SUBST([mpool_memkind_CPPFLAGS])
AC_SUBST([mpool_memkind_LDFLAGS])
AC_SUBST([mpool_memkind_LIBS])
OPAL_VAR_SCOPE_POP
])dnl

opal/mca/mpool/memkind/mpool_memkind.h (new file, 94 lines)

@ -0,0 +1,94 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_MEMKIND_H
#define MCA_MPOOL_MEMKIND_H
#include "opal_config.h"
#include "opal/mca/event/event.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/allocator/allocator.h"
#include <memkind.h>
BEGIN_C_DECLS
static const int mca_mpool_memkind_default_pagesize = 4096;
struct mca_mpool_memkind_module_t {
mca_mpool_base_module_t super;
memkind_t kind;
int page_size;
};
typedef struct mca_mpool_memkind_module_t mca_mpool_memkind_module_t;
struct mca_mpool_memkind_component_t {
mca_mpool_base_component_t super;
int hbw;
int pagesize;
int bind;
int default_partition;
int priority;
char *memkind_file;
int output;
mca_mpool_memkind_module_t modules[MEMKIND_NUM_BASE_KIND];
};
typedef struct mca_mpool_memkind_component_t mca_mpool_memkind_component_t;
OPAL_MODULE_DECLSPEC extern mca_mpool_memkind_component_t mca_mpool_memkind_component;
/*
* Initializes the mpool module.
*/
void mca_mpool_memkind_module_init(mca_mpool_memkind_module_t *mpool, int partition);
/**
* Allocate block of high bandwidth memory.
*/
void* mca_mpool_memkind_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
uint32_t flags);
/**
* realloc function typedef
*/
void* mca_mpool_memkind_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size);
/**
* free function typedef
*/
void mca_mpool_memkind_free(
mca_mpool_base_module_t* mpool,
void * addr);
END_C_DECLS
#endif
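
The component declared here wraps the public memkind API. For reference, a small example that uses that API directly (illustration only; it needs libmemkind and is linked with -lmemkind, and it checks availability before asking for high-bandwidth memory much as the component's open function does):

/* Illustration only: allocate from high-bandwidth memory with the public
 * memkind API, falling back to the default kind when HBW is unavailable. */
#include <stdio.h>
#include <memkind.h>

int main (void)
{
    memkind_t kind = MEMKIND_HBW;

    if (0 != memkind_check_available (kind)) {
        /* no high-bandwidth NUMA nodes on this system: use the default kind */
        kind = MEMKIND_DEFAULT;
    }

    void *buf = memkind_malloc (kind, 1 << 20);   /* 1 MiB */
    if (NULL == buf) {
        fprintf (stderr, "memkind_malloc failed\n");
        return 1;
    }
    printf ("allocated 1 MiB from %s memory\n",
            MEMKIND_HBW == kind ? "high-bandwidth" : "default");
    memkind_free (kind, buf);
    return 0;
}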


@ -0,0 +1,266 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H*/
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif /* HAVE_STDLIB_H */
#include <errno.h>
#include <memkind.h>
#include "opal/mca/base/base.h"
#include "opal/mca/allocator/base/base.h"
#include "mpool_memkind.h"
/*
* Local functions
*/
static int
mca_mpool_memkind_register(void);
static int
mca_mpool_memkind_open(void);
static int
mca_mpool_memkind_close(void);
static int mca_mpool_memkind_query (const char *hints, int *priority,
mca_mpool_base_module_t **module);
mca_mpool_memkind_component_t mca_mpool_memkind_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.mpool_version = {
MCA_MPOOL_BASE_VERSION_3_0_0,
"memkind", /* MCA component name */
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
.mca_open_component = mca_mpool_memkind_open,
.mca_close_component = mca_mpool_memkind_close,
.mca_register_component_params = mca_mpool_memkind_register
},
.mpool_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.mpool_query = mca_mpool_memkind_query,
}
};
static mca_base_var_enum_value_t memory_kinds[] = {
{.value = MEMKIND_PARTITION_DEFAULT, .string = "memkind_default"},
{.value = MEMKIND_PARTITION_HBW, .string = "memkind_hbw"},
{.value = MEMKIND_PARTITION_HBW_HUGETLB, .string = "memkind_hbw_hugetlb"},
{.value = MEMKIND_PARTITION_HBW_PREFERRED, .string = "memkind_hbw_preferred"},
{.value = MEMKIND_PARTITION_HBW_PREFERRED_HUGETLB, .string = "memkind_hbw_preferred_hugetlb"},
{.value = MEMKIND_PARTITION_HUGETLB, .string = "memkind_hugetlb"},
{.value = MEMKIND_PARTITION_HBW_GBTLB, .string = "memkind_hbw_gbtlb"},
{.value = MEMKIND_PARTITION_HBW_PREFERRED_GBTLB, .string = "memkind_hbw_preferred_gbtlb"},
{.value = MEMKIND_PARTITION_GBTLB, .string = "memkind_gbtlb"},
{.value = MEMKIND_PARTITION_HBW_INTERLEAVE, .string = "memkind_hbw_interleave"},
{.value = MEMKIND_PARTITION_INTERLEAVE, .string = "memkind_interleave"},
{.string = NULL},
};
static mca_base_var_enum_t *mca_mpool_memkind_enum = NULL;
static int opal_mpool_memkind_verbose;
static int mca_mpool_memkind_register(void)
{
int rc;
/* register MEMKIND component parameters */
mca_mpool_memkind_component.default_partition = memory_kinds[0].value;
rc = mca_base_var_enum_create ("memkind partition types", memory_kinds, &mca_mpool_memkind_enum);
if (OPAL_SUCCESS != rc) {
return rc;
}
(void) mca_base_component_var_register(&mca_mpool_memkind_component.super.mpool_version,
"default_partition", "Default memkind partition to use",
MCA_BASE_VAR_TYPE_INT, mca_mpool_memkind_enum, 0, 0,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_mpool_memkind_component.default_partition);
mca_mpool_memkind_component.priority = 10;
(void) mca_base_component_var_register(&mca_mpool_memkind_component.super.mpool_version,
"priority", "Default priority of the memkind component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_mpool_memkind_component.priority);
opal_mpool_memkind_verbose = 0;
(void) mca_base_component_var_register(&mca_mpool_memkind_component.super.mpool_version,
"verbose", "Verbosity of the memkind mpool component",
MCA_BASE_VAR_TYPE_INT, &mca_base_var_enum_verbose, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&opal_mpool_memkind_verbose);
return OPAL_SUCCESS;
}
/**
* component open/close/init function
*/
static int mca_mpool_memkind_open (void)
{
memkind_t default_kind;
int rc;
if (opal_mpool_memkind_verbose != 0) {
mca_mpool_memkind_component.output = opal_output_open(NULL);
} else {
mca_mpool_memkind_component.output = -1;
}
rc = memkind_get_kind_by_partition (mca_mpool_memkind_component.default_partition,
&default_kind);
if (0 != rc) {
return OPAL_ERR_NOT_AVAILABLE;
}
if (memkind_check_available (default_kind)) {
const char *kind_string;
mca_mpool_memkind_enum->string_from_value (mca_mpool_memkind_enum,
mca_mpool_memkind_component.default_partition,
&kind_string);
opal_output_verbose (MCA_BASE_VERBOSE_WARN, mca_mpool_memkind_component.output,
"default kind %s not available", kind_string);
return OPAL_ERR_NOT_AVAILABLE;
}
for (int i = 0 ; i < MEMKIND_NUM_BASE_KIND ; ++i) {
mca_mpool_memkind_module_init (mca_mpool_memkind_component.modules + i, i);
}
return OPAL_SUCCESS;
}
static int mca_mpool_memkind_close(void)
{
opal_output_close (mca_mpool_memkind_component.output);
mca_mpool_memkind_component.output = -1;
if (mca_mpool_memkind_enum) {
OBJ_RELEASE(mca_mpool_memkind_enum);
mca_mpool_memkind_enum = NULL;
}
return OPAL_SUCCESS;
}
static int mca_mpool_memkind_query (const char *hints, int *priority_out,
mca_mpool_base_module_t **module)
{
int my_priority = mca_mpool_memkind_component.priority;
char **hint_array, *partition_name;
int partition = -1, rc;
if (module) {
*module = &mca_mpool_memkind_component.modules[mca_mpool_memkind_component.default_partition].super;
}
if (NULL == hints) {
if (priority_out) {
*priority_out = my_priority;
}
return OPAL_SUCCESS;
}
hint_array = opal_argv_split (hints, ',');
if (NULL == hint_array) {
if (priority_out) {
*priority_out = my_priority;
}
return OPAL_SUCCESS;
}
for (int i = 0 ; hint_array[i] ; ++i) {
char *tmp, *key, *value = NULL;
key = hint_array[i];
tmp = strchr (key, '=');
if (tmp) {
*tmp = '\0';
value = tmp + 1;
}
if (0 == strcasecmp (key, "mpool")) {
if (value && 0 == strcasecmp (value, "memkind")) {
/* specifically selected */
my_priority = 100;
} else {
if (priority_out) {
*priority_out = 0;
}
return OPAL_SUCCESS;
}
} else if (0 == strcasecmp (key, "partition")) {
rc = mca_mpool_memkind_enum->value_from_string (mca_mpool_memkind_enum,
value, &partition);
if (OPAL_SUCCESS != rc) {
opal_output_verbose (MCA_BASE_VERBOSE_WARN, mca_mpool_memkind_component.output,
"invalid partition %s specified", value);
}
partition_name = value;
}
}
if (-1 != partition) {
memkind_t kind;
my_priority = 0;
if (!memkind_get_kind_by_partition (partition, &kind)) {
if (memkind_check_available (kind)) {
opal_output_verbose (MCA_BASE_VERBOSE_WARN, mca_mpool_memkind_component.output,
"kind %s not available", partition_name);
} else {
my_priority = 100;
}
}
if (module) {
*module = &mca_mpool_memkind_component.modules[partition].super;
}
}
opal_argv_free (hint_array);
if (priority_out) {
*priority_out = my_priority;
}
return OPAL_SUCCESS;
}


@ -0,0 +1,83 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include <string.h>
#include "opal/mca/mpool/memkind/mpool_memkind.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "opal/mca/mpool/base/base.h"
size_t partition_page_sizes[MEMKIND_NUM_BASE_KIND] = {
4096, 4096, 2097152, 4096, 2097152, 2097152,
1073741824, 1073741824, 1073741824, 4096, 4096,
};
/*
* Initializes the mpool module.
*/
void mca_mpool_memkind_module_init(mca_mpool_memkind_module_t *mpool, int partition)
{
mpool->super.mpool_component = &mca_mpool_memkind_component.super;
mpool->super.mpool_alloc = mca_mpool_memkind_alloc;
mpool->super.mpool_realloc = mca_mpool_memkind_realloc;
mpool->super.mpool_free = mca_mpool_memkind_free;
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
memkind_get_kind_by_partition (partition, &mpool->kind);
mpool->page_size = partition_page_sizes[partition];
}
void* mca_mpool_memkind_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
uint32_t flags)
{
mca_mpool_memkind_module_t *memkind_module = (mca_mpool_memkind_module_t *) mpool;
void *addr;
if (0 == align) {
align = memkind_module->page_size;
}
if ((errno = memkind_posix_memalign(memkind_module->kind, &addr, align, size))!= 0){
return NULL;
}
return addr;
}
void* mca_mpool_memkind_realloc(mca_mpool_base_module_t *mpool, void *addr,
size_t size)
{
mca_mpool_memkind_module_t *memkind_module = (mca_mpool_memkind_module_t *) mpool;
return memkind_realloc (memkind_module->kind, addr, size);
}
void mca_mpool_memkind_free(mca_mpool_base_module_t *mpool, void *addr)
{
mca_mpool_memkind_module_t *memkind_module = (mca_mpool_memkind_module_t *) mpool;
memkind_free(memkind_module->kind, addr);
}


@ -29,138 +29,70 @@
#include "opal_config.h"
#include "opal/mca/mca.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/rcache/base/rcache_base_vma.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#define MCA_MPOOL_FLAGS_CACHE_BYPASS 0x01
#define MCA_MPOOL_FLAGS_PERSIST 0x02
#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x04
#define MCA_MPOOL_FLAGS_INVALID 0x08
#define MCA_MPOOL_FLAGS_SO_MEM 0x10
#define MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM 0x20
#define MCA_MPOOL_ALLOC_FLAG_DEFAULT 0x00
#define MCA_MPOOL_ALLOC_FLAG_USER 0x01
#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x80
struct opal_info_t;
#define MCA_MPOOL_FLAGS_CUDA_GPU_MEM 0x40
/* Only valid in mpool flags. Used to indicate that no external memory
* hooks (ptmalloc2, etc) are required. */
#define MCA_MPOOL_FLAGS_NO_HOOKS 0x80
/* access flags */
enum {
MCA_MPOOL_ACCESS_LOCAL_WRITE = 0x01,
MCA_MPOOL_ACCESS_REMOTE_READ = 0x02,
MCA_MPOOL_ACCESS_REMOTE_WRITE = 0x04,
MCA_MPOOL_ACCESS_REMOTE_ATOMIC = 0x08,
MCA_MPOOL_ACCESS_ANY = 0x0f,
};
struct mca_mpool_base_resources_t;
struct mca_mpool_base_registration_t {
opal_free_list_item_t super;
struct mca_mpool_base_module_t *mpool;
unsigned char* base;
unsigned char* bound;
unsigned char* alloc_base;
int32_t ref_count;
uint32_t flags;
void *mpool_context;
#if OPAL_CUDA_GDR_SUPPORT
unsigned long long gpu_bufID;
#endif /* OPAL_CUDA_GDR_SUPPORT */
int32_t access_flags;
};
typedef struct mca_mpool_base_registration_t mca_mpool_base_registration_t;
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_registration_t);
struct mca_mpool_base_module_t;
typedef struct mca_mpool_base_module_t mca_mpool_base_module_t;
/**
* component initialize
* component query function
*
* @param[in] hints memory pool hints in order of priority. this should
* be replaced by opal_info_t when the work to move
* info down to opal is complete.
* @param[out] priority relative priority of this memory pool component
* @param[out] module best match module
*
* This function should parse the provided hints and return a relative priority
* of the component based on the number of hints matched. For example, if the
* hints are "page_size=2M,high-bandwidth" and a pool matches the page_size but
* not the high-bandwidth hint then the component should return a lower priority
* than if both matched but a higher priority than if a pool matches only the
* high-bandwidth hint.
*
* Memory pools should try to support at a minimum name=value but can define
* any additional keys.
*/
typedef struct mca_mpool_base_module_t* (*mca_mpool_base_component_init_fn_t)(
struct mca_mpool_base_resources_t*);
typedef int (*mca_mpool_base_component_query_fn_t) (const char *hints, int *priority,
mca_mpool_base_module_t **module);
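
The comment above only specifies a contract: the more hints a pool can satisfy, the higher the priority it should report, with a full match outranking any partial match. A toy scoring function that reproduces the documented ordering for the "page_size=2M,high-bandwidth" example (illustration only, not part of this interface; the hint names come from the comment, the weights are arbitrary assumptions):

/* Illustration only: score a hints string against a pool's properties so
 * that matching more (or more specific) hints yields a higher priority. */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

static int toy_query (const char *hints, unsigned long pool_page_size,
                      int pool_is_high_bandwidth)
{
    int priority = 0;
    char *copy = strdup (hints), *saveptr = NULL, *end;

    for (char *tok = strtok_r (copy, ",", &saveptr) ; tok ;
         tok = strtok_r (NULL, ",", &saveptr)) {
        char *value = strchr (tok, '=');
        if (value) {
            *value++ = '\0';
        }
        if (value && 0 == strcasecmp (tok, "page_size")) {
            unsigned long requested = strtoul (value, &end, 0);
            if ('m' == *end || 'M' == *end) requested *= 1024 * 1024;
            else if ('k' == *end || 'K' == *end) requested *= 1024;
            if (requested == pool_page_size) priority += 60;   /* hint matched */
        } else if (0 == strcasecmp (tok, "high-bandwidth")) {
            if (pool_is_high_bandwidth) priority += 40;        /* hint matched */
        }
    }
    free (copy);
    return priority;
}

int main (void)
{
    const char *hints = "page_size=2M,high-bandwidth";

    /* matching only the page size scores below a full match but above
     * matching only the high-bandwidth hint, as the comment above requires */
    printf ("2M pool, not high-bandwidth: %d\n", toy_query (hints, 2097152, 0));
    printf ("2M pool, high-bandwidth    : %d\n", toy_query (hints, 2097152, 1));
    printf ("4K pool, high-bandwidth    : %d\n", toy_query (hints, 4096, 1));
    return 0;
}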
/**
* allocate function typedef
*/
typedef void* (*mca_mpool_base_module_alloc_fn_t)(
struct mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
uint32_t flags,
mca_mpool_base_registration_t** registration);
typedef void *(*mca_mpool_base_module_alloc_fn_t) (mca_mpool_base_module_t *mpool,
size_t size, size_t align,
uint32_t flags);
/**
* realloc function typedef
* allocate function typedef
*/
typedef void* (*mca_mpool_base_module_realloc_fn_t)(
struct mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
mca_mpool_base_registration_t** registration);
typedef void *(*mca_mpool_base_module_realloc_fn_t) (mca_mpool_base_module_t *mpool,
void *addr, size_t size);
/**
* free function typedef
*/
typedef void (*mca_mpool_base_module_free_fn_t)(
struct mca_mpool_base_module_t* mpool,
void *addr,
mca_mpool_base_registration_t* registration);
/**
* register memory
*/
typedef int (*mca_mpool_base_module_register_fn_t)(
struct mca_mpool_base_module_t* mpool,
void * addr,
size_t size,
uint32_t flags,
int32_t access_flags,
mca_mpool_base_registration_t** registration);
/**
* deregister memory
*/
typedef int (*mca_mpool_base_module_deregister_fn_t)(
struct mca_mpool_base_module_t* mpool,
mca_mpool_base_registration_t* registration);
/**
* find registration in this memory pool
*/
typedef int (*mca_mpool_base_module_find_fn_t) (
struct mca_mpool_base_module_t* mpool, void* addr, size_t size,
mca_mpool_base_registration_t **reg);
/**
* release registration
*/
typedef int (*mca_mpool_base_module_release_fn_t) (
struct mca_mpool_base_module_t* mpool,
mca_mpool_base_registration_t* registration);
/**
* release memory region
*/
typedef int (*mca_mpool_base_module_release_memory_fn_t) (
struct mca_mpool_base_module_t* mpool, void *base, size_t size);
typedef void (*mca_mpool_base_module_free_fn_t) (mca_mpool_base_module_t *mpool,
void *addr);
/**
* if appropriate - returns base address of memory pool
*/
typedef void* (*mca_mpool_base_module_address_fn_t)(struct mca_mpool_base_module_t* mpool);
typedef void* (*mca_mpool_base_module_address_fn_t) (mca_mpool_base_module_t *mpool);
/**
* finalize
*/
typedef void (*mca_mpool_base_module_finalize_fn_t)(struct mca_mpool_base_module_t*);
typedef void (*mca_mpool_base_module_finalize_fn_t)(mca_mpool_base_module_t *mpool);
/**
@ -179,7 +111,7 @@ struct mca_mpool_base_component_2_0_0_t {
mca_base_component_t mpool_version; /**< version */
mca_base_component_data_t mpool_data;/**< metadata */
mca_mpool_base_component_init_fn_t mpool_init; /**< init function */
mca_mpool_base_component_query_fn_t mpool_query; /**< query for matching pools */
};
/**
* Convenience typedef.
@ -201,20 +133,14 @@ struct mca_mpool_base_module_t {
mca_mpool_base_module_alloc_fn_t mpool_alloc; /**< allocate function */
mca_mpool_base_module_realloc_fn_t mpool_realloc; /**< reallocate function */
mca_mpool_base_module_free_fn_t mpool_free; /**< free function */
mca_mpool_base_module_register_fn_t mpool_register; /**< register memory */
mca_mpool_base_module_deregister_fn_t mpool_deregister; /**< deregister memory */
mca_mpool_base_module_find_fn_t mpool_find; /**< find registrations in the cache */
mca_mpool_base_module_release_fn_t mpool_release; /**< release a registration from the cache */
mca_mpool_base_module_release_memory_fn_t mpool_release_memory; /**< release memory region from the cache */
mca_mpool_base_module_finalize_fn_t mpool_finalize; /**< finalize */
mca_mpool_base_module_ft_event_fn_t mpool_ft_event; /**< ft_event */
struct mca_rcache_base_module_t *rcache; /* the rcache associated with this mpool */
uint32_t flags; /**< mpool flags */
size_t mpool_allocation_unit; /**< allocation unit used by this mpool */
char *mpool_name; /**< name of this pool module */
};
/**
* Convenience typedef
*/
typedef struct mca_mpool_base_module_t mca_mpool_base_module_t;
/**
@ -237,7 +163,7 @@ typedef struct mca_mpool_base_module_t mca_mpool_base_module_t;
* @retval pointer to the allocated memory
* @retval NULL on failure
*/
OPAL_DECLSPEC void * mca_mpool_base_alloc(size_t size, struct opal_info_t * info);
OPAL_DECLSPEC void * mca_mpool_base_alloc(size_t size, struct opal_info_t * info, const char *hints);
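/* A hedged usage sketch of the hint-based allocation path declared above. The
 * hint keys follow the name=value convention from the component query
 * documentation; which keys a given pool honors is pool-specific, so the keys
 * below are examples only. */
static int my_alloc_example (void)
{
    void *buf = mca_mpool_base_alloc (1 << 20, NULL, "page_size=2M,high-bandwidth");
    if (NULL == buf) {
        return OPAL_ERROR;
    }
    /* ... use the 1 MiB buffer ... */
    return mca_mpool_base_free (buf);
}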
/**
* Function to free memory previously allocated by mca_mpool_base_alloc
@ -261,21 +187,11 @@ OPAL_DECLSPEC int mca_mpool_base_free(void * base);
*/
OPAL_DECLSPEC int mca_mpool_base_tree_node_compare(void * key1, void * key2);
OPAL_DECLSPEC int mca_mpool_base_insert(
void * addr,
size_t size,
mca_mpool_base_module_t* mpool,
void* user_in,
mca_mpool_base_registration_t* registration);
OPAL_DECLSPEC int mca_mpool_base_remove(void * base);
/**
* Macro for use in components that are of type mpool
*/
#define MCA_MPOOL_BASE_VERSION_2_0_0 \
OPAL_MCA_BASE_VERSION_2_1_0("mpool", 2, 0, 0)
#define MCA_MPOOL_BASE_VERSION_3_0_0 \
OPAL_MCA_BASE_VERSION_2_1_0("mpool", 3, 0, 0)
#endif /* MCA_MPOOL_H */


@ -1,54 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2013 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
mpool_sm.h \
mpool_sm_module.c \
mpool_sm_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_mpool_sm_DSO
component_noinst =
component_install = mca_mpool_sm.la
else
component_noinst = libmca_mpool_sm.la
component_install =
endif
# See opal/mca/common/sm/Makefile.am for an explanation of
# libmca_common_sm.la.
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_sm_la_SOURCES = $(sources)
mca_mpool_sm_la_LDFLAGS = -module -avoid-version
mca_mpool_sm_la_LIBADD = \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/sm/lib@OPAL_LIB_PREFIX@mca_common_sm.la
if OPAL_cuda_support
mca_mpool_sm_la_LIBADD += \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la
endif
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_sm_la_SOURCES = $(sources)
libmca_mpool_sm_la_LDFLAGS = -module -avoid-version


@ -1,113 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_SM_H
#define MCA_MPOOL_SM_H
#include "opal_config.h"
#include "opal/mca/event/event.h"
#include "opal/mca/shmem/shmem.h"
#include "opal/mca/common/sm/common_sm.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/allocator/allocator.h"
BEGIN_C_DECLS
struct mca_mpool_sm_component_t {
mca_mpool_base_component_t super;
/* mca_allocator_base_module_t* sm_allocator; */
char *sm_allocator_name;
int verbose;
/* struct mca_mpool_sm_mmap_t *sm_mmap; */
};
typedef struct mca_mpool_sm_component_t mca_mpool_sm_component_t;
typedef struct mca_mpool_base_resources_t {
size_t size;
int32_t mem_node;
/* backing store metadata */
opal_shmem_ds_t bs_meta_buf;
} mca_mpool_base_resources_t;
OPAL_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component;
typedef struct mca_mpool_sm_module_t {
mca_mpool_base_module_t super;
long sm_size;
mca_allocator_base_module_t *sm_allocator;
struct mca_mpool_sm_mmap_t *sm_mmap;
mca_common_sm_module_t *sm_common_module;
int32_t mem_node;
} mca_mpool_sm_module_t;
/*
* Initializes the mpool module.
*/
void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool);
/*
* Returns base address of shared memory mapping.
*/
void* mca_mpool_sm_base(mca_mpool_base_module_t*);
/**
* Allocate block of shared memory.
*/
void* mca_mpool_sm_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
uint32_t flags,
mca_mpool_base_registration_t** registration);
/**
* realloc function typedef
*/
void* mca_mpool_sm_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
mca_mpool_base_registration_t** registration);
/**
* free function typedef
*/
void mca_mpool_sm_free(
mca_mpool_base_module_t* mpool,
void * addr,
mca_mpool_base_registration_t* registration);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OPAL_SUCCESS or failure status
*/
int mca_mpool_sm_ft_event(int state);
END_C_DECLS
#endif


@ -1,210 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H*/
#include <stdlib.h>
#include <errno.h>
#include "opal/mca/base/base.h"
#include "opal/mca/allocator/base/base.h"
#include "mpool_sm.h"
#include "opal/mca/common/sm/common_sm.h"
#if OPAL_ENABLE_FT_CR == 1
#include "opal/runtime/opal_cr.h"
#endif
/*
* Local functions
*/
static int
mca_mpool_sm_register(void);
static int
mca_mpool_sm_open(void);
static int
mca_mpool_sm_close(void);
static mca_mpool_base_module_t *
mca_mpool_sm_init(struct mca_mpool_base_resources_t* resources);
mca_mpool_sm_component_t mca_mpool_sm_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.mpool_version = {
MCA_MPOOL_BASE_VERSION_2_0_0,
.mca_component_name = "sm",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
.mca_open_component = mca_mpool_sm_open,
.mca_close_component = mca_mpool_sm_close,
.mca_register_component_params = mca_mpool_sm_register,
},
.mpool_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.mpool_init = mca_mpool_sm_init,
}
};
static long default_min = 134217728;
static unsigned long long opal_mpool_sm_min_size;
static int opal_mpool_sm_verbose;
static int mca_mpool_sm_register(void)
{
/* register SM component parameters */
(void) mca_base_var_group_component_register(&mca_mpool_sm_component.super.mpool_version,
"Shared memory pool");
mca_mpool_sm_component.sm_allocator_name = "bucket";
(void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version,
"allocator", "Name of allocator component "
"to use with sm mpool", MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_mpool_sm_component.sm_allocator_name);
/* register as an unsigned long long to get up to 64 bits for the size */
opal_mpool_sm_min_size = default_min;
(void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version,
"min_size", "Minimum size of the sm mpool shared memory file",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&opal_mpool_sm_min_size);
opal_mpool_sm_verbose = 0;
(void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version,
"verbose", "Enable verbose output for mpool sm component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&opal_mpool_sm_verbose);
return OPAL_SUCCESS;
}
/**
* component open/close/init function
*/
static int mca_mpool_sm_open(void)
{
if (opal_mpool_sm_verbose != 0) {
mca_mpool_sm_component.verbose = opal_output_open(NULL);
} else {
mca_mpool_sm_component.verbose = -1;
}
return OPAL_SUCCESS;
}
static int mca_mpool_sm_close( void )
{
return OPAL_SUCCESS;
}
static mca_mpool_base_module_t *
mca_mpool_sm_init(struct mca_mpool_base_resources_t *resources)
{
mca_mpool_sm_module_t *mpool_module;
mca_allocator_base_component_t* allocator_component;
/* Make a new mpool module */
mpool_module =
(mca_mpool_sm_module_t *)malloc(sizeof(mca_mpool_sm_module_t));
mca_mpool_sm_module_init(mpool_module);
/* set sm_size */
mpool_module->sm_size = resources->size;
/* clip at the min size */
if (mpool_module->sm_size < (long) opal_mpool_sm_min_size) {
mpool_module->sm_size = (long) opal_mpool_sm_min_size;
}
allocator_component = mca_allocator_component_lookup(
mca_mpool_sm_component.sm_allocator_name);
/* if specified allocator cannot be loaded - look for an alternative */
if (NULL == allocator_component) {
if (opal_list_get_size(&opal_allocator_base_framework.framework_components) == 0) {
mca_base_component_list_item_t *item =
(mca_base_component_list_item_t *)
opal_list_get_first(&opal_allocator_base_framework.framework_components);
allocator_component =
(mca_allocator_base_component_t *)item->cli_component;
opal_output(
0, "mca_mpool_sm_init: "
"unable to locate allocator: %s - using %s\n",
mca_mpool_sm_component.sm_allocator_name,
allocator_component->allocator_version.mca_component_name);
} else {
opal_output(0, "mca_mpool_sm_init: "
"unable to locate allocator: %s\n",
mca_mpool_sm_component.sm_allocator_name);
free(mpool_module);
return NULL;
}
}
mpool_module->mem_node = resources->mem_node;
opal_output(mca_mpool_sm_component.verbose,
"mca_mpool_sm_init: shared memory size used: (%ld)",
mpool_module->sm_size);
if (NULL == (mpool_module->sm_common_module =
mca_common_sm_module_attach(&resources->bs_meta_buf,
sizeof(mca_common_sm_module_t), 8))) {
opal_output(mca_mpool_sm_component.verbose, "mca_mpool_sm_init: "
"unable to create shared memory mapping (%s)",
resources->bs_meta_buf.seg_name);
free(mpool_module);
return NULL;
}
/* setup allocator */
mpool_module->sm_allocator =
allocator_component->allocator_init(true,
mca_common_sm_seg_alloc,
NULL, &(mpool_module->super));
if (NULL == mpool_module->sm_allocator) {
opal_output(0, "mca_mpool_sm_init: unable to initialize allocator");
free(mpool_module);
return NULL;
}
return &mpool_module->super;
}


@ -1,174 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_UDREG_H
#define MCA_MPOOL_UDREG_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/event/event.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/util/proc.h"
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
BEGIN_C_DECLS
struct mca_mpool_udreg_component_t {
mca_mpool_base_component_t super;
bool print_stats;
int leave_pinned;
opal_list_t huge_pages;
bool use_huge_pages;
};
typedef struct mca_mpool_udreg_component_t mca_mpool_udreg_component_t;
OPAL_DECLSPEC extern mca_mpool_udreg_component_t mca_mpool_udreg_component;
struct mca_mpool_udreg_module_t;
struct mca_mpool_base_resources_t {
/* the start of this mpool should match grdma */
char *pool_name;
void *reg_data;
size_t sizeof_reg;
int (*register_mem)(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
/* udreg specific resources */
bool use_kernel_cache;
bool use_evict_w_unreg;
int max_entries;
size_t page_size;
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
struct mca_mpool_udreg_hugepage_t {
opal_list_item_t super;
unsigned long page_size;
char *path;
opal_list_t allocations;
int cnt;
};
typedef struct mca_mpool_udreg_hugepage_t mca_mpool_udreg_hugepage_t;
OBJ_CLASS_DECLARATION(mca_mpool_udreg_hugepage_t);
struct mca_mpool_udreg_hugepage_alloc_t {
opal_list_item_t super;
int fd;
char *path;
void *ptr;
size_t size;
mca_mpool_udreg_hugepage_t *huge_table;
};
typedef struct mca_mpool_udreg_hugepage_alloc_t mca_mpool_udreg_hugepage_alloc_t;
OBJ_CLASS_DECLARATION(mca_mpool_udreg_hugepage_pool_item_t);
struct mca_mpool_udreg_module_t {
mca_mpool_base_module_t super;
struct mca_mpool_base_resources_t resources;
opal_free_list_t reg_list;
mca_mpool_udreg_hugepage_t *huge_page;
opal_mutex_t lock;
void *udreg_handle;
/** used to communicate the access flags to the underlying registration
* function */
int requested_access_flags;
};
typedef struct mca_mpool_udreg_module_t mca_mpool_udreg_module_t;
/*
* Initializes the mpool module.
*/
int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t *mpool);
/*
* Returns base address of shared memory mapping.
*/
void *mca_mpool_udreg_base(mca_mpool_base_module_t *mpool);
/**
* Allocate block of registered memory.
*/
void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size,
size_t align, uint32_t flags,
mca_mpool_base_registration_t** registration);
/**
* realloc block of registered memory
*/
void* mca_mpool_udreg_realloc( mca_mpool_base_module_t *mpool, void* addr,
size_t size, mca_mpool_base_registration_t** registration);
/**
* register block of memory
*/
int mca_mpool_udreg_register(mca_mpool_base_module_t* mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
/**
* deregister memory
*/
int mca_mpool_udreg_deregister(mca_mpool_base_module_t *mpool,
mca_mpool_base_registration_t *reg);
/**
* free memory allocated by alloc function
*/
void mca_mpool_udreg_free(mca_mpool_base_module_t *mpool, void * addr,
mca_mpool_base_registration_t *reg);
/**
* find registration for a given block of memory
*/
int mca_mpool_udreg_find(struct mca_mpool_base_module_t* mpool, void* addr,
size_t size, mca_mpool_base_registration_t **reg);
/**
* finalize mpool
*/
void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OPAL_SUCCESS or failure status
*/
int mca_mpool_udreg_ft_event(int state);
/**
* evict one unused registration from the mpool's lru.
* @return true on success, false on failure
*/
bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool);
END_C_DECLS
#endif

Просмотреть файл

@ -1,206 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/mca/base/base.h"
#include "opal/runtime/opal_params.h"
#include "mpool_udreg.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <fcntl.h>
/*
* Local functions
*/
static int udreg_open(void);
static int udreg_close(void);
static int udreg_register(void);
static mca_mpool_base_module_t* udreg_init(
struct mca_mpool_base_resources_t* resources);
mca_mpool_udreg_component_t mca_mpool_udreg_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.mpool_version ={
MCA_MPOOL_BASE_VERSION_2_0_0,
.mca_component_name = "udreg",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
.mca_open_component = udreg_open,
.mca_close_component = udreg_close,
.mca_register_component_params = udreg_register,
},
.mpool_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.mpool_init = udreg_init
}
};
/**
* component open/close/init function
*/
static int udreg_open(void)
{
OBJ_CONSTRUCT(&mca_mpool_udreg_component.huge_pages, opal_list_t);
return OPAL_SUCCESS;
}
static int udreg_register(void)
{
mca_mpool_udreg_component.print_stats = false;
(void) mca_base_component_var_register(&mca_mpool_udreg_component.super.mpool_version,
"print_stats", "print pool usage statistics at the end of the run",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_mpool_udreg_component.print_stats);
return OPAL_SUCCESS;
}
static int udreg_close(void)
{
opal_list_item_t *item;
while (NULL != (item = opal_list_remove_first (&mca_mpool_udreg_component.huge_pages))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mca_mpool_udreg_component.huge_pages);
return OPAL_SUCCESS;
}
static int page_compare (opal_list_item_t **a,
opal_list_item_t **b) {
mca_mpool_udreg_hugepage_t *pagea = (mca_mpool_udreg_hugepage_t *) *a;
mca_mpool_udreg_hugepage_t *pageb = (mca_mpool_udreg_hugepage_t *) *b;
if (pagea->page_size > pageb->page_size) {
return 1;
} else if (pagea->page_size < pageb->page_size) {
return -1;
}
return 0;
}
static void udreg_find_hugepages (void) {
FILE *fh;
char *path;
char buffer[1024];
char *ctx, *tok;
fh = fopen ("/proc/mounts", "r");
if (NULL == fh) {
return;
}
while (fgets (buffer, 1024, fh)) {
mca_mpool_udreg_hugepage_t *pool;
(void) strtok_r (buffer, " ", &ctx);
path = strtok_r (NULL, " ", &ctx);
tok = strtok_r (NULL, " ", &ctx);
if (0 != strcmp (tok, "hugetlbfs")) {
continue;
}
pool = OBJ_NEW(mca_mpool_udreg_hugepage_t);
if (NULL == pool) {
break;
}
pool->path = strdup (path);
tok = strtok_r (NULL, " ", &ctx);
tok = strtok_r (tok, ",", &ctx);
do {
if (0 == strncmp (tok, "pagesize", 8)) {
break;
}
tok = strtok_r (NULL, ",", &ctx);
} while (tok);
sscanf (tok, "pagesize=%lu", &pool->page_size);
opal_list_append (&mca_mpool_udreg_component.huge_pages, &pool->super);
}
fclose (fh);
opal_list_sort (&mca_mpool_udreg_component.huge_pages, page_compare);
mca_mpool_udreg_component.use_huge_pages =
!!(opal_list_get_size (&mca_mpool_udreg_component.huge_pages));
}
static mca_mpool_base_module_t *
udreg_init(struct mca_mpool_base_resources_t *resources)
{
mca_mpool_udreg_module_t* mpool_module;
static int inited = false;
int rc;
/* Set this here (vs in component.c) because
opal_leave_pinned* may have been set after MCA params were
read (e.g., by the openib btl) */
mca_mpool_udreg_component.leave_pinned = (int)
(1 == opal_leave_pinned || opal_leave_pinned_pipeline);
if (!inited) {
inited = true;
udreg_find_hugepages ();
}
mpool_module =
(mca_mpool_udreg_module_t *) malloc (sizeof (mca_mpool_udreg_module_t));
memmove (&mpool_module->resources, resources, sizeof (*resources));
rc = mca_mpool_udreg_module_init(mpool_module);
if (OPAL_SUCCESS != rc) {
free (mpool_module);
return NULL;
}
return &mpool_module->super;
}


@ -1,546 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/align.h"
#include "mpool_udreg.h"
#include <errno.h>
#include <string.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include "opal/mca/mpool/base/base.h"
#include "opal/runtime/opal_params.h"
#include "opal/include/opal_stdint.h"
#include <fcntl.h>
#include <udreg_pub.h>
#include <sys/mman.h>
static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_context);
static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_context);
static void mca_mpool_udreg_hugepage_constructor (mca_mpool_udreg_hugepage_t *huge_page)
{
memset ((char *)huge_page + sizeof(huge_page->super), 0, sizeof (*huge_page) - sizeof (huge_page->super));
OBJ_CONSTRUCT(&huge_page->allocations, opal_list_t);
}
static void mca_mpool_udreg_hugepage_destructor (mca_mpool_udreg_hugepage_t *huge_page)
{
opal_list_item_t *item;
if (huge_page->path) {
free (huge_page->path);
}
while (NULL != (item = opal_list_remove_first (&huge_page->allocations))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&huge_page->allocations);
}
OBJ_CLASS_INSTANCE(mca_mpool_udreg_hugepage_t, opal_list_item_t,
mca_mpool_udreg_hugepage_constructor,
mca_mpool_udreg_hugepage_destructor);
static void mca_mpool_udreg_hugepage_alloc_constructor (mca_mpool_udreg_hugepage_alloc_t *alloc)
{
memset ((char *)alloc + sizeof(alloc->super), 0, sizeof (*alloc) - sizeof (alloc->super));
alloc->fd = -1;
}
static void mca_mpool_udreg_hugepage_alloc_destructor (mca_mpool_udreg_hugepage_alloc_t *alloc)
{
if (NULL != alloc->ptr) {
munmap (alloc->ptr, alloc->size);
}
if (NULL == alloc->path) {
return;
}
free (alloc->path);
}
OBJ_CLASS_INSTANCE(mca_mpool_udreg_hugepage_alloc_t, opal_list_item_t,
mca_mpool_udreg_hugepage_alloc_constructor,
mca_mpool_udreg_hugepage_alloc_destructor);
static mca_mpool_udreg_hugepage_t *udreg_find_matching_pagesize (size_t size) {
mca_mpool_udreg_hugepage_t *huge_table;
opal_list_item_t *item;
for (item = opal_list_get_first (&mca_mpool_udreg_component.huge_pages) ;
item != opal_list_get_end (&mca_mpool_udreg_component.huge_pages) ;
item = opal_list_get_next (item)) {
huge_table = (mca_mpool_udreg_hugepage_t *) item;
if (huge_table->page_size == size) {
return huge_table;
}
}
return NULL;
}
/*
* Initializes the mpool module.
*/
int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t* mpool)
{
struct udreg_cache_attr cache_attr;
int urc;
mpool->super.mpool_component = &mca_mpool_udreg_component.super;
mpool->super.mpool_base = NULL; /* no base .. */
mpool->super.mpool_alloc = mca_mpool_udreg_alloc;
mpool->super.mpool_realloc = mca_mpool_udreg_realloc;
mpool->super.mpool_free = mca_mpool_udreg_free;
mpool->super.mpool_register = mca_mpool_udreg_register;
mpool->super.mpool_find = mca_mpool_udreg_find;
mpool->super.mpool_deregister = mca_mpool_udreg_deregister;
/* This module relies on udreg for notification of memory release */
mpool->super.mpool_release_memory = NULL;
mpool->super.mpool_finalize = mca_mpool_udreg_finalize;
mpool->super.mpool_ft_event = mca_mpool_udreg_ft_event;
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM | MCA_MPOOL_FLAGS_NO_HOOKS;
if (4096 < mpool->resources.page_size) {
mpool->huge_page = udreg_find_matching_pagesize (mpool->resources.page_size);
} else {
mpool->huge_page = NULL;
}
cache_attr.modes = 0;
/* Create udreg cache */
if (mpool->resources.use_kernel_cache) {
cache_attr.modes |= UDREG_CC_MODE_USE_KERNEL_CACHE;
}
if (mpool->resources.use_evict_w_unreg) {
cache_attr.modes |= UDREG_CC_MODE_USE_EVICT_W_UNREG;
}
if (mca_mpool_udreg_component.leave_pinned) {
cache_attr.modes |= UDREG_CC_MODE_USE_LAZY_DEREG;
}
OBJ_CONSTRUCT(&mpool->lock,opal_mutex_t);
strncpy (cache_attr.cache_name, mpool->resources.pool_name, UDREG_MAX_CACHENAME_LEN);
cache_attr.max_entries = mpool->resources.max_entries;
cache_attr.debug_mode = 0;
cache_attr.debug_rank = 0;
cache_attr.reg_context = mpool;
cache_attr.dreg_context = mpool;
cache_attr.destructor_context = mpool;
cache_attr.device_reg_func = mca_mpool_udreg_reg_func;
cache_attr.device_dereg_func = mca_mpool_udreg_dereg_func;
cache_attr.destructor_callback = NULL;
/* attempt to create the udreg cache. this will fail if one already exists */
(void) UDREG_CacheCreate (&cache_attr);
urc = UDREG_CacheAccess (mpool->resources.pool_name, (udreg_cache_handle_t *) &mpool->udreg_handle);
if (UDREG_RC_SUCCESS != urc) {
return OPAL_ERROR;
}
OBJ_CONSTRUCT(&mpool->reg_list, opal_free_list_t);
opal_free_list_init (&mpool->reg_list, mpool->resources.sizeof_reg,
opal_cache_line_size,
OBJ_CLASS(mca_mpool_base_registration_t),
0, opal_cache_line_size, 0, -1, 32, NULL, 0,
NULL, NULL, NULL);
return OPAL_SUCCESS;
}
/* udreg callback functions */
static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_context)
{
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) reg_context;
mca_mpool_base_registration_t *udreg_reg;
opal_free_list_item_t *item;
int rc;
item = opal_free_list_get (&mpool_udreg->reg_list);
if (NULL == item) {
return NULL;
}
udreg_reg = (mca_mpool_base_registration_t *) item;
udreg_reg->mpool = reg_context;
udreg_reg->base = addr;
udreg_reg->bound = (void *)((uintptr_t) addr + len);
/* pull the access flags out of the mpool module */
udreg_reg->access_flags = mpool_udreg->requested_access_flags;
rc = mpool_udreg->resources.register_mem(mpool_udreg->resources.reg_data,
addr, len, udreg_reg);
if (OPAL_SUCCESS != rc) {
opal_free_list_return (&mpool_udreg->reg_list, item);
udreg_reg = NULL;
}
return udreg_reg;
}
static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_context)
{
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) dreg_context;
mca_mpool_base_registration_t *udreg_reg = (mca_mpool_base_registration_t *) device_data;
int rc;
if (udreg_reg->ref_count) {
/* there are still users of this registration. leave it alone */
return 0;
}
rc = mpool_udreg->resources.deregister_mem(mpool_udreg->resources.reg_data, udreg_reg);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_free_list_return (&mpool_udreg->reg_list,
(opal_free_list_item_t *) udreg_reg);
}
/* might be worth printing out a warning if an error occurs here */
return 0;
}
/* */
static int mca_mpool_udreg_alloc_huge (mca_mpool_udreg_module_t *mpool, size_t size,
void **addr, void **base_addr) {
mca_mpool_udreg_hugepage_alloc_t *alloc;
int rc;
alloc = OBJ_NEW(mca_mpool_udreg_hugepage_alloc_t);
alloc->size = size;
rc = asprintf (&alloc->path, "%s/hugepage.openmpi.%d.%d", mpool->huge_page->path,
getpid (), mpool->huge_page->cnt++);
if (0 > rc) {
OBJ_RELEASE(alloc);
return -1;
}
alloc->fd = open (alloc->path, O_RDWR | O_CREAT, 0600);
if (-1 == alloc->fd) {
OBJ_RELEASE(alloc);
return -1;
}
if (0 != ftruncate (alloc->fd, size)) {
close (alloc->fd);
unlink (alloc->path);
OBJ_RELEASE(alloc);
return -1;
}
alloc->ptr = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
alloc->fd, 0);
if (NULL == alloc->ptr) {
OBJ_RELEASE(alloc);
return -1;
}
close (alloc->fd);
unlink (alloc->path);
alloc->huge_table = mpool->huge_page;
opal_list_append (&mpool->huge_page->allocations, &alloc->super);
*addr = alloc->ptr;
*base_addr = alloc;
return 0;
}
static void mca_mpool_udreg_free_huge (mca_mpool_udreg_hugepage_alloc_t *alloc) {
opal_list_remove_item (&alloc->huge_table->allocations, &alloc->super);
OBJ_RELEASE(alloc);
}
/**
* allocate function
*/
void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size,
size_t align, uint32_t flags, mca_mpool_base_registration_t **reg)
{
mca_mpool_udreg_module_t *udreg_module = (mca_mpool_udreg_module_t *) mpool;
void *base_addr, *addr;
if(0 == align)
align = mca_mpool_base_page_size;
#if OPAL_CUDA_SUPPORT
/* CUDA cannot handle registering overlapping regions, so make
* sure each region is page sized and page aligned. */
align = mca_mpool_base_page_size;
size = OPAL_ALIGN(size, mca_mpool_base_page_size, size_t);
#endif
addr = base_addr = NULL;
if (NULL != udreg_module->huge_page) {
size = OPAL_ALIGN(size, udreg_module->huge_page->page_size, size_t);
mca_mpool_udreg_alloc_huge (udreg_module, size, &addr, &base_addr);
} else {
#ifdef HAVE_POSIX_MEMALIGN
if((errno = posix_memalign(&base_addr, align, size)) != 0)
return NULL;
addr = base_addr;
#else
base_addr = malloc(size + align);
if(NULL == base_addr)
return NULL;
addr = (void*)OPAL_ALIGN((uintptr_t)base_addr, align, uintptr_t);
#endif
}
if (OPAL_SUCCESS != mca_mpool_udreg_register(mpool, addr, size, flags, MCA_MPOOL_ACCESS_ANY, reg)) {
if (udreg_module->huge_page) {
mca_mpool_udreg_free_huge ((mca_mpool_udreg_hugepage_alloc_t *) base_addr);
} else {
free(base_addr);
}
return NULL;
}
(*reg)->alloc_base = (unsigned char *) base_addr;
return addr;
}
bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool)
{
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool;
udreg_return_t urc;
urc = UDREG_Evict (mpool_udreg->udreg_handle);
return (UDREG_RC_SUCCESS == urc);
}
/*
* register memory
*/
int mca_mpool_udreg_register(mca_mpool_base_module_t *mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags,
mca_mpool_base_registration_t **reg)
{
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool;
mca_mpool_base_registration_t *udreg_reg, *old_reg;
bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
udreg_entry_t *udreg_entry;
udreg_return_t urc;
*reg = NULL;
OPAL_THREAD_LOCK(&mpool_udreg->lock);
/* we hold the lock so no other thread can modify these flags until the registration is complete */
mpool_udreg->requested_access_flags = access_flags;
if (false == bypass_cache) {
/* Get a udreg entry for this region */
do {
while (UDREG_RC_SUCCESS !=
(urc = UDREG_Register (mpool_udreg->udreg_handle, addr, size, &udreg_entry))) {
/* try to remove one unused reg and retry */
if (!mca_mpool_udreg_evict (mpool)) {
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
udreg_reg = (mca_mpool_base_registration_t *) udreg_entry->device_data;
if ((udreg_reg->access_flags & access_flags) == access_flags) {
/* sufficient access */
break;
}
old_reg = udreg_reg;
/* to not confuse udreg make sure the new registration covers the same address
* range as the old one. */
addr = old_reg->base;
size = (size_t)((intptr_t) old_reg->bound - (intptr_t) old_reg->base);
/* make the new access flags more permissive */
mpool_udreg->requested_access_flags = access_flags | old_reg->access_flags;
/* get a new registration */
udreg_reg = mca_mpool_udreg_reg_func (addr, size, mpool);
if (NULL == udreg_reg) {
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* update the device data with the new registration */
udreg_entry->device_data = udreg_reg;
/* ensure that mca_mpool_udreg_deregister does not call into udreg since
* we are forcefully evicting the registration here */
old_reg->flags |= MCA_MPOOL_FLAGS_CACHE_BYPASS | MCA_MPOOL_FLAGS_INVALID;
mca_mpool_udreg_dereg_func (old_reg, mpool);
} while (0);
udreg_reg->mpool_context = udreg_entry;
} else {
/* if cache bypass is requested don't use the udreg cache */
while (NULL == (udreg_reg = mca_mpool_udreg_reg_func (addr, size, mpool))) {
/* try to remove one unused reg and retry */
if (!mca_mpool_udreg_evict (mpool)) {
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
udreg_reg->mpool_context = NULL;
}
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
udreg_reg->flags = flags;
*reg = udreg_reg;
udreg_reg->ref_count++;
return OPAL_SUCCESS;
}
/**
* realloc function
*/
void* mca_mpool_udreg_realloc(mca_mpool_base_module_t *mpool, void *addr,
size_t size, mca_mpool_base_registration_t **reg)
{
mca_mpool_base_registration_t *old_reg = *reg;
void *new_mem = mca_mpool_udreg_alloc(mpool, size, 0, old_reg->flags, reg);
memcpy(new_mem, addr, old_reg->bound - old_reg->base + 1);
mca_mpool_udreg_free(mpool, addr, old_reg);
return new_mem;
}
/**
* free function
*/
void mca_mpool_udreg_free(mca_mpool_base_module_t *mpool, void *addr,
mca_mpool_base_registration_t *registration)
{
mca_mpool_udreg_module_t *udreg_module = (mca_mpool_udreg_module_t *) mpool;
mca_mpool_udreg_deregister(mpool, registration);
if (udreg_module->huge_page) {
mca_mpool_udreg_free_huge ((mca_mpool_udreg_hugepage_alloc_t *) registration->alloc_base);
} else {
free (registration->alloc_base);
}
}
int mca_mpool_udreg_find(struct mca_mpool_base_module_t *mpool, void *addr,
size_t size, mca_mpool_base_registration_t **reg)
{
*reg = NULL;
return OPAL_ERR_NOT_FOUND;
}
int mca_mpool_udreg_deregister(struct mca_mpool_base_module_t *mpool,
mca_mpool_base_registration_t *reg)
{
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool;
assert(reg->ref_count > 0);
--reg->ref_count;
if (!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
OPAL_THREAD_LOCK(&mpool_udreg->lock);
UDREG_DecrRefcount (mpool_udreg->udreg_handle, reg->mpool_context);
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
} else {
mca_mpool_udreg_dereg_func (reg, mpool);
}
return OPAL_SUCCESS;
}
void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool)
{
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t*)mpool;
/* Statistic */
if (true == mca_mpool_udreg_component.print_stats) {
uint64_t hit = 0, miss = 0, evicted = 0;
(void) UDREG_GetStat (mpool_udreg->udreg_handle,
UDREG_STAT_CACHE_HIT, &hit);
(void) UDREG_GetStat (mpool_udreg->udreg_handle,
UDREG_STAT_CACHE_MISS, &miss);
(void) UDREG_GetStat (mpool_udreg->udreg_handle,
UDREG_STAT_CACHE_EVICTED, &evicted);
opal_output(0, "%s udreg: stats (hit/miss/evicted): %" PRIu64 "/%" PRIu64 "/%" PRIu64 "\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), hit, miss, evicted);
}
UDREG_CacheRelease (mpool_udreg->udreg_handle);
OBJ_DESTRUCT(&mpool_udreg->reg_list);
OBJ_DESTRUCT(&mpool_udreg->lock);
}
int mca_mpool_udreg_ft_event(int state) {
return OPAL_SUCCESS;
}


@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: LANL
status: maintenance


@ -9,8 +9,8 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC.
# All rights reserved
# Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
# reserved
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -19,8 +19,15 @@
#
headers += \
base/base.h
base/base.h \
base/rcache_base_vma.h \
base/rcache_base_vma_tree.h \
base/rcache_base_mem_cb.h
libmca_rcache_la_SOURCES += \
base/rcache_base_frame.c \
base/rcache_base_create.c
base/rcache_base_create.c \
base/rcache_base_vma.c \
base/rcache_base_vma_tree.c \
base/rcache_base_mem_cb.c


@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,8 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -34,7 +35,8 @@ BEGIN_C_DECLS
/*
* create a module by name
*/
OPAL_DECLSPEC mca_rcache_base_module_t* mca_rcache_base_module_create(const char* name);
OPAL_DECLSPEC mca_rcache_base_module_t *mca_rcache_base_module_create (const char *name, void *user_data,
mca_rcache_base_resources_t *rcache_resources);
/*
* MCA framework
@ -45,6 +47,7 @@ struct mca_rcache_base_selected_module_t {
opal_list_item_t super;
mca_rcache_base_component_t *rcache_component;
mca_rcache_base_module_t *rcache_module;
void *user_data;
};
typedef struct mca_rcache_base_selected_module_t mca_rcache_base_selected_module_t;
@ -52,6 +55,10 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_rcache_base_selected_module_t);
OPAL_DECLSPEC mca_rcache_base_component_t *mca_rcache_base_component_lookup(const char *name);
OPAL_DECLSPEC mca_rcache_base_module_t *mca_rcache_base_module_lookup (const char *name);
OPAL_DECLSPEC int mca_rcache_base_module_destroy(mca_rcache_base_module_t *module);
/* only used within base -- no need to DECLSPEC */
extern int mca_rcache_base_used_mem_hooks;
/*
* Globals


@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -25,33 +28,85 @@
#include "opal/mca/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/rcache/base/rcache_base_mem_cb.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/runtime/opal_params.h"
#include "opal/memoryhooks/memory.h"
mca_rcache_base_module_t* mca_rcache_base_module_create(const char* name)
mca_rcache_base_module_t* mca_rcache_base_module_create (const char* name, void *user_data,
struct mca_rcache_base_resources_t* resources)
{
mca_base_component_list_item_t* cli;
mca_rcache_base_component_t* component = NULL;
mca_rcache_base_module_t* module = NULL;
mca_base_component_list_item_t *cli;
mca_rcache_base_selected_module_t *sm;
bool found = false;
OPAL_LIST_FOREACH(cli, &opal_rcache_base_framework.framework_components, mca_base_component_list_item_t) {
component = (mca_rcache_base_component_t *) cli->cli_component;
if(0 == strcmp(component->rcache_version.mca_component_name, name)) {
found = true;
module = component->rcache_init (resources);
break;
}
}
if (!found) {
if ( NULL == module ) {
return NULL;
}
module = component->rcache_init();
sm = OBJ_NEW(mca_rcache_base_selected_module_t);
sm->rcache_component = component;
sm->rcache_module = module;
sm->user_data = user_data;
opal_list_append(&mca_rcache_base_modules, (opal_list_item_t*) sm);
/* on the very first creation of a module we init the memory
callback */
if (!mca_rcache_base_used_mem_hooks) {
/* Use the memory hooks if leave_pinned or
* leave_pinned_pipeline is enabled (note that either of these
* leave_pinned variables may have been set by a user MCA
* param or elsewhere in the code base). Yes, we could have
* coded this more succinctly, but this is more clear. Do not
* check memory hooks if the rcache does not provide a
* range invalidation function. */
if ((opal_leave_pinned > 0 || opal_leave_pinned_pipeline) &&
module->rcache_invalidate_range) {
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
opal_mem_hooks_support_level())) {
opal_mem_hooks_register_release(mca_rcache_base_mem_cb, NULL);
} else {
opal_show_help("help-rcache-base.txt", "leave pinned failed",
true, name, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
opal_proc_local_get()->proc_hostname);
return NULL;
}
/* Set this to true so that rcache_base_close knows to
cleanup */
mca_rcache_base_used_mem_hooks = 1;
}
}
return module;
}
int mca_rcache_base_module_destroy(mca_rcache_base_module_t *module)
{
mca_rcache_base_selected_module_t *sm, *next;
OPAL_LIST_FOREACH_SAFE(sm, next, &mca_rcache_base_modules, mca_rcache_base_selected_module_t) {
if (module == sm->rcache_module) {
opal_list_remove_item(&mca_rcache_base_modules, (opal_list_item_t*)sm);
if (NULL != sm->rcache_module->rcache_finalize) {
sm->rcache_module->rcache_finalize(sm->rcache_module);
}
OBJ_RELEASE(sm);
return OPAL_SUCCESS;
}
}
return OPAL_ERR_NOT_FOUND;
}
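/* A hedged sketch of how a consumer (e.g. a BTL) might drive the create and
 * destroy entry points above. The component name "grdma", the btl_module
 * pointer and the contents of *resources are assumptions for illustration;
 * the resources hold the caller's register/deregister callbacks and the size
 * of its device registration structure. */
static mca_rcache_base_module_t *my_btl_rcache_setup (void *btl_module,
                                                      mca_rcache_base_resources_t *resources)
{
    /* returns NULL if the component is unavailable or leave-pinned support
     * could not be enabled (see the memory-hook check in module_create) */
    return mca_rcache_base_module_create ("grdma", btl_module, resources);
}
static void my_btl_rcache_teardown (mca_rcache_base_module_t *rcache)
{
    /* finalizes the module and removes it from the framework's module list */
    (void) mca_rcache_base_module_destroy (rcache);
}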


@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -28,7 +29,9 @@
#include "opal/mca/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/memoryhooks/memory.h"
#include "opal/constants.h"
#include "rcache_base_mem_cb.h"
/*
* The following file was created by configure. It contains extern
@ -38,6 +41,24 @@
#include "opal/mca/rcache/base/static-components.h"
int mca_rcache_base_used_mem_hooks;
/**
* Memory Pool Registration
*/
static void mca_rcache_base_registration_constructor( mca_rcache_base_registration_t * reg )
{
reg->rcache = NULL;
reg->base = NULL;
reg->bound = NULL;
reg->ref_count = 0;
reg->flags = 0;
}
OBJ_CLASS_INSTANCE(mca_rcache_base_registration_t, opal_free_list_item_t,
mca_rcache_base_registration_constructor, NULL);
/*
* Global variables
@ -54,9 +75,7 @@ static int mca_rcache_base_close(void)
/* Finalize all the rcache components and free their list items */
for (item = opal_list_remove_first(&mca_rcache_base_modules);
NULL != item;
item = opal_list_remove_first(&mca_rcache_base_modules)) {
while (NULL != (item = opal_list_remove_first(&mca_rcache_base_modules))) {
sm = (mca_rcache_base_selected_module_t *) item;
/* Blatantly ignore the return code (what would we do to recover,
@ -70,6 +89,12 @@ static int mca_rcache_base_close(void)
OBJ_RELEASE(sm);
}
/* deregister memory free callback */
if (mca_rcache_base_used_mem_hooks) {
opal_mem_hooks_unregister_release(mca_rcache_base_mem_cb);
}
/* All done */
/* Close all remaining available components */
return mca_base_framework_components_close(&opal_rcache_base_framework, NULL);
}
@ -89,7 +114,7 @@ static int mca_rcache_base_open(mca_base_open_flag_t flags)
return mca_base_framework_components_open(&opal_rcache_base_framework, flags);
}
MCA_BASE_FRAMEWORK_DECLARE(opal, rcache, "OPAL Rcache", NULL,
MCA_BASE_FRAMEWORK_DECLARE(opal, rcache, "OPAL Registration Cache", NULL,
mca_rcache_base_open, mca_rcache_base_close,
mca_rcache_base_static_components, 0);
