1
1

opal: rework mpool and rcache frameworks

This commit rewrites both the mpool and rcache frameworks. Summary of
changes:

 - Before this change a significant portion of the rcache
   functionality lived in mpool components. This meant that it was
   impossible to add a new memory pool to use with rdma networks
   (ugni, openib, etc) without duplicating the functionality of an
   existing mpool component. All the registration functionality has
   been removed from the mpool and placed in the rcache framework.

 - All registration cache mpool components (udreg, grdma, gpusm,
   rgpusm) have been changed to rcache components. rcaches are
   allocated and released in the same way mpool components were.

 - It is now valid to pass NULL as the resources argument when
   creating an rcache. At this time the gpusm and rgpusm components
   support this. All other rcache components require non-NULL
   resources.

 - A new mpool component has been added: hugepage. This component
   supports huge page allocations on Linux.

 - Memory pools are now allocated using "hints". Each mpool component
   is queried with the hints and returns a priority. The current hints
   supported are NULL (uses posix_memalign/malloc), page_size=x (huge
   page mpool), and mpool=x.

 - The sm mpool has been moved to common/sm. This reflects that the sm
   mpool is specialized and not meant for any general
   allocations. This mpool may be moved back into the mpool framework
   if there is any objection.

 - The opal_free_list_init arguments have been updated. The unused0
   argument is now used to pass in the registration cache module. The
   mpool registration flags are now rcache registration flags.

 - All components have been updated to make use of the new framework
   interfaces.

As this commit makes significant changes to both the mpool and rcache
frameworks both versions have been bumped to 3.0.0.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-11-02 12:07:08 -07:00
родитель 6d7ada9675
Коммит d4afb16f5a
127 изменённых файлов: 4624 добавлений и 5096 удалений

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -145,7 +148,7 @@ BEGIN_C_DECLS
ompi_crcp_base_btl_state_t* ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl, ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration, mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
size_t reserve, size_t reserve,
size_t* size, size_t* size,
@ -154,7 +157,7 @@ BEGIN_C_DECLS
ompi_crcp_base_btl_state_t* ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl, ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration, mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
size_t reserve, size_t reserve,
size_t* size, size_t* size,

Просмотреть файл

@ -336,7 +336,7 @@ ompi_crcp_base_none_btl_free( struct mca_btl_base_module_t* btl,
ompi_crcp_base_btl_state_t* ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl, ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration, mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
size_t reserve, size_t reserve,
size_t* size, size_t* size,
@ -349,7 +349,7 @@ ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
ompi_crcp_base_btl_state_t* ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl, ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration, mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
size_t reserve, size_t reserve,
size_t* size, size_t* size,

Просмотреть файл

@ -235,7 +235,7 @@ typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_free_fn_t)
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_prepare_fn_t) typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_prepare_fn_t)
( struct mca_btl_base_module_t* btl, ( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration, mca_rcache_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
size_t reserve, size_t reserve,
size_t* size, size_t* size,

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -12,6 +13,8 @@
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science * Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -55,10 +58,7 @@ extern char *ompi_pml_base_bsend_allocator_name;
/* /*
* Routine to return pages to sub-allocator as needed * Routine to return pages to sub-allocator as needed
*/ */
static void* mca_pml_bsend_alloc_segment( static void* mca_pml_bsend_alloc_segment(void *ctx, size_t *size_inout)
struct mca_mpool_base_module_t* module,
size_t* size_inout,
mca_mpool_base_registration_t** registration)
{ {
void *addr; void *addr;
size_t size = *size_inout; size_t size = *size_inout;
@ -70,7 +70,6 @@ static void* mca_pml_bsend_alloc_segment(
addr = mca_pml_bsend_addr; addr = mca_pml_bsend_addr;
mca_pml_bsend_addr += size; mca_pml_bsend_addr += size;
*size_inout = size; *size_inout = size;
if (NULL != registration) *registration = NULL;
return addr; return addr;
} }
@ -232,7 +231,7 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request)
/* allocate a buffer to hold packed message */ /* allocate a buffer to hold packed message */
sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc( sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL); mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0);
if(NULL == sendreq->req_addr) { if(NULL == sendreq->req_addr) {
/* release resources when request is freed */ /* release resources when request is freed */
sendreq->req_base.req_pml_complete = true; sendreq->req_base.req_pml_complete = true;
@ -287,7 +286,7 @@ int mca_pml_base_bsend_request_alloc(ompi_request_t* request)
/* allocate a buffer to hold packed message */ /* allocate a buffer to hold packed message */
sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc( sendreq->req_addr = mca_pml_bsend_allocator->alc_alloc(
mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0, NULL); mca_pml_bsend_allocator, sendreq->req_bytes_packed, 0);
if(NULL == sendreq->req_addr) { if(NULL == sendreq->req_addr) {
/* release resources when request is freed */ /* release resources when request is freed */
sendreq->req_base.req_pml_complete = true; sendreq->req_base.req_pml_complete = true;
@ -321,7 +320,7 @@ void* mca_pml_base_bsend_request_alloc_buf( size_t length )
/* allocate a buffer to hold packed message */ /* allocate a buffer to hold packed message */
buf = mca_pml_bsend_allocator->alc_alloc( buf = mca_pml_bsend_allocator->alc_alloc(
mca_pml_bsend_allocator, length, 0, NULL); mca_pml_bsend_allocator, length, 0);
if(NULL == buf) { if(NULL == buf) {
/* release resources when request is freed */ /* release resources when request is freed */
OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex);

Просмотреть файл

@ -79,12 +79,9 @@ mca_pml_base_component_2_0_0_t mca_pml_ob1_component = {
.pmlm_finalize = mca_pml_ob1_component_fini, .pmlm_finalize = mca_pml_ob1_component_fini,
}; };
void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool, void *mca_pml_ob1_seg_alloc (void *ctx, size_t* size);
size_t* size,
mca_mpool_base_registration_t** registration);
void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool, void mca_pml_ob1_seg_free (void *ctx, void *segment);
void* segment );
static inline int mca_pml_ob1_param_register_int( static inline int mca_pml_ob1_param_register_int(
const char* param_name, const char* param_name,
@ -354,13 +351,12 @@ int mca_pml_ob1_component_fini(void)
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
void *mca_pml_ob1_seg_alloc( struct mca_mpool_base_module_t* mpool, void *mca_pml_ob1_seg_alloc (void *ctx, size_t *size)
size_t* size, {
mca_mpool_base_registration_t** registration) {
return malloc(*size); return malloc(*size);
} }
void mca_pml_ob1_seg_free( struct mca_mpool_base_module_t* mpool, void mca_pml_ob1_seg_free (void *ctx, void *segment)
void* segment ) { {
free(segment); free(segment);
} }

Просмотреть файл

@ -81,7 +81,7 @@ do { \
buffers[0].addr = (char*) \ buffers[0].addr = (char*) \
mca_pml_ob1.allocator->alc_alloc( mca_pml_ob1.allocator, \ mca_pml_ob1.allocator->alc_alloc( mca_pml_ob1.allocator, \
buffers[0].len, \ buffers[0].len, \
0, NULL); \ 0); \
_ptr = (unsigned char*)(buffers[0].addr); \ _ptr = (unsigned char*)(buffers[0].addr); \
macro_segments[0].seg_addr.pval = buffers[0].addr; \ macro_segments[0].seg_addr.pval = buffers[0].addr; \
} \ } \

Просмотреть файл

@ -42,8 +42,8 @@ int mca_vprotocol_base_request_parasite(void)
pml_fl_save.fl_max_to_alloc, pml_fl_save.fl_max_to_alloc,
pml_fl_save.fl_num_per_alloc, pml_fl_save.fl_num_per_alloc,
pml_fl_save.fl_mpool, pml_fl_save.fl_mpool,
pml_fl_save.fl_mpool_reg_flags, pml_fl_save.fl_rcache_reg_flags,
0, pml_fl_save.fl_rcache,
pml_fl_save.item_init, pml_fl_save.item_init,
pml_fl_save.ctx); pml_fl_save.ctx);
if(OMPI_SUCCESS != ret) return ret; if(OMPI_SUCCESS != ret) return ret;
@ -71,8 +71,8 @@ int mca_vprotocol_base_request_parasite(void)
pml_fl_save.fl_max_to_alloc, pml_fl_save.fl_max_to_alloc,
pml_fl_save.fl_num_per_alloc, pml_fl_save.fl_num_per_alloc,
pml_fl_save.fl_mpool, pml_fl_save.fl_mpool,
pml_fl_save.fl_mpool_reg_flags, pml_fl_save.fl_rcache_reg_flags,
0, pml_fl_save.fl_rcache,
pml_fl_save.item_init, pml_fl_save.item_init,
pml_fl_save.ctx); pml_fl_save.ctx);
if(OMPI_SUCCESS != ret) return ret; if(OMPI_SUCCESS != ret) return ret;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -12,6 +13,8 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science * Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -43,6 +46,8 @@ static const char FUNC_NAME[] = "MPI_Alloc_mem";
int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr) int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
{ {
char info_value[MPI_MAX_INFO_VAL + 1];
char *mpool_hints = NULL;
if (MPI_PARAM_CHECK) { if (MPI_PARAM_CHECK) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME); OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
@ -69,7 +74,16 @@ int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void *baseptr)
OPAL_CR_ENTER_LIBRARY(); OPAL_CR_ENTER_LIBRARY();
*((void **) baseptr) = mca_mpool_base_alloc((size_t) size, (struct opal_info_t*)info); if (MPI_INFO_NULL != info) {
int flag;
(void) ompi_info_get (info, "mpool_hints", MPI_MAX_INFO_VAL, info_value, &flag);
if (flag) {
mpool_hints = info_value;
}
}
*((void **) baseptr) = mca_mpool_base_alloc ((size_t) size, (struct opal_info_t*)info,
mpool_hints);
OPAL_CR_EXIT_LIBRARY(); OPAL_CR_EXIT_LIBRARY();
if (NULL == *((void **) baseptr)) { if (NULL == *((void **) baseptr)) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM, return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NO_MEM,

Просмотреть файл

@ -628,13 +628,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* Select which MPI components to use */ /* Select which MPI components to use */
if (OMPI_SUCCESS !=
(ret = mca_mpool_base_init(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
error = "mca_mpool_base_init() failed";
goto error;
}
if (OMPI_SUCCESS != if (OMPI_SUCCESS !=
(ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS, (ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) { ompi_mpi_thread_multiple))) {

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -28,6 +28,9 @@
#include "opal/align.h" #include "opal/align.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/util/sys_limits.h"
typedef struct opal_free_list_item_t opal_free_list_memory_t; typedef struct opal_free_list_item_t opal_free_list_memory_t;
@ -49,17 +52,22 @@ static void opal_free_list_construct(opal_free_list_t* fl)
fl->fl_payload_buffer_alignment = 0; fl->fl_payload_buffer_alignment = 0;
fl->fl_frag_class = OBJ_CLASS(opal_free_list_item_t); fl->fl_frag_class = OBJ_CLASS(opal_free_list_item_t);
fl->fl_mpool = NULL; fl->fl_mpool = NULL;
fl->fl_rcache = NULL;
/* default flags */ /* default flags */
fl->fl_mpool_reg_flags = MCA_MPOOL_FLAGS_CACHE_BYPASS | fl->fl_rcache_reg_flags = MCA_RCACHE_FLAGS_CACHE_BYPASS |
MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM; MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM;
fl->ctx = NULL; fl->ctx = NULL;
OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t); OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
} }
static void opal_free_list_allocation_release (opal_free_list_t *fl, opal_free_list_memory_t *fl_mem) static void opal_free_list_allocation_release (opal_free_list_t *fl, opal_free_list_memory_t *fl_mem)
{ {
if (NULL != fl->fl_rcache) {
fl->fl_rcache->rcache_deregister (fl->fl_rcache, fl_mem->registration);
}
if (NULL != fl->fl_mpool) { if (NULL != fl->fl_mpool) {
fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr, fl_mem->registration); fl->fl_mpool->mpool_free (fl->fl_mpool, fl_mem->ptr);
} else if (fl_mem->ptr) { } else if (fl_mem->ptr) {
free (fl_mem->ptr); free (fl_mem->ptr);
} }
@ -108,8 +116,9 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
opal_class_t *frag_class, size_t payload_buffer_size, opal_class_t *frag_class, size_t payload_buffer_size,
size_t payload_buffer_alignment, int num_elements_to_alloc, size_t payload_buffer_alignment, int num_elements_to_alloc,
int max_elements_to_alloc, int num_elements_per_alloc, int max_elements_to_alloc, int num_elements_per_alloc,
mca_mpool_base_module_t* mpool, int mpool_reg_flags, mca_mpool_base_module_t *mpool, int rcache_reg_flags,
void *unused0, opal_free_list_item_init_fn_t item_init, void *ctx) mca_rcache_base_module_t *rcache, opal_free_list_item_init_fn_t item_init,
void *ctx)
{ {
/* alignment must be more than zero and power of two */ /* alignment must be more than zero and power of two */
if (frag_alignment <= 1 || (frag_alignment & (frag_alignment - 1))) { if (frag_alignment <= 1 || (frag_alignment & (frag_alignment - 1))) {
@ -137,11 +146,12 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
flist->fl_max_to_alloc = max_elements_to_alloc; flist->fl_max_to_alloc = max_elements_to_alloc;
flist->fl_num_allocated = 0; flist->fl_num_allocated = 0;
flist->fl_num_per_alloc = num_elements_per_alloc; flist->fl_num_per_alloc = num_elements_per_alloc;
flist->fl_mpool = mpool; flist->fl_mpool = mpool ? mpool : mca_mpool_base_default_module;
flist->fl_rcache = rcache;
flist->fl_frag_alignment = frag_alignment; flist->fl_frag_alignment = frag_alignment;
flist->fl_payload_buffer_alignment = payload_buffer_alignment; flist->fl_payload_buffer_alignment = payload_buffer_alignment;
flist->item_init = item_init; flist->item_init = item_init;
flist->fl_mpool_reg_flags |= mpool_reg_flags; flist->fl_rcache_reg_flags |= rcache_reg_flags;
flist->ctx = ctx; flist->ctx = ctx;
if (num_elements_to_alloc) { if (num_elements_to_alloc) {
@ -153,10 +163,10 @@ int opal_free_list_init (opal_free_list_t *flist, size_t frag_size, size_t frag_
int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements) int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
{ {
unsigned char *ptr, *mpool_alloc_ptr = NULL, *payload_ptr = NULL; unsigned char *ptr, *payload_ptr = NULL;
opal_free_list_memory_t *alloc_ptr; opal_free_list_memory_t *alloc_ptr;
size_t alloc_size, head_size, elem_size = 0; size_t alloc_size, head_size, elem_size = 0, buffer_size, align;
mca_mpool_base_registration_t *reg = NULL; mca_rcache_base_registration_t *reg = NULL;
int rc = OPAL_SUCCESS; int rc = OPAL_SUCCESS;
if (flist->fl_max_to_alloc && (flist->fl_num_allocated + num_elements) > if (flist->fl_max_to_alloc && (flist->fl_num_allocated + num_elements) >
@ -170,6 +180,29 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
head_size = OPAL_ALIGN(flist->fl_frag_size, flist->fl_frag_alignment, size_t); head_size = OPAL_ALIGN(flist->fl_frag_size, flist->fl_frag_alignment, size_t);
/* NTH: calculate allocation alignment first as it might change the number of elements */
if (0 != flist->fl_payload_buffer_size) {
elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size,
flist->fl_payload_buffer_alignment, size_t);
/* elem_size should not be 0 here */
assert (elem_size > 0);
buffer_size = num_elements * elem_size;
align = flist->fl_payload_buffer_alignment;
if (MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM & flist->fl_rcache_reg_flags) {
size_t pagesize = opal_getpagesize ();
/* CUDA cannot handle registering overlapping regions, so make
* sure each region is page sized and page aligned. */
align = OPAL_ALIGN(align, pagesize, size_t);
buffer_size = OPAL_ALIGN(buffer_size, pagesize, size_t);
/* avoid wasting space in the buffer */
num_elements = buffer_size / elem_size;
}
}
/* calculate head allocation size */ /* calculate head allocation size */
alloc_size = num_elements * head_size + sizeof(opal_free_list_memory_t) + alloc_size = num_elements * head_size + sizeof(opal_free_list_memory_t) +
flist->fl_frag_alignment; flist->fl_frag_alignment;
@ -180,37 +213,27 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
} }
if (0 != flist->fl_payload_buffer_size) { if (0 != flist->fl_payload_buffer_size) {
elem_size = OPAL_ALIGN(flist->fl_payload_buffer_size,
flist->fl_payload_buffer_alignment, size_t);
/* elem_size should not be 0 here */
assert (elem_size > 0);
/* allocate the rest from the mpool (or use memalign/malloc) */ /* allocate the rest from the mpool (or use memalign/malloc) */
if(flist->fl_mpool != NULL) { payload_ptr = (unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool, buffer_size, align, 0);
payload_ptr = mpool_alloc_ptr = if (NULL == payload_ptr) {
(unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool,
num_elements * elem_size,
flist->fl_payload_buffer_alignment,
flist->fl_mpool_reg_flags, &reg);
} else {
#ifdef HAVE_POSIX_MEMALIGN
posix_memalign ((void **) &mpool_alloc_ptr, flist->fl_payload_buffer_alignment,
num_elements * elem_size);
payload_ptr = mpool_alloc_ptr;
#else
mpool_alloc_ptr = (unsigned char *) malloc (num_elements * elem_size +
flist->fl_payload_buffer_alignment);
payload_ptr = (unsigned char *) OPAL_ALIGN((uintptr_t)mpool_alloc_ptr,
flist->fl_payload_buffer_alignment,
uintptr_t);
#endif
}
if(NULL == mpool_alloc_ptr) {
free(alloc_ptr); free(alloc_ptr);
return OPAL_ERR_TEMP_OUT_OF_RESOURCE; return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
} }
if (flist->fl_rcache) {
rc = flist->fl_rcache->rcache_register (flist->fl_rcache, payload_ptr, num_elements * elem_size,
flist->fl_rcache_reg_flags, MCA_RCACHE_ACCESS_ANY, &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
free (alloc_ptr);
if (flist->fl_mpool) {
flist->fl_mpool->mpool_free (flist->fl_mpool, payload_ptr);
} else {
free (payload_ptr);
}
return rc;
}
}
} }
/* make the alloc_ptr a list item, save the chunk in the allocations list, /* make the alloc_ptr a list item, save the chunk in the allocations list,
@ -219,7 +242,7 @@ int opal_free_list_grow_st (opal_free_list_t* flist, size_t num_elements)
opal_list_append(&(flist->fl_allocations), (opal_list_item_t*)alloc_ptr); opal_list_append(&(flist->fl_allocations), (opal_list_item_t*)alloc_ptr);
alloc_ptr->registration = reg; alloc_ptr->registration = reg;
alloc_ptr->ptr = mpool_alloc_ptr; alloc_ptr->ptr = payload_ptr;
ptr = (unsigned char*)alloc_ptr + sizeof(opal_free_list_memory_t); ptr = (unsigned char*)alloc_ptr + sizeof(opal_free_list_memory_t);
ptr = OPAL_ALIGN_PTR(ptr, flist->fl_frag_alignment, unsigned char*); ptr = OPAL_ALIGN_PTR(ptr, flist->fl_frag_alignment, unsigned char*);

Просмотреть файл

@ -77,6 +77,8 @@ struct opal_free_list_t {
/** mpool to use for free list buffer allocation (posix_memalign/malloc /** mpool to use for free list buffer allocation (posix_memalign/malloc
* are used if this is NULL) */ * are used if this is NULL) */
struct mca_mpool_base_module_t *fl_mpool; struct mca_mpool_base_module_t *fl_mpool;
/** registration cache */
struct mca_rcache_base_module_t *fl_rcache;
/** Multi-threaded lock. Used when the free list is empty. */ /** Multi-threaded lock. Used when the free list is empty. */
opal_mutex_t fl_lock; opal_mutex_t fl_lock;
/** Multi-threaded condition. Used when threads are waiting on free /** Multi-threaded condition. Used when threads are waiting on free
@ -84,8 +86,8 @@ struct opal_free_list_t {
opal_condition_t fl_condition; opal_condition_t fl_condition;
/** List of free list allocation */ /** List of free list allocation */
opal_list_t fl_allocations; opal_list_t fl_allocations;
/** Flags to pass to the mpool register function */ /** Flags to pass to the rcache register function */
int fl_mpool_reg_flags; int fl_rcache_reg_flags;
/** Free list item initialization function */ /** Free list item initialization function */
opal_free_list_item_init_fn_t item_init; opal_free_list_item_init_fn_t item_init;
/** Initialization function context */ /** Initialization function context */
@ -98,7 +100,7 @@ struct mca_mpool_base_registration_t;
struct opal_free_list_item_t struct opal_free_list_item_t
{ {
opal_list_item_t super; opal_list_item_t super;
struct mca_mpool_base_registration_t *registration; struct mca_rcache_base_registration_t *registration;
void *ptr; void *ptr;
}; };
typedef struct opal_free_list_item_t opal_free_list_item_t; typedef struct opal_free_list_item_t opal_free_list_item_t;
@ -118,8 +120,8 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_free_list_item_t);
* @param max_elements_to_alloc (IN) Maximum number of elements to allocate. * @param max_elements_to_alloc (IN) Maximum number of elements to allocate.
* @param num_elements_per_alloc (IN) Number of elements to grow by per allocation. * @param num_elements_per_alloc (IN) Number of elements to grow by per allocation.
* @param mpool (IN) Optional memory pool for allocations. * @param mpool (IN) Optional memory pool for allocations.
* @param mpool_reg_flags (IN) Flags to pass to mpool registration function. * @param rcache_reg_flags (IN) Flags to pass to rcache registration function.
* @param unused0 (IN) Future. Must be NULL. * @param rcache (IN) Optional registration cache.
* @param item_init (IN) Optional item initialization function * @param item_init (IN) Optional item initialization function
* @param ctx (IN) Initialization function context. * @param ctx (IN) Initialization function context.
*/ */
@ -134,8 +136,8 @@ OPAL_DECLSPEC int opal_free_list_init (opal_free_list_t *free_list,
int max_elements_to_alloc, int max_elements_to_alloc,
int num_elements_per_alloc, int num_elements_per_alloc,
struct mca_mpool_base_module_t *mpool, struct mca_mpool_base_module_t *mpool,
int mpool_reg_flags, int rcache_reg_flags,
void *unused0, struct mca_rcache_base_module_t *rcache,
opal_free_list_item_init_fn_t item_init, opal_free_list_item_init_fn_t item_init,
void *ctx); void *ctx);

Просмотреть файл

@ -22,6 +22,8 @@
#ifndef OPAL_ALIGN_H #ifndef OPAL_ALIGN_H
#define OPAL_ALIGN_H #define OPAL_ALIGN_H
#define OPAL_DOWN_ALIGN(x,a,t) ((x) & ~(((t)(a)-1)))
#define OPAL_DOWN_ALIGN_PTR(x,a,t) ((t)OPAL_DOWN_ALIGN((uintptr_t)x, a, uintptr_t))
#define OPAL_ALIGN(x,a,t) (((x)+((t)(a)-1)) & ~(((t)(a)-1))) #define OPAL_ALIGN(x,a,t) (((x)+((t)(a)-1)) & ~(((t)(a)-1)))
#define OPAL_ALIGN_PTR(x,a,t) ((t)OPAL_ALIGN((uintptr_t)x, a, uintptr_t)) #define OPAL_ALIGN_PTR(x,a,t) ((t)OPAL_ALIGN((uintptr_t)x, a, uintptr_t))
#define OPAL_ALIGN_PAD_AMOUNT(x,s) ((~((uintptr_t)(x))+1) & ((uintptr_t)(s)-1)) #define OPAL_ALIGN_PAD_AMOUNT(x,s) ((~((uintptr_t)(x))+1) & ((uintptr_t)(s)-1))

Просмотреть файл

@ -27,7 +27,6 @@
#include "opal_config.h" #include "opal_config.h"
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/mca/mpool/mpool.h"
BEGIN_C_DECLS BEGIN_C_DECLS
@ -40,16 +39,14 @@ struct mca_allocator_base_module_t;
typedef void* (*mca_allocator_base_module_alloc_fn_t)( typedef void* (*mca_allocator_base_module_alloc_fn_t)(
struct mca_allocator_base_module_t*, struct mca_allocator_base_module_t*,
size_t size, size_t size,
size_t align, size_t align);
mca_mpool_base_registration_t** registration);
/** /**
* The realloc function typedef * The realloc function typedef
*/ */
typedef void* (*mca_allocator_base_module_realloc_fn_t)( typedef void* (*mca_allocator_base_module_realloc_fn_t)(
struct mca_allocator_base_module_t*, struct mca_allocator_base_module_t*,
void*, size_t, void*, size_t);
mca_mpool_base_registration_t** registration);
/** /**
* Free function typedef * Free function typedef
@ -90,7 +87,7 @@ struct mca_allocator_base_module_t {
mca_allocator_base_module_finalize_fn_t alc_finalize; mca_allocator_base_module_finalize_fn_t alc_finalize;
/**< Finalize and free everything */ /**< Finalize and free everything */
/* memory pool and resources */ /* memory pool and resources */
struct mca_mpool_base_module_t* alc_mpool; void *alc_context;
}; };
/** /**
* Convenience typedef. * Convenience typedef.
@ -103,19 +100,16 @@ typedef struct mca_allocator_base_module_t mca_allocator_base_module_t;
* provided by the module to the allocator framework. * provided by the module to the allocator framework.
*/ */
typedef void* (*mca_allocator_base_component_segment_alloc_fn_t)( typedef void* (*mca_allocator_base_component_segment_alloc_fn_t)(void *ctx,
struct mca_mpool_base_module_t* module, size_t *size);
size_t* size,
mca_mpool_base_registration_t** registration);
/** /**
* A function to free memory from the control of the allocator framework * A function to free memory from the control of the allocator framework
* back to the system. This function is to be provided by the module to the * back to the system. This function is to be provided by the module to the
* allocator framework. * allocator framework.
*/ */
typedef void (*mca_allocator_base_component_segment_free_fn_t)( typedef void (*mca_allocator_base_component_segment_free_fn_t)(void *ctx,
struct mca_mpool_base_module_t* module, void *segment);
void* segment);
/** /**
@ -126,7 +120,7 @@ typedef struct mca_allocator_base_module_t*
bool enable_mpi_threads, bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc, mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free, mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* mpool void *context
); );
/** /**

Просмотреть файл

@ -45,7 +45,7 @@ struct mca_allocator_base_selected_module_t {
typedef struct mca_allocator_base_selected_module_t mca_allocator_base_selected_module_t; typedef struct mca_allocator_base_selected_module_t mca_allocator_base_selected_module_t;
/** /**
* Declares mca_allocator_base_selected_module_t as a class.
*/ */
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_allocator_base_selected_module_t); OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_allocator_base_selected_module_t);

Просмотреть файл

@ -78,7 +78,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
bool enable_mpi_threads, bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc, mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free, mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* mpool) void *context)
{ {
mca_allocator_basic_module_t *module = (mca_allocator_basic_module_t *) mca_allocator_basic_module_t *module = (mca_allocator_basic_module_t *)
malloc(sizeof(mca_allocator_basic_module_t)); malloc(sizeof(mca_allocator_basic_module_t));
@ -91,7 +91,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
module->super.alc_free = mca_allocator_basic_free; module->super.alc_free = mca_allocator_basic_free;
module->super.alc_compact = mca_allocator_basic_compact; module->super.alc_compact = mca_allocator_basic_compact;
module->super.alc_finalize = mca_allocator_basic_finalize; module->super.alc_finalize = mca_allocator_basic_finalize;
module->super.alc_mpool = mpool; module->super.alc_context = context;
module->seg_alloc = segment_alloc; module->seg_alloc = segment_alloc;
module->seg_free = segment_free; module->seg_free = segment_free;
OBJ_CONSTRUCT(&module->seg_list, opal_list_t); OBJ_CONSTRUCT(&module->seg_list, opal_list_t);
@ -163,8 +163,7 @@ static void mca_allocator_basic_combine_next(
void *mca_allocator_basic_alloc( void *mca_allocator_basic_alloc(
mca_allocator_base_module_t * base, mca_allocator_base_module_t * base,
size_t size, size_t size,
size_t align, size_t align)
mca_mpool_base_registration_t** registration)
{ {
mca_allocator_basic_module_t* module = (mca_allocator_basic_module_t*)base; mca_allocator_basic_module_t* module = (mca_allocator_basic_module_t*)base;
mca_allocator_basic_segment_t* seg; mca_allocator_basic_segment_t* seg;
@ -198,7 +197,7 @@ void *mca_allocator_basic_alloc(
/* request additional block */ /* request additional block */
allocated_size = size; allocated_size = size;
if(NULL == (addr = (unsigned char *)module->seg_alloc(module->super.alc_mpool, &allocated_size, registration))) { if(NULL == (addr = (unsigned char *)module->seg_alloc(module->super.alc_context, &allocated_size))) {
OPAL_THREAD_UNLOCK(&module->seg_lock); OPAL_THREAD_UNLOCK(&module->seg_lock);
return NULL; return NULL;
} }
@ -239,14 +238,13 @@ void *mca_allocator_basic_alloc(
void * mca_allocator_basic_realloc( void * mca_allocator_basic_realloc(
mca_allocator_base_module_t * base, mca_allocator_base_module_t * base,
void * ptr, void * ptr,
size_t size, size_t size)
mca_mpool_base_registration_t** registration)
{ {
unsigned char* addr = ((unsigned char*)ptr) - sizeof(size_t); unsigned char* addr = ((unsigned char*)ptr) - sizeof(size_t);
size_t alloc_size = *(size_t*)addr; size_t alloc_size = *(size_t*)addr;
if(size <= alloc_size) if(size <= alloc_size)
return ptr; return ptr;
addr = (unsigned char *)mca_allocator_basic_alloc(base,size,0,registration); addr = (unsigned char *)mca_allocator_basic_alloc(base, size, 0);
if(addr == NULL) if(addr == NULL)
return addr; return addr;
memcpy(addr,ptr,alloc_size); memcpy(addr,ptr,alloc_size);

Просмотреть файл

@ -77,7 +77,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
bool enable_mpi_threads, bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc, mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free, mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* module void *ctx
); );
/** /**
@ -94,8 +94,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
void * mca_allocator_basic_alloc( void * mca_allocator_basic_alloc(
mca_allocator_base_module_t * mem, mca_allocator_base_module_t * mem,
size_t size, size_t size,
size_t align, size_t align);
mca_mpool_base_registration_t** registration);
/** /**
* Attempts to resize the passed region of memory into a larger or a smaller * Attempts to resize the passed region of memory into a larger or a smaller
@ -114,8 +113,7 @@ mca_allocator_base_module_t* mca_allocator_basic_component_init(
void * mca_allocator_basic_realloc( void * mca_allocator_basic_realloc(
mca_allocator_base_module_t * mem, mca_allocator_base_module_t * mem,
void * ptr, void * ptr,
size_t size, size_t size);
mca_mpool_base_registration_t** registration);
/** /**
* Frees the passed region of memory * Frees the passed region of memory

Просмотреть файл

@ -24,14 +24,13 @@
#include "opal/mca/allocator/allocator.h" #include "opal/mca/allocator/allocator.h"
#include "opal/constants.h" #include "opal/constants.h"
#include "opal/mca/allocator/bucket/allocator_bucket_alloc.h" #include "opal/mca/allocator/bucket/allocator_bucket_alloc.h"
#include "opal/mca/mpool/mpool.h" #include "opal/mca/base/mca_base_var.h"
struct mca_allocator_base_module_t* mca_allocator_bucket_module_init( struct mca_allocator_base_module_t* mca_allocator_bucket_module_init(
bool enable_mpi_threads, bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc, mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free, mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* mpool void *context);
);
int mca_allocator_bucket_module_open(void); int mca_allocator_bucket_module_open(void);
@ -39,8 +38,7 @@ int mca_allocator_bucket_module_close(void);
void * mca_allocator_bucket_alloc_wrapper( void * mca_allocator_bucket_alloc_wrapper(
struct mca_allocator_base_module_t* allocator, struct mca_allocator_base_module_t* allocator,
size_t size, size_t align, size_t size, size_t align);
mca_mpool_base_registration_t** registration);
static int mca_allocator_num_buckets; static int mca_allocator_num_buckets;
@ -66,7 +64,7 @@ struct mca_allocator_base_module_t* mca_allocator_bucket_module_init(
bool enable_mpi_threads, bool enable_mpi_threads,
mca_allocator_base_component_segment_alloc_fn_t segment_alloc, mca_allocator_base_component_segment_alloc_fn_t segment_alloc,
mca_allocator_base_component_segment_free_fn_t segment_free, mca_allocator_base_component_segment_free_fn_t segment_free,
struct mca_mpool_base_module_t* mpool) void *context)
{ {
size_t alloc_size = sizeof(mca_allocator_bucket_t); size_t alloc_size = sizeof(mca_allocator_bucket_t);
mca_allocator_bucket_t * retval; mca_allocator_bucket_t * retval;
@ -87,7 +85,7 @@ struct mca_allocator_base_module_t* mca_allocator_bucket_module_init(
allocator->super.alc_free = mca_allocator_bucket_free; allocator->super.alc_free = mca_allocator_bucket_free;
allocator->super.alc_compact = mca_allocator_bucket_cleanup; allocator->super.alc_compact = mca_allocator_bucket_cleanup;
allocator->super.alc_finalize = mca_allocator_bucket_finalize; allocator->super.alc_finalize = mca_allocator_bucket_finalize;
allocator->super.alc_mpool = mpool; allocator->super.alc_context = context;
return (mca_allocator_base_module_t *) allocator; return (mca_allocator_base_module_t *) allocator;
} }
@ -111,13 +109,12 @@ int mca_allocator_bucket_module_close(void) {
void * mca_allocator_bucket_alloc_wrapper( void * mca_allocator_bucket_alloc_wrapper(
struct mca_allocator_base_module_t* allocator, struct mca_allocator_base_module_t* allocator,
size_t size, size_t size,
size_t align, size_t align)
mca_mpool_base_registration_t** registration)
{ {
if(0 == align){ if(0 == align){
return mca_allocator_bucket_alloc(allocator, size, registration); return mca_allocator_bucket_alloc(allocator, size);
} }
return mca_allocator_bucket_alloc_align(allocator, size, align, registration); return mca_allocator_bucket_alloc_align(allocator, size, align);
} }

Просмотреть файл

@ -71,10 +71,8 @@ mca_allocator_bucket_t * mca_allocator_bucket_init(
* region or NULL if there was an error * region or NULL if there was an error
* *
*/ */
void * mca_allocator_bucket_alloc( void * mca_allocator_bucket_alloc(mca_allocator_base_module_t * mem,
mca_allocator_base_module_t * mem, size_t size)
size_t size,
mca_mpool_base_registration_t** registration)
{ {
mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem; mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem;
/* initialize for the later bit shifts */ /* initialize for the later bit shifts */
@ -113,7 +111,7 @@ void * mca_allocator_bucket_alloc(
allocated_size += sizeof(mca_allocator_bucket_segment_head_t); allocated_size += sizeof(mca_allocator_bucket_segment_head_t);
/* attempt to get the memory */ /* attempt to get the memory */
segment_header = (mca_allocator_bucket_segment_head_t *) segment_header = (mca_allocator_bucket_segment_head_t *)
mem_options->get_mem_fn(mem_options->super.alc_mpool, &allocated_size, registration); mem_options->get_mem_fn(mem_options->super.alc_context, &allocated_size);
if(NULL == segment_header) { if(NULL == segment_header) {
/* release the lock */ /* release the lock */
OPAL_THREAD_UNLOCK(&(mem_options->buckets[bucket_num].lock)); OPAL_THREAD_UNLOCK(&(mem_options->buckets[bucket_num].lock));
@ -153,11 +151,8 @@ void * mca_allocator_bucket_alloc(
/* /*
* allocates an aligned region of memory * allocates an aligned region of memory
*/ */
void * mca_allocator_bucket_alloc_align( void * mca_allocator_bucket_alloc_align(mca_allocator_base_module_t * mem,
mca_allocator_base_module_t * mem, size_t size, size_t alignment)
size_t size,
size_t alignment,
mca_mpool_base_registration_t** registration)
{ {
mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem; mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem;
int bucket_num = 1; int bucket_num = 1;
@ -177,7 +172,7 @@ void * mca_allocator_bucket_alloc_align(
bucket_size = size + sizeof(mca_allocator_bucket_chunk_header_t); bucket_size = size + sizeof(mca_allocator_bucket_chunk_header_t);
allocated_size = aligned_max_size; allocated_size = aligned_max_size;
/* get some memory */ /* get some memory */
ptr = mem_options->get_mem_fn(mem_options->super.alc_mpool, &allocated_size, registration); ptr = mem_options->get_mem_fn(mem_options->super.alc_context, &allocated_size);
if(NULL == ptr) { if(NULL == ptr) {
return(NULL); return(NULL);
} }
@ -236,11 +231,8 @@ void * mca_allocator_bucket_alloc_align(
/* /*
* function to reallocate the segment of memory * function to reallocate the segment of memory
*/ */
void * mca_allocator_bucket_realloc( void * mca_allocator_bucket_realloc(mca_allocator_base_module_t * mem,
mca_allocator_base_module_t * mem, void * ptr, size_t size)
void * ptr,
size_t size,
mca_mpool_base_registration_t** registration)
{ {
mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem; mca_allocator_bucket_t * mem_options = (mca_allocator_bucket_t *) mem;
/* initialize for later bit shifts */ /* initialize for later bit shifts */
@ -261,7 +253,7 @@ void * mca_allocator_bucket_realloc(
return(ptr); return(ptr);
} }
/* we need a new space in memory, so let's get it */ /* we need a new space in memory, so let's get it */
ret_ptr = mca_allocator_bucket_alloc((mca_allocator_base_module_t *) mem_options, size, registration); ret_ptr = mca_allocator_bucket_alloc((mca_allocator_base_module_t *) mem_options, size);
if(NULL == ret_ptr) { if(NULL == ret_ptr) {
/* we were unable to get a larger area of memory */ /* we were unable to get a larger area of memory */
return(NULL); return(NULL);
@ -341,7 +333,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem)
next_segment = segment->next_segment; next_segment = segment->next_segment;
/* free the memory */ /* free the memory */
if(mem_options->free_mem_fn) if(mem_options->free_mem_fn)
mem_options->free_mem_fn(mem->alc_mpool, segment); mem_options->free_mem_fn(mem->alc_context, segment);
segment = next_segment; segment = next_segment;
} }
mem_options->buckets[i].free_chunk = NULL; mem_options->buckets[i].free_chunk = NULL;
@ -378,7 +370,7 @@ int mca_allocator_bucket_cleanup(mca_allocator_base_module_t * mem)
*segment_header = segment->next_segment; *segment_header = segment->next_segment;
/* free the memory */ /* free the memory */
if(mem_options->free_mem_fn) if(mem_options->free_mem_fn)
mem_options->free_mem_fn(mem->alc_mpool, segment); mem_options->free_mem_fn(mem->alc_context, segment);
} else { } else {
/* go to next segment */ /* go to next segment */
segment_header = &((*segment_header)->next_segment); segment_header = &((*segment_header)->next_segment);

Просмотреть файл

@ -1,4 +1,5 @@
/** /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -131,8 +134,7 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t;
*/ */
void * mca_allocator_bucket_alloc( void * mca_allocator_bucket_alloc(
mca_allocator_base_module_t * mem, mca_allocator_base_module_t * mem,
size_t size, size_t size);
mca_mpool_base_registration_t** registration);
/** /**
* Accepts a request for memory in a specific region defined by the * Accepts a request for memory in a specific region defined by the
@ -152,8 +154,7 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t;
void * mca_allocator_bucket_alloc_align( void * mca_allocator_bucket_alloc_align(
mca_allocator_base_module_t * mem, mca_allocator_base_module_t * mem,
size_t size, size_t size,
size_t alignment, size_t alignment);
mca_mpool_base_registration_t** registration);
/** /**
* Attempts to resize the passed region of memory into a larger or a smaller * Attempts to resize the passed region of memory into a larger or a smaller
@ -172,8 +173,7 @@ typedef struct mca_allocator_bucket_t mca_allocator_bucket_t;
void * mca_allocator_bucket_realloc( void * mca_allocator_bucket_realloc(
mca_allocator_base_module_t * mem, mca_allocator_base_module_t * mem,
void * ptr, void * ptr,
size_t size, size_t size);
mca_mpool_base_registration_t** registration);
/** /**
* Frees the passed region of memory * Frees the passed region of memory

Просмотреть файл

@ -218,6 +218,12 @@ static int group_register (const char *project_name, const char *framework_name,
return -1; return -1;
} }
/* avoid groups of the form opal_opal, ompi_ompi, etc */
if (NULL != project_name && NULL != framework_name &&
(0 == strcmp (project_name, framework_name))) {
project_name = NULL;
}
group_id = group_find (project_name, framework_name, component_name, true); group_id = group_find (project_name, framework_name, component_name, true);
if (0 <= group_id) { if (0 <= group_id) {
ret = mca_base_var_group_get_internal (group_id, &group, true); ret = mca_base_var_group_get_internal (group_id, &group, true);

Просмотреть файл

@ -122,6 +122,7 @@
#include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_convertor.h"
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/mpool.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/crs/crs.h" #include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h" #include "opal/mca/crs/base/base.h"
@ -256,23 +257,23 @@ enum {
/** Allow local write on the registered region. If a region is registered /** Allow local write on the registered region. If a region is registered
* with this flag the registration can be used as the local handle for a * with this flag the registration can be used as the local handle for a
* btl_get operation. */ * btl_get operation. */
MCA_BTL_REG_FLAG_LOCAL_WRITE = MCA_MPOOL_ACCESS_LOCAL_WRITE, MCA_BTL_REG_FLAG_LOCAL_WRITE = MCA_RCACHE_ACCESS_LOCAL_WRITE,
/** Allow remote read on the registered region. If a region is registered /** Allow remote read on the registered region. If a region is registered
* with this flag the registration can be used as the remote handle for a * with this flag the registration can be used as the remote handle for a
* btl_get operation. */ * btl_get operation. */
MCA_BTL_REG_FLAG_REMOTE_READ = MCA_MPOOL_ACCESS_REMOTE_READ, MCA_BTL_REG_FLAG_REMOTE_READ = MCA_RCACHE_ACCESS_REMOTE_READ,
/** Allow remote write on the registered region. If a region is registered /** Allow remote write on the registered region. If a region is registered
* with this flag the registration can be used as the remote handle for a * with this flag the registration can be used as the remote handle for a
* btl_put operation. */ * btl_put operation. */
MCA_BTL_REG_FLAG_REMOTE_WRITE = MCA_MPOOL_ACCESS_REMOTE_WRITE, MCA_BTL_REG_FLAG_REMOTE_WRITE = MCA_RCACHE_ACCESS_REMOTE_WRITE,
/** Allow remote atomic operations on the registered region. If a region is /** Allow remote atomic operations on the registered region. If a region is
* registered with this flag the registration can be used as the remote * registered with this flag the registration can be used as the remote
* handle for a btl_atomic_op or btl_atomic_fop operation. */ * handle for a btl_atomic_op or btl_atomic_fop operation. */
MCA_BTL_REG_FLAG_REMOTE_ATOMIC = MCA_MPOOL_ACCESS_REMOTE_ATOMIC, MCA_BTL_REG_FLAG_REMOTE_ATOMIC = MCA_RCACHE_ACCESS_REMOTE_ATOMIC,
/** Allow any btl operation on the registered region. If a region is registered /** Allow any btl operation on the registered region. If a region is registered
* with this flag the registration can be used as the local or remote handle for * with this flag the registration can be used as the local or remote handle for
* any btl operation. */ * any btl operation. */
MCA_BTL_REG_FLAG_ACCESS_ANY = MCA_MPOOL_ACCESS_ANY, MCA_BTL_REG_FLAG_ACCESS_ANY = MCA_RCACHE_ACCESS_ANY,
#if OPAL_CUDA_GDR_SUPPORT #if OPAL_CUDA_GDR_SUPPORT
/** Region is in GPU memory */ /** Region is in GPU memory */
MCA_BTL_REG_FLAG_CUDA_GPU_MEM = 0x00010000, MCA_BTL_REG_FLAG_CUDA_GPU_MEM = 0x00010000,

Просмотреть файл

@ -57,7 +57,7 @@
#include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_convertor.h"
#include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h" #include "opal/mca/rcache/rcache.h"
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
#include "opal/datatype/opal_datatype_cuda.h" #include "opal/datatype/opal_datatype_cuda.h"
@ -733,7 +733,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
mca_btl_openib_component.buffer_alignment, mca_btl_openib_component.buffer_alignment,
mca_btl_openib_component.ib_free_list_num, -1, mca_btl_openib_component.ib_free_list_num, -1,
mca_btl_openib_component.ib_free_list_inc, mca_btl_openib_component.ib_free_list_inc,
device->mpool, 0, NULL, mca_btl_openib_frag_init, device->mpool, 0, device->rcache, mca_btl_openib_frag_init,
init_data); init_data);
if (OPAL_SUCCESS != rc) { if (OPAL_SUCCESS != rc) {
/* If we're "out of memory", this usually means that we ran /* If we're "out of memory", this usually means that we ran
@ -774,7 +774,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc, mca_btl_openib_component.ib_free_list_inc,
device->mpool, 0, NULL, mca_btl_openib_frag_init, device->mpool, 0, device->rcache, mca_btl_openib_frag_init,
init_data); init_data);
if (OPAL_SUCCESS != rc) { if (OPAL_SUCCESS != rc) {
/* If we're "out of memory", this usually means that we /* If we're "out of memory", this usually means that we
@ -807,7 +807,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc, mca_btl_openib_component.ib_free_list_inc,
device->mpool, 0, NULL, mca_btl_openib_frag_init, device->mpool, 0, device->rcache, mca_btl_openib_frag_init,
init_data)) { init_data)) {
rc = OPAL_ERROR; rc = OPAL_ERROR;
goto exit; goto exit;
@ -1903,6 +1903,7 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_
mca_btl_base_endpoint_t *endpoint, mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags) void *base, size_t size, uint32_t flags)
{ {
mca_btl_openib_module_t *openib_module = (mca_btl_openib_module_t *) btl;
mca_btl_openib_reg_t *reg; mca_btl_openib_reg_t *reg;
uint32_t mflags = 0; uint32_t mflags = 0;
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
@ -1910,12 +1911,12 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_
#if OPAL_CUDA_GDR_SUPPORT #if OPAL_CUDA_GDR_SUPPORT
if (flags & MCA_BTL_REG_FLAG_CUDA_GPU_MEM) { if (flags & MCA_BTL_REG_FLAG_CUDA_GPU_MEM) {
mflags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM; mflags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM;
} }
#endif /* OPAL_CUDA_GDR_SUPPORT */ #endif /* OPAL_CUDA_GDR_SUPPORT */
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mflags, access_flags, rc = openib_module->device->rcache->rcache_register (openib_module->device->rcache, base, size, mflags,
(mca_mpool_base_registration_t **) &reg); access_flags, (mca_rcache_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == reg)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == reg)) {
return NULL; return NULL;
} }
@ -1925,9 +1926,10 @@ static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_
static int mca_btl_openib_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle) static int mca_btl_openib_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{ {
mca_btl_openib_module_t *openib_module = (mca_btl_openib_module_t *) btl;
mca_btl_openib_reg_t *reg = (mca_btl_openib_reg_t *)((intptr_t) handle - offsetof (mca_btl_openib_reg_t, btl_handle)); mca_btl_openib_reg_t *reg = (mca_btl_openib_reg_t *)((intptr_t) handle - offsetof (mca_btl_openib_reg_t, btl_handle));
btl->btl_mpool->mpool_deregister (btl->btl_mpool, (mca_mpool_base_registration_t *) reg); openib_module->device->rcache->rcache_deregister (openib_module->device->rcache, (mca_rcache_base_registration_t *) reg);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -45,6 +45,7 @@
#include "opal/mca/event/event.h" #include "opal/mca/event/event.h"
#include "opal/threads/threads.h" #include "opal/threads/threads.h"
#include "opal/mca/btl/btl.h" #include "opal/mca/btl/btl.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/mpool.h"
#include "opal/mca/btl/base/btl_base_error.h" #include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/btl/base/base.h" #include "opal/mca/btl/base/base.h"
@ -184,8 +185,11 @@ struct mca_btl_openib_component_t {
opal_mutex_t ib_lock; opal_mutex_t ib_lock;
/**< lock for accessing module state */ /**< lock for accessing module state */
char* ib_mpool_name; char* ib_mpool_hints;
/**< name of ib memory pool */ /**< hints for selecting an mpool component */
char *ib_rcache_name;
/**< name of ib registration cache */
uint8_t num_pp_qps; /**< number of pp qp's */ uint8_t num_pp_qps; /**< number of pp qp's */
uint8_t num_srq_qps; /**< number of srq qp's */ uint8_t num_srq_qps; /**< number of srq qp's */
@ -374,6 +378,7 @@ typedef struct mca_btl_openib_device_t {
struct ibv_cq *ib_cq[2]; struct ibv_cq *ib_cq[2];
uint32_t cq_size[2]; uint32_t cq_size[2];
mca_mpool_base_module_t *mpool; mca_mpool_base_module_t *mpool;
mca_rcache_base_module_t *rcache;
/* MTU for this device */ /* MTU for this device */
uint32_t mtu; uint32_t mtu;
/* Whether this device supports eager RDMA */ /* Whether this device supports eager RDMA */
@ -502,7 +507,7 @@ struct mca_btl_base_registration_handle_t {
}; };
struct mca_btl_openib_reg_t { struct mca_btl_openib_reg_t {
mca_mpool_base_registration_t base; mca_rcache_base_registration_t base;
struct ibv_mr *mr; struct ibv_mr *mr;
mca_btl_base_registration_handle_t btl_handle; mca_btl_base_registration_handle_t btl_handle;
}; };

Просмотреть файл

@ -67,7 +67,8 @@
#include "opal/mca/btl/btl.h" #include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h" #include "opal/mca/btl/base/base.h"
#include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h" #include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/common/cuda/common_cuda.h" #include "opal/mca/common/cuda/common_cuda.h"
#include "opal/mca/common/verbs/common_verbs.h" #include "opal/mca/common/verbs/common_verbs.h"
#include "opal/runtime/opal_params.h" #include "opal/runtime/opal_params.h"
@ -512,27 +513,27 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
} }
} }
static int openib_reg_mr(void *reg_data, void *base, size_t size, static int openib_reg_mr (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg) mca_rcache_base_registration_t *reg)
{ {
mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data; mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data;
mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg; mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;
enum ibv_access_flags access_flag = 0; enum ibv_access_flags access_flag = 0;
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_READ) { if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_READ) {
access_flag |= IBV_ACCESS_REMOTE_READ; access_flag |= IBV_ACCESS_REMOTE_READ;
} }
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_WRITE) { if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_WRITE) {
access_flag |= IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE; access_flag |= IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE;
} }
if (reg->access_flags & MCA_MPOOL_ACCESS_LOCAL_WRITE) { if (reg->access_flags & MCA_RCACHE_ACCESS_LOCAL_WRITE) {
access_flag |= IBV_ACCESS_LOCAL_WRITE; access_flag |= IBV_ACCESS_LOCAL_WRITE;
} }
#if HAVE_DECL_IBV_ATOMIC_HCA #if HAVE_DECL_IBV_ATOMIC_HCA
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_ATOMIC) { if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_ATOMIC) {
access_flag |= IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_LOCAL_WRITE; access_flag |= IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_LOCAL_WRITE;
} }
#endif #endif
@ -545,7 +546,7 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
device->mem_reg_active += size; device->mem_reg_active += size;
#if HAVE_DECL_IBV_ACCESS_SO #if HAVE_DECL_IBV_ACCESS_SO
if (reg->flags & MCA_MPOOL_FLAGS_SO_MEM) { if (reg->flags & MCA_RCACHE_FLAGS_SO_MEM) {
access_flag |= IBV_ACCESS_SO; access_flag |= IBV_ACCESS_SO;
} }
#endif #endif
@ -567,16 +568,16 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
(int) (reg->bound - reg->base + 1), reg->flags)); (int) (reg->bound - reg->base + 1), reg->flags));
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) { if (reg->flags & MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM) {
mca_common_cuda_register(base, size, mca_common_cuda_register (base, size,
openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name); openib_reg->base.rcache->rcache_component->rcache_version.mca_component_name);
} }
#endif #endif
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg) static int openib_dereg_mr(void *reg_data, mca_rcache_base_registration_t *reg)
{ {
mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data; mca_btl_openib_device_t *device = (mca_btl_openib_device_t*)reg_data;
mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg; mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;
@ -593,9 +594,9 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
} }
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) { if (reg->flags & MCA_RCACHE_FLAGS_CUDA_REGISTER_MEM) {
mca_common_cuda_unregister(openib_reg->base.base, mca_common_cuda_unregister(openib_reg->base.base,
openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name); openib_reg->base.rcache->rcache_component->rcache_version.mca_component_name);
} }
#endif #endif
@ -878,6 +879,7 @@ static void device_construct(mca_btl_openib_device_t *device)
device->ib_dev_context = NULL; device->ib_dev_context = NULL;
device->ib_pd = NULL; device->ib_pd = NULL;
device->mpool = NULL; device->mpool = NULL;
device->rcache = NULL;
#if OPAL_ENABLE_PROGRESS_THREADS == 1 #if OPAL_ENABLE_PROGRESS_THREADS == 1
device->ib_channel = NULL; device->ib_channel = NULL;
#endif #endif
@ -960,8 +962,8 @@ static void device_destruct(mca_btl_openib_device_t *device)
} }
} }
if (OPAL_SUCCESS != mca_mpool_base_module_destroy(device->mpool)) { if (OPAL_SUCCESS != mca_rcache_base_module_destroy (device->rcache)) {
BTL_VERBOSE(("Failed to release mpool")); BTL_VERBOSE(("failed to release registration cache"));
goto device_error; goto device_error;
} }
@ -1590,7 +1592,7 @@ static uint64_t calculate_max_reg (const char *device_name)
static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
{ {
struct mca_mpool_base_resources_t mpool_resources; mca_rcache_base_resources_t rcache_resources;
mca_btl_openib_device_t *device; mca_btl_openib_device_t *device;
uint8_t i, k = 0; uint8_t i, k = 0;
int ret = -1, port_cnt; int ret = -1, port_cnt;
@ -1813,20 +1815,25 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
"eager RDMA and progress threads", true); "eager RDMA and progress threads", true);
} }
asprintf (&mpool_resources.pool_name, "verbs.%" PRIu64, device->ib_dev_attr.node_guid); asprintf (&rcache_resources.cache_name, "verbs.%" PRIu64, device->ib_dev_attr.node_guid);
mpool_resources.reg_data = (void*)device; rcache_resources.reg_data = (void*)device;
mpool_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t); rcache_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t);
mpool_resources.register_mem = openib_reg_mr; rcache_resources.register_mem = openib_reg_mr;
mpool_resources.deregister_mem = openib_dereg_mr; rcache_resources.deregister_mem = openib_dereg_mr;
device->mpool = device->rcache =
mca_mpool_base_module_create(mca_btl_openib_component.ib_mpool_name, mca_rcache_base_module_create (mca_btl_openib_component.ib_rcache_name,
device, &mpool_resources); device, &rcache_resources);
if(NULL == device->mpool){ if (NULL == device->rcache) {
/* Don't print an error message here -- we'll get one from /* Don't print an error message here -- we'll get one from
mpool_create anyway */ mpool_create anyway */
goto error; goto error;
} }
device->mpool = mca_mpool_base_module_lookup (mca_btl_openib_component.ib_mpool_hints);
if (NULL == device->mpool) {
goto error;
}
#if OPAL_ENABLE_PROGRESS_THREADS #if OPAL_ENABLE_PROGRESS_THREADS
device->ib_channel = ibv_create_comp_channel(device->ib_dev_context); device->ib_channel = ibv_create_comp_channel(device->ib_dev_context);
if (NULL == device->ib_channel) { if (NULL == device->ib_channel) {
@ -2223,9 +2230,6 @@ error:
ibv_destroy_comp_channel(device->ib_channel); ibv_destroy_comp_channel(device->ib_channel);
} }
#endif #endif
if (device->mpool) {
mca_mpool_base_module_destroy(device->mpool);
}
if (device->ib_pd) { if (device->ib_pd) {
ibv_dealloc_pd(device->ib_pd); ibv_dealloc_pd(device->ib_pd);

Просмотреть файл

@ -20,6 +20,7 @@ BEGIN_C_DECLS
struct mca_btl_openib_eager_rdma_local_t { struct mca_btl_openib_eager_rdma_local_t {
opal_ptr_t base; /**< buffer for RDMAing eager messages */ opal_ptr_t base; /**< buffer for RDMAing eager messages */
void *alloc_base; /**< allocated base */
mca_btl_openib_recv_frag_t *frags; mca_btl_openib_recv_frag_t *frags;
mca_btl_openib_reg_t *reg; mca_btl_openib_reg_t *reg;
uint16_t head; /**< RDMA buffer to poll */ uint16_t head; /**< RDMA buffer to poll */

Просмотреть файл

@ -347,14 +347,17 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
* was not in "connect" or "bad" flow (failed to allocate memory) * was not in "connect" or "bad" flow (failed to allocate memory)
* and changed the pointer back to NULL * and changed the pointer back to NULL
*/ */
if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL, (void*)1)) {
(void*)1)) { if (NULL != endpoint->eager_rdma_local.reg) {
if ((void*)1 != endpoint->eager_rdma_local.base.pval && endpoint->endpoint_btl->device->rcache->rcache_deregister (endpoint->endpoint_btl->device->rcache,
NULL != endpoint->eager_rdma_local.base.pval) { &endpoint->eager_rdma_local.reg->base);
endpoint->endpoint_btl->super.btl_mpool->mpool_free(endpoint->endpoint_btl->super.btl_mpool, endpoint->eager_rdma_local.reg = NULL;
endpoint->eager_rdma_local.base.pval, }
(mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg);
pval_clean=true; void *alloc_base = opal_atomic_swap_ptr (&endpoint->eager_rdma_local.alloc_base, NULL);
if (alloc_base) {
endpoint->endpoint_btl->super.btl_mpool->mpool_free (endpoint->endpoint_btl->super.btl_mpool, alloc_base);
pval_clean = true;
} }
} else { } else {
pval_clean=true; pval_clean=true;
@ -861,10 +864,10 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
mca_btl_openib_endpoint_t* endpoint) mca_btl_openib_endpoint_t* endpoint)
{ {
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl; mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
char *buf; char *buf, *alloc_base;
mca_btl_openib_recv_frag_t *headers_buf; mca_btl_openib_recv_frag_t *headers_buf;
int i; int i, rc;
uint32_t flag = MCA_MPOOL_FLAGS_CACHE_BYPASS; uint32_t flag = MCA_RCACHE_FLAGS_CACHE_BYPASS;
/* Set local rdma pointer to 1 temporarily so other threads will not try /* Set local rdma pointer to 1 temporarily so other threads will not try
* to enter the function */ * to enter the function */
@ -890,19 +893,26 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
The following flag will be interpreted and the appropriate The following flag will be interpreted and the appropriate
steps will be taken when the memory is registered in steps will be taken when the memory is registered in
openib_reg_mr(). */ openib_reg_mr(). */
flag |= MCA_MPOOL_FLAGS_SO_MEM; flag |= MCA_RCACHE_FLAGS_SO_MEM;
#endif #endif
buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool, alloc_base = buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
openib_btl->eager_rdma_frag_size * openib_btl->eager_rdma_frag_size *
mca_btl_openib_component.eager_rdma_num, mca_btl_openib_component.eager_rdma_num,
mca_btl_openib_component.buffer_alignment, mca_btl_openib_component.buffer_alignment,
flag, 0);
(mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);
if(!buf) if(!buf)
goto free_headers_buf; goto free_headers_buf;
rc = openib_btl->device->rcache->rcache_register (openib_btl->device->rcache, buf, openib_btl->eager_rdma_frag_size *
mca_btl_openib_component.eager_rdma_num, flag, MCA_RCACHE_ACCESS_ANY,
(mca_rcache_base_registration_t**)&endpoint->eager_rdma_local.reg);
if (OPAL_SUCCESS != rc) {
openib_btl->super.btl_mpool->mpool_free (openib_btl->super.btl_mpool, alloc_base);
goto free_headers_buf;
}
buf = buf + openib_btl->eager_rdma_frag_size - buf = buf + openib_btl->eager_rdma_frag_size -
sizeof(mca_btl_openib_footer_t) - openib_btl->super.btl_eager_limit - sizeof(mca_btl_openib_footer_t) - openib_btl->super.btl_eager_limit -
sizeof(mca_btl_openib_header_t); sizeof(mca_btl_openib_header_t);
@ -913,7 +923,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
mca_btl_openib_frag_init_data_t init_data; mca_btl_openib_frag_init_data_t init_data;
item = (opal_free_list_item_t*)&headers_buf[i]; item = (opal_free_list_item_t*)&headers_buf[i];
item->registration = (mca_mpool_base_registration_t *)endpoint->eager_rdma_local.reg; item->registration = (mca_rcache_base_registration_t *)endpoint->eager_rdma_local.reg;
item->ptr = buf + i * openib_btl->eager_rdma_frag_size; item->ptr = buf + i * openib_btl->eager_rdma_frag_size;
OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t); OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t);
@ -941,6 +951,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
/* set local rdma pointer to real value */ /* set local rdma pointer to real value */
(void)opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, (void)opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
(void*)1, buf); (void*)1, buf);
endpoint->eager_rdma_local.alloc_base = alloc_base;
if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) { if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) {
mca_btl_openib_device_t *device = endpoint->endpoint_btl->device; mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
@ -957,8 +968,9 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
return; return;
} }
openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool, openib_btl->device->rcache->rcache_deregister (openib_btl->device->rcache,
buf, (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg); (mca_rcache_base_registration_t*)endpoint->eager_rdma_local.reg);
openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool, buf);
free_headers_buf: free_headers_buf:
free(headers_buf); free(headers_buf);
unlock_rdma_local: unlock_rdma_local:

Просмотреть файл

@ -316,9 +316,12 @@ int btl_openib_register_mca_params(void)
"(must be >= 1)", "(must be >= 1)",
32, &mca_btl_openib_component.ib_free_list_inc, 32, &mca_btl_openib_component.ib_free_list_inc,
REGINT_GE_ONE)); REGINT_GE_ONE));
CHECK(reg_string("mpool", NULL, CHECK(reg_string("mpool_hints", NULL, "hints for selecting a memory pool (default: none)",
"Name of the memory pool to be used (it is unlikely that you will ever want to change this)", NULL, &mca_btl_openib_component.ib_mpool_hints,
"grdma", &mca_btl_openib_component.ib_mpool_name, 0));
CHECK(reg_string("rcache", NULL,
"Name of the registration cache to be used (it is unlikely that you will ever want to change this)",
"grdma", &mca_btl_openib_component.ib_rcache_name,
0)); 0));
CHECK(reg_int("reg_mru_len", NULL, CHECK(reg_int("reg_mru_len", NULL,
"Length of the registration cache most recently used list " "Length of the registration cache most recently used list "

Просмотреть файл

@ -74,7 +74,6 @@
#include "btl_openib_async.h" #include "btl_openib_async.h"
#include "connect/connect.h" #include "connect/connect.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h"
#include "opal/util/sys_limits.h" #include "opal/util/sys_limits.h"
#if (ENABLE_DYNAMIC_SL) #if (ENABLE_DYNAMIC_SL)
@ -1367,7 +1366,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
&init_attr))) { &init_attr))) {
/* NTH: this process may be out of registered memory. try evicting an item from /* NTH: this process may be out of registered memory. try evicting an item from
the lru of this btl's mpool */ the lru of this btl's mpool */
if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) { if (false == m->btl->device->rcache->rcache_evict (m->btl->device->rcache)) {
break; break;
} }
} }
@ -1378,7 +1377,7 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
&init_attr))) { &init_attr))) {
/* NTH: this process may be out of registered memory. try evicting an item from /* NTH: this process may be out of registered memory. try evicting an item from
the lru of this btl's mpool */ the lru of this btl's mpool */
if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) { if (false == m->btl->device->rcache->rcache_evict (m->btl->device->rcache)) {
break; break;
} }
} }

Просмотреть файл

@ -56,8 +56,6 @@
#include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_convertor.h"
#include "opal/mca/btl/btl.h" #include "opal/mca/btl/btl.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/sm/mpool_sm.h"
#include "opal/align.h" #include "opal/align.h"
#include "opal/util/sys_limits.h" #include "opal/util/sys_limits.h"
@ -111,7 +109,7 @@ static void *mpool_calloc(size_t nmemb, size_t size)
size_t bsize = nmemb * size; size_t bsize = nmemb * size;
mca_mpool_base_module_t *mpool = mca_btl_sm_component.sm_mpool; mca_mpool_base_module_t *mpool = mca_btl_sm_component.sm_mpool;
buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0, NULL); buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0);
if (NULL == buf) if (NULL == buf)
return NULL; return NULL;
@ -122,7 +120,7 @@ static void *mpool_calloc(size_t nmemb, size_t size)
static int static int
setup_mpool_base_resources(mca_btl_sm_component_t *comp_ptr, setup_mpool_base_resources(mca_btl_sm_component_t *comp_ptr,
mca_mpool_base_resources_t *out_res) mca_common_sm_mpool_resources_t *out_res)
{ {
int rc = OPAL_SUCCESS; int rc = OPAL_SUCCESS;
int fd = -1; int fd = -1;
@ -222,7 +220,7 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
size_t length, length_payload; size_t length, length_payload;
sm_fifo_t *my_fifos; sm_fifo_t *my_fifos;
int my_mem_node, num_mem_nodes, i, rc; int my_mem_node, num_mem_nodes, i, rc;
mca_mpool_base_resources_t *res = NULL; mca_common_sm_mpool_resources_t *res = NULL;
mca_btl_sm_component_t* m = &mca_btl_sm_component; mca_btl_sm_component_t* m = &mca_btl_sm_component;
/* Assume we don't have hwloc support and fill in dummy info */ /* Assume we don't have hwloc support and fill in dummy info */
@ -291,15 +289,14 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
/* Disable memory binding, because each MPI process will claim pages in the /* Disable memory binding, because each MPI process will claim pages in the
* mpool for their local NUMA node */ * mpool for their local NUMA node */
res->mem_node = -1; res->mem_node = -1;
res->allocator = mca_btl_sm_component.allocator;
if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) { if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) {
free(res); free(res);
return rc; return rc;
} }
/* now that res is fully populated, create the thing */ /* now that res is fully populated, create the thing */
mca_btl_sm_component.sm_mpools[0] = mca_btl_sm_component.sm_mpools[0] = common_sm_mpool_create (res);
mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name,
sm_btl, res);
/* Sanity check to ensure that we found it */ /* Sanity check to ensure that we found it */
if (NULL == mca_btl_sm_component.sm_mpools[0]) { if (NULL == mca_btl_sm_component.sm_mpools[0]) {
free(res); free(res);
@ -470,7 +467,7 @@ int mca_btl_sm_add_procs(
bool have_connected_peer = false; bool have_connected_peer = false;
char **bases; char **bases;
/* for easy access to the mpool_sm_module */ /* for easy access to the mpool_sm_module */
mca_mpool_sm_module_t *sm_mpool_modp = NULL; mca_common_sm_mpool_module_t *sm_mpool_modp = NULL;
/* initialization */ /* initialization */
@ -548,7 +545,7 @@ int mca_btl_sm_add_procs(
} }
bases = mca_btl_sm_component.shm_bases; bases = mca_btl_sm_component.shm_bases;
sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_sm_component.sm_mpool; sm_mpool_modp = (mca_common_sm_mpool_module_t *)mca_btl_sm_component.sm_mpool;
/* initialize own FIFOs */ /* initialize own FIFOs */
/* /*

Просмотреть файл

@ -212,6 +212,12 @@ struct mca_btl_sm_component_t {
char *sm_mpool_rndv_file_name; char *sm_mpool_rndv_file_name;
char *sm_ctl_file_name; char *sm_ctl_file_name;
char *sm_rndv_file_name; char *sm_rndv_file_name;
/** minimum size of a btl/sm mpool */
unsigned long mpool_min_size;
/** allocator name to use with the mpool */
char *allocator;
}; };
typedef struct mca_btl_sm_component_t mca_btl_sm_component_t; typedef struct mca_btl_sm_component_t mca_btl_sm_component_t;
OPAL_MODULE_DECLSPEC extern mca_btl_sm_component_t mca_btl_sm_component; OPAL_MODULE_DECLSPEC extern mca_btl_sm_component_t mca_btl_sm_component;
@ -281,7 +287,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
/* allocate the queue in the receiver's address space */ /* allocate the queue in the receiver's address space */
fifo->queue_recv = (volatile void **)mpool->mpool_alloc( fifo->queue_recv = (volatile void **)mpool->mpool_alloc(
mpool, sizeof(void *) * qsize, opal_cache_line_size, 0, NULL); mpool, sizeof(void *) * qsize, opal_cache_line_size, 0);
if(NULL == fifo->queue_recv) { if(NULL == fifo->queue_recv) {
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }

Просмотреть файл

@ -221,6 +221,19 @@ static int sm_register(void)
0, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, 0, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_sm_component.knem_max_simultaneous); &mca_btl_sm_component.knem_max_simultaneous);
mca_btl_sm_component.allocator = "bucket";
(void) mca_base_component_var_register (&mca_btl_sm_component.super.btl_version, "allocator",
"Name of allocator component to use for btl/sm allocations",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_sm_component.allocator);
mca_btl_sm_component.mpool_min_size = 134217728;
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, "min_size",
"Minimum size of the common/sm mpool shared memory file",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_sm_component.mpool_min_size);
/* CMA parameters */ /* CMA parameters */
mca_btl_sm_component.use_cma = 0; mca_btl_sm_component.use_cma = 0;
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version, (void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version,
@ -234,9 +247,6 @@ static int sm_register(void)
mca_btl_sm_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_max); mca_btl_sm_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_max);
mca_btl_sm_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_inc); mca_btl_sm_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_free_list_inc);
mca_btl_sm_param_register_int("max_procs", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_max_procs); mca_btl_sm_param_register_int("max_procs", -1, OPAL_INFO_LVL_5, &mca_btl_sm_component.sm_max_procs);
/* there is no practical use for the mpool name parameter since mpool resources differ
between components */
mca_btl_sm_component.sm_mpool_name = "sm";
mca_btl_sm_param_register_uint("fifo_size", 4096, OPAL_INFO_LVL_4, &mca_btl_sm_component.fifo_size); mca_btl_sm_param_register_uint("fifo_size", 4096, OPAL_INFO_LVL_4, &mca_btl_sm_component.fifo_size);
mca_btl_sm_param_register_int("num_fifos", 1, OPAL_INFO_LVL_4, &mca_btl_sm_component.nfifos); mca_btl_sm_param_register_int("num_fifos", 1, OPAL_INFO_LVL_4, &mca_btl_sm_component.nfifos);
@ -456,41 +466,6 @@ create_and_attach(mca_btl_sm_component_t *comp_ptr,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
/*
* SKG - I'm not happy with this, but I can't figure out a better way of
* finding the sm mpool's minimum size 8-|. The way I see it. This BTL only
* uses the sm mpool, so maybe this isn't so bad...
*
* The problem is the we need to size the mpool resources at sm BTL component
* init. That means we need to know the mpool's minimum size at create.
*/
static int
get_min_mpool_size(mca_btl_sm_component_t *comp_ptr,
size_t *out_size)
{
const char *type_name = "mpool";
const char *param_name = "min_size";
const mca_base_var_storage_t *min_size;
int id = 0;
if (0 > (id = mca_base_var_find("ompi", type_name, comp_ptr->sm_mpool_name,
param_name))) {
opal_output(0, "mca_base_var_find: failure looking for %s_%s_%s\n",
type_name, comp_ptr->sm_mpool_name, param_name);
return OPAL_ERR_NOT_FOUND;
}
if (OPAL_SUCCESS != mca_base_var_get_value(id, &min_size, NULL, NULL)) {
opal_output(0, "mca_base_var_get_value failure\n");
return OPAL_ERROR;
}
/* the min_size variable is an unsigned long long */
*out_size = (size_t) min_size->ullval;
return OPAL_SUCCESS;
}
static int static int
get_mpool_res_size(int32_t max_procs, get_mpool_res_size(int32_t max_procs,
size_t *out_res_size) size_t *out_res_size)
@ -612,20 +587,16 @@ create_rndv_file(mca_btl_sm_component_t *comp_ptr,
mca_common_sm_module_t *tmp_modp = NULL; mca_common_sm_module_t *tmp_modp = NULL;
if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) { if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) {
size_t min_size = 0;
/* get the segment size for the sm mpool. */ /* get the segment size for the sm mpool. */
if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs, if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs,
&size))) { &size))) {
/* rc is already set */ /* rc is already set */
goto out; goto out;
} }
/* do we need to update the size based on the sm mpool's min size? */
if (OPAL_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) {
goto out;
}
/* update size if less than required minimum */ /* update size if less than required minimum */
if (size < min_size) { if (size < mca_btl_sm_component.mpool_min_size) {
size = min_size; size = mca_btl_sm_component.mpool_min_size;
} }
/* we only need the shmem_ds info at this point. initialization will be /* we only need the shmem_ds info at this point. initialization will be
* completed in the mpool module code. the idea is that we just need this * completed in the mpool module code. the idea is that we just need this

Просмотреть файл

@ -12,8 +12,8 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. * Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights
* All rights reserved. * reserved.
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
@ -53,11 +53,13 @@
#include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_convertor.h"
#include "opal/mca/btl/btl.h" #include "opal/mca/btl/btl.h"
#include "opal/mca/common/sm/common_sm_mpool.h"
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
#include "opal/mca/common/cuda/common_cuda.h" #include "opal/mca/common/cuda/common_cuda.h"
#endif /* OPAL_CUDA_SUPPORT */ #endif /* OPAL_CUDA_SUPPORT */
#include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/sm/mpool_sm.h" #include "opal/mca/rcache/base/base.h"
#if OPAL_ENABLE_FT_CR == 1 #if OPAL_ENABLE_FT_CR == 1
#include "opal/mca/crs/base/base.h" #include "opal/mca/crs/base/base.h"
@ -122,7 +124,7 @@ static void *mpool_calloc(size_t nmemb, size_t size)
size_t bsize = nmemb * size; size_t bsize = nmemb * size;
mca_mpool_base_module_t *mpool = mca_btl_smcuda_component.sm_mpool; mca_mpool_base_module_t *mpool = mca_btl_smcuda_component.sm_mpool;
buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0, NULL); buf = mpool->mpool_alloc(mpool, bsize, opal_cache_line_size, 0);
if (NULL == buf) if (NULL == buf)
return NULL; return NULL;
@ -133,7 +135,7 @@ static void *mpool_calloc(size_t nmemb, size_t size)
static int static int
setup_mpool_base_resources(mca_btl_smcuda_component_t *comp_ptr, setup_mpool_base_resources(mca_btl_smcuda_component_t *comp_ptr,
mca_mpool_base_resources_t *out_res) mca_common_sm_mpool_resources_t *out_res)
{ {
int rc = OPAL_SUCCESS; int rc = OPAL_SUCCESS;
int fd = -1; int fd = -1;
@ -228,7 +230,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
size_t length, length_payload; size_t length, length_payload;
sm_fifo_t *my_fifos; sm_fifo_t *my_fifos;
int my_mem_node, num_mem_nodes, i, rc; int my_mem_node, num_mem_nodes, i, rc;
mca_mpool_base_resources_t *res = NULL; mca_common_sm_mpool_resources_t *res = NULL;
mca_btl_smcuda_component_t* m = &mca_btl_smcuda_component; mca_btl_smcuda_component_t* m = &mca_btl_smcuda_component;
/* Assume we don't have hwloc support and fill in dummy info */ /* Assume we don't have hwloc support and fill in dummy info */
@ -297,15 +299,14 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
/* Disable memory binding, because each MPI process will claim pages in the /* Disable memory binding, because each MPI process will claim pages in the
* mpool for their local NUMA node */ * mpool for their local NUMA node */
res->mem_node = -1; res->mem_node = -1;
res->allocator = mca_btl_smcuda_component.allocator;
if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) { if (OPAL_SUCCESS != (rc = setup_mpool_base_resources(m, res))) {
free(res); free(res);
return rc; return rc;
} }
/* now that res is fully populated, create the thing */ /* now that res is fully populated, create the thing */
mca_btl_smcuda_component.sm_mpools[0] = mca_btl_smcuda_component.sm_mpools[0] = common_sm_mpool_create (res);
mca_mpool_base_module_create(mca_btl_smcuda_component.sm_mpool_name,
smcuda_btl, res);
/* Sanity check to ensure that we found it */ /* Sanity check to ensure that we found it */
if (NULL == mca_btl_smcuda_component.sm_mpools[0]) { if (NULL == mca_btl_smcuda_component.sm_mpools[0]) {
free(res); free(res);
@ -345,10 +346,9 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
/* Create a local memory pool that sends handles to the remote /* Create a local memory pool that sends handles to the remote
* side. Note that the res argument is not really used, but * side. Note that the res argument is not really used, but
* needed to satisfy function signature. */ * needed to satisfy function signature. */
smcuda_btl->super.btl_mpool = mca_mpool_base_module_create("gpusm", mca_rcache_base_resources_t rcache_res;
smcuda_btl, smcuda_btl->rcache = mca_rcache_base_module_create("gpusm", smcuda_btl, &rcache_res);
res); if (NULL == smcuda_btl->rcache) {
if (NULL == smcuda_btl->super.btl_mpool) {
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
#endif /* OPAL_CUDA_SUPPORT */ #endif /* OPAL_CUDA_SUPPORT */
@ -479,16 +479,9 @@ create_sm_endpoint(int local_proc, struct opal_proc_t *proc)
} }
#endif #endif
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
{ /* Create a remote memory pool on the endpoint. The rgpusm component
mca_mpool_base_resources_t resources; /* unused, but needed */ * does not take any resources. They are filled in internally. */
ep->rcache = mca_rcache_base_module_create ("rgpusm", NULL, NULL);
/* Create a remote memory pool on the endpoint. Note that the resources
* argument is just to satisfy the function signature. The rcuda mpool
* actually takes care of filling in the resources. */
ep->mpool = mca_mpool_base_module_create("rgpusm",
NULL,
&resources);
}
#endif /* OPAL_CUDA_SUPPORT */ #endif /* OPAL_CUDA_SUPPORT */
return ep; return ep;
} }
@ -507,7 +500,7 @@ int mca_btl_smcuda_add_procs(
bool have_connected_peer = false; bool have_connected_peer = false;
char **bases; char **bases;
/* for easy access to the mpool_sm_module */ /* for easy access to the mpool_sm_module */
mca_mpool_sm_module_t *sm_mpool_modp = NULL; mca_common_sm_mpool_module_t *sm_mpool_modp = NULL;
/* initialization */ /* initialization */
@ -584,7 +577,7 @@ int mca_btl_smcuda_add_procs(
} }
bases = mca_btl_smcuda_component.shm_bases; bases = mca_btl_smcuda_component.shm_bases;
sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_smcuda_component.sm_mpool; sm_mpool_modp = (mca_common_sm_mpool_module_t *)mca_btl_smcuda_component.sm_mpool;
/* initialize own FIFOs */ /* initialize own FIFOs */
/* /*
@ -693,6 +686,13 @@ int mca_btl_smcuda_del_procs(
struct opal_proc_t **procs, struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers) struct mca_btl_base_endpoint_t **peers)
{ {
for (size_t i = 0 ; i < nprocs ; ++i) {
if (peers[i]->rcache) {
mca_rcache_base_module_destroy (peers[i]->rcache);
peers[i]->rcache = NULL;
}
}
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -1009,16 +1009,17 @@ static struct mca_btl_base_registration_handle_t *mca_btl_smcuda_register_mem (
struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, void *base, struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, void *base,
size_t size, uint32_t flags) size_t size, uint32_t flags)
{ {
mca_mpool_common_cuda_reg_t *reg; mca_btl_smcuda_t *smcuda_module = (mca_btl_smcuda_t *) btl;
mca_rcache_common_cuda_reg_t *reg;
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
int mpool_flags = 0; int rcache_flags = 0;
if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) { if (MCA_BTL_REG_FLAG_CUDA_GPU_MEM & flags) {
mpool_flags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM; rcache_flags |= MCA_RCACHE_FLAGS_CUDA_GPU_MEM;
} }
btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mpool_flags, smcuda_module->rcache->rcache_register (smcuda_module->rcache, base, size, rcache_flags,
access_flags, (mca_mpool_base_registration_t **) &reg); access_flags, (mca_rcache_base_registration_t **) &reg);
if (OPAL_UNLIKELY(NULL == reg)) { if (OPAL_UNLIKELY(NULL == reg)) {
return NULL; return NULL;
} }
@ -1029,10 +1030,11 @@ static struct mca_btl_base_registration_handle_t *mca_btl_smcuda_register_mem (
static int mca_btl_smcuda_deregister_mem (struct mca_btl_base_module_t* btl, static int mca_btl_smcuda_deregister_mem (struct mca_btl_base_module_t* btl,
struct mca_btl_base_registration_handle_t *handle) struct mca_btl_base_registration_handle_t *handle)
{ {
mca_mpool_common_cuda_reg_t *reg = (mca_mpool_common_cuda_reg_t *) mca_btl_smcuda_t *smcuda_module = (mca_btl_smcuda_t *) btl;
((intptr_t) handle - offsetof (mca_mpool_common_cuda_reg_t, data)); mca_rcache_common_cuda_reg_t *reg = (mca_rcache_common_cuda_reg_t *)
((intptr_t) handle - offsetof (mca_rcache_common_cuda_reg_t, data));
btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base); smcuda_module->rcache->rcache_deregister (smcuda_module->rcache, &reg->base);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -1043,8 +1045,8 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_mpool_common_cuda_reg_t rget_reg; mca_rcache_common_cuda_reg_t rget_reg;
mca_mpool_common_cuda_reg_t *reg_ptr = &rget_reg; mca_rcache_common_cuda_reg_t *reg_ptr = &rget_reg;
int rc, done; int rc, done;
void *remote_memory_address; void *remote_memory_address;
size_t offset; size_t offset;
@ -1087,16 +1089,16 @@ int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl,
* remote memory which may lie somewhere in the middle. This is taken care of * remote memory which may lie somewhere in the middle. This is taken care of
* a few lines down. Note that we hand in the peer rank just for debugging * a few lines down. Note that we hand in the peer rank just for debugging
* support. */ * support. */
rc = ep->mpool->mpool_register(ep->mpool, remote_handle->reg_data.memh_seg_addr.pval, rc = ep->rcache->rcache_register (ep->rcache, remote_handle->reg_data.memh_seg_addr.pval,
remote_handle->reg_data.memh_seg_len, ep->peer_smp_rank, remote_handle->reg_data.memh_seg_len, ep->peer_smp_rank,
MCA_MPOOL_ACCESS_LOCAL_WRITE, MCA_RCACHE_ACCESS_LOCAL_WRITE,
(mca_mpool_base_registration_t **)&reg_ptr); (mca_rcache_base_registration_t **)&reg_ptr);
if (OPAL_SUCCESS != rc) { if (OPAL_SUCCESS != rc) {
opal_output(0, "Failed to register remote memory, rc=%d", rc); opal_output(0, "Failed to register remote memory, rc=%d", rc);
return rc; return rc;
} }
frag->registration = (mca_mpool_base_registration_t *)reg_ptr; frag->registration = (mca_rcache_base_registration_t *)reg_ptr;
frag->endpoint = ep; frag->endpoint = ep;
/* The registration has given us back the memory block that this /* The registration has given us back the memory block that this

Просмотреть файл

@ -206,6 +206,8 @@ struct mca_btl_smcuda_component_t {
int use_cuda_ipc; int use_cuda_ipc;
int use_cuda_ipc_same_gpu; int use_cuda_ipc_same_gpu;
#endif /* OPAL_CUDA_SUPPORT */ #endif /* OPAL_CUDA_SUPPORT */
unsigned long mpool_min_size;
char *allocator;
}; };
typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t; typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component; OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component;
@ -217,7 +219,7 @@ struct mca_btl_smcuda_t {
mca_btl_base_module_t super; /**< base BTL interface */ mca_btl_base_module_t super; /**< base BTL interface */
bool btl_inited; /**< flag indicating if btl has been inited */ bool btl_inited; /**< flag indicating if btl has been inited */
mca_btl_base_module_error_cb_fn_t error_cb; mca_btl_base_module_error_cb_fn_t error_cb;
mca_rcache_base_module_t *rcache;
}; };
typedef struct mca_btl_smcuda_t mca_btl_smcuda_t; typedef struct mca_btl_smcuda_t mca_btl_smcuda_t;
OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_t mca_btl_smcuda; OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_t mca_btl_smcuda;
@ -254,7 +256,7 @@ static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
/* allocate the queue in the receiver's address space */ /* allocate the queue in the receiver's address space */
fifo->queue_recv = (volatile void **)mpool->mpool_alloc( fifo->queue_recv = (volatile void **)mpool->mpool_alloc(
mpool, sizeof(void *) * qsize, opal_cache_line_size, 0, NULL); mpool, sizeof(void *) * qsize, opal_cache_line_size, 0);
if(NULL == fifo->queue_recv) { if(NULL == fifo->queue_recv) {
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }

Просмотреть файл

@ -12,8 +12,8 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. * Copyright (c) 2010-2016 Los Alamos National Security, LLC. All rights
* All rights reserved. * reserved.
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -141,6 +141,13 @@ static int mca_btl_smcuda_component_verify(void) {
static int smcuda_register(void) static int smcuda_register(void)
{ {
/* register SM component parameters */ /* register SM component parameters */
mca_btl_smcuda_component.mpool_min_size = 134217728;
(void) mca_base_component_var_register(&mca_btl_smcuda_component.super.btl_version, "min_size",
"Minimum size of the common/sm mpool shared memory file",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_smcuda_component.mpool_min_size);
mca_btl_smcuda_param_register_int("free_list_num", 8, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_num); mca_btl_smcuda_param_register_int("free_list_num", 8, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_num);
mca_btl_smcuda_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_max); mca_btl_smcuda_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_max);
mca_btl_smcuda_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_inc); mca_btl_smcuda_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_inc);
@ -156,6 +163,12 @@ static int smcuda_register(void)
/* default number of extra procs to allow for future growth */ /* default number of extra procs to allow for future growth */
mca_btl_smcuda_param_register_int("sm_extra_procs", 0, OPAL_INFO_LVL_9, &mca_btl_smcuda_component.sm_extra_procs); mca_btl_smcuda_param_register_int("sm_extra_procs", 0, OPAL_INFO_LVL_9, &mca_btl_smcuda_component.sm_extra_procs);
mca_btl_smcuda_component.allocator = "bucket";
(void) mca_base_component_var_register (&mca_btl_smcuda_component.super.btl_version, "allocator",
"Name of allocator component to use for btl/smcuda allocations",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_smcuda_component.allocator);
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
/* Lower priority when CUDA support is not requested */ /* Lower priority when CUDA support is not requested */
if (opal_cuda_support) { if (opal_cuda_support) {
@ -366,41 +379,6 @@ create_and_attach(mca_btl_smcuda_component_t *comp_ptr,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
/*
 * Look up the minimum size of the sm mpool.
 *
 * The mpool resources have to be sized when the sm BTL component is
 * initialized, so the mpool's minimum size must be known at create time.
 * It is obtained by querying the mpool's "min_size" MCA variable by name,
 * using comp_ptr->sm_mpool_name as the component name. This BTL only uses
 * the sm mpool, which keeps this coupling tolerable.
 *
 * @param comp_ptr  component whose sm_mpool_name selects the variable
 * @param out_size  receives the variable's value on success
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_NOT_FOUND when
 *         mca_base_var_find returns a negative index, OPAL_ERROR when
 *         mca_base_var_get_value does not return OPAL_SUCCESS.
 */
static int
get_min_mpool_size(mca_btl_smcuda_component_t *comp_ptr,
                   size_t *out_size)
{
    const mca_base_var_storage_t *storage;
    const int var_index = mca_base_var_find("ompi", "mpool",
                                            comp_ptr->sm_mpool_name,
                                            "min_size");

    if (var_index < 0) {
        opal_output(0, "mca_base_var_find: failure looking for %s_%s_%s\n",
                    "mpool", comp_ptr->sm_mpool_name, "min_size");
        return OPAL_ERR_NOT_FOUND;
    }

    if (OPAL_SUCCESS != mca_base_var_get_value(var_index, &storage, NULL, NULL)) {
        opal_output(0, "mca_base_var_get_value failure\n");
        return OPAL_ERROR;
    }

    /* the min_size variable is stored as an unsigned long long */
    *out_size = (size_t) storage->ullval;

    return OPAL_SUCCESS;
}
static int static int
get_mpool_res_size(int32_t max_procs, get_mpool_res_size(int32_t max_procs,
size_t *out_res_size) size_t *out_res_size)
@ -521,21 +499,18 @@ create_rndv_file(mca_btl_smcuda_component_t *comp_ptr,
mca_common_sm_module_t *tmp_modp = NULL; mca_common_sm_module_t *tmp_modp = NULL;
if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) { if (MCA_BTL_SM_RNDV_MOD_MPOOL == type) {
size_t min_size = 0;
/* get the segment size for the sm mpool. */ /* get the segment size for the sm mpool. */
if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs, if (OPAL_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs,
&size))) { &size))) {
/* rc is already set */ /* rc is already set */
goto out; goto out;
} }
/* do we need to update the size based on the sm mpool's min size? */
if (OPAL_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) {
goto out;
}
/* update size if less than required minimum */ /* update size if less than required minimum */
if (size < min_size) { if (size < mca_btl_smcuda_component.mpool_min_size) {
size = min_size; size = mca_btl_smcuda_component.mpool_min_size;
} }
/* we only need the shmem_ds info at this point. initilization will be /* we only need the shmem_ds info at this point. initilization will be
* completed in the mpool module code. the idea is that we just need this * completed in the mpool module code. the idea is that we just need this
* info so we can populate the rndv file (or modex when we have it). */ * info so we can populate the rndv file (or modex when we have it). */
@ -1161,8 +1136,8 @@ int mca_btl_smcuda_component_progress(void)
OPAL_SUCCESS); OPAL_SUCCESS);
if(frag->registration != NULL) { if(frag->registration != NULL) {
frag->endpoint->mpool->mpool_deregister(frag->endpoint->mpool, frag->endpoint->rcache->rcache_deregister (frag->endpoint->rcache,
(mca_mpool_base_registration_t*)frag->registration); (mca_rcache_base_registration_t*)frag->registration);
frag->registration = NULL; frag->registration = NULL;
MCA_BTL_SMCUDA_FRAG_RETURN(frag); MCA_BTL_SMCUDA_FRAG_RETURN(frag);
} }

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -11,6 +12,8 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -35,7 +38,7 @@ struct mca_btl_base_endpoint_t {
int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing
* SMP specfic data structures. */ * SMP specfic data structures. */
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
mca_mpool_base_module_t *mpool; /**< mpool for remotely registered memory */ mca_rcache_base_module_t *rcache; /**< rcache for remotely registered memory */
#endif /* OPAL_CUDA_SUPPORT */ #endif /* OPAL_CUDA_SUPPORT */
#if OPAL_ENABLE_PROGRESS_THREADS == 1 #if OPAL_ENABLE_PROGRESS_THREADS == 1
int fifo_fd; /**< pipe/fifo used to signal endpoint that data is queued */ int fifo_fd; /**< pipe/fifo used to signal endpoint that data is queued */

Просмотреть файл

@ -54,7 +54,7 @@ typedef struct mca_btl_smcuda_hdr_t mca_btl_smcuda_hdr_t;
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
struct mca_btl_base_registration_handle_t { struct mca_btl_base_registration_handle_t {
mca_mpool_common_cuda_reg_data_t reg_data; mca_rcache_common_cuda_reg_data_t reg_data;
}; };
#endif #endif
@ -78,7 +78,7 @@ struct mca_btl_smcuda_frag_t {
mca_btl_base_segment_t segment; mca_btl_base_segment_t segment;
struct mca_btl_base_endpoint_t *endpoint; struct mca_btl_base_endpoint_t *endpoint;
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
struct mca_mpool_base_registration_t *registration; struct mca_rcache_base_registration_t *registration;
struct mca_btl_base_registration_handle_t *local_handle; struct mca_btl_base_registration_handle_t *local_handle;
#endif /* OPAL_CUDA_SUPPORT */ #endif /* OPAL_CUDA_SUPPORT */
size_t size; size_t size;

Просмотреть файл

@ -25,7 +25,8 @@
#include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/udreg/mpool_udreg.h" #include "opal/mca/rcache/base/base.h"
#include "opal/mca/rcache/udreg/rcache_udreg.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal_stdint.h" #include "opal_stdint.h"
#include "opal/mca/btl/btl.h" #include "opal/mca/btl/btl.h"
@ -56,8 +57,8 @@ typedef struct mca_btl_ugni_endpoint_attr_t {
} mca_btl_ugni_endpoint_attr_t; } mca_btl_ugni_endpoint_attr_t;
enum { enum {
MCA_BTL_UGNI_MPOOL_UDREG, MCA_BTL_UGNI_RCACHE_UDREG,
MCA_BTL_UGNI_MPOOL_GRDMA MCA_BTL_UGNI_RCACHE_GRDMA
}; };
typedef struct mca_btl_ugni_module_t { typedef struct mca_btl_ugni_module_t {
@ -86,7 +87,7 @@ typedef struct mca_btl_ugni_module_t {
opal_free_list_t post_descriptors; opal_free_list_t post_descriptors;
mca_mpool_base_module_t *smsg_mpool; mca_mpool_base_module_t *mpool;
opal_free_list_t smsg_mboxes; opal_free_list_t smsg_mboxes;
gni_ep_handle_t wildcard_ep; gni_ep_handle_t wildcard_ep;
@ -128,6 +129,8 @@ typedef struct mca_btl_ugni_module_t {
int nlocal_procs; int nlocal_procs;
volatile int active_send_count; volatile int active_send_count;
mca_rcache_base_module_t *rcache;
} mca_btl_ugni_module_t; } mca_btl_ugni_module_t;
typedef struct mca_btl_ugni_component_t { typedef struct mca_btl_ugni_component_t {
@ -177,8 +180,11 @@ typedef struct mca_btl_ugni_component_t {
/* Page size to use for SMSG allocations (udreg mpool) */ /* Page size to use for SMSG allocations (udreg mpool) */
unsigned int smsg_page_size; unsigned int smsg_page_size;
/* mpool type (grdma or udreg) */ /* rcache type (grdma or udreg) */
int mpool_type; int rcache_type;
/* memory pool hints */
char *mpool_hints;
/* Number of mailboxes to allocate in each block */ /* Number of mailboxes to allocate in each block */
unsigned int mbox_increment; unsigned int mbox_increment;
@ -312,7 +318,7 @@ struct mca_btl_base_registration_handle_t {
}; };
typedef struct mca_btl_ugni_reg_t { typedef struct mca_btl_ugni_reg_t {
mca_mpool_base_registration_t base; mca_rcache_base_registration_t base;
mca_btl_base_registration_handle_t handle; mca_btl_base_registration_handle_t handle;
} mca_btl_ugni_reg_t; } mca_btl_ugni_reg_t;

Просмотреть файл

@ -266,11 +266,12 @@ struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_
} }
static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size, static int ugni_reg_mem (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg) mca_rcache_base_registration_t *reg)
{ {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data; mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg; mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
gni_cq_handle_t cq = NULL;
gni_return_t rc; gni_return_t rc;
int flags; int flags;
@ -278,18 +279,24 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
if (reg->access_flags & (MCA_MPOOL_ACCESS_REMOTE_WRITE | MCA_MPOOL_ACCESS_LOCAL_WRITE | if (reg->access_flags & (MCA_RCACHE_ACCESS_REMOTE_WRITE | MCA_RCACHE_ACCESS_LOCAL_WRITE |
MCA_MPOOL_ACCESS_REMOTE_ATOMIC)) { MCA_RCACHE_ACCESS_REMOTE_ATOMIC)) {
flags = GNI_MEM_READWRITE; flags = GNI_MEM_READWRITE;
} else { } else {
flags = GNI_MEM_READ_ONLY; flags = GNI_MEM_READ_ONLY;
} }
if (!(reg->flags & MCA_RCACHE_FLAGS_SO_MEM)) {
flags |= GNI_MEM_RELAXED_PI_ORDERING; flags |= GNI_MEM_RELAXED_PI_ORDERING;
}
if (reg->flags & MCA_RCACHE_FLAGS_RESV0) {
cq = ugni_module->smsg_remote_cq;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base, rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, NULL, flags, -1, &(ugni_reg->handle.gni_handle)); size, cq, flags, -1, &(ugni_reg->handle.gni_handle));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock); OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) { if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
@ -301,24 +308,8 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
/*
 * Registration callback for SMSG memory.
 *
 * Registers the region starting at base (size bytes) with the module's
 * device handle using GNI_MEM_READWRITE and the module's smsg_remote_cq as
 * the completion queue argument. The resulting memory handle is stored in
 * the registration's handle.gni_handle. The GNI call is made while holding
 * the device's dev_lock.
 *
 * @param reg_data  the mca_btl_ugni_module_t the registration belongs to
 * @param base      start of the region to register
 * @param size      length of the region in bytes
 * @param reg       registration object (a mca_btl_ugni_reg_t) to fill in
 *
 * @return the GNI return code translated by opal_common_rc_ugni_to_opal
 */
static int ugni_reg_smsg_mem (void *reg_data, void *base, size_t size,
                              mca_mpool_base_registration_t *reg)
{
    mca_btl_ugni_module_t *module = reg_data;
    mca_btl_ugni_reg_t *smsg_reg = (mca_btl_ugni_reg_t *) reg;
    gni_return_t status;

    OPAL_THREAD_LOCK(&module->device->dev_lock);
    status = GNI_MemRegister (module->device->dev_handle, (uint64_t) base, size,
                              module->smsg_remote_cq, GNI_MEM_READWRITE, -1,
                              &smsg_reg->handle.gni_handle);
    OPAL_THREAD_UNLOCK(&module->device->dev_lock);

    return opal_common_rc_ugni_to_opal (status);
}
static int static int
ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg) ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
{ {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data; mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg; mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
@ -339,10 +330,10 @@ ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
static int static int
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
{ {
struct mca_mpool_base_resources_t mpool_resources; mca_rcache_udreg_resources_t rcache_resources;
unsigned int mbox_increment; unsigned int mbox_increment;
uint32_t nprocs, *u32; uint32_t nprocs, *u32;
const char *mpool_name; char *rcache_name;
int rc; int rc;
rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0, rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0,
@ -404,43 +395,35 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
return rc; return rc;
} }
mpool_resources.pool_name = "ompi.ugni"; ugni_module->super.btl_mpool = mca_mpool_base_module_lookup (mca_btl_ugni_component.mpool_hints);
mpool_resources.reg_data = (void *) ugni_module;
mpool_resources.sizeof_reg = sizeof (mca_btl_ugni_reg_t);
mpool_resources.register_mem = ugni_reg_rdma_mem;
mpool_resources.deregister_mem = ugni_dereg_mem;
if (MCA_BTL_UGNI_MPOOL_UDREG == mca_btl_ugni_component.mpool_type) {
/* additional settings for the udreg mpool */
/* 4k should be large enough for any Gemini/Ares system */
mpool_resources.max_entries = 4096;
mpool_resources.use_kernel_cache = true;
/* request a specific page size. this request may not be honored if the
* page size does not exist. */
mpool_resources.page_size = mca_btl_ugni_component.smsg_page_size;
mpool_resources.use_evict_w_unreg = false;
mpool_name = "udreg";
} else {
mpool_name = "grdma";
}
ugni_module->super.btl_mpool =
mca_mpool_base_module_create(mpool_name, ugni_module->device, &mpool_resources);
mpool_resources.register_mem = ugni_reg_smsg_mem;
ugni_module->smsg_mpool =
mca_mpool_base_module_create(mpool_name, ugni_module->device, &mpool_resources);
if (NULL == ugni_module->super.btl_mpool) { if (NULL == ugni_module->super.btl_mpool) {
BTL_ERROR(("error creating rdma mpool")); BTL_ERROR(("could not find mpool matching hints %s", mca_btl_ugni_component.mpool_hints));
return OPAL_ERROR; return OPAL_ERROR;
} }
if (NULL == ugni_module->smsg_mpool) { rcache_resources.base.cache_name = "ompi.ugni";
BTL_ERROR(("error creating smsg mpool")); rcache_resources.base.reg_data = (void *) ugni_module;
rcache_resources.base.sizeof_reg = sizeof (mca_btl_ugni_reg_t);
rcache_resources.base.register_mem = ugni_reg_mem;
rcache_resources.base.deregister_mem = ugni_dereg_mem;
if (MCA_BTL_UGNI_RCACHE_UDREG == mca_btl_ugni_component.rcache_type) {
/* additional settings for the udreg mpool */
/* 4k should be large enough for any Gemini/Ares system */
rcache_resources.max_entries = 4096;
rcache_resources.use_kernel_cache = true;
rcache_resources.use_evict_w_unreg = false;
rcache_name = "udreg";
} else {
rcache_name = "grdma";
}
ugni_module->rcache =
mca_rcache_base_module_create (rcache_name, ugni_module->device, &rcache_resources.base);
if (NULL == ugni_module->rcache) {
BTL_ERROR(("error creating registration cache"));
return OPAL_ERROR; return OPAL_ERROR;
} }
@ -451,7 +434,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_eager_num, mca_btl_ugni_component.ugni_eager_num,
mca_btl_ugni_component.ugni_eager_max, mca_btl_ugni_component.ugni_eager_max,
mca_btl_ugni_component.ugni_eager_inc, mca_btl_ugni_component.ugni_eager_inc,
ugni_module->super.btl_mpool, 0, NULL, ugni_module->super.btl_mpool, 0, ugni_module->rcache,
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module); (void *) ugni_module);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@ -466,7 +449,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_eager_num, mca_btl_ugni_component.ugni_eager_num,
mca_btl_ugni_component.ugni_eager_max, mca_btl_ugni_component.ugni_eager_max,
mca_btl_ugni_component.ugni_eager_inc, mca_btl_ugni_component.ugni_eager_inc,
ugni_module->super.btl_mpool, 0, NULL, ugni_module->super.btl_mpool, 0, ugni_module->rcache,
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module); (void *) ugni_module);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@ -487,12 +470,14 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mbox_increment = mca_btl_ugni_component.mbox_increment; mbox_increment = mca_btl_ugni_component.mbox_increment;
} }
/* use the MCA_RCACHE_FLAGS_RESV0 to signal this is smsg memory */
rc = opal_free_list_init (&ugni_module->smsg_mboxes, rc = opal_free_list_init (&ugni_module->smsg_mboxes,
sizeof (mca_btl_ugni_smsg_mbox_t), 8, sizeof (mca_btl_ugni_smsg_mbox_t), 8,
OBJ_CLASS(mca_btl_ugni_smsg_mbox_t), OBJ_CLASS(mca_btl_ugni_smsg_mbox_t),
mca_btl_ugni_component.smsg_mbox_size, 128, mca_btl_ugni_component.smsg_mbox_size, 128,
32, -1, mbox_increment, ugni_module->smsg_mpool, 32, -1, mbox_increment, ugni_module->super.btl_mpool,
0, NULL, NULL, NULL); MCA_RCACHE_FLAGS_SO_MEM | MCA_RCACHE_FLAGS_RESV0,
ugni_module->rcache, NULL, NULL);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating smsg mailbox free list")); BTL_ERROR(("error creating smsg mailbox free list"));
return rc; return rc;

Просмотреть файл

@ -15,6 +15,11 @@
#include "btl_ugni_rdma.h" #include "btl_ugni_rdma.h"
#include "btl_ugni_smsg.h" #include "btl_ugni_smsg.h"
#include "opal/util/sys_limits.h"
#include <stdlib.h>
#include <fcntl.h>
#include "opal/memoryhooks/memory.h" #include "opal/memoryhooks/memory.h"
#include "opal/runtime/opal_params.h" #include "opal/runtime/opal_params.h"
@ -25,6 +30,7 @@ static int btl_ugni_component_open(void);
static int btl_ugni_component_close(void); static int btl_ugni_component_close(void);
static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool); static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool);
static int mca_btl_ugni_component_progress(void); static int mca_btl_ugni_component_progress(void);
static unsigned long mca_btl_ugni_ugni_page_size = 0;
mca_btl_ugni_component_t mca_btl_ugni_component = { mca_btl_ugni_component_t mca_btl_ugni_component = {
.super = { .super = {
@ -44,9 +50,9 @@ mca_btl_ugni_component_t mca_btl_ugni_component = {
} }
}; };
mca_base_var_enum_value_t mpool_values[] = { mca_base_var_enum_value_t rcache_values[] = {
{MCA_BTL_UGNI_MPOOL_UDREG, "udreg"}, {MCA_BTL_UGNI_RCACHE_UDREG, "udreg"},
{MCA_BTL_UGNI_MPOOL_GRDMA, "grdma"}, {MCA_BTL_UGNI_RCACHE_GRDMA, "grdma"},
{-1, NULL} /* sentinal */ {-1, NULL} /* sentinal */
}; };
@ -55,6 +61,7 @@ btl_ugni_component_register(void)
{ {
mca_base_var_enum_t *new_enum; mca_base_var_enum_t *new_enum;
gni_nic_device_t device_type; gni_nic_device_t device_type;
char *mpool_hints_tmp = NULL;
int rc; int rc;
(void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version, (void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
@ -174,10 +181,29 @@ btl_ugni_component_register(void)
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment); MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);
/* determine if there are get alignment restrictions */
GNI_GetDeviceType (&device_type);
mca_btl_ugni_component.smsg_page_size = 2 << 20; mca_btl_ugni_component.smsg_page_size = 2 << 20;
if (GNI_DEVICE_GEMINI == device_type) {
    /* On Gemini, try to detect the page size from sysfs. access() returns 0
     * on success, so gating the open on a non-zero access() result would only
     * attempt the read when the file is NOT readable. open() reports both a
     * missing and an unreadable file by itself, so no separate probe is
     * needed. */
    int fd = open ("/sys/class/gemini/ghal0/mrt", O_RDONLY);

    if (0 <= fd) {
        char buffer[10] = {0};
        /* leave room for the terminating NUL that strtol relies on */
        ssize_t nread = read (fd, buffer, sizeof (buffer) - 1);

        close (fd);

        if (0 < nread) {
            /* the value is scaled by 1024 below -- presumably the file
             * reports the size in KiB (TODO confirm) */
            long mrt = strtol (buffer, NULL, 10);

            /* an empty or unparsable file yields 0; keep the 2M default set
             * above rather than overwriting the page size with 0 */
            if (0 < mrt) {
                mca_btl_ugni_ugni_page_size = (unsigned long) mrt * 1024;
                mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size;
            }
        }
    }
}
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"smsg_page_size", "Page size to use for SMSG " "smsg_page_size", "Page size to use for SMSG "
"mailbox allocation (default 2M)", "mailbox allocation (default: detect)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, MCA_BASE_VAR_SCOPE_LOCAL,
@ -202,20 +228,38 @@ btl_ugni_component_register(void)
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL,
NULL, NULL, &mca_btl_ugni_progress_thread_wakeups); NULL, NULL, &mca_btl_ugni_progress_thread_wakeups);
/* btl/ugni can only support only a fixed set of mpools (these mpools have compatible resource /* btl/ugni can only support only a fixed set of rcache components (these rcache components have compatible resource
* structures) */ * structures) */
rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum); rc = mca_base_var_enum_create ("btl_ugni_rcache", rcache_values, &new_enum);
if (OPAL_SUCCESS != rc) { if (OPAL_SUCCESS != rc) {
return rc; return rc;
} }
mca_btl_ugni_component.mpool_type = MCA_BTL_UGNI_MPOOL_UDREG; mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_UDREG;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version, (void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"mpool", "mpool to use", MCA_BASE_VAR_TYPE_INT, new_enum, "rcache", "registration cache to use", MCA_BASE_VAR_TYPE_INT, new_enum,
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_type); MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type);
OBJ_RELEASE(new_enum); OBJ_RELEASE(new_enum);
if (mca_btl_ugni_ugni_page_size) {
rc = asprintf (&mpool_hints_tmp, "page_size=%lu", mca_btl_ugni_ugni_page_size);
if (rc < 0) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
mca_btl_ugni_component.mpool_hints = mpool_hints_tmp;
} else {
mca_btl_ugni_component.mpool_hints = "page_size=2M";
}
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"mpool_hints", "hints to use when selecting a memory pool (default: "
"\"page_size=2M\")", MCA_BASE_VAR_TYPE_STRING, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mpool_hints);
free (mpool_hints_tmp);
/* ensure we loose send exclusivity to sm and vader if they are enabled */ /* ensure we loose send exclusivity to sm and vader if they are enabled */
mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2; mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 2;
@ -228,9 +272,6 @@ btl_ugni_component_register(void)
mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024; mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024;
/* determine if there are get alignment restrictions */
GNI_GetDeviceType (&device_type);
/* /*
* see def. of ALIGNMENT_MASK to figure this one out * see def. of ALIGNMENT_MASK to figure this one out
*/ */
@ -291,7 +332,7 @@ btl_ugni_component_close(void)
} }
static void mca_btl_ugni_autoset_leave_pinned (void) { static void mca_btl_ugni_autoset_leave_pinned (void) {
if (MCA_BTL_UGNI_MPOOL_UDREG != mca_btl_ugni_component.mpool_type) { if (MCA_BTL_UGNI_RCACHE_UDREG != mca_btl_ugni_component.rcache_type) {
int value = opal_mem_hooks_support_level(); int value = opal_mem_hooks_support_level();
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) { ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
@ -362,6 +403,12 @@ mca_btl_ugni_component_init (int *num_btl_modules,
return NULL; return NULL;
} }
if (mca_btl_ugni_component.smsg_page_size != (unsigned long) opal_getpagesize ()) {
if (mca_btl_ugni_ugni_page_size > mca_btl_ugni_component.smsg_page_size) {
mca_btl_ugni_component.smsg_page_size = mca_btl_ugni_ugni_page_size;
}
}
mca_btl_ugni_autoset_leave_pinned (); mca_btl_ugni_autoset_leave_pinned ();
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit; mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;

Просмотреть файл

@ -159,8 +159,8 @@ static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag) static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
{ {
if (frag->registration) { if (frag->registration) {
frag->endpoint->btl->super.btl_mpool->mpool_deregister(frag->endpoint->btl->super.btl_mpool, frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache,
(mca_mpool_base_registration_t *) frag->registration); (mca_rcache_base_registration_t *) frag->registration);
frag->registration = NULL; frag->registration = NULL;
} }

Просмотреть файл

@ -215,17 +215,8 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
OBJ_DESTRUCT(&ugni_module->eager_get_pending); OBJ_DESTRUCT(&ugni_module->eager_get_pending);
OBJ_DESTRUCT(&ugni_module->eager_get_pending_lock); OBJ_DESTRUCT(&ugni_module->eager_get_pending_lock);
if (ugni_module->initialized) { if (ugni_module->rcache) {
/* need to tear down the mpools *after* the free lists */ mca_rcache_base_module_destroy (ugni_module->rcache);
if (NULL != ugni_module->smsg_mpool) {
(void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool);
ugni_module->smsg_mpool = NULL;
}
if (NULL != ugni_module->super.btl_mpool) {
(void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool);
ugni_module->super.btl_mpool = NULL;
}
} }
ugni_module->initialized = false; ugni_module->initialized = false;
@ -303,12 +294,13 @@ static mca_btl_base_registration_handle_t *
mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base, mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base,
size_t size, uint32_t flags) size_t size, uint32_t flags)
{ {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_reg_t *reg; mca_btl_ugni_reg_t *reg;
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
int rc; int rc;
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0, access_flags, rc = ugni_module->rcache->rcache_register (ugni_module->rcache, base, size, 0, access_flags,
(mca_mpool_base_registration_t **) &reg); (mca_rcache_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return NULL; return NULL;
} }
@ -318,10 +310,11 @@ mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *
static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle) static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{ {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_reg_t *reg = mca_btl_ugni_reg_t *reg =
(mca_btl_ugni_reg_t *)((intptr_t) handle - offsetof (mca_btl_ugni_reg_t, handle)); (mca_btl_ugni_reg_t *)((intptr_t) handle - offsetof (mca_btl_ugni_reg_t, handle));
(void) btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base); (void) ugni_module->rcache->rcache_deregister (ugni_module->rcache, &reg->base);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -57,6 +57,7 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
uint32_t flags) uint32_t flags)
{ {
bool use_eager_get = (*size + reserve) > mca_btl_ugni_component.smsg_max_data; bool use_eager_get = (*size + reserve) > mca_btl_ugni_component.smsg_max_data;
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_base_frag_t *frag = NULL; mca_btl_ugni_base_frag_t *frag = NULL;
mca_btl_ugni_reg_t *registration = NULL; mca_btl_ugni_reg_t *registration = NULL;
void *data_ptr; void *data_ptr;
@ -74,9 +75,9 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
(unsigned int)(*size + reserve))); (unsigned int)(*size + reserve)));
if (OPAL_UNLIKELY(true == use_eager_get)) { if (OPAL_UNLIKELY(true == use_eager_get)) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, data_ptr, *size, 0, rc = ugni_module->rcache->rcache_register (ugni_module->rcache, data_ptr, *size, 0,
MCA_MPOOL_ACCESS_REMOTE_READ, MCA_RCACHE_ACCESS_REMOTE_READ,
(mca_mpool_base_registration_t **)&registration); (mca_rcache_base_registration_t **)&registration);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_frag_return (frag); mca_btl_ugni_frag_return (frag);
return NULL; return NULL;

Просмотреть файл

@ -16,8 +16,8 @@
static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) { static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
struct mca_btl_ugni_reg_t *ugni_reg = struct mca_btl_ugni_reg_t *ugni_reg =
(struct mca_btl_ugni_reg_t *) mbox->super.registration; (struct mca_btl_ugni_reg_t *) mbox->super.registration;
struct mca_mpool_base_registration_t *base_reg = mca_rcache_base_registration_t *base_reg =
(struct mca_mpool_base_registration_t *) ugni_reg; (mca_rcache_base_registration_t *) ugni_reg;
/* initialize mailbox attributes */ /* initialize mailbox attributes */
mbox->attr.smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; mbox->attr.smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;

Просмотреть файл

@ -47,8 +47,7 @@
#include "opal/class/opal_free_list.h" #include "opal/class/opal_free_list.h"
#include "opal/sys/atomic.h" #include "opal/sys/atomic.h"
#include "opal/mca/btl/btl.h" #include "opal/mca/btl/btl.h"
#include "opal/mca/mpool/mpool.h" #include "opal/mca/rcache/rcache.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/btl/base/base.h" #include "opal/mca/btl/base/base.h"
#include "opal/mca/rcache/rcache.h" #include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h" #include "opal/mca/rcache/base/base.h"
@ -152,6 +151,9 @@ struct mca_btl_vader_t {
mca_btl_base_module_error_cb_fn_t error_cb; mca_btl_base_module_error_cb_fn_t error_cb;
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int knem_fd; int knem_fd;
/* registration cache */
mca_rcache_base_module_t *knem_rcache;
#endif #endif
}; };
typedef struct mca_btl_vader_t mca_btl_vader_t; typedef struct mca_btl_vader_t mca_btl_vader_t;

Просмотреть файл

@ -573,11 +573,12 @@ void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_ba
segments[0].seg_len = hdr->len; segments[0].seg_len = hdr->len;
if (hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) { if (hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) {
mca_mpool_base_registration_t *xpmem_reg; mca_rcache_base_registration_t *xpmem_reg;
xpmem_reg = vader_get_registation (endpoint, hdr->sc_iov.iov_base, xpmem_reg = vader_get_registation (endpoint, hdr->sc_iov.iov_base,
hdr->sc_iov.iov_len, 0, hdr->sc_iov.iov_len, 0,
&segments[1].seg_addr.pval); &segments[1].seg_addr.pval);
assert (NULL != xpmem_reg);
segments[1].seg_len = hdr->sc_iov.iov_len; segments[1].seg_len = hdr->sc_iov.iov_len;
frag.des_segment_count = 2; frag.des_segment_count = 2;

Просмотреть файл

@ -28,6 +28,7 @@
#include "opal_config.h" #include "opal_config.h"
#include "btl_vader_xpmem.h" #include "btl_vader_xpmem.h"
#include "opal/mca/rcache/base/rcache_base_vma.h"
#define MCA_BTL_VADER_FBOX_ALIGNMENT 32 #define MCA_BTL_VADER_FBOX_ALIGNMENT 32
#define MCA_BTL_VADER_FBOX_ALIGNMENT_MASK (MCA_BTL_VADER_FBOX_ALIGNMENT - 1) #define MCA_BTL_VADER_FBOX_ALIGNMENT_MASK (MCA_BTL_VADER_FBOX_ALIGNMENT - 1)
@ -74,7 +75,7 @@ typedef struct mca_btl_base_endpoint_t {
union { union {
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
struct { struct {
struct mca_rcache_base_module_t *rcache; mca_rcache_base_vma_module_t *vma_module;
xpmem_apid_t apid; /**< xpmem apid for remote peer */ xpmem_apid_t apid; /**< xpmem apid for remote peer */
} xpmem; } xpmem;
#endif #endif

Просмотреть файл

@ -38,7 +38,7 @@ int mca_btl_vader_get_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_mpool_base_registration_t *reg; mca_rcache_base_registration_t *reg;
void *rem_ptr; void *rem_ptr;
/* silence warning about unused arguments */ /* silence warning about unused arguments */

Просмотреть файл

@ -19,12 +19,11 @@
#include <unistd.h> #include <unistd.h>
#include "opal/util/show_help.h" #include "opal/util/show_help.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h"
OBJ_CLASS_INSTANCE(mca_btl_vader_registration_handle_t, mca_mpool_base_registration_t, NULL, NULL); OBJ_CLASS_INSTANCE(mca_btl_vader_registration_handle_t, mca_rcache_base_registration_t, NULL, NULL);
static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size, static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg) mca_rcache_base_registration_t *reg)
{ {
mca_btl_vader_registration_handle_t *knem_reg = (mca_btl_vader_registration_handle_t *) reg; mca_btl_vader_registration_handle_t *knem_reg = (mca_btl_vader_registration_handle_t *) reg;
struct knem_cmd_create_region knem_cr; struct knem_cmd_create_region knem_cr;
@ -37,11 +36,11 @@ static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size,
knem_cr.iovec_nr = 1; knem_cr.iovec_nr = 1;
knem_cr.protection = 0; knem_cr.protection = 0;
if (reg->access_flags & (MCA_MPOOL_ACCESS_LOCAL_WRITE | MCA_MPOOL_ACCESS_REMOTE_WRITE)) { if (reg->access_flags & (MCA_RCACHE_ACCESS_LOCAL_WRITE | MCA_RCACHE_ACCESS_REMOTE_WRITE)) {
knem_cr.protection |= PROT_WRITE; knem_cr.protection |= PROT_WRITE;
} }
if (reg->access_flags & MCA_MPOOL_ACCESS_REMOTE_READ) { if (reg->access_flags & MCA_RCACHE_ACCESS_REMOTE_READ) {
knem_cr.protection |= PROT_READ; knem_cr.protection |= PROT_READ;
} }
@ -57,7 +56,7 @@ static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
static int mca_btl_vader_knem_dereg (void *reg_data, mca_mpool_base_registration_t *reg) static int mca_btl_vader_knem_dereg (void *reg_data, mca_rcache_base_registration_t *reg)
{ {
mca_btl_vader_registration_handle_t *knem_reg = (mca_btl_vader_registration_handle_t *) reg; mca_btl_vader_registration_handle_t *knem_reg = (mca_btl_vader_registration_handle_t *) reg;
@ -72,12 +71,14 @@ mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags) void *base, size_t size, uint32_t flags)
{ {
mca_btl_vader_t *vader_module = (mca_btl_vader_t *) btl;
mca_btl_vader_registration_handle_t *reg = NULL; mca_btl_vader_registration_handle_t *reg = NULL;
int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY; int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
int rc; int rc;
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, 0, access_flags, rc = vader_module->knem_rcache->rcache_register (vader_module->knem_rcache, base, size, 0,
(mca_mpool_base_registration_t **) &reg); access_flags,
(mca_rcache_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return NULL; return NULL;
} }
@ -88,18 +89,19 @@ mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl,
static int static int
mca_btl_vader_deregister_mem_knem (struct mca_btl_base_module_t *btl, struct mca_btl_base_registration_handle_t *handle) mca_btl_vader_deregister_mem_knem (struct mca_btl_base_module_t *btl, struct mca_btl_base_registration_handle_t *handle)
{ {
mca_btl_vader_t *vader_module = (mca_btl_vader_t *) btl;
mca_btl_vader_registration_handle_t *reg = mca_btl_vader_registration_handle_t *reg =
(mca_btl_vader_registration_handle_t *)((intptr_t) handle - offsetof (mca_btl_vader_registration_handle_t, btl_handle)); (mca_btl_vader_registration_handle_t *)((intptr_t) handle - offsetof (mca_btl_vader_registration_handle_t, btl_handle));
btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base); vader_module->knem_rcache->rcache_deregister (vader_module->knem_rcache, &reg->base);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
int mca_btl_vader_knem_init (void) int mca_btl_vader_knem_init (void)
{ {
mca_mpool_base_resources_t mpool_resources = { mca_rcache_base_resources_t rcache_resources = {
.pool_name = "vader", .reg_data = NULL, .cache_name = "vader", .reg_data = NULL,
.sizeof_reg = sizeof (mca_btl_vader_registration_handle_t), .sizeof_reg = sizeof (mca_btl_vader_registration_handle_t),
.register_mem = mca_btl_vader_knem_reg, .register_mem = mca_btl_vader_knem_reg,
.deregister_mem = mca_btl_vader_knem_dereg .deregister_mem = mca_btl_vader_knem_dereg
@ -107,6 +109,7 @@ int mca_btl_vader_knem_init (void)
struct knem_cmd_info knem_info; struct knem_cmd_info knem_info;
int rc; int rc;
signal (SIGSEGV, SIG_DFL);
/* Open the knem device. Try to print a helpful message if we /* Open the knem device. Try to print a helpful message if we
fail to open it. */ fail to open it. */
mca_btl_vader.knem_fd = open("/dev/knem", O_RDWR); mca_btl_vader.knem_fd = open("/dev/knem", O_RDWR);
@ -130,6 +133,7 @@ int mca_btl_vader_knem_init (void)
do { do {
/* Check that the ABI of the kernel module running is the same /* Check that the ABI of the kernel module running is the same
* as what we were compiled against. */ * as what we were compiled against. */
memset (&knem_info, 0, sizeof (knem_info));
rc = ioctl(mca_btl_vader.knem_fd, KNEM_CMD_GET_INFO, &knem_info); rc = ioctl(mca_btl_vader.knem_fd, KNEM_CMD_GET_INFO, &knem_info);
if (rc < 0) { if (rc < 0) {
opal_show_help("help-btl-vader.txt", "knem get ABI fail", opal_show_help("help-btl-vader.txt", "knem get ABI fail",
@ -161,9 +165,9 @@ int mca_btl_vader_knem_init (void)
mca_btl_vader.super.btl_deregister_mem = mca_btl_vader_deregister_mem_knem; mca_btl_vader.super.btl_deregister_mem = mca_btl_vader_deregister_mem_knem;
mca_btl_vader.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_vader.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
mca_btl_vader.super.btl_mpool = mca_mpool_base_module_create ("grdma", NULL, mca_btl_vader.knem_rcache = mca_rcache_base_module_create ("grdma", NULL,
&mpool_resources); &rcache_resources);
if (NULL == mca_btl_vader.super.btl_mpool) { if (NULL == mca_btl_vader.knem_rcache) {
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
@ -182,9 +186,9 @@ int mca_btl_vader_knem_fini (void)
mca_btl_vader.knem_fd = -1; mca_btl_vader.knem_fd = -1;
} }
if (mca_btl_vader.super.btl_mpool) { if (mca_btl_vader.knem_rcache) {
(void) mca_mpool_base_module_destroy (mca_btl_vader.super.btl_mpool); (void) mca_rcache_base_module_destroy (mca_btl_vader.knem_rcache);
mca_btl_vader.super.btl_mpool = NULL; mca_btl_vader.knem_rcache = NULL;
} }
return OPAL_SUCCESS; return OPAL_SUCCESS;

Просмотреть файл

@ -24,7 +24,7 @@ struct mca_btl_base_registration_handle_t {
}; };
struct mca_btl_vader_registration_handle_t { struct mca_btl_vader_registration_handle_t {
mca_mpool_base_registration_t base; mca_rcache_base_registration_t base;
mca_btl_base_registration_handle_t btl_handle; mca_btl_base_registration_handle_t btl_handle;
}; };
typedef struct mca_btl_vader_registration_handle_t mca_btl_vader_registration_handle_t; typedef struct mca_btl_vader_registration_handle_t mca_btl_vader_registration_handle_t;

Просмотреть файл

@ -171,9 +171,9 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
/* always use xpmem if it is available */ /* always use xpmem if it is available */
ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666); ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
ep->segment_data.xpmem.rcache = mca_rcache_base_module_create("vma"); ep->segment_data.xpmem.vma_module = mca_rcache_base_vma_module_alloc ();
(void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size, (void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size,
MCA_MPOOL_FLAGS_PERSIST, (void **) &ep->segment_base); MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base);
} else { } else {
#endif #endif
/* store a copy of the segment information for detach */ /* store a copy of the segment information for detach */
@ -434,6 +434,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
int rc; int rc;
opal_convertor_get_current_pointer (convertor, &data_ptr); opal_convertor_get_current_pointer (convertor, &data_ptr);
assert (NULL != data_ptr);
/* in place send fragment */ /* in place send fragment */
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) { if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
@ -545,16 +546,15 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
if (ep->segment_data.xpmem.rcache) { if (ep->segment_data.xpmem.vma_module) {
/* clean out the registration cache */ /* clean out the registration cache */
const int nregs = 100; const int nregs = 100;
mca_mpool_base_registration_t *regs[nregs]; mca_rcache_base_registration_t *regs[nregs];
int reg_cnt; int reg_cnt;
do { do {
reg_cnt = ep->segment_data.xpmem.rcache->rcache_find_all(ep->segment_data.xpmem.rcache, 0, (size_t)-1, reg_cnt = mca_rcache_base_vma_find_all (ep->segment_data.xpmem.vma_module,
regs, nregs); 0, (size_t) -1, regs, nregs);
for (int i = 0 ; i < reg_cnt ; ++i) { for (int i = 0 ; i < reg_cnt ; ++i) {
/* otherwise dereg will fail on assert */ /* otherwise dereg will fail on assert */
regs[i]->ref_count = 0; regs[i]->ref_count = 0;
@ -562,7 +562,7 @@ static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
} }
} while (reg_cnt == nregs); } while (reg_cnt == nregs);
ep->segment_data.xpmem.rcache = NULL; ep->segment_data.xpmem.vma_module = NULL;
} }
if (ep->segment_base) { if (ep->segment_base) {

Просмотреть файл

@ -40,7 +40,7 @@ int mca_btl_vader_put_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_mpool_base_registration_t *reg; mca_rcache_base_registration_t *reg;
void *rem_ptr; void *rem_ptr;
reg = vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &rem_ptr); reg = vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &rem_ptr);

Просмотреть файл

@ -34,13 +34,14 @@ int mca_btl_vader_xpmem_init (void)
/* look up the remote pointer in the peer rcache and attach if /* look up the remote pointer in the peer rcache and attach if
* necessary */ * necessary */
mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr,
size_t size, int flags, void **local_ptr) size_t size, int flags, void **local_ptr)
{ {
struct mca_rcache_base_module_t *rcache = ep->segment_data.xpmem.rcache; mca_rcache_base_vma_module_t *vma_module = ep->segment_data.xpmem.vma_module;
mca_mpool_base_registration_t *regs[10], *reg = NULL; mca_rcache_base_registration_t *regs[10], *reg = NULL;
xpmem_addr_t xpmem_addr; xpmem_addr_t xpmem_addr;
uintptr_t base, bound; uintptr_t base, bound;
uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align;
int rc, i; int rc, i;
/* protect rcache access */ /* protect rcache access */
@ -49,15 +50,14 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
/* use btl/self for self communication */ /* use btl/self for self communication */
assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK); assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK);
base = (uintptr_t) down_align_addr(rem_ptr, mca_btl_vader_component.log_attach_align); base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t);
bound = (uintptr_t) up_align_addr((void *)((uintptr_t) rem_ptr + size - 1), bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1;
mca_btl_vader_component.log_attach_align) + 1;
if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) {
bound = VADER_MAX_ADDRESS; bound = VADER_MAX_ADDRESS;
} }
/* several segments may match the base pointer */ /* several segments may match the base pointer */
rc = rcache->rcache_find_all (rcache, (void *) base, bound - base, regs, 10); rc = mca_rcache_base_vma_find_all (vma_module, (void *) base, bound - base, regs, 10);
for (i = 0 ; i < rc ; ++i) { for (i = 0 ; i < rc ; ++i) {
if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) { if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) {
(void)opal_atomic_add (&regs[i]->ref_count, 1); (void)opal_atomic_add (&regs[i]->ref_count, 1);
@ -65,13 +65,13 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
goto reg_found; goto reg_found;
} }
if (regs[i]->flags & MCA_MPOOL_FLAGS_PERSIST) { if (regs[i]->flags & MCA_RCACHE_FLAGS_PERSIST) {
continue; continue;
} }
/* remove this pointer from the rcache and decrement its reference count /* remove this pointer from the rcache and decrement its reference count
(so it is detached later) */ (so it is detached later) */
rc = rcache->rcache_delete (rcache, regs[i]); rc = mca_rcache_base_vma_delete (vma_module, regs[i]);
if (OPAL_UNLIKELY(0 != rc)) { if (OPAL_UNLIKELY(0 != rc)) {
/* someone beat us to it? */ /* someone beat us to it? */
break; break;
@ -84,14 +84,14 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
if (OPAL_LIKELY(0 == regs[i]->ref_count)) { if (OPAL_LIKELY(0 == regs[i]->ref_count)) {
/* this pointer is not in use */ /* this pointer is not in use */
(void) xpmem_detach (regs[i]->alloc_base); (void) xpmem_detach (regs[i]->rcache_context);
OBJ_RELEASE(regs[i]); OBJ_RELEASE(regs[i]);
} }
break; break;
} }
reg = OBJ_NEW(mca_mpool_base_registration_t); reg = OBJ_NEW(mca_rcache_base_registration_t);
if (OPAL_LIKELY(NULL != reg)) { if (OPAL_LIKELY(NULL != reg)) {
/* stick around for awhile */ /* stick around for awhile */
reg->ref_count = 2; reg->ref_count = 2;
@ -106,21 +106,21 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
#endif #endif
xpmem_addr.offset = base; xpmem_addr.offset = base;
reg->alloc_base = xpmem_attach (xpmem_addr, bound - base, NULL); reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL);
if (OPAL_UNLIKELY((void *)-1 == reg->alloc_base)) { if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) {
OPAL_THREAD_UNLOCK(&ep->lock); OPAL_THREAD_UNLOCK(&ep->lock);
OBJ_RELEASE(reg); OBJ_RELEASE(reg);
return NULL; return NULL;
} }
opal_memchecker_base_mem_defined (reg->alloc_base, bound - base); opal_memchecker_base_mem_defined (reg->rcache_context, bound - base);
rcache->rcache_insert (rcache, reg, 0); mca_rcache_base_vma_insert (vma_module, reg, 0);
} }
reg_found: reg_found:
opal_atomic_wmb (); opal_atomic_wmb ();
*local_ptr = (void *) ((uintptr_t) reg->alloc_base + *local_ptr = (void *) ((uintptr_t) reg->rcache_context +
(ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base)); (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base));
OPAL_THREAD_UNLOCK(&ep->lock); OPAL_THREAD_UNLOCK(&ep->lock);
@ -128,20 +128,20 @@ reg_found:
return reg; return reg;
} }
void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep) void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
{ {
struct mca_rcache_base_module_t *rcache = ep->segment_data.xpmem.rcache; mca_rcache_base_vma_module_t *vma_module = ep->segment_data.xpmem.vma_module;
int32_t ref_count; int32_t ref_count;
ref_count = opal_atomic_add_32 (&reg->ref_count, -1); ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_MPOOL_FLAGS_PERSIST))) { if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
/* protect rcache access */ /* protect rcache access */
OPAL_THREAD_LOCK(&ep->lock); OPAL_THREAD_LOCK(&ep->lock);
rcache->rcache_delete (rcache, reg); mca_rcache_base_vma_delete (vma_module, reg);
OPAL_THREAD_UNLOCK(&ep->lock); OPAL_THREAD_UNLOCK(&ep->lock);
opal_memchecker_base_mem_noaccess (reg->alloc_base, (uintptr_t)(reg->bound - reg->base)); opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
(void)xpmem_detach (reg->alloc_base); (void)xpmem_detach (reg->rcache_context);
OBJ_RELEASE (reg); OBJ_RELEASE (reg);
} }
} }

Просмотреть файл

@ -25,6 +25,9 @@
typedef int64_t xpmem_apid_t; typedef int64_t xpmem_apid_t;
#endif #endif
#include <opal/mca/rcache/base/rcache_base_vma.h>
#include <opal/mca/rcache/rcache.h>
/* look up the remote pointer in the peer rcache and attach if /* look up the remote pointer in the peer rcache and attach if
* necessary */ * necessary */
@ -33,14 +36,14 @@
int mca_btl_vader_xpmem_init (void); int mca_btl_vader_xpmem_init (void);
mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr, mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
size_t size, int flags, void **local_ptr); size_t size, int flags, void **local_ptr);
void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint); void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint);
#else #else
static inline mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr, static inline mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
size_t size, int flags, void **local_ptr) size_t size, int flags, void **local_ptr)
{ {
(void) endpoint; (void) endpoint;
@ -51,7 +54,7 @@ static inline mca_mpool_base_registration_t *vader_get_registation (struct mca_b
return NULL; return NULL;
} }
static inline void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint) static inline void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint)
{ {
(void) reg; (void) reg;
(void) endpoint; (void) endpoint;

Просмотреть файл

@ -38,7 +38,7 @@
#include "opal/util/proc.h" #include "opal/util/proc.h"
#include "opal/util/argv.h" #include "opal/util/argv.h"
#include "opal/mca/mpool/base/base.h" #include "opal/mca/rcache/base/base.h"
#include "opal/runtime/opal_params.h" #include "opal/runtime/opal_params.h"
#include "opal/mca/timer/base/base.h" #include "opal/mca/timer/base/base.h"
#include "opal/mca/dl/base/base.h" #include "opal/mca/dl/base/base.h"
@ -712,7 +712,7 @@ static int mca_common_cuda_stage_three_init(void)
OPAL_PROC_MY_HOSTNAME, res, mem_reg->msg); OPAL_PROC_MY_HOSTNAME, res, mem_reg->msg);
} else { } else {
opal_output_verbose(20, mca_common_cuda_output, opal_output_verbose(20, mca_common_cuda_output,
"CUDA: cuMemHostRegister OK on mpool %s: " "CUDA: cuMemHostRegister OK on rcache %s: "
"address=%p, bufsize=%d", "address=%p, bufsize=%d",
mem_reg->msg, mem_reg->ptr, (int)mem_reg->amount); mem_reg->msg, mem_reg->ptr, (int)mem_reg->amount);
} }
@ -795,7 +795,7 @@ static int mca_common_cuda_stage_three_init(void)
* Cleanup all CUDA resources. * Cleanup all CUDA resources.
* *
* Note: Still figuring out how to get cuMemHostUnregister called from the smcuda sm * Note: Still figuring out how to get cuMemHostUnregister called from the smcuda sm
* mpool. Looks like with the memory pool from openib (grdma), the unregistering is * rcache. Looks like with the memory pool from openib (grdma), the unregistering is
* called as the free list is destructed. Not true for the sm mpool. This means we * called as the free list is destructed. Not true for the sm mpool. This means we
* are currently still leaking some host memory we registered with CUDA. * are currently still leaking some host memory we registered with CUDA.
*/ */
@ -949,7 +949,7 @@ void mca_common_cuda_register(void *ptr, size_t amount, char *msg) {
OPAL_PROC_MY_HOSTNAME, res, msg); OPAL_PROC_MY_HOSTNAME, res, msg);
} else { } else {
opal_output_verbose(20, mca_common_cuda_output, opal_output_verbose(20, mca_common_cuda_output,
"CUDA: cuMemHostRegister OK on mpool %s: " "CUDA: cuMemHostRegister OK on rcache %s: "
"address=%p, bufsize=%d", "address=%p, bufsize=%d",
msg, ptr, (int)amount); msg, ptr, (int)amount);
} }
@ -984,12 +984,12 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
/* If unregistering the memory fails, just continue. This is during /* If unregistering the memory fails, just continue. This is during
* shutdown. Only print when running in verbose mode. */ * shutdown. Only print when running in verbose mode. */
opal_output_verbose(20, mca_common_cuda_output, opal_output_verbose(20, mca_common_cuda_output,
"CUDA: cuMemHostUnregister failed: ptr=%p, res=%d, mpool=%s", "CUDA: cuMemHostUnregister failed: ptr=%p, res=%d, rcache=%s",
ptr, res, msg); ptr, res, msg);
} else { } else {
opal_output_verbose(20, mca_common_cuda_output, opal_output_verbose(20, mca_common_cuda_output,
"CUDA: cuMemHostUnregister OK on mpool %s: " "CUDA: cuMemHostUnregister OK on rcache %s: "
"address=%p", "address=%p",
msg, ptr); msg, ptr);
} }
@ -1001,8 +1001,8 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
* to the remote side so it can access the memory. This is the * to the remote side so it can access the memory. This is the
* registration function for the sending side of a message transfer. * registration function for the sending side of a message transfer.
*/ */
int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg, int cuda_getmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg) mca_rcache_base_registration_t *hdrreg)
{ {
CUmemorytype memType; CUmemorytype memType;
@ -1011,7 +1011,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
CUdeviceptr pbase; CUdeviceptr pbase;
size_t psize; size_t psize;
mca_mpool_common_cuda_reg_t *cuda_reg = (mca_mpool_common_cuda_reg_t*)newreg; mca_rcache_common_cuda_reg_t *cuda_reg = (mca_rcache_common_cuda_reg_t*)newreg;
memHandle = (CUipcMemHandle *)cuda_reg->data.memHandle; memHandle = (CUipcMemHandle *)cuda_reg->data.memHandle;
/* We should only be there if this is a CUDA device pointer */ /* We should only be there if this is a CUDA device pointer */
@ -1090,11 +1090,11 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
* This function is called by the local side that called the cuda_getmemhandle. * This function is called by the local side that called the cuda_getmemhandle.
* There is nothing to be done so just return. * There is nothing to be done so just return.
*/ */
int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg) int cuda_ungetmemhandle(void *reg_data, mca_rcache_base_registration_t *reg)
{ {
opal_output_verbose(10, mca_common_cuda_output, opal_output_verbose(10, mca_common_cuda_output,
"CUDA: cuda_ungetmemhandle (no-op): base=%p", reg->base); "CUDA: cuda_ungetmemhandle (no-op): base=%p", reg->base);
CUDA_DUMP_MEMHANDLE((100, ((mca_mpool_common_cuda_reg_t *)reg)->data.memHandle, "cuda_ungetmemhandle")); CUDA_DUMP_MEMHANDLE((100, ((mca_rcache_common_cuda_reg_t *)reg)->data.memHandle, "cuda_ungetmemhandle"));
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -1105,12 +1105,12 @@ int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg)
* remote side of a transfer. newreg contains the new handle. hdrreg contains * remote side of a transfer. newreg contains the new handle. hdrreg contains
* the memory handle that was received from the remote side. * the memory handle that was received from the remote side.
*/ */
int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg, int cuda_openmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg) mca_rcache_base_registration_t *hdrreg)
{ {
CUresult result; CUresult result;
CUipcMemHandle *memHandle; CUipcMemHandle *memHandle;
mca_mpool_common_cuda_reg_t *cuda_newreg = (mca_mpool_common_cuda_reg_t*)newreg; mca_rcache_common_cuda_reg_t *cuda_newreg = (mca_rcache_common_cuda_reg_t*)newreg;
/* Save in local variable to avoid ugly casting */ /* Save in local variable to avoid ugly casting */
memHandle = (CUipcMemHandle *)cuda_newreg->data.memHandle; memHandle = (CUipcMemHandle *)cuda_newreg->data.memHandle;
@ -1147,10 +1147,10 @@ int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *n
/* /*
* Close a memory handle that refers to remote memory. * Close a memory handle that refers to remote memory.
*/ */
int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg) int cuda_closememhandle(void *reg_data, mca_rcache_base_registration_t *reg)
{ {
CUresult result; CUresult result;
mca_mpool_common_cuda_reg_t *cuda_reg = (mca_mpool_common_cuda_reg_t*)reg; mca_rcache_common_cuda_reg_t *cuda_reg = (mca_rcache_common_cuda_reg_t*)reg;
/* Only attempt to close if we have valid context. This can change if a call /* Only attempt to close if we have valid context. This can change if a call
* to the fini function is made and we discover context is gone. */ * to the fini function is made and we discover context is gone. */
@ -1213,7 +1213,7 @@ void mca_common_cuda_destruct_event(uintptr_t event)
* Put remote event on stream to ensure that the copy does not * Put remote event on stream to ensure that the copy does not
* start until the completion of the event. * start until the completion of the event.
*/ */
void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg) void mca_common_wait_stream_synchronize(mca_rcache_common_cuda_reg_t *rget_reg)
{ {
#if OPAL_CUDA_SYNC_MEMOPS #if OPAL_CUDA_SYNC_MEMOPS
/* No need for any of this with SYNC_MEMOPS feature */ /* No need for any of this with SYNC_MEMOPS feature */
@ -1643,8 +1643,8 @@ int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **frag) {
* Need to make sure the handle we are retrieving from the cache is still * Need to make sure the handle we are retrieving from the cache is still
* valid. Compare the cached handle to the one received. * valid. Compare the cached handle to the one received.
*/ */
int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg, int mca_common_cuda_memhandle_matches(mca_rcache_common_cuda_reg_t *new_reg,
mca_mpool_common_cuda_reg_t *old_reg) mca_rcache_common_cuda_reg_t *old_reg)
{ {
if (0 == memcmp(new_reg->data.memHandle, old_reg->data.memHandle, sizeof(new_reg->data.memHandle))) { if (0 == memcmp(new_reg->data.memHandle, old_reg->data.memHandle, sizeof(new_reg->data.memHandle))) {
@ -2008,7 +2008,7 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
* not matching the BUFFER_ID of the buffer we are checking. Return false * not matching the BUFFER_ID of the buffer we are checking. Return false
* if the registration is still good. * if the registration is still good.
*/ */
bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg) bool mca_common_cuda_previously_freed_memory(mca_rcache_base_registration_t *reg)
{ {
int res; int res;
unsigned long long bufID; unsigned long long bufID;
@ -2040,7 +2040,7 @@ bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg)
* Also set SYNC_MEMOPS on any GPU registration to ensure that * Also set SYNC_MEMOPS on any GPU registration to ensure that
* synchronous copies complete before the buffer is accessed. * synchronous copies complete before the buffer is accessed.
*/ */
void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg) void mca_common_cuda_get_buffer_id(mca_rcache_base_registration_t *reg)
{ {
int res; int res;
unsigned long long bufID = 0; unsigned long long bufID = 0;

Просмотреть файл

@ -28,20 +28,20 @@
#define MEMHANDLE_SIZE 8 #define MEMHANDLE_SIZE 8
#define EVTHANDLE_SIZE 8 #define EVTHANDLE_SIZE 8
struct mca_mpool_common_cuda_reg_data_t { struct mca_rcache_common_cuda_reg_data_t {
uint64_t memHandle[MEMHANDLE_SIZE]; uint64_t memHandle[MEMHANDLE_SIZE];
uint64_t evtHandle[EVTHANDLE_SIZE]; uint64_t evtHandle[EVTHANDLE_SIZE];
uint64_t event; uint64_t event;
opal_ptr_t memh_seg_addr; opal_ptr_t memh_seg_addr;
size_t memh_seg_len; size_t memh_seg_len;
}; };
typedef struct mca_mpool_common_cuda_reg_data_t mca_mpool_common_cuda_reg_data_t; typedef struct mca_rcache_common_cuda_reg_data_t mca_rcache_common_cuda_reg_data_t;
struct mca_mpool_common_cuda_reg_t { struct mca_rcache_common_cuda_reg_t {
mca_mpool_base_registration_t base; mca_rcache_base_registration_t base;
mca_mpool_common_cuda_reg_data_t data; mca_rcache_common_cuda_reg_data_t data;
}; };
typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t; typedef struct mca_rcache_common_cuda_reg_t mca_rcache_common_cuda_reg_t;
extern bool mca_common_cuda_enabled; extern bool mca_common_cuda_enabled;
OPAL_DECLSPEC void mca_common_cuda_register_mca_variables(void); OPAL_DECLSPEC void mca_common_cuda_register_mca_variables(void);
@ -50,7 +50,7 @@ OPAL_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg)
OPAL_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg); OPAL_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);
OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg); OPAL_DECLSPEC void mca_common_wait_stream_synchronize(mca_rcache_common_cuda_reg_t *rget_reg);
OPAL_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg, OPAL_DECLSPEC int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
struct mca_btl_base_descriptor_t *, int *done); struct mca_btl_base_descriptor_t *, int *done);
@ -69,26 +69,26 @@ OPAL_DECLSPEC int progress_one_cuda_ipc_event(struct mca_btl_base_descriptor_t *
OPAL_DECLSPEC int progress_one_cuda_dtoh_event(struct mca_btl_base_descriptor_t **); OPAL_DECLSPEC int progress_one_cuda_dtoh_event(struct mca_btl_base_descriptor_t **);
OPAL_DECLSPEC int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **); OPAL_DECLSPEC int progress_one_cuda_htod_event(struct mca_btl_base_descriptor_t **);
OPAL_DECLSPEC int mca_common_cuda_memhandle_matches(mca_mpool_common_cuda_reg_t *new_reg, OPAL_DECLSPEC int mca_common_cuda_memhandle_matches(mca_rcache_common_cuda_reg_t *new_reg,
mca_mpool_common_cuda_reg_t *old_reg); mca_rcache_common_cuda_reg_t *old_reg);
OPAL_DECLSPEC void mca_common_cuda_construct_event_and_handle(uintptr_t *event, void *handle); OPAL_DECLSPEC void mca_common_cuda_construct_event_and_handle(uintptr_t *event, void *handle);
OPAL_DECLSPEC void mca_common_cuda_destruct_event(uintptr_t event); OPAL_DECLSPEC void mca_common_cuda_destruct_event(uintptr_t event);
OPAL_DECLSPEC int cuda_getmemhandle(void *base, size_t, mca_mpool_base_registration_t *newreg, OPAL_DECLSPEC int cuda_getmemhandle(void *base, size_t, mca_rcache_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg); mca_rcache_base_registration_t *hdrreg);
OPAL_DECLSPEC int cuda_ungetmemhandle(void *reg_data, mca_mpool_base_registration_t *reg); OPAL_DECLSPEC int cuda_ungetmemhandle(void *reg_data, mca_rcache_base_registration_t *reg);
OPAL_DECLSPEC int cuda_openmemhandle(void *base, size_t size, mca_mpool_base_registration_t *newreg, OPAL_DECLSPEC int cuda_openmemhandle(void *base, size_t size, mca_rcache_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg); mca_rcache_base_registration_t *hdrreg);
OPAL_DECLSPEC int cuda_closememhandle(void *reg_data, mca_mpool_base_registration_t *reg); OPAL_DECLSPEC int cuda_closememhandle(void *reg_data, mca_rcache_base_registration_t *reg);
OPAL_DECLSPEC int mca_common_cuda_get_device(int *devicenum); OPAL_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
OPAL_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2); OPAL_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
OPAL_DECLSPEC int mca_common_cuda_stage_one_init(void); OPAL_DECLSPEC int mca_common_cuda_stage_one_init(void);
OPAL_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base); OPAL_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
OPAL_DECLSPEC void mca_common_cuda_fini(void); OPAL_DECLSPEC void mca_common_cuda_fini(void);
#if OPAL_CUDA_GDR_SUPPORT #if OPAL_CUDA_GDR_SUPPORT
OPAL_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg); OPAL_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_rcache_base_registration_t *reg);
OPAL_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg); OPAL_DECLSPEC void mca_common_cuda_get_buffer_id(mca_rcache_base_registration_t *reg);
#endif /* OPAL_CUDA_GDR_SUPPORT */ #endif /* OPAL_CUDA_GDR_SUPPORT */
/** /**
* Return: 0 if no packing is required for sending (the upper layer * Return: 0 if no packing is required for sending (the upper layer

Просмотреть файл

@ -41,13 +41,13 @@ NOTE: You can turn off this warning by setting the MCA parameter
The call to cuMemHostRegister(%p, %d, 0) failed. The call to cuMemHostRegister(%p, %d, 0) failed.
Host: %s Host: %s
cuMemHostRegister return value: %d cuMemHostRegister return value: %d
Memory Pool: %s Registration cache: %s
# #
[cuMemHostRegister failed] [cuMemHostRegister failed]
The call to cuMemHostRegister(%p, %d, 0) failed. The call to cuMemHostRegister(%p, %d, 0) failed.
Host: %s Host: %s
cuMemHostRegister return value: %d cuMemHostRegister return value: %d
Memory Pool: %s Registration cache: %s
# #
[cuIpcGetMemHandle failed] [cuIpcGetMemHandle failed]
The call to cuIpcGetMemHandle failed. This means the GPU RDMA protocol The call to cuIpcGetMemHandle failed. This means the GPU RDMA protocol

Просмотреть файл

@ -10,7 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2010-2013 Los Alamos National Security, LLC. # Copyright (c) 2010-2015 Los Alamos National Security, LLC.
# All rights reserved. # All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
@ -22,7 +22,7 @@
# A word of explanation... # A word of explanation...
# #
# This library is linked against various MCA components because all # This library is linked against various MCA components because all
# shared-memory based components (e.g., mpool, ptl, etc.) need to # shared-memory based components (e.g., btl/sm, btl/smcuda, etc.) need to
# share some common code and data. There's two cases: # share some common code and data. There's two cases:
# #
# 1. libmca_common_sm.la is a shared library. By linking that shared # 1. libmca_common_sm.la is a shared library. By linking that shared
@ -44,12 +44,14 @@
# Header files # Header files
headers = \ headers = \
common_sm.h common_sm.h \
common_sm_mpool.h
# Source files # Source files
sources = \ sources = \
common_sm.c common_sm.c \
common_sm_mpool.c
# Help file # Help file

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -11,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2013 Los Alamos National Security, LLC. * Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved * Copyright (c) 2014 Intel, Inc. All rights reserved
* $COPYRIGHT$ * $COPYRIGHT$
@ -39,16 +40,13 @@
#if OPAL_ENABLE_FT_CR == 1 #if OPAL_ENABLE_FT_CR == 1
#include "opal/runtime/opal_cr.h" #include "opal/runtime/opal_cr.h"
#endif #endif
#include "common_sm.h"
#include "opal/constants.h" #include "opal/constants.h"
#include "opal/mca/mpool/sm/mpool_sm.h"
OBJ_CLASS_INSTANCE(
mca_common_sm_module_t, OBJ_CLASS_INSTANCE(mca_common_sm_module_t,opal_list_item_t,
opal_list_item_t, NULL, NULL);
NULL,
NULL
);
/* ////////////////////////////////////////////////////////////////////////// */ /* ////////////////////////////////////////////////////////////////////////// */
/* static utility functions */ /* static utility functions */
@ -258,13 +256,10 @@ mca_common_sm_local_proc_reorder(opal_proc_t **procs,
* *
* @retval addr virtual address * @retval addr virtual address
*/ */
void * void *mca_common_sm_seg_alloc (void *ctx, size_t *size)
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t *size,
mca_mpool_base_registration_t **registration)
{ {
mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t *)mpool; mca_common_sm_module_t *sm_module = (mca_common_sm_module_t *) ctx;
mca_common_sm_seg_header_t *seg = sm_module->sm_common_module->module_seg; mca_common_sm_seg_header_t *seg = sm_module->module_seg;
void *addr; void *addr;
opal_atomic_lock(&seg->seg_lock); opal_atomic_lock(&seg->seg_lock);
@ -275,7 +270,7 @@ mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t fixup; size_t fixup;
/* add base address to segment offset */ /* add base address to segment offset */
addr = sm_module->sm_common_module->module_data_addr + seg->seg_offset; addr = sm_module->module_data_addr + seg->seg_offset;
seg->seg_offset += *size; seg->seg_offset += *size;
/* fix up seg_offset so next allocation is aligned on a /* fix up seg_offset so next allocation is aligned on a
@ -286,9 +281,7 @@ mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
seg->seg_offset += sizeof(long) - fixup; seg->seg_offset += sizeof(long) - fixup;
} }
} }
if (NULL != registration) {
*registration = NULL;
}
opal_atomic_unlock(&seg->seg_lock); opal_atomic_unlock(&seg->seg_lock);
return addr; return addr;
} }

Просмотреть файл

@ -32,7 +32,7 @@
#include "opal/mca/btl/base/base.h" #include "opal/mca/btl/base/base.h"
#include "opal/util/proc.h" #include "opal/util/proc.h"
#include "opal/mca/btl/base/btl_base_error.h" #include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/mpool/mpool.h" #include "common_sm_mpool.h"
BEGIN_C_DECLS BEGIN_C_DECLS
@ -66,6 +66,8 @@ typedef struct mca_common_sm_module_t {
unsigned char *module_data_addr; unsigned char *module_data_addr;
/* shared memory backing facility object that encapsulates shmem info */ /* shared memory backing facility object that encapsulates shmem info */
opal_shmem_ds_t shmem_ds; opal_shmem_ds_t shmem_ds;
/* memory pool interface to shared-memory region */
mca_mpool_base_module_t *mpool;
} mca_common_sm_module_t; } mca_common_sm_module_t;
OBJ_CLASS_DECLARATION(mca_common_sm_module_t); OBJ_CLASS_DECLARATION(mca_common_sm_module_t);
@ -126,10 +128,7 @@ mca_common_sm_module_unlink(mca_common_sm_module_t *modp);
/** /**
* callback from the sm mpool * callback from the sm mpool
*/ */
OPAL_DECLSPEC extern void * OPAL_DECLSPEC extern void *mca_common_sm_seg_alloc (void *ctx, size_t *size);
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t *size,
mca_mpool_base_registration_t **registration);
/** /**
* This function will release all local resources attached to the * This function will release all local resources attached to the
@ -150,6 +149,7 @@ mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module);
*/ */
OPAL_DECLSPEC extern mca_common_sm_module_t *mca_common_sm_module; OPAL_DECLSPEC extern mca_common_sm_module_t *mca_common_sm_module;
END_C_DECLS END_C_DECLS
#endif /* _COMMON_SM_H_ */ #endif /* _COMMON_SM_H_ */

Просмотреть файл

@ -23,9 +23,10 @@
#include "opal_config.h" #include "opal_config.h"
#include <string.h> #include <string.h>
#include "opal/mca/mpool/sm/mpool_sm.h" #include "common_sm_mpool.h"
#include "opal/mca/common/sm/common_sm.h" #include "opal/mca/common/sm/common_sm.h"
#include "opal/mca/common/cuda/common_cuda.h" #include "opal/mca/common/cuda/common_cuda.h"
#include "opal/mca/allocator/base/base.h"
#ifdef HAVE_UNISTD_H #ifdef HAVE_UNISTD_H
#include <unistd.h> #include <unistd.h>
#endif #endif
@ -39,22 +40,42 @@
static void sm_module_finalize(mca_mpool_base_module_t* module); static void sm_module_finalize(mca_mpool_base_module_t* module);
/*
* Returns base address of shared memory mapping.
*/
static void *mca_common_sm_mpool_base (mca_mpool_base_module_t *mpool);
/**
* Allocate block of shared memory.
*/
static void *mca_common_sm_mpool_alloc (mca_mpool_base_module_t *mpool,
size_t size, size_t align,
uint32_t flags);
/**
* Free a block of shared memory.
*/
static void mca_common_sm_mpool_free(mca_mpool_base_module_t *mpool,
void *addr);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OPAL_SUCCESS or failure status
*/
static int mca_common_sm_mpool_ft_event (int state);
/* /*
* Initializes the mpool module. * Initializes the mpool module.
*/ */
void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool) static void mca_common_sm_mpool_module_init(mca_common_sm_mpool_module_t* mpool)
{ {
mpool->super.mpool_component = &mca_mpool_sm_component.super; mpool->super.mpool_base = mca_common_sm_mpool_base;
mpool->super.mpool_base = mca_mpool_sm_base; mpool->super.mpool_alloc = mca_common_sm_mpool_alloc;
mpool->super.mpool_alloc = mca_mpool_sm_alloc; mpool->super.mpool_free = mca_common_sm_mpool_free;
mpool->super.mpool_realloc = mca_mpool_sm_realloc;
mpool->super.mpool_free = mca_mpool_sm_free;
mpool->super.mpool_find = NULL;
mpool->super.mpool_register = NULL;
mpool->super.mpool_deregister = NULL;
mpool->super.mpool_release_memory = NULL;
mpool->super.mpool_finalize = sm_module_finalize; mpool->super.mpool_finalize = sm_module_finalize;
mpool->super.mpool_ft_event = mca_mpool_sm_ft_event; mpool->super.mpool_ft_event = mca_common_sm_mpool_ft_event;
mpool->super.flags = 0; mpool->super.flags = 0;
mpool->sm_size = 0; mpool->sm_size = 0;
@ -64,12 +85,74 @@ void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool)
mpool->mem_node = -1; mpool->mem_node = -1;
} }
/**
 * Create a shared-memory mpool module.
 *
 * Allocates and initializes a module, looks up the allocator component named
 * by resources->allocator (falling back to the first available allocator
 * component when that one cannot be found), attaches to the shared-memory
 * segment described by resources->bs_meta_buf, and initializes an allocator
 * that obtains its memory through mca_common_sm_seg_alloc().
 *
 * @param resources  pool size, memory node, allocator name and backing-store
 *                   metadata; dereferenced unconditionally, so must not be NULL
 *
 * @return pointer to the module's base mpool interface, or NULL if the module
 *         could not be allocated, no allocator component is available, the
 *         shared-memory segment could not be attached, or the allocator could
 *         not be initialized
 */
mca_mpool_base_module_t *common_sm_mpool_create (mca_common_sm_mpool_resources_t *resources)
{
    mca_common_sm_mpool_module_t *mpool_module;
    mca_allocator_base_component_t *allocator_component;

    /* Make a new mpool module */
    mpool_module = (mca_common_sm_mpool_module_t *) malloc (sizeof (*mpool_module));
    if (NULL == mpool_module) {
        /* module_init would dereference a NULL pointer */
        opal_output(0, "mca_common_sm_mpool_init: "
                    "unable to allocate mpool module");
        return NULL;
    }
    mca_common_sm_mpool_module_init(mpool_module);

    /* set sm_size */
    mpool_module->sm_size = resources->size;

    allocator_component = mca_allocator_component_lookup(resources->allocator);

    /* if specified allocator cannot be loaded - look for an alternative */
    if (NULL == allocator_component) {
        /* An alternative only exists when the component list is non-empty;
         * taking the "first" item of an empty list would yield the list
         * sentinel rather than a real component. */
        if (opal_list_get_size(&opal_allocator_base_framework.framework_components) > 0) {
            mca_base_component_list_item_t *item =
                (mca_base_component_list_item_t *)
                opal_list_get_first(&opal_allocator_base_framework.framework_components);
            allocator_component =
                (mca_allocator_base_component_t *)item->cli_component;
            opal_output(
                0, "mca_common_sm_mpool_init: "
                "unable to locate allocator: %s - using %s\n",
                resources->allocator,
                allocator_component->allocator_version.mca_component_name);
        } else {
            opal_output(0, "mca_common_sm_mpool_init: "
                        "unable to locate allocator: %s\n",
                        resources->allocator);
            free(mpool_module);
            return NULL;
        }
    }

    mpool_module->mem_node = resources->mem_node;

    if (NULL == (mpool_module->sm_common_module =
                 mca_common_sm_module_attach(&resources->bs_meta_buf,
                                             sizeof(mca_common_sm_module_t), 8))) {
        opal_output(0, "mca_common_sm_mpool_init: "
                    "unable to create shared memory mapping (%s)",
                    resources->bs_meta_buf.seg_name);
        free(mpool_module);
        return NULL;
    }

    /* setup allocator; the common sm module is handed to the allocator as the
     * context that mca_common_sm_seg_alloc() receives */
    mpool_module->sm_allocator =
        allocator_component->allocator_init (true, mca_common_sm_seg_alloc,
                                             NULL, mpool_module->sm_common_module);
    if (NULL == mpool_module->sm_allocator) {
        opal_output(0, "mca_common_sm_mpool_init: unable to initialize allocator");
        /* NOTE(review): sm_common_module was attached above and is not
         * released on this path -- confirm whether it should be torn down
         * the same way sm_module_finalize() does before freeing the module. */
        free(mpool_module);
        return NULL;
    }

    return &mpool_module->super;
}
/* /*
* base address of shared memory mapping * base address of shared memory mapping
*/ */
void* mca_mpool_sm_base(mca_mpool_base_module_t* mpool) static void *mca_common_sm_mpool_base(mca_mpool_base_module_t *mpool)
{ {
mca_mpool_sm_module_t *sm_mpool = (mca_mpool_sm_module_t*) mpool; mca_common_sm_mpool_module_t *sm_mpool = (mca_common_sm_mpool_module_t *) mpool;
return (NULL != sm_mpool->sm_common_module) ? return (NULL != sm_mpool->sm_common_module) ?
sm_mpool->sm_common_module->module_seg_addr : NULL; sm_mpool->sm_common_module->module_seg_addr : NULL;
} }
@ -77,43 +160,16 @@ void* mca_mpool_sm_base(mca_mpool_base_module_t* mpool)
/** /**
* allocate function * allocate function
*/ */
void* mca_mpool_sm_alloc( static void *mca_common_sm_mpool_alloc (mca_mpool_base_module_t* mpool,
mca_mpool_base_module_t* mpool, size_t size, size_t align, uint32_t flags)
size_t size,
size_t align,
uint32_t flags,
mca_mpool_base_registration_t** registration)
{ {
mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; mca_common_sm_mpool_module_t* mpool_sm = (mca_common_sm_mpool_module_t*)mpool;
opal_hwloc_base_memory_segment_t mseg; opal_hwloc_base_memory_segment_t mseg;
mseg.mbs_start_addr = mseg.mbs_start_addr =
mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, align, registration); mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, align);
if(mpool_sm->mem_node >= 0) { if (mpool_sm->mem_node >= 0) {
mseg.mbs_len = size;
opal_hwloc_base_membind(&mseg, 1, mpool_sm->mem_node);
}
return mseg.mbs_start_addr;
}
/**
* realloc function
*/
void* mca_mpool_sm_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
mca_mpool_base_registration_t** registration)
{
mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool;
opal_hwloc_base_memory_segment_t mseg;
mseg.mbs_start_addr =
mpool_sm->sm_allocator->alc_realloc(mpool_sm->sm_allocator, addr, size,
registration);
if(mpool_sm->mem_node >= 0) {
mseg.mbs_len = size; mseg.mbs_len = size;
opal_hwloc_base_membind(&mseg, 1, mpool_sm->mem_node); opal_hwloc_base_membind(&mseg, 1, mpool_sm->mem_node);
} }
@ -124,16 +180,15 @@ void* mca_mpool_sm_realloc(
/** /**
* free function * free function
*/ */
void mca_mpool_sm_free(mca_mpool_base_module_t* mpool, void * addr, void mca_common_sm_mpool_free(mca_mpool_base_module_t *mpool, void *addr)
mca_mpool_base_registration_t* registration)
{ {
mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; mca_common_sm_mpool_module_t* mpool_sm = (mca_common_sm_mpool_module_t*)mpool;
mpool_sm->sm_allocator->alc_free(mpool_sm->sm_allocator, addr); mpool_sm->sm_allocator->alc_free(mpool_sm->sm_allocator, addr);
} }
static void sm_module_finalize(mca_mpool_base_module_t* module) static void sm_module_finalize(mca_mpool_base_module_t* module)
{ {
mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*) module; mca_common_sm_mpool_module_t *sm_module = (mca_common_sm_mpool_module_t*) module;
if (NULL != sm_module->sm_common_module) { if (NULL != sm_module->sm_common_module) {
if (OPAL_SUCCESS == if (OPAL_SUCCESS ==
@ -156,13 +211,13 @@ static void sm_module_finalize(mca_mpool_base_module_t* module)
} }
#if OPAL_ENABLE_FT_CR == 0 #if OPAL_ENABLE_FT_CR == 0
int mca_mpool_sm_ft_event(int state) { int mca_common_sm_mpool_ft_event(int state) {
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#else #else
int mca_mpool_sm_ft_event(int state) { int mca_common_sm_mpool_ft_event(int state) {
mca_mpool_base_module_t *self_module = NULL; mca_mpool_base_module_t *self_module = NULL;
mca_mpool_sm_module_t *self_sm_module = NULL; mca_common_sm_mpool_module_t *self_sm_module = NULL;
char * file_name = NULL; char * file_name = NULL;
if(OPAL_CRS_CHECKPOINT == state) { if(OPAL_CRS_CHECKPOINT == state) {
@ -181,7 +236,7 @@ int mca_mpool_sm_ft_event(int state) {
if (opal_cr_continue_like_restart) { if (opal_cr_continue_like_restart) {
/* Find the sm module */ /* Find the sm module */
self_module = mca_mpool_base_module_lookup("sm"); self_module = mca_mpool_base_module_lookup("sm");
self_sm_module = (mca_mpool_sm_module_t*) self_module; self_sm_module = (mca_common_sm_mpool_module_t*) self_module;
/* Mark the old sm file for eventual removal via CRS */ /* Mark the old sm file for eventual removal via CRS */
if (NULL != self_sm_module->sm_common_module) { if (NULL != self_sm_module->sm_common_module) {
@ -196,7 +251,7 @@ int mca_mpool_sm_ft_event(int state) {
OPAL_CRS_RESTART_PRE == state) { OPAL_CRS_RESTART_PRE == state) {
/* Find the sm module */ /* Find the sm module */
self_module = mca_mpool_base_module_lookup("sm"); self_module = mca_mpool_base_module_lookup("sm");
self_sm_module = (mca_mpool_sm_module_t*) self_module; self_sm_module = (mca_common_sm_mpool_module_t*) self_module;
/* Mark the old sm file for eventual removal via CRS */ /* Mark the old sm file for eventual removal via CRS */
if (NULL != self_sm_module->sm_common_module) { if (NULL != self_sm_module->sm_common_module) {

62
opal/mca/common/sm/common_sm_mpool.h Обычный файл
Просмотреть файл

@ -0,0 +1,62 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_COMMON_SM_MPOOL_H
#define MCA_COMMON_SM_MPOOL_H
#include "opal_config.h"
#include "opal/mca/event/event.h"
#include "opal/mca/shmem/shmem.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/allocator/allocator.h"
BEGIN_C_DECLS
struct mca_common_sm_module_t;
/* Parameters consumed by common_sm_mpool_create() when building an sm mpool. */
typedef struct mca_common_sm_mpool_resources_t {
/* copied into the module's sm_size field */
size_t size;
/* copied into the module's mem_node field; allocations are bound to this
 * node only when the value is >= 0 */
int32_t mem_node;
/* name of the allocator component to look up */
const char *allocator;
/* backing store metadata */
opal_shmem_ds_t bs_meta_buf;
} mca_common_sm_mpool_resources_t;
/* Shared-memory mpool module; instances are created by common_sm_mpool_create(). */
typedef struct mca_common_sm_mpool_module_t {
/* base mpool interface; module pointers are cast to and from this type */
mca_mpool_base_module_t super;
/* set from the resources' size field at creation */
long sm_size;
/* allocator that services the mpool alloc/free calls */
mca_allocator_base_module_t *sm_allocator;
/* NOTE(review): not referenced by the code visible here -- confirm whether
 * this field is still used */
struct mca_common_sm_mpool_mmap_t *sm_mmap;
/* common sm module returned by mca_common_sm_module_attach() */
struct mca_common_sm_module_t *sm_common_module;
/* memory node allocations are bound to; negative disables binding */
int32_t mem_node;
} mca_common_sm_mpool_module_t;
/**
 * Create a shared-memory mpool module from the given resources.
 *
 * @return the module's base mpool interface, or NULL if no allocator
 *         component could be found, the shared-memory segment could not be
 *         attached, or the allocator could not be initialized
 */
OPAL_DECLSPEC mca_mpool_base_module_t *common_sm_mpool_create (mca_common_sm_mpool_resources_t *);
END_C_DECLS
#endif

Просмотреть файл

@ -10,6 +10,8 @@
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2015 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -19,15 +21,14 @@
headers += \ headers += \
base/base.h \ base/base.h \
base/mpool_base_mem_cb.h \
base/mpool_base_tree.h base/mpool_base_tree.h
libmca_mpool_la_SOURCES += \ libmca_mpool_la_SOURCES += \
base/mpool_base_frame.c \ base/mpool_base_frame.c \
base/mpool_base_init.c \
base/mpool_base_lookup.c \ base/mpool_base_lookup.c \
base/mpool_base_alloc.c \ base/mpool_base_alloc.c \
base/mpool_base_mem_cb.c \ base/mpool_base_tree.c \
base/mpool_base_tree.c base/mpool_base_default.c
dist_opaldata_DATA += \ dist_opaldata_DATA += \
base/help-mpool-base.txt base/help-mpool-base.txt

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -30,28 +33,10 @@
BEGIN_C_DECLS BEGIN_C_DECLS
static inline unsigned int my_log2(unsigned long val) {
unsigned int count = 0;
while(val > 0) {
val = val >> 1;
count++;
}
return count > 0 ? count-1: 0;
}
static inline void *down_align_addr(void* addr, unsigned int shift) {
return (void*) (((intptr_t) addr) & (~(intptr_t) 0) << shift);
}
static inline void *up_align_addr(void*addr, unsigned int shift) {
return (void*) ((((intptr_t) addr) | ~((~(intptr_t) 0) << shift)));
}
struct mca_mpool_base_selected_module_t { struct mca_mpool_base_selected_module_t {
opal_list_item_t super; opal_list_item_t super;
mca_mpool_base_component_t *mpool_component; mca_mpool_base_component_t *mpool_component;
mca_mpool_base_module_t *mpool_module; mca_mpool_base_module_t *mpool_module;
void* user_data;
struct mca_mpool_base_resources_t *mpool_resources;
}; };
typedef struct mca_mpool_base_selected_module_t mca_mpool_base_selected_module_t; typedef struct mca_mpool_base_selected_module_t mca_mpool_base_selected_module_t;
@ -65,24 +50,16 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_selected_module_t);
* Global functions for MCA: overall mpool open and close * Global functions for MCA: overall mpool open and close
*/ */
OPAL_DECLSPEC int mca_mpool_base_init(bool enable_progress_threads, bool enable_mpi_threads);
OPAL_DECLSPEC mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char* name); OPAL_DECLSPEC mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char* name);
OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_create(
const char* name,
void* user_data,
struct mca_mpool_base_resources_t* mpool_resources);
OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name); OPAL_DECLSPEC mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name);
OPAL_DECLSPEC int mca_mpool_base_module_destroy(mca_mpool_base_module_t *module);
/* /*
* Globals * Globals
*/ */
extern opal_list_t mca_mpool_base_modules; extern opal_list_t mca_mpool_base_modules;
OPAL_DECLSPEC extern uint32_t mca_mpool_base_page_size; extern mca_mpool_base_module_t *mca_mpool_base_default_module;
OPAL_DECLSPEC extern uint32_t mca_mpool_base_page_size_log; extern int mca_mpool_base_default_priority;
/* only used within base -- no need to DECLSPEC */
extern int mca_mpool_base_used_mem_hooks;
OPAL_DECLSPEC extern mca_base_framework_t opal_mpool_base_framework; OPAL_DECLSPEC extern mca_base_framework_t opal_mpool_base_framework;

Просмотреть файл

@ -30,31 +30,3 @@ PID: %d
%d additional leak%s recorded but %s not displayed here. Set the MCA %d additional leak%s recorded but %s not displayed here. Set the MCA
parameter mpi_show_mpi_alloc_mem_leaks to a larger number to see that parameter mpi_show_mpi_alloc_mem_leaks to a larger number to see that
many leaks, or set it to a negative number to see all leaks. many leaks, or set it to a negative number to see all leaks.
#
[leave pinned failed]
A process attempted to use the "leave pinned" MPI feature, but no
memory registration hooks were found on the system at run time. This
may be the result of running on a system that does not support memory
hooks or having some other software subvert Open MPI's use of the
memory hooks. You can disable Open MPI's use of memory hooks by
setting both the mpi_leave_pinned and mpi_leave_pinned_pipeline MCA
parameters to 0.
Open MPI will disable any transports that are attempting to use the
leave pinned functionality; your job may still run, but may fall back
to a slower network transport (such as TCP).
Mpool name: %s
Process: %s
Local host: %s
#
[cannot deregister in-use memory]
Open MPI intercepted a call to free memory that is still being used by
an ongoing MPI communication. This usually reflects an error in the
MPI application; it may signify memory corruption. Open MPI will now
abort your job.
Mpool name: %s
Local host: %s
Buffer address: %p
Buffer size: %lu

Просмотреть файл

@ -27,7 +27,6 @@
#include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/mpool.h"
#include "base.h" #include "base.h"
#include "mpool_base_tree.h" #include "mpool_base_tree.h"
#include "mpool_base_mem_cb.h"
#include "opal/threads/mutex.h" #include "opal/threads/mutex.h"
struct opal_info_t { struct opal_info_t {
@ -44,63 +43,24 @@ struct opal_info_t {
}; };
typedef struct opal_info_t opal_info_t; typedef struct opal_info_t opal_info_t;
/**
* Memory Pool Registration
*/
static void mca_mpool_base_registration_constructor( mca_mpool_base_registration_t * reg )
{
reg->mpool = NULL;
reg->base = NULL;
reg->bound = NULL;
reg->alloc_base = NULL;
reg->ref_count = 0;
reg->flags = 0;
}
static void mca_mpool_base_registration_destructor( mca_mpool_base_registration_t * reg )
{
}
OBJ_CLASS_INSTANCE(
mca_mpool_base_registration_t,
opal_free_list_item_t,
mca_mpool_base_registration_constructor,
mca_mpool_base_registration_destructor);
static void unregister_tree_item(mca_mpool_base_tree_item_t *mpool_tree_item) static void unregister_tree_item(mca_mpool_base_tree_item_t *mpool_tree_item)
{ {
mca_mpool_base_module_t *mpool; mca_mpool_base_module_t *mpool;
mca_mpool_base_registration_t *reg;
int i;
for(i = 1; i < mpool_tree_item->count; i++) { mpool = mpool_tree_item->mpool;
mpool = mpool_tree_item->mpools[i]; mpool->mpool_free(mpool, mpool_tree_item->key);
reg = mpool_tree_item->regs[i];
if(mpool && mpool->mpool_deregister) {
mpool->mpool_deregister(mpool, reg);
}
}
mpool = mpool_tree_item->mpools[0];
reg = mpool_tree_item->regs[0];
mpool->mpool_free(mpool, mpool_tree_item->key, reg);
} }
/** /**
* Function to allocate special memory according to what the user requests in * Function to allocate special memory according to what the user requests in
* the info object. * the info object.
* *
* If the user passes in a valid info structure then the function will
* try to allocate the memory and register it with every mpool that there is a
* key for it in the info struct. If it fails at registering the memory with
* one of the requested mpools, an error will be returned. Also, if there is a
* key in info that does not match any mpool, an error will be returned.
*
* If the info parameter is MPI_INFO_NULL, then this function will try to allocate * If the info parameter is MPI_INFO_NULL, then this function will try to allocate
* the memory and register it with as many mpools as possible. However, * the memory with the optionally named mpool or malloc and try to register the
* if any of the registratons fail the mpool will simply be ignored. * pointer with as many registration caches as possible. Registration caches that
* fail to register the region will be ignored. The mpool name can optionally be
* specified in the info object.
* *
* @param size the size of the memory area to allocate * @param size the size of the memory area to allocate
* @param info an info object which tells us what kind of memory to allocate * @param info an info object which tells us what kind of memory to allocate
@ -108,177 +68,38 @@ static void unregister_tree_item(mca_mpool_base_tree_item_t *mpool_tree_item)
* @retval pointer to the allocated memory * @retval pointer to the allocated memory
* @retval NULL on failure * @retval NULL on failure
*/ */
void *mca_mpool_base_alloc(size_t size, opal_info_t *info) void *mca_mpool_base_alloc(size_t size, opal_info_t *info, const char *hints)
{ {
opal_list_item_t * item; mca_mpool_base_tree_item_t *mpool_tree_item = NULL;
int num_modules = opal_list_get_size(&mca_mpool_base_modules);
int reg_module_num = 0, i;
mca_mpool_base_selected_module_t * current;
mca_mpool_base_selected_module_t * no_reg_function = NULL;
mca_mpool_base_selected_module_t ** has_reg_function = NULL;
mca_mpool_base_registration_t * registration;
mca_mpool_base_tree_item_t* mpool_tree_item = NULL;
mca_mpool_base_module_t *mpool; mca_mpool_base_module_t *mpool;
void * mem = NULL; void *mem = NULL;
#if defined(TODO_BTL_GB) #if defined(TODO_BTL_GB)
int flag = 0; int flag = 0;
bool match_found = false;
#endif /* defined(TODO_BTL_GB) */ #endif /* defined(TODO_BTL_GB) */
bool mpool_requested = false;
if(num_modules > 0) { mpool_tree_item = mca_mpool_base_tree_item_get ();
has_reg_function = (mca_mpool_base_selected_module_t **) if (!mpool_tree_item) {
malloc(num_modules * sizeof(mca_mpool_base_module_t *)); return NULL;
if(!has_reg_function)
goto out;
} }
mpool_tree_item = mca_mpool_base_tree_item_get();
if(!mpool_tree_item)
goto out;
mpool_tree_item->num_bytes = size; mpool_tree_item->num_bytes = size;
mpool_tree_item->count = 0; mpool_tree_item->count = 0;
#if defined(TODO_BTL_GB)
if(&ompi_mpi_info_null.info == info)
#endif /* defined(TODO_BTL_GB) */
{
for(item = opal_list_get_first(&mca_mpool_base_modules);
item != opal_list_get_end(&mca_mpool_base_modules);
item = opal_list_get_next(item)) {
current = ((mca_mpool_base_selected_module_t *) item);
if(current->mpool_module->flags & MCA_MPOOL_FLAGS_MPI_ALLOC_MEM) {
if(NULL == current->mpool_module->mpool_register){
no_reg_function = current;
}
else {
has_reg_function[reg_module_num++] = current;
}
}
}
}
#if defined(TODO_BTL_GB)
else
{
int num_keys;
char key[MPI_MAX_INFO_KEY + 1];
char value[MPI_MAX_INFO_VAL + 1];
ompi_info_get_nkeys(info, &num_keys); mpool = mca_mpool_base_module_lookup (hints);
for(i = 0; i < num_keys; i++) if (NULL != mpool) {
{ mem = mpool->mpool_alloc (mpool, size, 0, 0);
ompi_info_get_nthkey(info, i, key);
if ( 0 != strcmp(key, "mpool") ) {
continue;
}
mpool_requested = true;
ompi_info_get(info, key, MPI_MAX_INFO_VAL, value, &flag);
if ( !flag ) {
continue;
} }
match_found = false; if (NULL == mem) {
for(item = opal_list_get_first(&mca_mpool_base_modules); /* fall back on malloc */
item != opal_list_get_end(&mca_mpool_base_modules);
item = opal_list_get_next(item))
{
current = ((mca_mpool_base_selected_module_t *)item);
if(0 == strcmp(value,
current->mpool_module->mpool_component->mpool_version.mca_component_name))
{
match_found = true;
if(NULL == current->mpool_module->mpool_register)
{
if(NULL != no_reg_function)
{
/* there was more than one requested mpool that lacks
* a registration function, so return failure */
goto out;
}
no_reg_function = current;
}
else
{
has_reg_function[reg_module_num++] = current;
}
}
}
if(!match_found)
{
/* one of the keys given to us by the user did not match any
* mpools, so return an error */
goto out;
}
}
}
#endif /* defined(TODO_BTL_GB) */
if(NULL == no_reg_function && 0 == reg_module_num)
{
if(!mpool_requested)
{
/* if the info argument was NULL and there were no useable mpools
* or there user provided info object but did not specifiy a "mpool" key,
* just malloc the memory and return it */
mem = malloc(size); mem = malloc(size);
goto out;
}
/* the user passed info but we were not able to use any of the mpools mca_mpool_base_tree_item_put (mpool_tree_item);
* specified */
goto out;
}
for(i = -1; i < reg_module_num; i++) {
if(-1 == i) {
if(NULL != no_reg_function)
mpool = no_reg_function->mpool_module;
else
continue;
} else { } else {
mpool = has_reg_function[i]->mpool_module; mpool_tree_item->mpool = mpool;
mca_mpool_base_tree_insert (mpool_tree_item);
} }
if(NULL == mem) {
mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST,
&registration);
if(NULL == mem) {
if(mpool_requested)
goto out;
continue;
}
mpool_tree_item->key = mem;
mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
} else {
if(mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST,
MCA_MPOOL_ACCESS_ANY, &registration) != OPAL_SUCCESS) {
if(mpool_requested) {
unregister_tree_item(mpool_tree_item);
goto out;
}
continue;
}
mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
}
}
if(NULL == mem) {
mem = malloc(size);
goto out;
}
mca_mpool_base_tree_insert(mpool_tree_item);
mpool_tree_item = NULL; /* prevent it to be deleted below */
out:
if(mpool_tree_item)
mca_mpool_base_tree_item_put(mpool_tree_item);
if(has_reg_function)
free(has_reg_function);
return mem; return mem;
} }

85
opal/mca/mpool/base/mpool_base_default.c Обычный файл
Просмотреть файл

@ -0,0 +1,85 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/constants.h"
#include "opal/util/sys_limits.h"
/**
 * Allocate memory for the default (malloc-backed) mpool module.
 *
 * @param mpool  module the allocation is made on behalf of (not used here)
 * @param size   number of bytes requested
 * @param align  requested alignment in bytes; 0 or any value not larger than
 *               sizeof (void *) means "no special alignment"
 * @param flags  allocation flags (not used here)
 *
 * @retval pointer to the allocated memory; release it with
 *         mca_mpool_default_free()
 * @retval NULL if the allocation failed
 */
static void *mca_mpool_default_alloc (mca_mpool_base_module_t *mpool, size_t size,
                                      size_t align, uint32_t flags)
{
#if HAVE_POSIX_MEMALIGN
    void *addr = NULL;

    (void) mpool;
    (void) flags;

    /* posix_memalign() rejects alignments that are not a power-of-two
     * multiple of sizeof (void *) -- in particular 0. malloc() already
     * returns memory aligned for any object type, so use it for small
     * alignment requests instead of failing. */
    if (align <= sizeof (void *)) {
        return malloc (size);
    }

    if (0 != posix_memalign (&addr, align, size)) {
        return NULL;
    }

    return addr;
#else
    void *addr, *ret;

    (void) mpool;
    (void) flags;

    /* the rounding below needs a non-zero alignment, and the header slot
     * stored in front of the returned pointer must itself be aligned */
    if (align < sizeof (void *)) {
        align = sizeof (void *);
    }

    /* over-allocate: room for the alignment slack plus one pointer-sized
     * header that records the address returned by malloc() */
    addr = malloc (size + align + sizeof (void *));
    if (NULL == addr) {
        return NULL;
    }

    /* skip exactly the header that was reserved above, then round up */
    ret = OPAL_ALIGN_PTR((intptr_t) addr + (intptr_t) sizeof (void *), align, void *);
    *((void **) ret - 1) = addr;

    return ret;
#endif
}
/**
 * Resize a block previously returned by the default mpool module.
 *
 * @param mpool  module the block belongs to (only forwarded to the allocator)
 * @param addr   block to resize, or NULL to request a fresh allocation
 * @param size   new size in bytes
 *
 * @retval pointer to the resized block
 * @retval NULL on failure; the block at @p addr is left untouched
 */
static void *mca_mpool_default_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size)
{
#if HAVE_POSIX_MEMALIGN
    (void) mpool;

    return realloc (addr, size);
#else
    void *base, *ptr, *ret;
    intptr_t offset;

    if (NULL == addr) {
        return mca_mpool_default_alloc (mpool, size, 8, 0);
    }

    /* the slot just below the user pointer holds the address that was
     * originally obtained from malloc()/realloc() */
    base = *((void **) addr - 1);

    /* distance from the real start of the allocation to the user pointer;
     * it covers the header slot and the alignment padding */
    offset = (intptr_t) addr - (intptr_t) base;

    ptr = realloc (base, size + (size_t) offset);
    if (NULL == ptr) {
        /* realloc() leaves the old block valid on failure */
        return NULL;
    }

    /* realloc() preserved the contents, so the user data still starts
     * offset bytes into the (possibly moved) block.
     * NOTE(review): if the block moved, the original alignment is only kept
     * when the new base has the same alignment remainder -- confirm whether
     * callers rely on alignment across realloc. */
    ret = (void *)((intptr_t) ptr + offset);
    *((void **) ret - 1) = ptr;

    return ret;
#endif
}
/**
 * Release a block obtained from the default mpool module.
 *
 * @param mpool  module the block belongs to (not used here)
 * @param addr   block to release; NULL is accepted and ignored
 */
static void mca_mpool_default_free (mca_mpool_base_module_t *mpool, void *addr)
{
#if HAVE_POSIX_MEMALIGN
    free (addr);
#else
    if (NULL == addr) {
        return;
    }

    /* the pointer-sized slot right below addr stores the address that has
     * to be handed back to free() */
    free (((void **) addr)[-1]);
#endif
}
/**
 * Finalize hook of the default mpool module. It performs no work.
 *
 * @param mpool  module being finalized (not used)
 */
static void mca_mpool_default_finalize (struct mca_mpool_base_module_t *mpool)
{
    (void) mpool;
}
/* Module descriptor that wires the mca_mpool_default_* callbacks defined in
 * this file into the mpool module interface. Members that are not named here
 * are zero-initialized by the designated initializer. */
static mca_mpool_base_module_t mca_mpool_malloc_module = {
.mpool_alloc = mca_mpool_default_alloc,
.mpool_realloc = mca_mpool_default_realloc,
.mpool_free = mca_mpool_default_free,
.mpool_finalize = mca_mpool_default_finalize,
.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM,
};
/* Non-static pointer to the module above; it initially refers to the
 * malloc-backed module. */
mca_mpool_base_module_t *mca_mpool_base_default_module = &mca_mpool_malloc_module;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -13,6 +14,8 @@
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science * Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -30,9 +33,7 @@
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/mca/base/base.h" #include "opal/mca/base/base.h"
#include "opal/memoryhooks/memory.h"
#include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/base/base.h"
#include "mpool_base_mem_cb.h"
#include "opal/constants.h" #include "opal/constants.h"
#include "opal/util/sys_limits.h" #include "opal/util/sys_limits.h"
@ -48,13 +49,33 @@
* Global variables * Global variables
*/ */
/* whether we actually used the mem hooks or not */
int mca_mpool_base_used_mem_hooks = 0;
uint32_t mca_mpool_base_page_size = 0;
uint32_t mca_mpool_base_page_size_log = 0;
opal_list_t mca_mpool_base_modules = {{0}}; opal_list_t mca_mpool_base_modules = {{0}};
static char *mca_mpool_base_default_hints;
int mca_mpool_base_default_priority = 50;
OBJ_CLASS_INSTANCE(mca_mpool_base_selected_module_t, opal_list_item_t, NULL, NULL);
/**
 * Register the MCA variables of the mpool base framework.
 *
 * Two internal variables are registered: "default_hints" (string, defaults
 * to NULL) and "default_priority" (int, defaults to 50).
 *
 * @param flags  registration flags (not used here)
 *
 * @retval OPAL_SUCCESS always; the results of the individual registrations
 *         are deliberately ignored
 */
static int mca_mpool_base_register (mca_base_register_flag_t flags)
{
    /* establish the built-in defaults before handing the storage to MCA */
    mca_mpool_base_default_hints = NULL;
    mca_mpool_base_default_priority = 50;

    (void) mca_base_var_register ("opal", "mpool", "base", "default_hints",
                                  "Hints to use when selecting the default memory pool",
                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0,
                                  MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                  &mca_mpool_base_default_hints);

    (void) mca_base_var_register ("opal", "mpool", "base", "default_priority",
                                  "Priority of the default mpool module",
                                  MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                  MCA_BASE_VAR_FLAG_INTERNAL,
                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                  &mca_mpool_base_default_priority);

    return OPAL_SUCCESS;
}
/** /**
* Function for finding and opening either all MCA components, or the one * Function for finding and opening either all MCA components, or the one
@ -69,15 +90,14 @@ static int mca_mpool_base_open(mca_base_open_flag_t flags)
return OPAL_ERROR; return OPAL_ERROR;
} }
if (mca_mpool_base_default_hints) {
mca_mpool_base_default_module = mca_mpool_base_module_lookup (mca_mpool_base_default_hints);
}
/* Initialize the list so that in mca_mpool_base_close(), we can /* Initialize the list so that in mca_mpool_base_close(), we can
iterate over it (even if it's empty, as in the case of opal_info) */ iterate over it (even if it's empty, as in the case of opal_info) */
OBJ_CONSTRUCT(&mca_mpool_base_modules, opal_list_t); OBJ_CONSTRUCT(&mca_mpool_base_modules, opal_list_t);
/* get the page size for this architecture*/
mca_mpool_base_page_size = opal_getpagesize();
mca_mpool_base_page_size_log = my_log2(mca_mpool_base_page_size);
/* setup tree for tracking MPI_Alloc_mem */ /* setup tree for tracking MPI_Alloc_mem */
mca_mpool_base_tree_init(); mca_mpool_base_tree_init();
@ -88,12 +108,6 @@ static int mca_mpool_base_close(void)
{ {
opal_list_item_t *item; opal_list_item_t *item;
mca_mpool_base_selected_module_t *sm; mca_mpool_base_selected_module_t *sm;
int32_t modules_length;
/* Need the initial length in order to know if some of the initializations
* are done in the open function.
*/
modules_length = opal_list_get_size(&mca_mpool_base_modules);
/* Finalize all the mpool components and free their list items */ /* Finalize all the mpool components and free their list items */
@ -115,15 +129,8 @@ static int mca_mpool_base_close(void)
OMPI RTE program, or [possibly] multiple if this is opal_info) */ OMPI RTE program, or [possibly] multiple if this is opal_info) */
(void) mca_base_framework_components_close(&opal_mpool_base_framework, NULL); (void) mca_base_framework_components_close(&opal_mpool_base_framework, NULL);
/* deregister memory free callback */
if( (modules_length > 0) && mca_mpool_base_used_mem_hooks &&
0 != (OPAL_MEMORY_FREE_SUPPORT & opal_mem_hooks_support_level())) {
opal_mem_hooks_unregister_release(mca_mpool_base_mem_cb);
}
/* All done */
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
MCA_BASE_FRAMEWORK_DECLARE(opal, mpool, NULL, NULL, mca_mpool_base_open, MCA_BASE_FRAMEWORK_DECLARE(opal, mpool, "Memory pools", mca_mpool_base_register, mca_mpool_base_open,
mca_mpool_base_close, mca_mpool_base_static_components, 0); mca_mpool_base_close, mca_mpool_base_static_components, 0);

Просмотреть файл

@ -1,43 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/mpool/base/base.h"
OBJ_CLASS_INSTANCE(mca_mpool_base_selected_module_t, opal_list_item_t, NULL, NULL);
static bool mca_mpool_enable_progress_threads = true;
static bool mca_mpool_enable_mpi_thread_multiple = true;
/**
* Function for weeding out mpool modules that don't want to run.
*
* Call the init function on all available components to find out if they
* want to run. Select all components that don't fail. Failing modules
* will be closed and unloaded. The selected modules will be returned
* to the caller in a opal_list_t.
*/
int mca_mpool_base_init(bool enable_progress_threads, bool enable_mpi_thread_multiple)
{
    /* only record the threading configuration in the file-scope flags */
    mca_mpool_enable_mpi_thread_multiple = enable_mpi_thread_multiple;
    mca_mpool_enable_progress_threads = enable_progress_threads;

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -30,126 +30,47 @@
#include "opal/mca/base/base.h" #include "opal/mca/base/base.h"
#include "opal/util/show_help.h" #include "opal/util/show_help.h"
#include "opal/util/proc.h" #include "opal/util/proc.h"
#include "opal/runtime/opal_params.h"
#include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/mpool.h"
#include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/base/base.h"
#include "opal/memoryhooks/memory.h"
#include "mpool_base_mem_cb.h"
mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char* name) mca_mpool_base_component_t* mca_mpool_base_component_lookup(const char *name)
{ {
mca_base_component_list_item_t *cli;
/* Traverse the list of available modules; call their init functions. */ /* Traverse the list of available modules; call their init functions. */
opal_list_item_t* item; OPAL_LIST_FOREACH(cli, &opal_mpool_base_framework.framework_components, mca_base_component_list_item_t) {
for (item = opal_list_get_first(&opal_mpool_base_framework.framework_components); mca_mpool_base_component_t* component = (mca_mpool_base_component_t *) cli->cli_component;
item != opal_list_get_end(&opal_mpool_base_framework.framework_components); if (strcmp(component->mpool_version.mca_component_name, name) == 0) {
item = opal_list_get_next(item)) {
mca_base_component_list_item_t *cli =
(mca_base_component_list_item_t *) item;
mca_mpool_base_component_t* component =
(mca_mpool_base_component_t *) cli->cli_component;
if(strcmp(component->mpool_version.mca_component_name, name) == 0) {
return component; return component;
} }
} }
return NULL; return NULL;
} }
mca_mpool_base_module_t* mca_mpool_base_module_create(
const char* name, mca_mpool_base_module_t *mca_mpool_base_module_lookup (const char *hints)
void* user_data,
struct mca_mpool_base_resources_t* resources)
{ {
mca_mpool_base_component_t* component = NULL; mca_mpool_base_module_t *best_module = mca_mpool_base_default_module;
mca_mpool_base_module_t* module = NULL;
mca_base_component_list_item_t *cli; mca_base_component_list_item_t *cli;
mca_mpool_base_selected_module_t *sm; int best_priority = mca_mpool_base_default_priority;
int rc;
OPAL_LIST_FOREACH(cli, &opal_mpool_base_framework.framework_components, mca_base_component_list_item_t) { OPAL_LIST_FOREACH(cli, &opal_mpool_base_framework.framework_components, mca_base_component_list_item_t) {
component = (mca_mpool_base_component_t *) cli->cli_component; mca_mpool_base_component_t *component = (mca_mpool_base_component_t *) cli->cli_component;
if(0 == strcmp(component->mpool_version.mca_component_name, name)) { mca_mpool_base_module_t *module;
module = component->mpool_init(resources); int priority;
break;
rc = component->mpool_query (hints, &priority, &module);
if (OPAL_SUCCESS == rc) {
if (priority > best_priority) {
best_priority = priority;
best_module = module;
}
} }
} }
if ( NULL == module ) { return best_module;
return NULL;
}
sm = OBJ_NEW(mca_mpool_base_selected_module_t);
sm->mpool_component = component;
sm->mpool_module = module;
sm->user_data = user_data;
sm->mpool_resources = resources;
opal_list_append(&mca_mpool_base_modules, (opal_list_item_t*) sm);
/* on the very first creation of a module we init the memory
callback */
if (opal_list_get_size(&mca_mpool_base_modules) == 1) {
/* Default to not using memory hooks */
int use_mem_hooks = 0;
/* Use the memory hooks if leave_pinned or
leave_pinned_pipeline is enabled (note that either of these
leave_pinned variables may have been set by a user MCA
param or elsewhere in the code base). Yes, we could have
coded this more succinctly, but this is more clear. Do not
check memory hooks if the mpool explicity asked us not to. */
if ((opal_leave_pinned > 0 || opal_leave_pinned_pipeline) &&
!(module->flags & MCA_MPOOL_FLAGS_NO_HOOKS)) {
use_mem_hooks = 1;
}
if (use_mem_hooks) {
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
opal_mem_hooks_support_level())) {
opal_mem_hooks_register_release(mca_mpool_base_mem_cb, NULL);
} else {
opal_show_help("help-mpool-base.txt", "leave pinned failed",
true, name, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
opal_proc_local_get()->proc_hostname);
return NULL;
}
/* Set this to true so that mpool_base_close knows to
cleanup */
mca_mpool_base_used_mem_hooks = 1;
}
}
return module;
}
mca_mpool_base_module_t* mca_mpool_base_module_lookup(const char* name)
{
mca_mpool_base_selected_module_t *mli;
OPAL_LIST_FOREACH(mli, &mca_mpool_base_modules, mca_mpool_base_selected_module_t) {
if(0 == strcmp(mli->mpool_component->mpool_version.mca_component_name,
name)) {
return mli->mpool_module;
}
}
return NULL;
}
int mca_mpool_base_module_destroy(mca_mpool_base_module_t *module)
{
mca_mpool_base_selected_module_t *sm, *next;
OPAL_LIST_FOREACH_SAFE(sm, next, &mca_mpool_base_modules, mca_mpool_base_selected_module_t) {
if (module == sm->mpool_module) {
opal_list_remove_item(&mca_mpool_base_modules, (opal_list_item_t*)sm);
if (NULL != sm->mpool_module->mpool_finalize) {
sm->mpool_module->mpool_finalize(sm->mpool_module);
}
OBJ_RELEASE(sm);
return OPAL_SUCCESS;
}
}
return OPAL_ERR_NOT_FOUND;
} }

Просмотреть файл

@ -23,10 +23,6 @@
* *
* $HEADER$ * $HEADER$
*/ */
/**
* @file
* Description of the Registration Cache framework
*/
#include "opal_config.h" #include "opal_config.h"

Просмотреть файл

@ -28,6 +28,7 @@
#define MCA_MPOOL_BASE_TREE_MAX 8 #define MCA_MPOOL_BASE_TREE_MAX 8
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/mca/mpool/mpool.h" #include "opal/mca/mpool/mpool.h"
#include "opal/mca/rcache/rcache.h"
BEGIN_C_DECLS BEGIN_C_DECLS
@ -46,8 +47,9 @@ struct mca_mpool_base_tree_item_t
size_t num_bytes; /**< the number of bytes in this alloc, only for size_t num_bytes; /**< the number of bytes in this alloc, only for
debugging reporting with debugging reporting with
mpi_show_mpi_alloc_mem_leaks */ mpi_show_mpi_alloc_mem_leaks */
mca_mpool_base_module_t* mpools[MCA_MPOOL_BASE_TREE_MAX]; /**< the mpools */ mca_mpool_base_module_t *mpool;
mca_mpool_base_registration_t* regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */ mca_rcache_base_module_t *rcaches[MCA_MPOOL_BASE_TREE_MAX]; /**< the registration caches */
mca_rcache_base_registration_t *regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */
uint8_t count; /**< length of the mpools/regs array */ uint8_t count; /**< length of the mpools/regs array */
}; };
typedef struct mca_mpool_base_tree_item_t mca_mpool_base_tree_item_t; typedef struct mca_mpool_base_tree_item_t mca_mpool_base_tree_item_t;

Просмотреть файл

@ -1,105 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_GPUSM_H
#define MCA_MPOOL_GPUSM_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/mpool/mpool.h"
BEGIN_C_DECLS
#define MEMHANDLE_SIZE 8
#define EVTHANDLE_SIZE 8
struct mca_mpool_gpusm_registration_t {
mca_mpool_base_registration_t base;
uint64_t memHandle[MEMHANDLE_SIZE]; /* CUipcMemHandle */
uint64_t evtHandle[EVTHANDLE_SIZE]; /* CUipcEventHandle */
uintptr_t event; /* CUevent */
};
typedef struct mca_mpool_gpusm_registration_t mca_mpool_gpusm_registration_t;
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_gpusm_registration_t);
struct mca_mpool_gpusm_component_t {
mca_mpool_base_component_t super;
};
typedef struct mca_mpool_gpusm_component_t mca_mpool_gpusm_component_t;
OPAL_DECLSPEC extern mca_mpool_gpusm_component_t mca_mpool_gpusm_component;
struct mca_mpool_base_resources_t {
void *reg_data;
size_t sizeof_reg;
int (*register_mem)(void *base, size_t size, mca_mpool_base_registration_t *newreg,
mca_mpool_base_registration_t *hdrreg);
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
struct mca_mpool_gpusm_module_t {
mca_mpool_base_module_t super;
struct mca_mpool_base_resources_t resources;
opal_free_list_t reg_list;
}; typedef struct mca_mpool_gpusm_module_t mca_mpool_gpusm_module_t;
/*
* Initializes the mpool module.
*/
void mca_mpool_gpusm_module_init(mca_mpool_gpusm_module_t *mpool);
/**
* register block of memory
*/
int mca_mpool_gpusm_register(mca_mpool_base_module_t* mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
/**
* deregister memory
*/
int mca_mpool_gpusm_deregister(mca_mpool_base_module_t *mpool,
mca_mpool_base_registration_t *reg);
/**
* find registration for a given block of memory
*/
int mca_mpool_gpusm_find(struct mca_mpool_base_module_t* mpool, void* addr,
size_t size, mca_mpool_base_registration_t **reg);
/**
* finalize mpool
*/
void mca_mpool_gpusm_finalize(struct mca_mpool_base_module_t *mpool);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OPAL_SUCCESS or failure status
*/
int mca_mpool_gpusm_ft_event(int state);
END_C_DECLS
#endif

Просмотреть файл

@ -1,160 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_OPENIB_H
#define MCA_MPOOL_OPENIB_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/event/event.h"
#include "opal/mca/mpool/mpool.h"
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
BEGIN_C_DECLS
#define MCA_MPOOL_GRDMA_NAME_MAX 256
struct mca_mpool_grdma_pool_t {
opal_list_item_t super;
char *pool_name;
opal_list_t lru_list;
opal_list_t gc_list;
struct mca_rcache_base_module_t *rcache;
};
typedef struct mca_mpool_grdma_pool_t mca_mpool_grdma_pool_t;
OBJ_CLASS_DECLARATION(mca_mpool_grdma_pool_t);
struct mca_mpool_grdma_component_t {
mca_mpool_base_component_t super;
opal_list_t pools;
char *rcache_name;
bool print_stats;
int leave_pinned;
};
typedef struct mca_mpool_grdma_component_t mca_mpool_grdma_component_t;
OPAL_DECLSPEC extern mca_mpool_grdma_component_t mca_mpool_grdma_component;
struct mca_mpool_grdma_module_t;
struct mca_mpool_base_resources_t {
char *pool_name;
void *reg_data;
size_t sizeof_reg;
int (*register_mem)(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
struct mca_mpool_grdma_module_t {
mca_mpool_base_module_t super;
struct mca_mpool_base_resources_t resources;
mca_mpool_grdma_pool_t *pool;
opal_free_list_t reg_list;
uint32_t stat_cache_hit;
uint32_t stat_cache_miss;
uint32_t stat_evicted;
uint32_t stat_cache_found;
uint32_t stat_cache_notfound;
};
typedef struct mca_mpool_grdma_module_t mca_mpool_grdma_module_t;
/*
* Initializes the mpool module.
*/
void mca_mpool_grdma_module_init(mca_mpool_grdma_module_t *mpool, mca_mpool_grdma_pool_t *pool);
/*
* Returns base address of shared memory mapping.
*/
void *mca_mpool_grdma_base(mca_mpool_base_module_t *mpool);
/**
* Allocate block of registered memory.
*/
void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
size_t align, uint32_t flags,
mca_mpool_base_registration_t** registration);
/**
* realloc block of registered memory
*/
void* mca_mpool_grdma_realloc( mca_mpool_base_module_t *mpool, void* addr,
size_t size, mca_mpool_base_registration_t** registration);
/**
* register block of memory
*/
int mca_mpool_grdma_register(mca_mpool_base_module_t* mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
/**
* deregister memory
*/
int mca_mpool_grdma_deregister(mca_mpool_base_module_t *mpool,
mca_mpool_base_registration_t *reg);
/**
* free memory allocated by alloc function
*/
void mca_mpool_grdma_free(mca_mpool_base_module_t *mpool, void * addr,
mca_mpool_base_registration_t *reg);
/**
* find registration for a given block of memory
*/
int mca_mpool_grdma_find(struct mca_mpool_base_module_t* mpool, void* addr,
size_t size, mca_mpool_base_registration_t **reg);
/**
* unregister all registration covering the block of memory
*/
int mca_mpool_grdma_release_memory(mca_mpool_base_module_t* mpool, void *base,
size_t size);
/**
* finalize mpool
*/
void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OPAL_SUCCESS or failure status
*/
int mca_mpool_grdma_ft_event(int state);
/**
* evict one unused registration from the mpool's lru.
* @return true on success, false on failure
*/
bool mca_mpool_grdma_evict (struct mca_mpool_base_module_t *mpool);
END_C_DECLS
#endif

Просмотреть файл

@ -1,592 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include "opal/align.h"
#include "opal/util/proc.h"
#if OPAL_CUDA_GDR_SUPPORT
#include "opal/mca/common/cuda/common_cuda.h"
#endif /* OPAL_CUDA_GDR_SUPPORT */
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/mpool/base/base.h"
#include "mpool_grdma.h"
/**
 * Tell whether a registration is eligible for caching: leave_pinned must be
 * enabled on the component and the registration must carry none of the
 * CACHE_BYPASS, PERSIST or INVALID flags.
 */
static inline bool registration_is_cacheable(mca_mpool_base_registration_t *reg)
{
    if (!mca_mpool_grdma_component.leave_pinned) {
        return false;
    }

    if (reg->flags & (MCA_MPOOL_FLAGS_CACHE_BYPASS |
                      MCA_MPOOL_FLAGS_PERSIST |
                      MCA_MPOOL_FLAGS_INVALID)) {
        return false;
    }

    return true;
}
#if OPAL_CUDA_GDR_SUPPORT
static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size);
#endif /* OPAL_CUDA_GDR_SUPPORT */
/* Object constructor for mca_mpool_grdma_pool_t: zero every member that
 * follows the parent object, construct both lists, then create the rcache
 * named by the component. */
static void mca_mpool_grdma_pool_contructor (mca_mpool_grdma_pool_t *pool)
{
    const size_t parent_size = sizeof (pool->super);
    void *own_members = (void *)((uintptr_t) pool + parent_size);

    /* leave the parent (super) part untouched, clear everything after it */
    memset (own_members, 0, sizeof (*pool) - parent_size);

    OBJ_CONSTRUCT(&pool->lru_list, opal_list_t);
    OBJ_CONSTRUCT(&pool->gc_list, opal_list_t);

    pool->rcache = mca_rcache_base_module_create(mca_mpool_grdma_component.rcache_name);
}
/* Object destructor for mca_mpool_grdma_pool_t: release the pool name and
 * tear down both lists. */
static void mca_mpool_grdma_pool_destructor (mca_mpool_grdma_pool_t *pool)
{
    free (pool->pool_name);

    OBJ_DESTRUCT(&pool->lru_list);
    OBJ_DESTRUCT(&pool->gc_list);
}
OBJ_CLASS_INSTANCE(mca_mpool_grdma_pool_t, opal_list_item_t,
mca_mpool_grdma_pool_contructor,
mca_mpool_grdma_pool_destructor);
/*
* Initializes the mpool module.
*/
void mca_mpool_grdma_module_init(mca_mpool_grdma_module_t* mpool, mca_mpool_grdma_pool_t *pool)
{
    mca_mpool_base_module_t *base = &mpool->super;

    /* the module keeps its own reference to the pool */
    OBJ_RETAIN(pool);
    mpool->pool = pool;

    /* identity of the module */
    base->mpool_component = &mca_mpool_grdma_component.super;
    base->mpool_base = NULL; /* no base .. */

    /* allocation entry points */
    base->mpool_alloc = mca_mpool_grdma_alloc;
    base->mpool_realloc = mca_mpool_grdma_realloc;
    base->mpool_free = mca_mpool_grdma_free;

    /* registration entry points */
    base->mpool_register = mca_mpool_grdma_register;
    base->mpool_find = mca_mpool_grdma_find;
    base->mpool_deregister = mca_mpool_grdma_deregister;
    base->mpool_release_memory = mca_mpool_grdma_release_memory;

    /* lifecycle entry points */
    base->mpool_finalize = mca_mpool_grdma_finalize;
    base->mpool_ft_event = mca_mpool_grdma_ft_event;

    base->flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
    base->rcache = pool->rcache;

    /* all statistics counters start from zero */
    mpool->stat_evicted = 0;
    mpool->stat_cache_miss = 0;
    mpool->stat_cache_hit = 0;
    mpool->stat_cache_notfound = 0;
    mpool->stat_cache_found = 0;

    /* free list that supplies the registration objects; the element size
     * comes from the resources stored in the module */
    OBJ_CONSTRUCT(&mpool->reg_list, opal_free_list_t);
    opal_free_list_init (&mpool->reg_list, mpool->resources.sizeof_reg,
                         opal_cache_line_size,
                         OBJ_CLASS(mca_mpool_base_registration_t),
                         0, opal_cache_line_size, 0, -1, 32, NULL, 0,
                         NULL, NULL, NULL);
}
/*
 * Deregister a single registration and, on success, return it to the free
 * list of the module that owns it.
 *
 * NOTE(review): the rcache lock is unlocked and re-locked around the
 * deregistration callback, so this presumably has to be called with that
 * lock held -- confirm against the callers.
 *
 * Returns the value produced by the module's deregister_mem callback.
 */
static inline int dereg_mem(mca_mpool_base_registration_t *reg)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) reg->mpool;
int rc;
/* registrations that bypass the cache are not removed from the rcache */
if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
reg->mpool->rcache->rcache_delete(reg->mpool->rcache, reg);
/* Drop the rcache lock before deregistering the memory */
OPAL_THREAD_UNLOCK(&reg->mpool->rcache->lock);
rc = mpool_grdma->resources.deregister_mem(mpool_grdma->resources.reg_data,
reg);
/* take the lock again before touching the free list and returning */
OPAL_THREAD_LOCK(&reg->mpool->rcache->lock);
/* the registration object is only recycled when deregistration succeeded */
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_free_list_return (&mpool_grdma->reg_list,
(opal_free_list_item_t *) reg);
}
return rc;
}
/**
* allocate function
*/
void* mca_mpool_grdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
                            size_t align, uint32_t flags, mca_mpool_base_registration_t **reg)
{
    void *raw = NULL;
    void *aligned;
    int rc;

    /* an alignment of 0 selects the page size */
    if (0 == align) {
        align = mca_mpool_base_page_size;
    }

#if OPAL_CUDA_SUPPORT
    /* CUDA cannot handle registering overlapping regions, so make
     * sure each region is page sized and page aligned. */
    align = mca_mpool_base_page_size;
    size = OPAL_ALIGN(size, mca_mpool_base_page_size, size_t);
#endif

#ifdef HAVE_POSIX_MEMALIGN
    /* posix_memalign() reports its status through the return value; it is
     * stored in errno */
    errno = posix_memalign(&raw, align, size);
    if (0 != errno) {
        return NULL;
    }
    aligned = raw;
#else
    /* over-allocate by the alignment and round the start address up */
    raw = malloc(size + align);
    if (NULL == raw) {
        return NULL;
    }
    aligned = (void*)OPAL_ALIGN((uintptr_t)raw, align, uintptr_t);
#endif

    rc = mca_mpool_grdma_register(mpool, aligned, size, flags,
                                  MCA_MPOOL_ACCESS_ANY, reg);
    if (OPAL_SUCCESS != rc) {
        free(raw);
        return NULL;
    }

    /* remember the address that was actually obtained from the allocator */
    (*reg)->alloc_base = (unsigned char *) raw;

    return aligned;
}
/* Drain the pool's garbage-collection list, deregistering every entry.
 * This function must be called with the rcache lock held. */
static inline void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_grdma_module_t *module = (mca_mpool_grdma_module_t*)mpool;
    opal_list_item_t *victim;

    /* Each registration is unlinked from the gc list before it is
       deregistered */
    for (victim = opal_list_remove_first(&module->pool->gc_list);
         NULL != victim;
         victim = opal_list_remove_first(&module->pool->gc_list)) {
        dereg_mem((mca_mpool_base_registration_t *) victim);
    }
}
/**
 * Evict the registration at the head of the pool's LRU list.
 *
 * The eviction is counted on the module that owns the evicted
 * registration. The result of dereg_mem is deliberately ignored.
 *
 * @param pool  pool whose lru_list is consulted
 * @return true if a registration was removed, false if the list was empty
 */
static inline bool mca_mpool_grdma_evict_lru_local (mca_mpool_grdma_pool_t *pool)
{
    mca_mpool_base_registration_t *victim =
        (mca_mpool_base_registration_t *) opal_list_remove_first (&pool->lru_list);
    mca_mpool_grdma_module_t *owner;

    if (NULL != victim) {
        /* read the owner first: dereg_mem may recycle the registration */
        owner = (mca_mpool_grdma_module_t *) victim->mpool;
        (void) dereg_mem (victim);
        owner->stat_evicted++;
        return true;
    }

    return false;
}
/* Message state constants.
 * NOTE(review): none of these values is referenced by the functions visible
 * in this part of the file; the names suggest states of a deregistration
 * request message -- confirm against the remaining users before relying on
 * that reading. */
enum {
MCA_MPOOL_GRDMA_MSG_EMPTY = 0,
MCA_MPOOL_GRDMA_MSG_NEED_DEREG = 1,
MCA_MPOOL_GRDMA_MSG_BUSY = 2,
MCA_MPOOL_GRDMA_MSG_COMPLETE = 3
};
bool mca_mpool_grdma_evict (struct mca_mpool_base_module_t *mpool)
{
return mca_mpool_grdma_evict_lru_local (((mca_mpool_grdma_module_t *) mpool)->pool);
}
/*
* register memory
*
* Registers the page-aligned region covering [addr, addr + size) through the
* resource-specific register_mem callback, reusing a cached registration when
* one with sufficient access flags is found. The rcache lock is taken on entry
* and released on every return path.
*
* mpool         grdma module (cast to mca_mpool_grdma_module_t internally)
* addr, size    region to register
* flags         MCA_MPOOL_FLAGS_* bits; CACHE_BYPASS and PERSIST both skip the
*               cache lookup, CACHE_BYPASS additionally skips the cache insert
* access_flags  requested access; widened with the access flags of a cached
*               registration that has to be replaced
* reg           out: set to NULL first, then to the registration on success
*
* Returns OPAL_SUCCESS, OPAL_ERR_OUT_OF_RESOURCE when opal_free_list_get
* returns NULL, or the error reported by rcache_insert / register_mem.
*/
int mca_mpool_grdma_register (mca_mpool_base_module_t *mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags,
mca_mpool_base_registration_t **reg)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
const bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
const bool persist = !!(flags & MCA_MPOOL_FLAGS_PERSIST);
mca_mpool_base_registration_t *grdma_reg;
opal_free_list_item_t *item;
unsigned char *base, *bound;
int rc;
OPAL_THREAD_LOCK(&mpool->rcache->lock);
*reg = NULL;
/* compute the page-aligned span [base, bound] that covers the request */
base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
mca_mpool_base_page_size_log);
/* deregister anything that was queued on the gc list */
if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
do_unregistration_gc(mpool);
#if OPAL_CUDA_GDR_SUPPORT
if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
size_t psize;
/* for GPU memory base/bound are replaced by the range reported by the
 * CUDA helper (presumably the enclosing device allocation -- confirm) */
mca_common_cuda_get_address_range(&base, &psize, addr);
bound = base + psize - 1;
/* Check to see if this memory is in the cache and if it has been freed. If so,
* this call will boot it out of the cache. */
check_for_cuda_freed_memory(mpool, base, psize);
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
/* look through existing regs if not persistent registration requested.
* Persistent registration are always registered and placed in the cache.
* The lookup is also skipped when cache bypass was requested. */
if(!(bypass_cache || persist)) {
/* check to see if memory is registered */
mpool->rcache->rcache_find(mpool->rcache, base, bound - base + 1, &grdma_reg);
/* NOTE(review): the INVALID test below looks at the caller-supplied flags,
 * not at grdma_reg->flags -- confirm this is intended */
if (grdma_reg && !(flags & MCA_MPOOL_FLAGS_INVALID)) {
if (OPAL_UNLIKELY((access_flags & grdma_reg->access_flags) != access_flags)) {
/* cached registration lacks some requested access: replace it with a
 * new registration carrying the union of both access flag sets */
access_flags |= grdma_reg->access_flags;
if (0 != grdma_reg->ref_count) {
/* still in use: take it out of the cache but leave it registered */
if (!(grdma_reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS)) {
grdma_reg->mpool->rcache->rcache_delete(grdma_reg->mpool->rcache, grdma_reg);
}
/* mark the registration to go away when it is deregistered */
grdma_reg->flags |= MCA_MPOOL_FLAGS_INVALID | MCA_MPOOL_FLAGS_CACHE_BYPASS;
} else {
/* unused: deregister it right away */
if (registration_is_cacheable (grdma_reg)) {
/* pull the item out of the lru */
opal_list_remove_item (&mpool_grdma->pool->lru_list, (opal_list_item_t *) grdma_reg);
}
(void) dereg_mem (grdma_reg);
}
} else {
/* cache hit: hand out the existing registration */
*reg = grdma_reg;
if (0 == grdma_reg->ref_count) {
/* Leave pinned must be set for this to still be in the rcache. */
opal_list_remove_item(&mpool_grdma->pool->lru_list,
(opal_list_item_t *) grdma_reg);
}
/* This segment fits fully within an existing segment. */
mpool_grdma->stat_cache_hit++;
grdma_reg->ref_count++;
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
return OPAL_SUCCESS;
}
}
mpool_grdma->stat_cache_miss++;
/* Unless explicitly requested by the caller always store the
* registration in the rcache. This will speed up the case where
* no leave pinned protocol is in use but the same segment is in
* use in multiple simultaneous transactions. We used to set bypass_cache
* here if !mca_mpool_grdma_component.leave_pinned. */
}
/* no usable cached registration: build a new one from the free list */
item = opal_free_list_get (&mpool_grdma->reg_list);
if(NULL == item) {
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
grdma_reg = (mca_mpool_base_registration_t*)item;
grdma_reg->mpool = mpool;
grdma_reg->base = base;
grdma_reg->bound = bound;
grdma_reg->flags = flags;
grdma_reg->access_flags = access_flags;
#if OPAL_CUDA_GDR_SUPPORT
if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
mca_common_cuda_get_buffer_id(grdma_reg);
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
/* the registration is inserted into the cache before it is registered */
if (false == bypass_cache) {
rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
opal_free_list_return (&mpool_grdma->reg_list, item);
return rc;
}
}
/* retry the registration for as long as evicting an unused registration
 * succeeds and register_mem keeps reporting OPAL_ERR_OUT_OF_RESOURCE */
while (OPAL_ERR_OUT_OF_RESOURCE ==
(rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data,
base, bound - base + 1, grdma_reg))) {
/* try to remove one unused reg and retry */
if (!mca_mpool_grdma_evict (mpool)) {
break;
}
}
if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
/* undo the cache insert and recycle the registration object */
if (false == bypass_cache) {
mpool->rcache->rcache_delete(mpool->rcache, grdma_reg);
}
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
opal_free_list_return (&mpool_grdma->reg_list, item);
return rc;
}
*reg = grdma_reg;
(*reg)->ref_count++;
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
/* Cleanup any vmas that we have deferred deletion on */
mpool->rcache->rcache_clean(mpool->rcache);
return OPAL_SUCCESS;
}
/**
 * realloc function
 *
 * Allocates and registers a new buffer of @p size bytes, copies the old
 * contents into it and releases the old buffer together with its
 * registration.
 *
 * @param mpool  module owning the buffer
 * @param addr   buffer previously obtained from this mpool; assumed to lie
 *               inside the span described by *reg
 * @param size   requested size of the new buffer
 * @param reg    in: registration of @p addr; out: registration of the new
 *               buffer. Left pointing at the old registration on failure.
 * @return the new buffer, or NULL if it could not be allocated, in which
 *         case the old buffer and registration remain valid
 */
void* mca_mpool_grdma_realloc(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_base_registration_t *old_reg = *reg;
    size_t old_span, copy_len;
    void *new_mem;

    new_mem = mca_mpool_grdma_alloc(mpool, size, 0, old_reg->flags, reg);
    if (NULL == new_mem) {
        /* the failed allocation may have cleared *reg; restore it so the
         * caller can keep using (and later free) the old buffer */
        *reg = old_reg;
        return NULL;
    }

    /* The true size of the old allocation is not recorded. Copy no more than
     * what lies between addr and the end of the old registration, and never
     * more than the new buffer can hold. */
    old_span = (size_t) (old_reg->bound - (unsigned char *) addr) + 1;
    copy_len = (old_span < size) ? old_span : size;
    memcpy(new_mem, addr, copy_len);

    mca_mpool_grdma_free(mpool, addr, old_reg);
    return new_mem;
}
/**
 * free function
 *
 * Drops the caller's reference on the registration and releases the
 * allocation recorded in it.
 *
 * @param mpool         module owning the registration
 * @param addr          user pointer; unused, the allocation is located
 *                      through registration->alloc_base
 * @param registration  registration returned by the matching alloc
 */
void mca_mpool_grdma_free(mca_mpool_base_module_t *mpool, void *addr,
                          mca_mpool_base_registration_t *registration)
{
    /* fetch the pointer first: deregistering may recycle the registration */
    void *backing = registration->alloc_base;

    (void) addr;
    mca_mpool_grdma_deregister(mpool, registration);
    free(backing);
}
/**
 * Look up a cached registration covering [addr, addr + size).
 *
 * The lookup uses the page-aligned span around the request. A result is
 * counted as found, and its reference count incremented, only when
 * leave_pinned is enabled, the registration is persistent, or it matches
 * the aligned span exactly. Otherwise the lookup is counted as not found;
 * *reg is left as rcache_find set it in that case.
 *
 * @param mpool  grdma module
 * @param addr   start of the region
 * @param size   length of the region in bytes
 * @param reg    out: result of the rcache lookup
 * @return the return value of rcache_find
 */
int mca_mpool_grdma_find(struct mca_mpool_base_module_t *mpool, void *addr,
                         size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *module = (mca_mpool_grdma_module_t*)mpool;
    mca_mpool_base_registration_t *hit;
    unsigned char *lo, *hi;
    bool usable = false;
    int rc;

    lo = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    hi = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                         mca_mpool_base_page_size_log);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    rc = mpool->rcache->rcache_find(mpool->rcache, lo, hi - lo + 1, reg);
    hit = *reg;

    if (NULL != hit) {
        usable = mca_mpool_grdma_component.leave_pinned ||
                 (hit->flags & MCA_MPOOL_FLAGS_PERSIST) ||
                 (hit->base == lo && hit->bound == hi);
    }

    if (!usable) {
        module->stat_cache_notfound++;
    } else {
        assert(((void*)hit->bound) >= addr);
        if (0 == hit->ref_count && mca_mpool_grdma_component.leave_pinned) {
            /* an unused registration sits on the LRU list; take it off
             * before handing it out */
            opal_list_remove_item(&module->pool->lru_list,
                                  (opal_list_item_t*) hit);
        }
        module->stat_cache_found++;
        hit->ref_count++;
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return rc;
}
/**
 * Drop one reference on a registration.
 *
 * When the last reference goes away the registration is either parked on
 * the pool's LRU list (if it is cacheable) or deregistered immediately.
 *
 * @param mpool  grdma module
 * @param reg    registration with ref_count > 0
 * @return OPAL_SUCCESS, or the error returned by dereg_mem
 */
int mca_mpool_grdma_deregister(struct mca_mpool_base_module_t *mpool,
                               mca_mpool_base_registration_t *reg)
{
    mca_mpool_grdma_module_t *module = (mca_mpool_grdma_module_t *) mpool;
    int status = OPAL_SUCCESS;

    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    reg->ref_count--;
    if (reg->ref_count > 0) {
        /* still referenced elsewhere: nothing more to do */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OPAL_SUCCESS;
    }

    if (!registration_is_cacheable(reg)) {
        status = dereg_mem (reg);
    } else {
        opal_list_append(&module->pool->lru_list, (opal_list_item_t *) reg);
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

    return status;
}
/* number of registrations fetched from the rcache per lookup batch */
#define GRDMA_MPOOL_NREGS 100

/**
 * Invalidate every cached registration overlapping [base, base + size).
 *
 * Unused registrations are moved from the LRU list to the gc list.
 * Registrations that are still referenced are only marked invalid and
 * make the call report OPAL_ERROR.
 *
 * @param mpool  grdma module
 * @param base   start of the region being released
 * @param size   length of the region in bytes
 * @return OPAL_SUCCESS, or OPAL_ERROR if any overlapping registration was
 *         still in use
 */
int mca_mpool_grdma_release_memory(struct mca_mpool_base_module_t *mpool,
                                   void *base, size_t size)
{
    mca_mpool_grdma_module_t *module = (mca_mpool_grdma_module_t *) mpool;
    mca_mpool_base_registration_t *batch[GRDMA_MPOOL_NREGS];
    int found, idx, status = OPAL_SUCCESS;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    do {
        found = mpool->rcache->rcache_find_all(mpool->rcache, base, size,
                                               batch, GRDMA_MPOOL_NREGS);

        for (idx = 0 ; idx < found ; ++idx) {
            mca_mpool_base_registration_t *cur = batch[idx];

            cur->flags |= MCA_MPOOL_FLAGS_INVALID;

            if (0 == cur->ref_count) {
                /* unused: queue it for deferred deregistration */
                opal_list_remove_item(&module->pool->lru_list, (opal_list_item_t *) cur);
                opal_list_append(&module->pool->gc_list, (opal_list_item_t *) cur);
                continue;
            }

            /* memory is being freed, but there are registration in use that
             * covers the memory. This can happen even in a correct program,
             * but may also be an user error. We can't tell. Mark the
             * registration as invalid. It will not be used any more and
             * will be unregistered when ref_count will become zero */
            status = OPAL_ERROR; /* tell caller that something was wrong */
        }
    } while (GRDMA_MPOOL_NREGS == found);

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return status;
}
/* Make sure this registration request is not stale. In other words, ensure
 * that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state. If we do
 * kick out the registrations and deregister. This function needs to be called
 * with the mpool->rcache->lock held and returns with the lock still held on
 * every path (dereg_mem drops and re-acquires it internally).
 *
 * Returns OPAL_SUCCESS, or OPAL_ERROR if a stale registration is still
 * referenced and could therefore only be marked invalid. */
#if OPAL_CUDA_GDR_SUPPORT
static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
    mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
    int reg_cnt, i, rc = OPAL_SUCCESS;
    mca_mpool_base_registration_t *reg;

    mpool->rcache->rcache_find(mpool->rcache, addr, size, &reg);
    if (NULL == reg) {
        return OPAL_SUCCESS;
    }

    /* If not previously freed memory, just return 0 */
    if (!(mca_common_cuda_previously_freed_memory(reg))) {
        return OPAL_SUCCESS;
    }

    /* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "Before free"); */

    /* This memory has been freed. Find all registrations and delete */
    do {
        reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, reg->base, reg->bound - reg->base + 1,
                                                 regs, GRDMA_MPOOL_NREGS);
        for(i = 0 ; i < reg_cnt ; ++i) {
            regs[i]->flags |= MCA_MPOOL_FLAGS_INVALID;
            if (regs[i]->ref_count) {
                opal_output(0, "Release FAILED: ref_count=%d, base=%p, bound=%p, size=%d",
                            regs[i]->ref_count, regs[i]->base, regs[i]->bound,
                            (int) (regs[i]->bound - regs[i]->base + 1));
                /* memory is being freed, but there are registration in use that
                 * covers the memory. This can happen even in a correct program,
                 * but may also be an user error. We can't tell. Mark the
                 * registration as invalid. It will not be used any more and
                 * will be unregistered when ref_count will become zero */
                rc = OPAL_ERROR; /* tell caller that something was wrong */
            } else {
                opal_list_remove_item(&mpool_grdma->pool->lru_list,(opal_list_item_t *) regs[i]);
                /* Now deregister. Do not use gc_list as we need to kick this out now. */
                dereg_mem(regs[i]);
            }
        }
    } while(reg_cnt == GRDMA_MPOOL_NREGS);

    /* The lock is intentionally NOT released here: the caller took it, keeps
     * using the rcache after this call and releases it itself. Unlocking on
     * this path only would leave the caller unprotected and cause a second
     * unlock later. */

    /* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "After free");*/

    return rc;
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool)
{
mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
mca_mpool_base_registration_t *regs[GRDMA_MPOOL_NREGS];
int reg_cnt, i;
/* Statistic */
if (true == mca_mpool_grdma_component.print_stats) {
opal_output(0, "%s grdma: stats "
"(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mpool_grdma->stat_cache_hit, mpool_grdma->stat_cache_miss,
mpool_grdma->stat_cache_found, mpool_grdma->stat_cache_notfound,
mpool_grdma->stat_evicted);
}
OPAL_THREAD_LOCK(&mpool->rcache->lock);
do_unregistration_gc(mpool);
do {
reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
regs, GRDMA_MPOOL_NREGS);
for (i = 0 ; i < reg_cnt ; ++i) {
if (regs[i]->ref_count) {
regs[i]->ref_count = 0; /* otherwise dereg will fail on assert */
} else if (mca_mpool_grdma_component.leave_pinned) {
opal_list_remove_item(&mpool_grdma->pool->lru_list,
(opal_list_item_t *) regs[i]);
}
(void) dereg_mem(regs[i]);
}
} while (reg_cnt == GRDMA_MPOOL_NREGS);
OBJ_RELEASE(mpool_grdma->pool);
OBJ_DESTRUCT(&mpool_grdma->reg_list);
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
/* Cleanup any vmas that we have deferred deletion on */
mpool->rcache->rcache_clean(mpool->rcache);
/* this mpool was allocated by grdma_init in mpool_grdma_component.c */
free(mpool);
}
/**
 * Fault-tolerance event hook; this module takes no action.
 *
 * @param state  event identifier (ignored)
 * @return always OPAL_SUCCESS
 */
int mca_mpool_grdma_ft_event(int state)
{
    (void) state;
    return OPAL_SUCCESS;
}

52
opal/mca/mpool/hugepage/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,52 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Preprocessor flags for every source of this component.
AM_CPPFLAGS = $(mpool_hugepage_CPPFLAGS)
# Sources shared by the DSO and the static flavour of the component.
sources = mpool_hugepage_module.c mpool_hugepage_component.c
# The component header is installed only when header installation is enabled.
if WANT_INSTALL_HEADERS
opaldir = $(opalincludedir)/$(subdir)
opal_HEADERS = mpool_hugepage.h
endif
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty.
if MCA_BUILD_opal_mpool_hugepage_DSO
component_noinst =
component_install = mca_mpool_hugepage.la
else
component_noinst = libmca_mpool_hugepage.la
component_install =
endif
# DSO flavour: installed into $(opallibdir).
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_hugepage_la_SOURCES = $(sources)
mca_mpool_hugepage_la_LDFLAGS = -module -avoid-version
mca_mpool_hugepage_la_LIBADD = $(mpool_hugepage_LIBS)
# Static flavour: built but not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_hugepage_la_SOURCES = $(sources)
libmca_mpool_hugepage_la_LDFLAGS = -module -avoid-version
libmca_mpool_hugepage_la_LIBADD = $(mpool_hugepage_LIBS)

90
opal/mca/mpool/hugepage/mpool_hugepage.h Обычный файл
Просмотреть файл

@ -0,0 +1,90 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_HUGEPAGE_H
#define MCA_MPOOL_HUGEPAGE_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/event/event.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/util/proc.h"
#include "opal/mca/allocator/allocator.h"
#include "opal/util/sys_limits.h"
BEGIN_C_DECLS
struct mca_mpool_hugepage_module_t;
typedef struct mca_mpool_hugepage_module_t mca_mpool_hugepage_module_t;
/** State of the hugepage mpool component. */
struct mca_mpool_hugepage_component_t {
/** base mpool component */
mca_mpool_base_component_t super;
/** NOTE(review): not read by the code visible here; presumably enables a
 * statistics printout -- confirm */
bool print_stats;
/** huge page mount points discovered at component open
 * (mca_mpool_hugepage_hugepage_t items, sorted by page size) */
opal_list_t huge_pages;
/** array with one module slot per discovered huge page */
mca_mpool_hugepage_module_t *modules;
/** number of leading entries of modules that were initialized successfully */
int module_count;
/** bytes currently mapped by the segment allocator; exported as the
 * "bytes_allocated" performance variable */
unsigned long bytes_allocated;
};
typedef struct mca_mpool_hugepage_component_t mca_mpool_hugepage_component_t;
OPAL_DECLSPEC extern mca_mpool_hugepage_component_t mca_mpool_hugepage_component;
struct mca_mpool_hugepage_module_t;
/** Description of one huge page source (one hugetlbfs mount). */
struct mca_mpool_hugepage_hugepage_t {
/** opal list item superclass */
opal_list_item_t super;
/** page size in bytes */
unsigned long page_size;
/** path for mmapped files (mount point; owned by this object and freed
 * by its destructor). When NULL the segment allocator maps anonymous
 * memory instead of a file. */
char *path;
/** counter to help ensure unique file names for mmaped files */
volatile int32_t count;
/** some platforms allow allocation of hugepages through mmap flags;
 * these flags are or'ed into the flags passed to mmap */
int mmap_flags;
};
typedef struct mca_mpool_hugepage_hugepage_t mca_mpool_hugepage_hugepage_t;
OBJ_CLASS_DECLARATION(mca_mpool_hugepage_hugepage_t);
/** One hugepage mpool module; each module serves a single huge page source. */
struct mca_mpool_hugepage_module_t {
/** base mpool module (function table and flags) */
mca_mpool_base_module_t super;
/** huge page source this module allocates from (not owned) */
mca_mpool_hugepage_hugepage_t *huge_page;
/** allocator that carves user allocations out of mapped segments */
mca_allocator_base_module_t *allocator;
/** protects allocation_tree */
opal_mutex_t lock;
/** maps the base address of every mapped segment to its size in bytes */
opal_rb_tree_t allocation_tree;
};
/*
* Initializes the mpool module.
*
* mpool      module storage to initialize
* huge_page  huge page source the module will allocate from
*
* Returns OPAL_SUCCESS or an OPAL error code.
*/
int mca_mpool_hugepage_module_init (mca_mpool_hugepage_module_t *mpool,
mca_mpool_hugepage_hugepage_t *huge_page);
/*
* Segment allocation callback handed to the allocator. ctx is the hugepage
* module. On entry *sizep is the requested size; on success it is updated to
* the size actually mapped. Returns the segment base or NULL.
*/
void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep);
/*
* Segment release callback handed to the allocator. ctx is the hugepage
* module, addr a segment base previously returned by
* mca_mpool_hugepage_seg_alloc. Unknown addresses are ignored.
*/
void mca_mpool_hugepage_seg_free (void *ctx, void *addr);
END_C_DECLS
#endif

Просмотреть файл

@ -0,0 +1,366 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/mca/base/base.h"
#include "opal/runtime/opal_params.h"
#include "opal/mca/base/mca_base_pvar.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/allocator/base/base.h"
#include "mpool_hugepage.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#ifdef HAVE_SYS_VFS_H
#include <sys/vfs.h>
#endif
#ifdef HAVE_SYS_MOUNT_H
#include <sys/mount.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#include <fcntl.h>
/*
* Local functions
*/
/* component open/close/register hooks referenced by the component structure */
static int mca_mpool_hugepage_open (void);
static int mca_mpool_hugepage_close (void);
static int mca_mpool_hugepage_register (void);
/* selects a module (and its priority) for a comma-separated hint string */
static int mca_mpool_hugepage_query (const char *hints, int *priority,
mca_mpool_base_module_t **module);
/* fills mca_mpool_hugepage_component.huge_pages from /proc/mounts */
static void mca_mpool_hugepage_find_hugepages (void);
/* base priority of this component; backs the "priority" MCA variable */
static int mca_mpool_hugepage_priority;
/* page size used when a query carries no page_size hint; backs the
 * "page_size" MCA variable */
static unsigned long mca_mpool_hugepage_page_size;
/* The hugepage mpool component instance. Only the base component part is
 * initialized statically; the remaining fields are set up by the
 * open/register hooks. */
mca_mpool_hugepage_component_t mca_mpool_hugepage_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.mpool_version ={
MCA_MPOOL_BASE_VERSION_3_0_0,
.mca_component_name = "hugepage",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
.mca_open_component = mca_mpool_hugepage_open,
.mca_close_component = mca_mpool_hugepage_close,
.mca_register_component_params = mca_mpool_hugepage_register,
},
.mpool_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* hint-based module selection entry point */
.mpool_query = mca_mpool_hugepage_query,
},
};
/**
 * Register the MCA variables of the hugepage mpool component.
 *
 * Registers:
 *  - "priority"        (int, default 50): base priority of the component
 *  - "page_size"       (unsigned long, default 2M): huge page size used when
 *                      a query does not carry a page_size hint
 *  - "bytes_allocated" (read-only performance variable): bytes currently
 *                      allocated by the component
 *
 * @return always OPAL_SUCCESS; registration results are ignored
 */
static int mca_mpool_hugepage_register(void)
{
    mca_mpool_hugepage_priority = 50;
    (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
                                            "priority", "Default priority of the hugepage mpool component "
                                            "(default: 50)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                            OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                            &mca_mpool_hugepage_priority);

    /* The storage is an unsigned long, so the variable must be registered
     * with the matching type; registering it as an int would make the
     * variable system access only part of the object. */
    mca_mpool_hugepage_page_size = 1ul << 21;
    (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
                                            "page_size", "Default huge page size of the hugepage mpool component "
                                            "(default: 2M)", MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0,
                                            OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
                                            &mca_mpool_hugepage_page_size);

    mca_mpool_hugepage_component.bytes_allocated = 0;
    (void) mca_base_component_pvar_register (&mca_mpool_hugepage_component.super.mpool_version,
                                             "bytes_allocated", "Number of bytes currently allocated in the mpool "
                                             "hugepage component", OPAL_INFO_LVL_3, MCA_BASE_PVAR_CLASS_SIZE,
                                             MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
                                             MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
                                             NULL, NULL, NULL, &mca_mpool_hugepage_component.bytes_allocated);

    return OPAL_SUCCESS;
}
/**
 * Component open hook.
 *
 * Discovers the available huge page sources and initializes one module
 * per source. Sources whose module fails to initialize are skipped, so
 * the initialized modules are packed at the front of the modules array.
 *
 * @return OPAL_SUCCESS (also when no huge pages were found), or
 *         OPAL_ERR_OUT_OF_RESOURCE if the module array cannot be allocated
 */
static int mca_mpool_hugepage_open (void)
{
    mca_mpool_hugepage_hugepage_t *page;
    size_t page_count;
    int ready = 0;

    OBJ_CONSTRUCT(&mca_mpool_hugepage_component.huge_pages, opal_list_t);

    mca_mpool_hugepage_find_hugepages ();

    page_count = opal_list_get_size (&mca_mpool_hugepage_component.huge_pages);
    if (0 == page_count) {
        return OPAL_SUCCESS;
    }

    mca_mpool_hugepage_component.modules = (mca_mpool_hugepage_module_t *)
        calloc (page_count, sizeof (mca_mpool_hugepage_module_t));
    if (NULL == mca_mpool_hugepage_component.modules) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    OPAL_LIST_FOREACH(page, &mca_mpool_hugepage_component.huge_pages, mca_mpool_hugepage_hugepage_t) {
        /* a slot is consumed only when its module initialized successfully */
        if (OPAL_SUCCESS == mca_mpool_hugepage_module_init (mca_mpool_hugepage_component.modules + ready,
                                                            page)) {
            ++ready;
        }
    }

    mca_mpool_hugepage_component.module_count = ready;

    return OPAL_SUCCESS;
}
/**
 * Component close hook: releases the huge page list, finalizes every
 * initialized module and frees the module array.
 *
 * NOTE(review): the huge page list is destructed before the modules that
 * point into it are finalized; confirm the finalize routine does not touch
 * module->huge_page.
 *
 * @return always OPAL_SUCCESS
 */
static int mca_mpool_hugepage_close (void)
{
    mca_mpool_hugepage_module_t *module;
    int idx;

    OPAL_LIST_DESTRUCT(&mca_mpool_hugepage_component.huge_pages);

    for (idx = 0 ; idx < mca_mpool_hugepage_component.module_count ; ++idx) {
        module = &mca_mpool_hugepage_component.modules[idx];
        module->super.mpool_finalize (&module->super);
    }

    free (mca_mpool_hugepage_component.modules);
    mca_mpool_hugepage_component.modules = NULL;

    return OPAL_SUCCESS;
}
/* List sort callback: orders huge page descriptors by ascending page size.
 * Returns 1, -1 or 0 when *a is larger than, smaller than or equal to *b. */
static int page_compare (opal_list_item_t **a, opal_list_item_t **b) {
    const unsigned long lhs = ((mca_mpool_hugepage_hugepage_t *) *a)->page_size;
    const unsigned long rhs = ((mca_mpool_hugepage_hugepage_t *) *b)->page_size;

    return (lhs > rhs) - (lhs < rhs);
}
/**
 * Discover hugetlbfs mounts by scanning /proc/mounts.
 *
 * For every line whose file system type is "hugetlbfs" a huge page
 * descriptor holding the mount point and page size is appended to
 * mca_mpool_hugepage_component.huge_pages; the list is sorted by page
 * size afterwards. The page size is taken from the "pagesize=" mount
 * option (an optional K/M/G suffix is honoured) or, when the option is
 * absent and statfs is available, from the file system block size.
 * Lines that are malformed or yield no page size are skipped. If
 * /proc/mounts cannot be opened the list is left untouched.
 */
static void mca_mpool_hugepage_find_hugepages (void) {
    mca_mpool_hugepage_hugepage_t *hp;
    FILE *fh;
    char *path;
    char buffer[1024];
    char *ctx, *tok, *opts;

    fh = fopen ("/proc/mounts", "r");
    if (NULL == fh) {
        return;
    }

    while (fgets (buffer, sizeof (buffer), fh)) {
        unsigned long page_size = 0;

        /* fields: device, mount point, file system type, options, ... */
        (void) strtok_r (buffer, " ", &ctx);
        path = strtok_r (NULL, " ", &ctx);
        tok = strtok_r (NULL, " ", &ctx);

        /* a short or malformed line yields NULL tokens; skip it */
        if (NULL == path || NULL == tok || 0 != strcmp (tok, "hugetlbfs")) {
            continue;
        }

        /* walk the comma-separated mount options looking for pagesize */
        opts = strtok_r (NULL, " ", &ctx);
        tok = (NULL != opts) ? strtok_r (opts, ",", &ctx) : NULL;
        while (NULL != tok && 0 != strncmp (tok, "pagesize", 8)) {
            tok = strtok_r (NULL, ",", &ctx);
        }

        if (NULL == tok) {
#if HAVE_STATFS
            struct statfs info;

            /* only trust f_bsize when the call succeeded */
            if (0 == statfs (path, &info)) {
                page_size = info.f_bsize;
            }
#endif
        } else if ('=' == tok[8]) {
            char *end;

            page_size = strtoul (tok + 9, &end, 10);

            /* the option may be printed with a unit suffix (e.g. 2M) */
            switch (*end) {
            case 'g':
            case 'G':
                page_size *= 1024;
                /* fall through */
            case 'm':
            case 'M':
                page_size *= 1024;
                /* fall through */
            case 'k':
            case 'K':
                page_size *= 1024;
                break;
            default:
                break;
            }
        }

        if (0 == page_size) {
            /* could not get page size */
            continue;
        }

        hp = OBJ_NEW(mca_mpool_hugepage_hugepage_t);
        if (NULL == hp) {
            break;
        }

        hp->path = strdup (path);
        if (NULL == hp->path) {
            /* without a path the descriptor would silently describe an
             * anonymous mapping; give up instead */
            OBJ_RELEASE(hp);
            break;
        }
        hp->page_size = page_size;

        OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                             "found huge page with size = %lu, path = %s, mmap flags = 0x%x",
                             hp->page_size, hp->path, hp->mmap_flags));

        opal_list_append (&mca_mpool_hugepage_component.huge_pages, &hp->super);
    }

    opal_list_sort (&mca_mpool_hugepage_component.huge_pages, page_compare);

    fclose (fh);
}
/**
 * Select a hugepage module for a hint string.
 *
 * Recognized hints (comma separated "key=value" pairs, keys compared
 * case-insensitively):
 *  - mpool=hugepage   raises the priority to 100; any other mpool value
 *                     rejects this component
 *  - page_size=N[kKmMgG]  requested huge page size
 *
 * Without a page_size hint the default page size is used and, unless the
 * component was requested by name, the base priority drops to 0. A module
 * whose page size matches adds 20 to the priority (capped at 100).
 *
 * @param hints         hint string, may be NULL
 * @param priority_out  out (optional): priority of the selected module
 * @param module        out (optional): selected module
 * @return OPAL_SUCCESS, OPAL_ERR_NOT_AVAILABLE when no module exists,
 *         OPAL_ERR_OUT_OF_RESOURCE when the hints cannot be split, or
 *         OPAL_ERR_NOT_FOUND when the hints exclude this component or no
 *         module has the requested page size
 */
static int mca_mpool_hugepage_query (const char *hints, int *priority_out,
                                     mca_mpool_base_module_t **module)
{
    unsigned long page_size = 0;
    char **hints_array;
    int my_priority = mca_mpool_hugepage_priority;
    char *tmp;
    bool found = false;

    if (0 == mca_mpool_hugepage_component.module_count) {
        return OPAL_ERR_NOT_AVAILABLE;
    }

    if (hints) {
        hints_array = opal_argv_split (hints, ',');
        if (NULL == hints_array) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        for (int i = 0 ; hints_array[i] ; ++i) {
            char *key = hints_array[i];
            char *value = NULL;

            /* split "key=value" in place */
            if (NULL != (tmp = strchr (key, '='))) {
                value = tmp + 1;
                *tmp = '\0';
            }

            if (0 == strcasecmp ("mpool", key)) {
                if (value && 0 == strcasecmp ("hugepage", value)) {
                    /* this mpool was requested by name */
                    my_priority = 100;
                    opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                         "hugepage mpool matches hint: %s=%s", key, value);
                } else {
                    /* different mpool requested */
                    opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                         "hugepage mpool does not match hint: %s=%s", key,
                                         value ? value : "(null)");
                    /* do not leak the split hints on the early exit */
                    opal_argv_free (hints_array);
                    return OPAL_ERR_NOT_FOUND;
                }
            }

            /* a page_size hint without a value is ignored */
            if (0 == strcasecmp ("page_size", key) && NULL != value) {
                page_size = strtoul (value, &tmp, 0);
                switch (*tmp) {
                case '\0':
                    break;
                case 'g':
                case 'G':
                    page_size *= 1024;
                    /* fall through */
                case 'm':
                case 'M':
                    page_size *= 1024;
                    /* fall through */
                case 'k':
                case 'K':
                    page_size *= 1024;
                    break;
                default:
                    /* unknown suffix: request a size no module can match */
                    page_size = (unsigned long) -1;
                    break;
                }

                opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                     "hugepage mpool requested page size: %lu", page_size);
            }
        }

        opal_argv_free (hints_array);
    }

    if (0 == page_size) {
        /* use default huge page size */
        page_size = mca_mpool_hugepage_page_size;
        if (my_priority < 100) {
            /* take a priority hit if this mpool was not asked for by name */
            my_priority = 0;
        }
        opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
                             "hugepage mpool did not match any hints: %s", hints ? hints : "(null)");
    }

    for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
        mca_mpool_hugepage_module_t *hugepage_module = mca_mpool_hugepage_component.modules + i;

        if (hugepage_module->huge_page->page_size != page_size) {
            continue;
        }

        my_priority = (my_priority < 80) ? my_priority + 20 : 100;

        if (module) {
            *module = &hugepage_module->super;
        }

        opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                             "matches page size hint. page size: %lu, path: %s, mmap flags: "
                             "0x%x", page_size, hugepage_module->huge_page->path,
                             hugepage_module->huge_page->mmap_flags);
        found = true;
        break;
    }

    if (!found) {
        opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
                             "could not find page matching page request: %lu", page_size);
        return OPAL_ERR_NOT_FOUND;
    }

    if (priority_out) {
        *priority_out = my_priority;
    }

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,255 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/align.h"
#include "mpool_hugepage.h"
#include <errno.h>
#include <string.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include "opal/mca/mpool/base/base.h"
#include "opal/runtime/opal_params.h"
#include "opal/include/opal_stdint.h"
#include "opal/mca/allocator/base/base.h"
#include <fcntl.h>
#include <sys/mman.h>
/* Module entry points; installed into the module's function table by
 * mca_mpool_hugepage_module_init. */
static void *mca_mpool_hugepage_alloc (mca_mpool_base_module_t *mpool, size_t size, size_t align,
uint32_t flags);
static void *mca_mpool_hugepage_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size);
static void mca_mpool_hugepage_free (mca_mpool_base_module_t *mpool, void *addr);
static void mca_mpool_hugepage_finalize (mca_mpool_base_module_t *mpool);
static int mca_mpool_hugepage_ft_event (int state);
/* Object constructor: zeroes every member that follows the list-item
 * superclass, leaving the superclass itself untouched. */
static void mca_mpool_hugepage_hugepage_constructor (mca_mpool_hugepage_hugepage_t *huge_page)
{
    char *own_fields = (char *) huge_page + sizeof (huge_page->super);
    const size_t own_size = sizeof (*huge_page) - sizeof (huge_page->super);

    memset (own_fields, 0, own_size);
}
/* Object destructor: releases the mount path string owned by the object. */
static void mca_mpool_hugepage_hugepage_destructor (mca_mpool_hugepage_hugepage_t *huge_page)
{
    char *mount_path = huge_page->path;

    free (mount_path);
}
/* Class instance for huge page descriptors: derives from opal_list_item_t
 * and wires up the constructor/destructor defined above. */
OBJ_CLASS_INSTANCE(mca_mpool_hugepage_hugepage_t, opal_list_item_t,
mca_mpool_hugepage_hugepage_constructor,
mca_mpool_hugepage_hugepage_destructor);
/**
 * Red-black tree comparator ordering keys by address.
 *
 * The keys are compared as integers: relational comparison of pointers
 * that do not point into the same object is undefined in C, and the keys
 * compared here are not required to belong to one object.
 *
 * @param key1  first key (an address)
 * @param key2  second key (an address)
 * @return 0 if the keys are equal, -1 if key1 is lower, 1 otherwise
 */
static int mca_mpool_rb_hugepage_compare (void *key1, void *key2)
{
    const uintptr_t lhs = (uintptr_t) key1;
    const uintptr_t rhs = (uintptr_t) key2;

    if (lhs == rhs) {
        return 0;
    }

    return (lhs < rhs) ? -1 : 1;
}
/*
 * Initializes the mpool module.
 *
 * Fills in the module function table, binds the module to its huge page
 * source, creates the segment-tracking tree and lock and finally creates
 * the "bucket" allocator that hands out memory from mapped segments.
 * Everything constructed here is destructed again on failure, so a failed
 * module slot can be reused by the caller.
 *
 * mpool      module storage to initialize
 * huge_page  huge page source the module allocates from
 *
 * Returns OPAL_SUCCESS, or OPAL_ERR_NOT_AVAILABLE if the bucket allocator
 * component, an allocator instance or the tree cannot be set up.
 */
int mca_mpool_hugepage_module_init(mca_mpool_hugepage_module_t *mpool,
                                   mca_mpool_hugepage_hugepage_t *huge_page)
{
    mca_allocator_base_component_t *allocator_component;
    int rc;

    mpool->super.mpool_component = &mca_mpool_hugepage_component.super;
    mpool->super.mpool_base = NULL; /* no base .. */
    mpool->super.mpool_alloc = mca_mpool_hugepage_alloc;
    mpool->super.mpool_realloc = mca_mpool_hugepage_realloc;
    mpool->super.mpool_free = mca_mpool_hugepage_free;
    mpool->super.mpool_finalize = mca_mpool_hugepage_finalize;
    mpool->super.mpool_ft_event = mca_mpool_hugepage_ft_event;
    mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;

    mpool->huge_page = huge_page;

    /* use an allocator component to reduce waste when making small allocations */
    allocator_component = mca_allocator_component_lookup ("bucket");
    if (NULL == allocator_component) {
        return OPAL_ERR_NOT_AVAILABLE;
    }

    OBJ_CONSTRUCT(&mpool->lock, opal_mutex_t);

    /* the tree is set up before the allocator so that the segment
     * callbacks never see an unconstructed tree */
    OBJ_CONSTRUCT(&mpool->allocation_tree, opal_rb_tree_t);
    rc = opal_rb_tree_init (&mpool->allocation_tree, mca_mpool_rb_hugepage_compare);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&mpool->allocation_tree);
        OBJ_DESTRUCT(&mpool->lock);
        return OPAL_ERR_NOT_AVAILABLE;
    }

    mpool->allocator = allocator_component->allocator_init (true, mca_mpool_hugepage_seg_alloc,
                                                            mca_mpool_hugepage_seg_free, mpool);
    if (NULL == mpool->allocator) {
        /* without an allocator every allocation request would dereference NULL */
        OBJ_DESTRUCT(&mpool->allocation_tree);
        OBJ_DESTRUCT(&mpool->lock);
        return OPAL_ERR_NOT_AVAILABLE;
    }

    return OPAL_SUCCESS;
}
/**
 * Segment allocation callback used by the allocator component.
 *
 * The requested size is rounded up to a multiple of the huge page size. When
 * the huge page descriptor has a path, a uniquely named file is created under
 * that path, sized with ftruncate() and mapped; the file is closed and
 * unlinked once the mapping attempt has been made. Without a path an
 * anonymous mapping is requested. If the huge page mapping fails the function
 * falls back on an anonymous mapping of regular pages.
 *
 * @param[in]     ctx    hugepage module (mca_mpool_hugepage_module_t *)
 * @param[in,out] sizep  requested size on entry, mapped size on success
 *
 * @returns base address of the new segment, or NULL on failure
 */
void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep)
{
    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx;
    mca_mpool_hugepage_hugepage_t *huge_page = hugepage_module->huge_page;
    size_t size = *sizep;
    void *base = NULL;
    char *path = NULL;
    int flags = MAP_PRIVATE;
    int anon_flags = MAP_PRIVATE;
    int fd = -1;
    int rc;

    /* flags for a mapping that is not backed by a file. these are needed both
     * for the path-less huge page case and for the regular page fallback */
#if defined(MAP_ANONYMOUS)
    anon_flags |= MAP_ANONYMOUS;
#elif defined(MAP_ANON)
    /* older versions of OS X do not define MAP_ANONYMOUS (10.9.x and older) */
    anon_flags |= MAP_ANON;
#endif

    size = OPAL_ALIGN(size, huge_page->page_size, size_t);

    if (huge_page->path) {
        int32_t count;

        /* the per-descriptor counter keeps file names unique within this process */
        count = opal_atomic_add_32 (&huge_page->count, 1);

        rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path,
                       getpid (), count);
        if (0 > rc) {
            return NULL;
        }

        fd = open (path, O_RDWR | O_CREAT, 0600);
        if (-1 == fd) {
            free (path);
            return NULL;
        }

        if (0 != ftruncate (fd, size)) {
            close (fd);
            unlink (path);
            free (path);
            return NULL;
        }
    } else {
        flags = anon_flags;
    }

    base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags | huge_page->mmap_flags, fd, 0);
    if (path) {
        /* the mapping (if any) keeps the pages alive; the file itself is no longer needed */
        close (fd);
        unlink (path);
        free (path);
    }

    if (MAP_FAILED == base) {
        opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_verbose,
                             "could not allocate huge page(s). falling back on standard pages");
        /* fall back on regular pages. this mapping has no backing file so it
         * must always be anonymous, even when the huge page attempt above was
         * file backed (in which case flags does not contain an anonymous flag) */
        base = mmap (NULL, size, PROT_READ | PROT_WRITE, anon_flags, -1, 0);
    }

    if (MAP_FAILED == base) {
        return NULL;
    }

    /* remember the segment size so mca_mpool_hugepage_seg_free() can unmap it.
     * NOTE(review): the return code of opal_rb_tree_insert is not checked */
    opal_mutex_lock (&hugepage_module->lock);
    opal_rb_tree_insert (&hugepage_module->allocation_tree, base, (void *) (intptr_t) size);
    opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, (int64_t) size);
    opal_mutex_unlock (&hugepage_module->lock);

    OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose,
                         "allocated segment %p of size %lu bytes", base, (unsigned long) size));

    *sizep = size;

    return base;
}
/**
 * Segment release callback used by the allocator component.
 *
 * Looks up the size recorded for the segment in the module's allocation tree
 * and, when an entry exists, removes it, unmaps the segment and subtracts the
 * size from the component's bytes_allocated counter. Addresses without a
 * recorded size are ignored.
 *
 * @param[in] ctx   hugepage module (mca_mpool_hugepage_module_t *)
 * @param[in] addr  base address of the segment
 */
void mca_mpool_hugepage_seg_free (void *ctx, void *addr)
{
    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx;
    size_t size;

    opal_mutex_lock (&hugepage_module->lock);

    /* the tree stores the segment size directly in the value pointer */
    size = (size_t) (intptr_t) opal_rb_tree_find (&hugepage_module->allocation_tree, addr);
    if (size > 0) {
        opal_rb_tree_delete (&hugepage_module->allocation_tree, addr);
        /* size is a size_t; convert it explicitly to match the %lu conversion */
        OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose,
                             "freeing segment %p of size %lu bytes", addr, (unsigned long) size));
        munmap (addr, size);
        opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, -(int64_t) size);
    }

    opal_mutex_unlock (&hugepage_module->lock);
}
/**
 * Allocate memory from the pool.
 *
 * The request is handed to the module's allocator; the flags argument is not
 * used by this implementation.
 */
static void *mca_mpool_hugepage_alloc (mca_mpool_base_module_t *mpool, size_t size,
                                       size_t align, uint32_t flags)
{
    mca_mpool_hugepage_module_t *module = (mca_mpool_hugepage_module_t *) mpool;
    void *ptr;

    ptr = module->allocator->alc_alloc (module->allocator, size, align);

    return ptr;
}
/**
 * Reallocate memory previously obtained from the pool.
 *
 * The request is handed to the module's allocator.
 */
static void *mca_mpool_hugepage_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size)
{
    mca_mpool_hugepage_module_t *module = (mca_mpool_hugepage_module_t *) mpool;
    void *ptr;

    ptr = module->allocator->alc_realloc (module->allocator, addr, size);

    return ptr;
}
/**
 * Return memory to the pool.
 *
 * The address is handed back to the module's allocator.
 */
static void mca_mpool_hugepage_free (mca_mpool_base_module_t *mpool, void *addr)
{
    mca_mpool_hugepage_module_t *module = (mca_mpool_hugepage_module_t *) mpool;

    module->allocator->alc_free (module->allocator, addr);
}
/**
 * Release the resources held by a hugepage mpool module.
 *
 * @param[in] mpool  hugepage module to finalize
 */
static void mca_mpool_hugepage_finalize (struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;

    /* finalize the allocator before the lock is destroyed: the allocator's
     * segment release callback (mca_mpool_hugepage_seg_free) acquires
     * hugepage_module->lock, so the lock has to stay valid for as long as the
     * allocator may still hand segments back */
    if (hugepage_module->allocator) {
        (void) hugepage_module->allocator->alc_finalize (hugepage_module->allocator);
        hugepage_module->allocator = NULL;
    }

    OBJ_DESTRUCT(&hugepage_module->lock);
}
/**
 * Fault tolerance event hook. The state is ignored and success is always reported.
 */
static int mca_mpool_hugepage_ft_event (int state)
{
    (void) state;

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -29,138 +29,70 @@
#include "opal_config.h" #include "opal_config.h"
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/class/opal_free_list.h" #include "opal/class/opal_free_list.h"
#include "opal/mca/rcache/base/rcache_base_vma.h"
#include "opal/mca/crs/crs.h" #include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h" #include "opal/mca/crs/base/base.h"
#define MCA_MPOOL_FLAGS_CACHE_BYPASS 0x01 #define MCA_MPOOL_ALLOC_FLAG_DEFAULT 0x00
#define MCA_MPOOL_FLAGS_PERSIST 0x02 #define MCA_MPOOL_ALLOC_FLAG_USER 0x01
#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x04
#define MCA_MPOOL_FLAGS_INVALID 0x08 #define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x80
#define MCA_MPOOL_FLAGS_SO_MEM 0x10
#define MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM 0x20
struct opal_info_t; struct opal_info_t;
struct mca_mpool_base_module_t;
#define MCA_MPOOL_FLAGS_CUDA_GPU_MEM 0x40 typedef struct mca_mpool_base_module_t mca_mpool_base_module_t;
/* Only valid in mpool flags. Used to indicate that no external memory
* hooks (ptmalloc2, etc) are required. */
#define MCA_MPOOL_FLAGS_NO_HOOKS 0x80
/* access flags */
enum {
MCA_MPOOL_ACCESS_LOCAL_WRITE = 0x01,
MCA_MPOOL_ACCESS_REMOTE_READ = 0x02,
MCA_MPOOL_ACCESS_REMOTE_WRITE = 0x04,
MCA_MPOOL_ACCESS_REMOTE_ATOMIC = 0x08,
MCA_MPOOL_ACCESS_ANY = 0x0f,
};
struct mca_mpool_base_resources_t;
struct mca_mpool_base_registration_t {
opal_free_list_item_t super;
struct mca_mpool_base_module_t *mpool;
unsigned char* base;
unsigned char* bound;
unsigned char* alloc_base;
int32_t ref_count;
uint32_t flags;
void *mpool_context;
#if OPAL_CUDA_GDR_SUPPORT
unsigned long long gpu_bufID;
#endif /* OPAL_CUDA_GDR_SUPPORT */
int32_t access_flags;
};
typedef struct mca_mpool_base_registration_t mca_mpool_base_registration_t;
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_registration_t);
/** /**
* component initialize * component query function
*
* @param[in] hints memory pool hints in order of priority. this should
* be replaced by opal_info_t when the work to move
* info down to opal is complete.
* @param[out] priority relative priority of this memory pool component
* @param[out] module best match module
*
* This function should parse the provided hints and return a relative priority
* of the component based on the number of hints matched. For example, if the
* hints are "page_size=2M,high-bandwidth" and a pool matches the page_size but
* not the high-bandwidth hint then the component should return a lower priority
* than if both matched but a higher priority than if a pool matches only the
* high-bandwidth hint.
*
* Memory pools should try to support at a minimum name=value but can define
* any additional keys.
*/ */
typedef struct mca_mpool_base_module_t* (*mca_mpool_base_component_init_fn_t)( typedef int (*mca_mpool_base_component_query_fn_t) (const char *hints, int *priority,
struct mca_mpool_base_resources_t*); mca_mpool_base_module_t **module);
/** /**
* allocate function typedef * allocate function typedef
*/ */
typedef void* (*mca_mpool_base_module_alloc_fn_t)( typedef void *(*mca_mpool_base_module_alloc_fn_t) (mca_mpool_base_module_t *mpool,
struct mca_mpool_base_module_t* mpool, size_t size, size_t align,
size_t size, uint32_t flags);
size_t align,
uint32_t flags,
mca_mpool_base_registration_t** registration);
/** /**
* realloc function typedef * reallocate function typedef
*/ */
typedef void* (*mca_mpool_base_module_realloc_fn_t)( typedef void *(*mca_mpool_base_module_realloc_fn_t) (mca_mpool_base_module_t *mpool,
struct mca_mpool_base_module_t* mpool, void *addr, size_t size);
void* addr,
size_t size,
mca_mpool_base_registration_t** registration);
/** /**
* free function typedef * free function typedef
*/ */
typedef void (*mca_mpool_base_module_free_fn_t)( typedef void (*mca_mpool_base_module_free_fn_t) (mca_mpool_base_module_t *mpool,
struct mca_mpool_base_module_t* mpool, void *addr);
void *addr,
mca_mpool_base_registration_t* registration);
/**
* register memory
*/
typedef int (*mca_mpool_base_module_register_fn_t)(
struct mca_mpool_base_module_t* mpool,
void * addr,
size_t size,
uint32_t flags,
int32_t access_flags,
mca_mpool_base_registration_t** registration);
/**
* deregister memory
*/
typedef int (*mca_mpool_base_module_deregister_fn_t)(
struct mca_mpool_base_module_t* mpool,
mca_mpool_base_registration_t* registration);
/**
* find registration in this memory pool
*/
typedef int (*mca_mpool_base_module_find_fn_t) (
struct mca_mpool_base_module_t* mpool, void* addr, size_t size,
mca_mpool_base_registration_t **reg);
/**
* release registration
*/
typedef int (*mca_mpool_base_module_release_fn_t) (
struct mca_mpool_base_module_t* mpool,
mca_mpool_base_registration_t* registration);
/**
* release memory region
*/
typedef int (*mca_mpool_base_module_release_memory_fn_t) (
struct mca_mpool_base_module_t* mpool, void *base, size_t size);
/** /**
* if appropriate - returns base address of memory pool * if appropriate - returns base address of memory pool
*/ */
typedef void* (*mca_mpool_base_module_address_fn_t)(struct mca_mpool_base_module_t* mpool); typedef void* (*mca_mpool_base_module_address_fn_t) (mca_mpool_base_module_t *mpool);
/** /**
* finalize * finalize
*/ */
typedef void (*mca_mpool_base_module_finalize_fn_t)(struct mca_mpool_base_module_t*); typedef void (*mca_mpool_base_module_finalize_fn_t)(mca_mpool_base_module_t *mpool);
/** /**
@ -179,7 +111,7 @@ struct mca_mpool_base_component_2_0_0_t {
mca_base_component_t mpool_version; /**< version */ mca_base_component_t mpool_version; /**< version */
mca_base_component_data_t mpool_data;/**< metadata */ mca_base_component_data_t mpool_data;/**< metadata */
mca_mpool_base_component_init_fn_t mpool_init; /**< init function */ mca_mpool_base_component_query_fn_t mpool_query; /**< query for matching pools */
}; };
/** /**
* Convenience typedef. * Convenience typedef.
@ -201,20 +133,14 @@ struct mca_mpool_base_module_t {
mca_mpool_base_module_alloc_fn_t mpool_alloc; /**< allocate function */ mca_mpool_base_module_alloc_fn_t mpool_alloc; /**< allocate function */
mca_mpool_base_module_realloc_fn_t mpool_realloc; /**< reallocate function */ mca_mpool_base_module_realloc_fn_t mpool_realloc; /**< reallocate function */
mca_mpool_base_module_free_fn_t mpool_free; /**< free function */ mca_mpool_base_module_free_fn_t mpool_free; /**< free function */
mca_mpool_base_module_register_fn_t mpool_register; /**< register memory */
mca_mpool_base_module_deregister_fn_t mpool_deregister; /**< deregister memory */
mca_mpool_base_module_find_fn_t mpool_find; /**< find registrations in the cache */
mca_mpool_base_module_release_fn_t mpool_release; /**< release a registration from the cache */
mca_mpool_base_module_release_memory_fn_t mpool_release_memory; /**< release memory region from the cache */
mca_mpool_base_module_finalize_fn_t mpool_finalize; /**< finalize */ mca_mpool_base_module_finalize_fn_t mpool_finalize; /**< finalize */
mca_mpool_base_module_ft_event_fn_t mpool_ft_event; /**< ft_event */ mca_mpool_base_module_ft_event_fn_t mpool_ft_event; /**< ft_event */
struct mca_rcache_base_module_t *rcache; /* the rcache associated with this mpool */
uint32_t flags; /**< mpool flags */ uint32_t flags; /**< mpool flags */
size_t mpool_allocation_unit; /**< allocation unit used by this mpool */
char *mpool_name; /**< name of this pool module */
}; };
/**
* Convenience typedef
*/
typedef struct mca_mpool_base_module_t mca_mpool_base_module_t;
/** /**
@ -237,7 +163,7 @@ typedef struct mca_mpool_base_module_t mca_mpool_base_module_t;
* @retval pointer to the allocated memory * @retval pointer to the allocated memory
* @retval NULL on failure * @retval NULL on failure
*/ */
OPAL_DECLSPEC void * mca_mpool_base_alloc(size_t size, struct opal_info_t * info); OPAL_DECLSPEC void * mca_mpool_base_alloc(size_t size, struct opal_info_t * info, const char *hints);
/** /**
* Function to free memory previously allocated by mca_mpool_base_alloc * Function to free memory previously allocated by mca_mpool_base_alloc
@ -261,21 +187,11 @@ OPAL_DECLSPEC int mca_mpool_base_free(void * base);
*/ */
OPAL_DECLSPEC int mca_mpool_base_tree_node_compare(void * key1, void * key2); OPAL_DECLSPEC int mca_mpool_base_tree_node_compare(void * key1, void * key2);
OPAL_DECLSPEC int mca_mpool_base_insert(
void * addr,
size_t size,
mca_mpool_base_module_t* mpool,
void* user_in,
mca_mpool_base_registration_t* registration);
OPAL_DECLSPEC int mca_mpool_base_remove(void * base);
/** /**
* Macro for use in components that are of type mpool * Macro for use in components that are of type mpool
*/ */
#define MCA_MPOOL_BASE_VERSION_2_0_0 \ #define MCA_MPOOL_BASE_VERSION_3_0_0 \
OPAL_MCA_BASE_VERSION_2_1_0("mpool", 2, 0, 0) OPAL_MCA_BASE_VERSION_2_1_0("mpool", 3, 0, 0)
#endif /* MCA_MPOOL_H */ #endif /* MCA_MPOOL_H */

Просмотреть файл

@ -1,54 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2013 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files that make up the sm mpool component; shared by the DSO and the
# static build below.
sources = \
mpool_sm.h \
mpool_sm_module.c \
mpool_sm_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_mpool_sm_DSO
component_noinst =
component_install = mca_mpool_sm.la
else
component_noinst = libmca_mpool_sm.la
component_install =
endif
# See opal/mca/common/sm/Makefile.am for an explanation of
# libmca_common_sm.la.
# DSO build: installed into $(opallibdir) and linked against the common sm
# library.
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_sm_la_SOURCES = $(sources)
mca_mpool_sm_la_LDFLAGS = -module -avoid-version
mca_mpool_sm_la_LIBADD = \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/sm/lib@OPAL_LIB_PREFIX@mca_common_sm.la
# When CUDA support is enabled the DSO additionally links the common cuda
# library.
if OPAL_cuda_support
mca_mpool_sm_la_LIBADD += \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la
endif
# Static build: a non-installed library built from the same sources.
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_sm_la_SOURCES = $(sources)
libmca_mpool_sm_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,113 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_SM_H
#define MCA_MPOOL_SM_H
#include "opal_config.h"
#include "opal/mca/event/event.h"
#include "opal/mca/shmem/shmem.h"
#include "opal/mca/common/sm/common_sm.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/mca/allocator/allocator.h"
BEGIN_C_DECLS
/**
 * Shared memory (sm) mpool component: the base mpool component plus the
 * component-level settings.
 */
struct mca_mpool_sm_component_t {
/* base mpool component; must be the first member */
mca_mpool_base_component_t super;
/* mca_allocator_base_module_t* sm_allocator; */
/* name of the allocator component to use with the sm mpool */
char *sm_allocator_name;
/* output stream used for verbose messages, or -1 when verbose output is disabled */
int verbose;
/* struct mca_mpool_sm_mmap_t *sm_mmap; */
};
typedef struct mca_mpool_sm_component_t mca_mpool_sm_component_t;
/**
 * Resources handed to the sm mpool component when a module is created.
 */
typedef struct mca_mpool_base_resources_t {
/* requested pool size; the component raises it to its configured minimum when smaller */
size_t size;
/* copied verbatim into the module's mem_node field */
int32_t mem_node;
/* backing store metadata */
opal_shmem_ds_t bs_meta_buf;
} mca_mpool_base_resources_t;
OPAL_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component;
/**
 * Shared memory (sm) mpool module.
 */
typedef struct mca_mpool_sm_module_t {
/* base mpool module; must be the first member */
mca_mpool_base_module_t super;
/* size of the pool (the requested size, clipped at the component's minimum) */
long sm_size;
/* allocator module created by the selected allocator component */
mca_allocator_base_module_t *sm_allocator;
/* NOTE(review): not set by the component code visible here -- confirm whether it is still used */
struct mca_mpool_sm_mmap_t *sm_mmap;
/* common sm module obtained by attaching to the backing store */
mca_common_sm_module_t *sm_common_module;
/* value of mem_node from the resources the module was created with */
int32_t mem_node;
} mca_mpool_sm_module_t;
/*
* Initializes the mpool module.
*/
void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool);
/*
* Returns base address of shared memory mapping.
*/
void* mca_mpool_sm_base(mca_mpool_base_module_t*);
/**
* Allocate block of shared memory.
*/
void* mca_mpool_sm_alloc(
mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
uint32_t flags,
mca_mpool_base_registration_t** registration);
/**
* Reallocate a block of shared memory (sm implementation of the mpool
* realloc function).
*/
void* mca_mpool_sm_realloc(
mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
mca_mpool_base_registration_t** registration);
/**
* Free a block of shared memory (sm implementation of the mpool free
* function).
*/
void mca_mpool_sm_free(
mca_mpool_base_module_t* mpool,
void * addr,
mca_mpool_base_registration_t* registration);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OPAL_SUCCESS or failure status
*/
int mca_mpool_sm_ft_event(int state);
END_C_DECLS
#endif

Просмотреть файл

@ -1,210 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H*/
#include <stdlib.h>
#include <errno.h>
#include "opal/mca/base/base.h"
#include "opal/mca/allocator/base/base.h"
#include "mpool_sm.h"
#include "opal/mca/common/sm/common_sm.h"
#if OPAL_ENABLE_FT_CR == 1
#include "opal/runtime/opal_cr.h"
#endif
/*
* Local functions
*/
static int
mca_mpool_sm_register(void);
static int
mca_mpool_sm_open(void);
static int
mca_mpool_sm_close(void);
static mca_mpool_base_module_t *
mca_mpool_sm_init(struct mca_mpool_base_resources_t* resources);
/*
* Component instance for the "sm" mpool. It wires the local functions
* declared above into the MCA component and mpool component structures.
*/
mca_mpool_sm_component_t mca_mpool_sm_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.mpool_version = {
MCA_MPOOL_BASE_VERSION_2_0_0,
.mca_component_name = "sm",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
.mca_open_component = mca_mpool_sm_open,
.mca_close_component = mca_mpool_sm_close,
.mca_register_component_params = mca_mpool_sm_register,
},
.mpool_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.mpool_init = mca_mpool_sm_init,
}
};
/* default for the min_size variable: 134217728 bytes (128 MiB) */
static long default_min = 134217728;
/* storage for the min_size variable; pools smaller than this are enlarged to it */
static unsigned long long opal_mpool_sm_min_size;
/* storage for the verbose variable; non-zero opens an output stream in open() */
static int opal_mpool_sm_verbose;
/**
 * Register the MCA variables of the sm mpool component: the allocator name,
 * the minimum pool size and the verbose switch.
 *
 * @returns OPAL_SUCCESS
 */
static int mca_mpool_sm_register(void)
{
    /* defaults; each one is in place before its variable is registered */
    mca_mpool_sm_component.sm_allocator_name = "bucket";
    opal_mpool_sm_min_size = default_min;
    opal_mpool_sm_verbose = 0;

    /* register SM component parameters */
    (void) mca_base_var_group_component_register(&mca_mpool_sm_component.super.mpool_version,
                                                 "Shared memory pool");

    (void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version,
                                           "allocator",
                                           "Name of allocator component "
                                           "to use with sm mpool",
                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_mpool_sm_component.sm_allocator_name);

    /* the size is an unsigned long long so that it can hold up to 64 bits */
    (void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version,
                                           "min_size",
                                           "Minimum size of the sm mpool shared memory file",
                                           MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, 0, 0,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                           &opal_mpool_sm_min_size);

    (void) mca_base_component_var_register(&mca_mpool_sm_component.super.mpool_version,
                                           "verbose",
                                           "Enable verbose output for mpool sm component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                           &opal_mpool_sm_verbose);

    return OPAL_SUCCESS;
}
/**
 * Component open function: opens an output stream when the verbose variable
 * is non-zero and records -1 as the stream otherwise.
 *
 * @returns OPAL_SUCCESS
 */
static int mca_mpool_sm_open(void)
{
    mca_mpool_sm_component.verbose =
        (0 != opal_mpool_sm_verbose) ? opal_output_open(NULL) : -1;

    return OPAL_SUCCESS;
}
/**
 * Component close function. Nothing is released here; success is always reported.
 */
static int mca_mpool_sm_close(void)
{
    return OPAL_SUCCESS;
}
/**
 * Create an sm mpool module.
 *
 * Allocates and initializes the module, clips the requested size at the
 * configured minimum, selects an allocator component (falling back on the
 * first available one when the configured allocator is not found), attaches
 * to the shared memory backing store and sets up the allocator.
 *
 * @param[in] resources  requested size, memory node and backing store metadata
 *
 * @returns the new module, or NULL on failure
 */
static mca_mpool_base_module_t *
mca_mpool_sm_init(struct mca_mpool_base_resources_t *resources)
{
    mca_mpool_sm_module_t *mpool_module;
    mca_allocator_base_component_t* allocator_component;

    if (NULL == resources) {
        return NULL;
    }

    /* Make a new mpool module */
    mpool_module =
        (mca_mpool_sm_module_t *)malloc(sizeof(mca_mpool_sm_module_t));
    if (NULL == mpool_module) {
        return NULL;
    }
    mca_mpool_sm_module_init(mpool_module);

    /* set sm_size */
    mpool_module->sm_size = resources->size;

    /* clip at the min size */
    if (mpool_module->sm_size < (long) opal_mpool_sm_min_size) {
        mpool_module->sm_size = (long) opal_mpool_sm_min_size;
    }

    allocator_component = mca_allocator_component_lookup(
        mca_mpool_sm_component.sm_allocator_name);

    /* if specified allocator cannot be loaded - look for an alternative */
    if (NULL == allocator_component) {
        /* an alternative only exists when the component list is not empty;
         * taking the first item of an empty list would not yield a component */
        if (opal_list_get_size(&opal_allocator_base_framework.framework_components) > 0) {
            mca_base_component_list_item_t *item =
                (mca_base_component_list_item_t *)
                opal_list_get_first(&opal_allocator_base_framework.framework_components);
            allocator_component =
                (mca_allocator_base_component_t *)item->cli_component;
            opal_output(
                0, "mca_mpool_sm_init: "
                "unable to locate allocator: %s - using %s\n",
                mca_mpool_sm_component.sm_allocator_name,
                allocator_component->allocator_version.mca_component_name);
        } else {
            opal_output(0, "mca_mpool_sm_init: "
                        "unable to locate allocator: %s\n",
                        mca_mpool_sm_component.sm_allocator_name);
            free(mpool_module);
            return NULL;
        }
    }

    mpool_module->mem_node = resources->mem_node;

    opal_output(mca_mpool_sm_component.verbose,
                "mca_mpool_sm_init: shared memory size used: (%ld)",
                mpool_module->sm_size);

    if (NULL == (mpool_module->sm_common_module =
        mca_common_sm_module_attach(&resources->bs_meta_buf,
                                    sizeof(mca_common_sm_module_t), 8))) {
        opal_output(mca_mpool_sm_component.verbose, "mca_mpool_sm_init: "
                    "unable to create shared memory mapping (%s)",
                    resources->bs_meta_buf.seg_name);
        free(mpool_module);
        return NULL;
    }

    /* setup allocator */
    mpool_module->sm_allocator =
      allocator_component->allocator_init(true,
                                          mca_common_sm_seg_alloc,
                                          NULL, &(mpool_module->super));
    if (NULL == mpool_module->sm_allocator) {
        opal_output(0, "mca_mpool_sm_init: unable to initialize allocator");
        /* NOTE(review): the common sm module attached above is not released
         * on this path -- confirm whether it needs to be detached here */
        free(mpool_module);
        return NULL;
    }

    return &mpool_module->super;
}

Просмотреть файл

@ -1,174 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_MPOOL_UDREG_H
#define MCA_MPOOL_UDREG_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/event/event.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/util/proc.h"
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
BEGIN_C_DECLS
/**
 * udreg mpool component: the base mpool component plus component-level state.
 */
struct mca_mpool_udreg_component_t {
/* base mpool component; must be the first member */
mca_mpool_base_component_t super;
/* value of the print_stats variable ("print pool usage statistics at the end of the run") */
bool print_stats;
/* set at module creation: 1 when opal_leave_pinned is 1 or opal_leave_pinned_pipeline is set */
int leave_pinned;
/* hugetlbfs mounts found in /proc/mounts, sorted by page size */
opal_list_t huge_pages;
/* true when at least one hugetlbfs mount was found */
bool use_huge_pages;
};
typedef struct mca_mpool_udreg_component_t mca_mpool_udreg_component_t;
OPAL_DECLSPEC extern mca_mpool_udreg_component_t mca_mpool_udreg_component;
struct mca_mpool_udreg_module_t;
/**
 * Resources handed to the udreg mpool component when a module is created.
 * The component copies the whole structure into the new module.
 */
struct mca_mpool_base_resources_t {
/* the start of this mpool should match grdma */
char *pool_name;
/* opaque pointer passed back as the first argument of the callbacks below */
void *reg_data;
/* NOTE(review): presumably the size of the caller's registration structure -- confirm */
size_t sizeof_reg;
/* callback that registers the region [base, base + size) */
int (*register_mem)(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
/* callback that deregisters a registration */
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
/* udreg specific resources */
bool use_kernel_cache;
bool use_evict_w_unreg;
int max_entries;
size_t page_size;
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
/**
 * Description of one hugetlbfs mount point. One instance is created per
 * hugetlbfs entry found in /proc/mounts and kept on the component's
 * huge_pages list.
 */
struct mca_mpool_udreg_hugepage_t {
/* list item base class; must be the first member */
opal_list_item_t super;
/* page size read from the mount's "pagesize=" option */
unsigned long page_size;
/* copy of the mount point path; released by the destructor */
char *path;
/* list constructed by the constructor and drained by the destructor */
opal_list_t allocations;
/* NOTE(review): not touched by the code visible here -- presumably an allocation counter */
int cnt;
};
typedef struct mca_mpool_udreg_hugepage_t mca_mpool_udreg_hugepage_t;
OBJ_CLASS_DECLARATION(mca_mpool_udreg_hugepage_t);
/**
 * List item describing a single huge page allocation.
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * names; the code that fills them in is not visible here.
 */
struct mca_mpool_udreg_hugepage_alloc_t {
    /* list item base class; must be the first member */
    opal_list_item_t super;
    /* presumably the descriptor of the file backing the allocation */
    int fd;
    /* presumably the path of the file backing the allocation */
    char *path;
    /* presumably the base address of the allocation */
    void *ptr;
    /* presumably the size of the allocation in bytes */
    size_t size;
    /* huge page description this allocation belongs to */
    mca_mpool_udreg_hugepage_t *huge_table;
};
typedef struct mca_mpool_udreg_hugepage_alloc_t mca_mpool_udreg_hugepage_alloc_t;
/* the class declaration has to name the type defined above; the previous
 * declaration named mca_mpool_udreg_hugepage_pool_item_t, a type that is not
 * defined in this header */
OBJ_CLASS_DECLARATION(mca_mpool_udreg_hugepage_alloc_t);
/**
 * udreg mpool module.
 */
struct mca_mpool_udreg_module_t {
/* base mpool module; must be the first member */
mca_mpool_base_module_t super;
/* copy of the resources the module was created with */
struct mca_mpool_base_resources_t resources;
/* NOTE(review): presumably the free list of registration objects -- confirm against the module code */
opal_free_list_t reg_list;
/* NOTE(review): presumably the huge page description selected for this module -- confirm */
mca_mpool_udreg_hugepage_t *huge_page;
opal_mutex_t lock;
/* NOTE(review): presumably the handle of the underlying udreg cache -- confirm */
void *udreg_handle;
/** used to communicate the access flags to the underlying registration
* function */
int requested_access_flags;
};
typedef struct mca_mpool_udreg_module_t mca_mpool_udreg_module_t;
/*
* Initializes the mpool module.
*
* @return OPAL_SUCCESS on success; any other value makes the component
*         discard the module
*/
int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t *mpool);
/*
* Returns the base address of the memory pool.
* NOTE(review): the previous comment ("shared memory mapping") looked like it
* was copied from the sm mpool -- confirm what udreg returns here.
*/
void *mca_mpool_udreg_base(mca_mpool_base_module_t *mpool);
/**
* Allocate block of registered memory.
*/
void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size,
size_t align, uint32_t flags,
mca_mpool_base_registration_t** registration);
/**
* realloc block of registered memory
*/
void* mca_mpool_udreg_realloc( mca_mpool_base_module_t *mpool, void* addr,
size_t size, mca_mpool_base_registration_t** registration);
/**
* register block of memory
*/
int mca_mpool_udreg_register(mca_mpool_base_module_t* mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags, mca_mpool_base_registration_t **reg);
/**
* deregister memory
*/
int mca_mpool_udreg_deregister(mca_mpool_base_module_t *mpool,
mca_mpool_base_registration_t *reg);
/**
* free memory allocated by alloc function
*/
void mca_mpool_udreg_free(mca_mpool_base_module_t *mpool, void * addr,
mca_mpool_base_registration_t *reg);
/**
* find registration for a given block of memory
*/
int mca_mpool_udreg_find(struct mca_mpool_base_module_t* mpool, void* addr,
size_t size, mca_mpool_base_registration_t **reg);
/**
* finalize mpool
*/
void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OPAL_SUCCESS or failure status
*/
int mca_mpool_udreg_ft_event(int state);
/**
* evict one unused registration from the mpool's lru.
* @return true on success, false on failure
*/
bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool);
END_C_DECLS
#endif

Просмотреть файл

@ -1,206 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/mca/base/base.h"
#include "opal/runtime/opal_params.h"
#include "mpool_udreg.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <fcntl.h>
/*
* Local functions
*/
/* component open: constructs the list of huge page descriptions */
static int udreg_open(void);
/* component close: releases the huge page descriptions and destructs the list */
static int udreg_close(void);
/* registers the component's MCA variables */
static int udreg_register(void);
/* creates a udreg mpool module from the given resources */
static mca_mpool_base_module_t* udreg_init(
struct mca_mpool_base_resources_t* resources);
/*
* Component instance for the "udreg" mpool. It wires the local functions
* declared above into the MCA component and mpool component structures.
*/
mca_mpool_udreg_component_t mca_mpool_udreg_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.mpool_version ={
MCA_MPOOL_BASE_VERSION_2_0_0,
.mca_component_name = "udreg",
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION),
.mca_open_component = udreg_open,
.mca_close_component = udreg_close,
.mca_register_component_params = udreg_register,
},
.mpool_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.mpool_init = udreg_init
}
};
/**
* component open/close/init function
*/
static int udreg_open(void)
{
OBJ_CONSTRUCT(&mca_mpool_udreg_component.huge_pages, opal_list_t);
return OPAL_SUCCESS;
}
/**
 * Register the MCA variables of the udreg mpool component. The only variable
 * is print_stats, which defaults to false.
 *
 * @returns OPAL_SUCCESS
 */
static int udreg_register(void)
{
    /* default value; in place before the variable is registered */
    mca_mpool_udreg_component.print_stats = false;

    (void) mca_base_component_var_register(&mca_mpool_udreg_component.super.mpool_version,
                                           "print_stats",
                                           "print pool usage statistics at the end of the run",
                                           MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                           OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_mpool_udreg_component.print_stats);

    return OPAL_SUCCESS;
}
/**
 * Component close function: releases every huge page description and then
 * destructs the list that held them.
 *
 * @returns OPAL_SUCCESS
 */
static int udreg_close(void)
{
    for ( ; ; ) {
        opal_list_item_t *entry = opal_list_remove_first (&mca_mpool_udreg_component.huge_pages);

        if (NULL == entry) {
            break;
        }

        OBJ_RELEASE(entry);
    }

    OBJ_DESTRUCT(&mca_mpool_udreg_component.huge_pages);

    return OPAL_SUCCESS;
}
/**
 * List sort callback that orders huge page descriptions by page size.
 *
 * @returns 1 when *a has the larger page size, -1 when it has the smaller
 *          page size and 0 when both are equal
 */
static int page_compare (opal_list_item_t **a,
                         opal_list_item_t **b)
{
    const mca_mpool_udreg_hugepage_t *lhs = (mca_mpool_udreg_hugepage_t *) *a;
    const mca_mpool_udreg_hugepage_t *rhs = (mca_mpool_udreg_hugepage_t *) *b;

    /* each comparison evaluates to 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs->page_size > rhs->page_size) - (lhs->page_size < rhs->page_size);
}
/**
 * Scan /proc/mounts for hugetlbfs mount points.
 *
 * For every hugetlbfs entry that carries a "pagesize=" option a huge page
 * description (mount point and page size) is appended to the component's
 * huge_pages list. The list is then sorted with page_compare and
 * use_huge_pages is set when at least one mount was found. Lines that cannot
 * be parsed and hugetlbfs mounts without a page size are skipped.
 */
static void udreg_find_hugepages (void) {
    FILE *fh;
    char buffer[1024];

    fh = fopen ("/proc/mounts", "r");
    if (NULL == fh) {
        return;
    }

    while (fgets (buffer, sizeof (buffer), fh)) {
        mca_mpool_udreg_hugepage_t *pool;
        char *ctx = NULL, *opt_ctx = NULL;
        char *path, *fstype, *options, *tok, *path_copy;
        unsigned long page_size = 0;
        bool have_page_size = false;

        /* fields used here: device (ignored), mount point, file system type, options */
        (void) strtok_r (buffer, " ", &ctx);
        path = strtok_r (NULL, " ", &ctx);
        fstype = strtok_r (NULL, " ", &ctx);
        if (NULL == path || NULL == fstype || 0 != strcmp (fstype, "hugetlbfs")) {
            continue;
        }

        options = strtok_r (NULL, " ", &ctx);
        if (NULL == options) {
            continue;
        }

        /* look for the pagesize option in the comma separated option list */
        for (tok = strtok_r (options, ",", &opt_ctx) ; NULL != tok ;
             tok = strtok_r (NULL, ",", &opt_ctx)) {
            char unit = '\0';
            int nfields;

            if (0 != strncmp (tok, "pagesize", 8)) {
                continue;
            }

            nfields = sscanf (tok, "pagesize=%lu%c", &page_size, &unit);
            if (nfields >= 1) {
                have_page_size = true;
                /* the value may carry a unit suffix (e.g. pagesize=2M) */
                if (2 == nfields) {
                    switch (unit) {
                    case 'K': case 'k':
                        page_size <<= 10;
                        break;
                    case 'M': case 'm':
                        page_size <<= 20;
                        break;
                    case 'G': case 'g':
                        page_size <<= 30;
                        break;
                    default:
                        break;
                    }
                }
            }

            break;
        }

        if (!have_page_size) {
            /* no usable page size for this mount; do not record it */
            continue;
        }

        path_copy = strdup (path);
        if (NULL == path_copy) {
            break;
        }

        pool = OBJ_NEW(mca_mpool_udreg_hugepage_t);
        if (NULL == pool) {
            free (path_copy);
            break;
        }

        /* the description owns the copy of the path from here on */
        pool->path = path_copy;
        pool->page_size = page_size;

        opal_list_append (&mca_mpool_udreg_component.huge_pages, &pool->super);
    }

    fclose (fh);

    opal_list_sort (&mca_mpool_udreg_component.huge_pages, page_compare);

    mca_mpool_udreg_component.use_huge_pages =
        !!(opal_list_get_size (&mca_mpool_udreg_component.huge_pages));
}
/**
 * Create a udreg mpool module.
 *
 * Refreshes the component's leave_pinned setting, scans for huge page mounts
 * on the first call, then allocates a module, copies the resources into it
 * and initializes it.
 *
 * @param[in] resources  resources for the new module; copied into the module
 *
 * @returns the new module, or NULL on failure
 */
static mca_mpool_base_module_t *
udreg_init(struct mca_mpool_base_resources_t *resources)
{
    mca_mpool_udreg_module_t* mpool_module;
    static int inited = false;
    int rc;

    /* Set this here (vs in component.c) because
       opal_leave_pinned* may have been set after MCA params were
       read (e.g., by the openib btl) */
    mca_mpool_udreg_component.leave_pinned = (int)
        (1 == opal_leave_pinned || opal_leave_pinned_pipeline);

    if (!inited) {
        inited = true;
        udreg_find_hugepages ();
    }

    /* the resources are copied below, so they must be present */
    if (NULL == resources) {
        return NULL;
    }

    mpool_module =
        (mca_mpool_udreg_module_t *) malloc (sizeof (mca_mpool_udreg_module_t));
    if (NULL == mpool_module) {
        return NULL;
    }

    memmove (&mpool_module->resources, resources, sizeof (*resources));

    rc = mca_mpool_udreg_module_init(mpool_module);
    if (OPAL_SUCCESS != rc) {
        free (mpool_module);
        return NULL;
    }

    return &mpool_module->super;
}

Просмотреть файл

@ -1,546 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
#include "opal_config.h"
#include "opal/align.h"
#include "mpool_udreg.h"
#include <errno.h>
#include <string.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include "opal/mca/mpool/base/base.h"
#include "opal/runtime/opal_params.h"
#include "opal/include/opal_stdint.h"
#include <fcntl.h>
#include <udreg_pub.h>
#include <sys/mman.h>
static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_context);
static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_context);
/* Constructor: clear everything past the first sizeof(super) bytes of the
 * object, then construct the list that tracks allocations. */
static void mca_mpool_udreg_hugepage_constructor (mca_mpool_udreg_hugepage_t *huge_page)
{
    const size_t super_size = sizeof (huge_page->super);
    char *tail = (char *) huge_page + super_size;

    memset (tail, 0, sizeof (*huge_page) - super_size);

    OBJ_CONSTRUCT(&huge_page->allocations, opal_list_t);
}
/* Destructor: free the mount path, release every allocation still on the
 * list, and tear the list down. */
static void mca_mpool_udreg_hugepage_destructor (mca_mpool_udreg_hugepage_t *huge_page)
{
    opal_list_item_t *entry;

    /* free() accepts NULL, so no guard is needed */
    free (huge_page->path);

    for (entry = opal_list_remove_first (&huge_page->allocations) ;
         NULL != entry ;
         entry = opal_list_remove_first (&huge_page->allocations)) {
        OBJ_RELEASE(entry);
    }

    OBJ_DESTRUCT(&huge_page->allocations);
}
/* Class instance for huge page mount descriptors: parent class
 * opal_list_item_t, with the constructor and destructor named here. */
OBJ_CLASS_INSTANCE(mca_mpool_udreg_hugepage_t, opal_list_item_t,
mca_mpool_udreg_hugepage_constructor,
mca_mpool_udreg_hugepage_destructor);
/* Constructor: clear everything past the first sizeof(super) bytes of the
 * object and mark the file descriptor as not open. */
static void mca_mpool_udreg_hugepage_alloc_constructor (mca_mpool_udreg_hugepage_alloc_t *alloc)
{
    const size_t super_size = sizeof (alloc->super);

    memset ((char *) alloc + super_size, 0, sizeof (*alloc) - super_size);

    alloc->fd = -1;
}
/* Destructor: unmap the region if one is recorded and free the file path. */
static void mca_mpool_udreg_hugepage_alloc_destructor (mca_mpool_udreg_hugepage_alloc_t *alloc)
{
    if (NULL != alloc->ptr) {
        munmap (alloc->ptr, alloc->size);
    }

    /* free() accepts NULL, so a missing path needs no special case */
    free (alloc->path);
}
/* Class instance for individual huge page allocations: parent class
 * opal_list_item_t, with the constructor and destructor named here. */
OBJ_CLASS_INSTANCE(mca_mpool_udreg_hugepage_alloc_t, opal_list_item_t,
mca_mpool_udreg_hugepage_alloc_constructor,
mca_mpool_udreg_hugepage_alloc_destructor);
static mca_mpool_udreg_hugepage_t *udreg_find_matching_pagesize (size_t size) {
mca_mpool_udreg_hugepage_t *huge_table;
opal_list_item_t *item;
for (item = opal_list_get_first (&mca_mpool_udreg_component.huge_pages) ;
item != opal_list_get_end (&mca_mpool_udreg_component.huge_pages) ;
item = opal_list_get_next (item)) {
huge_table = (mca_mpool_udreg_hugepage_t *) item;
if (huge_table->page_size == size) {
return huge_table;
}
}
return NULL;
}
/*
 * Initializes the mpool module.
 *
 * Fills in the module function table, selects a huge page table when the
 * requested page size is larger than 4096, creates (or attaches to) the
 * udreg cache named by resources.pool_name and sets up the free list of
 * registration objects.
 *
 * Returns OPAL_SUCCESS on success. On failure an error code is returned and
 * every object constructed by this function has been destructed again, so
 * the caller only needs to free the module memory.
 */
int mca_mpool_udreg_module_init(mca_mpool_udreg_module_t* mpool)
{
    struct udreg_cache_attr cache_attr;
    int urc, rc;

    mpool->super.mpool_component = &mca_mpool_udreg_component.super;
    mpool->super.mpool_base = NULL; /* no base .. */
    mpool->super.mpool_alloc = mca_mpool_udreg_alloc;
    mpool->super.mpool_realloc = mca_mpool_udreg_realloc;
    mpool->super.mpool_free = mca_mpool_udreg_free;
    mpool->super.mpool_register = mca_mpool_udreg_register;
    mpool->super.mpool_find = mca_mpool_udreg_find;
    mpool->super.mpool_deregister = mca_mpool_udreg_deregister;
    /* This module relies on udreg for notification of memory release */
    mpool->super.mpool_release_memory = NULL;
    mpool->super.mpool_finalize = mca_mpool_udreg_finalize;
    mpool->super.mpool_ft_event = mca_mpool_udreg_ft_event;
    mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM | MCA_MPOOL_FLAGS_NO_HOOKS;

    if (4096 < mpool->resources.page_size) {
        mpool->huge_page = udreg_find_matching_pagesize (mpool->resources.page_size);
    } else {
        mpool->huge_page = NULL;
    }

    /* start from a fully defined attribute structure */
    memset (&cache_attr, 0, sizeof (cache_attr));

    cache_attr.modes = 0;

    /* Create udreg cache */
    if (mpool->resources.use_kernel_cache) {
        cache_attr.modes |= UDREG_CC_MODE_USE_KERNEL_CACHE;
    }

    if (mpool->resources.use_evict_w_unreg) {
        cache_attr.modes |= UDREG_CC_MODE_USE_EVICT_W_UNREG;
    }

    if (mca_mpool_udreg_component.leave_pinned) {
        cache_attr.modes |= UDREG_CC_MODE_USE_LAZY_DEREG;
    }

    OBJ_CONSTRUCT(&mpool->lock,opal_mutex_t);

    /* strncpy does not terminate the destination when the source is too
     * long, so bound the copy and terminate explicitly */
    strncpy (cache_attr.cache_name, mpool->resources.pool_name, UDREG_MAX_CACHENAME_LEN - 1);
    cache_attr.cache_name[UDREG_MAX_CACHENAME_LEN - 1] = '\0';
    cache_attr.max_entries = mpool->resources.max_entries;
    cache_attr.debug_mode = 0;
    cache_attr.debug_rank = 0;
    cache_attr.reg_context = mpool;
    cache_attr.dreg_context = mpool;
    cache_attr.destructor_context = mpool;
    cache_attr.device_reg_func = mca_mpool_udreg_reg_func;
    cache_attr.device_dereg_func = mca_mpool_udreg_dereg_func;
    cache_attr.destructor_callback = NULL;

    /* attempt to create the udreg cache. this will fail if one already exists */
    (void) UDREG_CacheCreate (&cache_attr);

    /* look the cache up under exactly the name it was created with */
    urc = UDREG_CacheAccess (cache_attr.cache_name, (udreg_cache_handle_t *) &mpool->udreg_handle);
    if (UDREG_RC_SUCCESS != urc) {
        OBJ_DESTRUCT(&mpool->lock);
        return OPAL_ERROR;
    }

    OBJ_CONSTRUCT(&mpool->reg_list, opal_free_list_t);
    rc = opal_free_list_init (&mpool->reg_list, mpool->resources.sizeof_reg,
                              opal_cache_line_size,
                              OBJ_CLASS(mca_mpool_base_registration_t),
                              0, opal_cache_line_size, 0, -1, 32, NULL, 0,
                              NULL, NULL, NULL);
    if (OPAL_SUCCESS != rc) {
        /* undo everything set up above */
        OBJ_DESTRUCT(&mpool->reg_list);
        UDREG_CacheRelease (mpool->udreg_handle);
        OBJ_DESTRUCT(&mpool->lock);
        return rc;
    }

    return OPAL_SUCCESS;
}
/* udreg callback functions */
static void *mca_mpool_udreg_reg_func (void *addr, uint64_t len, void *reg_context)
{
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) reg_context;
mca_mpool_base_registration_t *udreg_reg;
opal_free_list_item_t *item;
int rc;
item = opal_free_list_get (&mpool_udreg->reg_list);
if (NULL == item) {
return NULL;
}
udreg_reg = (mca_mpool_base_registration_t *) item;
udreg_reg->mpool = reg_context;
udreg_reg->base = addr;
udreg_reg->bound = (void *)((uintptr_t) addr + len);
/* pull the access flags out of the mpool module */
udreg_reg->access_flags = mpool_udreg->requested_access_flags;
rc = mpool_udreg->resources.register_mem(mpool_udreg->resources.reg_data,
addr, len, udreg_reg);
if (OPAL_SUCCESS != rc) {
opal_free_list_return (&mpool_udreg->reg_list, item);
udreg_reg = NULL;
}
return udreg_reg;
}
/* udreg deregistration callback: when the registration has no remaining
 * references, deregister it through the resource-provided deregister_mem
 * function and, on success, return it to the free list. Always returns 0. */
static uint32_t mca_mpool_udreg_dereg_func (void *device_data, void *dreg_context)
{
    mca_mpool_udreg_module_t *module = (mca_mpool_udreg_module_t *) dreg_context;
    mca_mpool_base_registration_t *reg = (mca_mpool_base_registration_t *) device_data;

    /* registrations that still have users are left alone */
    if (0 == reg->ref_count) {
        int rc = module->resources.deregister_mem (module->resources.reg_data, reg);

        if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
            opal_free_list_return (&module->reg_list,
                                   (opal_free_list_item_t *) reg);
        }
        /* might be worth printing out a warning if an error occurs here */
    }

    return 0;
}
/**
 * Allocate size bytes backed by a file created under the module's huge page
 * mount point.
 *
 * The backing file is created, sized, mapped and then immediately closed
 * and unlinked; only the mapping is kept.
 *
 * @param mpool      module whose huge_page table supplies the mount path
 * @param size       number of bytes to map
 * @param addr       (OUT) address of the mapping
 * @param base_addr  (OUT) allocation descriptor; this is what has to be
 *                   passed to mca_mpool_udreg_free_huge
 *
 * @return 0 on success, -1 on any failure (outputs are left untouched).
 */
static int mca_mpool_udreg_alloc_huge (mca_mpool_udreg_module_t *mpool, size_t size,
                                       void **addr, void **base_addr) {
    mca_mpool_udreg_hugepage_alloc_t *alloc;
    void *mapping = MAP_FAILED;
    int rc;

    alloc = OBJ_NEW(mca_mpool_udreg_hugepage_alloc_t);
    if (NULL == alloc) {
        return -1;
    }

    alloc->size = size;

    rc = asprintf (&alloc->path, "%s/hugepage.openmpi.%d.%d", mpool->huge_page->path,
                   getpid (), mpool->huge_page->cnt++);
    if (0 > rc) {
        /* the pointer is indeterminate after a failed asprintf; make sure
         * the destructor does not try to free it */
        alloc->path = NULL;
        OBJ_RELEASE(alloc);
        return -1;
    }

    alloc->fd = open (alloc->path, O_RDWR | O_CREAT, 0600);
    if (-1 == alloc->fd) {
        OBJ_RELEASE(alloc);
        return -1;
    }

    if (0 == ftruncate (alloc->fd, size)) {
        mapping = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        alloc->fd, 0);
    }

    /* neither the descriptor nor the file name is needed once the mapping
     * attempt has been made, whether it succeeded or not */
    close (alloc->fd);
    alloc->fd = -1;
    unlink (alloc->path);

    /* mmap reports failure with MAP_FAILED, not NULL */
    if (MAP_FAILED == mapping) {
        /* alloc->ptr is still NULL from the constructor, so the destructor
         * will not attempt to unmap anything */
        OBJ_RELEASE(alloc);
        return -1;
    }

    alloc->ptr = mapping;
    alloc->huge_table = mpool->huge_page;

    opal_list_append (&mpool->huge_page->allocations, &alloc->super);

    *addr = alloc->ptr;
    *base_addr = alloc;

    return 0;
}
/* Remove a huge page allocation from its owning table's list and drop the
 * reference held on it. */
static void mca_mpool_udreg_free_huge (mca_mpool_udreg_hugepage_alloc_t *alloc) {
    opal_list_t *owner_list = &alloc->huge_table->allocations;

    opal_list_remove_item (owner_list, &alloc->super);

    OBJ_RELEASE(alloc);
}
/**
 * allocate function
 *
 * Allocates size bytes (huge page backed when the module has a huge page
 * table, otherwise from the heap) and registers the memory.
 *
 * @param mpool  udreg mpool module
 * @param size   requested size; rounded up to the huge page size when huge
 *               pages are used
 * @param align  requested alignment; 0 selects mca_mpool_base_page_size
 * @param flags  registration flags passed on to mca_mpool_udreg_register
 * @param reg    (OUT) registration describing the new memory
 *
 * @return the aligned address, or NULL when allocation or registration
 *         fails. On success (*reg)->alloc_base holds the pointer that has
 *         to be used to release the memory (for huge pages this is the
 *         allocation descriptor, not the mapped address).
 */
void* mca_mpool_udreg_alloc(mca_mpool_base_module_t *mpool, size_t size,
                            size_t align, uint32_t flags, mca_mpool_base_registration_t **reg)
{
    mca_mpool_udreg_module_t *udreg_module = (mca_mpool_udreg_module_t *) mpool;
    void *base_addr, *addr;

    if(0 == align)
        align = mca_mpool_base_page_size;

#if OPAL_CUDA_SUPPORT
    /* CUDA cannot handle registering overlapping regions, so make
     * sure each region is page sized and page aligned. */
    align = mca_mpool_base_page_size;
    size = OPAL_ALIGN(size, mca_mpool_base_page_size, size_t);
#endif

    addr = base_addr = NULL;

    if (NULL != udreg_module->huge_page) {
        size = OPAL_ALIGN(size, udreg_module->huge_page->page_size, size_t);
        /* a failed huge page allocation leaves addr/base_addr unset; bail
         * out instead of registering (and later freeing) a NULL pointer */
        if (0 != mca_mpool_udreg_alloc_huge (udreg_module, size, &addr, &base_addr)) {
            return NULL;
        }
    } else {
#ifdef HAVE_POSIX_MEMALIGN
        if((errno = posix_memalign(&base_addr, align, size)) != 0)
            return NULL;

        addr = base_addr;
#else
        base_addr = malloc(size + align);
        if(NULL == base_addr)
            return NULL;

        addr = (void*)OPAL_ALIGN((uintptr_t)base_addr, align, uintptr_t);
#endif
    }

    if (OPAL_SUCCESS != mca_mpool_udreg_register(mpool, addr, size, flags, MCA_MPOOL_ACCESS_ANY, reg)) {
        if (udreg_module->huge_page) {
            mca_mpool_udreg_free_huge ((mca_mpool_udreg_hugepage_alloc_t *) base_addr);
        } else {
            free(base_addr);
        }

        return NULL;
    }

    (*reg)->alloc_base = (unsigned char *) base_addr;

    return addr;
}
/* Ask udreg to evict from this module's cache; returns true when
 * UDREG_Evict reports success. */
bool mca_mpool_udreg_evict (struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_udreg_module_t *module = (mca_mpool_udreg_module_t *) mpool;

    return UDREG_RC_SUCCESS == UDREG_Evict (module->udreg_handle);
}
/*
 * register memory
 *
 * Registers [addr, addr + size) with the requested access flags and returns
 * the registration in *reg with its reference count incremented.
 *
 * Unless MCA_MPOOL_FLAGS_CACHE_BYPASS is set in flags the registration is
 * obtained through the udreg cache; otherwise it is created directly with
 * mca_mpool_udreg_reg_func. In both cases a failure triggers an eviction
 * attempt and a retry.
 *
 * Returns OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE when no registration
 * could be obtained and nothing more could be evicted (*reg is NULL then).
 */
int mca_mpool_udreg_register(mca_mpool_base_module_t *mpool, void *addr,
size_t size, uint32_t flags, int32_t access_flags,
mca_mpool_base_registration_t **reg)
{
mca_mpool_udreg_module_t *mpool_udreg = (mca_mpool_udreg_module_t *) mpool;
mca_mpool_base_registration_t *udreg_reg, *old_reg;
bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
udreg_entry_t *udreg_entry;
udreg_return_t urc;
*reg = NULL;
OPAL_THREAD_LOCK(&mpool_udreg->lock);
/* we hold the lock so no other thread can modify these flags until the registration is complete */
/* the flags are handed over through the module because the registration
 * callback (mca_mpool_udreg_reg_func) only receives the module as context */
mpool_udreg->requested_access_flags = access_flags;
if (false == bypass_cache) {
/* Get a udreg entry for this region */
/* the do { } while (0) body runs exactly once; it only exists so that
 * "break" can skip the re-registration steps below */
do {
while (UDREG_RC_SUCCESS !=
(urc = UDREG_Register (mpool_udreg->udreg_handle, addr, size, &udreg_entry))) {
/* try to remove one unused reg and retry */
if (!mca_mpool_udreg_evict (mpool)) {
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
udreg_reg = (mca_mpool_base_registration_t *) udreg_entry->device_data;
if ((udreg_reg->access_flags & access_flags) == access_flags) {
/* sufficient access */
break;
}
/* the cached registration lacks some of the requested access bits:
 * replace it with a more permissive one */
old_reg = udreg_reg;
/* to not confuse udreg make sure the new registration covers the same address
* range as the old one. */
addr = old_reg->base;
size = (size_t)((intptr_t) old_reg->bound - (intptr_t) old_reg->base);
/* make the new access flags more permissive */
mpool_udreg->requested_access_flags = access_flags | old_reg->access_flags;
/* get a new registration */
udreg_reg = mca_mpool_udreg_reg_func (addr, size, mpool);
if (NULL == udreg_reg) {
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* update the device data with the new registration */
udreg_entry->device_data = udreg_reg;
/* ensure that mca_mpool_udreg_deregister does not call into udreg since
* we are forcefully evicting the registration here */
old_reg->flags |= MCA_MPOOL_FLAGS_CACHE_BYPASS | MCA_MPOOL_FLAGS_INVALID;
/* the callback only deregisters when the old registration has no
 * references left; otherwise it is left for its last user */
mca_mpool_udreg_dereg_func (old_reg, mpool);
} while (0);
/* remember the cache entry so deregistration can drop its reference */
udreg_reg->mpool_context = udreg_entry;
} else {
/* if cache bypass is requested don't use the udreg cache */
while (NULL == (udreg_reg = mca_mpool_udreg_reg_func (addr, size, mpool))) {
/* try to remove one unused reg and retry */
if (!mca_mpool_udreg_evict (mpool)) {
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
udreg_reg->mpool_context = NULL;
}
OPAL_THREAD_UNLOCK(&mpool_udreg->lock);
/* NOTE(review): the flags and reference count are updated after the lock
 * has been released, and the flags of a registration found in the cache
 * are overwritten with this caller's flags -- confirm this is intended */
udreg_reg->flags = flags;
*reg = udreg_reg;
udreg_reg->ref_count++;
return OPAL_SUCCESS;
}
/**
 * realloc function
 *
 * Allocates a new registered buffer of size bytes, copies the contents of
 * the old buffer into it and releases the old buffer.
 *
 * @param mpool  udreg mpool module
 * @param addr   start of the old buffer
 * @param size   requested size of the new buffer
 * @param reg    (IN/OUT) on entry the registration of the old buffer, on
 *               successful return the registration of the new one
 *
 * @return the new buffer, or NULL if it could not be allocated. On failure
 *         the old buffer stays valid and *reg still refers to it.
 */
void* mca_mpool_udreg_realloc(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_base_registration_t *old_reg = *reg;
    size_t old_size, copy_size;
    void *new_mem;

    new_mem = mca_mpool_udreg_alloc(mpool, size, 0, old_reg->flags, reg);
    if (NULL == new_mem) {
        /* keep the caller's view of the old buffer intact */
        *reg = old_reg;
        return NULL;
    }

    /* registrations created by this module store bound as base + length
     * (one past the last byte), so the length is simply bound - base */
    old_size = (size_t) ((uintptr_t) old_reg->bound - (uintptr_t) old_reg->base);

    /* never copy more than the new buffer was asked to hold */
    copy_size = (old_size < size) ? old_size : size;
    memcpy(new_mem, addr, copy_size);

    mca_mpool_udreg_free(mpool, addr, old_reg);

    return new_mem;
}
/**
 * free function
 *
 * Deregisters the memory described by registration and releases the
 * underlying allocation (huge page allocation or heap memory).
 *
 * @param mpool         udreg mpool module
 * @param addr          address of the buffer (not used; the allocation is
 *                      identified by registration->alloc_base)
 * @param registration  registration returned by the allocate function
 */
void mca_mpool_udreg_free(mca_mpool_base_module_t *mpool, void *addr,
                          mca_mpool_base_registration_t *registration)
{
    mca_mpool_udreg_module_t *udreg_module = (mca_mpool_udreg_module_t *) mpool;
    /* deregistration can hand the registration object back to the free
     * list, so fetch the allocation pointer before that happens */
    void *alloc_base = registration->alloc_base;

    mca_mpool_udreg_deregister(mpool, registration);

    if (udreg_module->huge_page) {
        mca_mpool_udreg_free_huge ((mca_mpool_udreg_hugepage_alloc_t *) alloc_base);
    } else {
        free (alloc_base);
    }
}
/* Lookup is not provided by this module: *reg is cleared and
 * OPAL_ERR_NOT_FOUND is returned for every request. */
int mca_mpool_udreg_find(struct mca_mpool_base_module_t *mpool, void *addr,
                         size_t size, mca_mpool_base_registration_t **reg)
{
    (void) mpool;
    (void) addr;
    (void) size;

    *reg = NULL;

    return OPAL_ERR_NOT_FOUND;
}
/* Drop one reference from reg. Registrations flagged CACHE_BYPASS are
 * passed straight to the deregistration callback; all others give their
 * reference back to udreg under the module lock. Always returns
 * OPAL_SUCCESS. */
int mca_mpool_udreg_deregister(struct mca_mpool_base_module_t *mpool,
                               mca_mpool_base_registration_t *reg)
{
    mca_mpool_udreg_module_t *module = (mca_mpool_udreg_module_t *) mpool;

    assert(reg->ref_count > 0);

    --reg->ref_count;

    if (reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
        mca_mpool_udreg_dereg_func (reg, mpool);
        return OPAL_SUCCESS;
    }

    OPAL_THREAD_LOCK(&module->lock);
    UDREG_DecrRefcount (module->udreg_handle, reg->mpool_context);
    OPAL_THREAD_UNLOCK(&module->lock);

    return OPAL_SUCCESS;
}
/* Finalize the module: optionally print the udreg cache statistics, release
 * the udreg cache handle and destruct the free list and the lock. */
void mca_mpool_udreg_finalize(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_udreg_module_t *module = (mca_mpool_udreg_module_t *) mpool;

    /* Statistic */
    if (true == mca_mpool_udreg_component.print_stats) {
        uint64_t hits = 0;
        uint64_t misses = 0;
        uint64_t evictions = 0;

        (void) UDREG_GetStat (module->udreg_handle, UDREG_STAT_CACHE_HIT, &hits);
        (void) UDREG_GetStat (module->udreg_handle, UDREG_STAT_CACHE_MISS, &misses);
        (void) UDREG_GetStat (module->udreg_handle, UDREG_STAT_CACHE_EVICTED, &evictions);

        opal_output(0, "%s udreg: stats (hit/miss/evicted): %" PRIu64 "/%" PRIu64 "/%" PRIu64 "\n",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), hits, misses, evictions);
    }

    UDREG_CacheRelease (module->udreg_handle);

    OBJ_DESTRUCT(&module->reg_list);
    OBJ_DESTRUCT(&module->lock);
}
/* Fault tolerance event hook: nothing to do, always reports success. */
int mca_mpool_udreg_ft_event(int state)
{
    (void) state;

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: LANL
status: maintenance

Просмотреть файл

@ -9,8 +9,8 @@
# University of Stuttgart. All rights reserved. # University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. # Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
# All rights reserved # reserved
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -19,8 +19,15 @@
# #
headers += \ headers += \
base/base.h base/base.h \
base/rcache_base_vma.h \
base/rcache_base_vma_tree.h \
base/rcache_base_mem_cb.h
libmca_rcache_la_SOURCES += \ libmca_rcache_la_SOURCES += \
base/rcache_base_frame.c \ base/rcache_base_frame.c \
base/rcache_base_create.c base/rcache_base_create.c \
base/rcache_base_vma.c \
base/rcache_base_vma_tree.c \
base/rcache_base_mem_cb.c

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -9,8 +10,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* All rights reserved * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -34,7 +35,8 @@ BEGIN_C_DECLS
/* /*
* create a module by name * create a module by name
*/ */
OPAL_DECLSPEC mca_rcache_base_module_t* mca_rcache_base_module_create(const char* name); OPAL_DECLSPEC mca_rcache_base_module_t *mca_rcache_base_module_create (const char *name, void *user_data,
mca_rcache_base_resources_t *rcache_resources);
/* /*
* MCA framework * MCA framework
@ -45,13 +47,18 @@ struct mca_rcache_base_selected_module_t {
opal_list_item_t super; opal_list_item_t super;
mca_rcache_base_component_t *rcache_component; mca_rcache_base_component_t *rcache_component;
mca_rcache_base_module_t *rcache_module; mca_rcache_base_module_t *rcache_module;
void *user_data;
}; };
typedef struct mca_rcache_base_selected_module_t mca_rcache_base_selected_module_t; typedef struct mca_rcache_base_selected_module_t mca_rcache_base_selected_module_t;
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_rcache_base_selected_module_t); OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_rcache_base_selected_module_t);
OPAL_DECLSPEC mca_rcache_base_component_t* mca_rcache_base_component_lookup(const char* name); OPAL_DECLSPEC mca_rcache_base_component_t *mca_rcache_base_component_lookup(const char *name);
OPAL_DECLSPEC mca_rcache_base_module_t* mca_rcache_base_module_lookup(const char* name); OPAL_DECLSPEC mca_rcache_base_module_t *mca_rcache_base_module_lookup (const char *name);
OPAL_DECLSPEC int mca_rcache_base_module_destroy(mca_rcache_base_module_t *module);
/* only used within base -- no need to DECLSPEC */
extern int mca_rcache_base_used_mem_hooks;
/* /*
* Globals * Globals

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -25,33 +28,85 @@
#include "opal/mca/base/base.h" #include "opal/mca/base/base.h"
#include "opal/mca/rcache/rcache.h" #include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h" #include "opal/mca/rcache/base/base.h"
#include "opal/mca/rcache/base/rcache_base_mem_cb.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/runtime/opal_params.h"
#include "opal/memoryhooks/memory.h"
mca_rcache_base_module_t* mca_rcache_base_module_create (const char* name, void *user_data,
mca_rcache_base_module_t* mca_rcache_base_module_create(const char* name) struct mca_rcache_base_resources_t* resources)
{ {
mca_base_component_list_item_t* cli;
mca_rcache_base_component_t* component = NULL; mca_rcache_base_component_t* component = NULL;
mca_rcache_base_module_t* module = NULL; mca_rcache_base_module_t* module = NULL;
mca_base_component_list_item_t *cli;
mca_rcache_base_selected_module_t *sm; mca_rcache_base_selected_module_t *sm;
bool found = false;
OPAL_LIST_FOREACH(cli, &opal_rcache_base_framework.framework_components, mca_base_component_list_item_t) { OPAL_LIST_FOREACH(cli, &opal_rcache_base_framework.framework_components, mca_base_component_list_item_t) {
component = (mca_rcache_base_component_t *) cli->cli_component; component = (mca_rcache_base_component_t *) cli->cli_component;
if(0 == strcmp(component->rcache_version.mca_component_name, name)) { if(0 == strcmp(component->rcache_version.mca_component_name, name)) {
found = true; module = component->rcache_init (resources);
break; break;
} }
} }
if (!found) { if ( NULL == module ) {
return NULL; return NULL;
} }
module = component->rcache_init();
sm = OBJ_NEW(mca_rcache_base_selected_module_t); sm = OBJ_NEW(mca_rcache_base_selected_module_t);
sm->rcache_component = component; sm->rcache_component = component;
sm->rcache_module = module; sm->rcache_module = module;
sm->user_data = user_data;
opal_list_append(&mca_rcache_base_modules, (opal_list_item_t*) sm); opal_list_append(&mca_rcache_base_modules, (opal_list_item_t*) sm);
/* on the very first creation of a module we init the memory
callback */
if (!mca_rcache_base_used_mem_hooks) {
/* Use the memory hooks if leave_pinned or
* leave_pinned_pipeline is enabled (note that either of these
* leave_pinned variables may have been set by a user MCA
* param or elsewhere in the code base). Yes, we could have
* coded this more succinctly, but this is more clear. Do not
* check memory hooks if the rcache does not provide a
* range invalidation function. */
if ((opal_leave_pinned > 0 || opal_leave_pinned_pipeline) &&
module->rcache_invalidate_range) {
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
opal_mem_hooks_support_level())) {
opal_mem_hooks_register_release(mca_rcache_base_mem_cb, NULL);
} else {
opal_show_help("help-rcache-base.txt", "leave pinned failed",
true, name, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
opal_proc_local_get()->proc_hostname);
return NULL;
}
/* Set this to true so that rcache_base_close knows to
cleanup */
mca_rcache_base_used_mem_hooks = 1;
}
}
return module; return module;
} }
/* Remove the given module from the list of created rcache modules, run its
 * finalize function (when it has one) and release the list entry.
 * Returns OPAL_SUCCESS, or OPAL_ERR_NOT_FOUND when the module is not on
 * the list. */
int mca_rcache_base_module_destroy(mca_rcache_base_module_t *module)
{
    opal_list_item_t *cursor = opal_list_get_first(&mca_rcache_base_modules);

    while (cursor != opal_list_get_end(&mca_rcache_base_modules)) {
        mca_rcache_base_selected_module_t *sm = (mca_rcache_base_selected_module_t *) cursor;

        if (module != sm->rcache_module) {
            cursor = opal_list_get_next(cursor);
            continue;
        }

        opal_list_remove_item(&mca_rcache_base_modules, cursor);

        if (NULL != sm->rcache_module->rcache_finalize) {
            sm->rcache_module->rcache_finalize(sm->rcache_module);
        }

        OBJ_RELEASE(sm);

        return OPAL_SUCCESS;
    }

    return OPAL_ERR_NOT_FOUND;
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -28,7 +29,9 @@
#include "opal/mca/base/base.h" #include "opal/mca/base/base.h"
#include "opal/mca/rcache/rcache.h" #include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h" #include "opal/mca/rcache/base/base.h"
#include "opal/memoryhooks/memory.h"
#include "opal/constants.h" #include "opal/constants.h"
#include "rcache_base_mem_cb.h"
/* /*
* The following file was created by configure. It contains extern * The following file was created by configure. It contains extern
@ -38,6 +41,24 @@
#include "opal/mca/rcache/base/static-components.h" #include "opal/mca/rcache/base/static-components.h"
int mca_rcache_base_used_mem_hooks;
/**
* Memory Pool Registration
*/
/* Constructor: start every registration with no owning rcache, an empty
 * address range, no references and no flags. */
static void mca_rcache_base_registration_constructor( mca_rcache_base_registration_t * reg )
{
    /* counters and flags */
    reg->ref_count = 0;
    reg->flags = 0;

    /* owner and address range */
    reg->rcache = NULL;
    reg->base = NULL;
    reg->bound = NULL;
}
/* Class instance for rcache registrations: parent class
 * opal_free_list_item_t, the constructor named here and no destructor. */
OBJ_CLASS_INSTANCE(mca_rcache_base_registration_t, opal_free_list_item_t,
mca_rcache_base_registration_constructor, NULL);
/* /*
* Global variables * Global variables
@ -54,9 +75,7 @@ static int mca_rcache_base_close(void)
/* Finalize all the rcache components and free their list items */ /* Finalize all the rcache components and free their list items */
for (item = opal_list_remove_first(&mca_rcache_base_modules); while (NULL != (item = opal_list_remove_first(&mca_rcache_base_modules))) {
NULL != item;
item = opal_list_remove_first(&mca_rcache_base_modules)) {
sm = (mca_rcache_base_selected_module_t *) item; sm = (mca_rcache_base_selected_module_t *) item;
/* Blatently ignore the return code (what would we do to recover, /* Blatently ignore the return code (what would we do to recover,
@ -70,6 +89,12 @@ static int mca_rcache_base_close(void)
OBJ_RELEASE(sm); OBJ_RELEASE(sm);
} }
/* deregister memory free callback */
if (mca_rcache_base_used_mem_hooks) {
opal_mem_hooks_unregister_release(mca_rcache_base_mem_cb);
}
/* All done */
/* Close all remaining available components */ /* Close all remaining available components */
return mca_base_framework_components_close(&opal_rcache_base_framework, NULL); return mca_base_framework_components_close(&opal_rcache_base_framework, NULL);
} }
@ -89,7 +114,7 @@ static int mca_rcache_base_open(mca_base_open_flag_t flags)
return mca_base_framework_components_open(&opal_rcache_base_framework, flags); return mca_base_framework_components_open(&opal_rcache_base_framework, flags);
} }
MCA_BASE_FRAMEWORK_DECLARE(opal, rcache, "OPAL Rcache", NULL, MCA_BASE_FRAMEWORK_DECLARE(opal, rcache, "OPAL Registration Cache", NULL,
mca_rcache_base_open, mca_rcache_base_close, mca_rcache_base_open, mca_rcache_base_close,
mca_rcache_base_static_components, 0); mca_rcache_base_static_components, 0);

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. * Copyright (c) 2012-2015 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -32,8 +32,8 @@
#include "opal/util/proc.h" #include "opal/util/proc.h"
#include "opal/runtime/opal_params.h" #include "opal/runtime/opal_params.h"
#include "opal/mca/mpool/base/mpool_base_mem_cb.h" #include "opal/mca/rcache/base/rcache_base_mem_cb.h"
#include "opal/mca/mpool/base/base.h" #include "opal/mca/rcache/base/base.h"
#include "opal/mca/mca.h" #include "opal/mca/mca.h"
#include "opal/memoryhooks/memory.h" #include "opal/memoryhooks/memory.h"
@ -47,43 +47,34 @@ static char msg[512];
* from_alloc==true, then you cannot call malloc (or any of its * from_alloc==true, then you cannot call malloc (or any of its
* friends)! * friends)!
*/ */
void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata, void mca_rcache_base_mem_cb (void* base, size_t size, void* cbdata, bool from_alloc)
bool from_alloc)
{ {
mca_mpool_base_selected_module_t* current; mca_rcache_base_selected_module_t* current;
int rc; int rc;
opal_list_item_t* item;
/* Only do anything meaningful if the OPAL layer is up and running /* Only do anything meaningful if the OPAL layer is up and running
and size != 0 */ and size != 0 */
if ((from_alloc && (!opal_initialized)) || if ((from_alloc && (!opal_initialized)) || size == 0) {
size == 0) {
return; return;
} }
for(item = opal_list_get_first(&mca_mpool_base_modules); OPAL_LIST_FOREACH(current, &mca_rcache_base_modules, mca_rcache_base_selected_module_t) {
item != opal_list_get_end(&mca_mpool_base_modules); if (current->rcache_module->rcache_invalidate_range != NULL) {
item = opal_list_get_next(item)) { rc = current->rcache_module->rcache_invalidate_range (current->rcache_module,
current = (mca_mpool_base_selected_module_t*) item;
if(current->mpool_module->mpool_release_memory != NULL) {
rc = current->mpool_module->mpool_release_memory(current->mpool_module,
base, size); base, size);
if (rc != OPAL_SUCCESS) { if (rc != OPAL_SUCCESS) {
if (from_alloc) { if (from_alloc) {
int len; int len;
len = snprintf(msg, sizeof(msg), "[%s:%d] Attempt to free memory that is still in use by an ongoing MPI communication (buffer %p, size %lu). MPI job will now abort.\n", len = snprintf(msg, sizeof(msg), "[%s:%d] Attempt to free memory that is still in "
opal_proc_local_get()->proc_hostname, "use by an ongoing MPI communication (buffer %p, size %lu). MPI job "
getpid(), "will now abort.\n", opal_proc_local_get()->proc_hostname,
base, (unsigned long) size); getpid(), base, (unsigned long) size);
msg[sizeof(msg) - 1] = '\0'; msg[sizeof(msg) - 1] = '\0';
write(2, msg, len); write(2, msg, len);
} else { } else {
opal_show_help("help-mpool-base.txt", opal_show_help("help-rcache-base.txt",
"cannot deregister in-use memory", true, "cannot deregister in-use memory", true,
current->mpool_component->mpool_version.mca_component_name, current->rcache_component->rcache_version.mca_component_name,
opal_proc_local_get()->proc_hostname, opal_proc_local_get()->proc_hostname,
base, (unsigned long) size); base, (unsigned long) size);
} }

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -18,8 +21,8 @@
/** /**
* @file * @file
*/ */
#ifndef MCA_MPOOL_BASE_MEM_CB_H #ifndef MCA_RCACHE_BASE_MEM_CB_H
#define MCA_MPOOL_BASE_MEM_CB_H #define MCA_RCACHE_BASE_MEM_CB_H
#include "opal_config.h" #include "opal_config.h"
@ -28,12 +31,8 @@ BEGIN_C_DECLS
/* /*
* memory hook callback, called when memory is free'd out from under us * memory hook callback, called when memory is free'd out from under us
*/ */
void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata, void mca_rcache_base_mem_cb (void* base, size_t size, void* cbdata, bool from_alloc);
bool from_alloc);
END_C_DECLS END_C_DECLS
#endif /* MCA_MPOOL_BASE_MEM_CB_H */ #endif /* MCA_RCACHE_BASE_MEM_CB_H */

151
opal/mca/rcache/base/rcache_base_vma.c Обычный файл
Просмотреть файл

@ -0,0 +1,151 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include MCA_memory_IMPLEMENTATION_HEADER
#include "opal/mca/memory/memory.h"
#include "opal/mca/rcache/rcache.h"
#include "rcache_base_vma.h"
#include "rcache_base_vma_tree.h"
/**
 * Constructor for mca_rcache_base_vma_module_t.
 *
 * Constructs the module lock as a recursive mutex and initializes the VMA
 * tree state. The return code of the tree initialization is deliberately
 * discarded because an object constructor has no way to report failure.
 */
static void mca_rcache_base_vma_module_construct (mca_rcache_base_vma_module_t *vma_module) {
OBJ_CONSTRUCT(&vma_module->vma_lock, opal_recursive_mutex_t);
(void) mca_rcache_base_vma_tree_init (vma_module);
}
/**
 * Destructor for mca_rcache_base_vma_module_t: destructs the lock and then
 * finalizes the VMA tree state.
 */
static void mca_rcache_base_vma_module_destruct (mca_rcache_base_vma_module_t *vma_module) {
OBJ_DESTRUCT(&vma_module->vma_lock);
mca_rcache_base_vma_tree_finalize (vma_module);
}
/* Register the class with opal_object_t as parent and the two hooks above. */
OBJ_CLASS_INSTANCE(mca_rcache_base_vma_module_t, opal_object_t,
mca_rcache_base_vma_module_construct,
mca_rcache_base_vma_module_destruct);
/**
 * Allocate and construct a new VMA module object.
 *
 * @return the object produced by OBJ_NEW (the class constructor has been run);
 *         presumably NULL on allocation failure -- confirm against OBJ_NEW.
 */
mca_rcache_base_vma_module_t *mca_rcache_base_vma_module_alloc (void)
{
return OBJ_NEW(mca_rcache_base_vma_module_t);
}
/**
 * Look up one registration for the byte range [addr, addr + size - 1].
 *
 * @param vma_module  VMA module to search
 * @param addr        first byte of the range
 * @param size        length of the range in bytes
 * @param reg         out: result of the tree lookup (NULL when nothing
 *                    matched); not written on error
 *
 * @return OPAL_ERROR for an empty range, the error reported by the memory
 *         component if processing pending memory changes fails, otherwise
 *         OPAL_SUCCESS (even when *reg is NULL).
 */
int mca_rcache_base_vma_find (mca_rcache_base_vma_module_t *vma_module, void *addr,
                              size_t size, mca_rcache_base_registration_t **reg)
{
    unsigned char *last_byte;
    int status;

    if (0 == size) {
        return OPAL_ERROR;
    }

    /* the tree works with inclusive bounds */
    last_byte = (unsigned char *) ((intptr_t) addr + size - 1);

    /* make sure the cache is still valid before consulting it */
    if (OPAL_UNLIKELY(opal_memory_changed() && NULL != opal_memory->memoryc_process)) {
        status = opal_memory->memoryc_process();
        if (OPAL_SUCCESS != status) {
            return status;
        }
    }

    *reg = mca_rcache_base_vma_tree_find (vma_module, (unsigned char *) addr, last_byte);

    return OPAL_SUCCESS;
}
/**
 * Collect the registrations that overlap [addr, addr + size - 1].
 *
 * @param vma_module  VMA module to search
 * @param addr        first byte of the range
 * @param size        length of the range in bytes
 * @param regs        out: array receiving the registrations found
 * @param reg_cnt     capacity of regs
 *
 * @return OPAL_ERROR for an empty range, the error reported by the memory
 *         component if processing pending memory changes fails, otherwise
 *         whatever mca_rcache_base_vma_tree_find_all() returns (the number
 *         of entries stored in regs).
 */
int mca_rcache_base_vma_find_all (mca_rcache_base_vma_module_t *vma_module, void *addr,
                                  size_t size, mca_rcache_base_registration_t **regs,
                                  int reg_cnt)
{
    unsigned char *last_byte;
    int status;

    if (0 == size) {
        return OPAL_ERROR;
    }

    /* the tree works with inclusive bounds */
    last_byte = (unsigned char *) ((intptr_t) addr + size - 1);

    /* make sure the cache is still valid before consulting it */
    if (OPAL_UNLIKELY(opal_memory_changed() && NULL != opal_memory->memoryc_process)) {
        status = opal_memory->memoryc_process();
        if (OPAL_SUCCESS != status) {
            return status;
        }
    }

    return mca_rcache_base_vma_tree_find_all (vma_module, (unsigned char *) addr,
                                              last_byte, regs, reg_cnt);
}
/**
 * Add a registration to the VMA tree and ask the memory component to start
 * monitoring the registered region.
 *
 * @param vma_module  VMA module to insert into
 * @param reg         registration covering [reg->base, reg->bound]
 * @param limit       cache size limit in bytes; 0 disables the limit
 *
 * @return OPAL_ERR_OUT_OF_RESOURCE when the registration alone is larger
 *         than a non-zero limit, the error reported by the memory component
 *         if processing pending memory changes fails, otherwise the result
 *         of mca_rcache_base_vma_tree_insert().
 */
int mca_rcache_base_vma_insert (mca_rcache_base_vma_module_t *vma_module,
                                mca_rcache_base_registration_t *reg, size_t limit)
{
    /* bound is inclusive, hence the + 1 */
    size_t region_bytes = reg->bound - reg->base + 1;
    int status;

    /* a request bigger than the whole cache can never succeed: report a
     * hard out-of-resource (the tree reports the temporary flavor) */
    if (0 != limit && region_bytes > limit) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* make sure the cache is still valid before modifying it */
    if (OPAL_UNLIKELY(opal_memory_changed() && NULL != opal_memory->memoryc_process)) {
        status = opal_memory->memoryc_process();
        if (OPAL_SUCCESS != status) {
            return status;
        }
    }

    status = mca_rcache_base_vma_tree_insert (vma_module, reg, limit);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != status)) {
        return status;
    }

    /* the registration is cached: tell the memory manager to start
     * monitoring this region */
    opal_memory->memoryc_register (reg->base, (uint64_t) region_bytes,
                                   (uint64_t) (uintptr_t) reg);

    return OPAL_SUCCESS;
}
/**
 * Remove a registration from the VMA tree and tell the memory component to
 * stop monitoring the registered region.
 *
 * The size handed to memoryc_deregister() is computed the same way as the
 * size handed to memoryc_register() on insertion: reg->bound is the address
 * of the last byte of the region, so the region spans bound - base + 1 bytes.
 *
 * @param vma_module  VMA module the registration was inserted into
 * @param reg         registration to remove
 *
 * @return the result of mca_rcache_base_vma_tree_delete().
 */
int mca_rcache_base_vma_delete (mca_rcache_base_vma_module_t *vma_module,
                                mca_rcache_base_registration_t *reg)
{
    /* inclusive bound: without the + 1 the last byte would stay monitored */
    size_t reg_size = reg->bound - reg->base + 1;

    /* Tell the memory manager that we no longer care about this region */
    opal_memory->memoryc_deregister (reg->base, (uint64_t) reg_size,
                                     (uint64_t) (uintptr_t) reg);

    return mca_rcache_base_vma_tree_delete (vma_module, reg);
}
/**
 * Debugging aid: forwards to mca_rcache_base_vma_tree_dump_range() to print
 * the cache entries in the size-byte range starting at base.
 *
 * @param vma_module  VMA module to dump
 * @param base        first byte of the range of interest
 * @param size        length of the range in bytes
 * @param msg         text passed through to the tree dump routine
 */
void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base, size_t size, char *msg)
{
mca_rcache_base_vma_tree_dump_range (vma_module, base, size, msg);
}

74
opal/mca/rcache/base/rcache_base_vma.h Обычный файл
Просмотреть файл

@ -0,0 +1,74 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
*
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
* Registration cache VMA lookup
*/
#ifndef MCA_RCACHE_BASE_VMA_H
#define MCA_RCACHE_BASE_VMA_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_rb_tree.h"
BEGIN_C_DECLS
struct mca_rcache_base_registration_t;
/**
 * State of one VMA-based registration cache: the address space is split into
 * non-overlapping VMA items, each carrying the list of registrations that
 * cover it.
 */
struct mca_rcache_base_vma_module_t {
/* parent class */
opal_object_t super;
/* red-black tree holding the VMA items (each item is inserted as both key and value) */
opal_rb_tree_t rb_tree;
/* the same VMA items kept as a list; the tree code walks it via prev/next */
opal_list_t vma_list;
/* number of bytes currently covered by VMA items; checked against the insert limit */
size_t reg_cur_cache_size;
/* constructed as an opal_recursive_mutex_t by the class constructor.
 * NOTE(review): the base vma/vma_tree code does not take this lock itself;
 * presumably callers are expected to -- confirm. */
opal_mutex_t vma_lock;
};
typedef struct mca_rcache_base_vma_module_t mca_rcache_base_vma_module_t;
OBJ_CLASS_DECLARATION(mca_rcache_base_vma_module_t);
/**
 * Allocate and construct a new VMA module (OBJ_NEW of the class above).
 */
mca_rcache_base_vma_module_t *mca_rcache_base_vma_module_alloc (void);
/**
 * Look up one registration for the range [addr, addr + size - 1].
 *
 * @param reg  out: the registration found, or NULL if there is none;
 *             left untouched when an error is returned
 *
 * @return OPAL_ERROR if size is 0, an error from the memory component if
 *         pending memory changes cannot be processed, OPAL_SUCCESS otherwise
 *         (including the "nothing found" case).
 */
int mca_rcache_base_vma_find (mca_rcache_base_vma_module_t *vma_module, void *addr,
size_t size, struct mca_rcache_base_registration_t **reg);
/**
 * Collect the valid registrations overlapping [addr, addr + size - 1].
 *
 * @param regs     out: array filled with the registrations found
 * @param reg_cnt  capacity of regs; the search stops once it is reached
 *
 * @return the number of entries stored in regs, OPAL_ERROR if size is 0, or
 *         an error from the memory component if pending memory changes
 *         cannot be processed.
 */
int mca_rcache_base_vma_find_all (mca_rcache_base_vma_module_t *vma_module, void *addr,
size_t size, struct mca_rcache_base_registration_t **regs,
int reg_cnt);
/**
 * Insert a registration and start memory monitoring for its region.
 *
 * @param limit  cache size limit in bytes; 0 means unlimited
 *
 * @return OPAL_ERR_OUT_OF_RESOURCE if the registration alone exceeds a
 *         non-zero limit, an error from the memory component if pending
 *         memory changes cannot be processed, otherwise the result of the
 *         tree insertion (OPAL_SUCCESS or OPAL_ERR_TEMP_OUT_OF_RESOURCE).
 */
int mca_rcache_base_vma_insert (mca_rcache_base_vma_module_t *vma_module,
struct mca_rcache_base_registration_t *registration,
size_t limit);
/**
 * Stop memory monitoring for a registration's region and remove the
 * registration from the tree.
 *
 * @return the result of the tree deletion.
 */
int mca_rcache_base_vma_delete (mca_rcache_base_vma_module_t *vma_module,
struct mca_rcache_base_registration_t *registration);
/**
 * Debugging aid: print the cache entries in the size-byte range starting
 * at base, prefixed by msg.
 */
void mca_rcache_base_vma_dump_range (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base, size_t size, char *msg);
END_C_DECLS
#endif /* MCA_RCACHE_BASE_VMA_H */

565
opal/mca/rcache/base/rcache_base_vma_tree.c Обычный файл
Просмотреть файл

@ -0,0 +1,565 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
*
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal/util/output.h"
#include "rcache_base_vma_tree.h"
/* List node tying one registration to one VMA item; it has no constructor
 * or destructor of its own. */
OBJ_CLASS_INSTANCE(mca_rcache_base_vma_reg_list_item_t, opal_list_item_t, NULL, NULL);
/* Constructor: set up the list of registrations attached to this VMA item. */
static void mca_rcache_base_vma_item_construct (mca_rcache_base_vma_item_t *vma_item)
{
OBJ_CONSTRUCT(&vma_item->reg_list, opal_list_t);
}
/* Destructor: tear down the registration list (OPAL_LIST_DESTRUCT presumably
 * also releases any nodes still on it -- confirm against opal_list.h). */
static void mca_rcache_base_vma_item_destruct (mca_rcache_base_vma_item_t *vma_item)
{
OPAL_LIST_DESTRUCT(&vma_item->reg_list);
}
/* A VMA item is itself a list item so it can live on the module's vma_list. */
OBJ_CLASS_INSTANCE(mca_rcache_base_vma_item_t, opal_list_item_t,
mca_rcache_base_vma_item_construct,
mca_rcache_base_vma_item_destruct);
/**
* Function for the red black tree to compare 2 keys
*
* @param key1 a pointer to the 1st key
* @param key2 a pointer to the second key
*
* @retval -1 if key1 is below key2
* @retval 1 if key 1 is above key2
* @retval 0 if the keys are the same
*/
static int mca_rcache_base_vma_tree_node_compare(void *key1, void *key2)
{
mca_rcache_base_vma_item_t *vma1 = (mca_rcache_base_vma_item_t *) key1,
*vma2 = (mca_rcache_base_vma_item_t *) key2;
if (vma1->start < vma2->start) {
return -1;
}
if (vma1->start > vma2->start) {
return 1;
}
return 0;
}
/**
 * Search comparator: locate the VMA item that contains an address.
 *
 * @param key1 the address being looked up (passed by value in the pointer)
 * @param key2 pointer to the VMA item the tree is currently visiting
 *
 * @retval  1 if the address lies above the item
 * @retval  0 if the address lies inside [start, end]
 * @retval -1 if the address lies below the item
 */
static int mca_rcache_base_vma_tree_node_compare_search(void *key1, void *key2)
{
    mca_rcache_base_vma_item_t *candidate = (mca_rcache_base_vma_item_t *) key2;
    uintptr_t target = (uintptr_t) key1;

    if (target > candidate->end) {
        return 1;
    }

    return (target >= candidate->start) ? 0 : -1;
}
static int mca_rcache_base_vma_tree_node_compare_closest(void *key1, void *key2)
{
mca_rcache_base_vma_item_t *vma = (mca_rcache_base_vma_item_t *) key2, *prev_vma;
uintptr_t addr = (uintptr_t) key1;
if (vma->end < addr) {
return 1;
}
if (vma->start <= addr) {
return 0;
}
prev_vma = (mca_rcache_base_vma_item_t *) opal_list_get_prev (&vma->super);
if (prev_vma == (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma->vma_module->vma_list)
|| prev_vma->end < addr) {
return 0;
}
return -1;
}
/**
 * Allocate a VMA item covering [start, end] and insert it into the module's
 * red-black tree. The caller is responsible for linking the item into
 * vma_list.
 *
 * @param vma_module  module that will own the item
 * @param start       first address covered by the item
 * @param end         last address covered by the item (inclusive)
 *
 * @return the new item, or NULL if it could not be allocated or could not be
 *         inserted into the tree. Reporting a failed tree insertion as NULL
 *         keeps the tree and the list consistent: an item the tree does not
 *         know about could never be found again by address.
 */
static inline
mca_rcache_base_vma_item_t *mca_rcache_base_vma_new (mca_rcache_base_vma_module_t *vma_module,
                                                     uintptr_t start, uintptr_t end)
{
    mca_rcache_base_vma_item_t *vma_item = OBJ_NEW(mca_rcache_base_vma_item_t);

    if (NULL == vma_item) {
        return NULL;
    }

    vma_item->start = start;
    vma_item->end = end;
    vma_item->vma_module = vma_module;

    if (OPAL_SUCCESS != opal_rb_tree_insert (&vma_module->rb_tree, vma_item, vma_item)) {
        /* callers already treat NULL as "out of resources" */
        OBJ_RELEASE(vma_item);
        return NULL;
    }

    return vma_item;
}
/**
 * Ordering of registrations inside a VMA item's registration list.
 *
 * Persistent registrations sort above non-persistent ones; within the same
 * class a higher bound sorts above a lower one, and the address of the
 * registration object breaks remaining ties.
 *
 * Only the sign of the result is meaningful. The comparison is done on the
 * full-width values: narrowing a pointer difference to int could flip the
 * sign or yield 0 for distinct values that are 4 GiB or more apart.
 *
 * @retval  1 if reg1 sorts above reg2
 * @retval -1 if reg1 sorts below reg2
 * @retval  0 if reg1 and reg2 are the same registration
 */
static inline int mca_rcache_base_vma_compare_regs (mca_rcache_base_registration_t *reg1,
                                                    mca_rcache_base_registration_t *reg2)
{
    const int persist1 = (0 != (reg1->flags & MCA_RCACHE_FLAGS_PERSIST));
    const int persist2 = (0 != (reg2->flags & MCA_RCACHE_FLAGS_PERSIST));
    const uintptr_t bound1 = (uintptr_t) reg1->bound;
    const uintptr_t bound2 = (uintptr_t) reg2->bound;
    const uintptr_t self1 = (uintptr_t) reg1;
    const uintptr_t self2 = (uintptr_t) reg2;

    /* persistent registrations are on top */
    if (persist1 != persist2) {
        return persist1 ? 1 : -1;
    }

    if (bound1 != bound2) {
        return (bound1 > bound2) ? 1 : -1;
    }

    /* tie breaker */
    if (self1 != self2) {
        return (self1 > self2) ? 1 : -1;
    }

    return 0;
}
/**
 * Attach a registration to a VMA item, keeping the item's registration list
 * ordered according to mca_rcache_base_vma_compare_regs() (highest first).
 *
 * @param vma_item  VMA item to attach to
 * @param reg       registration to attach
 *
 * @return 0 on success, -1 if the list node could not be allocated.
 */
static inline int mca_rcache_base_vma_add_reg (mca_rcache_base_vma_item_t *vma_item,
                                               struct mca_rcache_base_registration_t *reg)
{
    mca_rcache_base_vma_reg_list_item_t *cursor, *node;

    node = OBJ_NEW(mca_rcache_base_vma_reg_list_item_t);
    if (NULL == node) {
        return -1;
    }

    node->reg = reg;

    /* insert in front of the first entry that does not sort above reg */
    OPAL_LIST_FOREACH(cursor, &vma_item->reg_list, mca_rcache_base_vma_reg_list_item_t) {
        if (mca_rcache_base_vma_compare_regs (cursor->reg, reg) <= 0) {
            opal_list_insert_pos (&vma_item->reg_list, &cursor->super, &node->super);
            return 0;
        }
    }

    /* every existing entry sorts above reg (or the list is empty) */
    opal_list_append (&vma_item->reg_list, &node->super);

    return 0;
}
/**
 * Detach a registration from a VMA item. Only the first list node referring
 * to the registration is removed and released; nothing happens if the
 * registration is not on the list.
 *
 * @param vma_item  VMA item to detach from
 * @param reg       registration to detach
 */
static inline void mca_rcache_base_vma_remove_reg (mca_rcache_base_vma_item_t *vma_item,
                                                   struct mca_rcache_base_registration_t *reg)
{
    mca_rcache_base_vma_reg_list_item_t *cursor;

    OPAL_LIST_FOREACH(cursor, &vma_item->reg_list, mca_rcache_base_vma_reg_list_item_t) {
        if (cursor->reg != reg) {
            continue;
        }

        opal_list_remove_item (&vma_item->reg_list, &cursor->super);
        OBJ_RELEASE(cursor);
        break;
    }
}
/**
 * Append a copy of every registration reference of one VMA item to another,
 * preserving their order. Used when a VMA item is split in two.
 *
 * @param to    VMA item receiving the copies
 * @param from  VMA item whose registration list is copied
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE if a list node could not
 *         be allocated (nodes copied up to that point stay on the
 *         destination list).
 */
static inline int mca_rcache_base_vma_copy_reg_list (mca_rcache_base_vma_item_t *to,
                                                     mca_rcache_base_vma_item_t *from)
{
    mca_rcache_base_vma_reg_list_item_t *src;

    OPAL_LIST_FOREACH(src, &from->reg_list, mca_rcache_base_vma_reg_list_item_t) {
        mca_rcache_base_vma_reg_list_item_t *dup = OBJ_NEW(mca_rcache_base_vma_reg_list_item_t);

        if (NULL == dup) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        dup->reg = src->reg;
        opal_list_append (&to->reg_list, &dup->super);
    }

    return OPAL_SUCCESS;
}
/**
 * Check whether two VMA items carry the same registrations in the same order.
 *
 * @return 1 if both items are non-NULL and their registration lists match
 *         element by element, 0 otherwise.
 */
static inline int mca_rcache_base_vma_compare_reg_lists (mca_rcache_base_vma_item_t *vma1,
                                                         mca_rcache_base_vma_item_t *vma2)
{
    mca_rcache_base_vma_reg_list_item_t *lhs, *rhs;

    if (NULL == vma1 || NULL == vma2) {
        return 0;
    }

    if (opal_list_get_size (&vma1->reg_list) != opal_list_get_size (&vma2->reg_list)) {
        return 0;
    }

    /* walk both lists in lock step */
    rhs = (mca_rcache_base_vma_reg_list_item_t *) opal_list_get_first (&vma2->reg_list);

    OPAL_LIST_FOREACH(lhs, &vma1->reg_list, mca_rcache_base_vma_reg_list_item_t) {
        if ((void *) rhs == (void *) opal_list_get_end (&vma2->reg_list)) {
            return 0;
        }

        if (lhs->reg != rhs->reg) {
            return 0;
        }

        rhs = (mca_rcache_base_vma_reg_list_item_t *) opal_list_get_next (&rhs->super);
    }

    return 1;
}
/**
 * Set up the tree state of a VMA module: construct the red-black tree and
 * the VMA list, reset the cached byte count and install the start-address
 * comparator.
 *
 * @return the result of opal_rb_tree_init().
 */
int mca_rcache_base_vma_tree_init (mca_rcache_base_vma_module_t *vma_module)
{
OBJ_CONSTRUCT(&vma_module->rb_tree, opal_rb_tree_t);
OBJ_CONSTRUCT(&vma_module->vma_list, opal_list_t);
vma_module->reg_cur_cache_size = 0;
return opal_rb_tree_init (&vma_module->rb_tree, mca_rcache_base_vma_tree_node_compare);
}
/**
 * Tear down the tree state of a VMA module by destructing the VMA list and
 * the red-black tree.
 *
 * NOTE(review): the tree is re-initialized with opal_rb_tree_init() right
 * before it is destructed. This looks like it was meant to reset or destroy
 * the tree contents (e.g. opal_rb_tree_destroy()) -- confirm against the
 * opal_rb_tree API whether the extra init is needed or leaks.
 */
void mca_rcache_base_vma_tree_finalize (mca_rcache_base_vma_module_t *vma_module)
{
opal_rb_tree_init(&vma_module->rb_tree, mca_rcache_base_vma_tree_node_compare);
OBJ_DESTRUCT(&vma_module->vma_list);
OBJ_DESTRUCT(&vma_module->rb_tree);
}
/**
 * Find a valid registration that starts in the VMA item containing base and
 * reaches at least up to bound.
 *
 * The registration list of a VMA item is ordered with persistent
 * registrations first, so the scan gives up at the first non-persistent
 * registration that is too short.
 *
 * @param vma_module  VMA module to search
 * @param base        first byte of the range
 * @param bound       last byte of the range (inclusive)
 *
 * @return the matching registration, or NULL if there is none.
 */
mca_rcache_base_registration_t *mca_rcache_base_vma_tree_find (mca_rcache_base_vma_module_t *vma_module,
                                                               unsigned char *base, unsigned char *bound)
{
    mca_rcache_base_vma_reg_list_item_t *entry;
    mca_rcache_base_vma_item_t *vma;

    vma = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, base,
                                                                 mca_rcache_base_vma_tree_node_compare_search);
    if (NULL == vma) {
        return NULL;
    }

    OPAL_LIST_FOREACH(entry, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
        mca_rcache_base_registration_t *candidate = entry->reg;

        /* invalidated registrations are never handed out */
        if (candidate->flags & MCA_RCACHE_FLAGS_INVALID) {
            continue;
        }

        if (candidate->bound >= bound) {
            return candidate;
        }

        if (!(candidate->flags & MCA_RCACHE_FLAGS_PERSIST)) {
            break;
        }
    }

    return NULL;
}
/**
 * Linear search for a registration pointer in an array.
 *
 * @param regs  array to search
 * @param cnt   number of valid entries in regs
 * @param p     registration to look for
 *
 * @return true if p is among the first cnt entries of regs.
 */
static inline bool is_reg_in_array (mca_rcache_base_registration_t **regs,
                                    int cnt, mca_rcache_base_registration_t *p)
{
    int idx = 0;

    while (idx < cnt) {
        if (p == regs[idx]) {
            return true;
        }
        ++idx;
    }

    return false;
}
/**
 * Collect every valid registration attached to a VMA item that intersects
 * [base, bound]. Each registration is reported once even if it spans
 * several VMA items.
 *
 * @param vma_module  VMA module to search
 * @param base        first byte of the range
 * @param bound       last byte of the range (inclusive)
 * @param regs        out: array receiving the registrations
 * @param reg_cnt     capacity of regs; nothing is stored when it is <= 0
 *
 * @return the number of entries stored in regs (at most reg_cnt).
 */
int mca_rcache_base_vma_tree_find_all (mca_rcache_base_vma_module_t *vma_module, unsigned char *base,
                                       unsigned char *bound, mca_rcache_base_registration_t **regs,
                                       int reg_cnt)
{
    int cnt = 0;

    /* the capacity check below only runs after an entry has been stored, so
     * an array without room for a single entry must be rejected up front */
    if (reg_cnt <= 0 || 0 == opal_list_get_size (&vma_module->vma_list)) {
        return cnt;
    }

    do {
        mca_rcache_base_vma_item_t *vma;
        mca_rcache_base_vma_reg_list_item_t *vma_item;

        vma = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree, base,
                                                                     mca_rcache_base_vma_tree_node_compare_closest);
        if (NULL == vma) {
            /* base is bigger than any registered memory */
            break;
        }

        if (base < (unsigned char *) vma->start) {
            /* base fell into a gap: skip ahead to the next VMA item; the
             * continue re-evaluates the loop condition, so the walk stops if
             * that item starts beyond bound */
            base = (unsigned char *) vma->start;
            continue;
        }

        OPAL_LIST_FOREACH(vma_item, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
            if ((vma_item->reg->flags & MCA_RCACHE_FLAGS_INVALID) ||
                is_reg_in_array (regs, cnt, vma_item->reg)) {
                continue;
            }

            regs[cnt++] = vma_item->reg;
            if (cnt == reg_cnt) {
                return cnt; /* no space left in the provided array */
            }
        }

        base = (unsigned char *) vma->end + 1;
    } while (bound >= base);

    return cnt;
}
/**
 * Check whether nbytes more bytes fit into the cache.
 *
 * @param vma_module  VMA module whose current size is consulted
 * @param nbytes      number of bytes about to be added
 * @param limit       cache size limit in bytes; 0 means unlimited
 *
 * @return non-zero if the bytes may be added, 0 otherwise.
 */
static inline int mca_rcache_base_vma_can_insert (mca_rcache_base_vma_module_t *vma_module, size_t nbytes, size_t limit)
{
    if (0 == limit) {
        return 1;
    }

    return vma_module->reg_cur_cache_size + nbytes <= limit;
}
/**
 * Add nbytes to the number of bytes currently accounted to the cache.
 *
 * The counter is an unsigned size_t, so the tree deletion code shrinks it by
 * passing the negated size (start - end - 1) and relying on unsigned
 * wrap-around.
 */
static inline void mca_rcache_base_vma_update_byte_count (mca_rcache_base_vma_module_t *vma_module,
size_t nbytes)
{
vma_module->reg_cur_cache_size += nbytes;
}
/**
 * Attach a registration to every VMA item covering [reg->base, reg->bound].
 *
 * The range is processed left to right. Parts not yet covered by a VMA item
 * get a new item (subject to the size limit); existing items that only
 * partly overlap the range are split at the range boundaries so that every
 * item is either fully inside or fully outside the registration.
 *
 * @param vma_module  VMA module to insert into
 * @param reg         registration to attach
 * @param limit       cache size limit in bytes; 0 means unlimited
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_TEMP_OUT_OF_RESOURCE if a VMA item could
 *         not be created (limit reached or allocation failure); in that case
 *         the registration is removed again from the items already updated.
 *
 * NOTE(review): the return values of mca_rcache_base_vma_add_reg() and
 * mca_rcache_base_vma_copy_reg_list() are ignored throughout -- confirm
 * whether allocation failures there need handling.
 */
int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module,
mca_rcache_base_registration_t *reg, size_t limit)
{
mca_rcache_base_vma_item_t *i;
uintptr_t begin = (uintptr_t)reg->base, end = (uintptr_t)reg->bound;
/* start at the item containing begin or, failing that, the first item above it */
i = (mca_rcache_base_vma_item_t *) opal_rb_tree_find_with (&vma_module->rb_tree,
(void *) begin, mca_rcache_base_vma_tree_node_compare_closest);
if (!i) {
/* nothing at or above begin: use the list end sentinel as the cursor */
i = (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma_module->vma_list);
}
/* [begin, end] is the part of the registration not yet handled */
while (begin <= end) {
mca_rcache_base_vma_item_t *vma = NULL;
if (opal_list_get_end (&vma_module->vma_list) == &i->super) {
/* no more items above begin: one new item covers the whole remainder */
if (mca_rcache_base_vma_can_insert (vma_module, end - begin + 1, limit)) {
vma = mca_rcache_base_vma_new(vma_module, begin, end);
}
if (!vma) {
goto remove;
}
mca_rcache_base_vma_update_byte_count (vma_module, end - begin + 1);
opal_list_append(&vma_module->vma_list, &vma->super);
/* begin is not read again on this path: the function returns below */
begin = vma->end + 1;
mca_rcache_base_vma_add_reg (vma, reg);
return OPAL_SUCCESS;
}
if (i->start > begin) {
/* gap in front of item i: fill it up to i->start - 1, or up to end
 * if the registration ends inside the gap */
uintptr_t tend = (i->start <= end) ? (i->start - 1) : end;
if (mca_rcache_base_vma_can_insert(vma_module, tend - begin + 1, limit)) {
vma = mca_rcache_base_vma_new(vma_module, begin, tend);
}
if (!vma) {
goto remove;
}
mca_rcache_base_vma_update_byte_count (vma_module, tend - begin + 1);
/* insert before */
opal_list_insert_pos(&vma_module->vma_list, &i->super, &vma->super);
/* make the new item the cursor so the advance at the loop bottom lands on the old i */
i = vma;
begin = vma->end + 1;
mca_rcache_base_vma_add_reg (vma, reg);
} else if(i->start == begin) {
if (i->end > end) {
/* item i extends past the registration: split off the tail
 * [end + 1, i->end], which keeps only the old registrations */
vma = mca_rcache_base_vma_new (vma_module, end + 1, i->end);
if (!vma) {
goto remove;
}
i->end = end;
/* copy before reg is added to i so the tail does not receive reg */
mca_rcache_base_vma_copy_reg_list (vma, i);
/* add after */
opal_list_insert_pos (&vma_module->vma_list,
opal_list_get_next (&i->super),
&vma->super);
mca_rcache_base_vma_add_reg (i, reg);
begin = end + 1;
} else {
/* item i lies entirely inside the registration */
mca_rcache_base_vma_add_reg(i, reg);
begin = i->end + 1;
}
} else {
/* item i starts below begin: split it into [i->start, begin - 1] and a
 * new item [begin, i->end]. reg is not added here; the next iteration
 * sees the new item with start == begin and handles it above. */
vma = mca_rcache_base_vma_new (vma_module, begin, i->end);
if (!vma) {
goto remove;
}
i->end = begin - 1;
mca_rcache_base_vma_copy_reg_list (vma, i);
/* add after */
opal_list_insert_pos (&vma_module->vma_list,
opal_list_get_next (&i->super),
&vma->super);
}
i = (mca_rcache_base_vma_item_t *) opal_list_get_next (&i->super);
}
return OPAL_SUCCESS;
remove:
/* undo the partial insertion */
mca_rcache_base_vma_tree_delete (vma_module, reg);
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
}
/**
 * Detach a registration from every VMA item it is attached to. The
 * registration itself is not freed.
 *
 * VMA items left without any registration are removed and released; the
 * remaining ones are merged with adjacent neighbours that carry an identical
 * registration list.
 *
 * @param vma_module  VMA module the registration was inserted into
 * @param reg         registration to detach
 *
 * @retval 0 on success (presumably equal to OPAL_SUCCESS -- confirm)
 * @retval OPAL_ERROR if no VMA item contains reg->base
 */
int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module,
mca_rcache_base_registration_t *reg)
{
mca_rcache_base_vma_item_t *vma;
/* locate the item containing the first byte of the registration */
vma = (mca_rcache_base_vma_item_t *)
opal_rb_tree_find_with (&vma_module->rb_tree, reg->base,
mca_rcache_base_vma_tree_node_compare_search);
if (!vma) {
return OPAL_ERROR;
}
/* walk the items in address order until one starts past the registration */
while (vma != (mca_rcache_base_vma_item_t *) opal_list_get_end (&vma_module->vma_list)
&& vma->start <= (uintptr_t) reg->bound) {
mca_rcache_base_vma_remove_reg(vma, reg);
if(opal_list_is_empty(&vma->reg_list)) {
/* no registration left on this item: drop it */
mca_rcache_base_vma_item_t *next =
(mca_rcache_base_vma_item_t *) opal_list_get_next (&vma->super);
opal_rb_tree_delete (&vma_module->rb_tree, vma);
/* start - end - 1 is the negated item size; adding it to the unsigned
 * byte count subtracts the size through wrap-around */
mca_rcache_base_vma_update_byte_count (vma_module,
vma->start - vma->end - 1);
opal_list_remove_item (&vma_module->vma_list, &vma->super);
OBJ_RELEASE(vma);
vma = next;
} else {
/* the item survives: merge it with contiguous neighbours carrying the
 * same registrations, repeating until nothing more can be merged */
int merged;
do {
mca_rcache_base_vma_item_t *prev = NULL, *next = NULL;
if (opal_list_get_first (&vma_module->vma_list) != &vma->super) {
prev = (mca_rcache_base_vma_item_t *) opal_list_get_prev(vma);
}
merged = 0;
if (prev && vma->start == prev->end + 1 &&
mca_rcache_base_vma_compare_reg_lists(vma, prev)) {
/* fold vma into prev and continue with prev */
prev->end = vma->end;
opal_list_remove_item(&vma_module->vma_list, &vma->super);
opal_rb_tree_delete(&vma_module->rb_tree, vma);
OBJ_RELEASE(vma);
vma = prev;
merged = 1;
}
if (opal_list_get_last (&vma_module->vma_list) != &vma->super) {
next = (mca_rcache_base_vma_item_t *) opal_list_get_next (vma);
}
if (next && vma->end + 1 == next->start &&
mca_rcache_base_vma_compare_reg_lists (vma, next)) {
/* fold next into vma */
vma->end = next->end;
opal_list_remove_item(&vma_module->vma_list, &next->super);
opal_rb_tree_delete(&vma_module->rb_tree, next);
OBJ_RELEASE(next);
merged = 1;
}
} while (merged);
vma = (mca_rcache_base_vma_item_t *) opal_list_get_next (vma);
}
}
return 0;
}
/**
 * Debugging aid: print every VMA item intersecting [base, base + size - 1]
 * together with the registrations attached to it.
 *
 * @param vma_module  VMA module to dump
 * @param base        first byte of the range of interest
 * @param size        length of the range in bytes
 * @param msg         text appended to the dump header; NULL is treated as ""
 */
void mca_rcache_base_vma_tree_dump_range (mca_rcache_base_vma_module_t *vma_module,
                                          unsigned char *base, size_t size, char *msg)
{
    unsigned char *last_byte = base + size - 1;

    if (NULL == msg) {
        msg = "";
    }

    opal_output(0, "Dumping rcache entries: %s", msg);

    if (opal_list_is_empty(&vma_module->vma_list)) {
        opal_output(0, " rcache is empty");
        return;
    }

    do {
        mca_rcache_base_vma_reg_list_item_t *entry;
        mca_rcache_base_vma_item_t *vma;

        vma = (mca_rcache_base_vma_item_t *)
            opal_rb_tree_find_with (&vma_module->rb_tree, base,
                                    mca_rcache_base_vma_tree_node_compare_closest);
        if (NULL == vma) {
            /* base is bigger than any registered memory */
            break;
        }

        if (base < (unsigned char *) vma->start) {
            /* base fell into a gap: move on to the start of the next item
             * and let the loop condition decide whether it is in range */
            base = (unsigned char *) vma->start;
        } else {
            opal_output(0, " vma: base=%p, bound=%p, size=%lu, number of registrations=%d",
                        (void *)vma->start, (void *)vma->end, vma->end - vma->start + 1,
                        (int) opal_list_get_size(&vma->reg_list));

            OPAL_LIST_FOREACH(entry, &vma->reg_list, mca_rcache_base_vma_reg_list_item_t) {
                mca_rcache_base_registration_t *reg = entry->reg;

                opal_output(0, " reg: base=%p, bound=%p, ref_count=%d, flags=0x%x",
                            reg->base, reg->bound, reg->ref_count, reg->flags);
            }

            base = (unsigned char *) vma->end + 1;
        }
    } while (last_byte >= base);
}

109
opal/mca/rcache/base/rcache_base_vma_tree.h Обычный файл
Просмотреть файл

@ -0,0 +1,109 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
*
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
*
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
* Registration cache VMA tree implementation
*/
#ifndef MCA_RCACHE_BASE_VMA_TREE_H
#define MCA_RCACHE_BASE_VMA_TREE_H
#include "opal_config.h"
#include "opal/mca/rcache/rcache.h"
#include "rcache_base_vma.h"
/*
* Data structures for the tree of allocated memory
*/
/**
 * Node of a VMA item's registration list. A registration spanning several
 * VMA items is referenced by one such node per item.
 */
struct mca_rcache_base_vma_reg_list_item_t
{
/* parent class: lets the node live on an opal_list_t */
opal_list_item_t super;
/* the registration this node refers to */
mca_rcache_base_registration_t *reg;
};
typedef struct mca_rcache_base_vma_reg_list_item_t mca_rcache_base_vma_reg_list_item_t;
OBJ_CLASS_DECLARATION(mca_rcache_base_vma_reg_list_item_t);
/**
 * One contiguous address range [start, end] together with the registrations
 * that cover it. Items are stored both in the module's red-black tree and in
 * its vma_list.
 */
struct mca_rcache_base_vma_item_t
{
opal_list_item_t super; /**< the parent class */
uintptr_t start; /**< first address of the memory range */
uintptr_t end; /**< last address of the memory range (inclusive) */
opal_list_t reg_list; /**< list of registrations covering this range */
mca_rcache_base_vma_module_t *vma_module; /**< the VMA module this item belongs to */
};
typedef struct mca_rcache_base_vma_item_t mca_rcache_base_vma_item_t;
OBJ_CLASS_DECLARATION(mca_rcache_base_vma_item_t);
/*
* initialize the vma tree
*/
int mca_rcache_base_vma_tree_init (mca_rcache_base_vma_module_t *vma_module);
/*
* clean up the vma tree
*/
void mca_rcache_base_vma_tree_finalize(mca_rcache_base_vma_module_t *vma_module);
/**
* Returns the item in the vma tree
*/
mca_rcache_base_registration_t *mca_rcache_base_vma_tree_find (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base,
unsigned char *bound);
/**
* Returns all registrations that overlap the given memory region
*/
int mca_rcache_base_vma_tree_find_all (
mca_rcache_base_vma_module_t *vma_module, unsigned char *base,
unsigned char *bound, mca_rcache_base_registration_t **regs,
int reg_cnt);
/*
* insert an item in the vma tree
*/
int mca_rcache_base_vma_tree_insert (mca_rcache_base_vma_module_t *vma_module,
mca_rcache_base_registration_t* reg, size_t limit);
/*
* remove an item from the vma tree
*/
int mca_rcache_base_vma_tree_delete (mca_rcache_base_vma_module_t *vma_module,
mca_rcache_base_registration_t *reg);
/*
* Dump out the contents of the rcache for debugging.
*/
void mca_rcache_base_vma_tree_dump_range (mca_rcache_base_vma_module_t *vma_module,
unsigned char *base, size_t size, char *msg);
#endif /* MCA_RCACHE_BASE_VMA_TREE_H */

Просмотреть файл

@ -11,6 +11,8 @@
# All rights reserved. # All rights reserved.
# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012 NVIDIA Corporation. All rights reserved. # Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
# Copyright (c) 2015 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -18,40 +20,40 @@
# $HEADER$ # $HEADER$
# #
AM_CPPFLAGS = $(mpool_gpusm_CPPFLAGS) AM_CPPFLAGS = $(rcache_gpusm_CPPFLAGS)
sources = \ sources = \
mpool_gpusm_module.c \ rcache_gpusm_module.c \
mpool_gpusm_component.c rcache_gpusm_component.c
if WANT_INSTALL_HEADERS if WANT_INSTALL_HEADERS
opaldir = $(opalincludedir)/$(subdir) opaldir = $(opalincludedir)/$(subdir)
opal_HEADERS = mpool_gpusm.h opal_HEADERS = rcache_gpusm.h
endif endif
# Make the output library in this directory, and name it either # Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds). # (for static builds).
if MCA_BUILD_opal_mpool_gpusm_DSO if MCA_BUILD_opal_rcache_gpusm_DSO
component_noinst = component_noinst =
component_install = mca_mpool_gpusm.la component_install = mca_rcache_gpusm.la
else else
component_noinst = libmca_mpool_gpusm.la component_noinst = libmca_rcache_gpusm.la
component_install = component_install =
endif endif
mcacomponentdir = $(opallibdir) mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install) mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_gpusm_la_SOURCES = $(sources) mca_rcache_gpusm_la_SOURCES = $(sources)
mca_mpool_gpusm_la_LDFLAGS = -module -avoid-version mca_rcache_gpusm_la_LDFLAGS = -module -avoid-version
mca_mpool_gpusm_la_LIBADD = $(mpool_gpusm_LIBS) mca_rcache_gpusm_la_LIBADD = $(rcache_gpusm_LIBS)
if OPAL_cuda_support if OPAL_cuda_support
mca_mpool_gpusm_la_LIBADD += \ mca_rcache_gpusm_la_LIBADD += \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la $(OPAL_TOP_BUILDDIR)/opal/mca/common/cuda/lib@OPAL_LIB_PREFIX@mca_common_cuda.la
endif endif
noinst_LTLIBRARIES = $(component_noinst) noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_gpusm_la_SOURCES = $(sources) libmca_rcache_gpusm_la_SOURCES = $(sources)
libmca_mpool_gpusm_la_LDFLAGS = -module -avoid-version libmca_rcache_gpusm_la_LDFLAGS = -module -avoid-version
libmca_mpool_gpusm_la_LIBADD = $(mpool_gpusm_LIBS) libmca_rcache_gpusm_la_LIBADD = $(rcache_gpusm_LIBS)

Просмотреть файл

@ -1,6 +1,8 @@
# -*- shell-script -*- # -*- shell-script -*-
# #
# Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved. # Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
# Copyright (c) 2015 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -14,8 +16,8 @@
# the configure sequence by the opal_configure_options.m4 code. # the configure sequence by the opal_configure_options.m4 code.
# #
AC_DEFUN([MCA_opal_mpool_gpusm_CONFIG],[ AC_DEFUN([MCA_opal_rcache_gpusm_CONFIG],[
AC_CONFIG_FILES([opal/mca/mpool/gpusm/Makefile]) AC_CONFIG_FILES([opal/mca/rcache/gpusm/Makefile])
# Use CUDA_SUPPORT which was filled in by the opal configure code. # Use CUDA_SUPPORT which was filled in by the opal configure code.
AS_IF([test "x$CUDA_SUPPORT" = "x1"], AS_IF([test "x$CUDA_SUPPORT" = "x1"],

Просмотреть файл

88
opal/mca/rcache/gpusm/rcache_gpusm.h Обычный файл
Просмотреть файл

@ -0,0 +1,88 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_RCACHE_GPUSM_H
#define MCA_RCACHE_GPUSM_H
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/rcache/rcache.h"
BEGIN_C_DECLS
#define MEMHANDLE_SIZE 8
#define EVTHANDLE_SIZE 8
/**
 * Registration record of the gpusm rcache: the generic registration followed
 * by opaque storage for the CUDA handles named in the field comments.
 *
 * NOTE(review): the handle arrays are 8 x uint64_t (64 bytes) each,
 * presumably sized to hold the CUDA IPC handle types without including the
 * CUDA headers here -- confirm against the CUDA driver API.
 */
struct mca_rcache_gpusm_registration_t {
mca_rcache_base_registration_t base;
uint64_t memHandle[MEMHANDLE_SIZE]; /* CUipcMemHandle */
uint64_t evtHandle[EVTHANDLE_SIZE]; /* CUipcEventHandle */
uintptr_t event; /* CUevent */
};
typedef struct mca_rcache_gpusm_registration_t mca_rcache_gpusm_registration_t;
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_rcache_gpusm_registration_t);
/**
 * Component structure of the gpusm rcache; it adds no state of its own to
 * the base rcache component.
 */
struct mca_rcache_gpusm_component_t {
mca_rcache_base_component_t super;
};
typedef struct mca_rcache_gpusm_component_t mca_rcache_gpusm_component_t;
/* the component instance, defined in the component source file */
OPAL_DECLSPEC extern mca_rcache_gpusm_component_t mca_rcache_gpusm_component;
/**
 * Module structure of the gpusm rcache: the base rcache module plus a free
 * list (presumably of mca_rcache_gpusm_registration_t objects -- confirm
 * against the module source).
 */
struct mca_rcache_gpusm_module_t {
mca_rcache_base_module_t super;
opal_free_list_t reg_list;
}; typedef struct mca_rcache_gpusm_module_t mca_rcache_gpusm_module_t;
/*
* Initializes the rcache module.
*/
void mca_rcache_gpusm_module_init(mca_rcache_gpusm_module_t *rcache);
/**
* register block of memory
*/
int mca_rcache_gpusm_register(mca_rcache_base_module_t* rcache, void *addr,
size_t size, uint32_t flags, int32_t access_flags, mca_rcache_base_registration_t **reg);
/**
* deregister memory
*/
int mca_rcache_gpusm_deregister(mca_rcache_base_module_t *rcache,
mca_rcache_base_registration_t *reg);
/**
* find registration for a given block of memory
*/
int mca_rcache_gpusm_find(struct mca_rcache_base_module_t* rcache, void* addr,
size_t size, mca_rcache_base_registration_t **reg);
/**
* finalize rcache
*/
void mca_rcache_gpusm_finalize(struct mca_rcache_base_module_t *rcache);
END_C_DECLS
#endif

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше