Merge with the gleb-mpool branch. All RDMA components now use the same mpool (rdma).
The udapl/openib/vapi/gm mpools are deprecated. The rdma mpool has a parameter, mpool_rdma_rcache_size_limit, that limits its size (default is 0, meaning unlimited). This commit was SVN r12878.
Этот коммит содержится в:
родитель
f1fdd7c041
Коммит
190e7a27cd
@ -155,7 +155,7 @@ int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements)
|
||||
|
||||
if (NULL != flist->fl_mpool)
|
||||
alloc_ptr = (ompi_free_list_memory_t*)flist->fl_mpool->mpool_alloc(flist->fl_mpool,
|
||||
alloc_size, 0, 0, &user_out);
|
||||
alloc_size, 0, MCA_MPOOL_FLAGS_CACHE_BYPASS, &user_out);
|
||||
else
|
||||
alloc_ptr = (ompi_free_list_memory_t*)malloc(alloc_size);
|
||||
|
||||
|
@ -29,7 +29,6 @@ static void mca_bml_base_endpoint_construct(mca_bml_base_endpoint_t* ep)
|
||||
{
|
||||
ep->btl_rdma_offset = 0;
|
||||
ep->btl_max_send_size = 0;
|
||||
ep->btl_rdma_size = 0;
|
||||
ep->btl_rdma_align = 0;
|
||||
|
||||
OBJ_CONSTRUCT(&ep->btl_eager, mca_bml_base_btl_array_t);
|
||||
|
@ -238,7 +238,6 @@ struct mca_bml_base_endpoint_t {
|
||||
struct ompi_proc_t* btl_proc; /**< backpointer to target ompi_proc_t */
|
||||
size_t btl_rdma_offset; /**< max of min rdma size for available rmda btls */
|
||||
size_t btl_max_send_size; /**< min of max send size for available send btls */
|
||||
size_t btl_rdma_size; /**< max of min rdma size for available rmda btls */
|
||||
size_t btl_rdma_align; /**< max of min rdma size for available rmda btls */
|
||||
mca_bml_base_btl_array_t btl_eager; /**< array of btls to use for first fragments */
|
||||
mca_bml_base_btl_array_t btl_send; /**< array of btls to use for remaining fragments */
|
||||
|
@ -265,7 +265,6 @@ int mca_bml_r2_add_procs(
|
||||
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
|
||||
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
|
||||
bml_endpoint->btl_max_send_size = -1;
|
||||
bml_endpoint->btl_rdma_size = -1;
|
||||
bml_endpoint->btl_proc = proc;
|
||||
proc->proc_bml = bml_endpoint;
|
||||
|
||||
@ -426,10 +425,6 @@ int mca_bml_r2_add_procs(
|
||||
if(bml_endpoint->btl_rdma_offset < bml_btl_rdma->btl_min_rdma_size) {
|
||||
bml_endpoint->btl_rdma_offset = bml_btl_rdma->btl_min_rdma_size;
|
||||
}
|
||||
if(bml_endpoint->btl_rdma_size > btl->btl_max_rdma_size) {
|
||||
bml_endpoint->btl_rdma_size = btl->btl_max_rdma_size;
|
||||
bml_endpoint->btl_rdma_align = bml_base_log2(bml_endpoint->btl_rdma_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -677,10 +672,6 @@ int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl)
|
||||
if (ep->btl_rdma_offset < bml_btl->btl_min_rdma_size) {
|
||||
ep->btl_rdma_offset = bml_btl->btl_min_rdma_size;
|
||||
}
|
||||
if (ep->btl_rdma_size > bml_btl->btl_max_rdma_size) {
|
||||
ep->btl_rdma_size = bml_btl->btl_max_rdma_size;
|
||||
ep->btl_rdma_align = bml_base_log2(ep->btl_rdma_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* compute weighting factor for this btl */
|
||||
|
@ -245,7 +245,8 @@ int mca_btl_gm_free( struct mca_btl_base_module_t* btl,
|
||||
mca_btl_gm_frag_t* frag = (mca_btl_gm_frag_t*)des;
|
||||
|
||||
if( NULL != frag->registration ) {
|
||||
btl->btl_mpool->mpool_release(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->registration);
|
||||
btl->btl_mpool->mpool_deregister(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->registration);
|
||||
frag->registration = NULL;
|
||||
}
|
||||
|
||||
MCA_BTL_GM_FRAG_RETURN(btl, frag);
|
||||
@ -268,142 +269,95 @@ mca_btl_base_descriptor_t* mca_btl_gm_prepare_src(
|
||||
size_t* size
|
||||
)
|
||||
{
|
||||
mca_btl_gm_frag_t* frag;
|
||||
mca_btl_gm_frag_t *frag = NULL;
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
size_t max_data = *size;
|
||||
int rc;
|
||||
|
||||
#if (OMPI_MCA_BTL_GM_HAVE_RDMA_GET || OMPI_MCA_BTL_GM_HAVE_RDMA_PUT)
|
||||
/*
|
||||
* If the data has already been pinned and is contigous than we can
|
||||
* use it in place.
|
||||
*/
|
||||
if (NULL != registration && 0 == ompi_convertor_need_buffers(convertor)) {
|
||||
size_t reg_len;
|
||||
MCA_BTL_GM_FRAG_ALLOC_USER(btl, frag, rc);
|
||||
if(NULL == frag){
|
||||
return NULL;
|
||||
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
|
||||
if(registration != NULL || max_data > btl->btl_max_send_size) {
|
||||
MCA_BTL_GM_FRAG_ALLOC_USER(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* just assign it something..
|
||||
* we will assign the real value in put/get
|
||||
*/
|
||||
frag->type = MCA_BTL_GM_PUT;
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
|
||||
*size = max_data;
|
||||
|
||||
if(NULL == registration) {
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
||||
iov.iov_base, max_data, 0, ®istration);
|
||||
if(OMPI_SUCCESS != rc || NULL == registration) {
|
||||
MCA_BTL_GM_FRAG_RETURN(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
/* keep track of the registration we did */
|
||||
frag->registration = registration;
|
||||
}
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
/*
|
||||
* just assign it something..
|
||||
* we will assign the real value in put/get
|
||||
*/
|
||||
frag->type = MCA_BTL_GM_PUT;
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
|
||||
reg_len = (unsigned char*)registration->bound - (unsigned char*)iov.iov_base + 1;
|
||||
|
||||
/* bump reference count as so that the registration
|
||||
* doesn't go away when the operation completes
|
||||
*/
|
||||
btl->btl_mpool->mpool_retain(btl->btl_mpool,
|
||||
(mca_mpool_base_registration_t*) registration);
|
||||
|
||||
frag->registration = registration;
|
||||
|
||||
/*
|
||||
* if the data is not already pinned - but the leave pinned option is set,
|
||||
* then go ahead and pin contigous data. however, if a reserve is required
|
||||
* then we must allocated a fragment w/ buffer space
|
||||
*/
|
||||
} else if (max_data > btl->btl_max_send_size &&
|
||||
ompi_convertor_need_buffers(convertor) == 0 &&
|
||||
reserve == 0) {
|
||||
|
||||
mca_mpool_base_module_t* mpool = btl->btl_mpool;
|
||||
MCA_BTL_GM_FRAG_ALLOC_USER(btl, frag, rc);
|
||||
if(NULL == frag){
|
||||
return NULL;
|
||||
}
|
||||
/*
|
||||
* just assign it something..
|
||||
* we will assign the real value in put/get
|
||||
*/
|
||||
frag->type = MCA_BTL_GM_PUT;
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
|
||||
rc = mpool->mpool_register( mpool, iov.iov_base, max_data, 0, ®istration );
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
MCA_BTL_GM_FRAG_RETURN(btl,frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->registration = registration;
|
||||
}
|
||||
|
||||
/*
|
||||
* if we aren't pinning the data and the requested size is less
|
||||
* than the eager limit pack into a fragment from the eager pool
|
||||
*/
|
||||
else
|
||||
}
|
||||
#endif
|
||||
|
||||
if (max_data+reserve <= btl->btl_eager_limit) {
|
||||
|
||||
if (max_data + reserve <= btl->btl_eager_limit) {
|
||||
/* the data is small enough to fit in the eager frag and
|
||||
* memory is not prepinned */
|
||||
MCA_BTL_GM_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
if(frag != NULL) {
|
||||
frag->type = MCA_BTL_GM_EAGER;
|
||||
}
|
||||
frag->type = MCA_BTL_GM_EAGER;
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_GM_FRAG_RETURN(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
}
|
||||
|
||||
/*
|
||||
* otherwise pack as much data as we can into a fragment
|
||||
* that is the max send size.
|
||||
*/
|
||||
else {
|
||||
|
||||
if(NULL == frag) {
|
||||
/* the data doesn't fit into eager frag or eger frag is
|
||||
* not available */
|
||||
MCA_BTL_GM_FRAG_ALLOC_MAX(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
frag->type = MCA_BTL_GM_SEND;
|
||||
if(max_data + reserve > btl->btl_max_send_size){
|
||||
if(max_data + reserve > btl->btl_max_send_size) {
|
||||
max_data = btl->btl_max_send_size - reserve;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_GM_FRAG_RETURN(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
if(rc < 0) {
|
||||
MCA_BTL_GM_FRAG_RETURN(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
*size = max_data;
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
@ -462,18 +416,7 @@ mca_btl_base_descriptor_t* mca_btl_gm_prepare_dst(
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_flags = 0;
|
||||
if(NULL != registration) {
|
||||
/* bump reference count as so that the registration
|
||||
* doesn't go away when the operation completes
|
||||
*/
|
||||
|
||||
mpool->mpool_retain(mpool,
|
||||
(mca_mpool_base_registration_t*) registration);
|
||||
|
||||
frag->registration = registration;
|
||||
|
||||
} else {
|
||||
|
||||
if(NULL == registration) {
|
||||
rc = mpool->mpool_register( mpool,
|
||||
frag->segment.seg_addr.pval,
|
||||
frag->segment.seg_len,
|
||||
@ -483,7 +426,6 @@ mca_btl_base_descriptor_t* mca_btl_gm_prepare_dst(
|
||||
MCA_BTL_GM_FRAG_RETURN(btl,frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->registration = registration;
|
||||
}
|
||||
return &frag->base;
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/gm/mpool_gm.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "btl_gm.h"
|
||||
#include "btl_gm_frag.h"
|
||||
#include "btl_gm_endpoint.h"
|
||||
@ -45,6 +45,9 @@
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS
|
||||
static void* mca_btl_gm_progress_thread( opal_object_t* arg );
|
||||
#endif
|
||||
static int gm_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
static int gm_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||
|
||||
|
||||
mca_btl_gm_component_t mca_btl_gm_component = {
|
||||
@ -131,7 +134,7 @@ int mca_btl_gm_component_open(void)
|
||||
mca_btl_gm_component.gm_debug =
|
||||
mca_btl_gm_param_register_int("debug", 0);
|
||||
mca_btl_gm_component.gm_mpool_name =
|
||||
mca_btl_gm_param_register_string("mpool", "gm");
|
||||
mca_btl_gm_param_register_string("mpool", "rdma");
|
||||
mca_btl_gm_component.gm_max_ports =
|
||||
mca_btl_gm_param_register_int("max_ports", 16);
|
||||
mca_btl_gm_component.gm_max_boards =
|
||||
@ -201,6 +204,35 @@ int mca_btl_gm_component_close(void)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int gm_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
struct gm_port *port = (struct gm_port*)reg_data;
|
||||
int rc;
|
||||
|
||||
rc = gm_register_memory(port, base, size);
|
||||
|
||||
if(rc != GM_SUCCESS)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Memory-deregistration callback for the GM BTL: releases a region
 * that was registered with the GM library.
 *
 * @param reg_data  opaque callback data; interpreted as the struct gm_port
 *                  the region was registered on
 * @param reg       registration descriptor supplying the region's
 *                  base and bound addresses
 *
 * @return OMPI_SUCCESS when gm_deregister_memory() reports GM_SUCCESS,
 *         OMPI_ERROR otherwise (a diagnostic is written via opal_output).
 */
static int gm_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
{
    struct gm_port *gm_port_handle = (struct gm_port*)reg_data;
    /* length is computed as bound - base + 1, i.e. bound is treated as
     * the last byte of the region */
    int status = gm_deregister_memory(gm_port_handle, reg->base,
                                      reg->bound - reg->base + 1);

    if(GM_SUCCESS == status) {
        return OMPI_SUCCESS;
    }

    /* NOTE(review): the message reports errno, but the failure is signalled
     * through the GM status code - confirm errno is meaningful here. */
    opal_output(0, "%s: error unpinning gm memory errno says %s\n",
            __func__, strerror(errno));
    return OMPI_ERROR;
}
|
||||
|
||||
/**
|
||||
* Initialize module instance
|
||||
@ -243,7 +275,10 @@ mca_btl_gm_module_init (mca_btl_gm_module_t * btl)
|
||||
}
|
||||
|
||||
/* initialize memory pool */
|
||||
resources.port = btl->port;
|
||||
resources.reg_data = (void*)btl->port;
|
||||
resources.sizeof_reg = sizeof(mca_mpool_base_registration_t);
|
||||
resources.register_mem = gm_reg_mr;
|
||||
resources.deregister_mem = gm_dereg_mr;
|
||||
btl->super.btl_mpool = mca_mpool_base_module_create(
|
||||
mca_btl_gm_component.gm_mpool_name,
|
||||
&btl->super,
|
||||
@ -419,8 +454,6 @@ static int mca_btl_gm_discover( void )
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Register GM component addressing information. The MCA framework
|
||||
* will make this available to all peers.
|
||||
|
@ -31,7 +31,7 @@
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include <vapi_types.h>
|
||||
#include <math.h> /* for log2 */
|
||||
@ -225,8 +225,9 @@ int mca_btl_mvapi_free(
|
||||
mca_btl_base_descriptor_t* des)
|
||||
{
|
||||
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*)des;
|
||||
if (MCA_BTL_MVAPI_FRAG_FRAG == frag->type) {
|
||||
btl->btl_mpool->mpool_release(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->vapi_reg);
|
||||
if (MCA_BTL_MVAPI_FRAG_FRAG == frag->type && frag->registration != NULL) {
|
||||
btl->btl_mpool->mpool_deregister(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->registration);
|
||||
frag->registration = NULL;
|
||||
}
|
||||
|
||||
MCA_BTL_IB_FRAG_RETURN(btl, frag);
|
||||
@ -267,164 +268,99 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
||||
)
|
||||
{
|
||||
mca_btl_mvapi_module_t* mvapi_btl;
|
||||
mca_btl_mvapi_frag_t* frag;
|
||||
mca_mpool_mvapi_registration_t * vapi_reg;
|
||||
mca_btl_mvapi_frag_t* frag = NULL;
|
||||
mca_btl_mvapi_reg_t *mvapi_reg;
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
size_t max_data = *size;
|
||||
int rc;
|
||||
|
||||
|
||||
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
||||
vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
|
||||
|
||||
if(NULL != vapi_reg && 0 == ompi_convertor_need_buffers(convertor)){
|
||||
size_t reg_len;
|
||||
/* the memory is already pinned and we have contiguous user data */
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
||||
if(NULL == frag){
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
reg_len = (unsigned char*)vapi_reg->base_reg.bound - (unsigned char*)iov.iov_base + 1;
|
||||
mvapi_btl = (mca_btl_mvapi_module_t*)btl;
|
||||
|
||||
frag->sg_entry.len = max_data;
|
||||
frag->sg_entry.lkey = vapi_reg->l_key;
|
||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base;
|
||||
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->l_key;
|
||||
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
frag->vapi_reg = vapi_reg;
|
||||
|
||||
btl->btl_mpool->mpool_retain(btl->btl_mpool, (mca_mpool_base_registration_t*) vapi_reg);
|
||||
if(vapi_reg->base_reg.flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(vapi_reg->base_reg.ref_count >= 4);
|
||||
} else {
|
||||
assert(vapi_reg->base_reg.ref_count >= 2);
|
||||
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
|
||||
if(registration != NULL || max_data > btl->btl_max_send_size) {
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
|
||||
*size = max_data;
|
||||
|
||||
if(NULL == registration) {
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
||||
iov.iov_base, max_data, 0, ®istration);
|
||||
if(OMPI_SUCCESS != rc || NULL == registration) {
|
||||
MCA_BTL_IB_FRAG_RETURN(mvapi_btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
frag->registration = (mca_btl_mvapi_reg_t*)registration;
|
||||
}
|
||||
mvapi_reg = (mca_btl_mvapi_reg_t*)registration;
|
||||
|
||||
frag->base.des_flags = 0;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
frag->sg_entry.len = max_data;
|
||||
frag->sg_entry.lkey = mvapi_reg->l_key;
|
||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t)iov.iov_base;
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
|
||||
|
||||
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
|
||||
"frag->segment.seg_key.key32[0] = %lu",
|
||||
frag->sg_entry.lkey, frag->sg_entry.addr,
|
||||
frag->segment.seg_key.key32[0]));
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
return &frag->base;
|
||||
|
||||
} else if( max_data > btl->btl_max_send_size &&
|
||||
ompi_convertor_need_buffers(convertor) == 0 &&
|
||||
reserve == 0)
|
||||
{
|
||||
/* The user buffer is contigous and we are asked to send more than the max send size. */
|
||||
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
||||
if(NULL == frag){
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
||||
iov.iov_base,
|
||||
max_data,
|
||||
0,
|
||||
(mca_mpool_base_registration_t**) &vapi_reg);
|
||||
if(OMPI_SUCCESS != rc || NULL == vapi_reg) {
|
||||
BTL_ERROR(("mpool_register(%p,%lu) failed", iov.iov_base, max_data));
|
||||
MCA_BTL_IB_FRAG_RETURN(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->sg_entry.len = max_data;
|
||||
frag->sg_entry.lkey = vapi_reg->l_key;
|
||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base;
|
||||
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->l_key;
|
||||
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->vapi_reg = vapi_reg;
|
||||
|
||||
return &frag->base;
|
||||
|
||||
} else if (max_data+reserve <= btl->btl_eager_limit) {
|
||||
/* the data is small enough to fit in the eager frag and
|
||||
either we received no prepinned memory or leave pinned is
|
||||
not set
|
||||
*/
|
||||
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_IB_FRAG_RETURN(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
return &frag->base;
|
||||
|
||||
} else {
|
||||
|
||||
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
if(max_data + reserve > btl->btl_max_send_size){
|
||||
max_data = btl->btl_max_send_size - reserve;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_IB_FRAG_RETURN(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags=0;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
return NULL;
|
||||
|
||||
if(max_data + reserve <= btl->btl_eager_limit) {
|
||||
/* the data is small enough to fit in the eager frag and
|
||||
* memory is not prepinned */
|
||||
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||
}
|
||||
|
||||
if(NULL == frag) {
|
||||
/* the data doesn't fit into eager frag or eger frag is
|
||||
* not available */
|
||||
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
if(max_data + reserve > btl->btl_max_send_size) {
|
||||
max_data = btl->btl_max_send_size - reserve;
|
||||
}
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_IB_FRAG_RETURN(mvapi_btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
*size = max_data;
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
|
||||
@ -453,12 +389,11 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
||||
{
|
||||
mca_btl_mvapi_module_t* mvapi_btl;
|
||||
mca_btl_mvapi_frag_t* frag;
|
||||
mca_mpool_mvapi_registration_t * vapi_reg;
|
||||
mca_btl_mvapi_reg_t *mvapi_reg;
|
||||
ptrdiff_t lb;
|
||||
int rc;
|
||||
|
||||
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
||||
vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
|
||||
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
||||
|
||||
@ -471,42 +406,32 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
||||
frag->segment.seg_addr.pval = convertor->pBaseBuf + lb + convertor->bConverted;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
if(NULL!= vapi_reg){
|
||||
/* the memory is already pinned- use it*/
|
||||
btl->btl_mpool->mpool_retain(btl->btl_mpool, (mca_mpool_base_registration_t*) vapi_reg);
|
||||
if(vapi_reg->base_reg.flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(vapi_reg->base_reg.ref_count >= 4);
|
||||
} else {
|
||||
assert(vapi_reg->base_reg.ref_count >= 2);
|
||||
}
|
||||
} else {
|
||||
if(NULL == registration) {
|
||||
/* we didn't get a memory registration passed in, so we have to register the region
|
||||
* ourselves
|
||||
*/
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
||||
frag->segment.seg_addr.pval,
|
||||
*size,
|
||||
0,
|
||||
(mca_mpool_base_registration_t**) &vapi_reg);
|
||||
if(OMPI_SUCCESS != rc || NULL == vapi_reg) {
|
||||
frag->segment.seg_addr.pval, *size, 0, ®istration);
|
||||
if(OMPI_SUCCESS != rc || NULL == registration) {
|
||||
BTL_ERROR(("mpool_register(%p,%lu) failed: base %p lb %lu offset %lu",
|
||||
frag->segment.seg_addr.pval, *size, convertor->pBaseBuf, lb, convertor->bConverted));
|
||||
MCA_BTL_IB_FRAG_RETURN(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
frag->registration = (mca_btl_mvapi_reg_t*)registration;
|
||||
}
|
||||
mvapi_reg = (mca_btl_mvapi_reg_t*)registration;
|
||||
|
||||
frag->sg_entry.len = *size;
|
||||
frag->sg_entry.lkey = vapi_reg->l_key;
|
||||
frag->sg_entry.len = *size;
|
||||
frag->sg_entry.lkey = mvapi_reg->l_key;
|
||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->segment.seg_addr.pval;
|
||||
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->r_key;
|
||||
|
||||
frag->segment.seg_key.key32[0] =mvapi_reg->r_key;
|
||||
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->vapi_reg = vapi_reg;
|
||||
|
||||
return &frag->base;
|
||||
|
||||
|
@ -192,7 +192,13 @@ struct mca_btl_mvapi_module_t {
|
||||
uint32_t eager_rdma_buffers_count; /**< number of RDMA buffers */
|
||||
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;
|
||||
|
||||
|
||||
struct mca_btl_mvapi_reg_t {
|
||||
mca_mpool_base_registration_t base;
|
||||
VAPI_mr_hndl_t hndl; /* Memory region handle */
|
||||
VAPI_lkey_t l_key; /* Local key to registered memory */
|
||||
VAPI_rkey_t r_key; /* Remote key to registered memory */
|
||||
};
|
||||
typedef struct mca_btl_mvapi_reg_t mca_btl_mvapi_reg_t;
|
||||
|
||||
#define MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, \
|
||||
additional) \
|
||||
|
@ -47,10 +47,13 @@
|
||||
#include <vapi.h>
|
||||
#include <vapi_common.h>
|
||||
#include "ompi/datatype/convertor.h"
|
||||
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "btl_mvapi_endpoint.h"
|
||||
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
|
||||
|
||||
static int mvapi_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
static int mvapi_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||
|
||||
mca_btl_mvapi_component_t mca_btl_mvapi_component = {
|
||||
{
|
||||
@ -147,7 +150,7 @@ int mca_btl_mvapi_component_open(void)
|
||||
mca_btl_mvapi_param_register_int ("free_list_inc", "increment size of free lists",
|
||||
32, &mca_btl_mvapi_component.ib_free_list_inc);
|
||||
mca_btl_mvapi_param_register_string("mpool", "name of the memory pool to be used",
|
||||
"mvapi", &mca_btl_mvapi_component.ib_mpool_name);
|
||||
"rdma", &mca_btl_mvapi_component.ib_mpool_name);
|
||||
mca_btl_mvapi_param_register_int("reg_mru_len", "length of the registration cache most recently used list",
|
||||
16, (int*) &mca_btl_mvapi_component.reg_mru_len);
|
||||
#ifdef VAPI_FEATURE_SRQ
|
||||
@ -337,7 +340,51 @@ static void mca_btl_mvapi_control(
|
||||
}
|
||||
}
|
||||
|
||||
static int mvapi_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_mvapi_module_t *mvapi_btl = (mca_btl_mvapi_module_t*)reg_data;
|
||||
mca_btl_mvapi_reg_t *mvapi_reg = (mca_btl_mvapi_reg_t*)reg;
|
||||
VAPI_mrw_t mr_in, mr_out;
|
||||
VAPI_ret_t ret;
|
||||
|
||||
memset(&mr_in, 0, sizeof(VAPI_mrw_t));
|
||||
memset(&mr_out, 0, sizeof(VAPI_mrw_t));
|
||||
mr_in.acl =
|
||||
VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE | VAPI_EN_REMOTE_READ;
|
||||
mr_in.pd_hndl = mvapi_btl->ptag;
|
||||
mr_in.size = size;
|
||||
mr_in.start = (VAPI_virt_addr_t)(MT_virt_addr_t)base;
|
||||
mr_in.type = VAPI_MR;
|
||||
mvapi_reg->hndl = VAPI_INVAL_HNDL;
|
||||
|
||||
ret = VAPI_register_mr(mvapi_btl->nic, &mr_in, &mvapi_reg->hndl, &mr_out);
|
||||
|
||||
if(ret != VAPI_OK) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
mvapi_reg->l_key = mr_out.l_key;
|
||||
mvapi_reg->r_key = mr_out.r_key;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Memory-deregistration callback for the mvapi BTL: releases the VAPI
 * memory region held by a registration record.
 *
 * @param reg_data  opaque callback data; interpreted as the
 *                  mca_btl_mvapi_module_t whose nic is used
 * @param reg       registration record; interpreted as mca_btl_mvapi_reg_t
 *
 * @return OMPI_SUCCESS when the handle is VAPI_INVAL_HNDL (nothing to do)
 *         or VAPI_deregister_mr() returns VAPI_OK; OMPI_ERROR otherwise
 *         (a diagnostic is written via opal_output).
 */
static int mvapi_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_mvapi_module_t *module = (mca_btl_mvapi_module_t*)reg_data;
    mca_btl_mvapi_reg_t *record = (mca_btl_mvapi_reg_t*)reg;

    /* no valid handle means there is nothing to deregister */
    if(VAPI_INVAL_HNDL == record->hndl) {
        return OMPI_SUCCESS;
    }

    if(VAPI_OK == VAPI_deregister_mr(module->nic, record->hndl)) {
        return OMPI_SUCCESS;
    }

    /* NOTE(review): the message reports errno, but the failure is signalled
     * through the VAPI return code - confirm errno is meaningful here. */
    opal_output(0, "%s: error unpinning mvapi memory errno says %s\n",
            __func__, strerror(errno));
    return OMPI_ERROR;
}
|
||||
|
||||
/*
|
||||
* IB component initialization:
|
||||
@ -513,9 +560,10 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hca_pd.hca = mvapi_btl->nic;
|
||||
hca_pd.pd_tag = mvapi_btl->ptag;
|
||||
|
||||
hca_pd.reg_data = mvapi_btl;
|
||||
hca_pd.sizeof_reg = sizeof(mca_btl_mvapi_reg_t);
|
||||
hca_pd.register_mem = mvapi_reg_mr;
|
||||
hca_pd.deregister_mem = mvapi_dereg_mr;
|
||||
/* initialize the memory pool using the hca */
|
||||
mvapi_btl->super.btl_mpool =
|
||||
mca_mpool_base_module_create(mca_btl_mvapi_component.ib_mpool_name,
|
||||
|
@ -12,16 +12,16 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "btl_mvapi.h"
|
||||
#include "btl_mvapi_endpoint.h"
|
||||
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct mca_btl_mvapi_reg_t;
|
||||
|
||||
struct mca_btl_mvapi_eager_rdma_local_t {
|
||||
ompi_ptr_t base; /**< buffer for RDMAing eager messages */
|
||||
mca_mpool_mvapi_registration_t *reg;
|
||||
struct mca_btl_mvapi_reg_t *reg;
|
||||
uint16_t head; /**< RDMA buffer to poll */
|
||||
uint16_t tail; /**< Needed for credit managment */
|
||||
int32_t credits; /**< number of RDMA credits */
|
||||
|
@ -1230,7 +1230,8 @@ void mca_btl_mvapi_endpoint_connect_eager_rdma(
|
||||
|
||||
buf = mvapi_btl->super.btl_mpool->mpool_alloc(mvapi_btl->super.btl_mpool,
|
||||
mvapi_btl->eager_rdma_frag_size *
|
||||
mca_btl_mvapi_component.eager_rdma_num, 0, 0,
|
||||
mca_btl_mvapi_component.eager_rdma_num, 0,
|
||||
MCA_MPOOL_FLAGS_CACHE_BYPASS,
|
||||
(mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);
|
||||
|
||||
if(!buf)
|
||||
@ -1239,7 +1240,7 @@ void mca_btl_mvapi_endpoint_connect_eager_rdma(
|
||||
for(i = 0; i < mca_btl_mvapi_component.eager_rdma_num; i++) {
|
||||
ompi_free_list_item_t *item = (ompi_free_list_item_t *)(buf +
|
||||
i*mvapi_btl->eager_rdma_frag_size);
|
||||
item->user_data = endpoint->eager_rdma_local.reg;
|
||||
item->user_data = (void*)endpoint->eager_rdma_local.reg;
|
||||
OBJ_CONSTRUCT(item, mca_btl_mvapi_recv_frag_eager_t);
|
||||
((mca_btl_mvapi_frag_t*)item)->endpoint = endpoint;
|
||||
((mca_btl_mvapi_frag_t*)item)->type = MCA_BTL_MVAPI_FRAG_EAGER_RDMA;
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include "btl_mvapi_frag.h"
|
||||
#include "btl_mvapi.h"
|
||||
#include "btl_mvapi_eager_rdma.h"
|
||||
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
|
||||
#include <vapi.h>
|
||||
#include <mtl_common.h>
|
||||
|
@ -18,25 +18,22 @@
|
||||
|
||||
|
||||
#include "btl_mvapi_frag.h"
|
||||
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
|
||||
|
||||
|
||||
|
||||
static void mca_btl_mvapi_frag_common_constructor( mca_btl_mvapi_frag_t* frag)
|
||||
{
|
||||
mca_mpool_mvapi_registration_t* mem_hndl = (mca_mpool_mvapi_registration_t*) frag->base.super.user_data;
|
||||
mca_btl_mvapi_reg_t* mem_hndl =
|
||||
(mca_btl_mvapi_reg_t*)frag->base.super.user_data;
|
||||
frag->hdr = (mca_btl_mvapi_header_t*) (frag+1); /* initialize btl header to start at end of frag */
|
||||
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_mvapi_header_t);
|
||||
/* init the segment address to start after the btl header */
|
||||
|
||||
frag->segment.seg_len = frag->size;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) mem_hndl->l_key;
|
||||
frag->sg_entry.lkey = mem_hndl->l_key;
|
||||
frag->sg_entry.lkey = mem_hndl->l_key;
|
||||
frag->segment.seg_key.key32[0] = frag->sg_entry.lkey;
|
||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
|
||||
frag->base.des_flags = 0;
|
||||
}
|
||||
|
||||
|
||||
static void mca_btl_mvapi_send_frag_common_constructor(mca_btl_mvapi_frag_t* frag)
|
||||
{
|
||||
|
||||
|
@ -27,12 +27,13 @@
|
||||
#include <vapi.h>
|
||||
#include <mtl_common.h>
|
||||
#include <vapi_common.h>
|
||||
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct mca_btl_mvapi_reg_t;
|
||||
|
||||
struct mca_btl_mvapi_header_t {
|
||||
mca_btl_base_tag_t tag;
|
||||
int16_t credits;
|
||||
@ -95,7 +96,7 @@ struct mca_btl_mvapi_frag_t {
|
||||
VAPI_sg_lst_entry_t sg_entry;
|
||||
mca_btl_mvapi_header_t *hdr;
|
||||
mca_btl_mvapi_footer_t *ftr;
|
||||
mca_mpool_mvapi_registration_t * vapi_reg;
|
||||
struct mca_btl_mvapi_reg_t *registration;
|
||||
ompi_free_list_t* my_list;
|
||||
};
|
||||
typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_frag_t;
|
||||
|
@ -32,7 +32,7 @@
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/openib/mpool_openib.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
@ -292,10 +292,11 @@ int mca_btl_openib_free(
|
||||
{
|
||||
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)des;
|
||||
|
||||
if(frag->size == 0) {
|
||||
btl->btl_mpool->mpool_release(btl->btl_mpool,
|
||||
(mca_mpool_base_registration_t*)
|
||||
frag->openib_reg);
|
||||
if(MCA_BTL_OPENIB_FRAG_FRAG == frag->type && frag->registration != NULL) {
|
||||
btl->btl_mpool->mpool_deregister(btl->btl_mpool,
|
||||
(mca_mpool_base_registration_t*)
|
||||
frag->registration);
|
||||
frag->registration = NULL;
|
||||
}
|
||||
MCA_BTL_IB_FRAG_RETURN(((mca_btl_openib_module_t*) btl), frag);
|
||||
|
||||
@ -335,165 +336,101 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
||||
size_t* size
|
||||
)
|
||||
{
|
||||
mca_btl_openib_module_t* openib_btl;
|
||||
mca_btl_openib_frag_t* frag;
|
||||
mca_mpool_openib_registration_t * openib_reg;
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
size_t max_data = *size;
|
||||
int rc;
|
||||
|
||||
openib_btl = (mca_btl_openib_module_t*) btl;
|
||||
openib_reg = (mca_mpool_openib_registration_t*) registration;
|
||||
mca_btl_openib_module_t *openib_btl;
|
||||
mca_btl_openib_frag_t *frag = NULL;
|
||||
mca_btl_openib_reg_t *openib_reg;
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
size_t max_data = *size;
|
||||
int rc;
|
||||
|
||||
|
||||
if(NULL != openib_reg && 0 == ompi_convertor_need_buffers(convertor)){
|
||||
size_t reg_len;
|
||||
openib_btl = (mca_btl_openib_module_t*)btl;
|
||||
|
||||
/* the memory is already pinned and we have contiguous user data */
|
||||
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
|
||||
if(registration != NULL || max_data > btl->btl_max_send_size) {
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
||||
if(NULL == frag){
|
||||
return NULL;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
|
||||
|
||||
reg_len = (unsigned char*)openib_reg->base_reg.bound - (unsigned char*)iov.iov_base + 1;
|
||||
|
||||
frag->mr = openib_reg->mr;
|
||||
frag->sg_entry.length = max_data;
|
||||
frag->sg_entry.lkey = frag->mr->lkey;
|
||||
|
||||
frag->sg_entry.addr = (unsigned long) iov.iov_base;
|
||||
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
|
||||
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
frag->openib_reg= openib_reg;
|
||||
btl->btl_mpool->mpool_retain(btl->btl_mpool, (mca_mpool_base_registration_t*) openib_reg);
|
||||
return &frag->base;
|
||||
|
||||
} else if( max_data > btl->btl_max_send_size &&
|
||||
ompi_convertor_need_buffers(convertor) == 0 &&
|
||||
reserve == 0) {
|
||||
/* The user buffer is contigous and we are asked to send more than the max send size. */
|
||||
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(openib_btl, frag, rc);
|
||||
if(NULL == frag){
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
frag->base.des_flags = 0;
|
||||
*size = max_data;
|
||||
|
||||
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
||||
iov.iov_base,
|
||||
max_data,
|
||||
0,
|
||||
(mca_mpool_base_registration_t**) &openib_reg);
|
||||
if(OMPI_SUCCESS != rc || NULL == openib_reg) {
|
||||
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
|
||||
if(NULL == registration) {
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
||||
iov.iov_base, max_data, 0, &registration);
|
||||
if(OMPI_SUCCESS != rc || NULL == registration) {
|
||||
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
/* keep track of the registration we did */
|
||||
frag->registration = (mca_btl_openib_reg_t*)registration;
|
||||
}
|
||||
openib_reg = (mca_btl_openib_reg_t*)registration;
|
||||
|
||||
frag->base.des_flags = 0;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
frag->sg_entry.length = max_data;
|
||||
frag->sg_entry.lkey = openib_reg->mr->lkey;
|
||||
frag->sg_entry.addr = (unsigned long)iov.iov_base;
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
|
||||
|
||||
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
|
||||
"frag->segment.seg_key.key32[0] = %lu",
|
||||
frag->sg_entry.lkey, frag->sg_entry.addr,
|
||||
frag->segment.seg_key.key32[0]));
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
}
|
||||
|
||||
if(max_data + reserve <= btl->btl_eager_limit) {
|
||||
/* the data is small enough to fit in the eager frag and
|
||||
* memory is not prepinned */
|
||||
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||
}
|
||||
|
||||
if(NULL == frag) {
|
||||
/* the data doesn't fit into eager frag or eager frag is
|
||||
* not available */
|
||||
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
frag->mr = openib_reg->mr;
|
||||
frag->sg_entry.length = max_data;
|
||||
frag->sg_entry.lkey = openib_reg->mr->lkey;
|
||||
|
||||
frag->sg_entry.addr = (unsigned long) iov.iov_base;
|
||||
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->rkey;
|
||||
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->openib_reg = openib_reg;
|
||||
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu", frag->sg_entry.lkey, frag->sg_entry.addr));
|
||||
|
||||
return &frag->base;
|
||||
|
||||
} else if (max_data+reserve <= btl->btl_eager_limit) {
|
||||
/* the data is small enough to fit in the eager frag and
|
||||
either we received no prepinned memory or leave pinned is
|
||||
not set
|
||||
*/
|
||||
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
return &frag->base;
|
||||
|
||||
} else {
|
||||
|
||||
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
if(max_data + reserve > btl->btl_max_send_size){
|
||||
max_data = btl->btl_max_send_size - reserve;
|
||||
if(max_data + reserve > btl->btl_max_send_size) {
|
||||
max_data = btl->btl_max_send_size - reserve;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags=0;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
return NULL;
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
if(rc < 0) {
|
||||
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
*size = max_data;
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -513,69 +450,62 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
||||
mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
mca_mpool_base_registration_t* registration,
|
||||
mca_mpool_base_registration_t* registration,
|
||||
struct ompi_convertor_t* convertor,
|
||||
size_t reserve,
|
||||
size_t* size)
|
||||
{
|
||||
mca_btl_openib_module_t* openib_btl;
|
||||
mca_btl_openib_frag_t* frag;
|
||||
mca_mpool_openib_registration_t * openib_reg;
|
||||
int rc;
|
||||
mca_btl_openib_module_t *openib_btl;
|
||||
mca_btl_openib_frag_t *frag;
|
||||
mca_btl_openib_reg_t *openib_reg;
|
||||
int rc;
|
||||
ptrdiff_t lb;
|
||||
size_t reg_len;
|
||||
|
||||
openib_btl = (mca_btl_openib_module_t*) btl;
|
||||
openib_reg = (mca_mpool_openib_registration_t*) registration;
|
||||
openib_btl = (mca_btl_openib_module_t*)btl;
|
||||
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
||||
if(NULL == frag){
|
||||
return NULL;
|
||||
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ompi_ddt_type_lb(convertor->pDesc, &lb);
|
||||
frag->segment.seg_len = *size;
|
||||
frag->segment.seg_addr.pval = convertor->pBaseBuf + lb + convertor->bConverted;
|
||||
frag->base.des_flags = 0;
|
||||
frag->segment.seg_addr.pval = convertor->pBaseBuf + lb +
|
||||
convertor->bConverted;
|
||||
|
||||
if(NULL!= openib_reg){
|
||||
/* the memory is already pinned try to use it if the pinned region is large enough*/
|
||||
reg_len = (unsigned char*)openib_reg->base_reg.bound - (unsigned char*)frag->segment.seg_addr.pval + 1;
|
||||
btl->btl_mpool->mpool_retain(btl->btl_mpool,
|
||||
(mca_mpool_base_registration_t*) openib_reg);
|
||||
} else {
|
||||
/* we didn't get a memory registration passed in, so we have to register the region
|
||||
* ourselves
|
||||
if(NULL == registration){
|
||||
/* we didn't get a memory registration passed in, so we have to
|
||||
* register the region ourselves
|
||||
*/
|
||||
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
||||
frag->segment.seg_addr.pval,
|
||||
*size,
|
||||
0,
|
||||
(mca_mpool_base_registration_t**) &openib_reg);
|
||||
if(OMPI_SUCCESS != rc || NULL == openib_reg) {
|
||||
frag->segment.seg_addr.pval, *size, 0, &registration);
|
||||
if(OMPI_SUCCESS != rc || NULL == registration) {
|
||||
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
/* keep track of the registration we did */
|
||||
frag->registration = (mca_btl_openib_reg_t*)registration;
|
||||
}
|
||||
openib_reg = (mca_btl_openib_reg_t*)registration;
|
||||
|
||||
|
||||
frag->mr = openib_reg->mr;
|
||||
frag->sg_entry.length = *size;
|
||||
frag->sg_entry.lkey = openib_reg->mr->lkey;
|
||||
frag->sg_entry.addr = (unsigned long) frag->segment.seg_addr.pval;
|
||||
|
||||
frag->segment.seg_key.key32[0] = frag->mr->rkey;
|
||||
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->openib_reg = openib_reg;
|
||||
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu frag->segment.seg_key.key32[0] = %lu" , frag->sg_entry.lkey, frag->sg_entry.addr, frag->segment.seg_key.key32[0]));
|
||||
frag->sg_entry.length = *size;
|
||||
frag->sg_entry.lkey = openib_reg->mr->lkey;
|
||||
frag->sg_entry.addr = (unsigned long) frag->segment.seg_addr.pval;
|
||||
|
||||
return &frag->base;
|
||||
|
||||
frag->segment.seg_len = *size;
|
||||
frag->segment.seg_key.key32[0] = openib_reg->mr->rkey;
|
||||
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
|
||||
"frag->segment.seg_key.key32[0] = %lu",
|
||||
frag->sg_entry.lkey, frag->sg_entry.addr,
|
||||
frag->segment.seg_key.key32[0]));
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
|
||||
|
@ -220,9 +220,15 @@ struct mca_btl_openib_module_t {
|
||||
|
||||
orte_pointer_array_t *endpoints;
|
||||
}; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
|
||||
|
||||
|
||||
extern mca_btl_openib_module_t mca_btl_openib_module;
|
||||
|
||||
/**
 * Per-registration record used by the openib BTL. Its size is what
 * init_one_hca() passes to the mpool as mpool_resources.sizeof_reg.
 */
struct mca_btl_openib_reg_t {
|
||||
mca_mpool_base_registration_t base; /**< generic mpool registration; kept as the first member because openib_reg_mr()/openib_dereg_mr() cast a mca_mpool_base_registration_t* straight to this type */
|
||||
struct ibv_mr *mr; /**< verbs memory region: assigned from ibv_reg_mr() in openib_reg_mr(), reset to NULL by openib_dereg_mr() */
|
||||
};
|
||||
typedef struct mca_btl_openib_reg_t mca_btl_openib_reg_t;
|
||||
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||
extern void* mca_btl_openib_progress_thread(opal_object_t*);
|
||||
#endif
|
||||
@ -417,10 +423,8 @@ extern mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
||||
* @param frag (IN) IB send fragment
|
||||
*
|
||||
*/
|
||||
extern void mca_btl_openib_send_frag_return(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_openib_frag_t*
|
||||
);
|
||||
extern void mca_btl_openib_send_frag_return(mca_btl_base_module_t* btl,
|
||||
mca_btl_openib_frag_t*);
|
||||
|
||||
|
||||
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t* openib_btl);
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/sys_info.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_frag.h"
|
||||
@ -81,6 +82,9 @@ static int btl_openib_module_progress(mca_btl_openib_module_t *openib_btl);
|
||||
static void btl_openib_frag_progress_pending(
|
||||
mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint,
|
||||
const int prio);
|
||||
static int openib_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||
|
||||
|
||||
mca_btl_openib_component_t mca_btl_openib_component = {
|
||||
@ -235,6 +239,36 @@ static void btl_openib_control(struct mca_btl_base_module_t* btl,
|
||||
}
|
||||
}
|
||||
|
||||
static int openib_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_openib_hca_t *hca = (mca_btl_openib_hca_t*)reg_data;
|
||||
mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;
|
||||
|
||||
openib_reg->mr = ibv_reg_mr(hca->ib_pd, base, size, IBV_ACCESS_LOCAL_WRITE |
|
||||
IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
|
||||
|
||||
if(NULL == openib_reg->mr)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Memory deregistration callback (installed as
 * mpool_resources.deregister_mem in init_one_hca()): release the verbs
 * memory region held by a registration record, if there is one.
 *
 * @param reg_data  opaque cookie supplied at mpool creation; unused here
 * @param reg       registration record; interpreted as mca_btl_openib_reg_t
 *
 * @retval OMPI_SUCCESS  no region was held, or it was released; the record's
 *                       mr pointer is NULL afterwards
 * @retval OMPI_ERROR    ibv_dereg_mr() reported failure; the mr pointer is
 *                       left untouched
 */
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_openib_reg_t *ib_reg = (mca_btl_openib_reg_t*)reg;

    /* ibv_dereg_mr() is only attempted when a region is actually held */
    if(NULL != ib_reg->mr && 0 != ibv_dereg_mr(ib_reg->mr)) {
        opal_output(0, "%s: error unpinning openib memory errno says %s\n",
                __func__, strerror(errno));
        return OMPI_ERROR;
    }

    ib_reg->mr = NULL;
    return OMPI_SUCCESS;
}
|
||||
|
||||
static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
|
||||
uint8_t port_num, struct ibv_port_attr *ib_port_attr)
|
||||
{
|
||||
@ -399,7 +433,10 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
goto close_hca;
|
||||
}
|
||||
|
||||
mpool_resources.ib_pd = hca->ib_pd;
|
||||
mpool_resources.reg_data = (void*)hca;
|
||||
mpool_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t);
|
||||
mpool_resources.register_mem = openib_reg_mr;
|
||||
mpool_resources.deregister_mem = openib_dereg_mr;
|
||||
hca->mpool =
|
||||
mca_mpool_base_module_create(mca_btl_openib_component.ib_mpool_name,
|
||||
hca, &mpool_resources);
|
||||
@ -469,6 +506,7 @@ free_hca:
|
||||
free(hca);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* IB component initialization:
|
||||
* (1) read interface list from kernel and compare against component parameters
|
||||
|
@ -12,7 +12,6 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "btl_openib.h"
|
||||
#include "ompi/mca/mpool/openib/mpool_openib.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
@ -20,7 +19,7 @@ extern "C" {
|
||||
|
||||
struct mca_btl_openib_eager_rdma_local_t {
|
||||
ompi_ptr_t base; /**< buffer for RDMAing eager messages */
|
||||
mca_mpool_openib_registration_t *reg;
|
||||
mca_btl_openib_reg_t *reg;
|
||||
uint16_t head; /**< RDMA buffer to poll */
|
||||
uint16_t tail; /**< Needed for credit management */
|
||||
int32_t credits; /**< number of RDMA credits */
|
||||
|
@ -1206,7 +1206,8 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
|
||||
openib_btl->eager_rdma_frag_size *
|
||||
mca_btl_openib_component.eager_rdma_num +
|
||||
mca_btl_openib_component.buffer_alignment +
|
||||
sizeof(mca_btl_openib_recv_frag_eager_t), 0, 0,
|
||||
sizeof(mca_btl_openib_recv_frag_eager_t), 0,
|
||||
MCA_MPOOL_FLAGS_CACHE_BYPASS,
|
||||
(mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);
|
||||
|
||||
if(!buf)
|
||||
@ -1221,7 +1222,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
|
||||
for(i = 0; i < mca_btl_openib_component.eager_rdma_num; i++) {
|
||||
ompi_free_list_item_t *item = (ompi_free_list_item_t *)(buf +
|
||||
i*openib_btl->eager_rdma_frag_size);
|
||||
item->user_data = endpoint->eager_rdma_local.reg;
|
||||
item->user_data = (void*)endpoint->eager_rdma_local.reg;
|
||||
OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_eager_t);
|
||||
((mca_btl_openib_frag_t*)item)->endpoint = endpoint;
|
||||
((mca_btl_openib_frag_t*)item)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
|
||||
|
@ -29,7 +29,6 @@
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/mca/mpool/openib/mpool_openib.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
|
@ -18,23 +18,21 @@
|
||||
|
||||
#include "btl_openib_frag.h"
|
||||
#include "btl_openib_eager_rdma.h"
|
||||
#include "ompi/mca/mpool/openib/mpool_openib.h"
|
||||
|
||||
|
||||
|
||||
static void mca_btl_openib_frag_common_constructor( mca_btl_openib_frag_t* frag)
|
||||
{
|
||||
mca_mpool_openib_registration_t* registration =
|
||||
(mca_mpool_openib_registration_t*) frag->base.super.user_data;
|
||||
mca_btl_openib_reg_t* registration =
|
||||
(mca_btl_openib_reg_t*)frag->base.super.user_data;
|
||||
|
||||
frag->hdr = (mca_btl_openib_header_t*) (frag+1); /* initialize the btl header to start at end of frag */
|
||||
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
|
||||
/* init the segment address to start after the btl header */
|
||||
|
||||
if(registration) {
|
||||
frag->mr = registration->mr;
|
||||
frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->lkey;
|
||||
frag->sg_entry.lkey = frag->mr->lkey;
|
||||
frag->registration = registration;
|
||||
frag->sg_entry.lkey = registration->mr->lkey;
|
||||
frag->segment.seg_key.key32[0] = frag->sg_entry.lkey;
|
||||
}
|
||||
frag->segment.seg_len = frag->size;
|
||||
frag->sg_entry.addr = (unsigned long) frag->hdr;
|
||||
|
@ -22,13 +22,14 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <infiniband/verbs.h>
|
||||
#include "ompi/mca/mpool/openib/mpool_openib.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct mca_btl_openib_reg_t;
|
||||
|
||||
struct mca_btl_openib_header_t {
|
||||
mca_btl_base_tag_t tag;
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
@ -173,8 +174,7 @@ struct mca_btl_openib_frag_t {
|
||||
struct ibv_send_wr sr_desc;
|
||||
} wr_desc;
|
||||
struct ibv_sge sg_entry;
|
||||
struct ibv_mr *mr;
|
||||
mca_mpool_openib_registration_t * openib_reg;
|
||||
struct mca_btl_openib_reg_t *registration;
|
||||
};
|
||||
typedef struct mca_btl_openib_frag_t mca_btl_openib_frag_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_openib_frag_t);
|
||||
|
@ -145,7 +145,7 @@ int btl_openib_register_mca_params(void)
|
||||
REGINT_GE_ONE));
|
||||
CHECK(reg_string("mpool",
|
||||
"Name of the memory pool to be used (it is unlikely that you will ever want to change this",
|
||||
"openib", &mca_btl_openib_component.ib_mpool_name,
|
||||
"rdma", &mca_btl_openib_component.ib_mpool_name,
|
||||
0));
|
||||
CHECK(reg_int("reg_mru_len",
|
||||
"Length of the registration cache most recently used list "
|
||||
|
@ -34,9 +34,12 @@
|
||||
#include "ompi/datatype/convertor.h"
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
static int udapl_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
static int udapl_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||
|
||||
mca_btl_udapl_module_t mca_btl_udapl_module = {
|
||||
{
|
||||
@ -67,6 +70,49 @@ mca_btl_udapl_module_t mca_btl_udapl_module = {
|
||||
}
|
||||
};
|
||||
|
||||
static int udapl_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_udapl_module_t *btl = (mca_btl_udapl_module_t*)reg_data;
|
||||
mca_btl_udapl_reg_t *udapl_reg = (mca_btl_udapl_reg_t*)reg;
|
||||
DAT_REGION_DESCRIPTION region;
|
||||
DAT_VLEN dat_size;
|
||||
DAT_VADDR dat_addr;
|
||||
int rc;
|
||||
|
||||
region.for_va = base;
|
||||
udapl_reg->lmr_triplet.virtual_address = (DAT_VADDR)base;
|
||||
udapl_reg->lmr_triplet.segment_length = size;
|
||||
udapl_reg->lmr = NULL;
|
||||
|
||||
rc = dat_lmr_create(btl->udapl_ia, DAT_MEM_TYPE_VIRTUAL, region, size,
|
||||
btl->udapl_pz, DAT_MEM_PRIV_ALL_FLAG, &udapl_reg->lmr,
|
||||
&udapl_reg->lmr_triplet.lmr_context, &udapl_reg->rmr_context,
|
||||
&dat_size, &dat_addr);
|
||||
|
||||
if(rc != DAT_SUCCESS) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Memory deregistration callback (installed as res.deregister_mem in
 * mca_btl_udapl_init()): free the uDAPL local memory region held by a
 * registration record, if there is one.
 *
 * @param reg_data  opaque cookie supplied at mpool creation; unused here
 * @param reg       registration record; interpreted as mca_btl_udapl_reg_t
 *
 * @retval OMPI_SUCCESS  no LMR was held, or it was freed; the record's lmr
 *                       handle is NULL afterwards
 * @retval OMPI_ERROR    dat_lmr_free() did not return DAT_SUCCESS; the lmr
 *                       handle is left untouched
 */
static int udapl_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_udapl_reg_t *udapl_reg = (mca_btl_udapl_reg_t*)reg;
    int rc;

    if(udapl_reg->lmr != NULL) {
        rc = dat_lmr_free(udapl_reg->lmr);
        if(rc != DAT_SUCCESS) {
            /* NOTE(review): the failure status is in rc; errno is printed
             * here but it is not evident that dat_lmr_free() sets it --
             * confirm before relying on this text for diagnosis */
            opal_output(0, "%s: error unpinning dapl memory errno says %s\n",
                    __func__, strerror(errno));
            return OMPI_ERROR;
        }
    }

    /* drop the freed handle so a second deregistration of the same record
     * cannot free it twice (mirrors openib_dereg_mr clearing its mr) */
    udapl_reg->lmr = NULL;

    return OMPI_SUCCESS;
}
|
||||
|
||||
/**
|
||||
* Initialize module resources.
|
||||
@ -153,9 +199,10 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
|
||||
((struct sockaddr_in*)&btl->udapl_addr.addr)->sin_port = htons(port);
|
||||
|
||||
/* initialize the memory pool */
|
||||
res.udapl_ia = btl->udapl_ia;
|
||||
res.udapl_pz = btl->udapl_pz;
|
||||
|
||||
res.reg_data = btl;
|
||||
res.sizeof_reg = sizeof(mca_btl_udapl_reg_t);
|
||||
res.register_mem = udapl_reg_mr;
|
||||
res.deregister_mem = udapl_dereg_mr;
|
||||
btl->super.btl_mpool = mca_mpool_base_module_create(
|
||||
mca_btl_udapl_component.udapl_mpool_name, &btl->super, &res);
|
||||
|
||||
@ -200,7 +247,6 @@ failure:
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Cleanup/release module resources.
|
||||
*/
|
||||
@ -352,8 +398,7 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc(
|
||||
((char *)frag->segment.seg_addr.pval + frag->segment.seg_len);
|
||||
frag->triplet.segment_length =
|
||||
frag->segment.seg_len + sizeof(mca_btl_udapl_footer_t);
|
||||
assert(frag->triplet.lmr_context ==
|
||||
((mca_mpool_udapl_registration_t*)frag->registration)->lmr_triplet.lmr_context);
|
||||
assert(frag->triplet.lmr_context == frag->registration->lmr_triplet.lmr_context);
|
||||
|
||||
frag->btl = udapl_btl;
|
||||
frag->base.des_src = &frag->segment;
|
||||
@ -376,8 +421,8 @@ int mca_btl_udapl_free(
|
||||
{
|
||||
mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)des;
|
||||
|
||||
if(frag->size == 0) {
|
||||
btl->btl_mpool->mpool_release(btl->btl_mpool, frag->registration);
|
||||
if(frag->size == 0 && frag->registration != NULL) {
|
||||
btl->btl_mpool->mpool_deregister(btl->btl_mpool, frag->registration);
|
||||
MCA_BTL_UDAPL_FRAG_RETURN_USER(btl, frag);
|
||||
} else if(frag->size == mca_btl_udapl_component.udapl_eager_frag_size) {
|
||||
MCA_BTL_UDAPL_FRAG_RETURN_EAGER(btl, frag);
|
||||
|
@ -106,6 +106,14 @@ struct mca_btl_udapl_module_t {
|
||||
typedef struct mca_btl_udapl_module_t mca_btl_udapl_module_t;
|
||||
extern mca_btl_udapl_module_t mca_btl_udapl_module;
|
||||
|
||||
/**
 * Per-registration record used by the udapl BTL. Its size is what
 * mca_btl_udapl_init() passes to the mpool as res.sizeof_reg.
 */
struct mca_btl_udapl_reg_t {
|
||||
mca_mpool_base_registration_t base; /* generic mpool registration; kept first because udapl_reg_mr()/udapl_dereg_mr() cast a mca_mpool_base_registration_t* straight to this type */
|
||||
DAT_LMR_HANDLE lmr; /* local memory region (LMR) */
|
||||
DAT_LMR_TRIPLET lmr_triplet; /* LMR triplet - context, address, length */
|
||||
DAT_RMR_CONTEXT rmr_context; /* remote memory region context handle */
|
||||
|
||||
};
|
||||
typedef struct mca_btl_udapl_reg_t mca_btl_udapl_reg_t;
|
||||
|
||||
/**
|
||||
* Report a uDAPL error - for debugging
|
||||
|
@ -33,7 +33,7 @@
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "btl_udapl.h"
|
||||
#include "btl_udapl_frag.h"
|
||||
#include "btl_udapl_endpoint.h"
|
||||
@ -149,7 +149,7 @@ int mca_btl_udapl_component_open(void)
|
||||
mca_btl_udapl_component.udapl_free_list_inc =
|
||||
mca_btl_udapl_param_register_int("free_list_inc", 8);
|
||||
mca_btl_udapl_component.udapl_mpool_name =
|
||||
mca_btl_udapl_param_register_string("mpool", "udapl");
|
||||
mca_btl_udapl_param_register_string("mpool", "rdma");
|
||||
mca_btl_udapl_component.udapl_max_btls =
|
||||
mca_btl_udapl_param_register_int("max_modules", 8);
|
||||
mca_btl_udapl_component.udapl_evd_qlen =
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/dss/dss.h"
|
||||
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "btl_udapl.h"
|
||||
#include "btl_udapl_endpoint.h"
|
||||
#include "btl_udapl_proc.h"
|
||||
|
@ -21,12 +21,12 @@
|
||||
|
||||
#include "btl_udapl.h"
|
||||
#include "btl_udapl_frag.h"
|
||||
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
|
||||
|
||||
static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
|
||||
{
|
||||
mca_mpool_udapl_registration_t* reg = frag->base.super.user_data;
|
||||
mca_btl_udapl_reg_t* reg = (mca_btl_udapl_reg_t*)frag->base.super.user_data;
|
||||
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
frag->base.des_src = NULL;
|
||||
|
@ -58,7 +58,7 @@ struct mca_btl_udapl_frag_t {
|
||||
|
||||
struct mca_btl_udapl_module_t* btl;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
struct mca_mpool_base_registration_t* registration;
|
||||
struct mca_btl_udapl_reg_t* registration;
|
||||
DAT_LMR_TRIPLET triplet;
|
||||
|
||||
mca_btl_udapl_footer_t *ftr;
|
||||
|
@ -40,7 +40,9 @@ static void mca_mpool_base_registration_constructor( mca_mpool_base_registration
|
||||
reg->mpool = NULL;
|
||||
reg->base = NULL;
|
||||
reg->bound = NULL;
|
||||
reg->alloc_base = NULL;
|
||||
reg->ref_count = 0;
|
||||
reg->flags = 0;
|
||||
}
|
||||
|
||||
static void mca_mpool_base_registration_destructor( mca_mpool_base_registration_t * reg )
|
||||
@ -74,58 +76,37 @@ OBJ_CLASS_INSTANCE(
|
||||
* @retval pointer to the allocated memory
|
||||
* @retval NULL on failure
|
||||
*/
|
||||
void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
|
||||
void *mca_mpool_base_alloc(size_t size, ompi_info_t *info)
|
||||
{
|
||||
opal_list_item_t * item;
|
||||
int num_modules = opal_list_get_size(&mca_mpool_base_modules);
|
||||
int reg_module_num = 0;
|
||||
int i, j, num_keys;
|
||||
int i, num_keys;
|
||||
mca_mpool_base_selected_module_t * current;
|
||||
mca_mpool_base_selected_module_t * no_reg_function = NULL;
|
||||
mca_mpool_base_selected_module_t ** has_reg_function = NULL;
|
||||
mca_mpool_base_registration_t * registration;
|
||||
mca_mpool_base_tree_item_t* mpool_tree_item;
|
||||
|
||||
mca_mpool_base_tree_item_t* mpool_tree_item = NULL;
|
||||
mca_mpool_base_module_t *mpool;
|
||||
void * mem = NULL;
|
||||
char * key = NULL;
|
||||
char * value = NULL;
|
||||
int flag = 0;
|
||||
bool match_found = false;
|
||||
bool mpool_requested = false;
|
||||
bool match_found = false, mpool_requested = false;
|
||||
|
||||
if (mca_mpool_base_use_mem_hooks &&
|
||||
0 != (OPAL_MEMORY_FREE_SUPPORT & opal_mem_hooks_support_level())) {
|
||||
/* if we're using memory hooks, it's possible (likely, based
|
||||
on testing) that for some tests the memory returned from
|
||||
any of the malloc functions below will be part of a larger
|
||||
(lazily) freed chunk and therefore already be pinned.
|
||||
Which causes our caches to get a little confused, as the
|
||||
alloc/free pair are supposed to always have an exact match
|
||||
in the rcache. This wasn't happening, leading to badness.
|
||||
Instead, just malloc and we'll get to the pinning later,
|
||||
when we try to first use it. Since we're leaving things
|
||||
pinned, there's no advantage to doing it now over first
|
||||
use, and it works if we wait ... */
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
|
||||
if (num_modules > 0) {
|
||||
if(num_modules > 0) {
|
||||
has_reg_function = (mca_mpool_base_selected_module_t **)
|
||||
malloc(num_modules * sizeof(mca_mpool_base_module_t *));
|
||||
if(!has_reg_function){
|
||||
return NULL;
|
||||
}
|
||||
malloc(num_modules * sizeof(mca_mpool_base_module_t *));
|
||||
if(!has_reg_function)
|
||||
goto out;
|
||||
}
|
||||
|
||||
mpool_tree_item = mca_mpool_base_tree_item_get();
|
||||
|
||||
if(NULL == mpool_tree_item){
|
||||
if(has_reg_function) {
|
||||
free(has_reg_function);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
if(!mpool_tree_item)
|
||||
goto out;
|
||||
|
||||
mpool_tree_item->count = 0;
|
||||
|
||||
if(&ompi_mpi_info_null == info)
|
||||
{
|
||||
@ -182,10 +163,7 @@ void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
|
||||
/* there was more than one requested mpool that lacks
|
||||
* a registration function, so return failure */
|
||||
free(key);
|
||||
if(has_reg_function) {
|
||||
free(has_reg_function);
|
||||
}
|
||||
return NULL;
|
||||
goto out;
|
||||
}
|
||||
no_reg_function = current;
|
||||
}
|
||||
@ -200,10 +178,7 @@ void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
|
||||
/* one of the keys given to us by the user did not match any
|
||||
* mpools, so return an error */
|
||||
free(key);
|
||||
if(has_reg_function) {
|
||||
free(has_reg_function);
|
||||
}
|
||||
return NULL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
free(key);
|
||||
@ -211,76 +186,59 @@ void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
|
||||
|
||||
if(NULL == no_reg_function && 0 == reg_module_num)
|
||||
{
|
||||
if(has_reg_function) {
|
||||
free(has_reg_function);
|
||||
}
|
||||
if(!mpool_requested)
|
||||
{
|
||||
/* if the info argument was NULL and there were no useable mpools
|
||||
* or the user provided an info object but did not specify an "mpool" key,
|
||||
* just malloc the memory and return it */
|
||||
mem = malloc(size);
|
||||
if(NULL != mem){
|
||||
/* don't need the tree */
|
||||
mca_mpool_base_tree_item_put(mpool_tree_item);
|
||||
return mem;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* the user passed info but we were not able to use any of the mpools
|
||||
* specified */
|
||||
return NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
i = j = 0;
|
||||
num_modules = 0;
|
||||
if(NULL != no_reg_function)
|
||||
{
|
||||
mca_mpool_base_module_t* mpool = no_reg_function->mpool_module;
|
||||
mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST, &registration);
|
||||
num_modules++;
|
||||
mpool_tree_item->key = mem;
|
||||
mpool_tree_item->mpools[j] = mpool;
|
||||
mpool_tree_item->regs[j++] = registration;
|
||||
}
|
||||
else
|
||||
{
|
||||
mca_mpool_base_module_t* mpool = has_reg_function[i]->mpool_module;
|
||||
mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST, &registration);
|
||||
i++;
|
||||
num_modules++;
|
||||
mpool_tree_item->key = mem;
|
||||
mpool_tree_item->mpools[j] = mpool;
|
||||
mpool_tree_item->regs[j++] = registration;
|
||||
mpool = no_reg_function->mpool_module;
|
||||
i = 0;
|
||||
} else {
|
||||
mpool = has_reg_function[0]->mpool_module;
|
||||
i = 1;
|
||||
}
|
||||
mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST,
|
||||
&registration);
|
||||
if(NULL == mem)
|
||||
goto out;
|
||||
|
||||
mpool_tree_item->key = mem;
|
||||
mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
|
||||
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
|
||||
|
||||
while(i < reg_module_num)
|
||||
{
|
||||
mca_mpool_base_module_t* mpool = has_reg_function[i]->mpool_module;
|
||||
if(OMPI_SUCCESS != mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST, &registration))
|
||||
{
|
||||
if (has_reg_function) {
|
||||
free(has_reg_function);
|
||||
}
|
||||
return NULL;
|
||||
} else {
|
||||
mpool_tree_item->mpools[j] = mpool;
|
||||
mpool_tree_item->regs[j++] = registration;
|
||||
num_modules++;
|
||||
mpool = has_reg_function[i]->mpool_module;
|
||||
if(mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST,
|
||||
&registration) != OMPI_SUCCESS) {
|
||||
goto out;
|
||||
}
|
||||
mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
|
||||
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
|
||||
i++;
|
||||
}
|
||||
if(has_reg_function) {
|
||||
free(has_reg_function);
|
||||
}
|
||||
|
||||
/* null terminated array */
|
||||
mpool_tree_item->mpools[j] = NULL;
|
||||
mpool_tree_item->regs[j] = NULL;
|
||||
|
||||
mca_mpool_base_tree_insert(mpool_tree_item);
|
||||
|
||||
mpool_tree_item = NULL; /* prevent it to be deleted below */
|
||||
out:
|
||||
if(mpool_tree_item)
|
||||
mca_mpool_base_tree_item_put(mpool_tree_item);
|
||||
|
||||
if(has_reg_function)
|
||||
free(has_reg_function);
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
@ -292,49 +250,38 @@ void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
|
||||
* @retval OMPI_SUCCESS
|
||||
* @retval OMPI_ERR_BAD_PARAM if the passed base pointer was invalid
|
||||
*/
|
||||
int mca_mpool_base_free(void * base)
|
||||
int mca_mpool_base_free(void *base)
|
||||
{
|
||||
int i = 0, rc = OMPI_SUCCESS;
|
||||
mca_mpool_base_tree_item_t* mpool_tree_item = NULL;
|
||||
mca_mpool_base_module_t* mpool;
|
||||
mca_mpool_base_registration_t* reg;
|
||||
|
||||
if(!base) {
|
||||
mca_mpool_base_tree_item_t *mpool_tree_item = NULL;
|
||||
mca_mpool_base_module_t *mpool;
|
||||
mca_mpool_base_registration_t *reg;
|
||||
int i, rc;
|
||||
|
||||
if(!base) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* see comment in alloc function above */
|
||||
if (mca_mpool_base_use_mem_hooks &&
|
||||
0 != (OPAL_MEMORY_FREE_SUPPORT & opal_mem_hooks_support_level())) {
|
||||
mpool_tree_item = mca_mpool_base_tree_find(base);
|
||||
|
||||
if(!mpool_tree_item) {
|
||||
/* nothing in the tree this was just plain old malloc'd memory */
|
||||
free(base);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
mpool_tree_item = mca_mpool_base_tree_find(base);
|
||||
|
||||
if(!mpool_tree_item) {
|
||||
/* nothing in the tree this was just
|
||||
plain old malloc'd memory */
|
||||
free(base);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
for(i = 1; i < MCA_MPOOL_BASE_TREE_MAX; i++) {
|
||||
for(i = 1; i < mpool_tree_item->count; i++) {
|
||||
mpool = mpool_tree_item->mpools[i];
|
||||
reg = mpool_tree_item->regs[i];
|
||||
if(mpool) {
|
||||
if(mpool && mpool->mpool_deregister) {
|
||||
mpool->mpool_deregister(mpool, reg);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
mpool = mpool_tree_item->mpools[0];
|
||||
reg = mpool_tree_item->regs[0];
|
||||
mpool->mpool_free(mpool, base, reg);
|
||||
|
||||
|
||||
rc = mca_mpool_base_tree_delete(mpool_tree_item);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
* @file
|
||||
*/
|
||||
#include "ompi_config.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "mpool_base_mem_cb.h"
|
||||
#include "base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
@ -35,51 +36,30 @@ ompi_pointer_array_t mca_mpool_base_mem_cb_array;
|
||||
void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata,
|
||||
bool from_alloc)
|
||||
{
|
||||
uint32_t i, cnt;
|
||||
mca_mpool_base_registration_t* reg;
|
||||
mca_mpool_base_selected_module_t* current;
|
||||
int rc;
|
||||
opal_list_item_t* item;
|
||||
void* base_addr;
|
||||
void* bound_addr;
|
||||
if(size == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
base_addr = down_align_addr( base, mca_mpool_base_page_size_log);
|
||||
bound_addr = up_align_addr((void*) ((ptrdiff_t) base + size - 1), mca_mpool_base_page_size_log);
|
||||
for(item = opal_list_get_first(&mca_mpool_base_modules);
|
||||
item != opal_list_get_end(&mca_mpool_base_modules);
|
||||
item = opal_list_get_next(item)) {
|
||||
bool warn = true;
|
||||
|
||||
current = (mca_mpool_base_selected_module_t*) item;
|
||||
|
||||
if(NULL != current->mpool_module->mpool_find) {
|
||||
rc = current->mpool_module->mpool_find(
|
||||
current->mpool_module,
|
||||
base_addr,
|
||||
size,
|
||||
&mca_mpool_base_mem_cb_array,
|
||||
&cnt
|
||||
);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
continue;
|
||||
|
||||
if(current->mpool_module->mpool_release_memory != NULL) {
|
||||
rc = current->mpool_module->mpool_release_memory(current->mpool_module,
|
||||
base, size);
|
||||
|
||||
if(rc != OMPI_SUCCESS && true == warn) {
|
||||
opal_output(0, "Memory %p:%llu cannot be freed from the "
|
||||
"registration cache. Possible memory corruption.\n",
|
||||
base, size);
|
||||
warn = false;
|
||||
}
|
||||
for(i = 0; i < cnt; i++) {
|
||||
reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&mca_mpool_base_mem_cb_array, i);
|
||||
#if !defined(NDEBUG)
|
||||
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(reg->ref_count <= 3);
|
||||
} else if(reg->flags & MCA_MPOOL_FLAGS_PERSIST) {
|
||||
assert(reg->ref_count <= 2);
|
||||
} else {
|
||||
assert(reg->ref_count <= 1);
|
||||
}
|
||||
#endif
|
||||
current->mpool_module->mpool_deregister(current->mpool_module, reg);
|
||||
}
|
||||
ompi_pointer_array_remove_all(&mca_mpool_base_mem_cb_array);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -38,7 +38,8 @@ struct mca_mpool_base_tree_item_t
|
||||
ompi_free_list_item_t super; /**< the parent class */
|
||||
void* key; /* the address this was alloc'd on */
|
||||
mca_mpool_base_module_t* mpools[MCA_MPOOL_BASE_TREE_MAX]; /**< the mpools */
|
||||
mca_mpool_base_registration_t* regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */
|
||||
mca_mpool_base_registration_t* regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */
|
||||
uint8_t count;
|
||||
};
|
||||
typedef struct mca_mpool_base_tree_item_t mca_mpool_base_tree_item_t;
|
||||
|
||||
|
1
ompi/mca/mpool/gm/.ompi_ignore
Обычный файл
1
ompi/mca/mpool/gm/.ompi_ignore
Обычный файл
@ -0,0 +1 @@
|
||||
quilt
|
@ -26,7 +26,7 @@
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/class/ompi_pointer_array.h"
|
||||
|
||||
#define MCA_MPOOL_FLAGS_CACHE 0x1
|
||||
#define MCA_MPOOL_FLAGS_CACHE_BYPASS 0x1
|
||||
#define MCA_MPOOL_FLAGS_PERSIST 0x2
|
||||
#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x4
|
||||
|
||||
@ -38,7 +38,6 @@ struct mca_mpool_base_registration_t {
|
||||
unsigned char* base;
|
||||
unsigned char* bound;
|
||||
unsigned char* alloc_base;
|
||||
void* user_data;
|
||||
int32_t ref_count;
|
||||
uint32_t flags;
|
||||
};
|
||||
@ -47,19 +46,6 @@ typedef struct mca_mpool_base_registration_t mca_mpool_base_registration_t;
|
||||
|
||||
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_registration_t);
|
||||
|
||||
#define MCA_MPOOL_REG_RETAIN(reg) { \
|
||||
do{ \
|
||||
OPAL_THREAD_ADD32(&reg->ref_count, 1); \
|
||||
} while(0); \
|
||||
}
|
||||
|
||||
#define MCA_MPOOL_REG_RELEASE(reg) { \
|
||||
do{ \
|
||||
OPAL_THREAD_ADD32(&reg->ref_count, -1); \
|
||||
} while(0); \
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* component initialize
|
||||
*/
|
||||
@ -111,26 +97,12 @@ typedef int (*mca_mpool_base_module_deregister_fn_t)(
|
||||
mca_mpool_base_registration_t* registration);
|
||||
|
||||
/**
|
||||
* find registrations in this memory pool
|
||||
* find registration in this memory pool
|
||||
*/
|
||||
|
||||
typedef int (*mca_mpool_base_module_find_fn_t) (
|
||||
struct mca_mpool_base_module_t* mpool,
|
||||
void* addr,
|
||||
size_t size,
|
||||
ompi_pointer_array_t* regs,
|
||||
uint32_t *cnt
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* retain registration
|
||||
*/
|
||||
|
||||
typedef int (*mca_mpool_base_module_retain_fn_t) (
|
||||
struct mca_mpool_base_module_t* mpool,
|
||||
mca_mpool_base_registration_t* registration);
|
||||
|
||||
struct mca_mpool_base_module_t* mpool, void* addr, size_t size,
|
||||
mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* release registration
|
||||
@ -141,6 +113,12 @@ typedef int (*mca_mpool_base_module_release_fn_t) (
|
||||
mca_mpool_base_registration_t* registration);
|
||||
|
||||
|
||||
/**
|
||||
* release memory region
|
||||
*/
|
||||
typedef int (*mca_mpool_base_module_release_memory_fn_t) (
|
||||
struct mca_mpool_base_module_t* mpool, void *base, size_t size);
|
||||
|
||||
/**
|
||||
* if appropriate - returns base address of memory pool
|
||||
*/
|
||||
@ -185,8 +163,8 @@ struct mca_mpool_base_module_t {
|
||||
mca_mpool_base_module_register_fn_t mpool_register; /**< register memory */
|
||||
mca_mpool_base_module_deregister_fn_t mpool_deregister; /**< deregister memory */
|
||||
mca_mpool_base_module_find_fn_t mpool_find; /**< find regisrations in the cache */
|
||||
mca_mpool_base_module_retain_fn_t mpool_retain; /**< retain a registration from the cache */
|
||||
mca_mpool_base_module_release_fn_t mpool_release; /**< release a registration from the cache */
|
||||
mca_mpool_base_module_release_memory_fn_t mpool_release_memory; /**< release memor region from the cache */
|
||||
mca_mpool_base_module_finalize_fn_t mpool_finalize; /**< finalize */
|
||||
struct mca_rcache_base_module_t *rcache; /* the rcache associated with this mpool */
|
||||
uint32_t flags; /**< mpool flags */
|
||||
|
1
ompi/mca/mpool/mvapi/.ompi_ignore
Обычный файл
1
ompi/mca/mpool/mvapi/.ompi_ignore
Обычный файл
@ -0,0 +1 @@
|
||||
quilt
|
1
ompi/mca/mpool/openib/.ompi_ignore
Обычный файл
1
ompi/mca/mpool/openib/.ompi_ignore
Обычный файл
@ -0,0 +1 @@
|
||||
quilt
|
55
ompi/mca/mpool/rdma/Makefile.am
Обычный файл
55
ompi/mca/mpool/rdma/Makefile.am
Обычный файл
@ -0,0 +1,55 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Use the top-level Makefile.options
|
||||
|
||||
|
||||
|
||||
AM_CPPFLAGS = $(mpool_rdma_CPPFLAGS)
|
||||
|
||||
sources = \
|
||||
mpool_rdma.h \
|
||||
mpool_rdma_module.c \
|
||||
mpool_rdma_component.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_mpool_rdma_DSO
|
||||
component_noinst =
|
||||
component_install = mca_mpool_rdma.la
|
||||
else
|
||||
component_noinst = libmca_mpool_rdma.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(libdir)/openmpi
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_mpool_rdma_la_SOURCES = $(sources)
|
||||
mca_mpool_rdma_la_LDFLAGS = -module -avoid-version
|
||||
mca_mpool_rdma_la_LIBADD = \
|
||||
$(mpool_rdma_LIBS) \
|
||||
$(top_ompi_builddir)/ompi/libmpi.la \
|
||||
$(top_ompi_builddir)/orte/libopen-rte.la \
|
||||
$(top_ompi_builddir)/opal/libopen-pal.la
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_mpool_rdma_la_SOURCES = $(sources)
|
||||
libmca_mpool_rdma_la_LDFLAGS = -module -avoid-version
|
||||
libmca_mpool_rdma_la_LIBADD = $(mpool_rdma_LIBS)
|
25
ompi/mca/mpool/rdma/configure.params
Обычный файл
25
ompi/mca/mpool/rdma/configure.params
Обычный файл
@ -0,0 +1,25 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_INIT_FILE=mpool_rdma_component.c
|
||||
PARAM_CONFIG_FILES="Makefile"
|
127
ompi/mca/mpool/rdma/mpool_rdma.h
Обычный файл
127
ompi/mca/mpool/rdma/mpool_rdma.h
Обычный файл
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_MPOOL_OPENIB_H
|
||||
#define MCA_MPOOL_OPENIB_H
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "opal/event/event.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct mca_mpool_rdma_component_t {
|
||||
mca_mpool_base_component_t super;
|
||||
char* rcache_name;
|
||||
size_t rcache_size_limit;
|
||||
bool print_stats;
|
||||
uint32_t leave_pinned;
|
||||
};
|
||||
typedef struct mca_mpool_rdma_component_t mca_mpool_rdma_component_t;
|
||||
|
||||
OMPI_DECLSPEC extern mca_mpool_rdma_component_t mca_mpool_rdma_component;
|
||||
|
||||
struct mca_mpool_base_resources_t {
|
||||
void *reg_data;
|
||||
size_t sizeof_reg;
|
||||
int (*register_mem)(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
|
||||
};
|
||||
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
|
||||
|
||||
struct mca_mpool_rdma_module_t {
|
||||
mca_mpool_base_module_t super;
|
||||
struct mca_mpool_base_resources_t resources;
|
||||
ompi_free_list_t reg_list;
|
||||
opal_list_t mru_list;
|
||||
uint32_t stat_cache_hit;
|
||||
uint32_t stat_cache_miss;
|
||||
uint32_t stat_evicted;
|
||||
uint32_t stat_cache_found;
|
||||
uint32_t stat_cache_notfound;
|
||||
}; typedef struct mca_mpool_rdma_module_t mca_mpool_rdma_module_t;
|
||||
|
||||
/*
|
||||
* Initializes the mpool module.
|
||||
*/
|
||||
void mca_mpool_rdma_module_init(mca_mpool_rdma_module_t *mpool);
|
||||
|
||||
/*
|
||||
* Returns base address of shared memory mapping.
|
||||
*/
|
||||
void *mca_mpool_rdma_base(mca_mpool_base_module_t *mpool);
|
||||
|
||||
/**
|
||||
* Allocate block of registered memory.
|
||||
*/
|
||||
void* mca_mpool_rdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
|
||||
size_t align, uint32_t flags,
|
||||
mca_mpool_base_registration_t** registration);
|
||||
|
||||
/**
|
||||
* realloc block of registered memory
|
||||
*/
|
||||
void* mca_mpool_rdma_realloc( mca_mpool_base_module_t *mpool, void* addr,
|
||||
size_t size, mca_mpool_base_registration_t** registration);
|
||||
|
||||
/**
|
||||
* register block of memory
|
||||
*/
|
||||
int mca_mpool_rdma_register(mca_mpool_base_module_t* mpool, void *addr,
|
||||
size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* deregister memory
|
||||
*/
|
||||
int mca_mpool_rdma_deregister(mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
|
||||
/**
|
||||
* free memory allocated by alloc function
|
||||
*/
|
||||
void mca_mpool_rdma_free(mca_mpool_base_module_t *mpool, void * addr,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
|
||||
/**
|
||||
* find registration for a given block of memory
|
||||
*/
|
||||
int mca_mpool_rdma_find(struct mca_mpool_base_module_t* mpool, void* addr,
|
||||
size_t size, mca_mpool_base_registration_t **reg);
|
||||
|
||||
/**
|
||||
* unregister all registration covering the block of memory
|
||||
*/
|
||||
int mca_mpool_rdma_release_memory(mca_mpool_base_module_t* mpool, void *base,
|
||||
size_t size);
|
||||
|
||||
/**
|
||||
* finalize mpool
|
||||
*/
|
||||
void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool);
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif
|
122
ompi/mca/mpool/rdma/mpool_rdma_component.c
Обычный файл
122
ompi/mca/mpool/rdma/mpool_rdma_component.c
Обычный файл
@ -0,0 +1,122 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "mpool_rdma.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/sys_info.h"
|
||||
#include <unistd.h>
|
||||
#include <malloc.h>
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int mca_mpool_rdma_open(void);
|
||||
static mca_mpool_base_module_t* mca_mpool_rdma_init(
|
||||
struct mca_mpool_base_resources_t* resources);
|
||||
|
||||
mca_mpool_rdma_component_t mca_mpool_rdma_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
/* Indicate that we are a mpool v1.0.0 component (which also
|
||||
implies a specific MCA version) */
|
||||
|
||||
MCA_MPOOL_BASE_VERSION_1_0_0,
|
||||
|
||||
"rdma", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
mca_mpool_rdma_open, /* component open */
|
||||
NULL
|
||||
},
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
false
|
||||
},
|
||||
|
||||
mca_mpool_rdma_init
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int mca_mpool_rdma_open(void)
|
||||
{
|
||||
int param, val;
|
||||
|
||||
mca_base_param_reg_string(&mca_mpool_rdma_component.super.mpool_version,
|
||||
"rcache_name",
|
||||
"The name of the registration cache the mpool should use",
|
||||
false, false, "vma", &mca_mpool_rdma_component.rcache_name);
|
||||
|
||||
mca_base_param_reg_int(&mca_mpool_rdma_component.super.mpool_version,
|
||||
"rcache_size_limit",
|
||||
"the maximum size of registration cache in bytes. "
|
||||
"0 is unlimited (default 0)", false, false, 0, &val);
|
||||
|
||||
mca_mpool_rdma_component.rcache_size_limit = (size_t)val;
|
||||
|
||||
mca_base_param_reg_int(&mca_mpool_rdma_component.super.mpool_version,
|
||||
"print_stats",
|
||||
"print pool usage statistics at the end of the run",
|
||||
false, false, 0, &val);
|
||||
|
||||
mca_mpool_rdma_component.print_stats = val?true:false;
|
||||
|
||||
mca_base_param_register_int("mpi", NULL, "leave_pinned", "leave_pinned", 0);
|
||||
param = mca_base_param_find("mpi", NULL, "leave_pinned");
|
||||
mca_base_param_lookup_int(param, (int*)&mca_mpool_rdma_component.leave_pinned);
|
||||
|
||||
if(0 == mca_mpool_rdma_component.leave_pinned) {
|
||||
/* and now check leave_pinned_pipeline if necessary */
|
||||
mca_base_param_register_int("mpi", NULL, "leave_pinned_pipeline",
|
||||
"leave_pinned_pipeline", 0);
|
||||
param = mca_base_param_find("mpi", NULL, "leave_pinned_pipeline");
|
||||
mca_base_param_lookup_int(param, (int*)&mca_mpool_rdma_component.leave_pinned);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static mca_mpool_base_module_t* mca_mpool_rdma_init(
|
||||
struct mca_mpool_base_resources_t *resources)
|
||||
{
|
||||
mca_mpool_rdma_module_t* mpool_module;
|
||||
|
||||
mpool_module =
|
||||
(mca_mpool_rdma_module_t*)malloc(sizeof(mca_mpool_rdma_module_t));
|
||||
|
||||
mpool_module->resources = *resources;
|
||||
|
||||
mca_mpool_rdma_module_init(mpool_module);
|
||||
|
||||
return &mpool_module->super;
|
||||
}
|
395
ompi/mca/mpool/rdma/mpool_rdma_module.c
Обычный файл
395
ompi/mca/mpool/rdma/mpool_rdma_module.c
Обычный файл
@ -0,0 +1,395 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
#include "ompi/mca/rcache/rcache.h"
|
||||
#include "ompi/mca/rcache/base/base.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
|
||||
extern uint32_t mca_mpool_base_page_size;
|
||||
extern uint32_t mca_mpool_base_page_size_log;
|
||||
|
||||
/*
|
||||
* Initializes the mpool module.
|
||||
*/
|
||||
void mca_mpool_rdma_module_init(mca_mpool_rdma_module_t* mpool)
|
||||
{
|
||||
mpool->super.mpool_component = &mca_mpool_rdma_component.super;
|
||||
mpool->super.mpool_base = NULL; /* no base .. */
|
||||
mpool->super.mpool_alloc = mca_mpool_rdma_alloc;
|
||||
mpool->super.mpool_realloc = mca_mpool_rdma_realloc;
|
||||
mpool->super.mpool_free = mca_mpool_rdma_free;
|
||||
mpool->super.mpool_register = mca_mpool_rdma_register;
|
||||
mpool->super.mpool_find = mca_mpool_rdma_find;
|
||||
mpool->super.mpool_deregister = mca_mpool_rdma_deregister;
|
||||
mpool->super.mpool_release_memory = mca_mpool_rdma_release_memory;
|
||||
if(mca_mpool_rdma_component.print_stats == true)
|
||||
mpool->super.mpool_finalize = mca_mpool_rdma_finalize;
|
||||
else
|
||||
mpool->super.mpool_finalize = NULL;
|
||||
mpool->super.rcache =
|
||||
mca_rcache_base_module_create(mca_mpool_rdma_component.rcache_name);
|
||||
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
|
||||
|
||||
OBJ_CONSTRUCT(&mpool->reg_list, ompi_free_list_t);
|
||||
ompi_free_list_init(&mpool->reg_list, mpool->resources.sizeof_reg,
|
||||
OBJ_CLASS(mca_mpool_base_registration_t), 0, -1, 32,
|
||||
NULL);
|
||||
OBJ_CONSTRUCT(&mpool->mru_list, opal_list_t);
|
||||
mpool->stat_cache_hit = mpool->stat_cache_miss = mpool->stat_evicted = 0;
|
||||
mpool->stat_cache_found = mpool->stat_cache_notfound = 0;
|
||||
}
|
||||
|
||||
static inline int dereg_mem(mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t *)mpool;
|
||||
|
||||
assert(reg->ref_count == 0);
|
||||
return mpool_rdma->resources.deregister_mem(mpool_rdma->resources.reg_data,
|
||||
reg);
|
||||
}
|
||||
|
||||
/**
|
||||
* allocate function
|
||||
*/
|
||||
void* mca_mpool_rdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
|
||||
size_t align, uint32_t flags, mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
void *addr;
|
||||
|
||||
if(posix_memalign(&addr, mca_mpool_base_page_size, size) != 0)
|
||||
return NULL;
|
||||
|
||||
if(OMPI_SUCCESS != mca_mpool_rdma_register(mpool, addr, size, flags, reg)) {
|
||||
free(addr);
|
||||
return NULL;
|
||||
}
|
||||
(*reg)->alloc_base = addr;
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
static int register_cache_bypass(mca_mpool_base_module_t *mpool,
|
||||
void *addr, size_t size, uint32_t flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
|
||||
mca_mpool_base_registration_t *rdma_reg;
|
||||
ompi_free_list_item_t *item;
|
||||
unsigned char *base, *bound;
|
||||
int rc;
|
||||
|
||||
base = down_align_addr(addr, mca_mpool_base_page_size_log);
|
||||
bound = up_align_addr( (void*) ((char*) addr + size - 1),
|
||||
mca_mpool_base_page_size_log);
|
||||
OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return rc;
|
||||
}
|
||||
rdma_reg = (mca_mpool_base_registration_t*)item;
|
||||
|
||||
rdma_reg->mpool = mpool;
|
||||
rdma_reg->base = base;
|
||||
rdma_reg->bound = bound;
|
||||
rdma_reg->flags = flags;
|
||||
|
||||
rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
|
||||
base, bound - base + 1, rdma_reg);
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
|
||||
return rc;
|
||||
}
|
||||
|
||||
*reg = rdma_reg;
|
||||
(*reg)->ref_count++;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* register memory
|
||||
*/
|
||||
int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, uint32_t flags,
|
||||
mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
|
||||
mca_mpool_base_registration_t *rdma_reg;
|
||||
ompi_free_list_item_t *item;
|
||||
unsigned char *base, *bound;
|
||||
int rc;
|
||||
|
||||
/* if cache bypass is requested don't use the cache */
|
||||
if(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
|
||||
return register_cache_bypass(mpool, addr, size, flags, reg);
|
||||
}
|
||||
|
||||
base = down_align_addr(addr, mca_mpool_base_page_size_log);
|
||||
bound = up_align_addr((void*)((char*) addr + size - 1),
|
||||
mca_mpool_base_page_size_log);
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
/* look through existing regs if not persistent registration requested.
|
||||
* Persistent registration are always registered and placed in the cache */
|
||||
if(!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
|
||||
/* check to see if memory is registered */
|
||||
mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
|
||||
if(*reg != NULL &&
|
||||
(mca_mpool_rdma_component.leave_pinned ||
|
||||
((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
|
||||
((*reg)->base == base && (*reg)->bound == bound))) {
|
||||
if(0 == (*reg)->ref_count &&
|
||||
mca_mpool_rdma_component.leave_pinned) {
|
||||
opal_list_remove_item(&mpool_rdma->mru_list,
|
||||
(opal_list_item_t*)(*reg));
|
||||
}
|
||||
mpool_rdma->stat_cache_hit++;
|
||||
(*reg)->ref_count++;
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
mpool_rdma->stat_cache_miss++;
|
||||
*reg = NULL; /* in case previous find found something */
|
||||
|
||||
/* If no suitable registration is in cache and leave_pinned isn't
|
||||
* set and size of registration cache is unlimited don't use the cache.
|
||||
* This is optimisation in case limit is not set. If limit is set we
|
||||
* have to put registration into the cache to determine when we hit
|
||||
* memory registration limit.
|
||||
* NONE: cache is still used for persistent registrations so previous
|
||||
* find can find something */
|
||||
if(!mca_mpool_rdma_component.leave_pinned &&
|
||||
mca_mpool_rdma_component.rcache_size_limit == 0) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return register_cache_bypass(mpool, addr, size, flags, reg);
|
||||
}
|
||||
}
|
||||
|
||||
OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return rc;
|
||||
}
|
||||
rdma_reg = (mca_mpool_base_registration_t*)item;
|
||||
|
||||
rdma_reg->mpool = mpool;
|
||||
rdma_reg->base = base;
|
||||
rdma_reg->bound = bound;
|
||||
rdma_reg->flags = flags;
|
||||
|
||||
while((rc = mpool->rcache->rcache_insert(mpool->rcache, rdma_reg,
|
||||
mca_mpool_rdma_component.rcache_size_limit)) ==
|
||||
OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
|
||||
mca_mpool_base_registration_t *old_reg;
|
||||
/* try to remove one unused reg and retry */
|
||||
old_reg = (mca_mpool_base_registration_t*)
|
||||
opal_list_get_last(&mpool_rdma->mru_list);
|
||||
if(opal_list_get_end(&mpool_rdma->mru_list) !=
|
||||
(opal_list_item_t*)old_reg) {
|
||||
rc = dereg_mem(mpool, old_reg);
|
||||
if(MPI_SUCCESS == rc) {
|
||||
mpool->rcache->rcache_delete(mpool->rcache, old_reg);
|
||||
opal_list_remove_item(&mpool_rdma->mru_list,
|
||||
(opal_list_item_t*)old_reg);
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
|
||||
(ompi_free_list_item_t*)old_reg);
|
||||
mpool_rdma->stat_evicted++;
|
||||
} else
|
||||
break;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
|
||||
base, bound - base + 1, rdma_reg);
|
||||
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
mpool->rcache->rcache_delete(mpool->rcache, rdma_reg);
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
|
||||
return rc;
|
||||
}
|
||||
|
||||
*reg = rdma_reg;
|
||||
(*reg)->ref_count++;
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* realloc function
|
||||
*/
|
||||
void* mca_mpool_rdma_realloc(mca_mpool_base_module_t *mpool, void *addr,
|
||||
size_t size, mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
mca_mpool_base_registration_t *old_reg = *reg;
|
||||
void *new_mem = mca_mpool_rdma_alloc(mpool, size, 0, old_reg->flags, reg);
|
||||
memcpy(new_mem, addr, old_reg->bound - old_reg->base + 1);
|
||||
mca_mpool_rdma_free(mpool, addr, old_reg);
|
||||
|
||||
return new_mem;
|
||||
}
|
||||
|
||||
/**
|
||||
* free function
|
||||
*/
|
||||
void mca_mpool_rdma_free(mca_mpool_base_module_t *mpool, void *addr,
|
||||
mca_mpool_base_registration_t *registration)
|
||||
{
|
||||
mca_mpool_rdma_deregister(mpool, registration);
|
||||
free(registration->alloc_base);
|
||||
}
|
||||
|
||||
/*
 * Look up an existing registration for [addr, addr + size) in the cache.
 *
 * A cached registration is accepted when leave_pinned is set, when the
 * registration is persistent, or when it covers exactly the same
 * page-aligned range.  An accepted registration is returned in *reg with its
 * ref_count incremented; otherwise *reg is set to NULL so that the caller
 * never sees a registration it holds no reference on.
 *
 * Returns whatever rcache_find() returned.
 */
int mca_mpool_rdma_find(struct mca_mpool_base_module_t *mpool, void *addr,
        size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    int rc;
    unsigned char *base, *bound;

    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr((void*)((char*)addr + size - 1),
            mca_mpool_base_page_size_log);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    rc = mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
    if(*reg != NULL &&
            (mca_mpool_rdma_component.leave_pinned ||
             ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
             ((*reg)->base == base && (*reg)->bound == bound))) {
        assert(((void*)(*reg)->bound) >= addr);
        /* an idle registration sits on the MRU list while leave_pinned is
         * set; take it off the list now that it is in use again */
        if(0 == (*reg)->ref_count &&
                mca_mpool_rdma_component.leave_pinned) {
            opal_list_remove_item(&mpool_rdma->mru_list,
                    (opal_list_item_t*)(*reg));
        }
        mpool_rdma->stat_cache_found++;
        (*reg)->ref_count++;
    } else {
        mpool_rdma->stat_cache_notfound++;
        /* no reference was taken, so do not expose an unsuitable entry */
        *reg = NULL;
    }

    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return rc;
}
|
||||
|
||||
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
|
||||
int rc = OMPI_SUCCESS;
|
||||
assert(reg->ref_count > 0);
|
||||
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
reg->ref_count--;
|
||||
if(reg->ref_count > 0) {
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
if(mca_mpool_rdma_component.leave_pinned &&
|
||||
!(reg->flags & (MCA_MPOOL_FLAGS_CACHE_BYPASS|MCA_MPOOL_FLAGS_PERSIST))) {
|
||||
/* if leave_pinned is set don't deregister memory, but put it
|
||||
* on MRU list for future use */
|
||||
opal_list_prepend(&mpool_rdma->mru_list, (opal_list_item_t*)reg);
|
||||
} else {
|
||||
rc = dereg_mem(mpool, reg);
|
||||
if(OMPI_SUCCESS == rc) {
|
||||
if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
|
||||
mpool->rcache->rcache_delete(mpool->rcache, reg);
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
|
||||
(ompi_free_list_item_t*)reg);
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mca_mpool_rdma_release_memory(struct mca_mpool_base_module_t *mpool,
|
||||
void *base, size_t size)
|
||||
{
|
||||
mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
|
||||
mca_mpool_base_registration_t *reg;
|
||||
ompi_pointer_array_t regs;
|
||||
int reg_cnt, i, err = 0;
|
||||
|
||||
OBJ_CONSTRUCT(®s, ompi_pointer_array_t);
|
||||
|
||||
OPAL_THREAD_LOCK(&mpool->rcache->lock);
|
||||
reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, base, size, ®s);
|
||||
|
||||
for(i = 0; i < reg_cnt; i++) {
|
||||
reg = (mca_mpool_base_registration_t*)
|
||||
ompi_pointer_array_get_item(®s, i);
|
||||
|
||||
if(0 == reg->ref_count) {
|
||||
if(dereg_mem(mpool, reg) != OMPI_SUCCESS) {
|
||||
err++;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
/* remove registration from cache and wait for ref_count goes to
|
||||
* zero before unregister memory. Note that our registered memory
|
||||
* statistic can go wrong at this point, but it is better than
|
||||
* potential memory corruption. And we return error in this case to
|
||||
* the caller */
|
||||
reg->flags |= MCA_MPOOL_FLAGS_CACHE_BYPASS;
|
||||
err++; /* tell caller that something was wrong */
|
||||
}
|
||||
mpool->rcache->rcache_delete(mpool->rcache, reg);
|
||||
if(0 == reg->ref_count) {
|
||||
opal_list_remove_item(&mpool_rdma->mru_list,
|
||||
(opal_list_item_t*)reg);
|
||||
OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
|
||||
(ompi_free_list_item_t*)reg);
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
|
||||
ompi_pointer_array_remove_all(®s);
|
||||
|
||||
return err?OMPI_ERROR:OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
|
||||
{
|
||||
mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
|
||||
opal_output(0, "[%lu,%lu,%lu] rdma: stats "
|
||||
"(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name),
|
||||
mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss,
|
||||
mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound,
|
||||
mpool_rdma->stat_evicted);
|
||||
}
|
@ -36,6 +36,7 @@ void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool)
|
||||
mpool->super.mpool_find = NULL;
|
||||
mpool->super.mpool_register = NULL;
|
||||
mpool->super.mpool_deregister = NULL;
|
||||
mpool->super.mpool_release_memory = NULL;
|
||||
mpool->super.mpool_finalize = NULL;
|
||||
mpool->super.flags = 0;
|
||||
}
|
||||
|
1
ompi/mca/mpool/udapl/.ompi_ignore
Обычный файл
1
ompi/mca/mpool/udapl/.ompi_ignore
Обычный файл
@ -0,0 +1 @@
|
||||
quilt
|
@ -42,10 +42,8 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
mca_pml_ob1_rdma_btl_t* rdma_btls)
|
||||
{
|
||||
size_t num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
|
||||
ompi_pointer_array_t regs;
|
||||
size_t num_btls_used = 0;
|
||||
size_t n;
|
||||
int rc;
|
||||
|
||||
/* shortcut when there are no rdma capable btls */
|
||||
if(num_btls == 0) {
|
||||
@ -53,223 +51,34 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
}
|
||||
|
||||
/* check to see if memory is registered */
|
||||
OBJ_CONSTRUCT(®s, ompi_pointer_array_t);
|
||||
for(n = 0; n < num_btls && num_btls_used < MCA_PML_OB1_MAX_RDMA_PER_REQUEST; n++) {
|
||||
|
||||
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n);
|
||||
mca_mpool_base_registration_t* fit = NULL;
|
||||
mca_mpool_base_module_t* btl_mpool = bml_btl->btl_mpool;
|
||||
uint32_t reg_cnt;
|
||||
size_t r;
|
||||
for(n = 0; n < num_btls && num_btls_used < MCA_PML_OB1_MAX_RDMA_PER_REQUEST;
|
||||
n++) {
|
||||
mca_bml_base_btl_t* bml_btl =
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n);
|
||||
mca_mpool_base_registration_t* reg = NULL;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl_mpool;
|
||||
|
||||
/* btl is rdma capable and registration is not required */
|
||||
if(NULL == btl_mpool) {
|
||||
rdma_btls[num_btls_used].bml_btl = bml_btl;
|
||||
rdma_btls[num_btls_used].btl_reg = NULL;
|
||||
num_btls_used++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* look through existing registrations */
|
||||
ompi_pointer_array_remove_all(®s);
|
||||
btl_mpool->mpool_find(btl_mpool,
|
||||
base,
|
||||
size,
|
||||
®s,
|
||||
®_cnt);
|
||||
|
||||
/*
|
||||
* find the best fit when there are multiple registrations
|
||||
*/
|
||||
for(r = 0; r < reg_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(®s, r);
|
||||
size_t reg_len = reg->bound - base + 1;
|
||||
reg = NULL;
|
||||
} else {
|
||||
if(!mca_pml_ob1.leave_pinned) {
|
||||
/* look through existing registrations */
|
||||
btl_mpool->mpool_find(btl_mpool, base, size, ®);
|
||||
} else {
|
||||
/* register the memory */
|
||||
btl_mpool->mpool_register(btl_mpool, base, size, 0, ®);
|
||||
}
|
||||
|
||||
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(reg->ref_count >= 3);
|
||||
}
|
||||
if(reg->base <= base && reg_len >= size) {
|
||||
fit = reg;
|
||||
} else if(mca_pml_ob1.leave_pinned){
|
||||
btl_mpool->mpool_deregister(btl_mpool, reg);
|
||||
} else {
|
||||
btl_mpool->mpool_release(btl_mpool, reg);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* if the leave pinned option is set - and there is not an existing
|
||||
* registration that satisfies this request, create one.
|
||||
*/
|
||||
if(NULL == fit && mca_pml_ob1.leave_pinned) {
|
||||
/* register the memory */
|
||||
rc = btl_mpool->mpool_register(
|
||||
btl_mpool,
|
||||
base,
|
||||
size,
|
||||
MCA_MPOOL_FLAGS_CACHE,
|
||||
&fit);
|
||||
if(ORTE_SUCCESS != rc || NULL == fit) {
|
||||
opal_output(0, "[%s:%d] mpool_register(%p,%lu) failed, \n", __FILE__, __LINE__, base, size);
|
||||
continue;
|
||||
}
|
||||
assert(fit->ref_count == 3);
|
||||
if(NULL == reg)
|
||||
bml_btl = NULL; /* skip it */
|
||||
}
|
||||
|
||||
|
||||
if(NULL != fit) {
|
||||
if(bml_btl != NULL) {
|
||||
rdma_btls[num_btls_used].bml_btl = bml_btl;
|
||||
rdma_btls[num_btls_used].btl_reg = fit;
|
||||
rdma_btls[num_btls_used].btl_reg = reg;
|
||||
num_btls_used++;
|
||||
}
|
||||
}
|
||||
return num_btls_used;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* For a given btl - find the best fit registration or
|
||||
* optionally create one for leave pinned.
|
||||
*/
|
||||
|
||||
mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration(
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
unsigned char* base,
|
||||
size_t size)
|
||||
{
|
||||
ompi_pointer_array_t regs;
|
||||
mca_mpool_base_registration_t* fit = NULL;
|
||||
mca_mpool_base_module_t* btl_mpool = bml_btl->btl_mpool;
|
||||
uint32_t reg_cnt;
|
||||
size_t r;
|
||||
int rc;
|
||||
|
||||
/* btl is rdma capable and registration is not required */
|
||||
if(NULL == btl_mpool) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* check to see if memory is registered */
|
||||
OBJ_CONSTRUCT(®s, ompi_pointer_array_t);
|
||||
ompi_pointer_array_remove_all(®s);
|
||||
|
||||
/* look through existing registrations */
|
||||
btl_mpool->mpool_find(btl_mpool,
|
||||
base,
|
||||
size,
|
||||
®s,
|
||||
®_cnt);
|
||||
|
||||
|
||||
/*
|
||||
* find the best fit when there are multiple registrations
|
||||
*/
|
||||
for(r = 0; r < reg_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(®s, r);
|
||||
size_t reg_len = reg->bound - base + 1;
|
||||
|
||||
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(reg->ref_count >= 3);
|
||||
}
|
||||
if(reg->base <= base && reg_len >= size) {
|
||||
fit = reg;
|
||||
} else if(mca_pml_ob1.leave_pinned){
|
||||
btl_mpool->mpool_deregister(btl_mpool, reg);
|
||||
} else {
|
||||
btl_mpool->mpool_release(btl_mpool, reg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* if the leave pinned option is set - and there is not an existing
|
||||
* registration that satisfies this request, create one.
|
||||
*/
|
||||
if(NULL == fit && mca_pml_ob1.leave_pinned) {
|
||||
/* register the memory */
|
||||
rc = btl_mpool->mpool_register(
|
||||
btl_mpool,
|
||||
base,
|
||||
size,
|
||||
MCA_MPOOL_FLAGS_CACHE,
|
||||
&fit);
|
||||
if(ORTE_SUCCESS != rc || NULL == fit) {
|
||||
opal_output(0, "[%s:%d] mpool_register(%p,%lu) failed, \n", __FILE__, __LINE__, base, size);
|
||||
return NULL;
|
||||
}
|
||||
assert(fit->ref_count == 3);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(®s);
|
||||
return fit;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* For a given btl - find the best fit registration or
|
||||
* optionally create one for leave pinned.
|
||||
*/
|
||||
|
||||
mca_mpool_base_registration_t* mca_pml_ob1_rdma_register(
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
unsigned char* base,
|
||||
size_t size)
|
||||
{
|
||||
ompi_pointer_array_t regs;
|
||||
mca_mpool_base_registration_t* fit = NULL;
|
||||
mca_mpool_base_module_t* btl_mpool = bml_btl->btl_mpool;
|
||||
uint32_t reg_cnt;
|
||||
size_t r;
|
||||
int rc;
|
||||
|
||||
/* btl is rdma capable and registration is not required */
|
||||
if(NULL == btl_mpool) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* check to see if memory is registered */
|
||||
OBJ_CONSTRUCT(®s, ompi_pointer_array_t);
|
||||
ompi_pointer_array_remove_all(®s);
|
||||
|
||||
/* look through existing registrations */
|
||||
btl_mpool->mpool_find(btl_mpool,
|
||||
base,
|
||||
size,
|
||||
®s,
|
||||
®_cnt);
|
||||
|
||||
|
||||
/*
|
||||
* find the best fit when there are multiple registrations
|
||||
*/
|
||||
for(r = 0; r < reg_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(®s, r);
|
||||
size_t reg_len = reg->bound - base + 1;
|
||||
if(reg->base <= base && reg_len >= size) {
|
||||
fit = reg;
|
||||
} else {
|
||||
btl_mpool->mpool_deregister(btl_mpool, reg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* if the leave pinned option is set - and there is not an existing
|
||||
* registration that satisfies this request, create one.
|
||||
*/
|
||||
if(NULL == fit) {
|
||||
/* register the memory */
|
||||
rc = btl_mpool->mpool_register(
|
||||
btl_mpool,
|
||||
base,
|
||||
size,
|
||||
MCA_MPOOL_FLAGS_CACHE,
|
||||
&fit);
|
||||
if(ORTE_SUCCESS != rc || NULL == fit) {
|
||||
opal_output(0, "[%s:%d] mpool_register(%p,%lu) failed, \n", __FILE__, __LINE__, base, size);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
OBJ_DESTRUCT(®s);
|
||||
return fit;
|
||||
}
|
||||
|
@ -43,31 +43,8 @@ typedef struct mca_pml_ob1_rdma_btl_t mca_pml_ob1_rdma_btl_t;
|
||||
* find those that already have registrations - or
|
||||
* register if required (for leave_pinned option)
|
||||
*/
|
||||
|
||||
size_t mca_pml_ob1_rdma_btls(
|
||||
struct mca_bml_base_endpoint_t* endpoint,
|
||||
unsigned char* base,
|
||||
size_t size,
|
||||
struct mca_pml_ob1_rdma_btl_t* btls);
|
||||
|
||||
/*
|
||||
* For a given rdma capable btl - find the best fit
|
||||
* registration or create one for leave pinned.
|
||||
*/
|
||||
|
||||
mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration(
|
||||
struct mca_bml_base_btl_t* bml_btl,
|
||||
unsigned char* base,
|
||||
size_t size);
|
||||
|
||||
/*
|
||||
* Create a registration
|
||||
*/
|
||||
|
||||
mca_mpool_base_registration_t* mca_pml_ob1_rdma_register(
|
||||
struct mca_bml_base_btl_t* bml_btl,
|
||||
unsigned char* base,
|
||||
size_t size);
|
||||
size_t mca_pml_ob1_rdma_btls(struct mca_bml_base_endpoint_t* endpoint,
|
||||
unsigned char* base, size_t size, struct mca_pml_ob1_rdma_btl_t* btls);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -257,38 +257,9 @@ static int mca_pml_ob1_recv_request_ack(
|
||||
if (hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_PIN &&
|
||||
recvreq->req_rdma_cnt != 0) {
|
||||
|
||||
/* start rdma at current fragment offset - no need to ack */
|
||||
recvreq->req_rdma_offset = bytes_received;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
/* are rdma devices available for long rdma protocol */
|
||||
if( mca_pml_ob1.leave_pinned_pipeline &&
|
||||
hdr->hdr_msg_length > bml_endpoint->btl_rdma_size &&
|
||||
mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma) ) {
|
||||
char* base;
|
||||
char* align;
|
||||
ptrdiff_t lb;
|
||||
|
||||
/* round this up/down to the next aligned address */
|
||||
ompi_ddt_type_lb(recvreq->req_recv.req_convertor.pDesc, &lb);
|
||||
base = recvreq->req_recv.req_convertor.pBaseBuf + lb;
|
||||
align = (char*)up_align_addr(base, bml_endpoint->btl_rdma_align)+1;
|
||||
recvreq->req_rdma_offset = align - base;
|
||||
|
||||
/* still w/in range */
|
||||
if(recvreq->req_rdma_offset < bytes_received) {
|
||||
recvreq->req_rdma_offset = bytes_received;
|
||||
}
|
||||
if(recvreq->req_rdma_offset > hdr->hdr_msg_length) {
|
||||
recvreq->req_rdma_offset = hdr->hdr_msg_length;
|
||||
} else {
|
||||
ompi_convertor_set_position( &recvreq->req_recv.req_convertor,
|
||||
&recvreq->req_rdma_offset );
|
||||
}
|
||||
|
||||
/* are rdma devices available for long rdma protocol */
|
||||
} else if (!mca_pml_ob1.leave_pinned_pipeline &&
|
||||
bml_endpoint->btl_rdma_offset < hdr->hdr_msg_length &&
|
||||
} else if (bml_endpoint->btl_rdma_offset < hdr->hdr_msg_length &&
|
||||
mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma)) {
|
||||
|
||||
/* use convertor to figure out the rdma offset for this request */
|
||||
@ -300,6 +271,9 @@ static int mca_pml_ob1_recv_request_ack(
|
||||
&recvreq->req_rdma_offset );
|
||||
}
|
||||
}
|
||||
/* start rdma at current fragment offset - no need to ack */
|
||||
if(recvreq->req_rdma_offset == bytes_received)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
/* let the schedule function know there is no need to set the ACK flag */
|
||||
recvreq->req_ack_sent = true;
|
||||
@ -359,7 +333,6 @@ int mca_pml_ob1_recv_request_get_frag(
|
||||
mca_bml_base_endpoint_t* bml_endpoint = frag->rdma_ep;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
mca_btl_base_descriptor_t* descriptor;
|
||||
mca_mpool_base_registration_t* reg;
|
||||
size_t save_size = frag->rdma_length;
|
||||
int rc;
|
||||
|
||||
@ -370,21 +343,10 @@ int mca_pml_ob1_recv_request_get_frag(
|
||||
orte_errmgr.abort();
|
||||
}
|
||||
|
||||
/* is there an existing registration for this btl */
|
||||
reg = mca_pml_ob1_rdma_registration(
|
||||
bml_btl,
|
||||
(unsigned char*)recvreq->req_recv.req_base.req_addr,
|
||||
recvreq->req_recv.req_bytes_packed);
|
||||
if(NULL != reg) {
|
||||
recvreq->req_rdma[0].bml_btl = bml_btl;
|
||||
recvreq->req_rdma[0].btl_reg = reg;
|
||||
recvreq->req_rdma_cnt = 1;
|
||||
}
|
||||
|
||||
/* prepare descriptor */
|
||||
mca_bml_base_prepare_dst(
|
||||
bml_btl,
|
||||
reg,
|
||||
NULL,
|
||||
&recvreq->req_recv.req_convertor,
|
||||
0,
|
||||
&frag->rdma_length,
|
||||
@ -622,7 +584,6 @@ int mca_pml_ob1_recv_request_schedule_exclusive( mca_pml_ob1_recv_request_t* rec
|
||||
mca_btl_base_descriptor_t* ctl;
|
||||
mca_mpool_base_registration_t * reg = NULL;
|
||||
int rc;
|
||||
bool release = false;
|
||||
|
||||
if(prev_bytes_remaining == bytes_remaining) {
|
||||
if( ++num_fail == num_tries ) {
|
||||
@ -689,29 +650,9 @@ int mca_pml_ob1_recv_request_schedule_exclusive( mca_pml_ob1_recv_request_t* rec
|
||||
size = bml_btl->btl_max_rdma_size;
|
||||
}
|
||||
|
||||
if(0 == recvreq->req_rdma_cnt) {
|
||||
char* base;
|
||||
ptrdiff_t lb;
|
||||
|
||||
if(mca_pml_ob1.leave_pinned_pipeline) {
|
||||
/* lookup and/or create a cached registration */
|
||||
ompi_ddt_type_lb(recvreq->req_recv.req_convertor.pDesc,
|
||||
&lb);
|
||||
base = recvreq->req_recv.req_convertor.pBaseBuf + lb +
|
||||
recvreq->req_rdma_offset;
|
||||
reg = mca_pml_ob1_rdma_register(bml_btl,
|
||||
(unsigned char*)base, size);
|
||||
release = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* prepare a descriptor for RDMA */
|
||||
mca_bml_base_prepare_dst(bml_btl, reg,
|
||||
&recvreq->req_recv.req_convertor, 0, &size, &dst);
|
||||
if(reg && release == true && NULL != bml_btl->btl_mpool) {
|
||||
bml_btl->btl_mpool->mpool_release(bml_btl->btl_mpool, reg);
|
||||
}
|
||||
|
||||
if(dst == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
@ -135,7 +135,7 @@ do {
|
||||
for( r = 0; r < recvreq->req_rdma_cnt; r++ ) { \
|
||||
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[r].btl_reg; \
|
||||
if( NULL != btl_reg ) { \
|
||||
btl_reg->mpool->mpool_release( btl_reg->mpool, btl_reg ); \
|
||||
btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg ); \
|
||||
} \
|
||||
} \
|
||||
recvreq->req_rdma_cnt = 0; \
|
||||
|
@ -652,7 +652,7 @@ int mca_pml_ob1_send_request_start_rdma(
|
||||
bml_btl->btl_flags & MCA_BTL_FLAGS_GET) {
|
||||
size_t old_position = sendreq->req_send.req_convertor.bConverted;
|
||||
|
||||
/* prepare source descriptor/segment(s) */
|
||||
/* prepare source descriptor/segment(s) */
|
||||
mca_bml_base_prepare_src(
|
||||
bml_btl,
|
||||
reg,
|
||||
@ -846,6 +846,7 @@ int mca_pml_ob1_send_request_start_rndv(
|
||||
des->des_cbdata = sendreq;
|
||||
des->des_cbfunc = mca_pml_ob1_rndv_completion;
|
||||
sendreq->req_send_offset = size;
|
||||
sendreq->req_rdma_offset = size;
|
||||
|
||||
/* send */
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
|
||||
@ -1023,15 +1024,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
/* check for request completion */
|
||||
if( OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length)
|
||||
>= sendreq->req_send.req_bytes_packed) {
|
||||
/* bump up the req_state after the last fin was sent..
|
||||
if rndv completion occurs after this (can happen!) then
|
||||
the rndv completion will properly clean up after the request
|
||||
we can't just do this on the first RDMA PUT + ACK ctl message in
|
||||
mca_pml_ob1_send_request_put because then we might fall into sender
|
||||
side scheduleing (pml pipeline protocol) */
|
||||
if(true == sendreq->req_got_put_ack) {
|
||||
MCA_PML_OB1_SEND_REQUEST_ADVANCE_NO_SCHEDULE(sendreq);
|
||||
}
|
||||
|
||||
/* if we've got completion on rndv packet */
|
||||
if (sendreq->req_state == 2) {
|
||||
MCA_PML_OB1_SEND_REQUEST_PML_COMPLETE(sendreq);
|
||||
@ -1058,7 +1051,6 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
|
||||
size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
|
||||
size_t i, save_size = frag->rdma_length;
|
||||
int rc;
|
||||
bool release = false;
|
||||
|
||||
bml_btl = mca_bml_base_btl_array_find(&frag->rdma_ep->btl_rdma,
|
||||
frag->rdma_btl);
|
||||
@ -1074,16 +1066,6 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
|
||||
/* set convertor at current offset */
|
||||
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
|
||||
|
||||
/* if registration doesnt exist - create one */
|
||||
if (mca_pml_ob1.leave_pinned_pipeline && reg == NULL) {
|
||||
unsigned char* base;
|
||||
ptrdiff_t lb;
|
||||
ompi_ddt_type_lb(sendreq->req_send.req_convertor.pDesc, &lb);
|
||||
base = (unsigned char*)sendreq->req_send.req_convertor.pBaseBuf + lb + offset;
|
||||
reg = mca_pml_ob1_rdma_register(bml_btl, base, frag->rdma_length);
|
||||
release = true;
|
||||
}
|
||||
|
||||
/* setup descriptor */
|
||||
mca_bml_base_prepare_src( bml_btl,
|
||||
reg,
|
||||
@ -1092,10 +1074,6 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
|
||||
&frag->rdma_length,
|
||||
&des );
|
||||
|
||||
if(reg && release == true && bml_btl->btl_mpool) {
|
||||
bml_btl->btl_mpool->mpool_release(bml_btl->btl_mpool, reg);
|
||||
}
|
||||
|
||||
if(NULL == des) {
|
||||
frag->rdma_length = save_size;
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
@ -1148,7 +1126,7 @@ void mca_pml_ob1_send_request_put(
|
||||
size_t i, size = 0;
|
||||
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) {
|
||||
sendreq->req_got_put_ack = true;
|
||||
MCA_PML_OB1_SEND_REQUEST_ADVANCE_NO_SCHEDULE(sendreq);
|
||||
}
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag, rc);
|
||||
|
@ -55,7 +55,6 @@ struct mca_pml_ob1_send_request_t {
|
||||
size_t req_bytes_delivered;
|
||||
size_t req_send_offset;
|
||||
size_t req_rdma_offset;
|
||||
bool req_got_put_ack;
|
||||
mca_pml_ob1_rdma_btl_t req_rdma[MCA_PML_OB1_MAX_RDMA_PER_REQUEST];
|
||||
uint32_t req_rdma_cnt;
|
||||
mca_pml_ob1_send_pending_t req_pending;
|
||||
@ -116,7 +115,7 @@ static inline void mca_pml_ob1_free_rdma_resources(mca_pml_ob1_send_request_t* s
|
||||
for(r = 0; r < sendreq->req_rdma_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg;
|
||||
if( NULL != reg ) {
|
||||
reg->mpool->mpool_release(reg->mpool, reg);
|
||||
reg->mpool->mpool_deregister(reg->mpool, reg);
|
||||
}
|
||||
}
|
||||
sendreq->req_rdma_cnt = 0;
|
||||
@ -359,7 +358,6 @@ static inline int mca_pml_ob1_send_request_start(
|
||||
sendreq->req_pipeline_depth = 0;
|
||||
sendreq->req_bytes_delivered = 0;
|
||||
sendreq->req_send_offset = 0;
|
||||
sendreq->req_got_put_ack = false;
|
||||
sendreq->req_pending = MCA_PML_OB1_SEND_PENDING_NONE;
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
|
||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
|
||||
|
1
ompi/mca/rcache/rb/.ompi_ignore
Обычный файл
1
ompi/mca/rcache/rb/.ompi_ignore
Обычный файл
@ -0,0 +1 @@
|
||||
quilt
|
@ -35,34 +35,26 @@ typedef struct mca_rcache_base_module_t* (*mca_rcache_base_component_init_fn_t)(
|
||||
|
||||
|
||||
typedef int (*mca_rcache_base_module_find_fn_t) (
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
void* addr,
|
||||
size_t size,
|
||||
ompi_pointer_array_t *regs,
|
||||
uint32_t *cnt
|
||||
);
|
||||
struct mca_rcache_base_module_t* rcache, void* addr, size_t size,
|
||||
mca_mpool_base_registration_t **reg);
|
||||
|
||||
typedef int (*mca_rcache_base_module_insert_fn_t)(
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration,
|
||||
uint32_t flags
|
||||
);
|
||||
typedef int (*mca_rcache_base_module_find_all_fn_t)(
|
||||
struct mca_rcache_base_module_t* rcache, void* addr, size_t size,
|
||||
ompi_pointer_array_t *regs);
|
||||
|
||||
typedef int (*mca_rcache_base_module_delete_fn_t) (
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration,
|
||||
uint32_t flags
|
||||
);
|
||||
typedef int (*mca_rcache_base_module_insert_fn_t)(
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration, size_t limit);
|
||||
|
||||
typedef int (*mca_rcache_base_module_delete_fn_t)(
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration);
|
||||
|
||||
/**
|
||||
* finalize
|
||||
*/
|
||||
typedef void (*mca_rcache_base_module_finalize_fn_t)(
|
||||
struct mca_rcache_base_module_t*
|
||||
);
|
||||
|
||||
|
||||
struct mca_rcache_base_module_t*);
|
||||
|
||||
/**
|
||||
* rcache component descriptor. Contains component version information and
|
||||
@ -83,19 +75,16 @@ typedef struct mca_rcache_base_component_1_0_0_t mca_rcache_base_component_t;
|
||||
/**
|
||||
* rcache module descriptor
|
||||
*/
|
||||
struct mca_rcache_base_module_t {
|
||||
mca_rcache_base_component_t *rcache_component; /**< component struct */
|
||||
mca_rcache_base_module_find_fn_t rcache_find;
|
||||
mca_rcache_base_module_insert_fn_t rcache_insert;
|
||||
mca_rcache_base_module_delete_fn_t rcache_delete;
|
||||
mca_rcache_base_module_finalize_fn_t rcache_finalize;
|
||||
struct mca_rcache_base_module_t {
|
||||
mca_rcache_base_component_t *rcache_component; /**< component struct */
|
||||
mca_rcache_base_module_find_fn_t rcache_find;
|
||||
mca_rcache_base_module_find_all_fn_t rcache_find_all;
|
||||
mca_rcache_base_module_insert_fn_t rcache_insert;
|
||||
mca_rcache_base_module_delete_fn_t rcache_delete;
|
||||
mca_rcache_base_module_finalize_fn_t rcache_finalize;
|
||||
opal_mutex_t lock;
|
||||
};
|
||||
typedef struct mca_rcache_base_module_t mca_rcache_base_module_t;
|
||||
|
||||
|
||||
|
||||
|
||||
};
|
||||
typedef struct mca_rcache_base_module_t mca_rcache_base_module_t;
|
||||
|
||||
/**
|
||||
* Macro for use in components that are of type rcache v1.0.0
|
||||
|
@ -25,9 +25,7 @@ sources = \
|
||||
rcache_vma.h \
|
||||
rcache_vma_component.c \
|
||||
rcache_vma_tree.c \
|
||||
rcache_vma_tree.h \
|
||||
rcache_vma_mru.c \
|
||||
rcache_vma_mru.h
|
||||
rcache_vma_tree.h
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include "ompi/mca/rcache/rcache.h"
|
||||
#include "rcache_vma.h"
|
||||
#include "rcache_vma_tree.h"
|
||||
#include "rcache_vma_mru.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
|
||||
@ -34,26 +33,18 @@ extern unsigned int mca_mpool_base_page_size_log;
|
||||
*/
|
||||
|
||||
void mca_rcache_vma_module_init( mca_rcache_vma_module_t* rcache ) {
|
||||
|
||||
rcache->base.rcache_find = mca_rcache_vma_find;
|
||||
rcache->base.rcache_find_all = mca_rcache_vma_find_all;
|
||||
rcache->base.rcache_insert = mca_rcache_vma_insert;
|
||||
rcache->base.rcache_delete = mca_rcache_vma_delete;
|
||||
rcache->base.rcache_finalize = mca_rcache_vma_finalize;
|
||||
OBJ_CONSTRUCT(&rcache->base.lock, opal_mutex_t);
|
||||
mca_rcache_vma_tree_init(rcache);
|
||||
mca_rcache_vma_mru_init(rcache);
|
||||
}
|
||||
|
||||
int mca_rcache_vma_find (
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
void* addr,
|
||||
size_t size,
|
||||
ompi_pointer_array_t* regs,
|
||||
uint32_t *cnt
|
||||
){
|
||||
|
||||
int rc = OMPI_SUCCESS;
|
||||
mca_mpool_base_registration_t *reg;
|
||||
int mca_rcache_vma_find(struct mca_rcache_base_module_t* rcache,
|
||||
void* addr, size_t size, mca_mpool_base_registration_t **reg)
|
||||
{
|
||||
void* base_addr;
|
||||
void* bound_addr;
|
||||
|
||||
@ -61,108 +52,56 @@ int mca_rcache_vma_find (
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&rcache->lock);
|
||||
*cnt = 0;
|
||||
|
||||
base_addr = down_align_addr(addr, mca_mpool_base_page_size_log);
|
||||
bound_addr = up_align_addr((void*) ((unsigned long) addr + size - 1), mca_mpool_base_page_size_log);
|
||||
|
||||
reg = mca_rcache_vma_tree_find((mca_rcache_vma_module_t*)rcache, base_addr,
|
||||
*reg = mca_rcache_vma_tree_find((mca_rcache_vma_module_t*)rcache, base_addr,
|
||||
bound_addr);
|
||||
if (reg != NULL) {
|
||||
ompi_pointer_array_add(regs, (void*) reg);
|
||||
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
rc = mca_rcache_vma_mru_touch((mca_rcache_vma_module_t*)rcache, reg);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_THREAD_ADD32((int32_t*) ®->ref_count, 1);
|
||||
(*cnt)++;
|
||||
assert(((void*)reg->bound) >= addr);
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_rcache_vma_insert (
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg,
|
||||
uint32_t flags
|
||||
) {
|
||||
int mca_rcache_vma_find_all(struct mca_rcache_base_module_t* rcache,
|
||||
void* addr, size_t size, ompi_pointer_array_t *regs)
|
||||
{
|
||||
void *base_addr, *bound_addr;
|
||||
|
||||
if(size == 0) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
base_addr = down_align_addr(addr, mca_mpool_base_page_size_log);
|
||||
bound_addr = up_align_addr((void*) ((unsigned long) addr + size - 1), mca_mpool_base_page_size_log);
|
||||
|
||||
return mca_rcache_vma_tree_find_all((mca_rcache_vma_module_t*)rcache,
|
||||
base_addr, bound_addr, regs);
|
||||
}
|
||||
|
||||
int mca_rcache_vma_insert(struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg, size_t limit)
|
||||
{
|
||||
size_t reg_size = reg->bound - reg->base + 1;
|
||||
mca_mpool_base_registration_t* old_reg;
|
||||
mca_rcache_vma_module_t *vma_rcache = (mca_rcache_vma_module_t*)rcache;
|
||||
|
||||
OPAL_THREAD_LOCK(&rcache->lock);
|
||||
|
||||
if((flags & MCA_MPOOL_FLAGS_CACHE) &&
|
||||
reg_size > ((mca_rcache_vma_module_t*)rcache)->reg_max_mru_size)
|
||||
{
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
/* if the registration is too big for the rcache,
|
||||
don't cache it and reset the flags so the upper level
|
||||
handles things appropriately */
|
||||
reg->flags = 0;
|
||||
return OMPI_SUCCESS;
|
||||
if(limit != 0 && reg_size > limit) {
|
||||
/* return out of resources if request is bigger than cache size
|
||||
* return temp out of resources otherwise */
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
reg->flags = flags;
|
||||
|
||||
while(mca_rcache_vma_tree_insert((mca_rcache_vma_module_t*)rcache, reg) ==
|
||||
OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
|
||||
/* call deregister - which removes the registration from
|
||||
* the tree and mru list. memory will be deregistered when
|
||||
* the reference count goes to zero.
|
||||
*/
|
||||
old_reg = (mca_mpool_base_registration_t*)opal_list_get_first(&((mca_rcache_vma_module_t*)rcache)->mru_list);
|
||||
/* we need to retain first, because we only want the registration
|
||||
removed from the tree and the mru */
|
||||
old_reg->mpool->mpool_retain(old_reg->mpool, old_reg);
|
||||
old_reg->mpool->mpool_deregister(old_reg->mpool, old_reg);
|
||||
}
|
||||
OPAL_THREAD_ADD32((int32_t*) ®->ref_count, 1);
|
||||
|
||||
if(flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
mca_rcache_vma_mru_insert((mca_rcache_vma_module_t*)rcache, reg);
|
||||
OPAL_THREAD_ADD32((int32_t*)®->ref_count, 1);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
return mca_rcache_vma_tree_insert(vma_rcache, reg, limit);
|
||||
}
|
||||
|
||||
int mca_rcache_vma_delete (
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg,
|
||||
uint32_t flags
|
||||
) {
|
||||
int rc = OMPI_SUCCESS;
|
||||
assert(reg->ref_count >= 1);
|
||||
OPAL_THREAD_LOCK(&rcache->lock);
|
||||
if(flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||
assert(reg->ref_count >= 2);
|
||||
OPAL_THREAD_ADD32((int32_t*)®->ref_count, -1);
|
||||
rc = mca_rcache_vma_mru_delete((mca_rcache_vma_module_t*)rcache, reg);
|
||||
}
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
return rc;
|
||||
}
|
||||
reg->flags = 0;
|
||||
OPAL_THREAD_ADD32((int32_t*)®->ref_count, -1);
|
||||
rc = mca_rcache_vma_tree_delete((mca_rcache_vma_module_t*)rcache, reg );
|
||||
OPAL_THREAD_UNLOCK(&rcache->lock);
|
||||
return rc;
|
||||
int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg)
|
||||
{
|
||||
mca_rcache_vma_module_t *vma_rcache = (mca_rcache_vma_module_t*)rcache;
|
||||
return mca_rcache_vma_tree_delete(vma_rcache, reg);
|
||||
}
|
||||
|
||||
/**
|
||||
* finalize
|
||||
*/
|
||||
void mca_rcache_vma_finalize(
|
||||
struct mca_rcache_base_module_t* rcache
|
||||
) {
|
||||
|
||||
void mca_rcache_vma_finalize(struct mca_rcache_base_module_t* rcache)
|
||||
{
|
||||
}
|
||||
|
@ -34,57 +34,41 @@ struct mca_rcache_vma_module_t {
|
||||
mca_rcache_base_module_t base;
|
||||
ompi_rb_tree_t rb_tree;
|
||||
opal_list_t vma_list;
|
||||
opal_list_t mru_list;
|
||||
size_t reg_mru_len;
|
||||
size_t reg_max_mru_size;
|
||||
size_t reg_cur_mru_size;
|
||||
|
||||
size_t reg_cur_cache_size;
|
||||
};
|
||||
typedef struct mca_rcache_vma_module_t mca_rcache_vma_module_t;
|
||||
|
||||
|
||||
struct mca_rcache_vma_component_t {
|
||||
mca_rcache_base_component_t super;
|
||||
size_t reg_mru_len;
|
||||
size_t reg_max_mru_size;
|
||||
}; typedef struct mca_rcache_vma_component_t mca_rcache_vma_component_t;
|
||||
|
||||
OMPI_DECLSPEC extern mca_rcache_vma_component_t mca_rcache_vma_component;
|
||||
|
||||
|
||||
|
||||
void mca_rcache_vma_module_init( mca_rcache_vma_module_t* rcache );
|
||||
void mca_rcache_vma_module_init(mca_rcache_vma_module_t* rcache);
|
||||
|
||||
int mca_rcache_vma_find (
|
||||
mca_rcache_base_module_t* rcache,
|
||||
void* addr,
|
||||
size_t size,
|
||||
ompi_pointer_array_t* regs,
|
||||
uint32_t *cnt
|
||||
);
|
||||
int mca_rcache_vma_find(mca_rcache_base_module_t* rcache, void* addr,
|
||||
size_t size, mca_mpool_base_registration_t **reg);
|
||||
|
||||
int mca_rcache_vma_insert (
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration,
|
||||
uint32_t flags
|
||||
);
|
||||
int mca_rcache_vma_find_all(mca_rcache_base_module_t* rcache, void* addr,
|
||||
size_t size, ompi_pointer_array_t *regs);
|
||||
|
||||
int mca_rcache_vma_delete (
|
||||
struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration,
|
||||
uint32_t flags
|
||||
);
|
||||
int mca_rcache_vma_insert(struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration, size_t limit);
|
||||
|
||||
int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
|
||||
mca_mpool_base_registration_t* registration);
|
||||
|
||||
|
||||
/**
|
||||
* init/finalize
|
||||
*/
|
||||
|
||||
void mca_rcache_vma_module_init( mca_rcache_vma_module_t* rcache );
|
||||
void mca_rcache_vma_module_init(mca_rcache_vma_module_t *rcache);
|
||||
|
||||
void mca_rcache_vma_finalize(
|
||||
struct mca_rcache_base_module_t*
|
||||
);
|
||||
void mca_rcache_vma_finalize(struct mca_rcache_base_module_t*);
|
||||
|
||||
#endif /* MCA_RCACHE_VMA_H */
|
||||
|
||||
|
@ -44,22 +44,6 @@ mca_rcache_vma_component_t mca_rcache_vma_component = {
|
||||
|
||||
static int mca_rcache_vma_component_open(void)
|
||||
{
|
||||
mca_base_param_reg_int(&mca_rcache_vma_component.super.rcache_version,
|
||||
"mru_len",
|
||||
"The maximum size IN ENTRIES of the MRU (most recently used) rcache list",
|
||||
false,
|
||||
false,
|
||||
256,
|
||||
(int*)&(mca_rcache_vma_component.reg_mru_len));
|
||||
|
||||
mca_base_param_reg_int(&mca_rcache_vma_component.super.rcache_version,
|
||||
"mru_size",
|
||||
"The maximum size IN BYTES of the MRU (most recently used) rcache list",
|
||||
false,
|
||||
false,
|
||||
1*1024*1024*1024, /* default to 1GB? */
|
||||
(int*)&(mca_rcache_vma_component.reg_max_mru_size));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -68,8 +52,6 @@ mca_rcache_base_module_t* mca_rcache_vma_component_init(void) {
|
||||
|
||||
rcache = (mca_rcache_vma_module_t*) malloc(sizeof(mca_rcache_vma_module_t));
|
||||
mca_rcache_vma_module_init(rcache);
|
||||
rcache->reg_mru_len = mca_rcache_vma_component.reg_mru_len;
|
||||
rcache->reg_max_mru_size = mca_rcache_vma_component.reg_max_mru_size;
|
||||
|
||||
return &rcache->base;
|
||||
}
|
||||
|
@ -1,98 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
* Description of the Registration Cache framework
|
||||
*/
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "rcache_vma_mru.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
|
||||
/*
|
||||
* initialize the vma mru
|
||||
*/
|
||||
int mca_rcache_vma_mru_init(mca_rcache_vma_module_t* rcache){
|
||||
OBJ_CONSTRUCT(&rcache->mru_list, opal_list_t);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* insert an item in the vma mru
|
||||
*/
|
||||
int mca_rcache_vma_mru_insert(
|
||||
mca_rcache_vma_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg
|
||||
) {
|
||||
mca_mpool_base_registration_t* old_reg;
|
||||
|
||||
if(rcache->reg_mru_len <= rcache->mru_list.opal_list_length) {
|
||||
/* call deregister - which removes the registration from
|
||||
* the tree and mru list. memory will be deregistered when
|
||||
* the reference count goes to zero.
|
||||
*/
|
||||
old_reg = (mca_mpool_base_registration_t*)
|
||||
opal_list_get_first(&rcache->mru_list);
|
||||
/* we need to retain first, because we only want the registration
|
||||
removed from the tree and the mru */
|
||||
old_reg->mpool->mpool_retain(old_reg->mpool, old_reg);
|
||||
old_reg->mpool->mpool_deregister(old_reg->mpool, old_reg);
|
||||
}
|
||||
|
||||
opal_list_append(&rcache->mru_list,(opal_list_item_t*) reg);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* remove an item from the vma mru
|
||||
*/
|
||||
int mca_rcache_vma_mru_delete(
|
||||
mca_rcache_vma_module_t* rcache,
|
||||
mca_mpool_base_registration_t *reg
|
||||
){
|
||||
int rc;
|
||||
if(NULL == opal_list_remove_item(&rcache->mru_list,
|
||||
(opal_list_item_t*)reg)) {
|
||||
rc = OMPI_ERROR;
|
||||
} else {
|
||||
rc = OMPI_SUCCESS;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* touch an item in the mru list
|
||||
*/
|
||||
int mca_rcache_vma_mru_touch(
|
||||
mca_rcache_vma_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg
|
||||
){
|
||||
int rc;
|
||||
if(NULL == opal_list_remove_item(&rcache->mru_list,
|
||||
(opal_list_item_t*)reg)) {
|
||||
rc = OMPI_ERROR;
|
||||
} else {
|
||||
opal_list_append(&rcache->mru_list, (opal_list_item_t*)reg);
|
||||
rc = OMPI_SUCCESS;
|
||||
}
|
||||
return rc;
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
|
||||
/**
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2006 Voltaire. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
* Description of the Registration Cache framework
|
||||
*/
|
||||
#ifndef MCA_RCACHE_VMA_MRU_H
|
||||
#define MCA_RCACHE_VMA_MRU_H
|
||||
#include "opal/mca/mca.h"
|
||||
#include "ompi/info/info.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "rcache_vma.h"
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* initialize the vma mru
|
||||
*/
|
||||
int mca_rcache_vma_mru_init(mca_rcache_vma_module_t* rcache);
|
||||
|
||||
/*
|
||||
* insert an item in the vma mru
|
||||
*/
|
||||
int mca_rcache_vma_mru_insert(
|
||||
mca_rcache_vma_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg
|
||||
);
|
||||
|
||||
/*
|
||||
* remove an item from the vma mru
|
||||
*/
|
||||
int mca_rcache_vma_mru_delete(
|
||||
mca_rcache_vma_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg
|
||||
);
|
||||
|
||||
int mca_rcache_vma_mru_touch(
|
||||
mca_rcache_vma_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg
|
||||
);
|
||||
|
||||
#endif /* MCA_RCACHE_VMA_MRU_H */
|
||||
|
@ -26,6 +26,8 @@
|
||||
#include "opal/mca/mca.h"
|
||||
#include "rcache_vma_tree.h"
|
||||
|
||||
extern unsigned int mca_mpool_base_page_size;
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_rcache_vma_reg_list_item_t, opal_list_item_t, NULL, NULL);
|
||||
|
||||
static void mca_rcache_vma_construct(opal_object_t *object)
|
||||
@ -130,8 +132,17 @@ static inline int mca_rcache_vma_compare_regs(
|
||||
mca_mpool_base_registration_t *reg1,
|
||||
mca_mpool_base_registration_t *reg2)
|
||||
{
|
||||
/* persisten registration are on top */
|
||||
if((reg1->flags & MCA_MPOOL_FLAGS_PERSIST) &&
|
||||
!(reg2->flags & MCA_MPOOL_FLAGS_PERSIST))
|
||||
return 1;
|
||||
|
||||
if(!(reg1->flags & MCA_MPOOL_FLAGS_PERSIST) &&
|
||||
(reg2->flags & MCA_MPOOL_FLAGS_PERSIST))
|
||||
return -1;
|
||||
|
||||
if (reg1->bound != reg2->bound)
|
||||
return (int)(reg1->bound - reg2->bound);
|
||||
return (int)(reg1->bound - reg2->bound);
|
||||
|
||||
/* tie breaker */
|
||||
return (int)((uintptr_t)reg1 - (uintptr_t)reg2);
|
||||
@ -241,7 +252,7 @@ int mca_rcache_vma_tree_init(mca_rcache_vma_module_t* rcache)
|
||||
{
|
||||
OBJ_CONSTRUCT(&rcache->rb_tree, ompi_rb_tree_t);
|
||||
OBJ_CONSTRUCT(&rcache->vma_list, opal_list_t);
|
||||
rcache->reg_cur_mru_size = 0;
|
||||
rcache->reg_cur_cache_size = 0;
|
||||
return ompi_rb_tree_init(&rcache->rb_tree,
|
||||
mca_rcache_vma_tree_node_compare);
|
||||
}
|
||||
@ -261,23 +272,81 @@ mca_mpool_base_registration_t *mca_rcache_vma_tree_find(
|
||||
|
||||
item = (mca_rcache_vma_reg_list_item_t*)opal_list_get_first(&vma->reg_list);
|
||||
|
||||
if(item->reg->bound >= bound)
|
||||
return item->reg;
|
||||
do {
|
||||
if(item->reg->bound >= bound)
|
||||
return item->reg;
|
||||
if(!(item->reg->flags & MCA_MPOOL_FLAGS_PERSIST))
|
||||
break;
|
||||
item = (mca_rcache_vma_reg_list_item_t*)opal_list_get_next(item);
|
||||
} while(item !=
|
||||
(mca_rcache_vma_reg_list_item_t*)opal_list_get_end(&vma->reg_list));
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int mca_rcache_vma_can_insert(
|
||||
mca_rcache_vma_module_t *vma_rcache,
|
||||
uint32_t reg_flags,
|
||||
size_t nbytes)
|
||||
static inline bool is_reg_in_array(ompi_pointer_array_t *regs, void *p)
|
||||
{
|
||||
if(0 == vma_rcache->reg_max_mru_size ||
|
||||
!(reg_flags & MCA_MPOOL_FLAGS_CACHE))
|
||||
int i;
|
||||
|
||||
for(i = 0; i < ompi_pointer_array_get_size(regs); i++) {
|
||||
if(ompi_pointer_array_get_item(regs, i) == p)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int mca_rcache_vma_tree_find_all(
|
||||
mca_rcache_vma_module_t *vma_rcache, unsigned char *base,
|
||||
unsigned char *bound, ompi_pointer_array_t *regs)
|
||||
{
|
||||
int cnt = 0;
|
||||
|
||||
if(opal_list_get_size(&vma_rcache->vma_list) == 0)
|
||||
return cnt;
|
||||
|
||||
do {
|
||||
mca_rcache_vma_t *vma;
|
||||
opal_list_item_t *item;
|
||||
vma = ompi_rb_tree_find_with(&vma_rcache->rb_tree, base,
|
||||
mca_rcache_vma_tree_node_compare_closest);
|
||||
|
||||
if(NULL == vma) {
|
||||
/* base is bigger than any registered memory */
|
||||
base = bound + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(base < (unsigned char*)vma->start) {
|
||||
base = (unsigned char*)vma->start;
|
||||
continue;
|
||||
}
|
||||
|
||||
for(item = opal_list_get_first(&vma->reg_list);
|
||||
item != opal_list_get_end(&vma->reg_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
mca_rcache_vma_reg_list_item_t *vma_item;
|
||||
vma_item = (mca_rcache_vma_reg_list_item_t*)item;
|
||||
if(is_reg_in_array(regs, (void*)vma_item->reg)) {
|
||||
continue;
|
||||
}
|
||||
ompi_pointer_array_add(regs, (void*)vma_item->reg);
|
||||
cnt++;
|
||||
}
|
||||
|
||||
base = (unsigned char *)vma->end + 1;
|
||||
} while(bound >= base);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static inline int mca_rcache_vma_can_insert(
|
||||
mca_rcache_vma_module_t *vma_rcache, size_t nbytes, size_t limit)
|
||||
{
|
||||
if(0 == limit)
|
||||
return 1;
|
||||
|
||||
if(vma_rcache->reg_cur_mru_size + nbytes <=
|
||||
vma_rcache->reg_max_mru_size)
|
||||
if(vma_rcache->reg_cur_cache_size + nbytes <= limit)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@ -287,13 +356,11 @@ static inline void mca_rcache_vma_update_byte_count(
|
||||
mca_rcache_vma_module_t* vma_rcache,
|
||||
size_t nbytes)
|
||||
{
|
||||
vma_rcache->reg_cur_mru_size += nbytes;
|
||||
vma_rcache->reg_cur_cache_size += nbytes;
|
||||
}
|
||||
|
||||
int mca_rcache_vma_tree_insert(
|
||||
mca_rcache_vma_module_t* vma_rcache,
|
||||
mca_mpool_base_registration_t* reg
|
||||
)
|
||||
int mca_rcache_vma_tree_insert(mca_rcache_vma_module_t* vma_rcache,
|
||||
mca_mpool_base_registration_t* reg, size_t limit)
|
||||
{
|
||||
mca_rcache_vma_t *i;
|
||||
uintptr_t begin = (uintptr_t)reg->base, end = (uintptr_t)reg->bound;
|
||||
@ -309,7 +376,7 @@ int mca_rcache_vma_tree_insert(
|
||||
|
||||
if((mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list) == i) {
|
||||
vma = NULL;
|
||||
if(mca_rcache_vma_can_insert(vma_rcache, reg->flags, end - begin + 1))
|
||||
if(mca_rcache_vma_can_insert(vma_rcache, end - begin + 1, limit))
|
||||
vma = mca_rcache_vma_new(vma_rcache, begin, end);
|
||||
|
||||
if(!vma)
|
||||
@ -323,7 +390,7 @@ int mca_rcache_vma_tree_insert(
|
||||
} else if(i->start > begin) {
|
||||
uintptr_t tend = (i->start <= end)?(i->start - 1):end;
|
||||
vma = NULL;
|
||||
if(mca_rcache_vma_can_insert(vma_rcache, reg->flags, tend - begin + 1))
|
||||
if(mca_rcache_vma_can_insert(vma_rcache, tend - begin + 1, limit))
|
||||
vma = mca_rcache_vma_new(vma_rcache, begin, tend);
|
||||
|
||||
if(!vma)
|
||||
|
@ -71,14 +71,18 @@ mca_mpool_base_registration_t* mca_rcache_vma_tree_find(
|
||||
unsigned char* base,
|
||||
unsigned char *bound
|
||||
);
|
||||
/**
|
||||
* Returns all registrations that overlap the given memory region
|
||||
*/
|
||||
int mca_rcache_vma_tree_find_all(
|
||||
mca_rcache_vma_module_t *vma_rcache, unsigned char *base,
|
||||
unsigned char *bound, ompi_pointer_array_t *regs);
|
||||
|
||||
/*
|
||||
* insert an item in the vma tree
|
||||
*/
|
||||
int mca_rcache_vma_tree_insert(
|
||||
mca_rcache_vma_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg
|
||||
);
|
||||
int mca_rcache_vma_tree_insert(mca_rcache_vma_module_t* rcache,
|
||||
mca_mpool_base_registration_t* reg, size_t limit);
|
||||
|
||||
/*
|
||||
* remove an item from the vma tree
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user