1
1

Merge with gleb-mpool branch. All RDMA components use same mpool now (rdma).

The udapl/openib/vapi/gm mpools are deprecated. The rdma mpool has a parameter,
mpool_rdma_rcache_size_limit, that allows limiting its size (default is 0 — unlimited).

This commit was SVN r12878.
Этот коммит содержится в:
Gleb Natapov 2006-12-17 12:26:41 +00:00
родитель f1fdd7c041
Коммит 190e7a27cd
59 изменённых файлов: 1546 добавлений и 1440 удалений

Просмотреть файл

@ -155,7 +155,7 @@ int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements)
if (NULL != flist->fl_mpool)
alloc_ptr = (ompi_free_list_memory_t*)flist->fl_mpool->mpool_alloc(flist->fl_mpool,
alloc_size, 0, 0, &user_out);
alloc_size, 0, MCA_MPOOL_FLAGS_CACHE_BYPASS, &user_out);
else
alloc_ptr = (ompi_free_list_memory_t*)malloc(alloc_size);

Просмотреть файл

@ -29,7 +29,6 @@ static void mca_bml_base_endpoint_construct(mca_bml_base_endpoint_t* ep)
{
ep->btl_rdma_offset = 0;
ep->btl_max_send_size = 0;
ep->btl_rdma_size = 0;
ep->btl_rdma_align = 0;
OBJ_CONSTRUCT(&ep->btl_eager, mca_bml_base_btl_array_t);

Просмотреть файл

@ -238,7 +238,6 @@ struct mca_bml_base_endpoint_t {
struct ompi_proc_t* btl_proc; /**< backpointer to target ompi_proc_t */
size_t btl_rdma_offset; /**< max of min rdma size for available rmda btls */
size_t btl_max_send_size; /**< min of max send size for available send btls */
size_t btl_rdma_size; /**< max of min rdma size for available rmda btls */
size_t btl_rdma_align; /**< max of min rdma size for available rmda btls */
mca_bml_base_btl_array_t btl_eager; /**< array of btls to use for first fragments */
mca_bml_base_btl_array_t btl_send; /**< array of btls to use for remaining fragments */

Просмотреть файл

@ -265,7 +265,6 @@ int mca_bml_r2_add_procs(
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
bml_endpoint->btl_max_send_size = -1;
bml_endpoint->btl_rdma_size = -1;
bml_endpoint->btl_proc = proc;
proc->proc_bml = bml_endpoint;
@ -426,10 +425,6 @@ int mca_bml_r2_add_procs(
if(bml_endpoint->btl_rdma_offset < bml_btl_rdma->btl_min_rdma_size) {
bml_endpoint->btl_rdma_offset = bml_btl_rdma->btl_min_rdma_size;
}
if(bml_endpoint->btl_rdma_size > btl->btl_max_rdma_size) {
bml_endpoint->btl_rdma_size = btl->btl_max_rdma_size;
bml_endpoint->btl_rdma_align = bml_base_log2(bml_endpoint->btl_rdma_size);
}
}
}
}
@ -677,10 +672,6 @@ int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl)
if (ep->btl_rdma_offset < bml_btl->btl_min_rdma_size) {
ep->btl_rdma_offset = bml_btl->btl_min_rdma_size;
}
if (ep->btl_rdma_size > bml_btl->btl_max_rdma_size) {
ep->btl_rdma_size = bml_btl->btl_max_rdma_size;
ep->btl_rdma_align = bml_base_log2(ep->btl_rdma_size);
}
}
/* compute weighting factor for this btl */

Просмотреть файл

@ -245,7 +245,8 @@ int mca_btl_gm_free( struct mca_btl_base_module_t* btl,
mca_btl_gm_frag_t* frag = (mca_btl_gm_frag_t*)des;
if( NULL != frag->registration ) {
btl->btl_mpool->mpool_release(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->registration);
btl->btl_mpool->mpool_deregister(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->registration);
frag->registration = NULL;
}
MCA_BTL_GM_FRAG_RETURN(btl, frag);
@ -268,142 +269,95 @@ mca_btl_base_descriptor_t* mca_btl_gm_prepare_src(
size_t* size
)
{
mca_btl_gm_frag_t* frag;
mca_btl_gm_frag_t *frag = NULL;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data = *size;
int rc;
#if (OMPI_MCA_BTL_GM_HAVE_RDMA_GET || OMPI_MCA_BTL_GM_HAVE_RDMA_PUT)
/*
* If the data has already been pinned and is contigous than we can
* use it in place.
*/
if (NULL != registration && 0 == ompi_convertor_need_buffers(convertor)) {
size_t reg_len;
MCA_BTL_GM_FRAG_ALLOC_USER(btl, frag, rc);
if(NULL == frag){
return NULL;
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
if(registration != NULL || max_data > btl->btl_max_send_size) {
MCA_BTL_GM_FRAG_ALLOC_USER(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
/*
* just assign it something..
* we will assign the real value in put/get
*/
frag->type = MCA_BTL_GM_PUT;
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
*size = max_data;
if(NULL == registration) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
iov.iov_base, max_data, 0, &registration);
if(OMPI_SUCCESS != rc || NULL == registration) {
MCA_BTL_GM_FRAG_RETURN(btl, frag);
return NULL;
}
/* keep track of the registration we did */
frag->registration = registration;
}
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return &frag->base;
}
/*
* just assign it something..
* we will assign the real value in put/get
*/
frag->type = MCA_BTL_GM_PUT;
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
reg_len = (unsigned char*)registration->bound - (unsigned char*)iov.iov_base + 1;
/* bump reference count as so that the registration
* doesn't go away when the operation completes
*/
btl->btl_mpool->mpool_retain(btl->btl_mpool,
(mca_mpool_base_registration_t*) registration);
frag->registration = registration;
/*
* if the data is not already pinned - but the leave pinned option is set,
* then go ahead and pin contigous data. however, if a reserve is required
* then we must allocated a fragment w/ buffer space
*/
} else if (max_data > btl->btl_max_send_size &&
ompi_convertor_need_buffers(convertor) == 0 &&
reserve == 0) {
mca_mpool_base_module_t* mpool = btl->btl_mpool;
MCA_BTL_GM_FRAG_ALLOC_USER(btl, frag, rc);
if(NULL == frag){
return NULL;
}
/*
* just assign it something..
* we will assign the real value in put/get
*/
frag->type = MCA_BTL_GM_PUT;
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
rc = mpool->mpool_register( mpool, iov.iov_base, max_data, 0, &registration );
if(rc != OMPI_SUCCESS) {
MCA_BTL_GM_FRAG_RETURN(btl,frag);
return NULL;
}
frag->registration = registration;
}
/*
* if we aren't pinning the data and the requested size is less
* than the eager limit pack into a fragment from the eager pool
*/
else
}
#endif
if (max_data+reserve <= btl->btl_eager_limit) {
if (max_data + reserve <= btl->btl_eager_limit) {
/* the data is small enough to fit in the eager frag and
* memory is not prepinned */
MCA_BTL_GM_FRAG_ALLOC_EAGER(btl, frag, rc);
if(NULL == frag) {
return NULL;
if(frag != NULL) {
frag->type = MCA_BTL_GM_EAGER;
}
frag->type = MCA_BTL_GM_EAGER;
iov.iov_len = max_data;
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
if( rc < 0 ) {
MCA_BTL_GM_FRAG_RETURN(btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
}
/*
* otherwise pack as much data as we can into a fragment
* that is the max send size.
*/
else {
if(NULL == frag) {
/* the data doesn't fit into eager frag or eger frag is
* not available */
MCA_BTL_GM_FRAG_ALLOC_MAX(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
frag->type = MCA_BTL_GM_SEND;
if(max_data + reserve > btl->btl_max_send_size){
if(max_data + reserve > btl->btl_max_send_size) {
max_data = btl->btl_max_send_size - reserve;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
if( rc < 0 ) {
MCA_BTL_GM_FRAG_RETURN(btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
if(rc < 0) {
MCA_BTL_GM_FRAG_RETURN(btl, frag);
return NULL;
}
*size = max_data;
frag->segment.seg_len = max_data + reserve;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return &frag->base;
}
@ -462,18 +416,7 @@ mca_btl_base_descriptor_t* mca_btl_gm_prepare_dst(
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_flags = 0;
if(NULL != registration) {
/* bump reference count as so that the registration
* doesn't go away when the operation completes
*/
mpool->mpool_retain(mpool,
(mca_mpool_base_registration_t*) registration);
frag->registration = registration;
} else {
if(NULL == registration) {
rc = mpool->mpool_register( mpool,
frag->segment.seg_addr.pval,
frag->segment.seg_len,
@ -483,7 +426,6 @@ mca_btl_base_descriptor_t* mca_btl_gm_prepare_dst(
MCA_BTL_GM_FRAG_RETURN(btl,frag);
return NULL;
}
frag->registration = registration;
}
return &frag->base;

Просмотреть файл

@ -30,7 +30,7 @@
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/gm/mpool_gm.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "btl_gm.h"
#include "btl_gm_frag.h"
#include "btl_gm_endpoint.h"
@ -45,6 +45,9 @@
#if OMPI_ENABLE_PROGRESS_THREADS
static void* mca_btl_gm_progress_thread( opal_object_t* arg );
#endif
static int gm_reg_mr(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
static int gm_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
mca_btl_gm_component_t mca_btl_gm_component = {
@ -131,7 +134,7 @@ int mca_btl_gm_component_open(void)
mca_btl_gm_component.gm_debug =
mca_btl_gm_param_register_int("debug", 0);
mca_btl_gm_component.gm_mpool_name =
mca_btl_gm_param_register_string("mpool", "gm");
mca_btl_gm_param_register_string("mpool", "rdma");
mca_btl_gm_component.gm_max_ports =
mca_btl_gm_param_register_int("max_ports", 16);
mca_btl_gm_component.gm_max_boards =
@ -201,6 +204,35 @@ int mca_btl_gm_component_close(void)
return OMPI_SUCCESS;
}
/**
 * Pin a memory region with the GM NIC (mpool register_mem callback).
 *
 * @param reg_data opaque context — the struct gm_port for this BTL
 * @param base     start of the region to pin
 * @param size     length of the region in bytes
 * @param reg      registration descriptor (GM keeps no per-region state here)
 * @return OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE if GM could
 *         not pin the memory
 */
static int gm_reg_mr(void *reg_data, void *base, size_t size,
        mca_mpool_base_registration_t *reg)
{
    struct gm_port *port = (struct gm_port*)reg_data;
    int rc;

    rc = gm_register_memory(port, base, size);
    if(rc != GM_SUCCESS)
        return OMPI_ERR_OUT_OF_RESOURCE;

    /* was MPI_SUCCESS — use the OMPI return-code namespace like every
     * other mpool callback in this file (see gm_dereg_mr) */
    return OMPI_SUCCESS;
}
/* Unpin a previously registered GM memory region (mpool deregister_mem
 * callback). The region length is recomputed from the registration's
 * inclusive [base, bound] range. */
static int gm_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
{
    struct gm_port *gm_port_hndl = (struct gm_port*)reg_data;
    size_t region_len = reg->bound - reg->base + 1;
    int status = gm_deregister_memory(gm_port_hndl, reg->base, region_len);

    if(GM_SUCCESS == status)
        return OMPI_SUCCESS;

    opal_output(0, "%s: error unpinning gm memory errno says %s\n",
        __func__, strerror(errno));
    return OMPI_ERROR;
}
/**
* Initialize module instance
@ -243,7 +275,10 @@ mca_btl_gm_module_init (mca_btl_gm_module_t * btl)
}
/* initialize memory pool */
resources.port = btl->port;
resources.reg_data = (void*)btl->port;
resources.sizeof_reg = sizeof(mca_mpool_base_registration_t);
resources.register_mem = gm_reg_mr;
resources.deregister_mem = gm_dereg_mr;
btl->super.btl_mpool = mca_mpool_base_module_create(
mca_btl_gm_component.gm_mpool_name,
&btl->super,
@ -419,8 +454,6 @@ static int mca_btl_gm_discover( void )
return OMPI_SUCCESS;
}
/*
* Register GM component addressing information. The MCA framework
* will make this available to all peers.

Просмотреть файл

@ -31,7 +31,7 @@
#include "ompi/datatype/datatype.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include <vapi_types.h>
#include <math.h> /* for log2 */
@ -225,8 +225,9 @@ int mca_btl_mvapi_free(
mca_btl_base_descriptor_t* des)
{
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*)des;
if (MCA_BTL_MVAPI_FRAG_FRAG == frag->type) {
btl->btl_mpool->mpool_release(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->vapi_reg);
if (MCA_BTL_MVAPI_FRAG_FRAG == frag->type && frag->registration != NULL) {
btl->btl_mpool->mpool_deregister(btl->btl_mpool, (mca_mpool_base_registration_t*) frag->registration);
frag->registration = NULL;
}
MCA_BTL_IB_FRAG_RETURN(btl, frag);
@ -267,164 +268,99 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
)
{
mca_btl_mvapi_module_t* mvapi_btl;
mca_btl_mvapi_frag_t* frag;
mca_mpool_mvapi_registration_t * vapi_reg;
mca_btl_mvapi_frag_t* frag = NULL;
mca_btl_mvapi_reg_t *mvapi_reg;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data = *size;
int rc;
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
if(NULL != vapi_reg && 0 == ompi_convertor_need_buffers(convertor)){
size_t reg_len;
/* the memory is already pinned and we have contiguous user data */
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag){
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
reg_len = (unsigned char*)vapi_reg->base_reg.bound - (unsigned char*)iov.iov_base + 1;
mvapi_btl = (mca_btl_mvapi_module_t*)btl;
frag->sg_entry.len = max_data;
frag->sg_entry.lkey = vapi_reg->l_key;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base;
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->l_key;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->vapi_reg = vapi_reg;
btl->btl_mpool->mpool_retain(btl->btl_mpool, (mca_mpool_base_registration_t*) vapi_reg);
if(vapi_reg->base_reg.flags & MCA_MPOOL_FLAGS_CACHE) {
assert(vapi_reg->base_reg.ref_count >= 4);
} else {
assert(vapi_reg->base_reg.ref_count >= 2);
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
if(registration != NULL || max_data > btl->btl_max_send_size) {
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
*size = max_data;
if(NULL == registration) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
iov.iov_base, max_data, 0, &registration);
if(OMPI_SUCCESS != rc || NULL == registration) {
MCA_BTL_IB_FRAG_RETURN(mvapi_btl, frag);
return NULL;
}
frag->registration = (mca_btl_mvapi_reg_t*)registration;
}
mvapi_reg = (mca_btl_mvapi_reg_t*)registration;
frag->base.des_flags = 0;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->sg_entry.len = max_data;
frag->sg_entry.lkey = mvapi_reg->l_key;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t)iov.iov_base;
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
"frag->segment.seg_key.key32[0] = %lu",
frag->sg_entry.lkey, frag->sg_entry.addr,
frag->segment.seg_key.key32[0]));
return &frag->base;
}
return &frag->base;
} else if( max_data > btl->btl_max_send_size &&
ompi_convertor_need_buffers(convertor) == 0 &&
reserve == 0)
{
/* The user buffer is contigous and we are asked to send more than the max send size. */
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag){
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
frag->base.des_flags = 0;
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
iov.iov_base,
max_data,
0,
(mca_mpool_base_registration_t**) &vapi_reg);
if(OMPI_SUCCESS != rc || NULL == vapi_reg) {
BTL_ERROR(("mpool_register(%p,%lu) failed", iov.iov_base, max_data));
MCA_BTL_IB_FRAG_RETURN(btl, frag);
return NULL;
}
frag->sg_entry.len = max_data;
frag->sg_entry.lkey = vapi_reg->l_key;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base;
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->l_key;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->vapi_reg = vapi_reg;
return &frag->base;
} else if (max_data+reserve <= btl->btl_eager_limit) {
/* the data is small enough to fit in the eager frag and
either we received no prepinned memory or leave pinned is
not set
*/
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
if( rc < 0 ) {
MCA_BTL_IB_FRAG_RETURN(btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return &frag->base;
} else {
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
if(max_data + reserve > btl->btl_max_send_size){
max_data = btl->btl_max_send_size - reserve;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
if( rc < 0 ) {
MCA_BTL_IB_FRAG_RETURN(btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags=0;
return &frag->base;
}
return NULL;
if(max_data + reserve <= btl->btl_eager_limit) {
/* the data is small enough to fit in the eager frag and
* memory is not prepinned */
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
}
if(NULL == frag) {
/* the data doesn't fit into eager frag or eger frag is
* not available */
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
if(max_data + reserve > btl->btl_max_send_size) {
max_data = btl->btl_max_send_size - reserve;
}
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
if( rc < 0 ) {
MCA_BTL_IB_FRAG_RETURN(mvapi_btl, frag);
return NULL;
}
*size = max_data;
frag->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return &frag->base;
}
@ -453,12 +389,11 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
{
mca_btl_mvapi_module_t* mvapi_btl;
mca_btl_mvapi_frag_t* frag;
mca_mpool_mvapi_registration_t * vapi_reg;
mca_btl_mvapi_reg_t *mvapi_reg;
ptrdiff_t lb;
int rc;
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
vapi_reg = (mca_mpool_mvapi_registration_t*) registration;
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
@ -471,42 +406,32 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
frag->segment.seg_addr.pval = convertor->pBaseBuf + lb + convertor->bConverted;
frag->base.des_flags = 0;
if(NULL!= vapi_reg){
/* the memory is already pinned- use it*/
btl->btl_mpool->mpool_retain(btl->btl_mpool, (mca_mpool_base_registration_t*) vapi_reg);
if(vapi_reg->base_reg.flags & MCA_MPOOL_FLAGS_CACHE) {
assert(vapi_reg->base_reg.ref_count >= 4);
} else {
assert(vapi_reg->base_reg.ref_count >= 2);
}
} else {
if(NULL == registration) {
/* we didn't get a memory registration passed in, so we have to register the region
* ourselves
*/
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
frag->segment.seg_addr.pval,
*size,
0,
(mca_mpool_base_registration_t**) &vapi_reg);
if(OMPI_SUCCESS != rc || NULL == vapi_reg) {
frag->segment.seg_addr.pval, *size, 0, &registration);
if(OMPI_SUCCESS != rc || NULL == registration) {
BTL_ERROR(("mpool_register(%p,%lu) failed: base %p lb %lu offset %lu",
frag->segment.seg_addr.pval, *size, convertor->pBaseBuf, lb, convertor->bConverted));
MCA_BTL_IB_FRAG_RETURN(btl, frag);
return NULL;
}
frag->registration = (mca_btl_mvapi_reg_t*)registration;
}
mvapi_reg = (mca_btl_mvapi_reg_t*)registration;
frag->sg_entry.len = *size;
frag->sg_entry.lkey = vapi_reg->l_key;
frag->sg_entry.len = *size;
frag->sg_entry.lkey = mvapi_reg->l_key;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->segment.seg_addr.pval;
frag->segment.seg_key.key32[0] = (uint32_t) vapi_reg->r_key;
frag->segment.seg_key.key32[0] =mvapi_reg->r_key;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->vapi_reg = vapi_reg;
return &frag->base;

Просмотреть файл

@ -192,7 +192,13 @@ struct mca_btl_mvapi_module_t {
uint32_t eager_rdma_buffers_count; /**< number of RDMA buffers */
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;
/* BTL-specific memory registration: extends the generic mpool
 * registration (which MUST stay the first member so the two pointer
 * types are interchangeable) with the VAPI handle and access keys
 * produced by VAPI_register_mr. */
struct mca_btl_mvapi_reg_t {
mca_mpool_base_registration_t base;
VAPI_mr_hndl_t hndl; /* Memory region handle */
VAPI_lkey_t l_key; /* Local key to registered memory */
VAPI_rkey_t r_key; /* Remote key to registered memory */
};
typedef struct mca_btl_mvapi_reg_t mca_btl_mvapi_reg_t;
#define MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, \
additional) \

Просмотреть файл

@ -47,10 +47,13 @@
#include <vapi.h>
#include <vapi_common.h>
#include "ompi/datatype/convertor.h"
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "btl_mvapi_endpoint.h"
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
static int mvapi_reg_mr(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
static int mvapi_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
mca_btl_mvapi_component_t mca_btl_mvapi_component = {
{
@ -147,7 +150,7 @@ int mca_btl_mvapi_component_open(void)
mca_btl_mvapi_param_register_int ("free_list_inc", "increment size of free lists",
32, &mca_btl_mvapi_component.ib_free_list_inc);
mca_btl_mvapi_param_register_string("mpool", "name of the memory pool to be used",
"mvapi", &mca_btl_mvapi_component.ib_mpool_name);
"rdma", &mca_btl_mvapi_component.ib_mpool_name);
mca_btl_mvapi_param_register_int("reg_mru_len", "length of the registration cache most recently used list",
16, (int*) &mca_btl_mvapi_component.reg_mru_len);
#ifdef VAPI_FEATURE_SRQ
@ -337,7 +340,51 @@ static void mca_btl_mvapi_control(
}
}
/* Pin (base, size) through VAPI and record the resulting region handle
 * and local/remote keys in the registration descriptor (mpool
 * register_mem callback; reg_data is the owning mvapi BTL module). */
static int mvapi_reg_mr(void *reg_data, void *base, size_t size,
        mca_mpool_base_registration_t *reg)
{
    mca_btl_mvapi_module_t *btl_module = (mca_btl_mvapi_module_t*)reg_data;
    mca_btl_mvapi_reg_t *region = (mca_btl_mvapi_reg_t*)reg;
    VAPI_mrw_t request, result;
    VAPI_ret_t status;

    memset(&request, 0, sizeof request);
    memset(&result, 0, sizeof result);

    request.type = VAPI_MR;
    request.start = (VAPI_virt_addr_t)(MT_virt_addr_t)base;
    request.size = size;
    request.pd_hndl = btl_module->ptag;
    /* local writes plus remote RDMA read/write access */
    request.acl =
        VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE | VAPI_EN_REMOTE_READ;

    region->hndl = VAPI_INVAL_HNDL;
    status = VAPI_register_mr(btl_module->nic, &request, &region->hndl,
        &result);
    if(VAPI_OK != status)
        return OMPI_ERR_OUT_OF_RESOURCE;

    region->l_key = result.l_key;
    region->r_key = result.r_key;
    return OMPI_SUCCESS;
}
/* Release a VAPI memory registration (mpool deregister_mem callback).
 * A handle of VAPI_INVAL_HNDL means nothing was ever pinned, so there
 * is nothing to release — report success. */
static int mvapi_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_mvapi_module_t *btl_module = (mca_btl_mvapi_module_t*)reg_data;
    mca_btl_mvapi_reg_t *region = (mca_btl_mvapi_reg_t*)reg;
    VAPI_ret_t status;

    if(VAPI_INVAL_HNDL == region->hndl)
        return OMPI_SUCCESS;

    status = VAPI_deregister_mr(btl_module->nic, region->hndl);
    if(VAPI_OK != status) {
        opal_output(0, "%s: error unpinning mvapi memory errno says %s\n",
            __func__, strerror(errno));
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
/*
* IB component initialization:
@ -513,9 +560,10 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
return NULL;
}
hca_pd.hca = mvapi_btl->nic;
hca_pd.pd_tag = mvapi_btl->ptag;
hca_pd.reg_data = mvapi_btl;
hca_pd.sizeof_reg = sizeof(mca_btl_mvapi_reg_t);
hca_pd.register_mem = mvapi_reg_mr;
hca_pd.deregister_mem = mvapi_dereg_mr;
/* initialize the memory pool using the hca */
mvapi_btl->super.btl_mpool =
mca_mpool_base_module_create(mca_btl_mvapi_component.ib_mpool_name,

Просмотреть файл

@ -12,16 +12,16 @@
#include "ompi_config.h"
#include "btl_mvapi.h"
#include "btl_mvapi_endpoint.h"
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
struct mca_btl_mvapi_reg_t;
struct mca_btl_mvapi_eager_rdma_local_t {
ompi_ptr_t base; /**< buffer for RDMAing eager messages */
mca_mpool_mvapi_registration_t *reg;
struct mca_btl_mvapi_reg_t *reg;
uint16_t head; /**< RDMA buffer to poll */
uint16_t tail; /**< Needed for credit managment */
int32_t credits; /**< number of RDMA credits */

Просмотреть файл

@ -1230,7 +1230,8 @@ void mca_btl_mvapi_endpoint_connect_eager_rdma(
buf = mvapi_btl->super.btl_mpool->mpool_alloc(mvapi_btl->super.btl_mpool,
mvapi_btl->eager_rdma_frag_size *
mca_btl_mvapi_component.eager_rdma_num, 0, 0,
mca_btl_mvapi_component.eager_rdma_num, 0,
MCA_MPOOL_FLAGS_CACHE_BYPASS,
(mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);
if(!buf)
@ -1239,7 +1240,7 @@ void mca_btl_mvapi_endpoint_connect_eager_rdma(
for(i = 0; i < mca_btl_mvapi_component.eager_rdma_num; i++) {
ompi_free_list_item_t *item = (ompi_free_list_item_t *)(buf +
i*mvapi_btl->eager_rdma_frag_size);
item->user_data = endpoint->eager_rdma_local.reg;
item->user_data = (void*)endpoint->eager_rdma_local.reg;
OBJ_CONSTRUCT(item, mca_btl_mvapi_recv_frag_eager_t);
((mca_btl_mvapi_frag_t*)item)->endpoint = endpoint;
((mca_btl_mvapi_frag_t*)item)->type = MCA_BTL_MVAPI_FRAG_EAGER_RDMA;

Просмотреть файл

@ -26,7 +26,7 @@
#include "btl_mvapi_frag.h"
#include "btl_mvapi.h"
#include "btl_mvapi_eager_rdma.h"
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include <vapi.h>
#include <mtl_common.h>

Просмотреть файл

@ -18,25 +18,22 @@
#include "btl_mvapi_frag.h"
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
static void mca_btl_mvapi_frag_common_constructor( mca_btl_mvapi_frag_t* frag)
{
mca_mpool_mvapi_registration_t* mem_hndl = (mca_mpool_mvapi_registration_t*) frag->base.super.user_data;
mca_btl_mvapi_reg_t* mem_hndl =
(mca_btl_mvapi_reg_t*)frag->base.super.user_data;
frag->hdr = (mca_btl_mvapi_header_t*) (frag+1); /* initialize btl header to start at end of frag */
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_mvapi_header_t);
/* init the segment address to start after the btl header */
frag->segment.seg_len = frag->size;
frag->segment.seg_key.key32[0] = (uint32_t) mem_hndl->l_key;
frag->sg_entry.lkey = mem_hndl->l_key;
frag->sg_entry.lkey = mem_hndl->l_key;
frag->segment.seg_key.key32[0] = frag->sg_entry.lkey;
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
frag->base.des_flags = 0;
}
static void mca_btl_mvapi_send_frag_common_constructor(mca_btl_mvapi_frag_t* frag)
{

Просмотреть файл

@ -27,12 +27,13 @@
#include <vapi.h>
#include <mtl_common.h>
#include <vapi_common.h>
#include "ompi/mca/mpool/mvapi/mpool_mvapi.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
struct mca_btl_mvapi_reg_t;
struct mca_btl_mvapi_header_t {
mca_btl_base_tag_t tag;
int16_t credits;
@ -95,7 +96,7 @@ struct mca_btl_mvapi_frag_t {
VAPI_sg_lst_entry_t sg_entry;
mca_btl_mvapi_header_t *hdr;
mca_btl_mvapi_footer_t *ftr;
mca_mpool_mvapi_registration_t * vapi_reg;
struct mca_btl_mvapi_reg_t *registration;
ompi_free_list_t* my_list;
};
typedef struct mca_btl_mvapi_frag_t mca_btl_mvapi_frag_t;

Просмотреть файл

@ -32,7 +32,7 @@
#include "ompi/datatype/datatype.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include <errno.h>
#include <string.h>
#include <math.h>
@ -292,10 +292,11 @@ int mca_btl_openib_free(
{
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)des;
if(frag->size == 0) {
btl->btl_mpool->mpool_release(btl->btl_mpool,
(mca_mpool_base_registration_t*)
frag->openib_reg);
if(MCA_BTL_OPENIB_FRAG_FRAG == frag->type && frag->registration != NULL) {
btl->btl_mpool->mpool_deregister(btl->btl_mpool,
(mca_mpool_base_registration_t*)
frag->registration);
frag->registration = NULL;
}
MCA_BTL_IB_FRAG_RETURN(((mca_btl_openib_module_t*) btl), frag);
@ -335,165 +336,101 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
size_t* size
)
{
mca_btl_openib_module_t* openib_btl;
mca_btl_openib_frag_t* frag;
mca_mpool_openib_registration_t * openib_reg;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data = *size;
int rc;
openib_btl = (mca_btl_openib_module_t*) btl;
openib_reg = (mca_mpool_openib_registration_t*) registration;
mca_btl_openib_module_t *openib_btl;
mca_btl_openib_frag_t *frag = NULL;
mca_btl_openib_reg_t *openib_reg;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data = *size;
int rc;
if(NULL != openib_reg && 0 == ompi_convertor_need_buffers(convertor)){
size_t reg_len;
openib_btl = (mca_btl_openib_module_t*)btl;
/* the memory is already pinned and we have contiguous user data */
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
if(registration != NULL || max_data > btl->btl_max_send_size) {
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag){
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
reg_len = (unsigned char*)openib_reg->base_reg.bound - (unsigned char*)iov.iov_base + 1;
frag->mr = openib_reg->mr;
frag->sg_entry.length = max_data;
frag->sg_entry.lkey = frag->mr->lkey;
frag->sg_entry.addr = (unsigned long) iov.iov_base;
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->openib_reg= openib_reg;
btl->btl_mpool->mpool_retain(btl->btl_mpool, (mca_mpool_base_registration_t*) openib_reg);
return &frag->base;
} else if( max_data > btl->btl_max_send_size &&
ompi_convertor_need_buffers(convertor) == 0 &&
reserve == 0) {
/* The user buffer is contigous and we are asked to send more than the max send size. */
MCA_BTL_IB_FRAG_ALLOC_FRAG(openib_btl, frag, rc);
if(NULL == frag){
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
frag->base.des_flags = 0;
*size = max_data;
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
iov.iov_base,
max_data,
0,
(mca_mpool_base_registration_t**) &openib_reg);
if(OMPI_SUCCESS != rc || NULL == openib_reg) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
if(NULL == registration) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
iov.iov_base, max_data, 0, &registration);
if(OMPI_SUCCESS != rc || NULL == registration) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
return NULL;
}
/* keep track of the registration we did */
frag->registration = (mca_btl_openib_reg_t*)registration;
}
openib_reg = (mca_btl_openib_reg_t*)registration;
frag->base.des_flags = 0;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->sg_entry.length = max_data;
frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (unsigned long)iov.iov_base;
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
"frag->segment.seg_key.key32[0] = %lu",
frag->sg_entry.lkey, frag->sg_entry.addr,
frag->segment.seg_key.key32[0]));
return &frag->base;
}
}
if(max_data + reserve <= btl->btl_eager_limit) {
/* the data is small enough to fit in the eager frag and
* memory is not prepinned */
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
}
if(NULL == frag) {
/* the data doesn't fit into eager frag or eger frag is
* not available */
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
frag->mr = openib_reg->mr;
frag->sg_entry.length = max_data;
frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (unsigned long) iov.iov_base;
frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->rkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->openib_reg = openib_reg;
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu", frag->sg_entry.lkey, frag->sg_entry.addr));
return &frag->base;
} else if (max_data+reserve <= btl->btl_eager_limit) {
/* the data is small enough to fit in the eager frag and
either we received no prepinned memory or leave pinned is
not set
*/
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
if( rc < 0 ) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return &frag->base;
} else {
MCA_BTL_IB_FRAG_ALLOC_MAX(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
if(max_data + reserve > btl->btl_max_send_size){
max_data = btl->btl_max_send_size - reserve;
if(max_data + reserve > btl->btl_max_send_size) {
max_data = btl->btl_max_send_size - reserve;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
if( rc < 0 ) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t) frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags=0;
return &frag->base;
}
return NULL;
iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
if(rc < 0) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
return NULL;
}
*size = max_data;
frag->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return &frag->base;
}
/**
@ -513,69 +450,62 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size)
{
mca_btl_openib_module_t* openib_btl;
mca_btl_openib_frag_t* frag;
mca_mpool_openib_registration_t * openib_reg;
int rc;
mca_btl_openib_module_t *openib_btl;
mca_btl_openib_frag_t *frag;
mca_btl_openib_reg_t *openib_reg;
int rc;
ptrdiff_t lb;
size_t reg_len;
openib_btl = (mca_btl_openib_module_t*) btl;
openib_reg = (mca_mpool_openib_registration_t*) registration;
openib_btl = (mca_btl_openib_module_t*)btl;
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag){
return NULL;
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
ompi_ddt_type_lb(convertor->pDesc, &lb);
frag->segment.seg_len = *size;
frag->segment.seg_addr.pval = convertor->pBaseBuf + lb + convertor->bConverted;
frag->base.des_flags = 0;
frag->segment.seg_addr.pval = convertor->pBaseBuf + lb +
convertor->bConverted;
if(NULL!= openib_reg){
/* the memory is already pinned try to use it if the pinned region is large enough*/
reg_len = (unsigned char*)openib_reg->base_reg.bound - (unsigned char*)frag->segment.seg_addr.pval + 1;
btl->btl_mpool->mpool_retain(btl->btl_mpool,
(mca_mpool_base_registration_t*) openib_reg);
} else {
/* we didn't get a memory registration passed in, so we have to register the region
* ourselves
if(NULL == registration){
/* we didn't get a memory registration passed in, so we have to
* register the region ourselves
*/
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
frag->segment.seg_addr.pval,
*size,
0,
(mca_mpool_base_registration_t**) &openib_reg);
if(OMPI_SUCCESS != rc || NULL == openib_reg) {
frag->segment.seg_addr.pval, *size, 0, &registration);
if(OMPI_SUCCESS != rc || NULL == registration) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
return NULL;
}
/* keep track of the registration we did */
frag->registration = (mca_btl_openib_reg_t*)registration;
}
openib_reg = (mca_btl_openib_reg_t*)registration;
frag->mr = openib_reg->mr;
frag->sg_entry.length = *size;
frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (unsigned long) frag->segment.seg_addr.pval;
frag->segment.seg_key.key32[0] = frag->mr->rkey;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->openib_reg = openib_reg;
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu frag->segment.seg_key.key32[0] = %lu" , frag->sg_entry.lkey, frag->sg_entry.addr, frag->segment.seg_key.key32[0]));
frag->sg_entry.length = *size;
frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (unsigned long) frag->segment.seg_addr.pval;
return &frag->base;
frag->segment.seg_len = *size;
frag->segment.seg_key.key32[0] = openib_reg->mr->rkey;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->base.des_flags = 0;
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
"frag->segment.seg_key.key32[0] = %lu",
frag->sg_entry.lkey, frag->sg_entry.addr,
frag->segment.seg_key.key32[0]));
return &frag->base;
}
int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)

Просмотреть файл

@ -220,9 +220,15 @@ struct mca_btl_openib_module_t {
orte_pointer_array_t *endpoints;
}; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
extern mca_btl_openib_module_t mca_btl_openib_module;
/* Memory-registration record used by the openib BTL with the shared
 * "rdma" mpool: the generic registration base plus the verbs memory
 * region handle produced by ibv_reg_mr() (see openib_reg_mr below). */
struct mca_btl_openib_reg_t {
    mca_mpool_base_registration_t base; /* generic mpool registration (must be first) */
    struct ibv_mr *mr;                  /* verbs MR handle; NULL when not registered */
};
typedef struct mca_btl_openib_reg_t mca_btl_openib_reg_t;
#if OMPI_ENABLE_PROGRESS_THREADS == 1
extern void* mca_btl_openib_progress_thread(opal_object_t*);
#endif
@ -417,10 +423,8 @@ extern mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
* @param frag (IN) IB send fragment
*
*/
extern void mca_btl_openib_send_frag_return(
struct mca_btl_base_module_t* btl,
struct mca_btl_openib_frag_t*
);
extern void mca_btl_openib_send_frag_return(mca_btl_base_module_t* btl,
mca_btl_openib_frag_t*);
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t* openib_btl);

Просмотреть файл

@ -35,6 +35,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/sys_info.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "ompi/mca/btl/base/base.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
@ -81,6 +82,9 @@ static int btl_openib_module_progress(mca_btl_openib_module_t *openib_btl);
static void btl_openib_frag_progress_pending(
mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint,
const int prio);
static int openib_reg_mr(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
mca_btl_openib_component_t mca_btl_openib_component = {
@ -235,6 +239,36 @@ static void btl_openib_control(struct mca_btl_base_module_t* btl,
}
}
/*
 * Register [base, base+size) with the HCA's protection domain so the
 * region can be used for send/recv and as an RDMA target.
 *
 * @param reg_data opaque mpool callback context; the owning mca_btl_openib_hca_t
 * @param base     start of the region to pin
 * @param size     length of the region in bytes
 * @param reg      registration record to fill in (really a mca_btl_openib_reg_t)
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if ibv_reg_mr() fails
 */
static int openib_reg_mr(void *reg_data, void *base, size_t size,
        mca_mpool_base_registration_t *reg)
{
    mca_btl_openib_hca_t *hca = (mca_btl_openib_hca_t*)reg_data;
    mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;
    /* local write plus remote read/write: the same MR serves both
     * eager send buffers and RDMA get/put targets */
    int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE |
        IBV_ACCESS_REMOTE_READ;

    openib_reg->mr = ibv_reg_mr(hca->ib_pd, base, size, access);

    return (NULL == openib_reg->mr) ? OMPI_ERR_OUT_OF_RESOURCE : OMPI_SUCCESS;
}
/*
 * Unpin a region previously registered by openib_reg_mr(). A record whose
 * mr is already NULL is treated as a no-op success. On success the mr
 * handle is cleared so the record can be reused safely.
 *
 * @param reg_data unused (mpool callback context)
 * @param reg      registration to release (really a mca_btl_openib_reg_t)
 * @return OMPI_SUCCESS, or OMPI_ERROR if ibv_dereg_mr() fails
 */
static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_openib_reg_t *openib_reg = (mca_btl_openib_reg_t*)reg;

    if(NULL != openib_reg->mr && ibv_dereg_mr(openib_reg->mr)) {
        opal_output(0, "%s: error unpinning openib memory errno says %s\n",
                __func__, strerror(errno));
        return OMPI_ERROR;
    }
    openib_reg->mr = NULL;
    return OMPI_SUCCESS;
}
static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
uint8_t port_num, struct ibv_port_attr *ib_port_attr)
{
@ -399,7 +433,10 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
goto close_hca;
}
mpool_resources.ib_pd = hca->ib_pd;
mpool_resources.reg_data = (void*)hca;
mpool_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t);
mpool_resources.register_mem = openib_reg_mr;
mpool_resources.deregister_mem = openib_dereg_mr;
hca->mpool =
mca_mpool_base_module_create(mca_btl_openib_component.ib_mpool_name,
hca, &mpool_resources);
@ -469,6 +506,7 @@ free_hca:
free(hca);
return ret;
}
/*
* IB component initialization:
* (1) read interface list from kernel and compare against component parameters

Просмотреть файл

@ -12,7 +12,6 @@
#include "ompi_config.h"
#include "btl_openib.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@ -20,7 +19,7 @@ extern "C" {
struct mca_btl_openib_eager_rdma_local_t {
ompi_ptr_t base; /**< buffer for RDMAing eager messages */
mca_mpool_openib_registration_t *reg;
mca_btl_openib_reg_t *reg;
uint16_t head; /**< RDMA buffer to poll */
uint16_t tail; /**< Needed for credit managment */
int32_t credits; /**< number of RDMA credits */

Просмотреть файл

@ -1206,7 +1206,8 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
openib_btl->eager_rdma_frag_size *
mca_btl_openib_component.eager_rdma_num +
mca_btl_openib_component.buffer_alignment +
sizeof(mca_btl_openib_recv_frag_eager_t), 0, 0,
sizeof(mca_btl_openib_recv_frag_eager_t), 0,
MCA_MPOOL_FLAGS_CACHE_BYPASS,
(mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);
if(!buf)
@ -1221,7 +1222,7 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
for(i = 0; i < mca_btl_openib_component.eager_rdma_num; i++) {
ompi_free_list_item_t *item = (ompi_free_list_item_t *)(buf +
i*openib_btl->eager_rdma_frag_size);
item->user_data = endpoint->eager_rdma_local.reg;
item->user_data = (void*)endpoint->eager_rdma_local.reg;
OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_eager_t);
((mca_btl_openib_frag_t*)item)->endpoint = endpoint;
((mca_btl_openib_frag_t*)item)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;

Просмотреть файл

@ -29,7 +29,6 @@
#include <errno.h>
#include <string.h>
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {

Просмотреть файл

@ -18,23 +18,21 @@
#include "btl_openib_frag.h"
#include "btl_openib_eager_rdma.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"
static void mca_btl_openib_frag_common_constructor( mca_btl_openib_frag_t* frag)
{
mca_mpool_openib_registration_t* registration =
(mca_mpool_openib_registration_t*) frag->base.super.user_data;
mca_btl_openib_reg_t* registration =
(mca_btl_openib_reg_t*)frag->base.super.user_data;
frag->hdr = (mca_btl_openib_header_t*) (frag+1); /* initialize the btl header to start at end of frag */
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
/* init the segment address to start after the btl header */
if(registration) {
frag->mr = registration->mr;
frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->lkey;
frag->sg_entry.lkey = frag->mr->lkey;
frag->registration = registration;
frag->sg_entry.lkey = registration->mr->lkey;
frag->segment.seg_key.key32[0] = frag->sg_entry.lkey;
}
frag->segment.seg_len = frag->size;
frag->sg_entry.addr = (unsigned long) frag->hdr;

Просмотреть файл

@ -22,13 +22,14 @@
#include "ompi_config.h"
#include <infiniband/verbs.h>
#include "ompi/mca/mpool/openib/mpool_openib.h"
#include "ompi/mca/btl/btl.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
struct mca_btl_openib_reg_t;
struct mca_btl_openib_header_t {
mca_btl_base_tag_t tag;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
@ -173,8 +174,7 @@ struct mca_btl_openib_frag_t {
struct ibv_send_wr sr_desc;
} wr_desc;
struct ibv_sge sg_entry;
struct ibv_mr *mr;
mca_mpool_openib_registration_t * openib_reg;
struct mca_btl_openib_reg_t *registration;
};
typedef struct mca_btl_openib_frag_t mca_btl_openib_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_frag_t);

Просмотреть файл

@ -145,7 +145,7 @@ int btl_openib_register_mca_params(void)
REGINT_GE_ONE));
CHECK(reg_string("mpool",
"Name of the memory pool to be used (it is unlikely that you will ever want to change this",
"openib", &mca_btl_openib_component.ib_mpool_name,
"rdma", &mca_btl_openib_component.ib_mpool_name,
0));
CHECK(reg_int("reg_mru_len",
"Length of the registration cache most recently used list "

Просмотреть файл

@ -34,9 +34,12 @@
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "ompi/proc/proc.h"
static int udapl_reg_mr(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
static int udapl_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
mca_btl_udapl_module_t mca_btl_udapl_module = {
{
@ -67,6 +70,49 @@ mca_btl_udapl_module_t mca_btl_udapl_module = {
}
};
/*
 * Register [base, base+size) as a uDAPL local memory region (LMR) so it
 * can be used for DTO operations and advertised for RDMA.
 *
 * @param reg_data opaque mpool callback context; the owning mca_btl_udapl_module_t
 * @param base     start of the region to pin
 * @param size     length of the region in bytes
 * @param reg      registration record to fill in (really a mca_btl_udapl_reg_t)
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if dat_lmr_create() fails
 */
static int udapl_reg_mr(void *reg_data, void *base, size_t size,
        mca_mpool_base_registration_t *reg)
{
    mca_btl_udapl_module_t *btl = (mca_btl_udapl_module_t*)reg_data;
    mca_btl_udapl_reg_t *udapl_reg = (mca_btl_udapl_reg_t*)reg;
    DAT_REGION_DESCRIPTION region;
    DAT_VLEN dat_size;   /* actual registered length, returned by dat_lmr_create */
    DAT_VADDR dat_addr;  /* actual registered address, returned by dat_lmr_create */
    int rc;

    /* describe the region by virtual address (DAT_MEM_TYPE_VIRTUAL below) */
    region.for_va = base;
    /* pre-fill the triplet used later when posting DTOs against this LMR */
    udapl_reg->lmr_triplet.virtual_address = (DAT_VADDR)base;
    udapl_reg->lmr_triplet.segment_length = size;
    udapl_reg->lmr = NULL;

    /* create the LMR in this BTL's protection zone; also yields the LMR
     * context for local ops and the RMR context peers use for RDMA */
    rc = dat_lmr_create(btl->udapl_ia, DAT_MEM_TYPE_VIRTUAL, region, size,
            btl->udapl_pz, DAT_MEM_PRIV_ALL_FLAG, &udapl_reg->lmr,
            &udapl_reg->lmr_triplet.lmr_context, &udapl_reg->rmr_context,
            &dat_size, &dat_addr);

    if(rc != DAT_SUCCESS) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
/*
 * Release a uDAPL LMR previously created by udapl_reg_mr(). A record whose
 * lmr is already NULL is treated as a no-op success.
 *
 * Fix: clear udapl_reg->lmr after a successful dat_lmr_free() so a repeated
 * deregister of the same record is a harmless no-op instead of freeing a
 * stale handle. This matches the sibling openib_dereg_mr(), which nulls
 * its mr handle after ibv_dereg_mr().
 *
 * @param reg_data unused (mpool callback context)
 * @param reg      registration to release (really a mca_btl_udapl_reg_t)
 * @return OMPI_SUCCESS, or OMPI_ERROR if dat_lmr_free() fails
 */
static int udapl_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_udapl_reg_t *udapl_reg = (mca_btl_udapl_reg_t*)reg;
    int rc;

    if(udapl_reg->lmr != NULL) {
        rc = dat_lmr_free(udapl_reg->lmr);
        if(rc != DAT_SUCCESS) {
            opal_output(0, "%s: error unpinning dapl memory errno says %s\n",
                    __func__, strerror(errno));
            return OMPI_ERROR;
        }
        /* handle is gone; prevent any later double free */
        udapl_reg->lmr = NULL;
    }

    return OMPI_SUCCESS;
}
/**
* Initialize module module resources.
@ -153,9 +199,10 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
((struct sockaddr_in*)&btl->udapl_addr.addr)->sin_port = htons(port);
/* initialize the memory pool */
res.udapl_ia = btl->udapl_ia;
res.udapl_pz = btl->udapl_pz;
res.reg_data = btl;
res.sizeof_reg = sizeof(mca_btl_udapl_reg_t);
res.register_mem = udapl_reg_mr;
res.deregister_mem = udapl_dereg_mr;
btl->super.btl_mpool = mca_mpool_base_module_create(
mca_btl_udapl_component.udapl_mpool_name, &btl->super, &res);
@ -200,7 +247,6 @@ failure:
return OMPI_ERROR;
}
/*
* Cleanup/release module resources.
*/
@ -352,8 +398,7 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc(
((char *)frag->segment.seg_addr.pval + frag->segment.seg_len);
frag->triplet.segment_length =
frag->segment.seg_len + sizeof(mca_btl_udapl_footer_t);
assert(frag->triplet.lmr_context ==
((mca_mpool_udapl_registration_t*)frag->registration)->lmr_triplet.lmr_context);
assert(frag->triplet.lmr_context == frag->registration->lmr_triplet.lmr_context);
frag->btl = udapl_btl;
frag->base.des_src = &frag->segment;
@ -376,8 +421,8 @@ int mca_btl_udapl_free(
{
mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)des;
if(frag->size == 0) {
btl->btl_mpool->mpool_release(btl->btl_mpool, frag->registration);
if(frag->size == 0 && frag->registration != NULL) {
btl->btl_mpool->mpool_deregister(btl->btl_mpool, frag->registration);
MCA_BTL_UDAPL_FRAG_RETURN_USER(btl, frag);
} else if(frag->size == mca_btl_udapl_component.udapl_eager_frag_size) {
MCA_BTL_UDAPL_FRAG_RETURN_EAGER(btl, frag);

Просмотреть файл

@ -106,6 +106,14 @@ struct mca_btl_udapl_module_t {
typedef struct mca_btl_udapl_module_t mca_btl_udapl_module_t;
extern mca_btl_udapl_module_t mca_btl_udapl_module;
/* Memory-registration record used by the udapl BTL with the shared
 * "rdma" mpool: the generic registration base plus the uDAPL handles
 * produced by dat_lmr_create() (see udapl_reg_mr). */
struct mca_btl_udapl_reg_t {
    mca_mpool_base_registration_t base; /* generic mpool registration (must be first) */
    DAT_LMR_HANDLE lmr;          /* local memory region (LMR) */
    DAT_LMR_TRIPLET lmr_triplet; /* LMR triplet - context, address, length */
    DAT_RMR_CONTEXT rmr_context; /* remote memory region context handle */
};
typedef struct mca_btl_udapl_reg_t mca_btl_udapl_reg_t;
/**
* Report a uDAPL error - for debugging

Просмотреть файл

@ -33,7 +33,7 @@
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "btl_udapl.h"
#include "btl_udapl_frag.h"
#include "btl_udapl_endpoint.h"
@ -149,7 +149,7 @@ int mca_btl_udapl_component_open(void)
mca_btl_udapl_component.udapl_free_list_inc =
mca_btl_udapl_param_register_int("free_list_inc", 8);
mca_btl_udapl_component.udapl_mpool_name =
mca_btl_udapl_param_register_string("mpool", "udapl");
mca_btl_udapl_param_register_string("mpool", "rdma");
mca_btl_udapl_component.udapl_max_btls =
mca_btl_udapl_param_register_int("max_modules", 8);
mca_btl_udapl_component.udapl_evd_qlen =

Просмотреть файл

@ -30,7 +30,7 @@
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss.h"
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "btl_udapl.h"
#include "btl_udapl_endpoint.h"
#include "btl_udapl_proc.h"

Просмотреть файл

@ -21,12 +21,12 @@
#include "btl_udapl.h"
#include "btl_udapl_frag.h"
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
{
mca_mpool_udapl_registration_t* reg = frag->base.super.user_data;
mca_btl_udapl_reg_t* reg = (mca_btl_udapl_reg_t*)frag->base.super.user_data;
#if OMPI_ENABLE_DEBUG
frag->base.des_src = NULL;

Просмотреть файл

@ -58,7 +58,7 @@ struct mca_btl_udapl_frag_t {
struct mca_btl_udapl_module_t* btl;
struct mca_btl_base_endpoint_t *endpoint;
struct mca_mpool_base_registration_t* registration;
struct mca_btl_udapl_reg_t* registration;
DAT_LMR_TRIPLET triplet;
mca_btl_udapl_footer_t *ftr;

Просмотреть файл

@ -40,7 +40,9 @@ static void mca_mpool_base_registration_constructor( mca_mpool_base_registration
reg->mpool = NULL;
reg->base = NULL;
reg->bound = NULL;
reg->alloc_base = NULL;
reg->ref_count = 0;
reg->flags = 0;
}
static void mca_mpool_base_registration_destructor( mca_mpool_base_registration_t * reg )
@ -74,58 +76,37 @@ OBJ_CLASS_INSTANCE(
* @retval pointer to the allocated memory
* @retval NULL on failure
*/
void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
void *mca_mpool_base_alloc(size_t size, ompi_info_t *info)
{
opal_list_item_t * item;
int num_modules = opal_list_get_size(&mca_mpool_base_modules);
int reg_module_num = 0;
int i, j, num_keys;
int i, num_keys;
mca_mpool_base_selected_module_t * current;
mca_mpool_base_selected_module_t * no_reg_function = NULL;
mca_mpool_base_selected_module_t ** has_reg_function = NULL;
mca_mpool_base_registration_t * registration;
mca_mpool_base_tree_item_t* mpool_tree_item;
mca_mpool_base_tree_item_t* mpool_tree_item = NULL;
mca_mpool_base_module_t *mpool;
void * mem = NULL;
char * key = NULL;
char * value = NULL;
int flag = 0;
bool match_found = false;
bool mpool_requested = false;
bool match_found = false, mpool_requested = false;
if (mca_mpool_base_use_mem_hooks &&
0 != (OPAL_MEMORY_FREE_SUPPORT & opal_mem_hooks_support_level())) {
/* if we're using memory hooks, it's possible (likely, based
on testing) that for some tests the memory returned from
any of the malloc functions below will be part of a larger
(lazily) freed chunk and therefore already be pinned.
Which causes our caches to get a little confused, as the
alloc/free pair are supposed to always have an exact match
in the rcache. This wasn't happening, leading to badness.
Instead, just malloc and we'll get to the pinning later,
when we try to first use it. Since we're leaving things
pinned, there's no advantage to doing it now over first
use, and it works if we wait ... */
return malloc(size);
}
if (num_modules > 0) {
if(num_modules > 0) {
has_reg_function = (mca_mpool_base_selected_module_t **)
malloc(num_modules * sizeof(mca_mpool_base_module_t *));
if(!has_reg_function){
return NULL;
}
malloc(num_modules * sizeof(mca_mpool_base_module_t *));
if(!has_reg_function)
goto out;
}
mpool_tree_item = mca_mpool_base_tree_item_get();
if(NULL == mpool_tree_item){
if(has_reg_function) {
free(has_reg_function);
}
return NULL;
}
if(!mpool_tree_item)
goto out;
mpool_tree_item->count = 0;
if(&ompi_mpi_info_null == info)
{
@ -182,10 +163,7 @@ void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
/* there was more than one requested mpool that lacks
* a registration function, so return failure */
free(key);
if(has_reg_function) {
free(has_reg_function);
}
return NULL;
goto out;
}
no_reg_function = current;
}
@ -200,10 +178,7 @@ void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
/* one of the keys given to us by the user did not match any
* mpools, so return an error */
free(key);
if(has_reg_function) {
free(has_reg_function);
}
return NULL;
goto out;
}
}
free(key);
@ -211,76 +186,59 @@ void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
if(NULL == no_reg_function && 0 == reg_module_num)
{
if(has_reg_function) {
free(has_reg_function);
}
if(!mpool_requested)
{
/* if the info argument was NULL and there were no useable mpools
* or there user provided info object but did not specifiy a "mpool" key,
* just malloc the memory and return it */
mem = malloc(size);
if(NULL != mem){
/* don't need the tree */
mca_mpool_base_tree_item_put(mpool_tree_item);
return mem;
}
goto out;
}
/* the user passed info but we were not able to use any of the mpools
* specified */
return NULL;
goto out;
}
i = j = 0;
num_modules = 0;
if(NULL != no_reg_function)
{
mca_mpool_base_module_t* mpool = no_reg_function->mpool_module;
mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST, &registration);
num_modules++;
mpool_tree_item->key = mem;
mpool_tree_item->mpools[j] = mpool;
mpool_tree_item->regs[j++] = registration;
}
else
{
mca_mpool_base_module_t* mpool = has_reg_function[i]->mpool_module;
mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST, &registration);
i++;
num_modules++;
mpool_tree_item->key = mem;
mpool_tree_item->mpools[j] = mpool;
mpool_tree_item->regs[j++] = registration;
mpool = no_reg_function->mpool_module;
i = 0;
} else {
mpool = has_reg_function[0]->mpool_module;
i = 1;
}
mem = mpool->mpool_alloc(mpool, size, 0, MCA_MPOOL_FLAGS_PERSIST,
&registration);
if(NULL == mem)
goto out;
mpool_tree_item->key = mem;
mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
while(i < reg_module_num)
{
mca_mpool_base_module_t* mpool = has_reg_function[i]->mpool_module;
if(OMPI_SUCCESS != mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST, &registration))
{
if (has_reg_function) {
free(has_reg_function);
}
return NULL;
} else {
mpool_tree_item->mpools[j] = mpool;
mpool_tree_item->regs[j++] = registration;
num_modules++;
mpool = has_reg_function[i]->mpool_module;
if(mpool->mpool_register(mpool, mem, size, MCA_MPOOL_FLAGS_PERSIST,
&registration) != OMPI_SUCCESS) {
goto out;
}
mpool_tree_item->mpools[mpool_tree_item->count] = mpool;
mpool_tree_item->regs[mpool_tree_item->count++] = registration;
i++;
}
if(has_reg_function) {
free(has_reg_function);
}
/* null terminated array */
mpool_tree_item->mpools[j] = NULL;
mpool_tree_item->regs[j] = NULL;
mca_mpool_base_tree_insert(mpool_tree_item);
mpool_tree_item = NULL; /* prevent it to be deleted below */
out:
if(mpool_tree_item)
mca_mpool_base_tree_item_put(mpool_tree_item);
if(has_reg_function)
free(has_reg_function);
return mem;
}
@ -292,49 +250,38 @@ void * mca_mpool_base_alloc(size_t size, ompi_info_t * info)
* @retval OMPI_SUCCESS
* @retval OMPI_ERR_BAD_PARAM if the passed base pointer was invalid
*/
int mca_mpool_base_free(void * base)
int mca_mpool_base_free(void *base)
{
int i = 0, rc = OMPI_SUCCESS;
mca_mpool_base_tree_item_t* mpool_tree_item = NULL;
mca_mpool_base_module_t* mpool;
mca_mpool_base_registration_t* reg;
if(!base) {
mca_mpool_base_tree_item_t *mpool_tree_item = NULL;
mca_mpool_base_module_t *mpool;
mca_mpool_base_registration_t *reg;
int i, rc;
if(!base) {
return OMPI_ERROR;
}
/* see comment in alloc function above */
if (mca_mpool_base_use_mem_hooks &&
0 != (OPAL_MEMORY_FREE_SUPPORT & opal_mem_hooks_support_level())) {
mpool_tree_item = mca_mpool_base_tree_find(base);
if(!mpool_tree_item) {
/* nothing in the tree this was just plain old malloc'd memory */
free(base);
return OMPI_SUCCESS;
}
mpool_tree_item = mca_mpool_base_tree_find(base);
if(!mpool_tree_item) {
/* nothing in the tree this was just
plain old malloc'd memory */
free(base);
return OMPI_SUCCESS;
}
for(i = 1; i < MCA_MPOOL_BASE_TREE_MAX; i++) {
for(i = 1; i < mpool_tree_item->count; i++) {
mpool = mpool_tree_item->mpools[i];
reg = mpool_tree_item->regs[i];
if(mpool) {
if(mpool && mpool->mpool_deregister) {
mpool->mpool_deregister(mpool, reg);
} else {
break;
}
}
mpool = mpool_tree_item->mpools[0];
reg = mpool_tree_item->regs[0];
mpool->mpool_free(mpool, base, reg);
rc = mca_mpool_base_tree_delete(mpool_tree_item);
return rc;
}

Просмотреть файл

@ -19,6 +19,7 @@
* @file
*/
#include "ompi_config.h"
#include "opal/util/output.h"
#include "mpool_base_mem_cb.h"
#include "base.h"
#include "orte/util/proc_info.h"
@ -35,51 +36,30 @@ ompi_pointer_array_t mca_mpool_base_mem_cb_array;
void mca_mpool_base_mem_cb(void* base, size_t size, void* cbdata,
bool from_alloc)
{
uint32_t i, cnt;
mca_mpool_base_registration_t* reg;
mca_mpool_base_selected_module_t* current;
int rc;
opal_list_item_t* item;
void* base_addr;
void* bound_addr;
if(size == 0) {
return;
}
base_addr = down_align_addr( base, mca_mpool_base_page_size_log);
bound_addr = up_align_addr((void*) ((ptrdiff_t) base + size - 1), mca_mpool_base_page_size_log);
for(item = opal_list_get_first(&mca_mpool_base_modules);
item != opal_list_get_end(&mca_mpool_base_modules);
item = opal_list_get_next(item)) {
bool warn = true;
current = (mca_mpool_base_selected_module_t*) item;
if(NULL != current->mpool_module->mpool_find) {
rc = current->mpool_module->mpool_find(
current->mpool_module,
base_addr,
size,
&mca_mpool_base_mem_cb_array,
&cnt
);
if(OMPI_SUCCESS != rc) {
continue;
if(current->mpool_module->mpool_release_memory != NULL) {
rc = current->mpool_module->mpool_release_memory(current->mpool_module,
base, size);
if(rc != OMPI_SUCCESS && true == warn) {
opal_output(0, "Memory %p:%llu cannot be freed from the "
"registration cache. Possible memory corruption.\n",
base, size);
warn = false;
}
for(i = 0; i < cnt; i++) {
reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&mca_mpool_base_mem_cb_array, i);
#if !defined(NDEBUG)
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
assert(reg->ref_count <= 3);
} else if(reg->flags & MCA_MPOOL_FLAGS_PERSIST) {
assert(reg->ref_count <= 2);
} else {
assert(reg->ref_count <= 1);
}
#endif
current->mpool_module->mpool_deregister(current->mpool_module, reg);
}
ompi_pointer_array_remove_all(&mca_mpool_base_mem_cb_array);
}
}
}

Просмотреть файл

@ -38,7 +38,8 @@ struct mca_mpool_base_tree_item_t
ompi_free_list_item_t super; /**< the parent class */
void* key; /* the address this was alloc'd on */
mca_mpool_base_module_t* mpools[MCA_MPOOL_BASE_TREE_MAX]; /**< the mpools */
mca_mpool_base_registration_t* regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */
mca_mpool_base_registration_t* regs[MCA_MPOOL_BASE_TREE_MAX]; /**< the registrations */
uint8_t count;
};
typedef struct mca_mpool_base_tree_item_t mca_mpool_base_tree_item_t;

1
ompi/mca/mpool/gm/.ompi_ignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
quilt

Просмотреть файл

@ -26,7 +26,7 @@
#include "ompi/class/ompi_free_list.h"
#include "ompi/class/ompi_pointer_array.h"
#define MCA_MPOOL_FLAGS_CACHE 0x1
#define MCA_MPOOL_FLAGS_CACHE_BYPASS 0x1
#define MCA_MPOOL_FLAGS_PERSIST 0x2
#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x4
@ -38,7 +38,6 @@ struct mca_mpool_base_registration_t {
unsigned char* base;
unsigned char* bound;
unsigned char* alloc_base;
void* user_data;
int32_t ref_count;
uint32_t flags;
};
@ -47,19 +46,6 @@ typedef struct mca_mpool_base_registration_t mca_mpool_base_registration_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_mpool_base_registration_t);
#define MCA_MPOOL_REG_RETAIN(reg) { \
do{ \
OPAL_THREAD_ADD32(&reg->ref_count, 1); \
} while(0); \
}
#define MCA_MPOOL_REG_RELEASE(reg) { \
do{ \
OPAL_THREAD_ADD32(&reg->ref_count, -1); \
} while(0); \
}
/**
* component initialize
*/
@ -111,26 +97,12 @@ typedef int (*mca_mpool_base_module_deregister_fn_t)(
mca_mpool_base_registration_t* registration);
/**
* find registrations in this memory pool
* find registration in this memory pool
*/
typedef int (*mca_mpool_base_module_find_fn_t) (
struct mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
ompi_pointer_array_t* regs,
uint32_t *cnt
);
/**
* retain registration
*/
typedef int (*mca_mpool_base_module_retain_fn_t) (
struct mca_mpool_base_module_t* mpool,
mca_mpool_base_registration_t* registration);
struct mca_mpool_base_module_t* mpool, void* addr, size_t size,
mca_mpool_base_registration_t **reg);
/**
* release registration
@ -141,6 +113,12 @@ typedef int (*mca_mpool_base_module_release_fn_t) (
mca_mpool_base_registration_t* registration);
/**
* release memory region
*/
typedef int (*mca_mpool_base_module_release_memory_fn_t) (
struct mca_mpool_base_module_t* mpool, void *base, size_t size);
/**
* if appropriate - returns base address of memory pool
*/
@ -185,8 +163,8 @@ struct mca_mpool_base_module_t {
mca_mpool_base_module_register_fn_t mpool_register; /**< register memory */
mca_mpool_base_module_deregister_fn_t mpool_deregister; /**< deregister memory */
mca_mpool_base_module_find_fn_t mpool_find; /**< find regisrations in the cache */
mca_mpool_base_module_retain_fn_t mpool_retain; /**< retain a registration from the cache */
mca_mpool_base_module_release_fn_t mpool_release; /**< release a registration from the cache */
mca_mpool_base_module_release_memory_fn_t mpool_release_memory; /**< release memory region from the cache */
mca_mpool_base_module_finalize_fn_t mpool_finalize; /**< finalize */
struct mca_rcache_base_module_t *rcache; /* the rcache associated with this mpool */
uint32_t flags; /**< mpool flags */

1
ompi/mca/mpool/mvapi/.ompi_ignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
quilt

1
ompi/mca/mpool/openib/.ompi_ignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
quilt

55
ompi/mca/mpool/rdma/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,55 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Use the top-level Makefile.options
AM_CPPFLAGS = $(mpool_rdma_CPPFLAGS)
sources = \
mpool_rdma.h \
mpool_rdma_module.c \
mpool_rdma_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_mpool_rdma_DSO
component_noinst =
component_install = mca_mpool_rdma.la
else
component_noinst = libmca_mpool_rdma.la
component_install =
endif
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_rdma_la_SOURCES = $(sources)
mca_mpool_rdma_la_LDFLAGS = -module -avoid-version
mca_mpool_rdma_la_LIBADD = \
$(mpool_rdma_LIBS) \
$(top_ompi_builddir)/ompi/libmpi.la \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_rdma_la_SOURCES = $(sources)
libmca_mpool_rdma_la_LDFLAGS = -module -avoid-version
libmca_mpool_rdma_la_LIBADD = $(mpool_rdma_LIBS)

25
ompi/mca/mpool/rdma/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,25 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Voltaire. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_INIT_FILE=mpool_rdma_component.c
PARAM_CONFIG_FILES="Makefile"

127
ompi/mca/mpool/rdma/mpool_rdma.h Обычный файл
Просмотреть файл

@ -0,0 +1,127 @@
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006      Voltaire. All rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/**
 * @file
 * Public interface of the generic "rdma" registration-caching memory pool.
 * All RDMA-capable BTLs share this mpool; the BTL supplies the actual
 * (de)registration callbacks through mca_mpool_base_resources_t.
 */
/* Bug fix: the include guard was MCA_MPOOL_OPENIB_H, copy-pasted from the
 * openib mpool header -- misleading and a potential collision with that
 * (still existing, now deprecated) header. */
#ifndef MCA_MPOOL_RDMA_H
#define MCA_MPOOL_RDMA_H

#include "opal/class/opal_list.h"
#include "ompi/class/ompi_free_list.h"
#include "opal/event/event.h"
#include "ompi/mca/mpool/mpool.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

/* Component-level state; every field is configured via MCA parameters in
 * mca_mpool_rdma_open(). */
struct mca_mpool_rdma_component_t {
    mca_mpool_base_component_t super;
    char* rcache_name;        /**< which rcache component backs the cache */
    size_t rcache_size_limit; /**< max bytes kept registered; 0 = unlimited */
    bool print_stats;         /**< dump usage counters from the finalize hook */
    uint32_t leave_pinned;    /**< keep idle registrations pinned on an MRU list */
};
typedef struct mca_mpool_rdma_component_t mca_mpool_rdma_component_t;

OMPI_DECLSPEC extern mca_mpool_rdma_component_t mca_mpool_rdma_component;

/* Registration callbacks supplied by the owning BTL when it creates the
 * module.  reg_data is an opaque cookie handed back to both callbacks. */
struct mca_mpool_base_resources_t {
    void *reg_data;
    size_t sizeof_reg;  /**< size of the BTL-specific registration struct */
    int (*register_mem)(void *reg_data, void *base, size_t size,
            mca_mpool_base_registration_t *reg);
    int (*deregister_mem)(void *reg_data, mca_mpool_base_registration_t *reg);
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;

struct mca_mpool_rdma_module_t {
    mca_mpool_base_module_t super;
    struct mca_mpool_base_resources_t resources;
    ompi_free_list_t reg_list;  /**< pool of registration descriptors */
    opal_list_t mru_list;       /**< idle-but-pinned regs (leave_pinned mode) */
    /* usage counters, printed by mca_mpool_rdma_finalize() */
    uint32_t stat_cache_hit;
    uint32_t stat_cache_miss;
    uint32_t stat_evicted;
    uint32_t stat_cache_found;
    uint32_t stat_cache_notfound;
};
typedef struct mca_mpool_rdma_module_t mca_mpool_rdma_module_t;

/*
 *  Initializes the mpool module.
 */
void mca_mpool_rdma_module_init(mca_mpool_rdma_module_t *mpool);

/*
 *  Returns base address of shared memory mapping.
 */
void *mca_mpool_rdma_base(mca_mpool_base_module_t *mpool);

/**
 * Allocate block of registered memory.
 */
void* mca_mpool_rdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
        size_t align, uint32_t flags,
        mca_mpool_base_registration_t** registration);

/**
 * realloc block of registered memory
 */
void* mca_mpool_rdma_realloc( mca_mpool_base_module_t *mpool, void* addr,
        size_t size, mca_mpool_base_registration_t** registration);

/**
 * register block of memory
 */
int mca_mpool_rdma_register(mca_mpool_base_module_t* mpool, void *addr,
        size_t size, uint32_t flags, mca_mpool_base_registration_t **reg);

/**
 * deregister memory
 */
int mca_mpool_rdma_deregister(mca_mpool_base_module_t *mpool,
        mca_mpool_base_registration_t *reg);

/**
 * free memory allocated by alloc function
 */
void mca_mpool_rdma_free(mca_mpool_base_module_t *mpool, void * addr,
        mca_mpool_base_registration_t *reg);

/**
 * find registration for a given block of memory
 */
int mca_mpool_rdma_find(struct mca_mpool_base_module_t* mpool, void* addr,
        size_t size, mca_mpool_base_registration_t **reg);

/**
 * unregister all registration covering the block of memory
 */
int mca_mpool_rdma_release_memory(mca_mpool_base_module_t* mpool, void *base,
        size_t size);

/**
 * finalize mpool
 */
void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool);

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

122
ompi/mca/mpool/rdma/mpool_rdma_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,122 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "mpool_rdma.h"
#include "orte/util/proc_info.h"
#include "orte/util/sys_info.h"
#include <unistd.h>
#include <malloc.h>
/*
* Local functions
*/
static int mca_mpool_rdma_open(void);
static mca_mpool_base_module_t* mca_mpool_rdma_init(
struct mca_mpool_base_resources_t* resources);
/*
 * Component descriptor read by the MCA framework to discover this mpool.
 * The initializer is positional -- field order must match
 * mca_mpool_base_component_t exactly.
 */
mca_mpool_rdma_component_t mca_mpool_rdma_component = {
    {
        /* First, the mca_base_component_t struct containing meta
           information about the component itself */
        {
            /* Indicate that we are a mpool v1.0.0 component (which also
               implies a specific MCA version) */
            MCA_MPOOL_BASE_VERSION_1_0_0,
            "rdma", /* MCA component name */
            OMPI_MAJOR_VERSION, /* MCA component major version */
            OMPI_MINOR_VERSION, /* MCA component minor version */
            OMPI_RELEASE_VERSION, /* MCA component release version */
            mca_mpool_rdma_open, /* component open */
            NULL /* component close: nothing to tear down */
        },
        /* Next the MCA v1.0.0 component meta data */
        {
            /* Whether the component is checkpointable or not */
            false
        },
        mca_mpool_rdma_init /* module init, called once per BTL that uses us */
    }
};
/**
* component open/close/init function
*/
/**
 * Component open hook: register this component's MCA parameters and pick
 * up the global MPI-level leave_pinned / leave_pinned_pipeline settings.
 *
 * @return OMPI_SUCCESS always (parameter registration does not fail here).
 */
static int mca_mpool_rdma_open(void)
{
    int param, val;

    /* which rcache component implements the registration lookup structure */
    mca_base_param_reg_string(&mca_mpool_rdma_component.super.mpool_version,
            "rcache_name",
            "The name of the registration cache the mpool should use",
            false, false, "vma", &mca_mpool_rdma_component.rcache_name);
    /* upper bound on bytes kept registered by the cache; 0 disables it */
    mca_base_param_reg_int(&mca_mpool_rdma_component.super.mpool_version,
            "rcache_size_limit",
            "the maximum size of registration cache in bytes. "
            "0 is unlimited (default 0)", false, false, 0, &val);
    mca_mpool_rdma_component.rcache_size_limit = (size_t)val;
    mca_base_param_reg_int(&mca_mpool_rdma_component.super.mpool_version,
            "print_stats",
            "print pool usage statistics at the end of the run",
            false, false, 0, &val);
    mca_mpool_rdma_component.print_stats = val?true:false;

    /* leave_pinned lives in the global "mpi" namespace, not under mpool_rdma;
     * register-then-find-then-lookup is the MCA idiom for reading it */
    mca_base_param_register_int("mpi", NULL, "leave_pinned", "leave_pinned", 0);
    param = mca_base_param_find("mpi", NULL, "leave_pinned");
    mca_base_param_lookup_int(param, (int*)&mca_mpool_rdma_component.leave_pinned);
    if(0 == mca_mpool_rdma_component.leave_pinned) {
        /* and now check leave_pinned_pipeline if necessary */
        mca_base_param_register_int("mpi", NULL, "leave_pinned_pipeline",
                "leave_pinned_pipeline", 0);
        param = mca_base_param_find("mpi", NULL, "leave_pinned_pipeline");
        mca_base_param_lookup_int(param, (int*)&mca_mpool_rdma_component.leave_pinned);
    }
    return OMPI_SUCCESS;
}
/**
 * Create one rdma mpool module for the resources supplied by a BTL.
 *
 * @param resources  BTL registration callbacks and per-registration size;
 *                   copied by value, so the caller's struct need not
 *                   outlive this call.
 * @return the new module, or NULL if allocation fails.
 */
static mca_mpool_base_module_t* mca_mpool_rdma_init(
        struct mca_mpool_base_resources_t *resources)
{
    mca_mpool_rdma_module_t* mpool_module;

    mpool_module =
        (mca_mpool_rdma_module_t*)malloc(sizeof(mca_mpool_rdma_module_t));
    /* bug fix: the original dereferenced the result without checking for
     * allocation failure */
    if(NULL == mpool_module)
        return NULL;

    mpool_module->resources = *resources;

    mca_mpool_rdma_module_init(mpool_module);

    return &mpool_module->super;
}

395
ompi/mca/mpool/rdma/mpool_rdma_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,395 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "orte/util/proc_info.h"
#include "opal/util/output.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include <errno.h>
#include <string.h>
#include <malloc.h>
#include "ompi/mca/rcache/rcache.h"
#include "ompi/mca/rcache/base/base.h"
#include "ompi/mca/mpool/base/base.h"
extern uint32_t mca_mpool_base_page_size;
extern uint32_t mca_mpool_base_page_size_log;
/*
* Initializes the mpool module.
*/
/*
 * Set up one rdma mpool module: wire up the mpool function table, create
 * the backing registration cache and the free list of registration
 * descriptors, and zero the usage counters.
 */
void mca_mpool_rdma_module_init(mca_mpool_rdma_module_t* mpool)
{
    mca_mpool_base_module_t *base = &mpool->super;

    base->mpool_component = &mca_mpool_rdma_component.super;
    base->mpool_base = NULL; /* this pool has no fixed base address */
    base->mpool_alloc = mca_mpool_rdma_alloc;
    base->mpool_realloc = mca_mpool_rdma_realloc;
    base->mpool_free = mca_mpool_rdma_free;
    base->mpool_register = mca_mpool_rdma_register;
    base->mpool_find = mca_mpool_rdma_find;
    base->mpool_deregister = mca_mpool_rdma_deregister;
    base->mpool_release_memory = mca_mpool_rdma_release_memory;
    /* the finalize hook exists only to print statistics */
    base->mpool_finalize = mca_mpool_rdma_component.print_stats ?
        mca_mpool_rdma_finalize : NULL;
    base->rcache =
        mca_rcache_base_module_create(mca_mpool_rdma_component.rcache_name);
    base->flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;

    /* each free-list entry is as large as the BTL-specific registration
     * structure */
    OBJ_CONSTRUCT(&mpool->reg_list, ompi_free_list_t);
    ompi_free_list_init(&mpool->reg_list, mpool->resources.sizeof_reg,
            OBJ_CLASS(mca_mpool_base_registration_t), 0, -1, 32,
            NULL);
    OBJ_CONSTRUCT(&mpool->mru_list, opal_list_t);

    mpool->stat_cache_hit = 0;
    mpool->stat_cache_miss = 0;
    mpool->stat_evicted = 0;
    mpool->stat_cache_found = 0;
    mpool->stat_cache_notfound = 0;
}
/* Hand a registration back to the BTL's deregister callback.  Only legal
 * once the registration's reference count has dropped to zero. */
static inline int dereg_mem(mca_mpool_base_module_t *mpool,
        mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *rdma_mpool = (mca_mpool_rdma_module_t *)mpool;

    assert(0 == reg->ref_count);
    return rdma_mpool->resources.deregister_mem(
            rdma_mpool->resources.reg_data, reg);
}
/**
* allocate function
*/
/**
 * Allocate a page-aligned buffer and register it with this pool.
 *
 * NOTE(review): the 'align' argument is not honored -- every allocation is
 * page aligned, which satisfies any alignment request up to the page size;
 * confirm no caller needs more than that.
 *
 * @return the buffer, or NULL if allocation or registration fails;
 *         *reg receives the new registration on success.
 */
void* mca_mpool_rdma_alloc(mca_mpool_base_module_t *mpool, size_t size,
        size_t align, uint32_t flags, mca_mpool_base_registration_t **reg)
{
    void *buf;

    if(0 != posix_memalign(&buf, mca_mpool_base_page_size, size))
        return NULL;

    if(OMPI_SUCCESS != mca_mpool_rdma_register(mpool, buf, size, flags, reg)) {
        free(buf);
        return NULL;
    }

    /* remember the malloc'ed pointer so mpool_free can release it */
    (*reg)->alloc_base = buf;

    return buf;
}
/*
 * Register memory without touching the registration cache.  Used when the
 * caller passed MCA_MPOOL_FLAGS_CACHE_BYPASS, and as a fast path when
 * leave_pinned is off and the cache size is unlimited.
 *
 * Called WITHOUT mpool->rcache->lock held (both call sites in
 * mca_mpool_rdma_register enter with the lock released).
 */
static int register_cache_bypass(mca_mpool_base_module_t *mpool,
        void *addr, size_t size, uint32_t flags,
        mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    /* registrations are page granular */
    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr( (void*) ((char*) addr + size - 1),
             mca_mpool_base_page_size_log);
    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        /* bug fix: the original called OPAL_THREAD_UNLOCK on
         * mpool->rcache->lock here, but this function is always entered
         * with that lock NOT held -- unlocking an unowned lock is
         * undefined behavior */
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    /* hand the page-rounded range to the BTL's registration callback */
    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    return OMPI_SUCCESS;
}
/*
* register memory
*/
/*
 * register memory
 *
 * Main registration entry point.  Depending on flags and component
 * settings the registration is either satisfied from / inserted into the
 * rcache, or created entirely outside the cache.  Takes and releases
 * mpool->rcache->lock; on success *reg holds one new reference.
 */
int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
        size_t size, uint32_t flags,
        mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    /* if cache bypass is requested don't use the cache */
    if(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
        return register_cache_bypass(mpool, addr, size, flags, reg);
    }

    /* registrations are page granular */
    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr((void*)((char*) addr + size - 1),
             mca_mpool_base_page_size_log);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    /* look through existing regs if not persistent registration requested.
     * Persistent registration are always registered and placed in the cache */
    if(!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
        /* check to see if memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        /* a cached registration is usable if leave_pinned keeps everything
         * pinned anyway, if it is persistent, or if it covers exactly the
         * page range we need */
        if(*reg != NULL &&
                (mca_mpool_rdma_component.leave_pinned ||
                 ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
                 ((*reg)->base == base && (*reg)->bound == bound))) {
            /* an idle cached registration sits on the MRU (eviction) list;
             * taking a reference must remove it from there */
            if(0 == (*reg)->ref_count &&
                    mca_mpool_rdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)(*reg));
            }
            mpool_rdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return MPI_SUCCESS;
        }

        mpool_rdma->stat_cache_miss++;
        *reg = NULL; /* in case previous find found something */

        /* If no suitable registration is in cache and leave_pinned isn't
         * set and size of registration cache is unlimited don't use the cache.
         * This is optimisation in case limit is not set. If limit is set we
         * have to put registration into the cache to determine when we hit
         * memory registration limit.
         * NOTE: cache is still used for persistent registrations so previous
         * find can find something */
        if(!mca_mpool_rdma_component.leave_pinned &&
                 mca_mpool_rdma_component.rcache_size_limit == 0) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return register_cache_bypass(mpool, addr, size, flags, reg);
        }
    }

    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    /* insert into the cache; while the size limit is exceeded, evict the
     * least recently used idle registration and retry until the insert
     * succeeds or nothing evictable is left */
    while((rc = mpool->rcache->rcache_insert(mpool->rcache, rdma_reg,
            mca_mpool_rdma_component.rcache_size_limit)) ==
            OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
        mca_mpool_base_registration_t *old_reg;
        /* try to remove one unused reg and retry */
        old_reg = (mca_mpool_base_registration_t*)
            opal_list_get_last(&mpool_rdma->mru_list);
        if(opal_list_get_end(&mpool_rdma->mru_list) !=
                (opal_list_item_t*)old_reg) {
            rc = dereg_mem(mpool, old_reg);
            if(MPI_SUCCESS == rc) {
                mpool->rcache->rcache_delete(mpool->rcache, old_reg);
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)old_reg);
                OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                        (ompi_free_list_item_t*)old_reg);
                mpool_rdma->stat_evicted++;
            } else
                break;
        } else
            break;
    }

    if(rc != OMPI_SUCCESS) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    /* the entry is in the cache; now do the actual hardware registration */
    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        mpool->rcache->rcache_delete(mpool->rcache, rdma_reg);
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    return OMPI_SUCCESS;
}
/**
* realloc function
*/
/**
 * Re-allocate a registered buffer: allocate+register a new block, copy the
 * old contents over, then free and deregister the old block.
 *
 * @return the new buffer, or NULL (with the old buffer left intact) if the
 *         new allocation/registration fails.
 */
void* mca_mpool_rdma_realloc(mca_mpool_base_module_t *mpool, void *addr,
        size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_base_registration_t *old_reg = *reg;
    /* registered extent of the old block (page rounded, so >= the original
     * request) */
    size_t old_len = old_reg->bound - old_reg->base + 1;
    void *new_mem = mca_mpool_rdma_alloc(mpool, size, 0, old_reg->flags, reg);

    /* bug fix: the original memcpy'd into an unchecked NULL on allocation
     * failure */
    if(NULL == new_mem) {
        *reg = old_reg;
        return NULL;
    }

    /* bug fix: copy no more than the new buffer can hold -- the old
     * page-rounded extent can exceed 'size' when shrinking, which
     * overflowed the new allocation */
    memcpy(new_mem, addr, old_len < size ? old_len : size);
    mca_mpool_rdma_free(mpool, addr, old_reg);

    return new_mem;
}
/**
* free function
*/
/**
 * Release a buffer obtained from mca_mpool_rdma_alloc: drop the reference
 * on its registration, then free the underlying allocation.
 */
void mca_mpool_rdma_free(mca_mpool_base_module_t *mpool, void *addr,
        mca_mpool_base_registration_t *registration)
{
    /* bug fix: capture alloc_base BEFORE dropping the reference --
     * mca_mpool_rdma_deregister may return the registration descriptor to
     * its free list, after which reading registration->alloc_base is a
     * use-after-release */
    void *alloc_base = registration->alloc_base;

    mca_mpool_rdma_deregister(mpool, registration);
    free(alloc_base);
}
/*
 * Look up a cached registration covering [addr, addr+size) and take a
 * reference on it.  Unlike mpool_register this never creates a new
 * registration.  Takes and releases mpool->rcache->lock.
 *
 * NOTE(review): when the rcache returns a registration that fails the
 * usability test below, *reg is left non-NULL but NO reference is taken --
 * confirm callers distinguish that case (e.g. by flags/range) before use.
 */
int mca_mpool_rdma_find(struct mca_mpool_base_module_t *mpool, void *addr,
        size_t size, mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    int rc;
    unsigned char *base, *bound;

    /* registrations are page granular */
    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr((void*)((char*) addr + size - 1),
             mca_mpool_base_page_size_log);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    rc = mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
    /* same usability test as in mca_mpool_rdma_register: leave_pinned,
     * persistent, or an exact page-range match */
    if(*reg != NULL &&
            (mca_mpool_rdma_component.leave_pinned ||
             ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
             ((*reg)->base == base && (*reg)->bound == bound))) {
        assert(((void*)(*reg)->bound) >= addr);
        /* an idle registration sits on the MRU eviction list; a referenced
         * one must not */
        if(0 == (*reg)->ref_count &&
                mca_mpool_rdma_component.leave_pinned) {
            opal_list_remove_item(&mpool_rdma->mru_list,
                    (opal_list_item_t*)(*reg));
        }
        mpool_rdma->stat_cache_found++;
        (*reg)->ref_count++;
    } else {
        mpool_rdma->stat_cache_notfound++;
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return rc;
}
/*
 * Drop one reference from a registration.  When the count reaches zero the
 * registration is either parked on the MRU list for reuse (leave_pinned
 * mode) or actually deregistered and its descriptor returned to the free
 * list.  Takes and releases mpool->rcache->lock.
 */
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
        mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    int rc = OMPI_SUCCESS;
    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg->ref_count--;
    /* still referenced elsewhere: nothing more to do */
    if(reg->ref_count > 0) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OMPI_SUCCESS;
    }
    if(mca_mpool_rdma_component.leave_pinned &&
            !(reg->flags & (MCA_MPOOL_FLAGS_CACHE_BYPASS|MCA_MPOOL_FLAGS_PERSIST))) {
        /* if leave_pinned is set don't deregister memory, but put it
         * on MRU list for future use */
        opal_list_prepend(&mpool_rdma->mru_list, (opal_list_item_t*)reg);
    } else {
        rc = dereg_mem(mpool, reg);
        if(OMPI_SUCCESS == rc) {
            /* bypass registrations were never inserted into the rcache */
            if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
                mpool->rcache->rcache_delete(mpool->rcache, reg);
            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                    (ompi_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return rc;
}
/*
 * Forcefully invalidate every cached registration overlapping
 * [base, base+size) -- invoked when memory is returned to the OS
 * underneath us.  Idle registrations are deregistered immediately; busy
 * ones are removed from the cache and flagged CACHE_BYPASS so they are
 * deregistered when their last reference drops.
 *
 * @return OMPI_SUCCESS if everything was released, OMPI_ERROR if any
 *         registration was still in use or failed to deregister.
 */
int mca_mpool_rdma_release_memory(struct mca_mpool_base_module_t *mpool,
        void *base, size_t size)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;
    ompi_pointer_array_t regs;
    int reg_cnt, i, err = 0;

    OBJ_CONSTRUCT(&regs, ompi_pointer_array_t);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, base, size, &regs);

    for(i = 0; i < reg_cnt; i++) {
        reg = (mca_mpool_base_registration_t*)
            ompi_pointer_array_get_item(&regs, i);

        if(0 == reg->ref_count) {
            if(dereg_mem(mpool, reg) != OMPI_SUCCESS) {
                err++;
                continue;
            }
        } else {
            /* remove registration from cache and wait for ref_count goes to
             * zero before unregister memory. Note that our registered memory
             * statistic can go wrong at this point, but it is better than
             * potential memory corruption. And we return error in this case to
             * the caller */
            reg->flags |= MCA_MPOOL_FLAGS_CACHE_BYPASS;
            err++; /* tell caller that something was wrong */
        }
        mpool->rcache->rcache_delete(mpool->rcache, reg);
        if(0 == reg->ref_count) {
            /* idle registrations were parked on the MRU list */
            opal_list_remove_item(&mpool_rdma->mru_list,
                    (opal_list_item_t*)reg);
            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                    (ompi_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    ompi_pointer_array_remove_all(&regs);
    /* bug fix: pair OBJ_CONSTRUCT with OBJ_DESTRUCT -- the original leaked
     * the pointer array's internal storage on every call */
    OBJ_DESTRUCT(&regs);

    return err?OMPI_ERROR:OMPI_SUCCESS;
}
/*
 * Finalize hook -- installed only when mpool_rdma_print_stats is set (see
 * mca_mpool_rdma_module_init); prints the cache usage counters.  It does
 * NOT free the module, its free list, or its MRU list.
 */
void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    /* NOTE(review): the counters are uint32_t but printed with %d; %u would
     * be the exact specifier */
    opal_output(0, "[%lu,%lu,%lu] rdma: stats "
            "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss,
            mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound,
            mpool_rdma->stat_evicted);
}

Просмотреть файл

@ -36,6 +36,7 @@ void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool)
mpool->super.mpool_find = NULL;
mpool->super.mpool_register = NULL;
mpool->super.mpool_deregister = NULL;
mpool->super.mpool_release_memory = NULL;
mpool->super.mpool_finalize = NULL;
mpool->super.flags = 0;
}

1
ompi/mca/mpool/udapl/.ompi_ignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
quilt

Просмотреть файл

@ -42,10 +42,8 @@ size_t mca_pml_ob1_rdma_btls(
mca_pml_ob1_rdma_btl_t* rdma_btls)
{
size_t num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
ompi_pointer_array_t regs;
size_t num_btls_used = 0;
size_t n;
int rc;
/* shortcut when there are no rdma capable btls */
if(num_btls == 0) {
@ -53,223 +51,34 @@ size_t mca_pml_ob1_rdma_btls(
}
/* check to see if memory is registered */
OBJ_CONSTRUCT(&regs, ompi_pointer_array_t);
for(n = 0; n < num_btls && num_btls_used < MCA_PML_OB1_MAX_RDMA_PER_REQUEST; n++) {
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n);
mca_mpool_base_registration_t* fit = NULL;
mca_mpool_base_module_t* btl_mpool = bml_btl->btl_mpool;
uint32_t reg_cnt;
size_t r;
for(n = 0; n < num_btls && num_btls_used < MCA_PML_OB1_MAX_RDMA_PER_REQUEST;
n++) {
mca_bml_base_btl_t* bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n);
mca_mpool_base_registration_t* reg = NULL;
mca_mpool_base_module_t *btl_mpool = bml_btl->btl_mpool;
/* btl is rdma capable and registration is not required */
if(NULL == btl_mpool) {
rdma_btls[num_btls_used].bml_btl = bml_btl;
rdma_btls[num_btls_used].btl_reg = NULL;
num_btls_used++;
continue;
}
/* look through existing registrations */
ompi_pointer_array_remove_all(&regs);
btl_mpool->mpool_find(btl_mpool,
base,
size,
&regs,
&reg_cnt);
/*
* find the best fit when there are multiple registrations
*/
for(r = 0; r < reg_cnt; r++) {
mca_mpool_base_registration_t* reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&regs, r);
size_t reg_len = reg->bound - base + 1;
reg = NULL;
} else {
if(!mca_pml_ob1.leave_pinned) {
/* look through existing registrations */
btl_mpool->mpool_find(btl_mpool, base, size, &reg);
} else {
/* register the memory */
btl_mpool->mpool_register(btl_mpool, base, size, 0, &reg);
}
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
assert(reg->ref_count >= 3);
}
if(reg->base <= base && reg_len >= size) {
fit = reg;
} else if(mca_pml_ob1.leave_pinned){
btl_mpool->mpool_deregister(btl_mpool, reg);
} else {
btl_mpool->mpool_release(btl_mpool, reg);
}
}
/* if the leave pinned option is set - and there is not an existing
* registration that satisfies this request, create one.
*/
if(NULL == fit && mca_pml_ob1.leave_pinned) {
/* register the memory */
rc = btl_mpool->mpool_register(
btl_mpool,
base,
size,
MCA_MPOOL_FLAGS_CACHE,
&fit);
if(ORTE_SUCCESS != rc || NULL == fit) {
opal_output(0, "[%s:%d] mpool_register(%p,%lu) failed, \n", __FILE__, __LINE__, base, size);
continue;
}
assert(fit->ref_count == 3);
if(NULL == reg)
bml_btl = NULL; /* skip it */
}
if(NULL != fit) {
if(bml_btl != NULL) {
rdma_btls[num_btls_used].bml_btl = bml_btl;
rdma_btls[num_btls_used].btl_reg = fit;
rdma_btls[num_btls_used].btl_reg = reg;
num_btls_used++;
}
}
return num_btls_used;
}
/*
* For a given btl - find the best fit registration or
* optionally create one for leave pinned.
*/
mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration(
mca_bml_base_btl_t* bml_btl,
unsigned char* base,
size_t size)
{
ompi_pointer_array_t regs;
mca_mpool_base_registration_t* fit = NULL;
mca_mpool_base_module_t* btl_mpool = bml_btl->btl_mpool;
uint32_t reg_cnt;
size_t r;
int rc;
/* btl is rdma capable and registration is not required */
if(NULL == btl_mpool) {
return NULL;
}
/* check to see if memory is registered */
OBJ_CONSTRUCT(&regs, ompi_pointer_array_t);
ompi_pointer_array_remove_all(&regs);
/* look through existing registrations */
btl_mpool->mpool_find(btl_mpool,
base,
size,
&regs,
&reg_cnt);
/*
* find the best fit when there are multiple registrations
*/
for(r = 0; r < reg_cnt; r++) {
mca_mpool_base_registration_t* reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&regs, r);
size_t reg_len = reg->bound - base + 1;
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
assert(reg->ref_count >= 3);
}
if(reg->base <= base && reg_len >= size) {
fit = reg;
} else if(mca_pml_ob1.leave_pinned){
btl_mpool->mpool_deregister(btl_mpool, reg);
} else {
btl_mpool->mpool_release(btl_mpool, reg);
}
}
/* if the leave pinned option is set - and there is not an existing
* registration that satisfies this request, create one.
*/
if(NULL == fit && mca_pml_ob1.leave_pinned) {
/* register the memory */
rc = btl_mpool->mpool_register(
btl_mpool,
base,
size,
MCA_MPOOL_FLAGS_CACHE,
&fit);
if(ORTE_SUCCESS != rc || NULL == fit) {
opal_output(0, "[%s:%d] mpool_register(%p,%lu) failed, \n", __FILE__, __LINE__, base, size);
return NULL;
}
assert(fit->ref_count == 3);
}
OBJ_DESTRUCT(&regs);
return fit;
}
/*
* For a given btl - find the best fit registration or
* optionally create one for leave pinned.
*/
mca_mpool_base_registration_t* mca_pml_ob1_rdma_register(
mca_bml_base_btl_t* bml_btl,
unsigned char* base,
size_t size)
{
ompi_pointer_array_t regs;
mca_mpool_base_registration_t* fit = NULL;
mca_mpool_base_module_t* btl_mpool = bml_btl->btl_mpool;
uint32_t reg_cnt;
size_t r;
int rc;
/* btl is rdma capable and registration is not required */
if(NULL == btl_mpool) {
return NULL;
}
/* check to see if memory is registered */
OBJ_CONSTRUCT(&regs, ompi_pointer_array_t);
ompi_pointer_array_remove_all(&regs);
/* look through existing registrations */
btl_mpool->mpool_find(btl_mpool,
base,
size,
&regs,
&reg_cnt);
/*
* find the best fit when there are multiple registrations
*/
for(r = 0; r < reg_cnt; r++) {
mca_mpool_base_registration_t* reg = (mca_mpool_base_registration_t*)ompi_pointer_array_get_item(&regs, r);
size_t reg_len = reg->bound - base + 1;
if(reg->base <= base && reg_len >= size) {
fit = reg;
} else {
btl_mpool->mpool_deregister(btl_mpool, reg);
}
}
/* if the leave pinned option is set - and there is not an existing
* registration that satisfies this request, create one.
*/
if(NULL == fit) {
/* register the memory */
rc = btl_mpool->mpool_register(
btl_mpool,
base,
size,
MCA_MPOOL_FLAGS_CACHE,
&fit);
if(ORTE_SUCCESS != rc || NULL == fit) {
opal_output(0, "[%s:%d] mpool_register(%p,%lu) failed, \n", __FILE__, __LINE__, base, size);
return NULL;
}
}
OBJ_DESTRUCT(&regs);
return fit;
}

Просмотреть файл

@ -43,31 +43,8 @@ typedef struct mca_pml_ob1_rdma_btl_t mca_pml_ob1_rdma_btl_t;
* find those that already have registrations - or
* register if required (for leave_pinned option)
*/
size_t mca_pml_ob1_rdma_btls(
struct mca_bml_base_endpoint_t* endpoint,
unsigned char* base,
size_t size,
struct mca_pml_ob1_rdma_btl_t* btls);
/*
* For a given rdma capable btl - find the best fit
* registration or create one for leave pinned.
*/
mca_mpool_base_registration_t* mca_pml_ob1_rdma_registration(
struct mca_bml_base_btl_t* bml_btl,
unsigned char* base,
size_t size);
/*
* Create a registration
*/
mca_mpool_base_registration_t* mca_pml_ob1_rdma_register(
struct mca_bml_base_btl_t* bml_btl,
unsigned char* base,
size_t size);
size_t mca_pml_ob1_rdma_btls(struct mca_bml_base_endpoint_t* endpoint,
unsigned char* base, size_t size, struct mca_pml_ob1_rdma_btl_t* btls);
#endif

Просмотреть файл

@ -257,38 +257,9 @@ static int mca_pml_ob1_recv_request_ack(
if (hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_PIN &&
recvreq->req_rdma_cnt != 0) {
/* start rdma at current fragment offset - no need to ack */
recvreq->req_rdma_offset = bytes_received;
return OMPI_SUCCESS;
}
/* are rdma devices available for long rdma protocol */
if( mca_pml_ob1.leave_pinned_pipeline &&
hdr->hdr_msg_length > bml_endpoint->btl_rdma_size &&
mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma) ) {
char* base;
char* align;
ptrdiff_t lb;
/* round this up/down to the next aligned address */
ompi_ddt_type_lb(recvreq->req_recv.req_convertor.pDesc, &lb);
base = recvreq->req_recv.req_convertor.pBaseBuf + lb;
align = (char*)up_align_addr(base, bml_endpoint->btl_rdma_align)+1;
recvreq->req_rdma_offset = align - base;
/* still w/in range */
if(recvreq->req_rdma_offset < bytes_received) {
recvreq->req_rdma_offset = bytes_received;
}
if(recvreq->req_rdma_offset > hdr->hdr_msg_length) {
recvreq->req_rdma_offset = hdr->hdr_msg_length;
} else {
ompi_convertor_set_position( &recvreq->req_recv.req_convertor,
&recvreq->req_rdma_offset );
}
/* are rdma devices available for long rdma protocol */
} else if (!mca_pml_ob1.leave_pinned_pipeline &&
bml_endpoint->btl_rdma_offset < hdr->hdr_msg_length &&
} else if (bml_endpoint->btl_rdma_offset < hdr->hdr_msg_length &&
mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma)) {
/* use convertor to figure out the rdma offset for this request */
@ -300,6 +271,9 @@ static int mca_pml_ob1_recv_request_ack(
&recvreq->req_rdma_offset );
}
}
/* start rdma at current fragment offset - no need to ack */
if(recvreq->req_rdma_offset == bytes_received)
return OMPI_SUCCESS;
}
/* let know to shedule function there is no need to put ACK flag */
recvreq->req_ack_sent = true;
@ -359,7 +333,6 @@ int mca_pml_ob1_recv_request_get_frag(
mca_bml_base_endpoint_t* bml_endpoint = frag->rdma_ep;
mca_bml_base_btl_t* bml_btl;
mca_btl_base_descriptor_t* descriptor;
mca_mpool_base_registration_t* reg;
size_t save_size = frag->rdma_length;
int rc;
@ -370,21 +343,10 @@ int mca_pml_ob1_recv_request_get_frag(
orte_errmgr.abort();
}
/* is there an existing registration for this btl */
reg = mca_pml_ob1_rdma_registration(
bml_btl,
(unsigned char*)recvreq->req_recv.req_base.req_addr,
recvreq->req_recv.req_bytes_packed);
if(NULL != reg) {
recvreq->req_rdma[0].bml_btl = bml_btl;
recvreq->req_rdma[0].btl_reg = reg;
recvreq->req_rdma_cnt = 1;
}
/* prepare descriptor */
mca_bml_base_prepare_dst(
bml_btl,
reg,
NULL,
&recvreq->req_recv.req_convertor,
0,
&frag->rdma_length,
@ -622,7 +584,6 @@ int mca_pml_ob1_recv_request_schedule_exclusive( mca_pml_ob1_recv_request_t* rec
mca_btl_base_descriptor_t* ctl;
mca_mpool_base_registration_t * reg = NULL;
int rc;
bool release = false;
if(prev_bytes_remaining == bytes_remaining) {
if( ++num_fail == num_tries ) {
@ -689,29 +650,9 @@ int mca_pml_ob1_recv_request_schedule_exclusive( mca_pml_ob1_recv_request_t* rec
size = bml_btl->btl_max_rdma_size;
}
if(0 == recvreq->req_rdma_cnt) {
char* base;
ptrdiff_t lb;
if(mca_pml_ob1.leave_pinned_pipeline) {
/* lookup and/or create a cached registration */
ompi_ddt_type_lb(recvreq->req_recv.req_convertor.pDesc,
&lb);
base = recvreq->req_recv.req_convertor.pBaseBuf + lb +
recvreq->req_rdma_offset;
reg = mca_pml_ob1_rdma_register(bml_btl,
(unsigned char*)base, size);
release = true;
}
}
/* prepare a descriptor for RDMA */
mca_bml_base_prepare_dst(bml_btl, reg,
&recvreq->req_recv.req_convertor, 0, &size, &dst);
if(reg && release == true && NULL != bml_btl->btl_mpool) {
bml_btl->btl_mpool->mpool_release(bml_btl->btl_mpool, reg);
}
if(dst == NULL) {
continue;
}

Просмотреть файл

@ -135,7 +135,7 @@ do {
for( r = 0; r < recvreq->req_rdma_cnt; r++ ) { \
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[r].btl_reg; \
if( NULL != btl_reg ) { \
btl_reg->mpool->mpool_release( btl_reg->mpool, btl_reg ); \
btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg ); \
} \
} \
recvreq->req_rdma_cnt = 0; \

Просмотреть файл

@ -652,7 +652,7 @@ int mca_pml_ob1_send_request_start_rdma(
bml_btl->btl_flags & MCA_BTL_FLAGS_GET) {
size_t old_position = sendreq->req_send.req_convertor.bConverted;
/* prepare source descriptor/segment(s) */
/* prepare source descriptor/segment(s) */
mca_bml_base_prepare_src(
bml_btl,
reg,
@ -846,6 +846,7 @@ int mca_pml_ob1_send_request_start_rndv(
des->des_cbdata = sendreq;
des->des_cbfunc = mca_pml_ob1_rndv_completion;
sendreq->req_send_offset = size;
sendreq->req_rdma_offset = size;
/* send */
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
@ -1023,15 +1024,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
/* check for request completion */
if( OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length)
>= sendreq->req_send.req_bytes_packed) {
/* bump up the req_state after the last fin was sent..
if rndv completion occurs after this (can happen!) then
the rndv completion will properly clean up after the request
we can't just do this on the first RDMA PUT + ACK ctl message in
mca_pml_ob1_send_request_put because then we might fall into sender
side scheduleing (pml pipeline protocol) */
if(true == sendreq->req_got_put_ack) {
MCA_PML_OB1_SEND_REQUEST_ADVANCE_NO_SCHEDULE(sendreq);
}
/* if we've got completion on rndv packet */
if (sendreq->req_state == 2) {
MCA_PML_OB1_SEND_REQUEST_PML_COMPLETE(sendreq);
@ -1058,7 +1051,6 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
size_t i, save_size = frag->rdma_length;
int rc;
bool release = false;
bml_btl = mca_bml_base_btl_array_find(&frag->rdma_ep->btl_rdma,
frag->rdma_btl);
@ -1074,16 +1066,6 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
/* set convertor at current offset */
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
/* if registration doesnt exist - create one */
if (mca_pml_ob1.leave_pinned_pipeline && reg == NULL) {
unsigned char* base;
ptrdiff_t lb;
ompi_ddt_type_lb(sendreq->req_send.req_convertor.pDesc, &lb);
base = (unsigned char*)sendreq->req_send.req_convertor.pBaseBuf + lb + offset;
reg = mca_pml_ob1_rdma_register(bml_btl, base, frag->rdma_length);
release = true;
}
/* setup descriptor */
mca_bml_base_prepare_src( bml_btl,
reg,
@ -1092,10 +1074,6 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
&frag->rdma_length,
&des );
if(reg && release == true && bml_btl->btl_mpool) {
bml_btl->btl_mpool->mpool_release(bml_btl->btl_mpool, reg);
}
if(NULL == des) {
frag->rdma_length = save_size;
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
@ -1148,7 +1126,7 @@ void mca_pml_ob1_send_request_put(
size_t i, size = 0;
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) {
sendreq->req_got_put_ack = true;
MCA_PML_OB1_SEND_REQUEST_ADVANCE_NO_SCHEDULE(sendreq);
}
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag, rc);

Просмотреть файл

@ -55,7 +55,6 @@ struct mca_pml_ob1_send_request_t {
size_t req_bytes_delivered;
size_t req_send_offset;
size_t req_rdma_offset;
bool req_got_put_ack;
mca_pml_ob1_rdma_btl_t req_rdma[MCA_PML_OB1_MAX_RDMA_PER_REQUEST];
uint32_t req_rdma_cnt;
mca_pml_ob1_send_pending_t req_pending;
@ -116,7 +115,7 @@ static inline void mca_pml_ob1_free_rdma_resources(mca_pml_ob1_send_request_t* s
for(r = 0; r < sendreq->req_rdma_cnt; r++) {
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg;
if( NULL != reg ) {
reg->mpool->mpool_release(reg->mpool, reg);
reg->mpool->mpool_deregister(reg->mpool, reg);
}
}
sendreq->req_rdma_cnt = 0;
@ -359,7 +358,6 @@ static inline int mca_pml_ob1_send_request_start(
sendreq->req_pipeline_depth = 0;
sendreq->req_bytes_delivered = 0;
sendreq->req_send_offset = 0;
sendreq->req_got_put_ack = false;
sendreq->req_pending = MCA_PML_OB1_SEND_PENDING_NONE;
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);

1
ompi/mca/rcache/rb/.ompi_ignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
quilt

Просмотреть файл

@ -35,34 +35,26 @@ typedef struct mca_rcache_base_module_t* (*mca_rcache_base_component_init_fn_t)(
typedef int (*mca_rcache_base_module_find_fn_t) (
struct mca_rcache_base_module_t* rcache,
void* addr,
size_t size,
ompi_pointer_array_t *regs,
uint32_t *cnt
);
struct mca_rcache_base_module_t* rcache, void* addr, size_t size,
mca_mpool_base_registration_t **reg);
typedef int (*mca_rcache_base_module_insert_fn_t)(
struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* registration,
uint32_t flags
);
typedef int (*mca_rcache_base_module_find_all_fn_t)(
struct mca_rcache_base_module_t* rcache, void* addr, size_t size,
ompi_pointer_array_t *regs);
typedef int (*mca_rcache_base_module_delete_fn_t) (
struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* registration,
uint32_t flags
);
typedef int (*mca_rcache_base_module_insert_fn_t)(
struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* registration, size_t limit);
typedef int (*mca_rcache_base_module_delete_fn_t)(
struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* registration);
/**
* finalize
*/
typedef void (*mca_rcache_base_module_finalize_fn_t)(
struct mca_rcache_base_module_t*
);
struct mca_rcache_base_module_t*);
/**
* rcache component descriptor. Contains component version information and
@ -83,19 +75,16 @@ typedef struct mca_rcache_base_component_1_0_0_t mca_rcache_base_component_t;
/**
* rcache module descriptor
*/
struct mca_rcache_base_module_t {
mca_rcache_base_component_t *rcache_component; /**< component struct */
mca_rcache_base_module_find_fn_t rcache_find;
mca_rcache_base_module_insert_fn_t rcache_insert;
mca_rcache_base_module_delete_fn_t rcache_delete;
mca_rcache_base_module_finalize_fn_t rcache_finalize;
struct mca_rcache_base_module_t {
mca_rcache_base_component_t *rcache_component; /**< component struct */
mca_rcache_base_module_find_fn_t rcache_find;
mca_rcache_base_module_find_all_fn_t rcache_find_all;
mca_rcache_base_module_insert_fn_t rcache_insert;
mca_rcache_base_module_delete_fn_t rcache_delete;
mca_rcache_base_module_finalize_fn_t rcache_finalize;
opal_mutex_t lock;
};
typedef struct mca_rcache_base_module_t mca_rcache_base_module_t;
};
typedef struct mca_rcache_base_module_t mca_rcache_base_module_t;
/**
* Macro for use in components that are of type rcache v1.0.0

Просмотреть файл

@ -25,9 +25,7 @@ sources = \
rcache_vma.h \
rcache_vma_component.c \
rcache_vma_tree.c \
rcache_vma_tree.h \
rcache_vma_mru.c \
rcache_vma_mru.h
rcache_vma_tree.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

Просмотреть файл

@ -22,7 +22,6 @@
#include "ompi/mca/rcache/rcache.h"
#include "rcache_vma.h"
#include "rcache_vma_tree.h"
#include "rcache_vma_mru.h"
#include "opal/util/output.h"
#include "ompi/mca/mpool/base/base.h"
@ -34,26 +33,18 @@ extern unsigned int mca_mpool_base_page_size_log;
*/
void mca_rcache_vma_module_init( mca_rcache_vma_module_t* rcache ) {
rcache->base.rcache_find = mca_rcache_vma_find;
rcache->base.rcache_find_all = mca_rcache_vma_find_all;
rcache->base.rcache_insert = mca_rcache_vma_insert;
rcache->base.rcache_delete = mca_rcache_vma_delete;
rcache->base.rcache_finalize = mca_rcache_vma_finalize;
OBJ_CONSTRUCT(&rcache->base.lock, opal_mutex_t);
mca_rcache_vma_tree_init(rcache);
mca_rcache_vma_mru_init(rcache);
}
int mca_rcache_vma_find (
struct mca_rcache_base_module_t* rcache,
void* addr,
size_t size,
ompi_pointer_array_t* regs,
uint32_t *cnt
){
int rc = OMPI_SUCCESS;
mca_mpool_base_registration_t *reg;
int mca_rcache_vma_find(struct mca_rcache_base_module_t* rcache,
void* addr, size_t size, mca_mpool_base_registration_t **reg)
{
void* base_addr;
void* bound_addr;
@ -61,108 +52,56 @@ int mca_rcache_vma_find (
return OMPI_ERROR;
}
OPAL_THREAD_LOCK(&rcache->lock);
*cnt = 0;
base_addr = down_align_addr(addr, mca_mpool_base_page_size_log);
bound_addr = up_align_addr((void*) ((unsigned long) addr + size - 1), mca_mpool_base_page_size_log);
reg = mca_rcache_vma_tree_find((mca_rcache_vma_module_t*)rcache, base_addr,
*reg = mca_rcache_vma_tree_find((mca_rcache_vma_module_t*)rcache, base_addr,
bound_addr);
if (reg != NULL) {
ompi_pointer_array_add(regs, (void*) reg);
if(reg->flags & MCA_MPOOL_FLAGS_CACHE) {
rc = mca_rcache_vma_mru_touch((mca_rcache_vma_module_t*)rcache, reg);
if(OMPI_SUCCESS != rc) {
OPAL_THREAD_UNLOCK(&rcache->lock);
return OMPI_ERROR;
}
}
OPAL_THREAD_ADD32((int32_t*) &reg->ref_count, 1);
(*cnt)++;
assert(((void*)reg->bound) >= addr);
}
OPAL_THREAD_UNLOCK(&rcache->lock);
return OMPI_SUCCESS;
}
int mca_rcache_vma_insert (
struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* reg,
uint32_t flags
) {
int mca_rcache_vma_find_all(struct mca_rcache_base_module_t* rcache,
void* addr, size_t size, ompi_pointer_array_t *regs)
{
void *base_addr, *bound_addr;
if(size == 0) {
return OMPI_ERROR;
}
base_addr = down_align_addr(addr, mca_mpool_base_page_size_log);
bound_addr = up_align_addr((void*) ((unsigned long) addr + size - 1), mca_mpool_base_page_size_log);
return mca_rcache_vma_tree_find_all((mca_rcache_vma_module_t*)rcache,
base_addr, bound_addr, regs);
}
int mca_rcache_vma_insert(struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* reg, size_t limit)
{
size_t reg_size = reg->bound - reg->base + 1;
mca_mpool_base_registration_t* old_reg;
mca_rcache_vma_module_t *vma_rcache = (mca_rcache_vma_module_t*)rcache;
OPAL_THREAD_LOCK(&rcache->lock);
if((flags & MCA_MPOOL_FLAGS_CACHE) &&
reg_size > ((mca_rcache_vma_module_t*)rcache)->reg_max_mru_size)
{
OPAL_THREAD_UNLOCK(&rcache->lock);
/* if the registration is too big for the rcache,
don't cache it and reset the flags so the upper level
handles things appropriatly */
reg->flags = 0;
return OMPI_SUCCESS;
if(limit != 0 && reg_size > limit) {
/* return out of resources if request is bigger than cache size
* return temp out of resources otherwise */
return OMPI_ERR_OUT_OF_RESOURCE;
}
reg->flags = flags;
while(mca_rcache_vma_tree_insert((mca_rcache_vma_module_t*)rcache, reg) ==
OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
/* call deregister - which removes the registration from
* the tree and mru list. memory will be deregistered when
* the reference count goes to zero.
*/
old_reg = (mca_mpool_base_registration_t*)opal_list_get_first(&((mca_rcache_vma_module_t*)rcache)->mru_list);
/* we need to retain first, because we only want the registration
removed from the tree and the mru */
old_reg->mpool->mpool_retain(old_reg->mpool, old_reg);
old_reg->mpool->mpool_deregister(old_reg->mpool, old_reg);
}
OPAL_THREAD_ADD32((int32_t*) &reg->ref_count, 1);
if(flags & MCA_MPOOL_FLAGS_CACHE) {
mca_rcache_vma_mru_insert((mca_rcache_vma_module_t*)rcache, reg);
OPAL_THREAD_ADD32((int32_t*)&reg->ref_count, 1);
}
OPAL_THREAD_UNLOCK(&rcache->lock);
return OMPI_SUCCESS;
return mca_rcache_vma_tree_insert(vma_rcache, reg, limit);
}
int mca_rcache_vma_delete (
struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* reg,
uint32_t flags
) {
int rc = OMPI_SUCCESS;
assert(reg->ref_count >= 1);
OPAL_THREAD_LOCK(&rcache->lock);
if(flags & MCA_MPOOL_FLAGS_CACHE) {
assert(reg->ref_count >= 2);
OPAL_THREAD_ADD32((int32_t*)&reg->ref_count, -1);
rc = mca_rcache_vma_mru_delete((mca_rcache_vma_module_t*)rcache, reg);
}
if(OMPI_SUCCESS != rc) {
OPAL_THREAD_UNLOCK(&rcache->lock);
return rc;
}
reg->flags = 0;
OPAL_THREAD_ADD32((int32_t*)&reg->ref_count, -1);
rc = mca_rcache_vma_tree_delete((mca_rcache_vma_module_t*)rcache, reg );
OPAL_THREAD_UNLOCK(&rcache->lock);
return rc;
int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* reg)
{
mca_rcache_vma_module_t *vma_rcache = (mca_rcache_vma_module_t*)rcache;
return mca_rcache_vma_tree_delete(vma_rcache, reg);
}
/**
* finalize
*/
void mca_rcache_vma_finalize(
struct mca_rcache_base_module_t* rcache
) {
void mca_rcache_vma_finalize(struct mca_rcache_base_module_t* rcache)
{
}

Просмотреть файл

@ -34,57 +34,41 @@ struct mca_rcache_vma_module_t {
mca_rcache_base_module_t base;
ompi_rb_tree_t rb_tree;
opal_list_t vma_list;
opal_list_t mru_list;
size_t reg_mru_len;
size_t reg_max_mru_size;
size_t reg_cur_mru_size;
size_t reg_cur_cache_size;
};
typedef struct mca_rcache_vma_module_t mca_rcache_vma_module_t;
struct mca_rcache_vma_component_t {
mca_rcache_base_component_t super;
size_t reg_mru_len;
size_t reg_max_mru_size;
}; typedef struct mca_rcache_vma_component_t mca_rcache_vma_component_t;
OMPI_DECLSPEC extern mca_rcache_vma_component_t mca_rcache_vma_component;
void mca_rcache_vma_module_init( mca_rcache_vma_module_t* rcache );
void mca_rcache_vma_module_init(mca_rcache_vma_module_t* rcache);
int mca_rcache_vma_find (
mca_rcache_base_module_t* rcache,
void* addr,
size_t size,
ompi_pointer_array_t* regs,
uint32_t *cnt
);
int mca_rcache_vma_find(mca_rcache_base_module_t* rcache, void* addr,
size_t size, mca_mpool_base_registration_t **reg);
int mca_rcache_vma_insert (
struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* registration,
uint32_t flags
);
int mca_rcache_vma_find_all(mca_rcache_base_module_t* rcache, void* addr,
size_t size, ompi_pointer_array_t *regs);
int mca_rcache_vma_delete (
struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* registration,
uint32_t flags
);
int mca_rcache_vma_insert(struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* registration, size_t limit);
int mca_rcache_vma_delete(struct mca_rcache_base_module_t* rcache,
mca_mpool_base_registration_t* registration);
/**
* init/finalize
*/
void mca_rcache_vma_module_init( mca_rcache_vma_module_t* rcache );
void mca_rcache_vma_module_init(mca_rcache_vma_module_t *rcache);
void mca_rcache_vma_finalize(
struct mca_rcache_base_module_t*
);
void mca_rcache_vma_finalize(struct mca_rcache_base_module_t*);
#endif /* MCA_RCACHE_VMA_H */

Просмотреть файл

@ -44,22 +44,6 @@ mca_rcache_vma_component_t mca_rcache_vma_component = {
static int mca_rcache_vma_component_open(void)
{
mca_base_param_reg_int(&mca_rcache_vma_component.super.rcache_version,
"mru_len",
"The maximum size IN ENTRIES of the MRU (most recently used) rcache list",
false,
false,
256,
(int*)&(mca_rcache_vma_component.reg_mru_len));
mca_base_param_reg_int(&mca_rcache_vma_component.super.rcache_version,
"mru_size",
"The maximum size IN BYTES of the MRU (most recently used) rcache list",
false,
false,
1*1024*1024*1024, /* default to 1GB? */
(int*)&(mca_rcache_vma_component.reg_max_mru_size));
return OMPI_SUCCESS;
}
@ -68,8 +52,6 @@ mca_rcache_base_module_t* mca_rcache_vma_component_init(void) {
rcache = (mca_rcache_vma_module_t*) malloc(sizeof(mca_rcache_vma_module_t));
mca_rcache_vma_module_init(rcache);
rcache->reg_mru_len = mca_rcache_vma_component.reg_mru_len;
rcache->reg_max_mru_size = mca_rcache_vma_component.reg_max_mru_size;
return &rcache->base;
}

Просмотреть файл

@ -1,98 +0,0 @@
/**
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
*
* Copyright (c) 2006 Voltaire. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
* Description of the Registration Cache framework
*/
#include "opal/mca/mca.h"
#include "rcache_vma_mru.h"
#include "ompi/mca/mpool/mpool.h"
/*
* initialize the vma mru
*/
int mca_rcache_vma_mru_init(mca_rcache_vma_module_t* rcache){
OBJ_CONSTRUCT(&rcache->mru_list, opal_list_t);
return OMPI_SUCCESS;
}
/*
* insert an item in the vma mru
*/
int mca_rcache_vma_mru_insert(
mca_rcache_vma_module_t* rcache,
mca_mpool_base_registration_t* reg
) {
mca_mpool_base_registration_t* old_reg;
if(rcache->reg_mru_len <= rcache->mru_list.opal_list_length) {
/* call deregister - which removes the registration from
* the tree and mru list. memory will be deregistered when
* the reference count goes to zero.
*/
old_reg = (mca_mpool_base_registration_t*)
opal_list_get_first(&rcache->mru_list);
/* we need to retain first, because we only want the registration
removed from the tree and the mru */
old_reg->mpool->mpool_retain(old_reg->mpool, old_reg);
old_reg->mpool->mpool_deregister(old_reg->mpool, old_reg);
}
opal_list_append(&rcache->mru_list,(opal_list_item_t*) reg);
return OMPI_SUCCESS;
}
/*
* remove an item from the vma mru
*/
int mca_rcache_vma_mru_delete(
mca_rcache_vma_module_t* rcache,
mca_mpool_base_registration_t *reg
){
int rc;
if(NULL == opal_list_remove_item(&rcache->mru_list,
(opal_list_item_t*)reg)) {
rc = OMPI_ERROR;
} else {
rc = OMPI_SUCCESS;
}
return rc;
}
/*
* touch an item in the mru list
*/
int mca_rcache_vma_mru_touch(
mca_rcache_vma_module_t* rcache,
mca_mpool_base_registration_t* reg
){
int rc;
if(NULL == opal_list_remove_item(&rcache->mru_list,
(opal_list_item_t*)reg)) {
rc = OMPI_ERROR;
} else {
opal_list_append(&rcache->mru_list, (opal_list_item_t*)reg);
rc = OMPI_SUCCESS;
}
return rc;
}

Просмотреть файл

@ -1,62 +0,0 @@
/**
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
*
* Copyright (c) 2006 Voltaire. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
* Description of the Registration Cache framework
*/
#ifndef MCA_RCACHE_VMA_MRU_H
#define MCA_RCACHE_VMA_MRU_H
#include "opal/mca/mca.h"
#include "ompi/info/info.h"
#include "opal/class/opal_list.h"
#include "rcache_vma.h"
/*
* initialize the rb mru
*/
int mca_rcache_vma_mru_init(mca_rcache_vma_module_t* rcache);
/*
* insert an item in the rb mru
*/
int mca_rcache_vma_mru_insert(
mca_rcache_vma_module_t* rcache,
mca_mpool_base_registration_t* reg
);
/*
* remove an item from the rb mru
*/
int mca_rcache_vma_mru_delete(
mca_rcache_vma_module_t* rcache,
mca_mpool_base_registration_t* reg
);
int mca_rcache_vma_mru_touch(
mca_rcache_vma_module_t* rcache,
mca_mpool_base_registration_t* reg
);
#endif /* MCA_RCACHE_VMA_MRU_H */

Просмотреть файл

@ -26,6 +26,8 @@
#include "opal/mca/mca.h"
#include "rcache_vma_tree.h"
extern unsigned int mca_mpool_base_page_size;
OBJ_CLASS_INSTANCE(mca_rcache_vma_reg_list_item_t, opal_list_item_t, NULL, NULL);
static void mca_rcache_vma_construct(opal_object_t *object)
@ -130,8 +132,17 @@ static inline int mca_rcache_vma_compare_regs(
mca_mpool_base_registration_t *reg1,
mca_mpool_base_registration_t *reg2)
{
/* persisten registration are on top */
if((reg1->flags & MCA_MPOOL_FLAGS_PERSIST) &&
!(reg2->flags & MCA_MPOOL_FLAGS_PERSIST))
return 1;
if(!(reg1->flags & MCA_MPOOL_FLAGS_PERSIST) &&
(reg2->flags & MCA_MPOOL_FLAGS_PERSIST))
return -1;
if (reg1->bound != reg2->bound)
return (int)(reg1->bound - reg2->bound);
return (int)(reg1->bound - reg2->bound);
/* tie breaker */
return (int)((uintptr_t)reg1 - (uintptr_t)reg2);
@ -241,7 +252,7 @@ int mca_rcache_vma_tree_init(mca_rcache_vma_module_t* rcache)
{
OBJ_CONSTRUCT(&rcache->rb_tree, ompi_rb_tree_t);
OBJ_CONSTRUCT(&rcache->vma_list, opal_list_t);
rcache->reg_cur_mru_size = 0;
rcache->reg_cur_cache_size = 0;
return ompi_rb_tree_init(&rcache->rb_tree,
mca_rcache_vma_tree_node_compare);
}
@ -261,23 +272,81 @@ mca_mpool_base_registration_t *mca_rcache_vma_tree_find(
item = (mca_rcache_vma_reg_list_item_t*)opal_list_get_first(&vma->reg_list);
if(item->reg->bound >= bound)
return item->reg;
do {
if(item->reg->bound >= bound)
return item->reg;
if(!(item->reg->flags & MCA_MPOOL_FLAGS_PERSIST))
break;
item = (mca_rcache_vma_reg_list_item_t*)opal_list_get_next(item);
} while(item !=
(mca_rcache_vma_reg_list_item_t*)opal_list_get_end(&vma->reg_list));
return NULL;
}
static inline int mca_rcache_vma_can_insert(
mca_rcache_vma_module_t *vma_rcache,
uint32_t reg_flags,
size_t nbytes)
static inline bool is_reg_in_array(ompi_pointer_array_t *regs, void *p)
{
if(0 == vma_rcache->reg_max_mru_size ||
!(reg_flags & MCA_MPOOL_FLAGS_CACHE))
int i;
for(i = 0; i < ompi_pointer_array_get_size(regs); i++) {
if(ompi_pointer_array_get_item(regs, i) == p)
return true;
}
return false;
}
int mca_rcache_vma_tree_find_all(
mca_rcache_vma_module_t *vma_rcache, unsigned char *base,
unsigned char *bound, ompi_pointer_array_t *regs)
{
int cnt = 0;
if(opal_list_get_size(&vma_rcache->vma_list) == 0)
return cnt;
do {
mca_rcache_vma_t *vma;
opal_list_item_t *item;
vma = ompi_rb_tree_find_with(&vma_rcache->rb_tree, base,
mca_rcache_vma_tree_node_compare_closest);
if(NULL == vma) {
/* base is bigger than any registered memory */
base = bound + 1;
continue;
}
if(base < (unsigned char*)vma->start) {
base = (unsigned char*)vma->start;
continue;
}
for(item = opal_list_get_first(&vma->reg_list);
item != opal_list_get_end(&vma->reg_list);
item = opal_list_get_next(item)) {
mca_rcache_vma_reg_list_item_t *vma_item;
vma_item = (mca_rcache_vma_reg_list_item_t*)item;
if(is_reg_in_array(regs, (void*)vma_item->reg)) {
continue;
}
ompi_pointer_array_add(regs, (void*)vma_item->reg);
cnt++;
}
base = (unsigned char *)vma->end + 1;
} while(bound >= base);
return cnt;
}
static inline int mca_rcache_vma_can_insert(
mca_rcache_vma_module_t *vma_rcache, size_t nbytes, size_t limit)
{
if(0 == limit)
return 1;
if(vma_rcache->reg_cur_mru_size + nbytes <=
vma_rcache->reg_max_mru_size)
if(vma_rcache->reg_cur_cache_size + nbytes <= limit)
return 1;
return 0;
@ -287,13 +356,11 @@ static inline void mca_rcache_vma_update_byte_count(
mca_rcache_vma_module_t* vma_rcache,
size_t nbytes)
{
vma_rcache->reg_cur_mru_size += nbytes;
vma_rcache->reg_cur_cache_size += nbytes;
}
int mca_rcache_vma_tree_insert(
mca_rcache_vma_module_t* vma_rcache,
mca_mpool_base_registration_t* reg
)
int mca_rcache_vma_tree_insert(mca_rcache_vma_module_t* vma_rcache,
mca_mpool_base_registration_t* reg, size_t limit)
{
mca_rcache_vma_t *i;
uintptr_t begin = (uintptr_t)reg->base, end = (uintptr_t)reg->bound;
@ -309,7 +376,7 @@ int mca_rcache_vma_tree_insert(
if((mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list) == i) {
vma = NULL;
if(mca_rcache_vma_can_insert(vma_rcache, reg->flags, end - begin + 1))
if(mca_rcache_vma_can_insert(vma_rcache, end - begin + 1, limit))
vma = mca_rcache_vma_new(vma_rcache, begin, end);
if(!vma)
@ -323,7 +390,7 @@ int mca_rcache_vma_tree_insert(
} else if(i->start > begin) {
uintptr_t tend = (i->start <= end)?(i->start - 1):end;
vma = NULL;
if(mca_rcache_vma_can_insert(vma_rcache, reg->flags, tend - begin + 1))
if(mca_rcache_vma_can_insert(vma_rcache, tend - begin + 1, limit))
vma = mca_rcache_vma_new(vma_rcache, begin, tend);
if(!vma)

Просмотреть файл

@ -71,14 +71,18 @@ mca_mpool_base_registration_t* mca_rcache_vma_tree_find(
unsigned char* base,
unsigned char *bound
);
/**
* Returns all registration that overlaps given memory region
*/
int mca_rcache_vma_tree_find_all(
mca_rcache_vma_module_t *vma_rcache, unsigned char *base,
unsigned char *bound, ompi_pointer_array_t *regs);
/*
* insert an item in the vma tree
*/
int mca_rcache_vma_tree_insert(
mca_rcache_vma_module_t* rcache,
mca_mpool_base_registration_t* reg
);
int mca_rcache_vma_tree_insert(mca_rcache_vma_module_t* rcache,
mca_mpool_base_registration_t* reg, size_t limit);
/*
* remove an item from the vma tree