1
1

Add support for getting aligned buffers from free_list_t. Convert the openib BTL to the new interface.

This commit was SVN r10899.
Этот коммит содержится в:
Gleb Natapov 2006-07-20 14:39:05 +00:00
родитель 7899057d4e
Коммит 383694c68d
4 изменённых файлов: 100 добавлений и 61 удалений

Просмотреть файл

@ -23,16 +23,27 @@
#include "opal/util/output.h" #include "opal/util/output.h"
#include "ompi/mca/mpool/mpool.h" #include "ompi/mca/mpool/mpool.h"
static inline size_t align_to(size_t val, size_t alignment);

/**
 * Round a value up to the next multiple of an alignment.
 *
 * @param val        Value (size or address cast to size_t) to round up.
 * @param alignment  Required multiple; 0 means "no alignment requested".
 *
 * @return val unchanged when alignment is 0 or val is already a multiple
 *         of alignment; otherwise the smallest multiple of alignment that
 *         is greater than val.
 */
static inline size_t align_to(size_t val, size_t alignment)
{
    size_t remainder;

    /* Guard first: a zero alignment would otherwise divide by zero below. */
    if (0 == alignment) {
        return val;
    }

    remainder = val % alignment;

    /* Only pad when val is not already sitting on a boundary. */
    return (0 == remainder) ? val : val + (alignment - remainder);
}
static void ompi_free_list_construct(ompi_free_list_t* fl); static void ompi_free_list_construct(ompi_free_list_t* fl);
static void ompi_free_list_destruct(ompi_free_list_t* fl); static void ompi_free_list_destruct(ompi_free_list_t* fl);
OBJ_CLASS_INSTANCE(ompi_free_list_t, opal_atomic_lifo_t,
opal_class_t ompi_free_list_t_class = { ompi_free_list_construct, ompi_free_list_destruct);
"ompi_free_list_t",
OBJ_CLASS(opal_atomic_lifo_t),
(opal_construct_t)ompi_free_list_construct,
(opal_destruct_t)ompi_free_list_destruct
};
struct ompi_free_list_memory_t { struct ompi_free_list_memory_t {
opal_list_item_t super; opal_list_item_t super;
@ -55,8 +66,10 @@ static void ompi_free_list_construct(ompi_free_list_t* fl)
fl->fl_num_allocated = 0; fl->fl_num_allocated = 0;
fl->fl_num_per_alloc = 0; fl->fl_num_per_alloc = 0;
fl->fl_num_waiting = 0; fl->fl_num_waiting = 0;
fl->fl_elem_size = 0; fl->fl_elem_size = sizeof(ompi_free_list_item_t);
fl->fl_elem_class = 0; fl->fl_elem_class = OBJ_CLASS(ompi_free_list_item_t);
fl->fl_header_space = 0;
fl->fl_alignment = 0;
fl->fl_mpool = 0; fl->fl_mpool = 0;
OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t); OBJ_CONSTRUCT(&(fl->fl_allocations), opal_list_t);
} }
@ -95,36 +108,38 @@ static void ompi_free_list_destruct(ompi_free_list_t* fl)
OBJ_DESTRUCT(&fl->fl_lock); OBJ_DESTRUCT(&fl->fl_lock);
} }
int ompi_free_list_init( int ompi_free_list_init_ex(
ompi_free_list_t *flist, ompi_free_list_t *flist,
size_t elem_size, size_t elem_size,
size_t header_space,
size_t alignment,
opal_class_t* elem_class, opal_class_t* elem_class,
int num_elements_to_alloc, int num_elements_to_alloc,
int max_elements_to_alloc, int max_elements_to_alloc,
int num_elements_per_alloc, int num_elements_per_alloc,
mca_mpool_base_module_t* mpool) mca_mpool_base_module_t* mpool)
{ {
flist->fl_elem_size = elem_size; if(elem_size > flist->fl_elem_size)
if( elem_size % CACHE_LINE_SIZE ) { flist->fl_elem_size = elem_size;
flist->fl_elem_size += CACHE_LINE_SIZE - (elem_size % CACHE_LINE_SIZE); if(elem_class)
} flist->fl_elem_class = elem_class;
flist->fl_elem_class = elem_class;
flist->fl_max_to_alloc = max_elements_to_alloc; flist->fl_max_to_alloc = max_elements_to_alloc;
flist->fl_num_allocated = 0; flist->fl_num_allocated = 0;
flist->fl_num_per_alloc = num_elements_per_alloc; flist->fl_num_per_alloc = num_elements_per_alloc;
flist->fl_mpool = mpool; flist->fl_mpool = mpool;
flist->fl_header_space = header_space;
flist->fl_alignment = alignment;
flist->fl_elem_size = align_to(flist->fl_elem_size, flist->fl_alignment);
if(num_elements_to_alloc) if(num_elements_to_alloc)
return ompi_free_list_grow(flist, num_elements_to_alloc); return ompi_free_list_grow(flist, num_elements_to_alloc);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements) int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements)
{ {
unsigned char* ptr; unsigned char* ptr;
ompi_free_list_memory_t *alloc_ptr; ompi_free_list_memory_t *alloc_ptr;
size_t i; size_t i, alloc_size;
size_t mod;
mca_mpool_base_registration_t* user_out = NULL; mca_mpool_base_registration_t* user_out = NULL;
if (flist->fl_max_to_alloc > 0) if (flist->fl_max_to_alloc > 0)
@ -134,12 +149,16 @@ int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements)
if (num_elements == 0) if (num_elements == 0)
return OMPI_ERR_TEMP_OUT_OF_RESOURCE; return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
alloc_size = num_elements * flist->fl_elem_size +
sizeof(ompi_free_list_memory_t) + flist->fl_header_space +
flist->fl_alignment;
if (NULL != flist->fl_mpool) if (NULL != flist->fl_mpool)
alloc_ptr = flist->fl_mpool->mpool_alloc(flist->fl_mpool, alloc_ptr = flist->fl_mpool->mpool_alloc(flist->fl_mpool,
(num_elements * flist->fl_elem_size) + CACHE_LINE_SIZE + sizeof(ompi_free_list_memory_t), alloc_size, 0, 0, &user_out);
0, 0, &user_out);
else else
alloc_ptr = malloc((num_elements * flist->fl_elem_size) + CACHE_LINE_SIZE + sizeof(ompi_free_list_memory_t)); alloc_ptr = malloc(alloc_size);
if(NULL == alloc_ptr) if(NULL == alloc_ptr)
return OMPI_ERR_TEMP_OUT_OF_RESOURCE; return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
@ -152,32 +171,19 @@ int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements)
ptr = (unsigned char*) alloc_ptr + sizeof(ompi_free_list_memory_t); ptr = (unsigned char*) alloc_ptr + sizeof(ompi_free_list_memory_t);
mod = (unsigned long)ptr % CACHE_LINE_SIZE; ptr = (unsigned char*)(align_to((size_t)ptr + flist->fl_header_space,
if(mod != 0) { flist->fl_alignment) - flist->fl_header_space);
ptr += (CACHE_LINE_SIZE - mod);
for(i=0; i<num_elements; i++) {
ompi_free_list_item_t* item = (ompi_free_list_item_t*)ptr;
item->user_data = user_out;
OBJ_CONSTRUCT_INTERNAL(item, flist->fl_elem_class);
opal_atomic_lifo_push(&(flist->super), &(item->super));
ptr += flist->fl_elem_size;
} }
if (NULL != flist->fl_elem_class) {
for(i=0; i<num_elements; i++) {
ompi_free_list_item_t* item = (ompi_free_list_item_t*)ptr;
item->user_data = user_out;
OBJ_CONSTRUCT_INTERNAL(item, flist->fl_elem_class);
opal_atomic_lifo_push(&(flist->super), &(item->super));
ptr += flist->fl_elem_size;
}
} else {
for(i=0; i<num_elements; i++) {
ompi_free_list_item_t* item = (ompi_free_list_item_t*)ptr;
item->user_data = user_out;
OBJ_CONSTRUCT(&item->super, opal_list_item_t);
opal_atomic_lifo_push(&(flist->super), &(item->super));
ptr += flist->fl_elem_size;
}
}
flist->fl_num_allocated += num_elements; flist->fl_num_allocated += num_elements;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -201,11 +207,12 @@ int ompi_free_list_parse( ompi_free_list_t* list,
/* If the request will be the first on this memory region, it's easy. */ /* If the request will be the first on this memory region, it's easy. */
if( NULL == position->last_item ) { if( NULL == position->last_item ) {
unsigned long ptr = (unsigned long)position->last_memory; unsigned long ptr = (unsigned long)position->last_memory;
/* move it on the cache boundary */
if( ptr % CACHE_LINE_SIZE ) { ptr += sizeof(ompi_free_list_memory_t);
ptr = (ptr + CACHE_LINE_SIZE) & (CACHE_LINE_SIZE - 1);
} ptr = align_to(ptr + list->fl_header_space, list->fl_alignment) -
*return_item = (opal_list_item_t*)(ptr + sizeof(ompi_free_list_memory_t)); list->fl_header_space;
*return_item = (opal_list_item_t*)ptr;
return 0; return 0;
} }
/* else go to the next request */ /* else go to the next request */
@ -215,8 +222,10 @@ int ompi_free_list_parse( ompi_free_list_t* list,
/* otherwise go to the next one. Once there make sure we're still on the /* otherwise go to the next one. Once there make sure we're still on the
* memory fragment, otherwise go to the next fragment. * memory fragment, otherwise go to the next fragment.
*/ */
size_t frag_length = (list->fl_elem_size * list->fl_num_per_alloc + CACHE_LINE_SIZE size_t frag_length = (list->fl_elem_size * list->fl_num_per_alloc +
+ sizeof(ompi_free_list_memory_t)); + sizeof(ompi_free_list_memory_t) +
list->fl_header_space + list->fl_alignment);
if( position->last_item < (position->last_memory + frag_length) ) { if( position->last_item < (position->last_memory + frag_length) ) {
*return_item = (opal_list_item_t*)position->last_item; *return_item = (opal_list_item_t*)position->last_item;
return 0; return 0;

Просмотреть файл

@ -29,7 +29,7 @@
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
extern "C" { extern "C" {
#endif #endif
OMPI_DECLSPEC extern opal_class_t ompi_free_list_t_class; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_free_list_t);
struct mca_mem_pool_t; struct mca_mem_pool_t;
@ -41,6 +41,8 @@ struct ompi_free_list_t
size_t fl_num_per_alloc; size_t fl_num_per_alloc;
size_t fl_num_waiting; size_t fl_num_waiting;
size_t fl_elem_size; size_t fl_elem_size;
size_t fl_header_space;
size_t fl_alignment;
opal_class_t* fl_elem_class; opal_class_t* fl_elem_class;
struct mca_mpool_base_module_t* fl_mpool; struct mca_mpool_base_module_t* fl_mpool;
opal_mutex_t fl_lock; opal_mutex_t fl_lock;
@ -70,14 +72,31 @@ OBJ_CLASS_DECLARATION(ompi_free_list_item_t);
 * @param mpool Optional memory pool for allocations. * @param mpool Optional memory pool for allocations.
*/ */
OMPI_DECLSPEC int ompi_free_list_init( OMPI_DECLSPEC int ompi_free_list_init_ex(
ompi_free_list_t *free_list,
size_t element_size,
size_t header_size,
size_t alignment,
opal_class_t* element_class,
int num_elements_to_alloc,
int max_elements_to_alloc,
int num_elements_per_alloc,
struct mca_mpool_base_module_t*);
static inline int ompi_free_list_init(
ompi_free_list_t *free_list, ompi_free_list_t *free_list,
size_t element_size, size_t element_size,
opal_class_t* element_class, opal_class_t* element_class,
int num_elements_to_alloc, int num_elements_to_alloc,
int max_elements_to_alloc, int max_elements_to_alloc,
int num_elements_per_alloc, int num_elements_per_alloc,
struct mca_mpool_base_module_t*); struct mca_mpool_base_module_t* mpool)
{
return ompi_free_list_init_ex(free_list, element_size, 0, CACHE_LINE_SIZE,
element_class, num_elements_to_alloc, max_elements_to_alloc,
num_elements_per_alloc, mpool);
}
OMPI_DECLSPEC int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements); OMPI_DECLSPEC int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements);

Просмотреть файл

@ -121,6 +121,7 @@ struct mca_btl_openib_component_t {
uint32_t max_eager_rdma; uint32_t max_eager_rdma;
uint32_t btls_per_lid; uint32_t btls_per_lid;
uint32_t max_lmc; uint32_t max_lmc;
uint32_t buffer_alignment;
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t; }; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;

Просмотреть файл

@ -213,6 +213,8 @@ int mca_btl_openib_component_open(void)
1, (int*)&mca_btl_openib_component.btls_per_lid); 1, (int*)&mca_btl_openib_component.btls_per_lid);
mca_btl_openib_param_register_int("max_lmc", "Maximum LIDs to use for each port (0 - all available)", mca_btl_openib_param_register_int("max_lmc", "Maximum LIDs to use for each port (0 - all available)",
0, (int*)&mca_btl_openib_component.max_lmc); 0, (int*)&mca_btl_openib_component.max_lmc);
mca_btl_openib_param_register_int("buffer_alignment", "Prefered communication buffers alignmet for best performance",
64, (int*)&mca_btl_openib_component.buffer_alignment);
mca_btl_openib_param_register_int ("eager_limit", "eager send limit", mca_btl_openib_param_register_int ("eager_limit", "eager send limit",
(12*1024), &val); (12*1024), &val);
mca_btl_openib_module.super.btl_eager_limit = val; mca_btl_openib_module.super.btl_eager_limit = val;
@ -620,16 +622,20 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
openib_btl->eager_rdma_frag_size = openib_btl->eager_rdma_frag_size =
length & ~(2 * MCA_BTL_IB_FRAG_ALIGN - 1); length & ~(2 * MCA_BTL_IB_FRAG_ALIGN - 1);
ompi_free_list_init(&openib_btl->send_free_eager, ompi_free_list_init_ex(&openib_btl->send_free_eager,
length, length,
sizeof(mca_btl_openib_frag_t),
mca_btl_openib_component.buffer_alignment,
OBJ_CLASS(mca_btl_openib_send_frag_eager_t), OBJ_CLASS(mca_btl_openib_send_frag_eager_t),
mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc, mca_btl_openib_component.ib_free_list_inc,
openib_btl->super.btl_mpool); openib_btl->super.btl_mpool);
ompi_free_list_init(&openib_btl->recv_free_eager, ompi_free_list_init_ex(&openib_btl->recv_free_eager,
length, length,
sizeof(mca_btl_openib_frag_t),
mca_btl_openib_component.buffer_alignment,
OBJ_CLASS(mca_btl_openib_recv_frag_eager_t), OBJ_CLASS(mca_btl_openib_recv_frag_eager_t),
mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_max,
@ -642,8 +648,10 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
2*MCA_BTL_IB_FRAG_ALIGN; 2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&openib_btl->send_free_max, ompi_free_list_init_ex(&openib_btl->send_free_max,
length, length,
sizeof(mca_btl_openib_frag_t),
mca_btl_openib_component.buffer_alignment,
OBJ_CLASS(mca_btl_openib_send_frag_max_t), OBJ_CLASS(mca_btl_openib_send_frag_max_t),
mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_max,
@ -651,8 +659,10 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
openib_btl->super.btl_mpool); openib_btl->super.btl_mpool);
/* Initialize pool of receive fragments */ /* Initialize pool of receive fragments */
ompi_free_list_init (&openib_btl->recv_free_max, ompi_free_list_init_ex(&openib_btl->recv_free_max,
length, length,
sizeof(mca_btl_openib_frag_t),
mca_btl_openib_component.buffer_alignment,
OBJ_CLASS (mca_btl_openib_recv_frag_max_t), OBJ_CLASS (mca_btl_openib_recv_frag_max_t),
mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_max,