1
1

Split the frag list for RDMA descriptors in two: one for src descriptors

and another for dst descriptors. This provides a partial solution to the OB1 protocol
deadlock problem. We can limit the number of RDMA descriptors (by setting
btl_openib_free_list_max to something other than -1), and if we are
lucky enough to hit this limit before we fail to register more memory, the protocol
will not deadlock. When we had only one list for src/dst descriptors, we
deadlocked when we reached the max limit for the list.

This commit was SVN r13844.
Этот коммит содержится в:
Gleb Natapov 2007-02-28 13:43:38 +00:00
родитель 1b5d40e98a
Коммит 2b6cbd6299
5 изменённых файлов: 49 добавлений и 7 удалений

Просмотреть файл

@ -377,7 +377,9 @@ int mca_btl_openib_free(
{
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)des;
if(MCA_BTL_OPENIB_FRAG_FRAG == frag->type && frag->registration != NULL) {
if(((MCA_BTL_OPENIB_SEND_FRAG_FRAG == frag->type) ||
(MCA_BTL_OPENIB_RECV_FRAG_FRAG == frag->type))
&& frag->registration != NULL) {
btl->btl_mpool->mpool_deregister(btl->btl_mpool,
(mca_mpool_base_registration_t*)
frag->registration);
@ -433,7 +435,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
if(registration != NULL || max_data > btl->btl_max_send_size) {
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
MCA_BTL_IB_FRAG_ALLOC_SEND_FRAG(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
@ -548,7 +550,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
openib_btl = (mca_btl_openib_module_t*)btl;
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
MCA_BTL_IB_FRAG_ALLOC_RECV_FRAG(btl, frag, rc);
if(NULL == frag) {
return NULL;
}

Просмотреть файл

@ -204,6 +204,7 @@ struct mca_btl_openib_module_t {
ompi_free_list_t recv_free_eager; /**< High priority free list of buffer descriptors */
ompi_free_list_t recv_free_max; /**< Low priority free list of buffer descriptors */
ompi_free_list_t recv_free_frag; /**< free list of frags only... used for pining memory */
ompi_free_list_t send_free_control; /**< frags for control messages */
opal_mutex_t ib_lock; /**< module level lock */

Просмотреть файл

@ -693,6 +693,7 @@ btl_openib_component_init(int *num_btl_modules,
OBJ_CONSTRUCT(&openib_btl->recv_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->recv_free_max, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->recv_free_frag, ompi_free_list_t);
/* initialize the memory pool using the hca */
openib_btl->super.btl_mpool = openib_btl->hca->mpool;
@ -776,6 +777,14 @@ btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.ib_free_list_inc,
NULL);
ompi_free_list_init(&openib_btl->recv_free_frag,
length,
OBJ_CLASS(mca_btl_openib_recv_frag_frag_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
NULL);
orte_pointer_array_init(&openib_btl->eager_rdma_buffers,
mca_btl_openib_component.max_eager_rdma,
mca_btl_openib_component.max_eager_rdma,

Просмотреть файл

@ -109,11 +109,19 @@ static void mca_btl_openib_recv_frag_eager_constructor(mca_btl_openib_frag_t* fr
/*
 * Constructor for the send-side "frag" fragment class.
 *
 * Sets the fragment size to 0, tags the fragment with its type, clears the
 * registration pointer, and then runs the common send-fragment constructor.
 */
static void mca_btl_openib_send_frag_frag_constructor(mca_btl_openib_frag_t* frag)
{
frag->size = 0;
/* NOTE(review): the next two lines look like the removed/added pair of this
 * diff hunk (markers were lost); read as code, the second assignment
 * (MCA_BTL_OPENIB_SEND_FRAG_FRAG) is the one that takes effect. */
frag->type = MCA_BTL_OPENIB_FRAG_FRAG;
frag->type = MCA_BTL_OPENIB_SEND_FRAG_FRAG;
frag->registration = NULL;
mca_btl_openib_send_frag_common_constructor(frag);
}
/*
 * Constructor for the receive-side "frag" fragment class.
 *
 * Sets the fragment size to 0, tags it as MCA_BTL_OPENIB_RECV_FRAG_FRAG,
 * clears the registration pointer, and then runs the common receive-fragment
 * constructor.
 */
static void mca_btl_openib_recv_frag_frag_constructor(mca_btl_openib_frag_t* frag)
{
frag->size = 0;
/* The type tag is what MCA_BTL_IB_FRAG_RETURN's case labels match on to pick
 * the recv_free_frag list. */
frag->type = MCA_BTL_OPENIB_RECV_FRAG_FRAG;
frag->registration = NULL;
mca_btl_openib_recv_frag_common_constructor(frag);
}
static void mca_btl_openib_send_frag_control_constructor(mca_btl_openib_frag_t* frag)
{
frag->size = sizeof(mca_btl_openib_eager_rdma_header_t);
@ -146,6 +154,12 @@ OBJ_CLASS_INSTANCE(
mca_btl_openib_send_frag_frag_constructor,
NULL);
/*
 * Class instance for mca_btl_openib_recv_frag_frag_t, derived from
 * mca_btl_base_descriptor_t and constructed by
 * mca_btl_openib_recv_frag_frag_constructor.
 * NOTE(review): the trailing NULL is presumably the destructor slot of
 * OBJ_CLASS_INSTANCE -- confirm against the OPAL object macros.
 */
OBJ_CLASS_INSTANCE(
mca_btl_openib_recv_frag_frag_t,
mca_btl_base_descriptor_t,
mca_btl_openib_recv_frag_frag_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_recv_frag_eager_t,
mca_btl_base_descriptor_t,

Просмотреть файл

@ -152,7 +152,8 @@ do { \
/*
 * Type tag stored in a fragment's `type` field by the fragment constructors.
 * The enumerators are used as case labels in MCA_BTL_IB_FRAG_RETURN to select
 * the free list a fragment is returned to.
 */
enum mca_btl_openib_frag_type_t {
MCA_BTL_OPENIB_FRAG_EAGER,
MCA_BTL_OPENIB_FRAG_MAX,
/* NOTE(review): MCA_BTL_OPENIB_FRAG_FRAG appears to be the enumerator removed
 * by this diff; the SEND/RECV pair below replaces it. */
MCA_BTL_OPENIB_FRAG_FRAG,
MCA_BTL_OPENIB_SEND_FRAG_FRAG, /* returned to send_free_frag */
MCA_BTL_OPENIB_RECV_FRAG_FRAG, /* returned to recv_free_frag */
MCA_BTL_OPENIB_FRAG_EAGER_RDMA,
MCA_BTL_OPENIB_FRAG_CONTROL /* returned to send_free_control */
};
@ -192,6 +193,10 @@ typedef struct mca_btl_openib_frag_t mca_btl_openib_send_frag_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_frag_t);
/* Alias of struct mca_btl_openib_frag_t used as the class type for fragments
 * kept on the recv_free_frag list; the class itself is declared here. */
typedef struct mca_btl_openib_frag_t mca_btl_openib_recv_frag_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_recv_frag_frag_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_recv_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_recv_frag_eager_t);
@ -233,7 +238,7 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_control_t);
frag = (mca_btl_openib_frag_t*) item; \
}
#define MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc) \
#define MCA_BTL_IB_FRAG_ALLOC_SEND_FRAG(btl, frag, rc) \
{ \
\
ompi_free_list_item_t *item; \
@ -241,6 +246,14 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_control_t);
frag = (mca_btl_openib_frag_t*) item; \
}
/*
 * Get a fragment from the module's recv_free_frag list.
 *
 *   btl  - module pointer; cast to mca_btl_openib_module_t* inside the macro
 *   frag - lvalue that receives the item, cast to mca_btl_openib_frag_t*
 *   rc   - passed through to OMPI_FREE_LIST_GET
 *
 * The caller in mca_btl_openib_prepare_dst checks frag against NULL after
 * invoking this macro.
 * NOTE(review): btl is not parenthesized in the cast and the body is a bare
 * { } block rather than do { } while (0); pass a plain identifier for btl.
 */
#define MCA_BTL_IB_FRAG_ALLOC_RECV_FRAG(btl, frag, rc) \
{ \
\
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_GET(&((mca_btl_openib_module_t*)btl)->recv_free_frag, item, rc); \
frag = (mca_btl_openib_frag_t*) item; \
}
#define MCA_BTL_IB_FRAG_RETURN(btl, frag) \
{ do { \
ompi_free_list_t* my_list = NULL; \
@ -255,7 +268,10 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_control_t);
case MCA_BTL_OPENIB_FRAG_CONTROL: \
my_list = &btl->send_free_control; \
break; \
case MCA_BTL_OPENIB_FRAG_FRAG: \
case MCA_BTL_OPENIB_RECV_FRAG_FRAG: \
my_list = &btl->recv_free_frag; \
break; \
case MCA_BTL_OPENIB_SEND_FRAG_FRAG: \
my_list = &btl->send_free_frag; \
break; \
} \