Fix deadlock reported by Andrew Friedley:
What's happening is that we're holding openib_btl->eager_rdma_lock when we call mca_btl_openib_endpoint_send_eager_rdma() at btl_openib_endpoint.c:1227. This in turn calls mca_btl_openib_endpoint_send() on line 1179. Then, if the endpoint state isn't MCA_BTL_IB_CONNECTED or MCA_BTL_IB_FAILED, we call opal_progress(), where we eventually try to acquire the already-held openib_btl->eager_rdma_lock at btl_openib_component.c:997. The fix removes this lock altogether. Instead, we atomically set the local RDMA pointer to prevent other threads from creating an RDMA buffer for the same endpoint, and we increment eager_rdma_buffers_count atomically so that the polling thread doesn't need a lock around it. This commit was SVN r12369.
Этот коммит содержится в:
родитель
1b152dfe09
Коммит
d7375ec102
@ -201,10 +201,9 @@ struct mca_btl_openib_module_t {
|
|||||||
|
|
||||||
opal_list_t pending_frags[2]; /**< list of pending frags */
|
opal_list_t pending_frags[2]; /**< list of pending frags */
|
||||||
|
|
||||||
opal_mutex_t eager_rdma_lock;
|
|
||||||
size_t eager_rdma_frag_size; /**< length of eager frag */
|
size_t eager_rdma_frag_size; /**< length of eager frag */
|
||||||
orte_pointer_array_t *eager_rdma_buffers; /**< RDMA buffers to poll */
|
orte_pointer_array_t *eager_rdma_buffers; /**< RDMA buffers to poll */
|
||||||
uint32_t eager_rdma_buffers_count; /**< number of RDMA buffers */
|
volatile uint32_t eager_rdma_buffers_count; /**< number of RDMA buffers */
|
||||||
|
|
||||||
mca_btl_base_module_error_cb_fn_t error_cb; /**< error handler */
|
mca_btl_base_module_error_cb_fn_t error_cb; /**< error handler */
|
||||||
|
|
||||||
|
@ -689,7 +689,6 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
mca_btl_openib_component.max_eager_rdma,
|
mca_btl_openib_component.max_eager_rdma,
|
||||||
0);
|
0);
|
||||||
openib_btl->eager_rdma_buffers_count = 0;
|
openib_btl->eager_rdma_buffers_count = 0;
|
||||||
OBJ_CONSTRUCT(&openib_btl->eager_rdma_lock, opal_mutex_t);
|
|
||||||
|
|
||||||
orte_pointer_array_init(&openib_btl->endpoints, 10, INT_MAX, 100);
|
orte_pointer_array_init(&openib_btl->endpoints, 10, INT_MAX, 100);
|
||||||
btls[i] = &openib_btl->super;
|
btls[i] = &openib_btl->super;
|
||||||
@ -994,9 +993,7 @@ static int btl_openib_component_progress(void)
|
|||||||
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
|
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
|
||||||
mca_btl_openib_module_t* openib_btl = &mca_btl_openib_component.openib_btls[i];
|
mca_btl_openib_module_t* openib_btl = &mca_btl_openib_component.openib_btls[i];
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&openib_btl->eager_rdma_lock);
|
|
||||||
c = openib_btl->eager_rdma_buffers_count;
|
c = openib_btl->eager_rdma_buffers_count;
|
||||||
OPAL_THREAD_UNLOCK(&openib_btl->eager_rdma_lock);
|
|
||||||
|
|
||||||
for(j = 0; j < c; j++) {
|
for(j = 0; j < c; j++) {
|
||||||
endpoint =
|
endpoint =
|
||||||
|
@ -1191,10 +1191,13 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
|
|||||||
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
|
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
|
||||||
char *buf;
|
char *buf;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
orte_std_cntr_t index;
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&endpoint->eager_rdma_local.lock);
|
/* Set local rdma pointer to 1 temporarily so other threads will not try
|
||||||
if (endpoint->eager_rdma_local.base.pval)
|
* to enter the function */
|
||||||
goto unlock_rdma_local;
|
if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL,
|
||||||
|
(void*)1))
|
||||||
|
return;
|
||||||
|
|
||||||
buf = openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
|
buf = openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
|
||||||
openib_btl->eager_rdma_frag_size *
|
openib_btl->eager_rdma_frag_size *
|
||||||
@ -1221,28 +1224,26 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
|
|||||||
((mca_btl_openib_frag_t*)item)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
|
((mca_btl_openib_frag_t*)item)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&openib_btl->eager_rdma_lock);
|
/* set local rdma pointer to real value */
|
||||||
if(orte_pointer_array_add (&endpoint->eager_rdma_index,
|
opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, (void*)1,
|
||||||
openib_btl->eager_rdma_buffers, endpoint) < 0)
|
buf);
|
||||||
|
|
||||||
|
if(orte_pointer_array_add(&index, openib_btl->eager_rdma_buffers, endpoint)
|
||||||
|
!= ORTE_SUCCESS)
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
|
||||||
endpoint->eager_rdma_local.base.pval = buf;
|
if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == 0) {
|
||||||
openib_btl->eager_rdma_buffers_count++;
|
/* from this point progress function starts to poll new buffer */
|
||||||
if (mca_btl_openib_endpoint_send_eager_rdma(endpoint) == 0) {
|
OPAL_THREAD_ADD32(&openib_btl->eager_rdma_buffers_count, 1);
|
||||||
OPAL_THREAD_UNLOCK(&openib_btl->eager_rdma_lock);
|
|
||||||
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
openib_btl->eager_rdma_buffers_count--;
|
orte_pointer_array_set_item(openib_btl->eager_rdma_buffers, index, NULL);
|
||||||
endpoint->eager_rdma_local.base.pval = NULL;
|
|
||||||
orte_pointer_array_set_item(openib_btl->eager_rdma_buffers,
|
|
||||||
endpoint->eager_rdma_index, NULL);
|
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
OPAL_THREAD_UNLOCK(&openib_btl->eager_rdma_lock);
|
|
||||||
openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool,
|
openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool,
|
||||||
buf, (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg);
|
buf, (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg);
|
||||||
unlock_rdma_local:
|
unlock_rdma_local:
|
||||||
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
|
/* set local rdma pointer back to zero. Will retry later */
|
||||||
|
opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
|
||||||
|
endpoint->eager_rdma_local.base.pval, NULL);
|
||||||
}
|
}
|
||||||
|
@ -154,7 +154,6 @@ struct mca_btl_base_endpoint_t {
|
|||||||
/**< info about remote RDMA buffer */
|
/**< info about remote RDMA buffer */
|
||||||
mca_btl_openib_eager_rdma_local_t eager_rdma_local;
|
mca_btl_openib_eager_rdma_local_t eager_rdma_local;
|
||||||
/**< info about local RDMA buffer */
|
/**< info about local RDMA buffer */
|
||||||
int32_t eager_rdma_index; /**< index into RDMA buffers pointer array */
|
|
||||||
uint32_t index; /**< index of the endpoint in endpoints array */
|
uint32_t index; /**< index of the endpoint in endpoints array */
|
||||||
struct mca_btl_openib_frag_t *credit_frag[2]; /**< frags for sending explicit high priority credits */
|
struct mca_btl_openib_frag_t *credit_frag[2]; /**< frags for sending explicit high priority credits */
|
||||||
};
|
};
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user