1
1

The eager_rdma_buffers update is not atomic: a buffer is added to the array
and, if something goes wrong later in the code, it is removed from the array
again. So add a mutex to prevent concurrent access to the array from different
threads.

This commit was SVN r12385.
Этот коммит содержится в:
Gleb Natapov 2006-11-01 07:27:32 +00:00
родитель 233dac8bba
Коммит aac695a51f
3 изменённых файлов: 10 добавлений и 1 удалений

Просмотреть файл

@ -203,6 +203,7 @@ struct mca_btl_openib_module_t {
size_t eager_rdma_frag_size; /**< length of eager frag */
orte_pointer_array_t *eager_rdma_buffers; /**< RDMA buffers to poll */
opal_mutex_t eager_rdma_buffres_lock; /**< should be held while adding new rdma buffer */
volatile int32_t eager_rdma_buffers_count; /**< number of RDMA buffers */
mca_btl_base_module_error_cb_fn_t error_cb; /**< error handler */

Просмотреть файл

@ -689,7 +689,8 @@ btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.max_eager_rdma,
0);
openib_btl->eager_rdma_buffers_count = 0;
OBJ_CONSTRUCT(&openib_btl->eager_rdma_buffres_lock, opal_mutex_t);
orte_pointer_array_init(&openib_btl->endpoints, 10, INT_MAX, 100);
btls[i] = &openib_btl->super;
}

Просмотреть файл

@ -1228,6 +1228,11 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, (void*)1,
buf);
/* The lock is held during the eager_rdma_buffers update in order to prevent
* a hole in the array if mca_btl_openib_endpoint_send_eager_rdma() fails while
* another thread is creating an eager RDMA buffer for another endpoint and
* has allocated an array index greater than ours. */
OPAL_THREAD_LOCK(&openib_btl->eager_rdma_buffres_lock);
if(orte_pointer_array_add(&index, openib_btl->eager_rdma_buffers, endpoint)
!= ORTE_SUCCESS)
goto cleanup;
@ -1235,10 +1240,12 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == 0) {
/* from this point progress function starts to poll new buffer */
OPAL_THREAD_ADD32(&openib_btl->eager_rdma_buffers_count, 1);
OPAL_THREAD_UNLOCK(&openib_btl->eager_rdma_buffres_lock);
return;
}
orte_pointer_array_set_item(openib_btl->eager_rdma_buffers, index, NULL);
OPAL_THREAD_UNLOCK(&openib_btl->eager_rdma_buffres_lock);
cleanup:
openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool,
buf, (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg);