1
1

Create free lists of fragments per HCA, not per BTL. Saves memory in case of
multiple LMCs.

This commit was SVN r17082.
Этот коммит содержится в:
Gleb Natapov 2008-01-09 10:26:21 +00:00
родитель 5ce3213158
Коммит 621fa223c5
6 изменённых файлов: 292 добавлений и 254 удалений

Просмотреть файл

@ -486,7 +486,7 @@ ib_frag_alloc(mca_btl_openib_module_t *btl, size_t size, uint8_t order,
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if(mca_btl_openib_component.qp_infos[qp].size >= size) {
OMPI_FREE_LIST_GET(&btl->qps[qp].send_free, item, rc);
OMPI_FREE_LIST_GET(&btl->hca->qps[qp].send_free, item, rc);
if(item)
break;
}
@ -585,7 +585,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
return ib_frag_alloc((mca_btl_openib_module_t*)btl, size, order, flags);
/* begin coalescing message */
MCA_BTL_IB_FRAG_ALLOC_COALESCED(obtl, cfrag);
cfrag = alloc_coalesced_frag();
cfrag->send_frag = sfrag;
/* fix up new coalescing header if this is the first coalesced frag */
@ -725,7 +725,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
/* GMS bloody HACK! */
if(registration != NULL || max_data > btl->btl_max_send_size) {
MCA_BTL_IB_FRAG_ALLOC_SEND_USER(openib_btl, frag, rc);
frag = alloc_send_user_frag();
if(NULL == frag) {
return NULL;
}
@ -829,8 +829,8 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
void *buffer;
openib_btl = (mca_btl_openib_module_t*)btl;
MCA_BTL_IB_FRAG_ALLOC_RECV_USER(openib_btl, frag, rc);
frag = alloc_recv_user_frag();
if(NULL == frag) {
return NULL;
}
@ -939,7 +939,6 @@ static int mca_btl_finalize_hca(struct mca_btl_openib_hca_t *hca)
return OMPI_ERROR;
}
}
OBJ_DESTRUCT(&hca->hca_lock);
OBJ_RELEASE(hca);
return OMPI_SUCCESS;
}
@ -1001,15 +1000,8 @@ int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
break;
}
/* Destroy free lists */
OBJ_DESTRUCT(&openib_btl->qps[qp].send_free);
OBJ_DESTRUCT(&openib_btl->qps[qp].recv_free);
}
OBJ_DESTRUCT(&openib_btl->send_free_control);
OBJ_DESTRUCT(&openib_btl->send_user_free);
OBJ_DESTRUCT(&openib_btl->recv_user_free);
/* Release pending lists */
if (!(--openib_btl->hca->btls)) {
/* All btls for the HCA were closed

Просмотреть файл

@ -78,6 +78,8 @@ struct mca_btl_openib_qp_info_t {
size_t size;
int32_t rd_num;
int32_t rd_low;
ompi_free_list_t send_free; /**< free lists of send buffer descriptors */
ompi_free_list_t recv_free; /**< free lists of receive buffer descriptors */
union {
mca_btl_openib_pp_qp_info_t pp_qp;
mca_btl_openib_srq_qp_info_t srq_qp;
@ -206,6 +208,12 @@ struct mca_btl_openib_component_t {
#endif
int rdma_qp;
int credits_qp; /* qp used for software flow control */
/** free list of frags only; used for pinning user memory */
ompi_free_list_t send_user_free;
/** free list of frags only; used for pinning user memory */
ompi_free_list_t recv_user_free;
/** frags for coalesced messages */
ompi_free_list_t send_free_coalesced;
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
OMPI_MODULE_DECLSPEC extern mca_btl_openib_component_t mca_btl_openib_component;
@ -245,6 +253,11 @@ typedef struct mca_btl_openib_port_info_t mca_btl_openib_port_info_t;
MCA_BTL_OPENIB_LID_HTON(hdr); \
} while (0)
/**
 * Per-QP fragment free lists kept on the HCA.
 *
 * The HCA structure holds an array of these (its `qps` member); the HCA
 * constructor allocates mca_btl_openib_component.num_qps entries and
 * constructs both lists in each entry.
 */
typedef struct mca_btl_openib_hca_qp_t {
ompi_free_list_t send_free; /**< free list of send buffer descriptors */
ompi_free_list_t recv_free; /**< free list of receive buffer descriptors */
} mca_btl_openib_hca_qp_t;
struct mca_btl_base_endpoint_t;
typedef struct mca_btl_openib_hca_t {
@ -281,6 +294,9 @@ typedef struct mca_btl_openib_hca_t {
uint32_t non_eager_rdma_endpoints;
int32_t eager_rdma_buffers_count;
struct mca_btl_base_endpoint_t **eager_rdma_buffers;
/** frags for control messages */
ompi_free_list_t send_free_control;
mca_btl_openib_hca_qp_t *qps;
} mca_btl_openib_hca_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_hca_t);
@ -297,8 +313,6 @@ struct mca_btl_openib_module_srq_qp_t {
}; typedef struct mca_btl_openib_module_srq_qp_t mca_btl_openib_module_srq_qp_t;
struct mca_btl_openib_module_qp_t {
ompi_free_list_t send_free; /**< free lists of send buffer descriptors */
ompi_free_list_t recv_free; /**< free lists of receive buffer descriptors */
union {
mca_btl_openib_module_pp_qp_t pp_qp;
mca_btl_openib_module_srq_qp_t srq_qp;
@ -322,16 +336,6 @@ struct mca_btl_openib_module_t {
int32_t num_peers;
ompi_free_list_t send_user_free; /**< free list of frags only...
* used for pining user memory */
ompi_free_list_t recv_user_free; /**< free list of frags only...
* used for pining user memory */
ompi_free_list_t send_free_control; /**< frags for control massages */
ompi_free_list_t send_free_coalesced; /**< frags for coalesced massages */
opal_mutex_t ib_lock; /**< module level lock */
size_t ib_inline_max; /**< max size of inline send*/

Просмотреть файл

@ -508,8 +508,14 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
static void hca_construct(mca_btl_openib_hca_t *hca)
{
int i;
hca->ib_dev = NULL;
hca->ib_dev_context = NULL;
hca->mpool = NULL;
#if OMPI_ENABLE_PROGRESS_THREADS == 1
hca->ib_channel = NULL;
#endif
hca->btls = 0;
hca->ib_cq[BTL_OPENIB_HP_CQ] = NULL;
hca->ib_cq[BTL_OPENIB_LP_CQ] = NULL;
@ -521,10 +527,23 @@ static void hca_construct(mca_btl_openib_hca_t *hca)
hca->pollme = true;
hca->eager_rdma_buffers_count = 0;
hca->eager_rdma_buffers = NULL;
#if HAVE_XRC
hca->xrc_fd = -1;
#endif
hca->qps = (mca_btl_openib_hca_qp_t*)calloc(mca_btl_openib_component.num_qps,
sizeof(mca_btl_openib_hca_qp_t));
OBJ_CONSTRUCT(&hca->hca_lock, opal_mutex_t);
for(i = 0; i < mca_btl_openib_component.num_qps; i++) {
OBJ_CONSTRUCT(&hca->qps[i].send_free, ompi_free_list_t);
OBJ_CONSTRUCT(&hca->qps[i].recv_free, ompi_free_list_t);
}
OBJ_CONSTRUCT(&hca->send_free_control, ompi_free_list_t);
}
static void hca_destruct(mca_btl_openib_hca_t *hca)
{
int i;
if(hca->eager_rdma_buffers) {
int i;
for(i = 0; i < hca->eager_rdma_buffers_count; i++)
@ -532,11 +551,134 @@ static void hca_destruct(mca_btl_openib_hca_t *hca)
OBJ_RELEASE(hca->eager_rdma_buffers[i]);
free(hca->eager_rdma_buffers);
}
OBJ_DESTRUCT(&hca->hca_lock);
for(i = 0; i < mca_btl_openib_component.num_qps; i++) {
OBJ_DESTRUCT(&hca->qps[i].send_free);
OBJ_DESTRUCT(&hca->qps[i].recv_free);
}
OBJ_DESTRUCT(&hca->send_free_control);
if(hca->qps)
free(hca->qps);
}
OBJ_CLASS_INSTANCE(mca_btl_openib_hca_t, opal_object_t, hca_construct,
hca_destruct);
/**
 * Complete the set-up of an HCA so that its BTL modules can use it.
 *
 * Steps, in order:
 *  - when the async event thread is enabled, start it if necessary and write
 *    the device's async fd to the component's async pipe;
 *  - when progress threads are compiled in, fill in (but do not start) the
 *    HCA's progress thread descriptor;
 *  - create the endpoint pointer array and add the HCA to the component's
 *    HCA array;
 *  - allocate the eager RDMA buffer table when eager RDMA is enabled;
 *  - initialize the HCA-level control-fragment free list and, for every
 *    configured QP, the send and receive fragment free lists;
 *  - bump the component's HCA counter.
 *
 * @param hca  HCA to prepare.
 *
 * @return OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE when a memory
 *         allocation fails, OMPI_ERROR for any other failure.
 *
 * NOTE(review): the HCA is added to mca_btl_openib_component.hcas before the
 * free lists are initialized, so a later failure returns with the HCA still
 * present in that array -- confirm that callers handle this.
 */
static int prepare_hca_for_use(mca_btl_openib_hca_t *hca)
{
    mca_btl_openib_frag_init_data_t *init_data;
    int qp, length;

#if OMPI_HAVE_THREADS
    if(mca_btl_openib_component.use_async_event_thread) {
        if(0 == mca_btl_openib_component.async_thread) {
            /* async thread is not yet started, so start it here */
            if(start_async_event_thread() != OMPI_SUCCESS)
                return OMPI_ERROR;
        }
        hca->got_fatal_event = false;
        /* write this device's async fd to the component's async pipe */
        if (write(mca_btl_openib_component.async_pipe[1],
                    &hca->ib_dev_context->async_fd, sizeof(int))<0){
            BTL_ERROR(("Failed to write to pipe [%d]",errno));
            return OMPI_ERROR;
        }
    }
#if OMPI_ENABLE_PROGRESS_THREADS == 1
    /* Prepare data for thread, but not starting it */
    OBJ_CONSTRUCT(&hca->thread, opal_thread_t);
    hca->thread.t_run = mca_btl_openib_progress_thread;
    hca->thread.t_arg = hca;
    hca->progress = false;
#endif
#endif

    hca->endpoints = OBJ_NEW(opal_pointer_array_t);
    opal_pointer_array_init(hca->endpoints, 10, INT_MAX, 10);
    opal_pointer_array_add(&mca_btl_openib_component.hcas, hca);

    if(mca_btl_openib_component.max_eager_rdma > 0 &&
            mca_btl_openib_component.use_eager_rdma &&
            hca->use_eager_rdma) {
        /* one slot per possible eager RDMA endpoint, for every BTL on the HCA */
        hca->eager_rdma_buffers =
            calloc(mca_btl_openib_component.max_eager_rdma * hca->btls,
                    sizeof(mca_btl_openib_endpoint_t*));
        if(NULL == hca->eager_rdma_buffers) {
            BTL_ERROR(("Memory allocation fails\n"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* control fragments: header + footer + eager RDMA header, no payload */
    init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
    if(NULL == init_data) {
        BTL_ERROR(("Memory allocation fails\n"));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    length = sizeof(mca_btl_openib_header_t) +
        sizeof(mca_btl_openib_footer_t) +
        sizeof(mca_btl_openib_eager_rdma_header_t);

    init_data->order = MCA_BTL_NO_ORDER;
    init_data->list = &hca->send_free_control;

    if(OMPI_SUCCESS != ompi_free_list_init_ex_new(
                &hca->send_free_control,
                sizeof(mca_btl_openib_send_control_frag_t), CACHE_LINE_SIZE,
                OBJ_CLASS(mca_btl_openib_send_control_frag_t), length,
                mca_btl_openib_component.buffer_alignment,
                mca_btl_openib_component.ib_free_list_num, -1,
                mca_btl_openib_component.ib_free_list_inc,
                hca->mpool, mca_btl_openib_frag_init,
                init_data)) {
        return OMPI_ERROR;
    }

    /* setup all the qps */
    for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
        init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
        if(NULL == init_data) {
            BTL_ERROR(("Memory allocation fails\n"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        /* Initialize pool of send fragments */
        length = sizeof(mca_btl_openib_header_t) +
            sizeof(mca_btl_openib_header_coalesced_t) +
            sizeof(mca_btl_openib_control_header_t) +
            sizeof(mca_btl_openib_footer_t) +
            mca_btl_openib_component.qp_infos[qp].size;

        init_data->order = qp;
        init_data->list = &hca->qps[qp].send_free;

        if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
                    sizeof(mca_btl_openib_send_frag_t), CACHE_LINE_SIZE,
                    OBJ_CLASS(mca_btl_openib_send_frag_t), length,
                    mca_btl_openib_component.buffer_alignment,
                    mca_btl_openib_component.ib_free_list_num,
                    mca_btl_openib_component.ib_free_list_max,
                    mca_btl_openib_component.ib_free_list_inc,
                    hca->mpool, mca_btl_openib_frag_init,
                    init_data)) {
            return OMPI_ERROR;
        }

        /* Initialize pool of receive fragments (same buffer length) */
        init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
        if(NULL == init_data) {
            BTL_ERROR(("Memory allocation fails\n"));
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        length = sizeof(mca_btl_openib_header_t) +
            sizeof(mca_btl_openib_header_coalesced_t) +
            sizeof(mca_btl_openib_control_header_t) +
            sizeof(mca_btl_openib_footer_t) +
            mca_btl_openib_component.qp_infos[qp].size;

        init_data->order = qp;
        init_data->list = &hca->qps[qp].recv_free;

        if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
                    sizeof(mca_btl_openib_recv_frag_t), CACHE_LINE_SIZE,
                    OBJ_CLASS(mca_btl_openib_recv_frag_t),
                    length, mca_btl_openib_component.buffer_alignment,
                    mca_btl_openib_component.ib_free_list_num,
                    mca_btl_openib_component.ib_free_list_max,
                    mca_btl_openib_component.ib_free_list_inc,
                    hca->mpool, mca_btl_openib_frag_init,
                    init_data)) {
            return OMPI_ERROR;
        }
    }

    mca_btl_openib_component.hcas_count++;
    return OMPI_SUCCESS;
}
static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
{
struct mca_mpool_base_resources_t mpool_resources;
@ -555,24 +697,23 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
hca->ib_dev = ib_dev;
hca->ib_dev_context = ibv_open_device(ib_dev);
OBJ_CONSTRUCT(&hca->hca_lock, opal_mutex_t);
if(NULL == hca->ib_dev_context){
BTL_ERROR(("error obtaining device context for %s errno says %s\n",
ibv_get_device_name(ib_dev), strerror(errno)));
goto free_hca;
goto error;
}
if(ibv_query_device(hca->ib_dev_context, &hca->ib_dev_attr)){
BTL_ERROR(("error obtaining device attributes for %s errno says %s\n",
ibv_get_device_name(ib_dev), strerror(errno)));
goto close_hca;
goto error;
}
/* If mca_btl_if_include/exclude were specified, get usable ports */
allowed_ports = (int*)malloc(hca->ib_dev_attr.phys_port_cnt * sizeof(int));
port_cnt = get_port_list(hca, allowed_ports);
if(0 == port_cnt) {
ret = OMPI_SUCCESS;
goto close_hca;
goto error;
}
#if HAVE_XRC
/* if user configured to run with XRC qp and the device don't support it -
@ -586,7 +727,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
ibv_get_device_name(ib_dev),
orte_system_info.nodename);
ret = OMPI_SUCCESS;
goto close_hca;
goto error;
}
#endif
/* Load in vendor/part-specific HCA parameters. Note that even if
@ -597,7 +738,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
&values);
if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) {
/* If we get a serious error, propagate it upwards */
goto close_hca;
goto error;
}
if (OMPI_ERR_NOT_FOUND == ret) {
/* If we didn't find a matching HCA in the INI files, output a
@ -616,7 +757,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
ret = ompi_btl_openib_ini_query(0, 0, &default_values);
if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) {
/* If we get a serious error, propagate it upwards */
goto close_hca;
goto error;
}
/* If we did find values for this HCA (or in the defaults
@ -657,13 +798,13 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
if(NULL == hca->ib_pd){
BTL_ERROR(("error allocating pd for %s errno says %s\n",
ibv_get_device_name(ib_dev), strerror(errno)));
goto close_hca;
goto error;
}
if (MCA_BTL_XRC_ENABLED) {
if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(hca)) {
BTL_ERROR(("XRC Internal error. Failed to open xrc domain"));
goto dealloc_pd;
goto error;
}
}
@ -677,7 +818,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
if(NULL == hca->mpool){
BTL_ERROR(("error creating IB memory pool for %s errno says %s\n",
ibv_get_device_name(ib_dev), strerror(errno)));
goto close_xrc_domain;
goto error;
}
#if OMPI_ENABLE_PROGRESS_THREADS == 1
@ -686,7 +827,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
BTL_ERROR(("error creating channel for %s errno says %s\n",
ibv_get_device_name(hca->ib_dev),
strerror(errno)));
goto mpool_destroy;
goto error;
}
#endif
@ -732,167 +873,46 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
/* If we made a BTL, we're done. Otherwise, fall through and
destroy everything */
if (hca->btls > 0) {
#if OMPI_HAVE_THREADS
if (mca_btl_openib_component.use_async_event_thread) {
if(0 == mca_btl_openib_component.async_thread) {
/* async thread is not yet started, so start it here */
if(start_async_event_thread() != OMPI_SUCCESS)
goto comp_channel;
}
hca->got_fatal_event = false;
if (write(mca_btl_openib_component.async_pipe[1],
&hca->ib_dev_context->async_fd, sizeof(int))<0){
BTL_ERROR(("Failed to write to pipe [%d]",errno));
goto comp_channel;
}
}
#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* Prepare data for thread, but not starting it */
OBJ_CONSTRUCT(&hca->thread, opal_thread_t);
hca->thread.t_run = mca_btl_openib_progress_thread;
hca->thread.t_arg = hca;
hca->progress = false;
#endif
#endif
hca->endpoints = OBJ_NEW(opal_pointer_array_t);
opal_pointer_array_init(hca->endpoints, 10, INT_MAX, 10);
opal_pointer_array_add(&mca_btl_openib_component.hcas, hca);
if(mca_btl_openib_component.max_eager_rdma > 0 &&
mca_btl_openib_component.use_eager_rdma &&
hca->use_eager_rdma) {
hca->eager_rdma_buffers =
calloc(mca_btl_openib_component.max_eager_rdma * hca->btls,
sizeof(mca_btl_openib_endpoint_t*));
if(NULL == hca->eager_rdma_buffers) {
BTL_ERROR(("Memory allocation fails\n"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
mca_btl_openib_component.hcas_count++;
return OMPI_SUCCESS;
ret = prepare_hca_for_use(hca);
if(OMPI_SUCCESS == ret)
return OMPI_SUCCESS;
}
#if OMPI_HAVE_THREADS
comp_channel:
#if OMPI_ENABLE_PROGRESS_THREADS == 1
ibv_destroy_comp_channel(hca->ib_channel);
mpool_destroy:
error:
#if defined(OMPI_HAVE_THREADS) && OMPI_ENABLE_PROGRESS_THREADS == 1
if(hca->ib_channel)
ibv_destroy_comp_channel(hca->ib_channel);
#endif
#endif
mca_mpool_base_module_destroy(hca->mpool);
close_xrc_domain:
if(hca->mpool)
mca_mpool_base_module_destroy(hca->mpool);
if (MCA_BTL_XRC_ENABLED) {
if (OMPI_SUCCESS != mca_btl_openib_close_xrc_domain(hca)) {
if(OMPI_SUCCESS != mca_btl_openib_close_xrc_domain(hca)) {
BTL_ERROR(("XRC Internal error. Failed to close xrc domain"));
}
}
dealloc_pd:
ibv_dealloc_pd(hca->ib_pd);
close_hca:
ibv_close_device(hca->ib_dev_context);
if(NULL != allowed_ports) {
free(allowed_ports);
}
free_hca:
if(hca->ib_pd)
ibv_dealloc_pd(hca->ib_pd);
if(hca->ib_dev_context)
ibv_close_device(hca->ib_dev_context);
OBJ_RELEASE(hca);
return ret;
}
static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
{
mca_btl_openib_frag_init_data_t *init_data;
int qp, length;
int qp;
openib_btl->num_peers = 0;
/* Initialize module state */
OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t);
OBJ_CONSTRUCT(&openib_btl->send_free_control, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->send_free_coalesced, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->send_user_free, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->recv_user_free, ompi_free_list_t);
/* setup the qp structure */
openib_btl->qps = (mca_btl_openib_module_qp_t*)
malloc(sizeof(mca_btl_openib_module_qp_t) *
mca_btl_openib_component.num_qps);
calloc(mca_btl_openib_component.num_qps,
sizeof(mca_btl_openib_module_qp_t));
/* initialize the memory pool using the hca */
openib_btl->super.btl_mpool = openib_btl->hca->mpool;
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
init_data->order = mca_btl_openib_component.rdma_qp;
init_data->list = &openib_btl->send_user_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(&openib_btl->send_user_free,
sizeof(mca_btl_openib_put_frag_t), 2,
OBJ_CLASS(mca_btl_openib_put_frag_t),
0, 0,
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
NULL, mca_btl_openib_frag_init, init_data)) {
return OMPI_ERROR;
}
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
init_data->order = mca_btl_openib_component.rdma_qp;
init_data->list = &openib_btl->recv_user_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(&openib_btl->recv_user_free,
sizeof(mca_btl_openib_get_frag_t), 2,
OBJ_CLASS(mca_btl_openib_get_frag_t),
0, 0,
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
NULL, mca_btl_openib_frag_init, init_data)) {
return OMPI_ERROR;
}
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_footer_t) +
sizeof(mca_btl_openib_eager_rdma_header_t);
init_data->order = MCA_BTL_NO_ORDER;
init_data->list = &openib_btl->send_free_control;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(
&openib_btl->send_free_control,
sizeof(mca_btl_openib_send_control_frag_t), CACHE_LINE_SIZE,
OBJ_CLASS(mca_btl_openib_send_control_frag_t),
length,
mca_btl_openib_component.buffer_alignment,
mca_btl_openib_component.ib_free_list_num, -1,
mca_btl_openib_component.ib_free_list_inc,
openib_btl->super.btl_mpool, mca_btl_openib_frag_init,
init_data)) {
return OMPI_ERROR;
}
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_coalesced_frag_t);
init_data->list = &openib_btl->send_free_coalesced;
if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->send_free_coalesced,
length, 2, OBJ_CLASS(mca_btl_openib_coalesced_frag_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
NULL, mca_btl_openib_frag_init, init_data)) {
return OMPI_ERROR;
}
/* setup all the qps */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
OBJ_CONSTRUCT(&openib_btl->qps[qp].send_free, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->qps[qp].recv_free, ompi_free_list_t);
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
OBJ_CONSTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0],
opal_list_t);
@ -901,53 +921,11 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
openib_btl->qps[qp].u.srq_qp.sd_credits =
mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
}
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
/* Initialize pool of send fragments */
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t) +
sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size;
init_data->order = qp;
init_data->list = &openib_btl->qps[qp].send_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
sizeof(mca_btl_openib_send_frag_t), CACHE_LINE_SIZE,
OBJ_CLASS(mca_btl_openib_send_frag_t),
length, mca_btl_openib_component.buffer_alignment,
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
openib_btl->super.btl_mpool, mca_btl_openib_frag_init,
init_data)) {
return OMPI_ERROR;
}
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t) +
sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size;
init_data->order = qp;
init_data->list = &openib_btl->qps[qp].recv_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
sizeof(mca_btl_openib_recv_frag_t), CACHE_LINE_SIZE,
OBJ_CLASS(mca_btl_openib_recv_frag_t),
length, mca_btl_openib_component.buffer_alignment,
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
openib_btl->super.btl_mpool, mca_btl_openib_frag_init,
init_data)) {
return OMPI_ERROR;
}
}
/* initialize the memory pool using the hca */
openib_btl->super.btl_mpool = openib_btl->hca->mpool;
openib_btl->eager_rdma_channels = 0;
openib_btl->eager_rdma_frag_size = OPAL_ALIGN(
@ -976,7 +954,7 @@ btl_openib_component_init(int *num_btl_modules,
{
struct ibv_device **ib_devs;
mca_btl_base_module_t** btls;
int i, ret, num_devs;
int i, ret, num_devs, length;
opal_list_t btl_list;
mca_btl_openib_module_t * openib_btl;
mca_btl_base_selected_module_t* ib_selected;
@ -986,6 +964,7 @@ btl_openib_component_init(int *num_btl_modules,
struct ibv_device* ib_dev;
#endif
unsigned short seedv[3];
mca_btl_openib_frag_init_data_t *init_data;
/* initialization */
*num_btl_modules = 0;
@ -1006,6 +985,59 @@ btl_openib_component_init(int *num_btl_modules,
opal_hash_table_t);
}
OBJ_CONSTRUCT(&mca_btl_openib_component.send_free_coalesced, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_openib_component.send_user_free, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_openib_component.recv_user_free, ompi_free_list_t);
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
init_data->order = mca_btl_openib_component.rdma_qp;
init_data->list = &mca_btl_openib_component.send_user_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(
&mca_btl_openib_component.send_user_free,
sizeof(mca_btl_openib_put_frag_t), 2,
OBJ_CLASS(mca_btl_openib_put_frag_t),
0, 0,
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
NULL, mca_btl_openib_frag_init, init_data)) {
goto no_btls;
}
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
init_data->order = mca_btl_openib_component.rdma_qp;
init_data->list = &mca_btl_openib_component.recv_user_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(
&mca_btl_openib_component.recv_user_free,
sizeof(mca_btl_openib_get_frag_t), 2,
OBJ_CLASS(mca_btl_openib_get_frag_t),
0, 0,
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
NULL, mca_btl_openib_frag_init, init_data)) {
goto no_btls;
}
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_coalesced_frag_t);
init_data->list = &mca_btl_openib_component.send_free_coalesced;
if(OMPI_SUCCESS != ompi_free_list_init_ex(
&mca_btl_openib_component.send_free_coalesced,
length, 2, OBJ_CLASS(mca_btl_openib_coalesced_frag_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
NULL, mca_btl_openib_frag_init, init_data)) {
goto no_btls;
}
/* If we want fork support, try to enable it */
#ifdef HAVE_IBV_FORK_INIT
if (0 != mca_btl_openib_component.want_fork_support) {
@ -1925,7 +1957,7 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp)
for(i = 0; i < num_post; i++) {
ompi_free_list_item_t* item;
OMPI_FREE_LIST_WAIT(&openib_btl->qps[qp].recv_free, item, rc);
OMPI_FREE_LIST_WAIT(&openib_btl->hca->qps[qp].recv_free, item, rc);
to_base_frag(item)->base.order = qp;
to_com_frag(item)->endpoint = NULL;
if(NULL == wr)

Просмотреть файл

@ -633,14 +633,14 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
mca_btl_openib_send_control_frag_t* frag;
mca_btl_openib_rdma_credits_header_t *credits_hdr;
int ib_rc;
int rc;
bool do_rdma = false;
int32_t cm_return;
frag = endpoint->qps[qp].credit_frag;
if(OPAL_UNLIKELY(NULL == frag)) {
MCA_BTL_IB_FRAG_ALLOC_CREDIT_WAIT(openib_btl, frag, ib_rc);
frag = alloc_credit_frag(openib_btl);
frag->qp_idx = qp;
endpoint->qps[qp].credit_frag = frag;
/* set those once and forever */
@ -686,9 +686,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
if(endpoint->nbo)
BTL_OPENIB_RDMA_CREDITS_HEADER_HTON(*credits_hdr);
ib_rc = post_send(endpoint, frag, do_rdma);
if(0 == ib_rc)
if((rc = post_send(endpoint, frag, do_rdma)) == 0)
return;
if(endpoint->nbo) {
@ -705,7 +703,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
else
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
BTL_ERROR(("error posting send request errno %d says %s", ib_rc,
BTL_ERROR(("error posting send request errno %d says %s", rc,
strerror(errno)));
}
@ -728,7 +726,7 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
mca_btl_openib_send_control_frag_t* frag;
int rc;
MCA_BTL_IB_FRAG_ALLOC_CREDIT_WAIT(openib_btl, frag, rc);
frag = alloc_credit_frag(openib_btl);
if(NULL == frag) {
return -1;
}

Просмотреть файл

@ -249,7 +249,7 @@ static inline int post_recvs(mca_btl_base_endpoint_t *ep, const int qp,
for(i = 0; i < num_post; i++) {
int rc;
ompi_free_list_item_t* item;
OMPI_FREE_LIST_WAIT(&openib_btl->qps[qp].recv_free, item, rc);
OMPI_FREE_LIST_WAIT(&openib_btl->hca->qps[qp].recv_free, item, rc);
to_base_frag(item)->base.order = qp;
to_com_frag(item)->endpoint = ep;
if(NULL == wr)

Просмотреть файл

@ -266,12 +266,16 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_coalesced_frag_t);
*
*/
#define MCA_BTL_IB_FRAG_ALLOC_CREDIT_WAIT(btl, frag, rc) \
do { \
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_WAIT(&(btl)->send_free_control, item, rc); \
frag = to_send_control_frag(item); \
} while(0)
/**
 * Take a send-control fragment from the control free list of the HCA that
 * backs the given BTL module.
 *
 * The item is fetched with OMPI_FREE_LIST_WAIT (presumably blocking until an
 * item is available -- confirm against the macro definition); the status the
 * macro reports is not inspected here.
 *
 * @param btl  BTL module whose HCA supplies the fragment.
 * @return the fetched item converted with to_send_control_frag().
 */
static inline mca_btl_openib_send_control_frag_t *
alloc_credit_frag(mca_btl_openib_module_t *btl)
{
    ompi_free_list_item_t *list_item;
    int status;

    OMPI_FREE_LIST_WAIT(&btl->hca->send_free_control, list_item, status);

    return to_send_control_frag(list_item);
}
static inline uint8_t frag_size_to_order(mca_btl_openib_module_t* btl,
size_t size)
@ -284,27 +288,35 @@ static inline uint8_t frag_size_to_order(mca_btl_openib_module_t* btl,
return MCA_BTL_NO_ORDER;
}
#define MCA_BTL_IB_FRAG_ALLOC_SEND_USER(btl, frag, rc) \
do { \
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_GET(&(btl)->send_user_free, item, rc); \
frag = to_com_frag(item); \
} while(0)
static inline mca_btl_openib_com_frag_t *alloc_send_user_frag(void)
{
int rc;
ompi_free_list_item_t *item;
#define MCA_BTL_IB_FRAG_ALLOC_RECV_USER(btl, frag, rc) \
do { \
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_GET(&(btl)->recv_user_free, item, rc); \
frag = to_com_frag(item); \
} while(0)
OMPI_FREE_LIST_GET(&mca_btl_openib_component.send_user_free, item, rc);
#define MCA_BTL_IB_FRAG_ALLOC_COALESCED(btl, frag) \
do { \
int ign_rc; \
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_GET(&(btl)->send_free_coalesced, item, ign_rc) \
frag = to_coalesced_frag(item); \
} while(0)
return to_com_frag(item);
}
/**
 * Take a fragment from the component-wide recv_user_free list.
 *
 * The item is fetched with OMPI_FREE_LIST_GET; the status the macro reports
 * is not inspected here, so callers must check the returned pointer.
 *
 * @return the fetched item converted with to_com_frag().
 */
static inline mca_btl_openib_com_frag_t *alloc_recv_user_frag(void)
{
    ompi_free_list_item_t *list_item;
    int status;

    OMPI_FREE_LIST_GET(&mca_btl_openib_component.recv_user_free, list_item,
                       status);

    return to_com_frag(list_item);
}
/**
 * Take a coalesced fragment from the component-wide send_free_coalesced list.
 *
 * The item is fetched with OMPI_FREE_LIST_GET; the status the macro reports
 * is not inspected here.
 *
 * @return the fetched item converted with to_coalesced_frag().
 */
static inline mca_btl_openib_coalesced_frag_t *alloc_coalesced_frag(void)
{
    ompi_free_list_item_t *list_item;
    int status;

    OMPI_FREE_LIST_GET(&mca_btl_openib_component.send_free_coalesced,
                       list_item, status);

    return to_coalesced_frag(list_item);
}
#define MCA_BTL_IB_FRAG_RETURN(frag) \
do { \