1
1

Consolidate receive buffers prepost code for HP/LP QPs.

This commit was SVN r11552.
Этот коммит содержится в:
Gleb Natapov 2006-09-07 13:05:41 +00:00
родитель e7650ff48a
Коммит d0caffa0aa
8 изменённых файлов: 207 добавлений и 259 удалений

Просмотреть файл

@ -148,12 +148,14 @@ int mca_btl_openib_size_queues( struct mca_btl_openib_module_t* openib_btl, size
if(!first_time) {
struct ibv_srq_attr srq_attr;
srq_attr.max_wr = openib_btl->rd_num;
rc = ibv_modify_srq( openib_btl->srq_hp, &srq_attr, IBV_SRQ_MAX_WR);
rc = ibv_modify_srq(openib_btl->srq[BTL_OPENIB_HP_QP],
&srq_attr, IBV_SRQ_MAX_WR);
if(rc) {
BTL_ERROR(("cannot resize high priority shared receive queue, error: %d", rc));
return OMPI_ERROR;
}
rc = ibv_modify_srq(openib_btl->srq_lp, &srq_attr, IBV_SRQ_MAX_WR);
rc = ibv_modify_srq(openib_btl->srq[BTL_OPENIB_LP_QP],
&srq_attr, IBV_SRQ_MAX_WR);
if(rc) {
BTL_ERROR(("cannot resize low priority shared receive queue, error: %d", rc));
return OMPI_ERROR;
@ -673,7 +675,7 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
frag->sg_entry.addr = (unsigned long) frag->base.des_src->seg_addr.pval;
frag->sg_entry.length = frag->base.des_src->seg_len;
if(ibv_post_send(endpoint->lcl_qp_lp,
if(ibv_post_send(endpoint->lcl_qp[BTL_OPENIB_LP_QP],
&frag->wr_desc.sr_desc,
&bad_wr)){
rc = OMPI_ERROR;
@ -682,11 +684,11 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
}
if(mca_btl_openib_component.use_srq) {
MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, 1);
MCA_BTL_OPENIB_POST_SRR_LOW(openib_btl, 1);
mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_HP_QP);
mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_LP_QP);
} else {
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1);
MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, 1);
btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_HP_QP);
btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_LP_QP);
}
}
return rc;
@ -735,7 +737,7 @@ int mca_btl_openib_get( mca_btl_base_module_t* btl,
frag->sg_entry.addr = (unsigned long) frag->base.des_dst->seg_addr.pval;
frag->sg_entry.length = frag->base.des_dst->seg_len;
if(ibv_post_send(endpoint->lcl_qp_lp,
if(ibv_post_send(endpoint->lcl_qp[BTL_OPENIB_LP_QP],
&frag->wr_desc.sr_desc,
&bad_wr)){
BTL_ERROR(("error posting send request errno (%d) says %s", errno, strerror(errno)));
@ -745,11 +747,11 @@ int mca_btl_openib_get( mca_btl_base_module_t* btl,
}
if(mca_btl_openib_component.use_srq) {
MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, 1);
MCA_BTL_OPENIB_POST_SRR_LOW(openib_btl, 1);
mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_HP_QP);
mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_LP_QP);
} else {
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1);
MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, 1);
btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_HP_QP);
btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_LP_QP);
}
}
return rc;
@ -770,25 +772,27 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
attr.attr.max_wr = mca_btl_openib_component.srq_rd_max;
attr.attr.max_sge = mca_btl_openib_component.ib_sg_list_size;
openib_btl->srd_posted_hp = 0;
openib_btl->srd_posted_lp = 0;
openib_btl->srd_posted[BTL_OPENIB_HP_QP] = 0;
openib_btl->srd_posted[BTL_OPENIB_LP_QP] = 0;
openib_btl->srq_hp = ibv_create_srq(openib_btl->hca->ib_pd, &attr);
if(NULL == openib_btl->srq_hp) {
openib_btl->srq[BTL_OPENIB_HP_QP] =
ibv_create_srq(openib_btl->hca->ib_pd, &attr);
if(NULL == openib_btl->srq[BTL_OPENIB_HP_QP]) {
BTL_ERROR(("error in ibv_create_srq\n"));
return OMPI_ERROR;
}
openib_btl->srq_lp = ibv_create_srq(openib_btl->hca->ib_pd, &attr);
if(NULL == openib_btl->srq_hp) {
openib_btl->srq[BTL_OPENIB_LP_QP] =
ibv_create_srq(openib_btl->hca->ib_pd, &attr);
if(NULL == openib_btl->srq[BTL_OPENIB_LP_QP]) {
BTL_ERROR(("error in ibv_create_srq\n"));
return OMPI_ERROR;
}
} else {
openib_btl->srq_hp = NULL;
openib_btl->srq_lp = NULL;
openib_btl->srq[BTL_OPENIB_HP_QP] = NULL;
openib_btl->srq[BTL_OPENIB_LP_QP] = NULL;
}
/* Create the low and high priority queue pairs */

Просмотреть файл

@ -40,7 +40,8 @@
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "btl_openib_endpoint.h"
#include "btl_openib_frag.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@ -138,6 +139,11 @@ extern mca_btl_openib_component_t mca_btl_openib_component;
typedef mca_btl_base_recv_reg_t mca_btl_openib_recv_reg_t;
/**
 * Per-port information record.
 * Both the struct tag and the typedef name stay usable.
 */
typedef struct mca_btl_openib_port_info_t {
    uint32_t mtu;     /* presumably the port MTU - TODO confirm units/encoding */
    uint16_t subnet;  /* presumably the subnet the port belongs to - TODO confirm */
} mca_btl_openib_port_info_t;
struct mca_btl_openib_hca_t {
struct ibv_device *ib_dev; /* the ib device */
@ -181,11 +187,8 @@ struct mca_btl_openib_module_t {
size_t ib_inline_max; /**< max size of inline send*/
bool poll_cq;
struct ibv_srq *srq_hp;
struct ibv_srq *srq_lp;
int32_t srd_posted_hp;
int32_t srd_posted_lp;
struct ibv_srq *srq[2];
int32_t srd_posted[2];
int32_t num_peers;
int32_t rd_num;
int32_t rd_low;
@ -205,7 +208,6 @@ struct mca_btl_openib_module_t {
orte_pointer_array_t *endpoints;
}; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
struct mca_btl_openib_frag_t;
extern mca_btl_openib_module_t mca_btl_openib_module;
/**
@ -407,75 +409,45 @@ extern void mca_btl_openib_send_frag_return(
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t* openib_btl);
/*
 * Replenish the high-priority shared receive queue of openib_btl.
 *
 * While holding openib_btl->ib_lock: if srd_posted_hp is at or below
 * rd_low + additional and still below rd_num, expand
 * MCA_BTL_OPENIB_POST_SRR_SUB to post (rd_num - srd_posted_hp) fragments
 * taken from recv_free_eager to srq_hp.
 *
 * NOTE(review): MCA_BTL_OPENIB_POST_SRR_SUB contains "return OMPI_ERROR",
 * so a failed post leaves the enclosing function with ib_lock still held.
 * The expansion is a brace block followed by nothing, but the inner
 * "while(0);" already carries its own semicolon.
 */
#define MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, additional) \
{ \
do{ \
OPAL_THREAD_LOCK(&openib_btl->ib_lock); \
if(openib_btl->srd_posted_hp <= openib_btl->rd_low+additional && \
openib_btl->srd_posted_hp < openib_btl->rd_num){ \
MCA_BTL_OPENIB_POST_SRR_SUB(openib_btl->rd_num - \
openib_btl->srd_posted_hp, \
openib_btl, \
&openib_btl->recv_free_eager, \
&openib_btl->srd_posted_hp, \
openib_btl->srq_hp); \
} \
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock); \
} while(0); \
}
/*
 * Replenish the low-priority shared receive queue of openib_btl.
 *
 * While holding openib_btl->ib_lock: if srd_posted_lp is at or below
 * rd_low + additional and still below rd_num, expand
 * MCA_BTL_OPENIB_POST_SRR_SUB to post (rd_num - srd_posted_lp) fragments
 * taken from recv_free_max to srq_lp.
 *
 * NOTE(review): MCA_BTL_OPENIB_POST_SRR_SUB contains "return OMPI_ERROR",
 * so a failed post leaves the enclosing function with ib_lock still held.
 */
#define MCA_BTL_OPENIB_POST_SRR_LOW(openib_btl, additional) \
{ \
do { \
OPAL_THREAD_LOCK(&openib_btl->ib_lock); \
if(openib_btl->srd_posted_lp <= openib_btl->rd_low+additional && \
openib_btl->srd_posted_lp < openib_btl->rd_num){ \
MCA_BTL_OPENIB_POST_SRR_SUB(openib_btl->rd_num - \
openib_btl->srd_posted_lp, \
openib_btl, \
&openib_btl->recv_free_max, \
&openib_btl->srd_posted_lp, \
openib_btl->srq_lp); \
} \
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock); \
} while(0); \
}
/*
 * Post cnt receive fragments to a shared receive queue.
 *
 *   cnt         number of fragments to post
 *   openib_btl  not referenced by the expansion itself
 *   frag_list   free list the fragments are taken from (OMPI_FREE_LIST_WAIT)
 *   srd_posted  pointer to the 32-bit counter that is increased by cnt
 *   srq         shared receive queue passed to ibv_post_srq_recv()
 *
 * For every fragment the scatter/gather length is set to frag->size plus
 * the byte distance between frag->hdr and frag->segment.seg_addr.pval.
 *
 * NOTE(review): on a failed ibv_post_srq_recv() the expansion executes
 * "return OMPI_ERROR" in the enclosing function; the counter is not updated
 * for fragments that were already posted, and the value stored in rc by
 * OMPI_FREE_LIST_WAIT is never examined.
 */
#define MCA_BTL_OPENIB_POST_SRR_SUB(cnt, \
openib_btl, \
frag_list, \
srd_posted, \
srq) \
{\
do { \
int32_t i; \
int32_t num_post = cnt; \
ompi_free_list_item_t* item = NULL; \
mca_btl_openib_frag_t* frag = NULL; \
struct ibv_recv_wr *bad_wr; \
int32_t rc; \
for(i = 0; i < num_post; i++) { \
OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
frag = (mca_btl_openib_frag_t*) item; \
frag->sg_entry.length = frag->size + \
((unsigned char*) frag->segment.seg_addr.pval- \
(unsigned char*) frag->hdr); \
if(ibv_post_srq_recv(srq, &frag->wr_desc.rd_desc, &bad_wr)) { \
BTL_ERROR(("error posting receive descriptors to shared receive queue: %s",\
strerror(errno))); \
return OMPI_ERROR; \
}\
}\
OPAL_THREAD_ADD32(srd_posted, num_post); \
} while(0);\
}
/*
 * Indices into the two-element per-priority arrays used by this BTL
 * (e.g. srq[], srd_posted[], lcl_qp[], rd_posted[], rd_credits[]).
 */
/* high priority queue pair */
#define BTL_OPENIB_HP_QP 0
/* low priority queue pair */
#define BTL_OPENIB_LP_QP 1
/**
 * Replenish one of the module's shared receive queues.
 *
 * While holding openib_btl->ib_lock: if srd_posted[prio] has dropped to
 * rd_low + additional or below (and is still below rd_num), enough receive
 * fragments are posted to srq[prio] to bring the count back up to rd_num.
 *
 * @param openib_btl  module whose shared receive queue is refilled
 * @param additional  slack added to the rd_low threshold
 * @param prio        BTL_OPENIB_HP_QP (fragments come from recv_free_eager)
 *                    or BTL_OPENIB_LP_QP (fragments come from recv_free_max);
 *                    also the index into srq[] and srd_posted[]
 *
 * @return OMPI_SUCCESS if nothing had to be posted or every post succeeded,
 *         OMPI_ERROR if ibv_post_srq_recv() failed.
 */
static inline int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl,
        const int additional, const int prio)
{
    int ret = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&openib_btl->ib_lock);
    if(openib_btl->srd_posted[prio] <= openib_btl->rd_low + additional &&
       openib_btl->srd_posted[prio] < openib_btl->rd_num) {
        int32_t i, rc;
        int32_t num_post = openib_btl->rd_num - openib_btl->srd_posted[prio];
        ompi_free_list_item_t* item;
        mca_btl_openib_frag_t* frag;
        struct ibv_recv_wr *bad_wr;
        ompi_free_list_t *free_list;

        if(BTL_OPENIB_HP_QP == prio)
            free_list = &openib_btl->recv_free_eager;
        else
            free_list = &openib_btl->recv_free_max;

        for(i = 0; i < num_post; i++) {
            OMPI_FREE_LIST_WAIT(free_list, item, rc);
            frag = (mca_btl_openib_frag_t*)item;
            /* receive length covers the header plus the payload area */
            frag->sg_entry.length = frag->size +
                ((unsigned char*)frag->segment.seg_addr.pval -
                 (unsigned char*)frag->hdr);
            if(ibv_post_srq_recv(openib_btl->srq[prio], &frag->wr_desc.rd_desc,
                        &bad_wr)) {
                BTL_ERROR(("error posting receive descriptors to shared "
                            "receive queue: %s", strerror(errno)));
                /* leave the loop instead of returning so that ib_lock is
                 * always released below */
                ret = OMPI_ERROR;
                break;
            }
        }
        /* i is the number of receives actually handed to the SRQ; it equals
         * num_post unless a post failed part way through */
        OPAL_THREAD_ADD32(&openib_btl->srd_posted[prio], i);
    }
    OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);

    return ret;
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -203,10 +203,10 @@ static void btl_openib_control(struct mca_btl_base_module_t* btl,
/* if not sent via rdma */
if(!MCA_BTL_OPENIB_RDMA_FRAG(frag) &&
ctl_hdr->type == MCA_BTL_OPENIB_CONTROL_CREDITS) {
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -1);
OPAL_THREAD_ADD32(&endpoint->rd_credits[BTL_OPENIB_HP_QP], -1);
}
} else {
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -1);
OPAL_THREAD_ADD32(&endpoint->rd_credits[BTL_OPENIB_LP_QP], -1);
}
switch (ctl_hdr->type) {
@ -745,21 +745,23 @@ static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl,
/* check to see if we need to return credits */
if((endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win ||
if((endpoint->rd_credits[BTL_OPENIB_HP_QP] >=
mca_btl_openib_component.rd_win ||
endpoint->eager_rdma_local.credits >=
mca_btl_openib_component.rd_win) &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp, 1) == 1) {
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_HP_QP],1) == 1) {
mca_btl_openib_endpoint_send_credits_hp(endpoint);
}
/* repost receive descriptors if receive not by RDMA */
if(!MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
if(mca_btl_openib_component.use_srq) {
OPAL_THREAD_ADD32((int32_t*) &openib_btl->srd_posted_hp, -1);
MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, 0);
OPAL_THREAD_ADD32((int32_t*)&openib_btl->srd_posted[BTL_OPENIB_HP_QP], -1);
mca_btl_openib_post_srr(openib_btl, 0, BTL_OPENIB_HP_QP);
} else {
OPAL_THREAD_ADD32((int32_t*) &endpoint->rd_posted_hp, -1);
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 0);
OPAL_THREAD_ADD32((int32_t*)&endpoint->rd_posted[BTL_OPENIB_HP_QP],
-1);
btl_openib_endpoint_post_rr(endpoint, 0, BTL_OPENIB_HP_QP);
}
}
@ -1018,8 +1020,11 @@ static int btl_openib_component_progress(void)
if(!mca_btl_openib_component.use_srq) {
/* check to see if we need to return credits */
if((endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win || endpoint->eager_rdma_local.credits >= mca_btl_openib_component.rd_win) &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp, 1) == 1) {
if((endpoint->rd_credits[BTL_OPENIB_HP_QP] >=
mca_btl_openib_component.rd_win ||
endpoint->eager_rdma_local.credits >=
mca_btl_openib_component.rd_win) &&
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_HP_QP], 1) == 1) {
mca_btl_openib_endpoint_send_credits_hp(endpoint);
}
@ -1075,8 +1080,9 @@ static int btl_openib_component_progress(void)
if(!mca_btl_openib_component.use_srq) {
/* check to see if we need to return credits */
if( endpoint->rd_credits_lp >= mca_btl_openib_component.rd_win &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_lp, 1) == 1) {
if( endpoint->rd_credits[BTL_OPENIB_LP_QP] >=
mca_btl_openib_component.rd_win &&
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_LP_QP], 1) == 1) {
mca_btl_openib_endpoint_send_credits_lp(endpoint);
}
}
@ -1119,13 +1125,13 @@ static int btl_openib_component_progress(void)
if(mca_btl_openib_component.use_srq) {
/* repost receive descriptors */
OPAL_THREAD_ADD32((int32_t*) &openib_btl->srd_posted_lp, -1);
MCA_BTL_OPENIB_POST_SRR_LOW(openib_btl, 0);
OPAL_THREAD_ADD32((int32_t*)&openib_btl->srd_posted[BTL_OPENIB_LP_QP], -1);
mca_btl_openib_post_srr(openib_btl, 0, BTL_OPENIB_LP_QP);
} else {
/* repost receive descriptors */
OPAL_THREAD_ADD32((int32_t*) &endpoint->rd_posted_lp, -1);
MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, 0);
OPAL_THREAD_ADD32((int32_t*)
&endpoint->rd_posted[BTL_OPENIB_LP_QP], -1);
btl_openib_endpoint_post_rr(endpoint, 0, BTL_OPENIB_LP_QP);
OPAL_THREAD_ADD32(&endpoint->sd_tokens[BTL_OPENIB_LP_QP],
credits);
@ -1135,8 +1141,9 @@ static int btl_openib_component_progress(void)
BTL_OPENIB_LP_QP);
/* check to see if we need to return credits */
if(endpoint->rd_credits_lp >= mca_btl_openib_component.rd_win &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_lp, 1) == 1) {
if(endpoint->rd_credits[BTL_OPENIB_LP_QP] >=
mca_btl_openib_component.rd_win &&
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_LP_QP], 1) == 1) {
mca_btl_openib_endpoint_send_credits_lp(endpoint);
}
}

Просмотреть файл

@ -12,7 +12,6 @@
#include "ompi_config.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -62,7 +62,6 @@ int mca_btl_openib_endpoint_qp_init_query(
uint32_t port_num
);
/*
* post a send to the work queue
*/
@ -131,25 +130,27 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
-frag->hdr->credits);
frag->hdr->credits |= BTL_OPENIB_RDMA_CREDITS_FLAG;
} else if(endpoint->rd_credits_hp > 0) {
frag->hdr->credits = endpoint->rd_credits_hp;
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -frag->hdr->credits);
} else if(endpoint->rd_credits[BTL_OPENIB_HP_QP] > 0) {
frag->hdr->credits = endpoint->rd_credits[BTL_OPENIB_HP_QP];
OPAL_THREAD_ADD32(&endpoint->rd_credits[BTL_OPENIB_HP_QP],
-frag->hdr->credits);
} else {
frag->hdr->credits = 0;
}
ib_qp = endpoint->lcl_qp_hp;
ib_qp = endpoint->lcl_qp[BTL_OPENIB_HP_QP];
} else {
if(btl_openib_acquire_send_resources(openib_btl, endpoint, frag,
BTL_OPENIB_LP_QP, NULL) == OMPI_ERR_OUT_OF_RESOURCE)
return MPI_SUCCESS;
if(endpoint->rd_credits_lp > 0) {
frag->hdr->credits = endpoint->rd_credits_lp;
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -frag->hdr->credits);
if(endpoint->rd_credits[BTL_OPENIB_LP_QP] > 0) {
frag->hdr->credits = endpoint->rd_credits[BTL_OPENIB_LP_QP];
OPAL_THREAD_ADD32(&endpoint->rd_credits[BTL_OPENIB_LP_QP],
-frag->hdr->credits);
} else {
frag->hdr->credits = 0;
}
ib_qp = endpoint->lcl_qp_lp;
ib_qp = endpoint->lcl_qp[BTL_OPENIB_LP_QP];
}
frag->sg_entry.length =
@ -202,11 +203,11 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
}
if(mca_btl_openib_component.use_srq) {
MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, 1);
MCA_BTL_OPENIB_POST_SRR_LOW(openib_btl, 1);
mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_HP_QP);
mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_LP_QP);
} else {
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1);
MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, 1);
btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_HP_QP);
btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_LP_QP);
}
return OMPI_SUCCESS;
@ -240,8 +241,8 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
memset(endpoint->lcl_qp_attr_hp, 0, sizeof(struct ibv_qp_attr));
memset(endpoint->lcl_qp_attr_lp, 0, sizeof(struct ibv_qp_attr));
endpoint->rd_posted_hp = 0;
endpoint->rd_posted_lp = 0;
endpoint->rd_posted[BTL_OPENIB_HP_QP] = 0;
endpoint->rd_posted[BTL_OPENIB_LP_QP] = 0;
/* number of available send wqes */
endpoint->sd_wqe[BTL_OPENIB_HP_QP] = mca_btl_openib_component.rd_num;
@ -250,10 +251,10 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
/* zero these out w/ initial posting, so that we start out w/
* zero credits to return to peer
*/
endpoint->rd_credits_hp = -(mca_btl_openib_component.rd_num + mca_btl_openib_component.rd_rsv);
endpoint->rd_credits_lp = -(mca_btl_openib_component.rd_num + mca_btl_openib_component.rd_rsv);
endpoint->sd_credits_hp = 0;
endpoint->sd_credits_lp = 0;
endpoint->rd_credits[BTL_OPENIB_HP_QP] = -(mca_btl_openib_component.rd_num + mca_btl_openib_component.rd_rsv);
endpoint->rd_credits[BTL_OPENIB_LP_QP] = -(mca_btl_openib_component.rd_num + mca_btl_openib_component.rd_rsv);
endpoint->sd_credits[BTL_OPENIB_HP_QP] = 0;
endpoint->sd_credits[BTL_OPENIB_LP_QP] = 0;
/* initialize the high and low priority tokens */
endpoint->sd_tokens[BTL_OPENIB_HP_QP] = mca_btl_openib_component.rd_num;
@ -313,13 +314,13 @@ static int mca_btl_openib_endpoint_send_connect_data(mca_btl_base_endpoint_t* en
/* pack the info in the send buffer */
rc = orte_dss.pack(buffer, &endpoint->lcl_qp_hp->qp_num, 1, ORTE_UINT32);
rc = orte_dss.pack(buffer, &endpoint->lcl_qp[BTL_OPENIB_HP_QP]->qp_num, 1, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dss.pack(buffer, &endpoint->lcl_qp_lp->qp_num, 1, ORTE_UINT32);
rc = orte_dss.pack(buffer, &endpoint->lcl_qp[BTL_OPENIB_LP_QP]->qp_num, 1, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
@ -367,8 +368,8 @@ static int mca_btl_openib_endpoint_send_connect_data(mca_btl_base_endpoint_t* en
BTL_VERBOSE(("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_hp->qp_num,
endpoint->lcl_qp_lp->qp_num,
endpoint->lcl_qp[BTL_OPENIB_HP_QP]->qp_num,
endpoint->lcl_qp[BTL_OPENIB_LP_QP]->qp_num,
endpoint->endpoint_btl->lid));
if(rc < 0) {
@ -418,9 +419,9 @@ static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoi
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl,
openib_btl->hca->ib_pd,
openib_btl->ib_cq_hp,
openib_btl->srq_hp,
openib_btl->srq[BTL_OPENIB_HP_QP],
endpoint->lcl_qp_attr_hp,
&endpoint->lcl_qp_hp))) {
&endpoint->lcl_qp[BTL_OPENIB_HP_QP]))) {
BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc;
}
@ -431,17 +432,17 @@ static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoi
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl,
openib_btl->hca->ib_pd,
openib_btl->ib_cq_lp,
openib_btl->srq_lp,
openib_btl->srq[BTL_OPENIB_LP_QP],
endpoint->lcl_qp_attr_lp,
&endpoint->lcl_qp_lp))) {
&endpoint->lcl_qp[BTL_OPENIB_LP_QP]))) {
BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc;
}
endpoint->lcl_psn_lp = lrand48() & 0xffffff;
BTL_VERBOSE(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_hp->qp_num,
endpoint->lcl_qp_lp->qp_num,
endpoint->lcl_qp[BTL_OPENIB_HP_QP]->qp_num,
endpoint->lcl_qp[BTL_OPENIB_LP_QP]->qp_num,
openib_btl->lid));
/* Send connection info over to remote endpoint */
@ -468,10 +469,10 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl,
openib_btl->hca->ib_pd,
openib_btl->ib_cq_hp,
openib_btl->srq_hp,
openib_btl->srq[BTL_OPENIB_HP_QP],
endpoint->lcl_qp_attr_hp,
&endpoint->lcl_qp_hp))) {
&endpoint->lcl_qp[BTL_OPENIB_HP_QP]))) {
BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc;
}
@ -482,18 +483,18 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl,
openib_btl->hca->ib_pd,
openib_btl->ib_cq_lp,
openib_btl->srq_lp,
openib_btl->srq[BTL_OPENIB_LP_QP],
endpoint->lcl_qp_attr_lp,
&endpoint->lcl_qp_lp))) {
&endpoint->lcl_qp[BTL_OPENIB_LP_QP]))) {
BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc;
}
endpoint->lcl_psn_lp = lrand48() & 0xffffff;
BTL_VERBOSE(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_hp->qp_num,
endpoint->lcl_qp_lp->qp_num,
endpoint->lcl_qp[BTL_OPENIB_HP_QP]->qp_num,
endpoint->lcl_qp[BTL_OPENIB_LP_QP]->qp_num,
openib_btl->lid));
@ -871,7 +872,7 @@ int mca_btl_openib_endpoint_connect(
/* Connection establishment RC */
rc = mca_btl_openib_endpoint_qp_init_query(
openib_btl,
endpoint->lcl_qp_hp,
endpoint->lcl_qp[BTL_OPENIB_HP_QP],
endpoint->lcl_qp_attr_hp,
endpoint->lcl_psn_hp,
endpoint->rem_info.rem_qp_num_hp,
@ -888,7 +889,7 @@ int mca_btl_openib_endpoint_connect(
}
rc = mca_btl_openib_endpoint_qp_init_query(
openib_btl,
endpoint->lcl_qp_lp,
endpoint->lcl_qp[BTL_OPENIB_LP_QP],
endpoint->lcl_qp_attr_lp,
endpoint->lcl_psn_lp,
endpoint->rem_info.rem_qp_num_lp,
@ -908,11 +909,11 @@ int mca_btl_openib_endpoint_connect(
MCA_BTL_IB_FRAG_ALLOC_CREDIT_WAIT(openib_btl, endpoint->lp_credit_frag, rc);
if(mca_btl_openib_component.use_srq) {
MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, 1);
MCA_BTL_OPENIB_POST_SRR_LOW(openib_btl, 1);
mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_HP_QP);
mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_LP_QP);
} else {
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1);
MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, 1);
btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_HP_QP);
btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_LP_QP);
}
return OMPI_SUCCESS;
@ -1066,10 +1067,11 @@ static void mca_btl_openib_endpoint_credits_lp(
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],-1);
/* check to see if there are additional credits to return */
if ((credits = OPAL_THREAD_ADD32(&endpoint->sd_credits_lp,-1)) > 0) {
OPAL_THREAD_ADD32(&endpoint->sd_credits_lp,-credits);
if (endpoint->rd_credits_lp >= mca_btl_openib_component.rd_win &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_lp,1) == 1) {
if ((credits = OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_LP_QP],-1)) > 0) {
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_LP_QP],-credits);
if (endpoint->rd_credits[BTL_OPENIB_LP_QP] >=
mca_btl_openib_component.rd_win &&
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_LP_QP],1) == 1) {
mca_btl_openib_endpoint_send_credits_lp(endpoint);
}
}
@ -1095,9 +1097,10 @@ void mca_btl_openib_endpoint_send_credits_lp(
frag->endpoint = endpoint;
frag->hdr->tag = MCA_BTL_TAG_BTL;
if(endpoint->rd_credits_lp > 0) {
frag->hdr->credits = endpoint->rd_credits_lp;
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -frag->hdr->credits);
if(endpoint->rd_credits[BTL_OPENIB_LP_QP] > 0) {
frag->hdr->credits = endpoint->rd_credits[BTL_OPENIB_LP_QP];
OPAL_THREAD_ADD32(&endpoint->rd_credits[BTL_OPENIB_LP_QP],
-frag->hdr->credits);
} else {
frag->hdr->credits = 0;
}
@ -1120,11 +1123,12 @@ void mca_btl_openib_endpoint_send_credits_lp(
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
}
if(ibv_post_send(endpoint->lcl_qp_lp,
if(ibv_post_send(endpoint->lcl_qp[BTL_OPENIB_LP_QP],
&frag->wr_desc.sr_desc,
&bad_wr)) {
OPAL_THREAD_ADD32(&endpoint->sd_credits_lp, -1);
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, frag->hdr->credits);
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_LP_QP], -1);
OPAL_THREAD_ADD32(&endpoint->rd_credits[BTL_OPENIB_LP_QP],
frag->hdr->credits);
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
BTL_ERROR(("error posting send request errno %d says %s", strerror(errno)));
return;
@ -1148,11 +1152,11 @@ static void mca_btl_openib_endpoint_credits_hp(
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_HP_QP],-1);
/* check to see if there are additional credits to return */
if ((credits = OPAL_THREAD_ADD32(&endpoint->sd_credits_hp,-1)) > 0) {
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp,-credits);
if ((endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win ||
if ((credits = OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_HP_QP],-1)) > 0) {
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_HP_QP],-credits);
if ((endpoint->rd_credits[BTL_OPENIB_HP_QP] >= mca_btl_openib_component.rd_win ||
endpoint->eager_rdma_local.credits >= mca_btl_openib_component.rd_win) &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp,1) == 1) {
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_HP_QP],1) == 1) {
mca_btl_openib_endpoint_send_credits_hp(endpoint);
}
}
@ -1178,9 +1182,10 @@ void mca_btl_openib_endpoint_send_credits_hp(
frag->endpoint = endpoint;
frag->hdr->tag = MCA_BTL_TAG_BTL;
if(endpoint->rd_credits_hp > 0) {
frag->hdr->credits = endpoint->rd_credits_hp;
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -frag->hdr->credits);
if(endpoint->rd_credits[BTL_OPENIB_HP_QP] > 0) {
frag->hdr->credits = endpoint->rd_credits[BTL_OPENIB_HP_QP];
OPAL_THREAD_ADD32(&endpoint->rd_credits[BTL_OPENIB_HP_QP],
-frag->hdr->credits);
} else
frag->hdr->credits = 0;
if(endpoint->eager_rdma_local.credits > 0) {
@ -1208,11 +1213,12 @@ void mca_btl_openib_endpoint_send_credits_hp(
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
}
if(ibv_post_send(endpoint->lcl_qp_hp,
if(ibv_post_send(endpoint->lcl_qp[BTL_OPENIB_HP_QP],
&frag->wr_desc.sr_desc,
&bad_wr)) {
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp, -1);
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, frag->hdr->credits);
OPAL_THREAD_ADD32(&endpoint->sd_credits[BTL_OPENIB_HP_QP], -1);
OPAL_THREAD_ADD32(&endpoint->rd_credits[BTL_OPENIB_HP_QP],
frag->hdr->credits);
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
BTL_ERROR(("error posting send request errno %d says %s", errno,
strerror(errno)));

Просмотреть файл

@ -37,13 +37,6 @@ extern "C" {
struct mca_btl_openib_frag_t;
/**
 * Per-port information record.
 * Both the struct tag and the typedef name stay usable.
 */
typedef struct mca_btl_openib_port_info_t {
    uint32_t mtu;     /* presumably the port MTU - TODO confirm units/encoding */
    uint16_t subnet;  /* presumably the subnet the port belongs to - TODO confirm */
} mca_btl_openib_port_info_t;
/**
* State of IB endpoint connection.
*/
@ -138,9 +131,7 @@ struct mca_btl_base_endpoint_t {
uint32_t lcl_psn_lp;
/* Local processes port sequence number (Low and High) */
struct ibv_qp* lcl_qp_hp;
struct ibv_qp* lcl_qp_lp;
/* Local QP (Low and High) */
struct ibv_qp* lcl_qp[2]; /* Local QP (Low and High) */
struct ibv_qp_attr* lcl_qp_attr_hp;
struct ibv_qp_attr* lcl_qp_attr_lp;
@ -149,12 +140,9 @@ struct mca_btl_base_endpoint_t {
int32_t sd_tokens[2]; /**< number of send tokens */
int32_t get_tokens; /**< number of available get tokens */
int32_t rd_posted_hp; /**< number of high priority descriptors posted to the nic*/
int32_t rd_posted_lp; /**< number of low priority descriptors posted to the nic*/
int32_t rd_credits_hp; /**< number of high priority credits to return to peer */
int32_t rd_credits_lp; /**< number of low priority credits to return to peer */
int32_t sd_credits_hp; /**< number of send wqe entries being used to return credits */
int32_t sd_credits_lp; /**< number of send wqe entries being used to return credits */
int32_t rd_posted[2]; /**< number of descriptors posted to the nic*/
int32_t rd_credits[2]; /**< number of credits to return to peer */
int32_t sd_credits[2]; /**< number of send wqe entries being used to return credits */
int32_t sd_wqe[2]; /**< number of available send wqe entries */
uint16_t subnet; /**< subnet of this endpoint*/
@ -181,76 +169,47 @@ void mca_btl_openib_post_recv(void);
void mca_btl_openib_endpoint_send_credits_hp(mca_btl_base_endpoint_t*);
void mca_btl_openib_endpoint_send_credits_lp(mca_btl_base_endpoint_t*);
void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t*);
/*
 * Replenish the receive queue of the endpoint's high-priority QP.
 *
 * While holding endpoint->endpoint_btl->ib_lock: if rd_posted_hp is at or
 * below mca_btl_openib_component.rd_low + additional and still below the
 * module's rd_num, expand MCA_BTL_OPENIB_ENDPOINT_POST_RR_SUB to post
 * (rd_num - rd_posted_hp) fragments from recv_free_eager to lcl_qp_hp,
 * updating rd_posted_hp and rd_credits_hp.
 *
 * NOTE(review): the expansion declares a local named openib_btl, and
 * MCA_BTL_OPENIB_ENDPOINT_POST_RR_SUB contains "return OMPI_ERROR", so a
 * failed post leaves the enclosing function with ib_lock still held.
 */
#define MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, \
additional) \
{ \
do { \
mca_btl_openib_module_t * openib_btl = endpoint->endpoint_btl; \
OPAL_THREAD_LOCK(&openib_btl->ib_lock); \
if(endpoint->rd_posted_hp <= mca_btl_openib_component.rd_low+additional && \
endpoint->rd_posted_hp < openib_btl->rd_num) { \
MCA_BTL_OPENIB_ENDPOINT_POST_RR_SUB(openib_btl->rd_num - \
endpoint->rd_posted_hp, \
endpoint, \
&openib_btl->recv_free_eager, \
endpoint->rd_posted_hp, \
endpoint->rd_credits_hp, \
endpoint->lcl_qp_hp); \
} \
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock); \
} while(0); \
}
/*
 * Replenish the receive queue of the endpoint's low-priority QP.
 *
 * While holding endpoint->endpoint_btl->ib_lock: if rd_posted_lp is at or
 * below mca_btl_openib_component.rd_low + additional and still below the
 * module's rd_num, expand MCA_BTL_OPENIB_ENDPOINT_POST_RR_SUB to post
 * (rd_num - rd_posted_lp) fragments from recv_free_max to lcl_qp_lp,
 * updating rd_posted_lp and rd_credits_lp.
 *
 * NOTE(review): the expansion declares a local named openib_btl, and
 * MCA_BTL_OPENIB_ENDPOINT_POST_RR_SUB contains "return OMPI_ERROR", so a
 * failed post leaves the enclosing function with ib_lock still held.
 */
#define MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, \
additional) { \
do { \
mca_btl_openib_module_t * openib_btl = endpoint->endpoint_btl; \
OPAL_THREAD_LOCK(&openib_btl->ib_lock); \
if(endpoint->rd_posted_lp <= mca_btl_openib_component.rd_low+additional && \
endpoint->rd_posted_lp < openib_btl->rd_num){ \
MCA_BTL_OPENIB_ENDPOINT_POST_RR_SUB(openib_btl->rd_num - \
endpoint->rd_posted_lp, \
endpoint, \
&openib_btl->recv_free_max, \
endpoint->rd_posted_lp, \
endpoint->rd_credits_lp, \
endpoint->lcl_qp_lp \
); } \
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock); \
} while(0); \
}
/*
 * Macro-based posting helper, reproduced unchanged. It is the helper that
 * MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH/LOW expand. It previously sat in the
 * middle of btl_openib_endpoint_post_rr()'s body and has been moved in front
 * of the function so the function reads as one piece.
 */
#define MCA_BTL_OPENIB_ENDPOINT_POST_RR_SUB(cnt, \
my_endpoint, \
frag_list, \
rd_posted, \
rd_credits, \
qp ) \
do { \
int32_t i; \
int rc; \
int32_t num_post = cnt; \
struct ibv_recv_wr* bad_wr; \
for(i = 0; i < num_post; i++) { \
ompi_free_list_item_t* item; \
mca_btl_openib_frag_t* frag; \
OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
frag = (mca_btl_openib_frag_t*) item; \
frag->endpoint = my_endpoint; \
frag->sg_entry.length = frag->size + \
((unsigned char*) frag->segment.seg_addr.pval- \
(unsigned char*) frag->hdr); \
if(ibv_post_recv(qp, \
&frag->wr_desc.rd_desc, \
&bad_wr)) { \
BTL_ERROR(("error posting receive errno says %s\n", strerror(errno))); \
return OMPI_ERROR; \
}\
}\
OPAL_THREAD_ADD32(&(rd_posted), num_post); \
OPAL_THREAD_ADD32(&(rd_credits), num_post); \
} while(0);

/**
 * Replenish the receive queue of one of the endpoint's queue pairs.
 *
 * While holding the owning module's ib_lock: if rd_posted[prio] has dropped
 * to mca_btl_openib_component.rd_low + additional or below (and is still
 * below the module's rd_num), enough receive fragments are posted to
 * lcl_qp[prio] to bring the count back up to rd_num. Every posted fragment
 * is counted in both rd_posted[prio] and rd_credits[prio].
 *
 * @param endpoint    endpoint whose queue pair is refilled
 * @param additional  slack added to the rd_low threshold
 * @param prio        BTL_OPENIB_HP_QP (fragments come from recv_free_eager)
 *                    or BTL_OPENIB_LP_QP (fragments come from recv_free_max);
 *                    also the index into lcl_qp[], rd_posted[], rd_credits[]
 *
 * @return OMPI_SUCCESS if nothing had to be posted or every post succeeded,
 *         OMPI_ERROR if ibv_post_recv() failed.
 */
static inline int btl_openib_endpoint_post_rr(mca_btl_base_endpoint_t *endpoint,
        const int additional, const int prio)
{
    mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
    int ret = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&openib_btl->ib_lock);
    if(endpoint->rd_posted[prio] <=
            mca_btl_openib_component.rd_low + additional &&
       endpoint->rd_posted[prio] < openib_btl->rd_num) {
        int rc;
        int32_t i, num_post = openib_btl->rd_num - endpoint->rd_posted[prio];
        struct ibv_recv_wr* bad_wr;
        ompi_free_list_t *free_list;

        if(BTL_OPENIB_HP_QP == prio)
            free_list = &openib_btl->recv_free_eager;
        else
            free_list = &openib_btl->recv_free_max;

        for(i = 0; i < num_post; i++) {
            ompi_free_list_item_t* item;
            mca_btl_openib_frag_t* frag;

            OMPI_FREE_LIST_WAIT(free_list, item, rc);
            frag = (mca_btl_openib_frag_t*)item;
            frag->endpoint = endpoint;
            /* receive length covers the header plus the payload area */
            frag->sg_entry.length = frag->size +
                ((unsigned char*)frag->segment.seg_addr.pval -
                 (unsigned char*)frag->hdr);
            if(ibv_post_recv(endpoint->lcl_qp[prio], &frag->wr_desc.rd_desc,
                        &bad_wr)) {
                BTL_ERROR(("error posting receive errno says %s\n",
                            strerror(errno)));
                /* leave the loop instead of returning so that ib_lock is
                 * always released below */
                ret = OMPI_ERROR;
                break;
            }
        }
        /* i is the number of receives actually posted; it equals num_post
         * unless a post failed part way through */
        OPAL_THREAD_ADD32(&endpoint->rd_posted[prio], i);
        OPAL_THREAD_ADD32(&endpoint->rd_credits[prio], i);
    }
    OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);

    return ret;
}
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -17,6 +17,7 @@
*/
#include "btl_openib_frag.h"
#include "btl_openib_eager_rdma.h"
#include "ompi/mca/mpool/openib/mpool_openib.h"

Просмотреть файл

@ -20,10 +20,10 @@
#define MCA_BTL_IB_FRAG_H
#include "ompi_config.h"
#include "btl_openib.h"
#include <infiniband/verbs.h>
#include "ompi/mca/mpool/openib/mpool_openib.h"
#include "ompi/mca/btl/btl.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {