Send all explicit credits for PP QPs of all orders over smallest PP qp.
This commit was SVN r16781.
Этот коммит содержится в:
родитель
a9f864d15c
Коммит
5463eb892c
@ -195,6 +195,7 @@ struct mca_btl_openib_component_t {
|
||||
int want_fork_support;
|
||||
#endif
|
||||
int rdma_qp;
|
||||
int credits_qp; /* qp used for software flow control */
|
||||
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
|
||||
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_openib_component_t mca_btl_openib_component;
|
||||
|
@ -71,10 +71,6 @@
|
||||
static int btl_openib_component_open(void);
|
||||
static int btl_openib_component_close(void);
|
||||
static int btl_openib_modex_send(void);
|
||||
static void btl_openib_control(struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata);
|
||||
static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
|
||||
uint8_t port_num, uint16_t pkey_index,
|
||||
struct ibv_port_attr *ib_port_attr);
|
||||
@ -84,10 +80,6 @@ static mca_btl_base_module_t **btl_openib_component_init(
|
||||
bool enable_mpi_threads);
|
||||
static void merge_values(ompi_btl_openib_ini_values_t *target,
|
||||
ompi_btl_openib_ini_values_t *src);
|
||||
static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
mca_btl_openib_endpoint_t *endpoint,
|
||||
mca_btl_openib_recv_frag_t *frag,
|
||||
size_t byte_len);
|
||||
static char* btl_openib_component_status_to_string(enum ibv_wc_status status);
|
||||
static int btl_openib_component_progress(void);
|
||||
static int btl_openib_module_progress(mca_btl_openib_hca_t *hca);
|
||||
@ -220,50 +212,19 @@ static int btl_openib_modex_send(void)
|
||||
* Active Message Callback function on control message.
|
||||
*/
|
||||
|
||||
static void btl_openib_control(struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata)
|
||||
static void btl_openib_control(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des,
|
||||
void* cbdata)
|
||||
{
|
||||
/* don't return credits used for control messages */
|
||||
mca_btl_openib_endpoint_t* endpoint = to_com_frag(des)->endpoint;
|
||||
mca_btl_openib_endpoint_t* ep = to_com_frag(des)->endpoint;
|
||||
mca_btl_openib_control_header_t *ctl_hdr =
|
||||
to_base_frag(des)->segment.seg_addr.pval;
|
||||
mca_btl_openib_eager_rdma_header_t *rdma_hdr;
|
||||
mca_btl_openib_rdma_credits_header_t *credits_hdr;
|
||||
int qp;
|
||||
|
||||
switch (ctl_hdr->type) {
|
||||
case MCA_BTL_OPENIB_CONTROL_CREDITS:
|
||||
credits_hdr = (mca_btl_openib_rdma_credits_header_t*)ctl_hdr;
|
||||
|
||||
if(endpoint->nbo) {
|
||||
BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr);
|
||||
}
|
||||
qp = credits_hdr->qpn;
|
||||
|
||||
/* if not sent via rdma */
|
||||
if(!MCA_BTL_OPENIB_RDMA_FRAG(des)) {
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_received, 1);
|
||||
/* rd_posted don't account for rsv preposts for credit message but
|
||||
* receive path decreased it for each message received no matter if
|
||||
* it is credit message or not. So fix rd_posted value here. */
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_posted, 1);
|
||||
} else {
|
||||
mca_btl_openib_header_t *hdr = to_recv_frag(des)->hdr;
|
||||
/* if received via rdma the update credits here since they will not
|
||||
* be update in handle_incomming() function because qp num is not
|
||||
* known there */
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits,
|
||||
hdr->credits);
|
||||
progress_pending_frags_pp(endpoint, qp);
|
||||
}
|
||||
|
||||
if(credits_hdr->rdma_credits) {
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,
|
||||
credits_hdr->rdma_credits);
|
||||
progress_pending_eager_rdma(endpoint);
|
||||
}
|
||||
assert(0); /* Credit message is handled elsewhere */
|
||||
break;
|
||||
case MCA_BTL_OPENIB_CONTROL_RDMA:
|
||||
rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)ctl_hdr;
|
||||
@ -275,7 +236,7 @@ static void btl_openib_control(struct mca_btl_base_module_t* btl,
|
||||
rdma_hdr->rdma_start.ival
|
||||
));
|
||||
|
||||
if(endpoint->nbo) {
|
||||
if(ep->nbo) {
|
||||
BTL_OPENIB_EAGER_RDMA_CONTROL_HEADER_NTOH(*rdma_hdr);
|
||||
}
|
||||
|
||||
@ -284,14 +245,13 @@ static void btl_openib_control(struct mca_btl_base_module_t* btl,
|
||||
(unsigned long) rdma_hdr->rdma_start.lval,
|
||||
rdma_hdr->rdma_start.pval, rdma_hdr->rdma_start.ival));
|
||||
|
||||
if (endpoint->eager_rdma_remote.base.pval) {
|
||||
if (ep->eager_rdma_remote.base.pval) {
|
||||
BTL_ERROR(("Got RDMA connect twice!"));
|
||||
return;
|
||||
}
|
||||
endpoint->eager_rdma_remote.rkey = rdma_hdr->rkey;
|
||||
endpoint->eager_rdma_remote.base.lval = rdma_hdr->rdma_start.lval;
|
||||
endpoint->eager_rdma_remote.tokens =
|
||||
mca_btl_openib_component.eager_rdma_num - 1;
|
||||
ep->eager_rdma_remote.rkey = rdma_hdr->rkey;
|
||||
ep->eager_rdma_remote.base.lval = rdma_hdr->rdma_start.lval;
|
||||
ep->eager_rdma_remote.tokens=mca_btl_openib_component.eager_rdma_num - 1;
|
||||
break;
|
||||
default:
|
||||
BTL_ERROR(("Unknown message type received by BTL"));
|
||||
@ -1124,45 +1084,102 @@ static void merge_values(ompi_btl_openib_ini_values_t *target,
|
||||
}
|
||||
}
|
||||
|
||||
static bool inline is_credit_message(const mca_btl_openib_recv_frag_t *frag)
|
||||
{
|
||||
mca_btl_openib_control_header_t* chdr =
|
||||
to_base_frag(frag)->segment.seg_addr.pval;
|
||||
return (MCA_BTL_TAG_BTL == frag->hdr->tag) &&
|
||||
(MCA_BTL_OPENIB_CONTROL_CREDITS == chdr->type);
|
||||
}
|
||||
|
||||
static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
|
||||
mca_btl_openib_endpoint_t *endpoint,
|
||||
mca_btl_openib_endpoint_t *ep,
|
||||
mca_btl_openib_recv_frag_t *frag,
|
||||
size_t byte_len)
|
||||
{
|
||||
mca_btl_base_descriptor_t *des = &to_base_frag(frag)->base;
|
||||
mca_btl_openib_header_t *hdr = frag->hdr;
|
||||
int rqp = to_base_frag(frag)->base.order, cqp;
|
||||
uint16_t rcredits = 0, credits;
|
||||
bool is_credit_msg;
|
||||
|
||||
if(endpoint->nbo) {
|
||||
if(ep->nbo) {
|
||||
BTL_OPENIB_HEADER_NTOH(*hdr);
|
||||
}
|
||||
|
||||
/* advance the segment address past the header and subtract from the
|
||||
* length..*/
|
||||
* length.*/
|
||||
des->des_dst->seg_len = byte_len - sizeof(mca_btl_openib_header_t);
|
||||
|
||||
/* call registered callback */
|
||||
openib_btl->ib_reg[hdr->tag].cbfunc(&openib_btl->super, hdr->tag, des,
|
||||
if(OPAL_LIKELY(!(is_credit_msg = is_credit_message(frag)))) {
|
||||
/* call registered callback */
|
||||
openib_btl->ib_reg[hdr->tag].cbfunc(&openib_btl->super, hdr->tag, des,
|
||||
openib_btl->ib_reg[hdr->tag].cbdata);
|
||||
|
||||
if(BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) {
|
||||
if(BTL_OPENIB_CREDITS(hdr->credits) > 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,
|
||||
BTL_OPENIB_CREDITS(hdr->credits));
|
||||
progress_pending_eager_rdma(endpoint);
|
||||
cqp = rqp;
|
||||
if(BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) {
|
||||
rcredits = BTL_OPENIB_CREDITS(hdr->credits);
|
||||
hdr->credits = 0;
|
||||
}
|
||||
} else {
|
||||
int qp = to_base_frag(frag)->base.order;
|
||||
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp) && hdr->credits > 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits,
|
||||
hdr->credits);
|
||||
progress_pending_frags_pp(endpoint, qp);
|
||||
mca_btl_openib_rdma_credits_header_t *chdr=des->des_dst->seg_addr.pval;
|
||||
if(ep->nbo) {
|
||||
BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*chdr);
|
||||
}
|
||||
if(hdr->cm_seen)
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent,-hdr->cm_seen);
|
||||
cqp = chdr->qpn;
|
||||
rcredits = chdr->rdma_credits;
|
||||
}
|
||||
|
||||
credits = hdr->credits;
|
||||
|
||||
if(hdr->cm_seen)
|
||||
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_sent, -hdr->cm_seen);
|
||||
|
||||
/* Now return fragment. Don't touch hdr after this point! */
|
||||
if(MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
|
||||
mca_btl_openib_eager_rdma_local_t *erl = &ep->eager_rdma_local;
|
||||
OPAL_THREAD_LOCK(&erl->lock);
|
||||
MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
|
||||
while(erl->tail != erl->head) {
|
||||
mca_btl_openib_recv_frag_t *tf;
|
||||
tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(ep, erl->tail);
|
||||
if(MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(tf))
|
||||
break;
|
||||
OPAL_THREAD_ADD32(&erl->credits, 1);
|
||||
MCA_BTL_OPENIB_RDMA_NEXT_INDEX(erl->tail);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&erl->lock);
|
||||
} else {
|
||||
MCA_BTL_IB_FRAG_RETURN(frag);
|
||||
if(BTL_OPENIB_QP_TYPE_SRQ(rqp)) {
|
||||
mca_btl_openib_module_t *btl = ep->endpoint_btl;
|
||||
OPAL_THREAD_ADD32(&btl->qps[rqp].u.srq_qp.rd_posted, -1);
|
||||
mca_btl_openib_post_srr(btl, 0, rqp);
|
||||
} else {
|
||||
if(OPAL_UNLIKELY(is_credit_msg))
|
||||
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.cm_received, 1);
|
||||
else
|
||||
OPAL_THREAD_ADD32(&ep->qps[rqp].u.pp_qp.rd_posted, -1);
|
||||
mca_btl_openib_endpoint_post_rr(ep, cqp);
|
||||
}
|
||||
}
|
||||
|
||||
if(rcredits > 0) {
|
||||
OPAL_THREAD_ADD32(&ep->eager_rdma_remote.tokens, rcredits);
|
||||
progress_pending_eager_rdma(ep);
|
||||
}
|
||||
|
||||
assert((cqp != MCA_BTL_NO_ORDER && BTL_OPENIB_QP_TYPE_PP(cqp)) || !credits);
|
||||
|
||||
if(credits) {
|
||||
OPAL_THREAD_ADD32(&ep->qps[cqp].u.pp_qp.sd_credits, credits);
|
||||
progress_pending_frags_pp(ep, cqp);
|
||||
}
|
||||
|
||||
|
||||
|
||||
send_credits(ep, (cqp != MCA_BTL_NO_ORDER) ? cqp :
|
||||
mca_btl_openib_component.credits_qp);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1449,23 +1466,6 @@ static int btl_openib_component_progress(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->eager_rdma_local.lock);
|
||||
MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
|
||||
while (endpoint->eager_rdma_local.tail !=
|
||||
endpoint->eager_rdma_local.head) {
|
||||
mca_btl_openib_recv_frag_t *tf;
|
||||
tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(endpoint,
|
||||
endpoint->eager_rdma_local.tail);
|
||||
if (MCA_BTL_OPENIB_RDMA_FRAG_LOCAL (tf))
|
||||
break;
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, 1);
|
||||
MCA_BTL_OPENIB_RDMA_NEXT_INDEX(endpoint->eager_rdma_local.tail);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
|
||||
|
||||
/* send credits over qp 0 since it should always be present
|
||||
* anyway */
|
||||
send_credits(endpoint, 0);
|
||||
count++;
|
||||
} else
|
||||
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
|
||||
@ -1573,21 +1573,8 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
|
||||
return 0;
|
||||
}
|
||||
|
||||
MCA_BTL_IB_FRAG_RETURN(frag);
|
||||
|
||||
if(BTL_OPENIB_QP_TYPE_SRQ(qp)) {
|
||||
OPAL_THREAD_ADD32((int32_t*)
|
||||
&openib_btl->qps[qp].u.srq_qp.rd_posted, -1);
|
||||
mca_btl_openib_post_srr(openib_btl, 0, qp);
|
||||
} else {
|
||||
OPAL_THREAD_ADD32((int32_t*)
|
||||
&endpoint->qps[qp].u.pp_qp.rd_posted, -1);
|
||||
mca_btl_openib_endpoint_post_rr(endpoint, 0, qp);
|
||||
}
|
||||
count++;
|
||||
|
||||
send_credits(endpoint, qp);
|
||||
|
||||
/* decide if it is time to setup an eager rdma channel */
|
||||
if (!endpoint->eager_rdma_local.base.pval &&
|
||||
endpoint->use_eager_rdma &&
|
||||
|
@ -426,7 +426,7 @@ int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t *endpoint)
|
||||
if (BTL_OPENIB_QP_TYPE_SRQ(qp)) {
|
||||
mca_btl_openib_post_srr(endpoint->endpoint_btl, 1, qp);
|
||||
} else {
|
||||
mca_btl_openib_endpoint_post_rr(endpoint, 1, qp);
|
||||
mca_btl_openib_endpoint_post_rr(endpoint, qp);
|
||||
}
|
||||
}
|
||||
|
||||
@ -535,26 +535,27 @@ int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* endpoint,
|
||||
|
||||
static void mca_btl_openib_endpoint_credits(
|
||||
mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status)
|
||||
{
|
||||
|
||||
int qp;
|
||||
|
||||
mca_btl_openib_send_control_frag_t *frag = to_send_control_frag(descriptor);
|
||||
mca_btl_openib_send_control_frag_t *frag = to_send_control_frag(des);
|
||||
|
||||
qp = frag->qp_idx;
|
||||
|
||||
/* we don't acquire a wqe or token for credit message - so decrement */
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe, -1);
|
||||
/* we don't acquire a WQE for credit message - so decrement.
|
||||
* Note: doing it for QP used for credit management */
|
||||
OPAL_THREAD_ADD32(&ep->qps[des->order].sd_wqe, -1);
|
||||
|
||||
if(check_send_credits(endpoint, qp))
|
||||
mca_btl_openib_endpoint_send_credits(endpoint, qp);
|
||||
if(check_send_credits(ep, qp) || check_eager_rdma_credits(ep))
|
||||
mca_btl_openib_endpoint_send_credits(ep, qp);
|
||||
else {
|
||||
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
|
||||
BTL_OPENIB_CREDITS_SEND_UNLOCK(ep, qp);
|
||||
/* check one more time if credits are available after unlock */
|
||||
send_credits(endpoint, qp);
|
||||
send_credits(ep, qp);
|
||||
}
|
||||
}
|
||||
|
||||
@ -579,7 +580,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
|
||||
frag->qp_idx = qp;
|
||||
endpoint->qps[qp].credit_frag = frag;
|
||||
/* set those once and forever */
|
||||
to_base_frag(frag)->base.order = qp;
|
||||
to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
|
||||
to_base_frag(frag)->base.des_cbfunc = mca_btl_openib_endpoint_credits;
|
||||
to_base_frag(frag)->base.des_cbdata = NULL;
|
||||
to_com_frag(frag)->endpoint = endpoint;
|
||||
@ -589,8 +590,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
|
||||
}
|
||||
|
||||
assert(frag->qp_idx == qp);
|
||||
credits_hdr =
|
||||
(mca_btl_openib_rdma_credits_header_t*)
|
||||
credits_hdr = (mca_btl_openib_rdma_credits_header_t*)
|
||||
to_base_frag(frag)->segment.seg_addr.pval;
|
||||
if(acquire_eager_rdma_send_credit(endpoint) == MPI_SUCCESS) {
|
||||
do_rdma = true;
|
||||
@ -606,15 +606,13 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
|
||||
GET_CREDITS(endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits);
|
||||
|
||||
frag->hdr->cm_seen = 0;
|
||||
if(!do_rdma) {
|
||||
GET_CREDITS(endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
|
||||
if(cm_return > 255) {
|
||||
frag->hdr->cm_seen = 255;
|
||||
cm_return -= 255;
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
|
||||
} else {
|
||||
frag->hdr->cm_seen = cm_return;
|
||||
}
|
||||
GET_CREDITS(endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
|
||||
if(cm_return > 255) {
|
||||
frag->hdr->cm_seen = 255;
|
||||
cm_return -= 255;
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
|
||||
} else {
|
||||
frag->hdr->cm_seen = cm_return;
|
||||
}
|
||||
|
||||
GET_CREDITS(endpoint->eager_rdma_local.credits, credits_hdr->rdma_credits);
|
||||
@ -675,7 +673,7 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
|
||||
mca_btl_openib_endpoint_eager_rdma_connect_cb;
|
||||
to_base_frag(frag)->base.des_cbdata = NULL;
|
||||
to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
to_send_frag(frag)->qp_idx = 0;
|
||||
to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
|
||||
to_base_frag(frag)->segment.seg_len =
|
||||
sizeof(mca_btl_openib_eager_rdma_header_t);
|
||||
to_com_frag(frag)->endpoint = endpoint;
|
||||
|
@ -206,72 +206,69 @@ void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t*);
|
||||
int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t*);
|
||||
void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t*);
|
||||
|
||||
|
||||
|
||||
static inline int mca_btl_openib_endpoint_post_rr(mca_btl_base_endpoint_t *endpoint,
|
||||
const int additional,
|
||||
const int qp)
|
||||
/**
 * Post num_post receive buffers on QP index qp of endpoint ep.
 *
 * Every buffer is taken from the recv_free free list that the endpoint's
 * BTL module keeps for this QP index, stamped with the QP index
 * (base.order) and the owning endpoint, and then passed to ibv_post_recv()
 * on the endpoint's local QP.
 *
 * @param ep        endpoint whose local QP receives the buffers
 * @param qp        index of the QP to post on
 * @param num_post  number of buffers to post; nothing is posted if <= 0
 * @return OMPI_SUCCESS when every buffer was posted; OMPI_ERROR as soon as
 *         ibv_post_recv() reports a failure (the function returns at once,
 *         nothing already posted is undone).
 */
static inline int post_recvs(mca_btl_base_endpoint_t *ep, const int qp,
        const int num_post)
{
    mca_btl_openib_module_t *openib_btl = ep->endpoint_btl;
    ompi_free_list_t *free_list = &openib_btl->qps[qp].recv_free;
    struct ibv_recv_wr *bad_wr;
    int posted = 0;

    while(posted < num_post) {
        ompi_free_list_item_t *item;
        int rc;

        OMPI_FREE_LIST_WAIT(free_list, item, rc);

        /* remember which QP and endpoint this receive buffer belongs to */
        to_base_frag(item)->base.order = qp;
        to_com_frag(item)->endpoint = ep;

        rc = ibv_post_recv(ep->qps[qp].lcl_qp, &to_recv_frag(item)->rd_desc,
                &bad_wr);
        if(rc) {
            BTL_ERROR(("error posting receive on qp %d (%d from %d)\n",
                    qp, posted, num_post));
            return OMPI_ERROR;
        }

        posted++;
    }

    return OMPI_SUCCESS;
}
|
||||
|
||||
static inline int mca_btl_openib_endpoint_post_rr(
|
||||
mca_btl_base_endpoint_t *endpoint, const int qp)
|
||||
{
|
||||
mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
|
||||
int rd_rsv = mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv;
|
||||
int rd_num = mca_btl_openib_component.qp_infos[qp].rd_num;
|
||||
|
||||
int cqp = mca_btl_openib_component.credits_qp, rc;
|
||||
int cm_received, rd_posted, rd_low;
|
||||
|
||||
assert(BTL_OPENIB_QP_TYPE_PP(qp));
|
||||
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
|
||||
|
||||
cm_received = endpoint->qps[qp].u.pp_qp.cm_received;
|
||||
rd_posted = endpoint->qps[qp].u.pp_qp.rd_posted;
|
||||
rd_low = mca_btl_openib_component.qp_infos[qp].rd_low;
|
||||
|
||||
if(cm_received >= (rd_rsv >> 2) || rd_posted <= rd_low) {
|
||||
int rc;
|
||||
int32_t i, num_post = rd_num - rd_posted;
|
||||
struct ibv_recv_wr* bad_wr;
|
||||
ompi_free_list_t *free_list;
|
||||
|
||||
free_list = &openib_btl->qps[qp].recv_free;
|
||||
|
||||
for(i = 0; i < (num_post + cm_received); i++) {
|
||||
ompi_free_list_item_t* item;
|
||||
OMPI_FREE_LIST_WAIT(free_list, item, rc);
|
||||
to_base_frag(item)->base.order = qp;
|
||||
to_com_frag(item)->endpoint = endpoint;
|
||||
if(ibv_post_recv(endpoint->qps[qp].lcl_qp,
|
||||
&to_recv_frag(item)->rd_desc,
|
||||
&bad_wr)) {
|
||||
BTL_ERROR(("error posting receive errno says %s\n",
|
||||
strerror(errno)));
|
||||
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
if(num_post > 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_posted, num_post);
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits, num_post);
|
||||
}
|
||||
if(cm_received > 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return,
|
||||
cm_received);
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_received,
|
||||
-cm_received);
|
||||
}
|
||||
assert(endpoint->qps[qp].u.pp_qp.rd_credits <= rd_num);
|
||||
assert(endpoint->qps[qp].u.pp_qp.rd_credits >= 0);
|
||||
/* post receive buffers */
|
||||
if(rd_posted <= rd_low) {
|
||||
int num_post = rd_num - rd_posted;
|
||||
if((rc = post_recvs(endpoint, qp, num_post)) != OMPI_SUCCESS)
|
||||
return rc;
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_posted, num_post);
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits, num_post);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int mca_btl_openib_endpoint_post_rr_all(mca_btl_base_endpoint_t *endpoint,
|
||||
const int additional)
|
||||
{
|
||||
int qp;
|
||||
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++){
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
mca_btl_openib_endpoint_post_rr(endpoint, additional, qp);
|
||||
}
|
||||
/* post buffers for credit management on credit management qp */
|
||||
if(cm_received >= (rd_rsv >> 2)) {
|
||||
if((rc = post_recvs(endpoint, cqp, cm_received)) != OMPI_SUCCESS)
|
||||
return rc;
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_received);
|
||||
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_received, -cm_received);
|
||||
}
|
||||
|
||||
assert(endpoint->qps[qp].u.pp_qp.rd_credits <= rd_num);
|
||||
assert(endpoint->qps[qp].u.pp_qp.rd_credits >= 0);
|
||||
assert(endpoint->qps[qp].u.pp_qp.rd_credits <= rd_num);
|
||||
assert(endpoint->qps[qp].u.pp_qp.rd_credits >= 0);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -280,28 +277,38 @@ static inline int mca_btl_openib_endpoint_post_rr_all(mca_btl_base_endpoint_t *e
|
||||
#define BTL_OPENIB_CREDITS_SEND_UNLOCK(E, Q) \
|
||||
OPAL_ATOMIC_CMPSET_32(&(E)->qps[(Q)].rd_credit_send_lock, 1, 0)
|
||||
|
||||
static inline bool check_send_credits(mca_btl_openib_endpoint_t *endpoint,
|
||||
const int qp)
|
||||
static inline bool check_eager_rdma_credits(const mca_btl_openib_endpoint_t *ep)
|
||||
{
|
||||
if(endpoint->eager_rdma_local.credits > endpoint->eager_rdma_local.rd_win)
|
||||
return true;
|
||||
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
if(endpoint->qps[qp].u.pp_qp.rd_credits >=
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_win) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return (ep->eager_rdma_local.credits > ep->eager_rdma_local.rd_win) ? true :
|
||||
false;
|
||||
}
|
||||
|
||||
static inline void send_credits(mca_btl_openib_endpoint_t *endpoint,
|
||||
const int qp)
|
||||
static inline bool
|
||||
check_send_credits(const mca_btl_openib_endpoint_t *ep, const int qp)
|
||||
{
|
||||
if(check_send_credits(endpoint, qp) &&
|
||||
BTL_OPENIB_CREDITS_SEND_TRYLOCK(endpoint, qp))
|
||||
mca_btl_openib_endpoint_send_credits(endpoint, qp);
|
||||
|
||||
if(!BTL_OPENIB_QP_TYPE_PP(qp))
|
||||
return false;
|
||||
|
||||
return (ep->qps[qp].u.pp_qp.rd_credits >=
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_win) ? true : false;
|
||||
}
|
||||
|
||||
/**
 * Send a credit message to the peer if any credits are waiting to be
 * returned.
 *
 * For a per-peer (PP) QP a message is attempted when either
 * check_send_credits() reports pending credits for that QP or
 * check_eager_rdma_credits() reports pending eager RDMA credits.  For any
 * other QP type only the eager RDMA credits are considered, and the message
 * goes over mca_btl_openib_component.credits_qp instead of qp.
 *
 * The message is sent only if BTL_OPENIB_CREDITS_SEND_TRYLOCK() succeeds
 * for the chosen QP.
 *
 * @param ep  endpoint to return credits to
 * @param qp  index of the QP on which the triggering event happened
 */
static inline void send_credits(mca_btl_openib_endpoint_t *ep, int qp)
{
    bool credits_pending;

    if(BTL_OPENIB_QP_TYPE_PP(qp)) {
        /* QP credits take precedence; eager RDMA credits are only looked at
         * when there are no QP credits to return */
        credits_pending = check_send_credits(ep, qp) ||
            check_eager_rdma_credits(ep);
    } else {
        /* non-PP QP: redirect to the QP used for credit management */
        qp = mca_btl_openib_component.credits_qp;
        credits_pending = check_eager_rdma_credits(ep);
    }

    if(credits_pending && BTL_OPENIB_CREDITS_SEND_TRYLOCK(ep, qp))
        mca_btl_openib_endpoint_send_credits(ep, qp);
}
|
||||
|
||||
END_C_DECLS
|
||||
|
@ -483,7 +483,8 @@ static int mca_btl_openib_mca_setup_qps(void)
|
||||
char *default_qps = "P,128,256,128,16:S,1024,256,128,32:S,4096,256,128,32:S,65536,256,128,32";
|
||||
uint32_t max_qp_size, max_size_needed;
|
||||
int32_t min_freelist_size = 0;
|
||||
|
||||
int smallest_pp_qp = 0;
|
||||
|
||||
reg_string("receive_queues",
|
||||
"Colon-delimited, coma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
|
||||
default_qps, &str, 0);
|
||||
@ -498,7 +499,9 @@ static int mca_btl_openib_mca_setup_qps(void)
|
||||
|
||||
while (queues[qp] != NULL) {
|
||||
if (0 == strncmp("P,", queues[qp], 2)) {
|
||||
num_pp_qps++;
|
||||
num_pp_qps++;
|
||||
if(smallest_pp_qp > qp)
|
||||
smallest_pp_qp = qp;
|
||||
} else if (0 == strncmp("S,", queues[qp], 2)) {
|
||||
num_srq_qps++;
|
||||
} else {
|
||||
@ -638,6 +641,7 @@ static int mca_btl_openib_mca_setup_qps(void)
|
||||
}
|
||||
|
||||
mca_btl_openib_component.rdma_qp = mca_btl_openib_component.num_qps - 1;
|
||||
mca_btl_openib_component.credits_qp = smallest_pp_qp;
|
||||
|
||||
/* Register any MCA params for the connect pseudo-components */
|
||||
|
||||
|
@ -238,7 +238,10 @@ static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint)
|
||||
attr.qp_state = IBV_QPS_RTS;
|
||||
attr.timeout = mca_btl_openib_component.ib_timeout;
|
||||
attr.retry_cnt = mca_btl_openib_component.ib_retry_count;
|
||||
attr.rnr_retry = mca_btl_openib_component.ib_rnr_retry;
|
||||
/* On PP QPs we have SW flow control, no need for rnr retries. Setting
|
||||
* it to zero helps to catch bugs */
|
||||
attr.rnr_retry = BTL_OPENIB_QP_TYPE_PP(i) ? 0 :
|
||||
mca_btl_openib_component.ib_rnr_retry;
|
||||
attr.sq_psn = endpoint->qps[i].lcl_psn;
|
||||
attr.max_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
|
||||
if (ibv_modify_qp(qp, &attr,
|
||||
@ -264,25 +267,51 @@ static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint)
|
||||
*/
|
||||
static int qp_create_all(mca_btl_base_endpoint_t* endpoint)
|
||||
{
|
||||
int qp, rc, prio;
|
||||
int qp, rc, prio, pp_qp_num = 0;
|
||||
int32_t rd_rsv_total = 0;
|
||||
|
||||
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp)
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
rd_rsv_total +=
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv;
|
||||
pp_qp_num++;
|
||||
}
|
||||
|
||||
/* if there are no PP QPs we still need a reserved WQE for eager RDMA flow
|
||||
* control */
|
||||
if(0 == pp_qp_num && true == endpoint->use_eager_rdma)
|
||||
pp_qp_num = 1;
|
||||
|
||||
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
|
||||
struct ibv_srq *srq = NULL;
|
||||
uint32_t max_recv_wr, max_send_wr;
|
||||
int32_t rd_rsv, rd_num_credits;
|
||||
/* If the size for this qp is <= the eager limit, make it a
|
||||
high priority QP. Otherwise, make it a low priority QP. */
|
||||
prio = (mca_btl_openib_component.qp_infos[qp].size <=
|
||||
mca_btl_openib_component.eager_limit) ?
|
||||
BTL_OPENIB_HP_CQ : BTL_OPENIB_LP_CQ;
|
||||
if(MCA_BTL_OPENIB_PP_QP == mca_btl_openib_component.qp_infos[qp].type) {
|
||||
max_recv_wr = mca_btl_openib_component.qp_infos[qp].rd_num +
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv;
|
||||
max_send_wr = mca_btl_openib_component.qp_infos[qp].rd_num + 1;
|
||||
|
||||
if(qp == 0)
|
||||
prio = BTL_OPENIB_HP_CQ; /* smallest qp is always HP */
|
||||
|
||||
/* The QP used for SW flow control needs some additional resources */
|
||||
if(qp == mca_btl_openib_component.credits_qp) {
|
||||
rd_rsv = rd_rsv_total;
|
||||
rd_num_credits = pp_qp_num;
|
||||
} else {
|
||||
rd_rsv = rd_num_credits = 0;
|
||||
}
|
||||
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
max_recv_wr = mca_btl_openib_component.qp_infos[qp].rd_num + rd_rsv;
|
||||
max_send_wr = mca_btl_openib_component.qp_infos[qp].rd_num +
|
||||
rd_num_credits;
|
||||
} else {
|
||||
srq = endpoint->endpoint_btl->qps[qp].u.srq_qp.srq;
|
||||
max_recv_wr = mca_btl_openib_component.qp_infos[qp].rd_num;
|
||||
max_send_wr = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max
|
||||
+ 1;
|
||||
+ rd_num_credits;
|
||||
}
|
||||
|
||||
rc = qp_create_one(endpoint, prio, qp, srq, max_recv_wr, max_send_wr);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user