From 29f7b4deda1c5d8661862a537cc44cef21bba197 Mon Sep 17 00:00:00 2001
From: Galen Shipman
Date: Thu, 1 Sep 2005 02:41:44 +0000
Subject: [PATCH] Changed send tokens to send/rdma tokens for both the low and high priority queue pairs.

Tested on intel p2p with 16 procs - Passed.

This commit was SVN r7119.
---
 ompi/mca/btl/mvapi/btl_mvapi.h           |  2 +-
 ompi/mca/btl/mvapi/btl_mvapi_component.c | 29 ++++++----
 ompi/mca/btl/mvapi/btl_mvapi_endpoint.c  | 69 +++++++++++++++---------
 ompi/mca/btl/mvapi/btl_mvapi_endpoint.h  | 16 ++++--
 4 files changed, 76 insertions(+), 40 deletions(-)

diff --git a/ompi/mca/btl/mvapi/btl_mvapi.h b/ompi/mca/btl/mvapi/btl_mvapi.h
index 19cfc1058a..6bf1b844d7 100644
--- a/ompi/mca/btl/mvapi/btl_mvapi.h
+++ b/ompi/mca/btl/mvapi/btl_mvapi.h
@@ -118,7 +118,7 @@ struct mca_btl_mvapi_component_t {
     uint32_t ib_static_rate;
     uint32_t ib_src_path_bits;
     /* number of send tokes available */
-    uint32_t max_send_tokens;
+    uint32_t max_wr_sq_tokens;
 
 };
 typedef struct mca_btl_mvapi_component_t mca_btl_mvapi_component_t;
diff --git a/ompi/mca/btl/mvapi/btl_mvapi_component.c b/ompi/mca/btl/mvapi/btl_mvapi_component.c
index 4e52a113e2..d598126e2e 100644
--- a/ompi/mca/btl/mvapi/btl_mvapi_component.c
+++ b/ompi/mca/btl/mvapi/btl_mvapi_component.c
@@ -208,12 +208,12 @@ int mca_btl_mvapi_component_open(void)
     mca_base_param_lookup_int(param, &value);
     mca_btl_mvapi_component.leave_pinned = value;
     mca_base_param_reg_int(&mca_btl_mvapi_component.super.btl_version,
-                           "max_send_tokens",
-                           "Maximum number of send tokens",
+                           "max_wr_sq_tokens",
+                           "Maximum number of send/rdma work request tokens",
                            false,
                            false,
                            16,
-                           &(mca_btl_mvapi_component.max_send_tokens));
+                           &(mca_btl_mvapi_component.max_wr_sq_tokens));
 
     mca_btl_mvapi_component.max_send_size = mca_btl_mvapi_module.super.btl_max_send_size;
     mca_btl_mvapi_component.eager_limit = mca_btl_mvapi_module.super.btl_eager_limit;
@@ -551,21 +551,18 @@ int mca_btl_mvapi_component_progress()
             return OMPI_ERROR;
 
         case VAPI_CQE_SQ_SEND_DATA :
-            frag = (mca_btl_mvapi_frag_t*) comp.id;
-            frag->endpoint->send_tokens++;
-
-            /* fall through */
         case VAPI_CQE_SQ_RDMA_READ:
         case VAPI_CQE_SQ_RDMA_WRITE:
-
+            frag = (mca_btl_mvapi_frag_t*) comp.id;
+            OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_hp, 1);
             /* Process a completed send or an rdma write */
             frag->rc = OMPI_SUCCESS;
             frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
             count++;
             /* check and see if we need to progress pending sends */
-            if(frag->endpoint->send_tokens && !opal_list_is_empty(&(frag->endpoint->pending_send_frags))) {
+            if(frag->endpoint->wr_sq_tokens_hp && !opal_list_is_empty(&(frag->endpoint->pending_frags_hp))) {
                 opal_list_item_t *frag_item;
-                frag_item = opal_list_remove_first(&(frag->endpoint->pending_send_frags));
+                frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_hp));
                 frag = (mca_btl_mvapi_frag_t *) frag_item;
 
                 if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
@@ -627,9 +624,21 @@ int mca_btl_mvapi_component_progress()
 
             /* Process a completed send */
             frag = (mca_btl_mvapi_frag_t*) comp.id;
+            OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_lp, 1);
             frag->rc = OMPI_SUCCESS;
             frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
             count++;
+            /* check and see if we need to progress pending sends */
+            if(frag->endpoint->wr_sq_tokens_lp && !opal_list_is_empty(&(frag->endpoint->pending_frags_lp))) {
+                opal_list_item_t *frag_item;
+                frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_lp));
+                frag = (mca_btl_mvapi_frag_t *) frag_item;
+
+                if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
+                    BTL_ERROR(("error in posting pending send\n"));
+                }
+            }
+
             break;
 
         case VAPI_CQE_RQ_SEND_DATA:
 
diff --git a/ompi/mca/btl/mvapi/btl_mvapi_endpoint.c b/ompi/mca/btl/mvapi/btl_mvapi_endpoint.c
index 1996f64efa..6e67ead88f 100644
--- a/ompi/mca/btl/mvapi/btl_mvapi_endpoint.c
+++ b/ompi/mca/btl/mvapi/btl_mvapi_endpoint.c
@@ -64,11 +64,35 @@ static inline int mca_btl_mvapi_endpoint_post_send(mca_btl_mvapi_module_t* mvapi
     VAPI_qp_hndl_t qp_hndl;
     if(frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY && frag->size <= mvapi_btl->super.btl_eager_limit){
-        frag->sr_desc.remote_qp = endpoint->rem_qp_num_high;
-        qp_hndl = endpoint->lcl_qp_hndl_high;
+
+        if(0 == endpoint->wr_sq_tokens_hp) {
+            BTL_VERBOSE(("Queueing because no send tokens \n"));
+
+            opal_list_append(&endpoint->pending_frags_hp,
+                             (opal_list_item_t *)frag);
+
+            return OMPI_SUCCESS;
+
+        } else {
+
+            frag->sr_desc.remote_qp = endpoint->rem_qp_num_high;
+            qp_hndl = endpoint->lcl_qp_hndl_high;
+            OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp, -1);
+        }
     } else {
-        frag->sr_desc.remote_qp = endpoint->rem_qp_num_low;
-        qp_hndl = endpoint->lcl_qp_hndl_low;
+        if(0 == endpoint->wr_sq_tokens_lp) {
+            BTL_VERBOSE(("Queueing because no send tokens \n"));
+
+            opal_list_append(&endpoint->pending_frags_lp,
+                             (opal_list_item_t *)frag);
+
+            return OMPI_SUCCESS;
+
+        } else {
+            frag->sr_desc.remote_qp = endpoint->rem_qp_num_low;
+            qp_hndl = endpoint->lcl_qp_hndl_low;
+            OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp, -1);
+        }
     }
     frag->sr_desc.opcode = VAPI_SEND;
     frag->sg_entry.len = frag->segment.seg_len + ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); /* sizeof(mca_btl_mvapi_header_t); */
@@ -120,10 +144,15 @@ static void mca_btl_mvapi_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
     OBJ_CONSTRUCT(&endpoint->endpoint_send_lock, opal_mutex_t);
     OBJ_CONSTRUCT(&endpoint->endpoint_recv_lock, opal_mutex_t);
     OBJ_CONSTRUCT(&endpoint->pending_send_frags, opal_list_t);
+    OBJ_CONSTRUCT(&endpoint->pending_frags_hp, opal_list_t);
+    OBJ_CONSTRUCT(&endpoint->pending_frags_lp, opal_list_t);
+
     endpoint->rr_posted_high = 0;
-    endpoint->rr_posted_low = 0;
-    endpoint->send_tokens = mca_btl_mvapi_component.max_send_tokens;
+    endpoint->rr_posted_low = 0;
+    /* initialize the high and low priority tokens */
+    endpoint->wr_sq_tokens_hp = mca_btl_mvapi_component.max_wr_sq_tokens;
+    endpoint->wr_sq_tokens_lp = mca_btl_mvapi_component.max_wr_sq_tokens;
 
 }
 
@@ -569,26 +598,14 @@ int mca_btl_mvapi_endpoint_send(
 
             mvapi_btl = endpoint->endpoint_btl;
 
-            if(0 == endpoint->send_tokens) {
-                BTL_VERBOSE(("Queing because no send tokens \n"));
-
-                opal_list_append(&endpoint->pending_send_frags,
-                                 (opal_list_item_t *)frag);
-
-                rc = OMPI_SUCCESS;
-            } else {
+            BTL_VERBOSE(("Send to : %d, len : %d, frag : %p",
+                         endpoint->endpoint_proc->proc_guid.vpid,
+                         frag->sg_entry.len,
+                         frag));
 
-                endpoint->send_tokens--;
-
-                BTL_VERBOSE(("Send to : %d, len : %d, frag : %p",
-                             endpoint->endpoint_proc->proc_guid.vpid,
-                             frag->sg_entry.len,
-                             frag));
-
-                rc = mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag);
-            }
-            break;
-    }
+            rc = mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag);
+        }
+        break;
 
         default:
             rc = OMPI_ERR_UNREACH;
@@ -690,7 +707,7 @@ int mca_btl_mvapi_endpoint_create_qp(
     case VAPI_TS_RC: /* Set up RC qp parameters */
         qp_init_attr.cap.max_oust_wr_rq = mca_btl_mvapi_component.ib_rr_buf_max;
-        qp_init_attr.cap.max_oust_wr_sq = mca_btl_mvapi_component.max_send_tokens;
+        qp_init_attr.cap.max_oust_wr_sq = mca_btl_mvapi_component.max_wr_sq_tokens;
         qp_init_attr.cap.max_sg_size_rq = mca_btl_mvapi_component.ib_sg_list_size;
         qp_init_attr.cap.max_sg_size_sq = mca_btl_mvapi_component.ib_sg_list_size;
         qp_init_attr.pd_hndl = ptag;
diff --git a/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h b/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h
index 350439b3f5..392438de08 100644
--- a/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h
+++ b/ompi/mca/btl/mvapi/btl_mvapi_endpoint.h
@@ -105,9 +105,19 @@ struct mca_btl_base_endpoint_t {
 
     opal_list_t pending_send_frags;
     /**< list of pending send frags for this endpoint */
-
-    uint32_t send_tokens;
-    /**< number of sends that can be outstanding (down counter) */
+
+    opal_list_t pending_frags_hp;
+    /**< list of pending high priority frags */
+
+    opal_list_t pending_frags_lp;
+    /**< list of pending low priority frags */
+
+    uint32_t wr_sq_tokens_hp;
+    /**< number of high priority frags that can be outstanding (down counter) */
+
+
+    uint32_t wr_sq_tokens_lp;
+    /**< number of low priority frags that can be outstanding (down counter) */
 
     VAPI_qp_num_t rem_qp_num_high;
     /* High priority remote side QP number */
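
The change above replaces the endpoint's single send_tokens counter with two send-queue work-request counters, wr_sq_tokens_hp and wr_sq_tokens_lp (one per queue pair), plus matching pending_frags_hp/pending_frags_lp lists: a send consumes a token or, if none are left, the fragment is queued; each completion returns a token and drains one queued fragment. The following is a minimal, self-contained sketch of that flow-control pattern in plain C. The endpoint_t, frag_t, and list helpers are simplified stand-ins introduced here for illustration only, not the actual OMPI or VAPI structures and APIs.

#include <stdio.h>

#define MAX_WR_SQ_TOKENS 16   /* mirrors the default of the max_wr_sq_tokens MCA parameter */

/* a queued fragment; the real mca_btl_mvapi_frag_t carries much more state */
typedef struct frag {
    int priority;             /* 1 = high priority QP, 0 = low priority QP */
    struct frag *next;
} frag_t;

/* simple FIFO standing in for opal_list_t */
typedef struct {
    frag_t *head, *tail;
} frag_list_t;

/* per-peer state: one token counter and one pending list per queue pair */
typedef struct {
    int wr_sq_tokens_hp;
    int wr_sq_tokens_lp;
    frag_list_t pending_frags_hp;
    frag_list_t pending_frags_lp;
} endpoint_t;

static void list_append(frag_list_t *l, frag_t *f) {
    f->next = NULL;
    if (l->tail) l->tail->next = f; else l->head = f;
    l->tail = f;
}

static frag_t *list_remove_first(frag_list_t *l) {
    frag_t *f = l->head;
    if (f) { l->head = f->next; if (!l->head) l->tail = NULL; }
    return f;
}

/* post a send: consume a token if one is available, otherwise queue the frag */
static void post_send(endpoint_t *ep, frag_t *f) {
    int *tokens = f->priority ? &ep->wr_sq_tokens_hp : &ep->wr_sq_tokens_lp;
    frag_list_t *pending = f->priority ? &ep->pending_frags_hp : &ep->pending_frags_lp;

    if (0 == *tokens) {
        list_append(pending, f);   /* send queue is full: defer the fragment */
        return;
    }
    (*tokens)--;                   /* claim a work-request slot on the send queue */
    printf("posting %s priority frag\n", f->priority ? "high" : "low");
}

/* completion handler: return the token, then drain one pending frag if any */
static void send_completed(endpoint_t *ep, int priority) {
    int *tokens = priority ? &ep->wr_sq_tokens_hp : &ep->wr_sq_tokens_lp;
    frag_list_t *pending = priority ? &ep->pending_frags_hp : &ep->pending_frags_lp;
    frag_t *f;

    (*tokens)++;
    if (*tokens > 0 && (f = list_remove_first(pending)) != NULL) {
        post_send(ep, f);
    }
}

int main(void) {
    endpoint_t ep = { MAX_WR_SQ_TOKENS, MAX_WR_SQ_TOKENS, { NULL, NULL }, { NULL, NULL } };
    frag_t frags[20];
    int i;

    /* post more high priority frags than there are tokens: the last four are queued */
    for (i = 0; i < 20; i++) {
        frags[i].priority = 1;
        post_send(&ep, &frags[i]);
    }
    /* each completion returns a token and drains one queued frag */
    for (i = 0; i < 20; i++) {
        send_completed(&ep, 1);
    }
    return 0;
}

Compiled as a standalone program, the driver in main() posts more high-priority fragments than there are tokens, so the overflow is queued and then drained one fragment per completion, which mirrors the progress loop behavior in mca_btl_mvapi_component_progress above.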