More miscellaneous cleanup of bfo.
This commit was SVN r23986.
This commit is contained in:
parent
505efbaa27
commit
1b231f7e73
@ -1779,7 +1779,6 @@ void mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl,
|
|||||||
int status)
|
int status)
|
||||||
{
|
{
|
||||||
mca_pml_bfo_common_hdr_t * common = des->des_src->seg_addr.pval;
|
mca_pml_bfo_common_hdr_t * common = des->des_src->seg_addr.pval;
|
||||||
mca_pml_bfo_ack_hdr_t* ack; /* ACK header */
|
|
||||||
mca_pml_bfo_rdma_hdr_t* hdr; /* PUT header */
|
mca_pml_bfo_rdma_hdr_t* hdr; /* PUT header */
|
||||||
struct mca_btl_base_descriptor_t* rdma_des;
|
struct mca_btl_base_descriptor_t* rdma_des;
|
||||||
mca_pml_bfo_recv_request_t* recvreq;
|
mca_pml_bfo_recv_request_t* recvreq;
|
||||||
@ -1787,8 +1786,7 @@ void mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl,
|
|||||||
if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||||
switch (common->hdr_type) {
|
switch (common->hdr_type) {
|
||||||
case MCA_PML_BFO_HDR_TYPE_ACK:
|
case MCA_PML_BFO_HDR_TYPE_ACK:
|
||||||
ack = (mca_pml_bfo_ack_hdr_t*)des->des_src->seg_addr.pval;
|
recvreq = des->des_cbdata;
|
||||||
recvreq = (mca_pml_bfo_recv_request_t*) ack->hdr_dst_req.pval;
|
|
||||||
|
|
||||||
/* Record the error. Send RECVERRNOTIFY if necessary. */
|
/* Record the error. Send RECVERRNOTIFY if necessary. */
|
||||||
if (recvreq->req_errstate) {
|
if (recvreq->req_errstate) {
|
||||||
@ -1860,8 +1858,7 @@ void mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
switch (common->hdr_type) {
|
switch (common->hdr_type) {
|
||||||
case MCA_PML_BFO_HDR_TYPE_ACK:
|
case MCA_PML_BFO_HDR_TYPE_ACK:
|
||||||
ack = (mca_pml_bfo_ack_hdr_t*)des->des_src->seg_addr.pval;
|
recvreq = des->des_cbdata;
|
||||||
recvreq = (mca_pml_bfo_recv_request_t*) ack->hdr_dst_req.pval;
|
|
||||||
recvreq->req_events--;
|
recvreq->req_events--;
|
||||||
assert(recvreq->req_events >= 0);
|
assert(recvreq->req_events >= 0);
|
||||||
if(OPAL_UNLIKELY (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED)) {
|
if(OPAL_UNLIKELY (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED)) {
|
||||||
|
@ -253,6 +253,18 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t
|
|||||||
* Macros for pml_bfo_sendreq.c file.
|
* Macros for pml_bfo_sendreq.c file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* This macro is called on the sending side after receiving
|
||||||
|
* a PUT message. There is a chance that this PUT message
|
||||||
|
* has shown up and is attempting to modify the state of
|
||||||
|
* the req_state, but the req_state is no longer being tracked
|
||||||
|
* because the RNDV message has turned into a RGET message
|
||||||
|
* because it got an error on the RNDV completion.
|
||||||
|
*/
|
||||||
|
#define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \
|
||||||
|
if (sendreq->req_state == -1) { \
|
||||||
|
OPAL_THREAD_ADD32(&sendreq->req_state, 1); \
|
||||||
|
}
|
||||||
|
|
||||||
/* Now check the error state. This request can be in error if the
|
/* Now check the error state. This request can be in error if the
|
||||||
* RNDV message made it over, but the receiver got an error trying to
|
* RNDV message made it over, but the receiver got an error trying to
|
||||||
* send the ACK back and therefore sent a RECVERRNOTIFY message. In
|
* send the ACK back and therefore sent a RECVERRNOTIFY message. In
|
||||||
@ -326,7 +338,6 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t
|
|||||||
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; \
|
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; \
|
||||||
bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); \
|
bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, type) \
|
#define MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, type) \
|
||||||
if (bml_btl->btl != btl) { \
|
if (bml_btl->btl != btl) { \
|
||||||
mca_pml_bfo_find_sendreq_eager_bml_btl(&bml_btl, btl, sendreq, type); \
|
mca_pml_bfo_find_sendreq_eager_bml_btl(&bml_btl, btl, sendreq, type); \
|
||||||
|
@ -243,8 +243,6 @@ void mca_pml_bfo_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
|||||||
slow_path:
|
slow_path:
|
||||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||||
#ifdef PML_BFO
|
#ifdef PML_BFO
|
||||||
/* Check for duplicate messages. If message is duplicate, then just
|
|
||||||
* return as that essentially drops the message. */
|
|
||||||
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -626,6 +624,7 @@ static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl,
|
|||||||
* the fragment.
|
* the fragment.
|
||||||
*/
|
*/
|
||||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||||
|
|
||||||
#ifdef PML_BFO
|
#ifdef PML_BFO
|
||||||
/* In case of network failover, we may get a message telling us to
|
/* In case of network failover, we may get a message telling us to
|
||||||
* restart. In that case, we already have a pointer to the receive
|
* restart. In that case, we already have a pointer to the receive
|
||||||
@ -718,8 +717,6 @@ wrong_seq:
|
|||||||
* is ahead of sequence. Save it for later.
|
* is ahead of sequence. Save it for later.
|
||||||
*/
|
*/
|
||||||
#ifdef PML_BFO
|
#ifdef PML_BFO
|
||||||
/* Check for duplicate messages. If message is duplicate, then just
|
|
||||||
* return as that essentially drops the message. */
|
|
||||||
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -251,7 +251,7 @@ int mca_pml_bfo_recv_request_ack_send_btl(
|
|||||||
/* initialize descriptor */
|
/* initialize descriptor */
|
||||||
des->des_cbfunc = mca_pml_bfo_recv_ctl_completion;
|
des->des_cbfunc = mca_pml_bfo_recv_ctl_completion;
|
||||||
#ifdef PML_BFO
|
#ifdef PML_BFO
|
||||||
des->des_cbdata = (void *)proc;
|
des->des_cbdata = hdr_dst_req;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_ACK);
|
rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_ACK);
|
||||||
|
@ -169,8 +169,7 @@ recv_request_pml_complete(mca_pml_bfo_recv_request_t *recvreq)
|
|||||||
}
|
}
|
||||||
recvreq->req_rdma_cnt = 0;
|
recvreq->req_rdma_cnt = 0;
|
||||||
#ifdef PML_BFO
|
#ifdef PML_BFO
|
||||||
/* Reset to a value that to indicate it is invalid. */
|
recvreq->req_msgseq -= 100;
|
||||||
recvreq->req_msgseq = recvreq->req_msgseq - 100;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||||
|
@ -1343,18 +1343,10 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
size_t i, size = 0;
|
size_t i, size = 0;
|
||||||
|
|
||||||
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) {
|
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) {
|
||||||
#ifdef PML_BFO
|
|
||||||
/* Handle the failover case where a RNDV request may
|
|
||||||
* have turned into a RGET and therefore the state
|
|
||||||
* is not being tracked. */
|
|
||||||
if (sendreq->req_state != 0) {
|
|
||||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#ifdef PML_BFO
|
#ifdef PML_BFO
|
||||||
|
MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq);
|
||||||
sendreq->req_recv = hdr->hdr_dst_req; /* only needed once, but it is OK */
|
sendreq->req_recv = hdr->hdr_dst_req; /* only needed once, but it is OK */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1385,8 +1377,8 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
|
|
||||||
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||||
#ifdef PML_BFO
|
#ifdef PML_BFO
|
||||||
frag->rdma_btl = btl;
|
|
||||||
MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl);
|
MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl);
|
||||||
|
frag->rdma_btl = btl; /* in case frag ends up on pending */
|
||||||
#endif
|
#endif
|
||||||
frag->rdma_hdr.hdr_rdma = *hdr;
|
frag->rdma_hdr.hdr_rdma = *hdr;
|
||||||
frag->rdma_req = sendreq;
|
frag->rdma_req = sendreq;
|
||||||
|
@ -250,14 +250,7 @@ send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq)
|
|||||||
}
|
}
|
||||||
sendreq->req_send.req_base.req_pml_complete = true;
|
sendreq->req_send.req_base.req_pml_complete = true;
|
||||||
#ifdef PML_BFO
|
#ifdef PML_BFO
|
||||||
assert(0 == sendreq->req_events);
|
sendreq->req_send.req_base.req_sequence -= 100;
|
||||||
sendreq->req_restartseq = 0;
|
|
||||||
/* Since sequence numbers increase monotonically and
|
|
||||||
* roll over, initialize it to a value far away from
|
|
||||||
* what it was. I cannot set it to something like -1
|
|
||||||
* as that is not within the valid range. */
|
|
||||||
sendreq->req_send.req_base.req_sequence =
|
|
||||||
sendreq->req_send.req_base.req_sequence - 10;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(sendreq->req_send.req_base.req_free_called) {
|
if(sendreq->req_send.req_base.req_free_called) {
|
||||||
|
Loading…
x
Reference in new issue
Block a user