diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 0969f55312..7415f8df93 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -1036,6 +1036,15 @@ static void mca_pml_ob1_put_completion( /* check for request completion */ if( OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length) >= sendreq->req_send.req_bytes_packed) { + /* bump up the req_state after the last fin was sent. + if rndv completion occurs after this (can happen!) then + the rndv completion will properly clean up after the request. + we can't just do this on the first RDMA PUT + ACK ctl message in + mca_pml_ob1_send_request_put because then we might fall into sender + side scheduling (pml pipeline protocol) */ + if(true == sendreq->req_got_put_ack) { + MCA_PML_OB1_SEND_REQUEST_ADVANCE_NO_SCHEDULE(sendreq); + } /* if we've got completion on rndv packet */ if (sendreq->req_state == 2) { MCA_PML_OB1_SEND_REQUEST_PML_COMPLETE(sendreq); @@ -1078,7 +1087,7 @@ void mca_pml_ob1_send_request_put( bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); MCA_PML_OB1_RDMA_FRAG_ALLOC(frag, rc); if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) { - MCA_PML_OB1_SEND_REQUEST_ADVANCE_NO_SCHEDULE(sendreq); + sendreq->req_got_put_ack = true; } if(NULL == frag) { diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index af3475376b..6ad7306aae 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -52,6 +52,7 @@ struct mca_pml_ob1_send_request_t { size_t req_bytes_delivered; size_t req_send_offset; size_t req_rdma_offset; + bool req_got_put_ack; mca_pml_ob1_rdma_btl_t req_rdma[MCA_PML_OB1_MAX_RDMA_PER_REQUEST]; uint32_t req_rdma_cnt; }; @@ -144,6 +145,7 @@ do { sendreq->req_bytes_delivered = 0; \ sendreq->req_state = 0; \ sendreq->req_send_offset = 0; \ + sendreq->req_got_put_ack = false; \ sendreq->req_send.req_base.req_sequence = 
OPAL_THREAD_ADD32( \ &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \ sendreq->req_endpoint = endpoint; \