diff --git a/ompi/mca/pml/csum/pml_csum.c b/ompi/mca/pml/csum/pml_csum.c index d67848de87..308fcc49c7 100644 --- a/ompi/mca/pml/csum/pml_csum.c +++ b/ompi/mca/pml/csum/pml_csum.c @@ -428,6 +428,12 @@ static void mca_pml_csum_fin_completion( mca_btl_base_module_t* btl, MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); } +/** + * Send an FIN to the peer. If we fail to send it (no more available + * fragments or the send failed), this function automatically adds the FIN + * to the list of pending FINs, which guarantees that the FIN will be sent + * later. + */ int mca_pml_csum_send_fin( ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, void *hdr_des, @@ -500,7 +506,7 @@ void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl) if(NULL == send_dst) { OPAL_THREAD_LOCK(&mca_pml_csum.lock); opal_list_append(&mca_pml_csum.pckt_pending, - (opal_list_item_t*)pckt); + (opal_list_item_t*)pckt); OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); continue; } @@ -513,12 +519,11 @@ void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl) pckt->hdr.hdr_ack.hdr_dst_req.pval, pckt->hdr.hdr_ack.hdr_send_offset, pckt->hdr.hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA); - MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt); - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - MCA_PML_CSUM_ADD_ACK_TO_PENDING(pckt->proc, - pckt->hdr.hdr_ack.hdr_src_req.lval, - pckt->hdr.hdr_ack.hdr_dst_req.pval, - pckt->hdr.hdr_ack.hdr_send_offset); + if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { + OPAL_THREAD_LOCK(&mca_pml_csum.lock); + opal_list_append(&mca_pml_csum.pckt_pending, + (opal_list_item_t*)pckt); + OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); return; } break; @@ -527,15 +532,17 @@ void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl) pckt->hdr.hdr_fin.hdr_des.pval, pckt->order, pckt->hdr.hdr_fin.hdr_fail); - MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt); - if(OMPI_ERR_OUT_OF_RESOURCE == rc) - return; + if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { + return; + } break; default: 
opal_output(0, "[%s:%d] wrong header type\n", - __FILE__, __LINE__); + __FILE__, __LINE__); break; } + /* We're done with this packet, return it back to the free list */ + MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt); } } diff --git a/ompi/mca/pml/csum/pml_csum_recvreq.c b/ompi/mca/pml/csum/pml_csum_recvreq.c index 6aed902251..7e81b71129 100644 --- a/ompi/mca/pml/csum/pml_csum_recvreq.c +++ b/ompi/mca/pml/csum/pml_csum_recvreq.c @@ -227,7 +227,7 @@ int mca_pml_csum_recv_request_ack_send_btl( /* allocate descriptor */ mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof(mca_pml_csum_ack_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); + MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); if( OPAL_UNLIKELY(NULL == des) ) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -252,9 +252,6 @@ int mca_pml_csum_recv_request_ack_send_btl( rc = mca_bml_base_send(bml_btl, des, MCA_PML_CSUM_HDR_TYPE_ACK); if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); - } return OMPI_SUCCESS; } mca_bml_base_free(bml_btl, des); @@ -381,7 +378,7 @@ int mca_pml_csum_recv_request_get_frag( mca_pml_csum_rdma_frag_t* frag ) MCA_BTL_NO_ORDER, 0, &frag->rdma_length, - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, + 0, /* always call the callback, PML ownership */ &descriptor ); if( OPAL_UNLIKELY(NULL == descriptor) ) { frag->rdma_length = save_size; @@ -831,7 +828,7 @@ int mca_pml_csum_recv_request_schedule_once( mca_pml_csum_recv_request_t* recvre } mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, hdr_size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); + MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); if( OPAL_UNLIKELY(NULL == ctl) ) { mca_bml_base_free(bml_btl,dst); @@ -879,10 +876,6 @@ int mca_pml_csum_recv_request_schedule_once( mca_pml_csum_recv_request_t* recvre OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1); 
recvreq->req_rdma[rdma_idx].length -= size; bytes_remaining -= size; - if( OPAL_LIKELY( 1 == rc ) ) { - /* The send is completed, trigger the callback */ - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); - } } else { mca_bml_base_free(bml_btl,ctl); mca_bml_base_free(bml_btl,dst);