1
1
This commit was SVN r23994.
Этот коммит содержится в:
Rolf vandeVaart 2010-11-04 19:35:25 +00:00
родитель e40483465e
Коммит 9ed780b73d
4 изменённых файлов: 14 добавлений и 20 удалений

Просмотреть файл

@ -380,7 +380,17 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t
return; \ return; \
} }
/* Failover check used while scheduling a send request.
 *
 * Compares the number of BTLs cached in the send range
 * (range->range_btl_cnt) with the current size of the endpoint's
 * btl_send array.  A mismatch means that a BTL was removed from the
 * available list after the range was set up.  In that case the error
 * counter of the send request is bumped and the ENCLOSING FUNCTION
 * returns OMPI_ERROR so that the request can be started over.
 *
 * Because the macro contains a return statement it may only be used
 * inside functions returning int.  The body is wrapped in
 * do { } while (0) so that the macro behaves like a single statement
 * (safe in unbraced if/else), and both arguments are parenthesized so
 * that arbitrary pointer expressions can be passed.
 */
#define MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range)                     \
    do {                                                                      \
        if ((int)mca_bml_base_btl_array_get_size(                             \
                &(sendreq)->req_endpoint->btl_send)                           \
            != (range)->range_btl_cnt) {                                      \
            (sendreq)->req_error++;                                           \
            return OMPI_ERROR;                                                \
        }                                                                     \
    } while (0)
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -626,17 +626,12 @@ static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl,
OPAL_THREAD_LOCK(&comm->matching_lock); OPAL_THREAD_LOCK(&comm->matching_lock);
#ifdef PML_BFO #ifdef PML_BFO
/* In case of network failover, we may get a message telling us to
* restart. In that case, we already have a pointer to the receive
* request in the header itself. */
if(OPAL_UNLIKELY(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_RESTART)) { if(OPAL_UNLIKELY(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_RESTART)) {
match = mca_pml_bfo_get_request(hdr); if (NULL == (match = mca_pml_bfo_get_request(hdr))) {
if (NULL == match) {
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
} else { } else {
#endif #endif
/* get sequence number of next message that can be processed */ /* get sequence number of next message that can be processed */
next_msg_seq_expected = (uint16_t)proc->expected_sequence; next_msg_seq_expected = (uint16_t)proc->expected_sequence;
if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected)) if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected))
@ -672,6 +667,7 @@ out_of_order_match:
/* release matching lock before processing fragment */ /* release matching lock before processing fragment */
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&comm->matching_lock);
#ifdef PML_BFO #ifdef PML_BFO
} }
#endif #endif

Просмотреть файл

@ -174,7 +174,6 @@ static void mca_pml_bfo_recv_ctl_completion( mca_btl_base_module_t* btl,
if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) { if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) {
mca_pml_bfo_check_recv_ctl_completion_status(btl, des, status); mca_pml_bfo_check_recv_ctl_completion_status(btl, des, status);
} }
MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des); MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des);
#endif #endif
MCA_PML_BFO_PROGRESS_PENDING(bml_btl); MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
@ -350,10 +349,8 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
#ifdef PML_BFO #ifdef PML_BFO
if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) { if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) {
recvreq->req_events--; recvreq->req_events--;
assert(recvreq->req_events >= 0);
} }
#endif #endif
/* check completion status */ /* check completion status */
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
#ifdef PML_BFO #ifdef PML_BFO
@ -364,7 +361,6 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
orte_errmgr.abort(-1, NULL); orte_errmgr.abort(-1, NULL);
#endif #endif
} }
#ifdef PML_BFO #ifdef PML_BFO
MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl); MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl);
MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, "RDMA write"); MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, "RDMA write");

Просмотреть файл

@ -1060,15 +1060,7 @@ mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq)
assert(range->range_send_length != 0); assert(range->range_send_length != 0);
#ifdef PML_BFO #ifdef PML_BFO
/* Failover code. If this is true, this means the request thinks we MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range);
* have more BTLs than there really are. This can happen because
* a BTL was removed from the available list. In this case, we
* want to start over. */
if ((int)mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->btl_send)
!= range->range_btl_cnt) {
sendreq->req_error++;
return OMPI_ERROR;
}
#endif #endif
if(prev_bytes_remaining == range->range_send_length) if(prev_bytes_remaining == range->range_send_length)