From fc42320ea6380b9ea89eb390987d5ccc24c3f084 Mon Sep 17 00:00:00 2001 From: Galen Shipman Date: Mon, 20 Mar 2006 22:11:23 +0000 Subject: [PATCH] check retry counts on NAK retrans as well as timeouts This commit was SVN r9342. --- ompi/mca/pml/dr/pml_dr_sendreq.h | 6 ++++++ ompi/mca/pml/dr/pml_dr_vfrag.c | 4 +--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ompi/mca/pml/dr/pml_dr_sendreq.h b/ompi/mca/pml/dr/pml_dr_sendreq.h index d99cd2a288..3e742ba7b9 100644 --- a/ompi/mca/pml/dr/pml_dr_sendreq.h +++ b/ompi/mca/pml/dr/pml_dr_sendreq.h @@ -351,6 +351,12 @@ do { \ #define MCA_PML_DR_SEND_REQUEST_RETRY(sendreq, vfrag) \ do { \ mca_bml_base_btl_t* bml_btl = sendreq->descriptor->des_context; \ + vfrag->vf_retry_cnt ++; \ + if(vfrag->vf_retry_cnt > mca_pml_dr.timer_wdog_max_count) { \ + opal_output(0, "%s:%d,%s retry count exceeded! FATAL", __FILE__, __LINE__, __func__); \ + orte_errmgr.abort(); \ + } \ + \ opal_output(0, "%s:%d:%s, retransmitting\n", __FILE__, __LINE__, __func__); \ assert(sendreq->descriptor->des_src != NULL); \ vfrag->vf_idx = 1; \ diff --git a/ompi/mca/pml/dr/pml_dr_vfrag.c b/ompi/mca/pml/dr/pml_dr_vfrag.c index 1404759677..9186b6011f 100644 --- a/ompi/mca/pml/dr/pml_dr_vfrag.c +++ b/ompi/mca/pml/dr/pml_dr_vfrag.c @@ -69,9 +69,8 @@ void mca_pml_dr_vfrag_wdog_timeout(int fd, short event, void* data) mca_pml_dr_vfrag_t* vfrag = (mca_pml_dr_vfrag_t*) data; mca_pml_dr_send_request_t* sendreq = vfrag->vf_send.pval; OPAL_THREAD_LOCK(&ompi_request_lock); - vfrag->vf_retry_cnt++; if(vfrag->vf_retry_cnt > mca_pml_dr.timer_wdog_max_count) { - opal_output(0, "wdog retry count exceeded! %s:%d FATAL", __FILE__, __LINE__); + opal_output(0, "%s:%d:%s, wdog retry count exceeded! FATAL", __FILE__, __LINE__, __func__); orte_errmgr.abort(); } vfrag->vf_idx = 1; @@ -90,7 +89,6 @@ void mca_pml_dr_vfrag_ack_timeout(int fd, short event, void* data) { mca_pml_dr_vfrag_t* vfrag = (mca_pml_dr_vfrag_t*) data; mca_pml_dr_send_request_t* sendreq = vfrag->vf_send.pval; OPAL_THREAD_LOCK(&ompi_request_lock); - vfrag->vf_retry_cnt++; if(vfrag->vf_retry_cnt > mca_pml_dr.timer_ack_max_count) { opal_output(0, "%s:%d: maximum ack retry count exceeded: FATAL", __FILE__, __LINE__); orte_errmgr.abort();