2005-11-22 20:24:47 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2005-12-21 00:42:58 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
2005-11-22 20:24:47 +03:00
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
2005-12-21 00:42:58 +03:00
|
|
|
*
|
2005-11-22 20:24:47 +03:00
|
|
|
* Additional copyrights may follow
|
2005-12-21 00:42:58 +03:00
|
|
|
*
|
2005-11-22 20:24:47 +03:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
2005-12-21 00:42:58 +03:00
|
|
|
|
2005-11-22 20:24:47 +03:00
|
|
|
#include "ompi_config.h"
|
2005-12-21 00:42:58 +03:00
|
|
|
#include "pml_dr_vfrag.h"
|
2006-02-24 20:08:14 +03:00
|
|
|
#include "pml_dr_sendreq.h"
|
2006-03-17 01:33:08 +03:00
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
|
|
|
2006-03-04 03:36:16 +03:00
|
|
|
/*
 * Forward declarations of the timer callbacks defined further down in this
 * file; the vfrag constructor needs them when it sets up its timer events.
 * The third argument is the opaque pointer handed to opal_evtimer_set().
 */
void mca_pml_dr_vfrag_wdog_timeout(int fd, short event, void *vfrag);
void mca_pml_dr_vfrag_ack_timeout(int fd, short event, void *vfrag);
|
2005-12-21 00:42:58 +03:00
|
|
|
|
|
|
|
/**
 * Constructor for mca_pml_dr_vfrag_t objects.
 *
 * Clears the send/recv back-pointers and the bookkeeping fields, starts the
 * send count at 1, copies the watchdog and ack timeout intervals from the
 * mca_pml_dr component settings, and binds both timer events to their
 * timeout callbacks with this vfrag as the callback argument.
 *
 * @param vfrag  the object being constructed
 */
static void mca_pml_dr_vfrag_construct(mca_pml_dr_vfrag_t* vfrag)
{
    vfrag->vf_send.pval = NULL;
    vfrag->vf_recv.pval = NULL;
    vfrag->vf_id = 0;
    vfrag->vf_idx = 0;
    vfrag->vf_len = 0;
    vfrag->vf_offset = 0;
    vfrag->vf_size = 0;
    vfrag->vf_max_send_size = 0;
    vfrag->vf_ack = 0;
    vfrag->vf_mask = 0;
    vfrag->vf_send_cnt = 1;

    /* watchdog timeout interval */
    vfrag->tv_wdog.tv_sec = mca_pml_dr.timer_wdog_sec;
    vfrag->tv_wdog.tv_usec = mca_pml_dr.timer_wdog_usec;

    /*
     * ack timeout interval: the seconds field must come from the *_sec
     * setting. It was previously loaded from timer_ack_usec, which turned a
     * microsecond count into a number of seconds.
     */
    vfrag->tv_ack.tv_sec = mca_pml_dr.timer_ack_sec;
    vfrag->tv_ack.tv_usec = mca_pml_dr.timer_ack_usec;

    /* both timers hand this vfrag back to their callback */
    opal_evtimer_set(&vfrag->ev_wdog, mca_pml_dr_vfrag_wdog_timeout, (void*) vfrag);
    opal_evtimer_set(&vfrag->ev_ack, mca_pml_dr_vfrag_ack_timeout, (void*) vfrag);
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Destructor for mca_pml_dr_vfrag_t objects.
 *
 * The body is empty: no work is performed when a vfrag is destructed.
 *
 * @param vfrag  the object being destructed (unused)
 */
static void mca_pml_dr_vfrag_destruct(mca_pml_dr_vfrag_t* vfrag)
{
    /* no-op */
}
|
|
|
|
|
2005-11-22 20:24:47 +03:00
|
|
|
|
2005-12-21 00:42:58 +03:00
|
|
|
/*
 * Class instance for mca_pml_dr_vfrag_t, declared against opal_list_item_t
 * and wired to the constructor/destructor defined above in this file.
 */
OBJ_CLASS_INSTANCE(mca_pml_dr_vfrag_t,
                   opal_list_item_t,
                   mca_pml_dr_vfrag_construct,
                   mca_pml_dr_vfrag_destruct);
|
2005-11-22 20:24:47 +03:00
|
|
|
|
|
|
|
|
2006-02-24 20:08:14 +03:00
|
|
|
/**
|
|
|
|
* The wdog timer expired, better do something about it, like resend the current part of the vfrag
|
|
|
|
*/
|
2006-03-04 03:36:16 +03:00
|
|
|
void mca_pml_dr_vfrag_wdog_timeout(int fd, short event, void* data)
|
|
|
|
{
|
2006-02-24 20:08:14 +03:00
|
|
|
mca_pml_dr_vfrag_t* vfrag = (mca_pml_dr_vfrag_t*) data;
|
2006-03-04 03:36:16 +03:00
|
|
|
mca_pml_dr_send_request_t* sendreq = vfrag->vf_send.pval;
|
|
|
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
2006-03-17 01:33:08 +03:00
|
|
|
vfrag->vf_send_cnt++;
|
|
|
|
if(vfrag->vf_send_cnt > mca_pml_dr.timer_wdog_max_count) {
|
|
|
|
opal_output(0, "wdog retry count exceeded! %s:%d FATAL", __FILE__, __LINE__);
|
|
|
|
orte_errmgr.abort();
|
|
|
|
}
|
|
|
|
vfrag->vf_idx = 1;
|
|
|
|
vfrag->vf_mask_processed = 0;
|
|
|
|
vfrag->vf_ack = 0;
|
2006-02-24 20:08:14 +03:00
|
|
|
opal_list_append(&sendreq->req_retrans, (opal_list_item_t*)vfrag);
|
2006-03-04 03:36:16 +03:00
|
|
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
2006-02-24 20:08:14 +03:00
|
|
|
mca_pml_dr_send_request_schedule(sendreq);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The ack timer expired, better do something about it, like resend the entire vfrag?
|
|
|
|
*/
|
2006-03-04 03:36:16 +03:00
|
|
|
void mca_pml_dr_vfrag_ack_timeout(int fd, short event, void* data) {
|
2006-03-17 01:33:08 +03:00
|
|
|
mca_pml_dr_vfrag_t* vfrag = (mca_pml_dr_vfrag_t*) data;
|
2006-03-04 03:36:16 +03:00
|
|
|
mca_pml_dr_send_request_t* sendreq = vfrag->vf_send.pval;
|
|
|
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
2006-03-17 01:33:08 +03:00
|
|
|
vfrag->vf_send_cnt++;
|
|
|
|
if(vfrag->vf_send_cnt > mca_pml_dr.timer_ack_max_count) {
|
2006-03-17 21:46:48 +03:00
|
|
|
opal_output(0, "%s:%d: maximum ack retry count exceeded: FATAL", __FILE__, __LINE__);
|
2006-03-17 01:33:08 +03:00
|
|
|
orte_errmgr.abort();
|
|
|
|
}
|
|
|
|
vfrag->vf_idx = 1;
|
|
|
|
vfrag->vf_mask_processed = 0;
|
|
|
|
vfrag->vf_ack = 0;
|
|
|
|
if(0 == vfrag->vf_offset) { /* this is the first part of the message
|
|
|
|
that we need to resend */
|
|
|
|
mca_bml_base_btl_t* bml_btl = sendreq->descriptor->des_context;
|
|
|
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
|
|
|
mca_bml_base_send(bml_btl, sendreq->descriptor, MCA_BTL_TAG_PML);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
opal_list_append(&sendreq->req_retrans, (opal_list_item_t*)vfrag);
|
|
|
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
|
|
|
mca_pml_dr_send_request_schedule(sendreq);
|
|
|
|
}
|
2006-02-24 20:08:14 +03:00
|
|
|
}
|
|
|
|
|
2005-11-22 20:24:47 +03:00
|
|
|
|