1
1

Try to fail over when we get an async error from the lower layer (BTL).

This commit was SVN r12420.
Этот коммит содержится в:
Galen Shipman 2006-11-03 15:40:26 +00:00
родитель 27420fbda3
Коммит f7c554df65
4 изменённых файлов: 25 добавлений и 19 удалений

Просмотреть файл

@ -233,5 +233,10 @@ int mca_pml_dr_dump(
/**
 * Error callback for a BTL that reported an asynchronous error.
 *
 * Rather than aborting, attempt failover: first clear the descriptor
 * context of every active send request tied to the failing BTL, then ask
 * the BML to delete that BTL.
 *
 * @param btl    BTL module that reported the error.
 * @param flags  Error flags passed by the caller; not inspected here.
 */
void mca_pml_dr_error_handler(
    struct mca_btl_base_module_t* btl,
    int32_t flags) {
    /* Deliberately no orte_errmgr.abort() here: aborting up front would
     * keep the failover steps below from ever taking effect. */
    mca_pml_dr_sendreq_cleanup_active(btl);
    mca_bml.bml_del_btl(btl);
}

Просмотреть файл

@ -1145,3 +1145,18 @@ void mca_pml_dr_send_request_frag_ack(
}
/**
 * Detach the descriptors of active send requests from a given BTL.
 *
 * Walks mca_pml_dr.send_active and, for every request whose descriptor
 * context is a non-NULL BML-BTL entry referring to @p btl, resets that
 * descriptor context to NULL.
 *
 * @param btl  BTL module whose descriptors should be detached.
 */
void mca_pml_dr_sendreq_cleanup_active(mca_btl_base_module_t* btl) {
    opal_list_item_t* cur = opal_list_get_first(&mca_pml_dr.send_active);

    while (cur != opal_list_get_end(&mca_pml_dr.send_active)) {
        /* Entries of send_active are treated as send requests. */
        mca_pml_dr_send_request_t* req = (mca_pml_dr_send_request_t*) cur;
        mca_btl_base_descriptor_t* desc = req->req_descriptor;
        mca_bml_base_btl_t* owner = desc->des_context;

        if (NULL != owner && owner->btl == btl) {
            desc->des_context = NULL;
        }

        cur = opal_list_get_next(cur);
    }
}

Просмотреть файл

@ -484,6 +484,8 @@ void mca_pml_dr_send_request_frag_ack(
mca_btl_base_module_t* btl,
mca_pml_dr_ack_hdr_t*);
/**
 * Clear the descriptor context of every active send request whose
 * descriptor is currently associated with the given BTL.
 *
 * @param btl  BTL module to detach from active send requests.
 */
void mca_pml_dr_sendreq_cleanup_active(mca_btl_base_module_t* btl);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -24,7 +24,6 @@
/* Forward declarations of file-local helpers defined further down in this
 * file: the two timeout callbacks (presumably watchdog and ACK timers --
 * confirm against the event registration code) and the helper that clears
 * descriptor contexts referring to a given BML-BTL entry. */
static void mca_pml_dr_vfrag_wdog_timeout(int fd, short event, void* vfrag);
static void mca_pml_dr_vfrag_ack_timeout(int fd, short event, void* vfrag);
static void mca_pml_dr_vfrag_cleanup_active_desc(mca_bml_base_btl_t* bml_btl);
static void mca_pml_dr_vfrag_construct(mca_pml_dr_vfrag_t* vfrag)
{
@ -82,7 +81,7 @@ static void mca_pml_dr_vfrag_wdog_timeout(int fd, short event, void* data)
/* declare btl dead */
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
vfrag->bml_btl->btl->btl_component->btl_version.mca_component_name);
mca_pml_dr_vfrag_cleanup_active_desc(vfrag->bml_btl);
mca_pml_dr_sendreq_cleanup_active(vfrag->bml_btl->btl);
mca_bml.bml_del_btl(vfrag->bml_btl->btl);
mca_pml_dr_vfrag_reset(vfrag);
}
@ -119,7 +118,7 @@ static void mca_pml_dr_vfrag_ack_timeout(int fd, short event, void* data)
/* declare btl dead */
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
vfrag->bml_btl->btl->btl_component->btl_version.mca_component_name);
mca_pml_dr_vfrag_cleanup_active_desc(vfrag->bml_btl);
mca_pml_dr_sendreq_cleanup_active(vfrag->bml_btl->btl);
mca_bml.bml_del_btl(vfrag->bml_btl->btl);
mca_pml_dr_vfrag_reset(vfrag);
}
@ -198,18 +197,3 @@ void mca_pml_dr_vfrag_reschedule(mca_pml_dr_vfrag_t* vfrag)
}
}
/**
 * Reset the descriptor context of every active send request that points
 * at the given BML-BTL entry.
 *
 * Iterates mca_pml_dr.send_active; whenever a request's descriptor context
 * equals @p bml_btl, it is set to NULL.
 *
 * @param bml_btl  BML-BTL entry to detach from active descriptors.
 */
static void mca_pml_dr_vfrag_cleanup_active_desc(mca_bml_base_btl_t* bml_btl) {
    opal_list_item_t* cur = opal_list_get_first(&mca_pml_dr.send_active);

    while (cur != opal_list_get_end(&mca_pml_dr.send_active)) {
        mca_pml_dr_send_request_t* req = (mca_pml_dr_send_request_t*) cur;
        mca_btl_base_descriptor_t* desc = req->req_descriptor;

        if (bml_btl == desc->des_context) {
            desc->des_context = NULL;
        }

        cur = opal_list_get_next(cur);
    }
}