More fixes for failover, with still more to come.
This commit was SVN r12450.
Этот коммит содержится в:
родитель
ea77beca29
Коммит
813e7faea8
@ -561,12 +561,17 @@ int mca_bml_r2_del_btl(mca_btl_base_module_t* btl)
|
||||
opal_list_item_t* item;
|
||||
mca_btl_base_module_t** modules;
|
||||
mca_btl_base_component_progress_fn_t * btl_progress_new;
|
||||
|
||||
bool found = false;
|
||||
|
||||
procs = ompi_proc_all(&num_procs);
|
||||
if(NULL == procs)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
|
||||
if(opal_list_get_size(&mca_btl_base_modules_initialized) == 2){
|
||||
opal_output(0, "only one BTL left, can't failover");
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* don't use this btl for any peers */
|
||||
for(p=0; p<num_procs; p++) {
|
||||
ompi_proc_t* proc = procs[p];
|
||||
@ -581,10 +586,14 @@ int mca_bml_r2_del_btl(mca_btl_base_module_t* btl)
|
||||
if(sm->btl_module == btl) {
|
||||
opal_list_remove_item(&mca_btl_base_modules_initialized, item);
|
||||
free(sm);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!found) {
|
||||
/* doesn't even exist */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
/* remove from bml list */
|
||||
modules = (mca_btl_base_module_t**)malloc(sizeof(mca_btl_base_module_t*) * mca_bml_r2.num_btl_modules-1);
|
||||
for(i=0,m=0; i<mca_bml_r2.num_btl_modules; i++) {
|
||||
@ -596,30 +605,26 @@ int mca_bml_r2_del_btl(mca_btl_base_module_t* btl)
|
||||
mca_bml_r2.btl_modules = modules;
|
||||
mca_bml_r2.num_btl_modules = m;
|
||||
|
||||
|
||||
/* remove progress function so btl_progress isn't
|
||||
called on the failed BTL */
|
||||
if(mca_bml_r2.num_btl_progress <= 1) {
|
||||
/* nothing left to send on! */
|
||||
opal_output(0, "%s:%d:%s: only one BTL, can't fail-over!",
|
||||
__FILE__, __LINE__, __func__);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
/* figure out which progress functions to keep */
|
||||
btl_progress_new = (mca_btl_base_component_progress_fn_t*)
|
||||
malloc(sizeof(mca_btl_base_component_progress_fn_t) *
|
||||
(mca_bml_r2.num_btl_progress - 1));
|
||||
j = 0;
|
||||
for(i = 0; i < mca_bml_r2.num_btl_progress; i++) {
|
||||
if(btl->btl_component->btl_progress != mca_bml_r2.btl_progress[i]) {
|
||||
btl_progress_new[j] = mca_bml_r2.btl_progress[i];
|
||||
j++;
|
||||
|
||||
if(btl->btl_component->btl_progress) {
|
||||
/* figure out which progress functions to keep */
|
||||
/* don't need to keep any if this is the last one.. */
|
||||
if(mca_bml_r2.num_btl_progress > 1) {
|
||||
btl_progress_new = (mca_btl_base_component_progress_fn_t*)
|
||||
malloc(sizeof(mca_btl_base_component_progress_fn_t) *
|
||||
(mca_bml_r2.num_btl_progress - 1));
|
||||
j = 0;
|
||||
for(i = 0; i < mca_bml_r2.num_btl_progress; i++) {
|
||||
if(btl->btl_component->btl_progress != mca_bml_r2.btl_progress[i]) {
|
||||
btl_progress_new[j] = mca_bml_r2.btl_progress[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
free(mca_bml_r2.btl_progress);
|
||||
mca_bml_r2.btl_progress = btl_progress_new;
|
||||
}
|
||||
mca_bml_r2.num_btl_progress--;
|
||||
}
|
||||
free(mca_bml_r2.btl_progress);
|
||||
mca_bml_r2.btl_progress = btl_progress_new;
|
||||
mca_bml_r2.num_btl_progress--;
|
||||
|
||||
/* cleanup */
|
||||
btl->btl_finalize(btl);
|
||||
free(procs);
|
||||
@ -659,7 +664,7 @@ int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* remove btl from RDMA list */
|
||||
if(mca_bml_base_btl_array_remove(&ep->btl_rdma, btl)) {
|
||||
|
||||
|
@ -234,6 +234,8 @@ void mca_pml_dr_error_handler(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
int32_t flags) {
|
||||
/* try failover ! */
|
||||
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
|
||||
btl->btl_component->btl_version.mca_component_name);
|
||||
mca_pml_dr_sendreq_cleanup_active(btl);
|
||||
mca_bml.bml_del_btl(btl);
|
||||
/* orte_errmgr.abort(); */
|
||||
|
@ -144,7 +144,8 @@ void mca_pml_dr_recv_frag_callback(
|
||||
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs, hdr->hdr_common.hdr_vid)) {
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: got a duplicate vfrag vfrag id %d\n",
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_vid));
|
||||
mca_pml_dr_recv_frag_ack(ep->bml_endpoint,
|
||||
mca_pml_dr_recv_frag_ack(btl,
|
||||
ep->bml_endpoint,
|
||||
&hdr->hdr_common,
|
||||
hdr->hdr_match.hdr_src_ptr.pval,
|
||||
1, 0);
|
||||
@ -198,7 +199,8 @@ void mca_pml_dr_recv_frag_callback(
|
||||
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs_matched, hdr->hdr_common.hdr_vid)) {
|
||||
MCA_PML_DR_DEBUG(0, (0, "%s:%d: acking duplicate matched rendezvous from sequence tracker\n",
|
||||
__FILE__, __LINE__));
|
||||
mca_pml_dr_recv_frag_ack(ep->bml_endpoint,
|
||||
mca_pml_dr_recv_frag_ack(btl,
|
||||
ep->bml_endpoint,
|
||||
&hdr->hdr_common,
|
||||
hdr->hdr_match.hdr_src_ptr.pval,
|
||||
~(uint64_t) 0, hdr->hdr_rndv.hdr_msg_length);
|
||||
@ -219,13 +221,14 @@ void mca_pml_dr_recv_frag_callback(
|
||||
mca_pml_dr_comm_proc_check_matched(proc, hdr->hdr_common.hdr_vid);
|
||||
if(NULL != recvreq) {
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: acking duplicate matched rendezvous from pending matched vfrag id %d\n",
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_vid));
|
||||
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_common,
|
||||
hdr->hdr_match.hdr_src_ptr, recvreq->req_bytes_received, 1);
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_vid));
|
||||
mca_pml_dr_recv_request_ack(btl, recvreq, &hdr->hdr_common,
|
||||
hdr->hdr_match.hdr_src_ptr, recvreq->req_bytes_received, 1);
|
||||
} else {
|
||||
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs_matched, hdr->hdr_common.hdr_vid)) {
|
||||
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs_matched, hdr->hdr_common.hdr_vid)) {
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: acking duplicate matched rendezvous from sequence tracker\n", __FILE__, __LINE__));
|
||||
mca_pml_dr_recv_frag_ack(ep->bml_endpoint,
|
||||
mca_pml_dr_recv_frag_ack(btl,
|
||||
ep->bml_endpoint,
|
||||
&hdr->hdr_common,
|
||||
hdr->hdr_match.hdr_src_ptr.pval,
|
||||
~(uint64_t) 0, hdr->hdr_rndv.hdr_msg_length);
|
||||
@ -277,7 +280,8 @@ void mca_pml_dr_recv_frag_callback(
|
||||
/* seq_recvs protected by matching lock */
|
||||
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs, hdr->hdr_common.hdr_vid)) {
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: acking duplicate fragment\n", __FILE__, __LINE__));
|
||||
mca_pml_dr_recv_frag_ack(ep->bml_endpoint,
|
||||
mca_pml_dr_recv_frag_ack(btl,
|
||||
ep->bml_endpoint,
|
||||
&hdr->hdr_common,
|
||||
hdr->hdr_frag.hdr_src_ptr.pval,
|
||||
~(uint64_t) 0, 0);
|
||||
@ -694,7 +698,8 @@ rematch:
|
||||
}
|
||||
MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum);
|
||||
if(do_csum && csum != hdr->hdr_csum) {
|
||||
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
|
||||
mca_pml_dr_recv_frag_ack(btl,
|
||||
(mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
|
||||
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0);
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x (segments %d length %d)\n",
|
||||
__FILE__, __LINE__, csum, hdr->hdr_csum, num_segments,
|
||||
@ -730,7 +735,8 @@ rematch:
|
||||
}
|
||||
MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum);
|
||||
if(do_csum && csum != hdr->hdr_csum) {
|
||||
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
|
||||
mca_pml_dr_recv_frag_ack(btl,
|
||||
(mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
|
||||
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0);
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x\n",
|
||||
__FILE__, __LINE__, csum, hdr->hdr_csum));
|
||||
@ -755,7 +761,8 @@ rematch:
|
||||
MCA_PML_DR_DEBUG(1,(0, "%s:%d: received short message, acking now vfrag id: %d\n",
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_vid));
|
||||
|
||||
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
|
||||
mca_pml_dr_recv_frag_ack(btl,
|
||||
(mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
|
||||
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 1, 0);
|
||||
}
|
||||
|
||||
@ -773,6 +780,7 @@ rematch:
|
||||
|
||||
|
||||
void mca_pml_dr_recv_frag_ack(
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_bml_base_endpoint_t* endpoint,
|
||||
mca_pml_dr_common_hdr_t* hdr,
|
||||
void *src_ptr,
|
||||
@ -785,7 +793,11 @@ void mca_pml_dr_recv_frag_ack(
|
||||
mca_pml_dr_ack_hdr_t* ack;
|
||||
int rc;
|
||||
bool do_csum;
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
|
||||
/* using the same BTL for ACKs makes failover SANE */
|
||||
bml_btl = mca_bml_base_btl_array_find(&endpoint->btl_eager,
|
||||
btl);
|
||||
|
||||
do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
|
@ -133,12 +133,13 @@ do { \
|
||||
*/
|
||||
|
||||
void mca_pml_dr_recv_frag_ack(
|
||||
mca_bml_base_endpoint_t* endpoint,
|
||||
mca_pml_dr_common_hdr_t* hdr,
|
||||
void* src_ptr,
|
||||
uint64_t mask,
|
||||
uint16_t len);
|
||||
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_bml_base_endpoint_t* endpoint,
|
||||
mca_pml_dr_common_hdr_t* hdr,
|
||||
void* src_ptr,
|
||||
uint64_t mask,
|
||||
uint16_t len);
|
||||
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag.
|
||||
*/
|
||||
|
@ -36,7 +36,7 @@
|
||||
* as we need to put the match back on the list if the checksum
|
||||
* fails for later matching
|
||||
*/
|
||||
#define MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum,recvreq,hdr,csum,bytes_received) \
|
||||
#define MCA_PML_DR_RECV_REQUEST_MATCH_ACK(btl,do_csum,recvreq,hdr,csum,bytes_received) \
|
||||
if(do_csum && csum != hdr->hdr_match.hdr_csum) { \
|
||||
/* failed the csum, put the request back on the list for \
|
||||
* matching later on retransmission \
|
||||
@ -46,10 +46,11 @@ if(do_csum && csum != hdr->hdr_match.hdr_csum) { \
|
||||
} else { \
|
||||
mca_pml_dr_recv_request_match_specific(recvreq); \
|
||||
} \
|
||||
mca_pml_dr_recv_frag_ack(recvreq->req_endpoint->bml_endpoint, \
|
||||
&hdr->hdr_common, \
|
||||
hdr->hdr_match.hdr_src_ptr.pval, \
|
||||
0, 0); \
|
||||
mca_pml_dr_recv_frag_ack(btl, \
|
||||
recvreq->req_endpoint->bml_endpoint, \
|
||||
&hdr->hdr_common, \
|
||||
hdr->hdr_match.hdr_src_ptr.pval, \
|
||||
0, 0); \
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: [rank %d -> rank %d] " \
|
||||
"data checksum failed 0x%08x != 0x%08x\n", \
|
||||
__FILE__, __LINE__, \
|
||||
@ -59,7 +60,7 @@ if(do_csum && csum != hdr->hdr_match.hdr_csum) { \
|
||||
} else if (recvreq->req_acked == false) { \
|
||||
MCA_PML_DR_DEBUG(1,(0, "%s:%d: sending ack, vfrag ID %d", \
|
||||
__FILE__, __LINE__, recvreq->req_vfrag0.vf_id)); \
|
||||
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_common, \
|
||||
mca_pml_dr_recv_request_ack(btl, recvreq, &hdr->hdr_common, \
|
||||
hdr->hdr_match.hdr_src_ptr, bytes_received, 1); \
|
||||
}
|
||||
|
||||
@ -162,11 +163,12 @@ static void mca_pml_dr_ctl_completion(
|
||||
*/
|
||||
|
||||
void mca_pml_dr_recv_request_ack(
|
||||
mca_pml_dr_recv_request_t* recvreq,
|
||||
mca_pml_dr_common_hdr_t* hdr,
|
||||
ompi_ptr_t src_ptr,
|
||||
size_t vlen,
|
||||
uint64_t mask)
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_dr_recv_request_t* recvreq,
|
||||
mca_pml_dr_common_hdr_t* hdr,
|
||||
ompi_ptr_t src_ptr,
|
||||
size_t vlen,
|
||||
uint64_t mask)
|
||||
{
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
@ -174,8 +176,11 @@ void mca_pml_dr_recv_request_ack(
|
||||
int rc;
|
||||
bool do_csum;
|
||||
|
||||
|
||||
/* using the same BTL for ACKs makes failover SANE */
|
||||
bml_btl = mca_bml_base_btl_array_find(&recvreq->req_endpoint->bml_endpoint->btl_eager,
|
||||
btl);
|
||||
/* allocate descriptor */
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&recvreq->req_endpoint->bml_endpoint->btl_eager);
|
||||
do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
|
||||
@ -254,7 +259,7 @@ void mca_pml_dr_recv_request_progress(
|
||||
bytes_received,
|
||||
bytes_delivered,
|
||||
csum);
|
||||
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum, recvreq,hdr,csum,bytes_received);
|
||||
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(btl,do_csum, recvreq,hdr,csum,bytes_received);
|
||||
|
||||
break;
|
||||
|
||||
@ -272,7 +277,7 @@ void mca_pml_dr_recv_request_progress(
|
||||
bytes_received,
|
||||
bytes_delivered,
|
||||
csum);
|
||||
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum, recvreq,hdr,csum,bytes_received);
|
||||
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(btl,do_csum, recvreq,hdr,csum,bytes_received);
|
||||
|
||||
break;
|
||||
|
||||
@ -283,7 +288,8 @@ void mca_pml_dr_recv_request_progress(
|
||||
if(vfrag->vf_ack == vfrag->vf_mask) {
|
||||
MCA_PML_DR_DEBUG(1,(0, "%s:%d: sending ack, vfrag ID %d",
|
||||
__FILE__, __LINE__, vfrag->vf_id));
|
||||
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_common,
|
||||
mca_pml_dr_recv_request_ack(btl,
|
||||
recvreq, &hdr->hdr_common,
|
||||
hdr->hdr_frag.hdr_src_ptr,
|
||||
vfrag->vf_size,
|
||||
vfrag->vf_mask);
|
||||
@ -318,7 +324,7 @@ void mca_pml_dr_recv_request_progress(
|
||||
ompi_seq_tracker_insert(&recvreq->req_endpoint->seq_recvs, vfrag->vf_id);
|
||||
MCA_PML_DR_DEBUG(1,(0, "%s:%d: sending ack, vfrag ID %d",
|
||||
__FILE__, __LINE__, vfrag->vf_id));
|
||||
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_common,
|
||||
mca_pml_dr_recv_request_ack(btl, recvreq, &hdr->hdr_common,
|
||||
hdr->hdr_frag.hdr_src_ptr,
|
||||
vfrag->vf_size, vfrag->vf_mask);
|
||||
}
|
||||
|
@ -177,11 +177,12 @@ void mca_pml_dr_recv_request_match_specific(mca_pml_dr_recv_request_t* request);
|
||||
* Ack a matched request.
|
||||
*/
|
||||
void mca_pml_dr_recv_request_ack(
|
||||
mca_pml_dr_recv_request_t* recvreq,
|
||||
mca_pml_dr_common_hdr_t* hdr,
|
||||
ompi_ptr_t src_ptr,
|
||||
size_t vlen,
|
||||
uint64_t vmask);
|
||||
mca_btl_base_module_t* blt,
|
||||
mca_pml_dr_recv_request_t* recvreq,
|
||||
mca_pml_dr_common_hdr_t* hdr,
|
||||
ompi_ptr_t src_ptr,
|
||||
size_t vlen,
|
||||
uint64_t vmask);
|
||||
|
||||
/**
|
||||
* Start an initialized request.
|
||||
|
@ -138,11 +138,6 @@ do {
|
||||
/* } */ \
|
||||
} while(0)
|
||||
|
||||
|
||||
/**
|
||||
* Start a send request.
|
||||
*/
|
||||
|
||||
#define MCA_PML_DR_SEND_REQUEST_START(sendreq, rc) \
|
||||
do { \
|
||||
mca_pml_dr_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
|
||||
|
@ -79,10 +79,14 @@ static void mca_pml_dr_vfrag_wdog_timeout(int fd, short event, void* data)
|
||||
/* check for hung btl */
|
||||
if(++vfrag->vf_wdog_cnt == mca_pml_dr.wdog_retry_max) {
|
||||
/* declare btl dead */
|
||||
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
|
||||
vfrag->bml_btl->btl->btl_component->btl_version.mca_component_name);
|
||||
mca_pml_dr_sendreq_cleanup_active(vfrag->bml_btl->btl);
|
||||
mca_bml.bml_del_btl(vfrag->bml_btl->btl);
|
||||
if(vfrag->bml_btl->btl) {
|
||||
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
|
||||
vfrag->bml_btl->btl->btl_component->btl_version.mca_component_name);
|
||||
mca_pml_dr_sendreq_cleanup_active(vfrag->bml_btl->btl);
|
||||
mca_bml.bml_del_btl(vfrag->bml_btl->btl);
|
||||
} else {
|
||||
opal_output(0, "%s:%d:%s: failing already failed BTL", __FILE__, __LINE__, __func__);
|
||||
}
|
||||
mca_pml_dr_vfrag_reset(vfrag);
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user