1
1

more fixes for failover.. and yet still more to come..

This commit was SVN r12450.
Этот коммит содержится в:
Galen Shipman 2006-11-06 21:27:17 +00:00
родитель ea77beca29
Коммит 813e7faea8
8 изменённых файлов: 100 добавлений и 74 удалений

Просмотреть файл

@ -561,12 +561,17 @@ int mca_bml_r2_del_btl(mca_btl_base_module_t* btl)
opal_list_item_t* item;
mca_btl_base_module_t** modules;
mca_btl_base_component_progress_fn_t * btl_progress_new;
bool found = false;
procs = ompi_proc_all(&num_procs);
if(NULL == procs)
return OMPI_SUCCESS;
if(opal_list_get_size(&mca_btl_base_modules_initialized) == 2){
opal_output(0, "only one BTL left, can't failover");
return OMPI_SUCCESS;
}
/* don't use this btl for any peers */
for(p=0; p<num_procs; p++) {
ompi_proc_t* proc = procs[p];
@ -581,10 +586,14 @@ int mca_bml_r2_del_btl(mca_btl_base_module_t* btl)
if(sm->btl_module == btl) {
opal_list_remove_item(&mca_btl_base_modules_initialized, item);
free(sm);
found = true;
break;
}
}
if(!found) {
/* doesn't even exist */
return OMPI_SUCCESS;
}
/* remove from bml list */
modules = (mca_btl_base_module_t**)malloc(sizeof(mca_btl_base_module_t*) * mca_bml_r2.num_btl_modules-1);
for(i=0,m=0; i<mca_bml_r2.num_btl_modules; i++) {
@ -596,30 +605,26 @@ int mca_bml_r2_del_btl(mca_btl_base_module_t* btl)
mca_bml_r2.btl_modules = modules;
mca_bml_r2.num_btl_modules = m;
/* remove progress function so btl_progress isn't
called on the failed BTL */
if(mca_bml_r2.num_btl_progress <= 1) {
/* nothing left to send on! */
opal_output(0, "%s:%d:%s: only one BTL, can't fail-over!",
__FILE__, __LINE__, __func__);
return OMPI_ERROR;
}
/* figure out which progress functions to keep */
btl_progress_new = (mca_btl_base_component_progress_fn_t*)
malloc(sizeof(mca_btl_base_component_progress_fn_t) *
(mca_bml_r2.num_btl_progress - 1));
j = 0;
for(i = 0; i < mca_bml_r2.num_btl_progress; i++) {
if(btl->btl_component->btl_progress != mca_bml_r2.btl_progress[i]) {
btl_progress_new[j] = mca_bml_r2.btl_progress[i];
j++;
if(btl->btl_component->btl_progress) {
/* figure out which progress functions to keep */
/* don't need to keep any if this is the last one.. */
if(mca_bml_r2.num_btl_progress > 1) {
btl_progress_new = (mca_btl_base_component_progress_fn_t*)
malloc(sizeof(mca_btl_base_component_progress_fn_t) *
(mca_bml_r2.num_btl_progress - 1));
j = 0;
for(i = 0; i < mca_bml_r2.num_btl_progress; i++) {
if(btl->btl_component->btl_progress != mca_bml_r2.btl_progress[i]) {
btl_progress_new[j] = mca_bml_r2.btl_progress[i];
j++;
}
}
free(mca_bml_r2.btl_progress);
mca_bml_r2.btl_progress = btl_progress_new;
}
mca_bml_r2.num_btl_progress--;
}
free(mca_bml_r2.btl_progress);
mca_bml_r2.btl_progress = btl_progress_new;
mca_bml_r2.num_btl_progress--;
/* cleanup */
btl->btl_finalize(btl);
free(procs);
@ -659,7 +664,7 @@ int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl)
}
}
}
/* remove btl from RDMA list */
if(mca_bml_base_btl_array_remove(&ep->btl_rdma, btl)) {

Просмотреть файл

@ -234,6 +234,8 @@ void mca_pml_dr_error_handler(
struct mca_btl_base_module_t* btl,
int32_t flags) {
/* try failover ! */
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
btl->btl_component->btl_version.mca_component_name);
mca_pml_dr_sendreq_cleanup_active(btl);
mca_bml.bml_del_btl(btl);
/* orte_errmgr.abort(); */

Просмотреть файл

@ -144,7 +144,8 @@ void mca_pml_dr_recv_frag_callback(
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs, hdr->hdr_common.hdr_vid)) {
MCA_PML_DR_DEBUG(0,(0, "%s:%d: got a duplicate vfrag vfrag id %d\n",
__FILE__, __LINE__, hdr->hdr_common.hdr_vid));
mca_pml_dr_recv_frag_ack(ep->bml_endpoint,
mca_pml_dr_recv_frag_ack(btl,
ep->bml_endpoint,
&hdr->hdr_common,
hdr->hdr_match.hdr_src_ptr.pval,
1, 0);
@ -198,7 +199,8 @@ void mca_pml_dr_recv_frag_callback(
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs_matched, hdr->hdr_common.hdr_vid)) {
MCA_PML_DR_DEBUG(0, (0, "%s:%d: acking duplicate matched rendezvous from sequence tracker\n",
__FILE__, __LINE__));
mca_pml_dr_recv_frag_ack(ep->bml_endpoint,
mca_pml_dr_recv_frag_ack(btl,
ep->bml_endpoint,
&hdr->hdr_common,
hdr->hdr_match.hdr_src_ptr.pval,
~(uint64_t) 0, hdr->hdr_rndv.hdr_msg_length);
@ -219,13 +221,14 @@ void mca_pml_dr_recv_frag_callback(
mca_pml_dr_comm_proc_check_matched(proc, hdr->hdr_common.hdr_vid);
if(NULL != recvreq) {
MCA_PML_DR_DEBUG(0,(0, "%s:%d: acking duplicate matched rendezvous from pending matched vfrag id %d\n",
__FILE__, __LINE__, hdr->hdr_common.hdr_vid));
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_common,
hdr->hdr_match.hdr_src_ptr, recvreq->req_bytes_received, 1);
__FILE__, __LINE__, hdr->hdr_common.hdr_vid));
mca_pml_dr_recv_request_ack(btl, recvreq, &hdr->hdr_common,
hdr->hdr_match.hdr_src_ptr, recvreq->req_bytes_received, 1);
} else {
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs_matched, hdr->hdr_common.hdr_vid)) {
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs_matched, hdr->hdr_common.hdr_vid)) {
MCA_PML_DR_DEBUG(0,(0, "%s:%d: acking duplicate matched rendezvous from sequence tracker\n", __FILE__, __LINE__));
mca_pml_dr_recv_frag_ack(ep->bml_endpoint,
mca_pml_dr_recv_frag_ack(btl,
ep->bml_endpoint,
&hdr->hdr_common,
hdr->hdr_match.hdr_src_ptr.pval,
~(uint64_t) 0, hdr->hdr_rndv.hdr_msg_length);
@ -277,7 +280,8 @@ void mca_pml_dr_recv_frag_callback(
/* seq_recvs protected by matching lock */
if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs, hdr->hdr_common.hdr_vid)) {
MCA_PML_DR_DEBUG(0,(0, "%s:%d: acking duplicate fragment\n", __FILE__, __LINE__));
mca_pml_dr_recv_frag_ack(ep->bml_endpoint,
mca_pml_dr_recv_frag_ack(btl,
ep->bml_endpoint,
&hdr->hdr_common,
hdr->hdr_frag.hdr_src_ptr.pval,
~(uint64_t) 0, 0);
@ -694,7 +698,8 @@ rematch:
}
MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum);
if(do_csum && csum != hdr->hdr_csum) {
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
mca_pml_dr_recv_frag_ack(btl,
(mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0);
MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x (segments %d length %d)\n",
__FILE__, __LINE__, csum, hdr->hdr_csum, num_segments,
@ -730,7 +735,8 @@ rematch:
}
MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum);
if(do_csum && csum != hdr->hdr_csum) {
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
mca_pml_dr_recv_frag_ack(btl,
(mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0);
MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x\n",
__FILE__, __LINE__, csum, hdr->hdr_csum));
@ -755,7 +761,8 @@ rematch:
MCA_PML_DR_DEBUG(1,(0, "%s:%d: received short message, acking now vfrag id: %d\n",
__FILE__, __LINE__, hdr->hdr_common.hdr_vid));
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
mca_pml_dr_recv_frag_ack(btl,
(mca_bml_base_endpoint_t*)ompi_proc->proc_bml,
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 1, 0);
}
@ -773,6 +780,7 @@ rematch:
void mca_pml_dr_recv_frag_ack(
mca_btl_base_module_t* btl,
mca_bml_base_endpoint_t* endpoint,
mca_pml_dr_common_hdr_t* hdr,
void *src_ptr,
@ -785,7 +793,11 @@ void mca_pml_dr_recv_frag_ack(
mca_pml_dr_ack_hdr_t* ack;
int rc;
bool do_csum;
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
/* use the same BTL for ACKs; this keeps failover sane */
bml_btl = mca_bml_base_btl_array_find(&endpoint->btl_eager,
btl);
do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);

Просмотреть файл

@ -133,12 +133,13 @@ do { \
*/
void mca_pml_dr_recv_frag_ack(
mca_bml_base_endpoint_t* endpoint,
mca_pml_dr_common_hdr_t* hdr,
void* src_ptr,
uint64_t mask,
uint16_t len);
mca_btl_base_module_t* btl,
mca_bml_base_endpoint_t* endpoint,
mca_pml_dr_common_hdr_t* hdr,
void* src_ptr,
uint64_t mask,
uint16_t len);
/**
* Callback from BTL on receipt of a recv_frag.
*/

Просмотреть файл

@ -36,7 +36,7 @@
* as we need to put the match back on the list if the checksum
* fails for later matching
*/
#define MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum,recvreq,hdr,csum,bytes_received) \
#define MCA_PML_DR_RECV_REQUEST_MATCH_ACK(btl,do_csum,recvreq,hdr,csum,bytes_received) \
if(do_csum && csum != hdr->hdr_match.hdr_csum) { \
/* failed the csum, put the request back on the list for \
* matching later on retransmission \
@ -46,10 +46,11 @@ if(do_csum && csum != hdr->hdr_match.hdr_csum) { \
} else { \
mca_pml_dr_recv_request_match_specific(recvreq); \
} \
mca_pml_dr_recv_frag_ack(recvreq->req_endpoint->bml_endpoint, \
&hdr->hdr_common, \
hdr->hdr_match.hdr_src_ptr.pval, \
0, 0); \
mca_pml_dr_recv_frag_ack(btl, \
recvreq->req_endpoint->bml_endpoint, \
&hdr->hdr_common, \
hdr->hdr_match.hdr_src_ptr.pval, \
0, 0); \
MCA_PML_DR_DEBUG(0,(0, "%s:%d: [rank %d -> rank %d] " \
"data checksum failed 0x%08x != 0x%08x\n", \
__FILE__, __LINE__, \
@ -59,7 +60,7 @@ if(do_csum && csum != hdr->hdr_match.hdr_csum) { \
} else if (recvreq->req_acked == false) { \
MCA_PML_DR_DEBUG(1,(0, "%s:%d: sending ack, vfrag ID %d", \
__FILE__, __LINE__, recvreq->req_vfrag0.vf_id)); \
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_common, \
mca_pml_dr_recv_request_ack(btl, recvreq, &hdr->hdr_common, \
hdr->hdr_match.hdr_src_ptr, bytes_received, 1); \
}
@ -162,11 +163,12 @@ static void mca_pml_dr_ctl_completion(
*/
void mca_pml_dr_recv_request_ack(
mca_pml_dr_recv_request_t* recvreq,
mca_pml_dr_common_hdr_t* hdr,
ompi_ptr_t src_ptr,
size_t vlen,
uint64_t mask)
mca_btl_base_module_t* btl,
mca_pml_dr_recv_request_t* recvreq,
mca_pml_dr_common_hdr_t* hdr,
ompi_ptr_t src_ptr,
size_t vlen,
uint64_t mask)
{
mca_btl_base_descriptor_t* des;
mca_bml_base_btl_t* bml_btl;
@ -174,8 +176,11 @@ void mca_pml_dr_recv_request_ack(
int rc;
bool do_csum;
/* use the same BTL for ACKs; this keeps failover sane */
bml_btl = mca_bml_base_btl_array_find(&recvreq->req_endpoint->bml_endpoint->btl_eager,
btl);
/* allocate descriptor */
bml_btl = mca_bml_base_btl_array_get_next(&recvreq->req_endpoint->bml_endpoint->btl_eager);
do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
@ -254,7 +259,7 @@ void mca_pml_dr_recv_request_progress(
bytes_received,
bytes_delivered,
csum);
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum, recvreq,hdr,csum,bytes_received);
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(btl,do_csum, recvreq,hdr,csum,bytes_received);
break;
@ -272,7 +277,7 @@ void mca_pml_dr_recv_request_progress(
bytes_received,
bytes_delivered,
csum);
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum, recvreq,hdr,csum,bytes_received);
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(btl,do_csum, recvreq,hdr,csum,bytes_received);
break;
@ -283,7 +288,8 @@ void mca_pml_dr_recv_request_progress(
if(vfrag->vf_ack == vfrag->vf_mask) {
MCA_PML_DR_DEBUG(1,(0, "%s:%d: sending ack, vfrag ID %d",
__FILE__, __LINE__, vfrag->vf_id));
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_common,
mca_pml_dr_recv_request_ack(btl,
recvreq, &hdr->hdr_common,
hdr->hdr_frag.hdr_src_ptr,
vfrag->vf_size,
vfrag->vf_mask);
@ -318,7 +324,7 @@ void mca_pml_dr_recv_request_progress(
ompi_seq_tracker_insert(&recvreq->req_endpoint->seq_recvs, vfrag->vf_id);
MCA_PML_DR_DEBUG(1,(0, "%s:%d: sending ack, vfrag ID %d",
__FILE__, __LINE__, vfrag->vf_id));
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_common,
mca_pml_dr_recv_request_ack(btl, recvreq, &hdr->hdr_common,
hdr->hdr_frag.hdr_src_ptr,
vfrag->vf_size, vfrag->vf_mask);
}

Просмотреть файл

@ -177,11 +177,12 @@ void mca_pml_dr_recv_request_match_specific(mca_pml_dr_recv_request_t* request);
* Ack a matched request.
*/
void mca_pml_dr_recv_request_ack(
mca_pml_dr_recv_request_t* recvreq,
mca_pml_dr_common_hdr_t* hdr,
ompi_ptr_t src_ptr,
size_t vlen,
uint64_t vmask);
mca_btl_base_module_t* blt,
mca_pml_dr_recv_request_t* recvreq,
mca_pml_dr_common_hdr_t* hdr,
ompi_ptr_t src_ptr,
size_t vlen,
uint64_t vmask);
/**
* Start an initialized request.

Просмотреть файл

@ -138,11 +138,6 @@ do {
/* } */ \
} while(0)
/**
* Start a send request.
*/
#define MCA_PML_DR_SEND_REQUEST_START(sendreq, rc) \
do { \
mca_pml_dr_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \

Просмотреть файл

@ -79,10 +79,14 @@ static void mca_pml_dr_vfrag_wdog_timeout(int fd, short event, void* data)
/* check for hung btl */
if(++vfrag->vf_wdog_cnt == mca_pml_dr.wdog_retry_max) {
/* declare btl dead */
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
vfrag->bml_btl->btl->btl_component->btl_version.mca_component_name);
mca_pml_dr_sendreq_cleanup_active(vfrag->bml_btl->btl);
mca_bml.bml_del_btl(vfrag->bml_btl->btl);
if(vfrag->bml_btl->btl) {
opal_output(0, "%s:%d:%s: failing BTL: %s", __FILE__, __LINE__, __func__,
vfrag->bml_btl->btl->btl_component->btl_version.mca_component_name);
mca_pml_dr_sendreq_cleanup_active(vfrag->bml_btl->btl);
mca_bml.bml_del_btl(vfrag->bml_btl->btl);
} else {
opal_output(0, "%s:%d:%s: failing already failed BTL", __FILE__, __LINE__, __func__);
}
mca_pml_dr_vfrag_reset(vfrag);
}