From 59e3fa8ed3c2b2353fd112cb4505b08b22fc99bd Mon Sep 17 00:00:00 2001 From: Rolf vandeVaart Date: Wed, 29 Sep 2010 13:46:45 +0000 Subject: [PATCH] Some more formatting fixes and code refactoring. All these changes are in the bfo so this has no effect on ob1. This commit was SVN r23815. --- ompi/mca/pml/bfo/pml_bfo.c | 45 ++++------- ompi/mca/pml/bfo/pml_bfo_failover.c | 112 ++++++++++++++++++++-------- ompi/mca/pml/bfo/pml_bfo_failover.h | 95 ++++++++++++++++++++++- ompi/mca/pml/bfo/pml_bfo_hdr.h | 14 +--- ompi/mca/pml/bfo/pml_bfo_recvfrag.c | 63 ++++------------ ompi/mca/pml/bfo/pml_bfo_recvreq.c | 43 +---------- ompi/mca/pml/bfo/pml_bfo_sendreq.c | 2 +- 7 files changed, 210 insertions(+), 164 deletions(-) diff --git a/ompi/mca/pml/bfo/pml_bfo.c b/ompi/mca/pml/bfo/pml_bfo.c index 2d76de1b49..dfffaae13c 100644 --- a/ompi/mca/pml/bfo/pml_bfo.c +++ b/ompi/mca/pml/bfo/pml_bfo.c @@ -411,33 +411,11 @@ int mca_pml_bfo_add_procs(ompi_proc_t** procs, size_t nprocs) goto cleanup_and_return; /* BFO FAILOVER CODE - begin */ - /* The following four functions are utilized when failover - * support for openib is enabled. */ - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, - mca_pml_bfo_recv_frag_callback_rndvrestartnotify, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK, - mca_pml_bfo_recv_frag_callback_rndvrestartack, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK, - mca_pml_bfo_recv_frag_callback_rndvrestartnack, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, - mca_pml_bfo_recv_frag_callback_recverrnotify, - NULL ); + rc = mca_pml_bfo_register_callbacks(); if(OMPI_SUCCESS != rc) goto cleanup_and_return; /* BFO FAILOVER CODE - end */ - + /* register error handlers */ rc = mca_bml.bml_register_error(mca_pml_bfo_error_handler); if(OMPI_SUCCESS != rc) @@ -529,18 +507,21 @@ int mca_pml_bfo_send_fin( ompi_proc_t* proc, return OMPI_ERR_OUT_OF_RESOURCE; } fin->des_cbfunc = mca_pml_bfo_fin_completion; - fin->des_cbdata = proc; + fin->des_cbdata = NULL; /* fill in header */ hdr = (mca_pml_bfo_fin_hdr_t*)fin->des_src->seg_addr.pval; - hdr->hdr_match.hdr_common.hdr_flags = 0; - hdr->hdr_match.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN; + hdr->hdr_common.hdr_flags = 0; + hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN; hdr->hdr_des = hdr_des; hdr->hdr_fail = status; +/* BFO FAILOVER CODE - begin */ + fin->des_cbdata = proc; hdr->hdr_match.hdr_seq = seq; - hdr->hdr_restartseq = restartseq; hdr->hdr_match.hdr_ctx = ctx; hdr->hdr_match.hdr_src = src; + hdr->hdr_match.hdr_common.hdr_flags = restartseq; /* use unused hdr_flags field */ +/* BFO FAILOVER CODE - end */ bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FIN, proc); @@ -608,10 +589,10 @@ void mca_pml_bfo_process_pending_packets(struct mca_btl_base_module_t* btl) pckt->hdr.hdr_fin.hdr_des, pckt->order, pckt->hdr.hdr_fin.hdr_fail, - pckt->hdr.hdr_match.hdr_seq, - pckt->hdr.hdr_fin.hdr_restartseq, - pckt->hdr.hdr_match.hdr_ctx, - pckt->hdr.hdr_match.hdr_src); + pckt->hdr.hdr_fin.hdr_match.hdr_seq, + pckt->hdr.hdr_fin.hdr_match.hdr_common.hdr_flags, + pckt->hdr.hdr_fin.hdr_match.hdr_ctx, + pckt->hdr.hdr_fin.hdr_match.hdr_src); if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) ) { return; } diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.c b/ompi/mca/pml/bfo/pml_bfo_failover.c
index ae05826354..67cfdb8e12 100644 --- a/ompi/mca/pml/bfo/pml_bfo_failover.c +++ b/ompi/mca/pml/bfo/pml_bfo_failover.c @@ -148,10 +148,12 @@ bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t* hdr, mca_btl_base_descripto * if it was freed and not reused yet. */ if (NULL == rdma->des_cbdata) { opal_output_verbose(20, mca_pml_bfo_output, - "FIN: received: dropping because not pointing to valid descriptor " - "PML=%d CTX=%d SRC=%d RQS=%d", - hdr->hdr_match.hdr_seq, hdr->hdr_match.hdr_ctx, - hdr->hdr_match.hdr_src, hdr->hdr_fin.hdr_restartseq); + "FIN: received: dropping because not pointing to valid descriptor " + "PML=%d CTX=%d SRC=%d RQS=%d", + hdr->hdr_fin.hdr_match.hdr_seq, + hdr->hdr_fin.hdr_match.hdr_ctx, + hdr->hdr_fin.hdr_match.hdr_src, + hdr->hdr_fin.hdr_match.hdr_common.hdr_flags); return true; } @@ -165,65 +167,76 @@ bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t* hdr, mca_btl_base_descripto * what fields to access. */ if (basereq->req_type == MCA_PML_REQUEST_RECV) { mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)basereq; - if ((hdr->hdr_match.hdr_ctx != recvreq->req_recv.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE) || - (hdr->hdr_match.hdr_seq != (uint16_t)recvreq->req_msgseq)) { + if ((hdr->hdr_fin.hdr_match.hdr_ctx != + recvreq->req_recv.req_base.req_comm->c_contextid) || + (hdr->hdr_fin.hdr_match.hdr_src != + recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE) || + (hdr->hdr_fin.hdr_match.hdr_seq != (uint16_t)recvreq->req_msgseq)) { opal_output_verbose(5, mca_pml_bfo_output, "FIN: received on receiver: dropping because no match " "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " "RQS:exp=%d,act=%d, dst_req=%p", - (uint16_t)recvreq->req_msgseq, hdr->hdr_match.hdr_seq, + (uint16_t)recvreq->req_msgseq, hdr->hdr_fin.hdr_match.hdr_seq, recvreq->req_recv.req_base.req_comm->c_contextid, - hdr->hdr_match.hdr_ctx, + hdr->hdr_fin.hdr_match.hdr_ctx, recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_match.hdr_src, - recvreq->req_restartseq, hdr->hdr_fin.hdr_restartseq, + hdr->hdr_fin.hdr_match.hdr_src, + recvreq->req_restartseq, + hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, (void *)recvreq); return true; } - if (hdr->hdr_fin.hdr_restartseq != recvreq->req_restartseq) { + if (hdr->hdr_fin.hdr_match.hdr_common.hdr_flags != recvreq->req_restartseq) { opal_output_verbose(5, mca_pml_bfo_output, "FIN: received on receiver: dropping because old " "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " "RQS:exp=%d,act=%d, dst_req=%p", - (uint16_t)recvreq->req_msgseq, hdr->hdr_match.hdr_seq, + (uint16_t)recvreq->req_msgseq, hdr->hdr_fin.hdr_match.hdr_seq, recvreq->req_recv.req_base.req_comm->c_contextid, - hdr->hdr_match.hdr_ctx, + hdr->hdr_fin.hdr_match.hdr_ctx, recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - hdr->hdr_match.hdr_src, - recvreq->req_restartseq, hdr->hdr_fin.hdr_restartseq, + hdr->hdr_fin.hdr_match.hdr_src, + recvreq->req_restartseq, + hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, (void *)recvreq); return true; } } else if (basereq->req_type == MCA_PML_REQUEST_SEND) { mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)basereq; - if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || - (hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_peer) || - (hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) { + if ((hdr->hdr_fin.hdr_match.hdr_ctx != + 
sendreq->req_send.req_base.req_comm->c_contextid) || + (hdr->hdr_fin.hdr_match.hdr_src != + sendreq->req_send.req_base.req_peer) || + (hdr->hdr_fin.hdr_match.hdr_seq != + (uint16_t)sendreq->req_send.req_base.req_sequence)) { uint16_t seq = (uint16_t)sendreq->req_send.req_base.req_sequence; opal_output_verbose(5, mca_pml_bfo_output, "FIN: received on sender: dropping because no match " "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " "RQS:exp=%d,act=%d, dst_req=%p", - seq, hdr->hdr_match.hdr_seq, + seq, hdr->hdr_fin.hdr_match.hdr_seq, sendreq->req_send.req_base.req_comm->c_contextid, - hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_fin.hdr_restartseq, + hdr->hdr_fin.hdr_match.hdr_ctx, + sendreq->req_send.req_base.req_peer, + hdr->hdr_fin.hdr_match.hdr_src, + sendreq->req_restartseq, + hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, (void *)sendreq); return true; } - if (hdr->hdr_fin.hdr_restartseq != sendreq->req_restartseq) { + if (hdr->hdr_fin.hdr_match.hdr_common.hdr_flags != sendreq->req_restartseq) { uint16_t seq = (uint16_t)sendreq->req_send.req_base.req_sequence; opal_output_verbose(5, mca_pml_bfo_output, "FIN: received on sender: dropping because old " "PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " "RQS:exp=%d,act=%d, dst_req=%p", - seq, hdr->hdr_match.hdr_seq, + seq, hdr->hdr_fin.hdr_match.hdr_seq, sendreq->req_send.req_base.req_comm->c_contextid, - hdr->hdr_match.hdr_ctx, - sendreq->req_send.req_base.req_peer, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_fin.hdr_restartseq, + hdr->hdr_fin.hdr_match.hdr_ctx, + sendreq->req_send.req_base.req_peer, + hdr->hdr_fin.hdr_match.hdr_src, + sendreq->req_restartseq, + hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, (void *)sendreq); return true; } @@ -236,8 +249,9 @@ bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t* hdr, mca_btl_base_descripto opal_output_verbose(5, mca_pml_bfo_output, "FIN: received: dropping because descriptor has been reused " "PML=%d CTX=%d SRC=%d RQS=%d rdma->des_flags=%d", - hdr->hdr_match.hdr_seq, hdr->hdr_match.hdr_ctx, - hdr->hdr_match.hdr_src, hdr->hdr_fin.hdr_restartseq, rdma->des_flags); + hdr->hdr_fin.hdr_match.hdr_seq, hdr->hdr_fin.hdr_match.hdr_ctx, + hdr->hdr_fin.hdr_match.hdr_src, hdr->hdr_fin.hdr_match.hdr_common.hdr_flags, + rdma->des_flags); return true; } } @@ -281,7 +295,8 @@ void mca_pml_bfo_repost_fin(struct mca_btl_base_descriptor_t* des) { /* Reconstruct the fin for sending on the other BTL */ mca_pml_bfo_send_fin(proc, bml_btl, hdr->hdr_des, MCA_BTL_NO_ORDER, - hdr->hdr_fail, hdr->hdr_match.hdr_seq, hdr->hdr_restartseq, + hdr->hdr_fail, hdr->hdr_match.hdr_seq, + hdr->hdr_match.hdr_common.hdr_flags, hdr->hdr_match.hdr_ctx, hdr->hdr_match.hdr_src); return; } @@ -1886,3 +1901,38 @@ void mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl, break; } } + +/** + * Register four functions to handle extra PML message types that + * are utilized when a failover occurs. + */ +int mca_pml_bfo_register_callbacks(void) { + int rc; + /* The following four functions are utilized when failover + * support for openib is enabled. 
*/ + rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY, + mca_pml_bfo_recv_frag_callback_rndvrestartnotify, + NULL ); + if(OMPI_SUCCESS != rc) + return rc; + + rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK, + mca_pml_bfo_recv_frag_callback_rndvrestartack, + NULL ); + if(OMPI_SUCCESS != rc) + return rc; + + rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK, + mca_pml_bfo_recv_frag_callback_rndvrestartnack, + NULL ); + if(OMPI_SUCCESS != rc) + return rc; + + rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY, + mca_pml_bfo_recv_frag_callback_recverrnotify, + NULL ); + if(OMPI_SUCCESS != rc) + return rc; + + return rc; +} diff --git a/ompi/mca/pml/bfo/pml_bfo_failover.h b/ompi/mca/pml/bfo/pml_bfo_failover.h index 2aea218ef8..9fcbedff11 100644 --- a/ompi/mca/pml/bfo/pml_bfo_failover.h +++ b/ompi/mca/pml/bfo/pml_bfo_failover.h @@ -74,7 +74,7 @@ extern void mca_pml_bfo_map_out( mca_btl_base_module_t *btl, mca_btl_base_descriptor_t* descriptor, void* cbdata ); - +int mca_pml_bfo_register_callbacks(void); /** @@ -99,7 +99,98 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* descriptor, void* cbdata ); - + +/** + * A bunch of macros to help isolate failover code from regular ob1 code. + */ + +/* Drop any ACK fragments if request is in error state. Do not want + * to initiate any more activity. */ +#define MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq) \ + if( OPAL_UNLIKELY((sendreq)->req_error)) { \ + opal_output_verbose(20, mca_pml_bfo_output, \ + "ACK: received: dropping because request in error, " \ + "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \ + (uint16_t)(sendreq)->req_send.req_base.req_sequence, \ + (sendreq)->req_restartseq, \ + (void *)(sendreq), (sendreq)->req_recv.pval, \ + (sendreq)->req_send.req_base.req_peer); \ + return; \ + } + +/* Drop any FRAG fragments if request is in error state. Do not want + * to initiate any more activity. */ +#define MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq) \ + if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \ + opal_output_verbose(20, mca_pml_bfo_output, \ + "FRAG: received: dropping because request in error, " \ + "PML=%d, src_req=%p, dst_req=%p, peer=%d, offset=%d", \ + (uint16_t)(recvreq)->req_msgseq, \ + (recvreq)->remote_req_send.pval, \ + (void *)(recvreq), \ + (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, \ + (int)hdr->hdr_frag.hdr_frag_offset); \ + return; \ + } + +/* Drop any PUT fragments if request is in error state. Do not want + * to initiate any more activity. */ +#define MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq) \ + if( OPAL_UNLIKELY((sendreq)->req_error)) { \ + opal_output_verbose(20, mca_pml_bfo_output, \ + "PUT: received: dropping because request in error, " \ + "PML=%d, src_req=%p, dst_req=%p, peer=%d", \ + (uint16_t)(sendreq)->req_send.req_base.req_sequence, \ + (void *)(sendreq), (sendreq)->req_recv.pval, \ + (sendreq)->req_send.req_base.req_peer); \ + return; \ + } + +/** + * Macros for pml_bfo_recvreq.c file. + */ + +/* This can happen if a FIN message arrives after the request was + * marked in error. So, just drop the message. Note that the status + * field is not being checked. That is because the status field is the + * value returned in the FIN hdr.hdr_fail field and may be used for + * other things. Note that we allow the various fields to be updated + * in case this actually completes the request and the sending side + * thinks it is done. 
*/ +#define MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq) \ + if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \ + opal_output_verbose(20, mca_pml_bfo_output, \ + "FIN: received on broken request, skipping, " \ + "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \ + (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ + (recvreq)->remote_req_send.pval, (void *)(recvreq), \ + (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ + /* Even though in error, it still might complete. */ \ + recv_request_pml_complete_check(recvreq); \ + return; \ + } + +#define MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq) \ + if ((recvreq)->req_errstate) { \ + opal_output_verbose(30, mca_pml_bfo_output, \ + "RDMA read: completion failed, error already seen, " \ + "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \ + (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ + (unsigned long)(recvreq)->remote_req_send.pval, \ + (unsigned long)(recvreq), \ + (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ + return; \ + } else { \ + opal_output_verbose(30, mca_pml_bfo_output, \ + "RDMA read: completion failed, sending RECVERRNOTIFY to " \ + "sender, PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \ + (recvreq)->req_msgseq, (recvreq)->req_restartseq, \ + (unsigned long)(recvreq)->remote_req_send.pval, \ + (unsigned long)(recvreq), \ + (recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \ + mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, status); \ + } + END_C_DECLS diff --git a/ompi/mca/pml/bfo/pml_bfo_hdr.h b/ompi/mca/pml/bfo/pml_bfo_hdr.h index 6e9e63aba0..19e3959f92 100644 --- a/ompi/mca/pml/bfo/pml_bfo_hdr.h +++ b/ompi/mca/pml/bfo/pml_bfo_hdr.h @@ -340,24 +340,17 @@ do { \ */ struct mca_pml_bfo_fin_hdr_t { + mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */ /* BFO FAILOVER CODE - begin */ mca_pml_bfo_match_hdr_t hdr_match; /**< match info - needed for failover */ - uint8_t hdr_restartseq; /**< restart sequence - failover use only */ /* BFO FAILOVER CODE - end */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[7]; -#endif uint32_t hdr_fail; /**< RDMA operation failed */ ompi_ptr_t hdr_des; /**< completed descriptor */ }; typedef struct mca_pml_bfo_fin_hdr_t mca_pml_bfo_fin_hdr_t; #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_BFO_FIN_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ -} while (0) +#define MCA_PML_BFO_FIN_HDR_FILL(h) #else #define MCA_PML_BFO_FIN_HDR_FILL(h) #endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ @@ -365,12 +358,13 @@ do { \ #define MCA_PML_BFO_FIN_HDR_NTOH(h) \ do { \ MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \ + MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \ } while (0) #define MCA_PML_BFO_FIN_HDR_HTON(h) \ do { \ MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_BFO_FIN_HDR_FILL(h); \ + MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \ } while (0) /* BFO FAILOVER CODE - begin */ diff --git a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c b/ompi/mca/pml/bfo/pml_bfo_recvfrag.c index 5b9bce1a53..7be9a7ad83 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvfrag.c +++ b/ompi/mca/pml/bfo/pml_bfo_recvfrag.c @@ -307,18 +307,7 @@ void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl, sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_ack.hdr_src_req.pval; sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; /* BFO FAILOVER CODE - begin */ - /* Drop any fragments if request is in error state. 
Do not want - * to initiate any more activity. */ - if( OPAL_UNLIKELY(sendreq->req_error)) { - opal_output_verbose(20, mca_pml_bfo_output, - "ACK: received: dropping because request in error, " - "PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - sendreq->req_restartseq, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - return; - } + MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq) /* BFO FAILOVER CODE - end */ /* if the request should be delivered entirely by copy in/out @@ -357,33 +346,21 @@ void mca_pml_bfo_recv_frag_callback_frag(mca_btl_base_module_t* btl, mca_btl_base_tag_t tag, mca_btl_base_descriptor_t* des, void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_dst; - mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; - mca_pml_bfo_recv_request_t* recvreq; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { - return; - } - bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FRAG); - recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval; + mca_btl_base_segment_t* segments = des->des_dst; + mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval; + mca_pml_bfo_recv_request_t* recvreq; + + if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) { + return; + } + bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FRAG); + recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval; /* BFO FAILOVER CODE - begin */ - /* Drop any fragments if request is in error state. Do not want - * to initiate any more activity. */ - if( OPAL_UNLIKELY(recvreq->req_errstate)) { - opal_output_verbose(20, mca_pml_bfo_output, - "FRAG: received: dropping because request in error, " - "PML=%d, src_req=%p, dst_req=%p, peer=%d, offset=%d", - (uint16_t)recvreq->req_msgseq, - recvreq->remote_req_send.pval, - (void *)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - (int)hdr->hdr_frag.hdr_frag_offset); - return; - } + MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq) /* BFO FAILOVER CODE - end */ - mca_pml_bfo_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt); - - return; + mca_pml_bfo_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt); + + return; } @@ -402,17 +379,7 @@ void mca_pml_bfo_recv_frag_callback_put(mca_btl_base_module_t* btl, bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_PUT); sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_rdma.hdr_req.pval; /* BFO FAILOVER CODE - begin */ - /* Drop any fragments if request is in error state. Do not want - * to initiate any more activity. 
*/ - if( OPAL_UNLIKELY(sendreq->req_error)) { - opal_output_verbose(20, mca_pml_bfo_output, - "PUT: received: dropping because request in error, " - "PML=%d, src_req=%p, dst_req=%p, peer=%d", - (uint16_t)sendreq->req_send.req_base.req_sequence, - (void *)sendreq, sendreq->req_recv.pval, - sendreq->req_send.req_base.req_peer); - return; - } + MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq) /* BFO FAILOVER CODE - end */ mca_pml_bfo_send_request_put(sendreq,btl,&hdr->hdr_rdma); diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.c b/ompi/mca/pml/bfo/pml_bfo_recvreq.c index 1a3ba3101f..ed797f3bc9 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.c +++ b/ompi/mca/pml/bfo/pml_bfo_recvreq.c @@ -196,26 +196,9 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl, OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1); btl->btl_free(btl, des); + /* BFO FAILOVER CODE - begin */ - /* This can happen if a FIN message arrives after the request was - * marked in error. So, just drop the message. Note that the - * status field is not being checked. That is because the status - * field is the value returned in the FIN hdr.hdr_fail field and - * may be used for other things. Note that we allow the various - * fields to be updated in case this actually completes the - * request and the sending side thinks it is done. */ - if( OPAL_UNLIKELY(recvreq->req_errstate)) { - opal_output_verbose(20, mca_pml_bfo_output, - "FIN: received on broken request, skipping, " - "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - (unsigned long)recvreq->remote_req_send.pval, - (unsigned long)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - /* Even though in error, it still might complete. */ - recv_request_pml_complete_check(recvreq); - return; - } + MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq) /* BFO FAILOVER CODE - end */ /* check completion status */ @@ -368,27 +351,7 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl, /* check completion status */ if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { -/* BFO FAILOVER CODE - begin */ - /* Record the error and send RECVERRNOTIFY if necessary. 
*/ - if (recvreq->req_errstate) { - opal_output_verbose(30, mca_pml_bfo_output, - "RDMA read: completion failed, error already seen, " - "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - (unsigned long)recvreq->remote_req_send.pval, - (unsigned long)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - return; - } else { - opal_output_verbose(30, mca_pml_bfo_output, - "RDMA read: completion failed, sending RECVERRNOTIFY to sender, " - "PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", - recvreq->req_msgseq, recvreq->req_restartseq, - (unsigned long)recvreq->remote_req_send.pval, - (unsigned long)recvreq, - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, status); - } + MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq) } /* BFO FAILOVER CODE - end */ /* BFO FAILOVER CODE - begin */ diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.c b/ompi/mca/pml/bfo/pml_bfo_sendreq.c index b76b8692a3..f01acfa630 100644 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.c +++ b/ompi/mca/pml/bfo/pml_bfo_sendreq.c @@ -379,7 +379,7 @@ mca_pml_bfo_send_ctl_completion( mca_btl_base_module_t* btl, hdr->hdr_match.hdr_ctx, sendreq->req_send.req_base.req_comm->c_my_rank, hdr->hdr_match.hdr_src, - sendreq->req_restartseq, hdr->hdr_fin.hdr_restartseq, + sendreq->req_restartseq, hdr->hdr_rndv.hdr_restartseq, (void *)sendreq); return; }
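An illustrative aside on the FIN-header change above: the patch deletes the dedicated hdr_restartseq byte (and its heterogeneous-support padding) from mca_pml_bfo_fin_hdr_t, embeds a full match header instead, and carries the restart sequence in that match header's otherwise-unused hdr_flags byte. That is why mca_pml_bfo_send_fin now writes hdr_match.hdr_common.hdr_flags = restartseq and why mca_pml_bfo_is_duplicate_fin compares the same field against req_restartseq. The standalone C sketch below models only that idea; the type and function names (common_hdr_t, match_hdr_t, fin_hdr_t, recv_state_t, fill_fin, is_duplicate_fin) and the field widths are simplified stand-ins, not the real Open MPI definitions.

/* Minimal, self-contained sketch of the "restart sequence rides in the
 * unused hdr_flags byte" idea from the FIN-header refactoring above.
 * All names here are simplified stand-ins, NOT the real OMPI types. */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    uint8_t hdr_type;   /* message type, e.g. FIN */
    uint8_t hdr_flags;  /* unused for FIN messages, so it can carry the restart seq */
} common_hdr_t;

typedef struct {
    common_hdr_t hdr_common;  /* inner common header; its hdr_flags is the spare byte */
    uint16_t     hdr_ctx;     /* communicator context id */
    int32_t      hdr_src;     /* source rank */
    uint16_t     hdr_seq;     /* PML message sequence */
} match_hdr_t;

typedef struct {
    common_hdr_t hdr_common;  /* common attributes of the FIN itself */
    match_hdr_t  hdr_match;   /* match info, needed only for failover */
    uint32_t     hdr_fail;    /* non-zero if the RDMA operation failed */
} fin_hdr_t;

/* Sender side: fill the FIN and stash the restart sequence in the
 * embedded match header's otherwise-unused hdr_flags field. */
static void fill_fin(fin_hdr_t *fin, uint16_t seq, uint16_t ctx,
                     int32_t src, uint8_t restartseq, uint32_t status)
{
    fin->hdr_common.hdr_type  = 0x0a;  /* illustrative FIN type value */
    fin->hdr_common.hdr_flags = 0;
    fin->hdr_fail             = status;
    fin->hdr_match.hdr_seq    = seq;
    fin->hdr_match.hdr_ctx    = ctx;
    fin->hdr_match.hdr_src    = src;
    fin->hdr_match.hdr_common.hdr_flags = restartseq;  /* reuse the spare byte */
}

/* Receiver-side state that a duplicate-FIN check would compare against. */
typedef struct {
    uint16_t req_msgseq;
    uint16_t req_ctx;
    int32_t  req_src;
    uint8_t  req_restartseq;  /* how many times this request has restarted */
} recv_state_t;

/* Mirrors the shape of the duplicate-FIN test: drop the FIN if it does
 * not match the request, or if it carries an old restart sequence. */
static bool is_duplicate_fin(const fin_hdr_t *fin, const recv_state_t *req)
{
    if (fin->hdr_match.hdr_ctx != req->req_ctx ||
        fin->hdr_match.hdr_src != req->req_src ||
        fin->hdr_match.hdr_seq != req->req_msgseq) {
        return true;  /* FIN belongs to some other (already recycled) request */
    }
    if (fin->hdr_match.hdr_common.hdr_flags != req->req_restartseq) {
        return true;  /* FIN from an earlier incarnation of this request */
    }
    return false;
}

int main(void)
{
    recv_state_t req = { .req_msgseq = 42, .req_ctx = 3,
                         .req_src = 1, .req_restartseq = 2 };
    fin_hdr_t fin;

    fill_fin(&fin, 42, 3, 1, /*restartseq=*/1, /*status=*/0);
    printf("stale FIN dropped: %s\n", is_duplicate_fin(&fin, &req) ? "yes" : "no");

    fill_fin(&fin, 42, 3, 1, /*restartseq=*/2, /*status=*/0);
    printf("current FIN kept:  %s\n", is_duplicate_fin(&fin, &req) ? "no" : "yes");
    return 0;
}

The payoff visible in the pml_bfo_hdr.h hunk is that the FIN header no longer needs its own restart-sequence byte or the 7-byte heterogeneous padding, while the restart information still travels with every FIN and with the match fields it is checked against.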
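A second aside, on the new guard macros added to pml_bfo_failover.h: the ACK, FRAG, PUT, and FIN variants each expand to an if (...) { opal_output_verbose(...); return; } inside the callback that invokes them, so a single macro line both logs the drop and early-returns from the calling function, keeping the failover checks out of the mainline ob1-style code paths (the RDMA-read variant instead falls through to send a RECVERRNOTIFY). The toy example below shows the early-return pattern in isolation; every name in it (request_t, DROP_IF_IN_ERROR, handle_ack) is hypothetical, and unlike the real macros it wraps the body in do { } while (0), which behaves the same here because the return still exits the caller.

/* Toy illustration of a guard macro that early-returns from the caller,
 * as the MCA_PML_BFO_ERROR_CHECK_ON_* macros do. Hypothetical names only. */
#include <stdio.h>
#include <stdbool.h>

typedef struct {
    int  seq;
    bool in_error;   /* stands in for req_error / req_errstate */
} request_t;

/* Expands inside the caller: if the request is already marked in error,
 * log the drop and return from the *calling* function before doing any
 * further work. Only safe to use in functions returning void. */
#define DROP_IF_IN_ERROR(req)                                           \
    do {                                                                \
        if ((req)->in_error) {                                          \
            fprintf(stderr, "dropping fragment, request %d in error\n", \
                    (req)->seq);                                        \
            return;                                                     \
        }                                                               \
    } while (0)

static void handle_ack(request_t *req)
{
    DROP_IF_IN_ERROR(req);   /* failover guard stays one line in the callback */
    printf("processing ACK for request %d\n", req->seq);
}

int main(void)
{
    request_t ok  = { .seq = 1, .in_error = false };
    request_t bad = { .seq = 2, .in_error = true  };
    handle_ack(&ok);
    handle_ack(&bad);
    return 0;
}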