Some more refactoring in the BFO PML. Getting it
as close to OB1 PML as possible. This commit was SVN r23920.
Этот коммит содержится в:
родитель
1766bf271a
Коммит
148ed00dd1
@ -48,9 +48,9 @@
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "pml_bfo_rdmafrag.h"
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
#include "pml_bfo_failover.h"
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
mca_pml_bfo_t mca_pml_bfo = {
|
||||
{
|
||||
@ -409,13 +409,12 @@ int mca_pml_bfo_add_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
NULL );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
|
||||
#ifdef PML_BFO
|
||||
rc = mca_pml_bfo_register_callbacks();
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
|
||||
#endif
|
||||
/* register error handlers */
|
||||
rc = mca_bml.bml_register_error(mca_pml_bfo_error_handler);
|
||||
if(OMPI_SUCCESS != rc)
|
||||
@ -472,14 +471,13 @@ static void mca_pml_bfo_fin_completion( mca_btl_base_module_t* btl,
|
||||
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
mca_pml_bfo_repost_fin(des);
|
||||
return;
|
||||
}
|
||||
MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des);
|
||||
/* BFO FAILOVER CODE - end */
|
||||
|
||||
#endif
|
||||
/* check for pending requests */
|
||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
@ -494,12 +492,14 @@ int mca_pml_bfo_send_fin( ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
ompi_ptr_t hdr_des,
|
||||
uint8_t order,
|
||||
#ifdef PML_BFO
|
||||
uint32_t status,
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
uint16_t seq,
|
||||
uint8_t restartseq,
|
||||
uint16_t ctx, uint32_t src)
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#else
|
||||
uint32_t status )
|
||||
#endif
|
||||
{
|
||||
mca_btl_base_descriptor_t* fin;
|
||||
mca_pml_bfo_fin_hdr_t* hdr;
|
||||
@ -521,13 +521,13 @@ int mca_pml_bfo_send_fin( ompi_proc_t* proc,
|
||||
hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN;
|
||||
hdr->hdr_des = hdr_des;
|
||||
hdr->hdr_fail = status;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
fin->des_cbdata = proc;
|
||||
hdr->hdr_match.hdr_seq = seq;
|
||||
hdr->hdr_match.hdr_ctx = ctx;
|
||||
hdr->hdr_match.hdr_src = src;
|
||||
hdr->hdr_match.hdr_common.hdr_flags = restartseq; /* use unused hdr_flags field */
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FIN, proc);
|
||||
|
||||
@ -594,11 +594,15 @@ void mca_pml_bfo_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
||||
rc = mca_pml_bfo_send_fin(pckt->proc, send_dst,
|
||||
pckt->hdr.hdr_fin.hdr_des,
|
||||
pckt->order,
|
||||
#ifdef PML_BFO
|
||||
pckt->hdr.hdr_fin.hdr_fail,
|
||||
pckt->hdr.hdr_fin.hdr_match.hdr_seq,
|
||||
pckt->hdr.hdr_fin.hdr_match.hdr_common.hdr_flags,
|
||||
pckt->hdr.hdr_fin.hdr_match.hdr_ctx,
|
||||
pckt->hdr.hdr_fin.hdr_match.hdr_src);
|
||||
#else
|
||||
pckt->hdr.hdr_fin.hdr_fail);
|
||||
#endif
|
||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) ) {
|
||||
return;
|
||||
}
|
||||
@ -640,14 +644,13 @@ void mca_pml_bfo_process_pending_rdma(void)
|
||||
void mca_pml_bfo_error_handler(
|
||||
struct mca_btl_base_module_t* btl, int32_t flags,
|
||||
ompi_proc_t* errproc, char* btlinfo ) {
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
/* If we get a non-fatal error, try to failover */
|
||||
#ifdef PML_BFO
|
||||
if (flags & MCA_BTL_ERROR_FLAGS_NONFATAL) {
|
||||
mca_pml_bfo_failover_error_handler(btl, flags, errproc, btlinfo);
|
||||
/* BFO FAILOVER CODE - end */
|
||||
} else {
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 0
|
||||
|
@ -23,6 +23,7 @@
|
||||
#ifndef MCA_PML_BFO_H
|
||||
#define MCA_PML_BFO_H
|
||||
|
||||
#define PML_BFO 1
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/request/request.h"
|
||||
@ -225,8 +226,12 @@ do { \
|
||||
|
||||
|
||||
int mca_pml_bfo_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||
#ifdef PML_BFO
|
||||
ompi_ptr_t hdr_des, uint8_t order, uint32_t status,
|
||||
uint16_t seq, uint8_t reqseq, uint16_t ctx, uint32_t src);
|
||||
#else
|
||||
ompi_ptr_t hdr_des, uint8_t order, uint32_t status);
|
||||
#endif
|
||||
|
||||
/* This function tries to resend FIN/ACK packets from pckt_pending queue.
|
||||
* Packets are added to the queue when sending of FIN or ACK is failed due to
|
||||
|
@ -2103,3 +2103,102 @@ void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The completion event for the RNDV message has returned with an
|
||||
* error. We know that the send request we are looking at is valid
|
||||
* because it cannot be completed until the sendreq->req_state value
|
||||
* reaches 0. And for the sendreq->req_state to reach 0, the
|
||||
* completion event on the RNDV message must occur. So, we do not
|
||||
* bother checking whether the send request is valid, because we know
|
||||
* it is, but we put a few asserts in for good measure. We then check
|
||||
* a few fields in the request to decide what to do. If the
|
||||
* sendreq->req_error is set, that means that something has happend
|
||||
* already to the request and we do not want to restart it.
|
||||
* Presumably, we may have received a RECVERRNOTIFY message from the
|
||||
* receiver. We also check the sendreq->req_acked field to see if it
|
||||
* has been acked. If it has, then again we do not restart everything
|
||||
* because obviously the RNDV message has made it to the other side.
|
||||
*/
|
||||
bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des,
|
||||
mca_pml_bfo_send_request_t* sendreq)
|
||||
{
|
||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_ctx ==
|
||||
(sendreq)->req_send.req_base.req_comm->c_contextid);
|
||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_src ==
|
||||
(sendreq)->req_send.req_base.req_comm->c_my_rank);
|
||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_seq ==
|
||||
(uint16_t)(sendreq)->req_send.req_base.req_sequence);
|
||||
if ((!(sendreq)->req_error) && (NULL == (sendreq)->req_recv.pval)) {
|
||||
(sendreq)->req_events--;
|
||||
/* Assume RNDV did not make it, so restart from the beginning. */
|
||||
mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RNDV);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check to see if an error has occurred on this send request. If it has
|
||||
* and there are no outstanding events, then we can start the restart dance.
|
||||
*/
|
||||
void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq,
|
||||
int status,
|
||||
mca_btl_base_module_t* btl,
|
||||
int type,
|
||||
char *description)
|
||||
{
|
||||
opal_output_verbose(30, mca_pml_bfo_output,
|
||||
"%s: completion: sendreq has error, outstanding events=%d, "
|
||||
"PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d",
|
||||
description,
|
||||
sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_restartseq, (void *)sendreq,
|
||||
sendreq->req_recv.pval,
|
||||
status, sendreq->req_send.req_base.req_peer);
|
||||
if (0 == sendreq->req_events) {
|
||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false,
|
||||
type, status, btl);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we get an error on the RGET message, then first make sure that
|
||||
* header matches the send request that we are pointing to. This is
|
||||
* necessary, because even though the sending side got an error, the
|
||||
* RGET may have made it to the receiving side and the message transfer
|
||||
* may have completed. This would then mean the send request has been
|
||||
* completed and perhaps in use by another communication. So there is
|
||||
* no need to restart this request. Therefore, ensure that we are
|
||||
* looking at the same request that the header thinks we are looking
|
||||
* at. If not, then there is nothing else to be done. */
|
||||
void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des)
|
||||
{
|
||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
||||
mca_pml_bfo_hdr_t* hdr = des->des_src->seg_addr.pval;
|
||||
switch (hdr->hdr_common.hdr_type) {
|
||||
case MCA_PML_BFO_HDR_TYPE_RGET:
|
||||
if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) ||
|
||||
(hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_comm->c_my_rank) ||
|
||||
(hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) {
|
||||
opal_output_verbose(30, mca_pml_bfo_output,
|
||||
"RGET: completion event: dropping because no valid request "
|
||||
"PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d "
|
||||
"RQS:exp=%d,act=%d, dst_req=%p",
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
hdr->hdr_match.hdr_seq,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
hdr->hdr_match.hdr_ctx,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
hdr->hdr_match.hdr_src,
|
||||
sendreq->req_restartseq, hdr->hdr_rndv.hdr_restartseq,
|
||||
(void *)sendreq);
|
||||
return;
|
||||
}
|
||||
mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RGET);
|
||||
return;
|
||||
default:
|
||||
opal_output(0, "%s:%d FATAL ERROR, unknown header (hdr=%d)",
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_type);
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
}
|
||||
}
|
||||
|
@ -105,6 +105,16 @@ void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl,
|
||||
mca_pml_bfo_recv_request_t* recvreq,
|
||||
char* type);
|
||||
|
||||
bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des,
|
||||
mca_pml_bfo_send_request_t* sendreq);
|
||||
void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des);
|
||||
|
||||
|
||||
void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq,
|
||||
int status,
|
||||
mca_btl_base_module_t* btl,
|
||||
int type,
|
||||
char *description);
|
||||
/**
|
||||
* Four new callbacks for the four new message types.
|
||||
*/
|
||||
@ -243,57 +253,33 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t
|
||||
* Macros for pml_bfo_sendreq.c file.
|
||||
*/
|
||||
|
||||
/* The completion event for the RNDV message has returned with an
|
||||
* error. We know that the send request we are looking at is valid
|
||||
* because it cannot be completed until the sendreq->req_state value
|
||||
* reaches 0. And for the sendreq->req_state to reach 0, the
|
||||
* completion event on the RNDV message must occur. So, we do not
|
||||
* bother checking whether the send request is valid, because we know
|
||||
* it is, but we put a few asserts in for good measure. We then check
|
||||
* a few fields in the request to decide what to do. If the
|
||||
* sendreq->req_error is set, that means that something has happend
|
||||
* already to the request and we do not want to restart it.
|
||||
* Presumably, we may have received a RECVERRNOTIFY message from the
|
||||
* receiver. We also check the sendreq->req_acked field to see if it
|
||||
* has been acked. If it has, then again we do not restart everything
|
||||
* because obviously the RNDV message has made it to the other side. */
|
||||
#define MCA_PML_BFO_ERROR_ON_RNDV_COMPLETION(sendreq, des) \
|
||||
do { \
|
||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_ctx == \
|
||||
(sendreq)->req_send.req_base.req_comm->c_contextid); \
|
||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_src == \
|
||||
(sendreq)->req_send.req_base.req_comm->c_my_rank); \
|
||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_seq == \
|
||||
(uint16_t)(sendreq)->req_send.req_base.req_sequence); \
|
||||
if ((!(sendreq)->req_error) && (NULL == (sendreq)->req_recv.pval)) { \
|
||||
(sendreq)->req_events--; \
|
||||
/* Assume RNDV did not make it, so restart from the beginning. */ \
|
||||
mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RNDV); \
|
||||
return; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Now check the error state. This request can be in error if the
|
||||
* RNDV message made it over, but the receiver got an error trying to
|
||||
* send the ACK back and therefore sent a RECVERRNOTIFY message. In
|
||||
* that case, we want to start the restart dance as the receiver has
|
||||
* matched this message already. Only restart if there are no
|
||||
* outstanding events on send request. */
|
||||
#define MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_RNDV_COMPLETION(sendreq, status, btl) \
|
||||
if ((sendreq)->req_error) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"RNDV: completion: sendreq has error, outstanding events=%d, " \
|
||||
"PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, status=%d, peer=%d", \
|
||||
(sendreq)->req_events, \
|
||||
(uint16_t)(sendreq)->req_send.req_base.req_sequence, \
|
||||
(sendreq)->req_restartseq, (unsigned long)(sendreq), \
|
||||
(unsigned long)(sendreq)->req_recv.pval, \
|
||||
status, (sendreq)->req_send.req_base.req_peer); \
|
||||
if (0 == (sendreq)->req_events) { \
|
||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \
|
||||
MCA_PML_BFO_HDR_TYPE_RNDV, \
|
||||
status, btl); \
|
||||
} \
|
||||
#define MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \
|
||||
if( OPAL_UNLIKELY ((sendreq)->req_error)) { \
|
||||
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \
|
||||
btl, type, description); \
|
||||
return; \
|
||||
}
|
||||
|
||||
/**
|
||||
* This macro is called within the frag completion function in two
|
||||
* places. It is called to see if any errors occur prior to the
|
||||
* completion event on the frag. It is then called a second time
|
||||
* after the scheduling routine is called as the scheduling routine
|
||||
* may have detected that a BTL that was cached on the request had
|
||||
* been removed and therefore marked the request in error. In that
|
||||
* case, the scheduling of fragments can no longer proceed properly,
|
||||
* and if there are no outstanding events, iniated the restart dance.
|
||||
*/
|
||||
#define MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \
|
||||
if( OPAL_UNLIKELY((sendreq)->req_error)) { \
|
||||
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \
|
||||
btl, type, description); \
|
||||
return; \
|
||||
}
|
||||
|
||||
@ -302,7 +288,7 @@ do {
|
||||
* field is not checked here. That is because that is the value
|
||||
* returned in the FIN hdr.hdr_fail field and may be used for other
|
||||
* things. */
|
||||
#define MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_RGET_COMPLETION(sendreq, btl, des) \
|
||||
#define MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des) \
|
||||
if( OPAL_UNLIKELY(sendreq->req_error)) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"FIN: received on broken request, skipping, " \
|
||||
@ -314,66 +300,15 @@ do {
|
||||
return; \
|
||||
}
|
||||
|
||||
/* If we get an error on the RGET message, then first make sure that
|
||||
* header matches the send request that we are pointing to. This is
|
||||
* necessary, because even though the sending side got an error, the
|
||||
* RGET may have made it to the receiving side and the message transfer
|
||||
* may have completed. This would then mean the send request has been
|
||||
* completed and perhaps in use by another communication. So there is
|
||||
* no need to restart this request. Therefore, ensure that we are
|
||||
* looking at the same request that the header thinks we are looking
|
||||
* at. If not, then there is nothing else to be done. */
|
||||
#define MCA_PML_BFO_ERROR_ON_SEND_CTL_COMPLETION(sendreq, des) \
|
||||
do { \
|
||||
mca_pml_bfo_hdr_t* hdr = des->des_src->seg_addr.pval; \
|
||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; \
|
||||
switch (hdr->hdr_common.hdr_type) { \
|
||||
case MCA_PML_BFO_HDR_TYPE_RGET: \
|
||||
if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || \
|
||||
(hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_comm->c_my_rank) || \
|
||||
(hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"RGET: completion event: dropping because no valid request " \
|
||||
"PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " \
|
||||
"RQS:exp=%d,act=%d, dst_req=%p", \
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence, \
|
||||
hdr->hdr_match.hdr_seq, \
|
||||
sendreq->req_send.req_base.req_comm->c_contextid, \
|
||||
hdr->hdr_match.hdr_ctx, \
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank, \
|
||||
hdr->hdr_match.hdr_src, \
|
||||
sendreq->req_restartseq, hdr->hdr_rndv.hdr_restartseq, \
|
||||
(void *)sendreq); \
|
||||
return; \
|
||||
} \
|
||||
mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RGET); \
|
||||
return; \
|
||||
default: \
|
||||
opal_output(0, "%s:%d FATAL ERROR, unknown header (hdr=%d)", \
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_type); \
|
||||
orte_errmgr.abort(-1, NULL); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Check if there has been an error on the send request when we get
|
||||
* a completion event on the RDMA write. */
|
||||
#define MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_PUT_COMPLETION(sendreq, status, btl) \
|
||||
if ( OPAL_UNLIKELY(sendreq->req_error)) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"RDMA write: completion: sendreq has error, outstanding events=%d," \
|
||||
" PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d", \
|
||||
sendreq->req_events, \
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence, \
|
||||
sendreq->req_restartseq, (void *)sendreq, \
|
||||
sendreq->req_recv.pval, \
|
||||
status, sendreq->req_send.req_base.req_peer); \
|
||||
if (0 == sendreq->req_events) { \
|
||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \
|
||||
MCA_PML_BFO_HDR_TYPE_PUT, \
|
||||
status, btl); \
|
||||
} \
|
||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
||||
return; \
|
||||
#define MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl) \
|
||||
if ( OPAL_UNLIKELY(sendreq->req_error)) { \
|
||||
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, btl, \
|
||||
MCA_PML_BFO_HDR_TYPE_PUT, "RDMA write"); \
|
||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
||||
return; \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, type) \
|
||||
@ -416,6 +351,28 @@ do {
|
||||
mca_pml_bfo_update_eager_bml_btl_recv_ctl(&bml_btl, btl, des); \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl) \
|
||||
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"PUT received: no matching BTL to RDMA write to, oustanding " \
|
||||
"events=%d, PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \
|
||||
sendreq->req_events, \
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence, \
|
||||
sendreq->req_restartseq, (void *)sendreq, \
|
||||
sendreq->req_recv.pval, sendreq->req_send.req_base.req_peer); \
|
||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
||||
sendreq->req_error++; \
|
||||
if (0 == sendreq->req_events) { \
|
||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \
|
||||
MCA_PML_BFO_HDR_TYPE_PUT, \
|
||||
OMPI_ERROR, btl); \
|
||||
} \
|
||||
return; \
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -36,7 +36,9 @@ typedef enum {
|
||||
struct mca_pml_bfo_rdma_frag_t {
|
||||
ompi_free_list_item_t super;
|
||||
mca_bml_base_btl_t* rdma_bml;
|
||||
#ifdef PML_BFO
|
||||
mca_btl_base_module_t* rdma_btl;
|
||||
#endif
|
||||
mca_pml_bfo_hdr_t rdma_hdr;
|
||||
mca_pml_bfo_rdma_state_t rdma_state;
|
||||
size_t rdma_length;
|
||||
|
@ -41,9 +41,9 @@
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "pml_bfo_hdr.h"
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
#include "pml_bfo_failover.h"
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_pml_bfo_buffer_t,
|
||||
ompi_free_list_item_t,
|
||||
@ -242,13 +242,13 @@ void mca_pml_bfo_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
||||
|
||||
slow_path:
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
/* Check for duplicate messages. If message is duplicate, then just
|
||||
* return as that essentially drops the message. */
|
||||
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
||||
return;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
mca_pml_bfo_recv_frag_match(btl, hdr, segments,
|
||||
num_segments, MCA_PML_BFO_HDR_TYPE_MATCH);
|
||||
}
|
||||
@ -306,9 +306,9 @@ void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_ACK);
|
||||
sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_ack.hdr_src_req.pval;
|
||||
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq)
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
/* if the request should be delivered entirely by copy in/out
|
||||
* then throttle sends */
|
||||
@ -352,9 +352,9 @@ void mca_pml_bfo_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
}
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FRAG);
|
||||
recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq)
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
mca_pml_bfo_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt);
|
||||
|
||||
return;
|
||||
@ -375,9 +375,9 @@ void mca_pml_bfo_recv_frag_callback_put(mca_btl_base_module_t* btl,
|
||||
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_PUT);
|
||||
sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_rdma.hdr_req.pval;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq)
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
mca_pml_bfo_send_request_put(sendreq,btl,&hdr->hdr_rdma);
|
||||
|
||||
return;
|
||||
@ -398,11 +398,11 @@ void mca_pml_bfo_recv_frag_callback_fin(mca_btl_base_module_t* btl,
|
||||
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FIN);
|
||||
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if (true == mca_pml_bfo_is_duplicate_fin(hdr, rdma, btl)) {
|
||||
return;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
rdma->des_cbfunc(btl, NULL, rdma,
|
||||
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);
|
||||
|
||||
@ -626,7 +626,7 @@ static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl,
|
||||
* the fragment.
|
||||
*/
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
/* In case of network failover, we may get a message telling us to
|
||||
* restart. In that case, we already have a pointer to the receive
|
||||
* request in the header itself. */
|
||||
@ -635,8 +635,8 @@ static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl,
|
||||
if (NULL == match) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
} else {
|
||||
#endif
|
||||
|
||||
/* get sequence number of next message that can be processed */
|
||||
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
|
||||
@ -673,8 +673,9 @@ out_of_order_match:
|
||||
|
||||
/* release matching lock before processing fragment */
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
#ifdef PML_BFO
|
||||
}
|
||||
|
||||
#endif
|
||||
if(OPAL_LIKELY(match)) {
|
||||
switch(type) {
|
||||
case MCA_PML_BFO_HDR_TYPE_MATCH:
|
||||
@ -716,14 +717,13 @@ wrong_seq:
|
||||
* This message comes after the next expected, so it
|
||||
* is ahead of sequence. Save it for later.
|
||||
*/
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
/* Check for duplicate messages. If message is duplicate, then just
|
||||
* return as that essentially drops the message. */
|
||||
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
|
||||
#endif
|
||||
append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments,
|
||||
num_segments, NULL);
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
@ -33,9 +33,9 @@
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "opal/util/arch.h"
|
||||
#include "ompi/memchecker.h"
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
#include "pml_bfo_failover.h"
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
void mca_pml_bfo_recv_request_process_pending(void)
|
||||
{
|
||||
@ -170,13 +170,13 @@ static void mca_pml_bfo_recv_ctl_completion( mca_btl_base_module_t* btl,
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) {
|
||||
mca_pml_bfo_check_recv_ctl_completion_status(btl, des, status);
|
||||
}
|
||||
|
||||
MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des);
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
@ -199,12 +199,13 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
|
||||
}
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
|
||||
|
||||
#ifdef PML_BFO
|
||||
btl->btl_free(btl, des);
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq);
|
||||
MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, "PUT");
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#else
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
#endif
|
||||
|
||||
/* check completion status */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
@ -249,18 +250,18 @@ int mca_pml_bfo_recv_request_ack_send_btl(
|
||||
|
||||
/* initialize descriptor */
|
||||
des->des_cbfunc = mca_pml_bfo_recv_ctl_completion;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
des->des_cbdata = (void *)proc;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_ACK);
|
||||
if( OPAL_LIKELY( rc >= 0 ) ) {
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
||||
(des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
||||
((mca_pml_bfo_recv_request_t *)hdr_dst_req)->req_events++;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
@ -346,29 +347,39 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata;
|
||||
mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req;
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) {
|
||||
recvreq->req_events--;
|
||||
assert(recvreq->req_events >= 0);
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq);
|
||||
#else
|
||||
/* TSW - FIX */
|
||||
ORTE_ERROR_LOG(status);
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl);
|
||||
MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, "RDMA write");
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
mca_pml_bfo_send_fin(recvreq->req_recv.req_base.req_proc,
|
||||
bml_btl,
|
||||
frag->rdma_hdr.hdr_rget.hdr_des,
|
||||
des->order, 0, (uint16_t)recvreq->req_msgseq, recvreq->req_restartseq,
|
||||
recvreq->req_recv.req_base.req_comm->c_contextid,
|
||||
recvreq->req_recv.req_base.req_comm->c_my_rank);
|
||||
#ifdef PML_BFO
|
||||
des->order, 0, (uint16_t)recvreq->req_msgseq, recvreq->req_restartseq,
|
||||
recvreq->req_recv.req_base.req_comm->c_contextid,
|
||||
recvreq->req_recv.req_base.req_comm->c_my_rank);
|
||||
#else
|
||||
des->order, 0);
|
||||
#endif
|
||||
|
||||
/* is receive request complete */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
@ -432,12 +443,12 @@ int mca_pml_bfo_recv_request_get_frag( mca_pml_bfo_rdma_frag_t* frag )
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
}
|
||||
}
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
||||
(descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
||||
recvreq->req_events++;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -519,9 +530,9 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
|
||||
0, bytes_received );
|
||||
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match);
|
||||
|
||||
/* if receive buffer is not contiguous we can't just RDMA read into it, so
|
||||
@ -556,9 +567,9 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
|
||||
size += hdr->hdr_segs[i].seg_len;
|
||||
}
|
||||
}
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
frag->rdma_btl = btl;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
|
||||
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
|
||||
@ -828,9 +839,9 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
|
||||
continue;
|
||||
}
|
||||
ctl->des_cbfunc = mca_pml_bfo_recv_ctl_completion;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
ctl->des_cbdata = recvreq;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
/* fill in rdma header */
|
||||
hdr = (mca_pml_bfo_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
|
||||
@ -838,9 +849,9 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
|
||||
hdr->hdr_common.hdr_flags =
|
||||
(!recvreq->req_ack_sent) ? MCA_PML_BFO_HDR_TYPE_ACK : 0;
|
||||
hdr->hdr_req = recvreq->remote_req_send;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
hdr->hdr_dst_req.pval = recvreq; /* only needed in the first put message */
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
hdr->hdr_des.pval = dst;
|
||||
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
||||
hdr->hdr_seg_cnt = dst->des_dst_cnt;
|
||||
@ -862,12 +873,12 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
|
||||
/* send rdma request to peer */
|
||||
rc = mca_bml_base_send(bml_btl, ctl, MCA_PML_BFO_HDR_TYPE_PUT);
|
||||
if( OPAL_LIKELY( rc >= 0 ) ) {
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if ((btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
||||
(ctl->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
||||
recvreq->req_events++;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
/* update request state */
|
||||
recvreq->req_rdma_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
@ -993,12 +1004,12 @@ void mca_pml_bfo_recv_req_start(mca_pml_bfo_recv_request_t *req)
|
||||
req->req_bytes_received = 0;
|
||||
req->req_bytes_expected = 0;
|
||||
/* What about req_rdma_cnt ? */
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
req->req_rdma_cnt = 0;
|
||||
req->req_events = 0;
|
||||
req->req_restartseq = 0;
|
||||
req->req_errstate = 0;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
req->req_rdma_idx = 0;
|
||||
req->req_pending = false;
|
||||
req->req_ack_sent = false;
|
||||
|
@ -30,24 +30,23 @@
|
||||
#include "ompi/mca/pml/bfo/pml_bfo_comm.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/pml/base/pml_base_recvreq.h"
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
#define RECVREQ_RECVERRSENT 0x01
|
||||
#define RECVREQ_RNDVRESTART_RECVED 0x02
|
||||
#define RECVREQ_RNDVRESTART_ACKED 0x04
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_pml_bfo_recv_request_t {
|
||||
mca_pml_base_recv_request_t req_recv;
|
||||
ompi_ptr_t remote_req_send;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
int32_t req_msgseq; /* PML sequence number */
|
||||
int32_t req_events; /* number of outstanding events on request */
|
||||
int32_t req_restartseq; /* sequence number of restarted request */
|
||||
int32_t req_errstate; /* state of request if in error */
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
int32_t req_lock;
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_received; /**< amount of data transferred into the user buffer */
|
||||
@ -169,10 +168,10 @@ recv_request_pml_complete(mca_pml_bfo_recv_request_t *recvreq)
|
||||
}
|
||||
}
|
||||
recvreq->req_rdma_cnt = 0;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
/* Initialize to a value that we indicate it is invalid */
|
||||
recvreq->req_msgseq = 42;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#ifdef PML_BFO
|
||||
/* Reset to a value that to indicate it is invalid. */
|
||||
recvreq->req_msgseq = recvreq->req_msgseq - 100;
|
||||
#endif
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||
if(true == recvreq->req_recv.req_base.req_free_called) {
|
||||
@ -201,7 +200,11 @@ recv_request_pml_complete_check(mca_pml_bfo_recv_request_t *recvreq)
|
||||
#endif
|
||||
if(recvreq->req_match_received &&
|
||||
recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed &&
|
||||
#ifdef PML_BFO
|
||||
(0 == recvreq->req_events) && lock_recv_request(recvreq)) {
|
||||
#else
|
||||
lock_recv_request(recvreq)) {
|
||||
#endif
|
||||
recv_request_pml_complete(recvreq);
|
||||
return true;
|
||||
}
|
||||
@ -236,9 +239,9 @@ static inline void recv_req_matched(mca_pml_bfo_recv_request_t *req,
|
||||
req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;
|
||||
req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
|
||||
req->req_match_received = true;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
req->req_msgseq = hdr->hdr_seq;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
#if OPAL_HAVE_THREAD_SUPPORT
|
||||
opal_atomic_wmb();
|
||||
#endif
|
||||
|
@ -31,9 +31,9 @@
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "pml_bfo_rdmafrag.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
#include "pml_bfo_failover.h"
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/memchecker.h"
|
||||
|
||||
@ -183,12 +183,18 @@ mca_pml_bfo_match_completion_free( struct mca_btl_base_module_t* btl,
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
mca_pml_bfo_repost_match_fragment(des);
|
||||
return;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#else
|
||||
/* TSW - FIX */
|
||||
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
#endif
|
||||
}
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "MATCH");
|
||||
#endif
|
||||
mca_pml_bfo_match_completion_free_request( bml_btl, sendreq );
|
||||
}
|
||||
|
||||
@ -229,13 +235,20 @@ mca_pml_bfo_rndv_completion( mca_btl_base_module_t* btl,
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
MCA_PML_BFO_ERROR_ON_RNDV_COMPLETION(sendreq, des);
|
||||
#ifdef PML_BFO
|
||||
if (true == mca_pml_bfo_rndv_completion_status_error(des, sendreq))
|
||||
return;
|
||||
#else
|
||||
/* TSW - FIX */
|
||||
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
sendreq->req_events--;
|
||||
MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_RNDV_COMPLETION(sendreq, status, btl);
|
||||
/* BFO FAILOVER CODE - end */
|
||||
MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
|
||||
MCA_PML_BFO_HDR_TYPE_RNDV, "RNDV");
|
||||
#endif
|
||||
|
||||
/* count bytes of user data actually delivered. As the rndv completion only
|
||||
* happens in one thread, the increase of the req_bytes_delivered does not
|
||||
@ -246,7 +259,9 @@ mca_pml_bfo_rndv_completion( mca_btl_base_module_t* btl,
|
||||
sizeof(mca_pml_bfo_rendezvous_hdr_t),
|
||||
req_bytes_delivered );
|
||||
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RNDV");
|
||||
#endif
|
||||
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered );
|
||||
}
|
||||
|
||||
@ -264,8 +279,9 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
size_t req_bytes_delivered = 0;
|
||||
|
||||
MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_RGET_COMPLETION(sendreq, btl, des);
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des);
|
||||
#endif
|
||||
|
||||
/* count bytes of user data actually delivered and check for request completion */
|
||||
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( des->des_src, des->des_src_cnt,
|
||||
@ -274,8 +290,12 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
/* free the descriptor */
|
||||
#ifdef PML_BFO
|
||||
btl->btl_free(btl, des);
|
||||
MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, "RGET");
|
||||
#else
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
#endif
|
||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
@ -292,15 +312,15 @@ mca_pml_bfo_send_ctl_completion( mca_btl_base_module_t* btl,
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
||||
if(OPAL_LIKELY(OMPI_SUCCESS == status)) {
|
||||
/* check for pending requests */
|
||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "ACK or RGET");
|
||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||
} else {
|
||||
MCA_PML_BFO_ERROR_ON_SEND_CTL_COMPLETION(sendreq, des);
|
||||
#ifdef PML_BFO
|
||||
if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||
mca_pml_bfo_send_ctl_completion_status_error(des);
|
||||
return;
|
||||
}
|
||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, des->des_cbdata, "RGET");
|
||||
#endif
|
||||
/* check for pending requests */
|
||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -317,15 +337,19 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||
size_t req_bytes_delivered = 0;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
sendreq->req_events--;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
sendreq->req_error++;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#else
|
||||
/* TSW - FIX */
|
||||
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* count bytes of user data actually delivered */
|
||||
@ -337,52 +361,23 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1);
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
/* note we check error after bytes delivered computation in case frag made it */
|
||||
if( OPAL_UNLIKELY(sendreq->req_error)) {
|
||||
opal_output_verbose(30, mca_pml_bfo_output,
|
||||
"FRAG: completion: sendreq has error, outstanding events=%d, "
|
||||
"PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d",
|
||||
sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_restartseq, (void *)sendreq,
|
||||
sendreq->req_recv.pval,
|
||||
status, sendreq->req_send.req_base.req_peer);
|
||||
if (0 == sendreq->req_events) {
|
||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false,
|
||||
MCA_PML_BFO_HDR_TYPE_FRAG,
|
||||
status, btl);
|
||||
}
|
||||
return;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
|
||||
MCA_PML_BFO_HDR_TYPE_FRAG, "FRAG");
|
||||
#endif
|
||||
if(send_request_pml_complete_check(sendreq) == false) {
|
||||
mca_pml_bfo_send_request_schedule(sendreq);
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
if( OPAL_UNLIKELY(sendreq->req_error)) {
|
||||
/* This situation can happen if the scheduling function
|
||||
* determined that a BTL was removed from underneath us
|
||||
* and therefore marked the request in error. In that
|
||||
* case, the scheduling of fragments can no longer proceed
|
||||
* properly. Therefore, if no outstanding events, initiate
|
||||
* the restart dance. */
|
||||
opal_output_verbose(30, mca_pml_bfo_output,
|
||||
"FRAG: completion: BTL has been removed, outstanding events=%d, "
|
||||
"PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d",
|
||||
sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_restartseq, (void *)sendreq,
|
||||
sendreq->req_recv.pval,
|
||||
status, sendreq->req_send.req_base.req_peer);
|
||||
if (0 == sendreq->req_events) {
|
||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false,
|
||||
MCA_PML_BFO_HDR_TYPE_FRAG,
|
||||
status, btl);
|
||||
}
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
|
||||
MCA_PML_BFO_HDR_TYPE_FRAG,
|
||||
"FRAG (BTL removal)");
|
||||
#endif
|
||||
}
|
||||
|
||||
/* check for pending requests */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "FRAG");
|
||||
#endif
|
||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
@ -438,7 +433,9 @@ int mca_pml_bfo_send_request_start_buffered(
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV(buffered)");
|
||||
#endif
|
||||
|
||||
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
@ -487,11 +484,11 @@ int mca_pml_bfo_send_request_start_buffered(
|
||||
if( OPAL_LIKELY( 1 == rc ) ) {
|
||||
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered);
|
||||
}
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||
sendreq->req_events++;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
mca_bml_base_free(bml_btl, des );
|
||||
@ -537,14 +534,13 @@ int mca_pml_bfo_send_request_start_copy( mca_pml_bfo_send_request_t* sendreq,
|
||||
MCA_PML_BFO_HDR_TYPE_MATCH,
|
||||
&des);
|
||||
if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) {
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
/* Needed for failover */
|
||||
#ifdef PML_BFO
|
||||
/* Needed in case of failover */
|
||||
if (NULL != des) {
|
||||
des->des_cbfunc = mca_pml_bfo_match_completion_free;
|
||||
des->des_cbdata = sendreq->req_endpoint;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
|
||||
#endif
|
||||
/* signal request completion */
|
||||
send_request_pml_complete(sendreq);
|
||||
|
||||
@ -774,9 +770,9 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RGET");
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
hdr->hdr_rget.hdr_des.pval = src;
|
||||
hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
|
||||
|
||||
@ -826,9 +822,9 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV");
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
@ -852,12 +848,12 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
|
||||
if( OPAL_LIKELY( 1 == rc ) && (true == need_local_cb)) {
|
||||
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, 0 );
|
||||
}
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if ((des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) &&
|
||||
(MCA_PML_BFO_HDR_TYPE_RNDV == hdr->hdr_common.hdr_type)) {
|
||||
sendreq->req_events++;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
@ -925,9 +921,9 @@ int mca_pml_bfo_send_request_start_rndv( mca_pml_bfo_send_request_t* sendreq,
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV");
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
@ -945,11 +941,11 @@ int mca_pml_bfo_send_request_start_rndv( mca_pml_bfo_send_request_t* sendreq,
|
||||
if( OPAL_LIKELY( 1 == rc ) ) {
|
||||
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, size );
|
||||
}
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||
sendreq->req_events++;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
mca_bml_base_free(bml_btl, des );
|
||||
@ -1062,7 +1058,7 @@ mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq)
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
|
||||
assert(range->range_send_length != 0);
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
/* Failover code. If this is true, this means the request thinks we
|
||||
* have more BTLs than there really are. This can happen because
|
||||
* a BTL was removed from the available list. In this case, we
|
||||
@ -1072,7 +1068,7 @@ mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq)
|
||||
sendreq->req_error++;
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
if(prev_bytes_remaining == range->range_send_length)
|
||||
num_fail++;
|
||||
@ -1181,11 +1177,11 @@ cannot_pack:
|
||||
range = get_next_send_range(sendreq, range);
|
||||
prev_bytes_remaining = 0;
|
||||
}
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||
sendreq->req_events++;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
} else {
|
||||
mca_bml_base_free(bml_btl,des);
|
||||
}
|
||||
@ -1209,28 +1205,33 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata;
|
||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)frag->rdma_req;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
sendreq->req_events--;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
sendreq->req_error++;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#else
|
||||
/* TSW - FIX */
|
||||
ORTE_ERROR_LOG(status);
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_PUT_COMPLETION(sendreq, status, btl);
|
||||
#ifdef PML_BFO
|
||||
sendreq->req_events--;
|
||||
MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl);
|
||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RDMA write");
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc,
|
||||
bml_btl,
|
||||
frag->rdma_hdr.hdr_rdma.hdr_des,
|
||||
des->order, 0, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank);
|
||||
#ifdef PML_BFO
|
||||
des->order, 0, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank);
|
||||
#else
|
||||
des->order, 0);
|
||||
#endif
|
||||
|
||||
/* check for request completion */
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
@ -1275,9 +1276,13 @@ int mca_pml_bfo_send_request_put_frag( mca_pml_bfo_rdma_frag_t* frag )
|
||||
/* tell receiver to unregister memory */
|
||||
mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc,
|
||||
bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des,
|
||||
MCA_BTL_NO_ORDER, 1, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank);
|
||||
#ifdef PML_BFO
|
||||
MCA_BTL_NO_ORDER, 1, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank);
|
||||
#else
|
||||
MCA_BTL_NO_ORDER, 1);
|
||||
#endif
|
||||
|
||||
/* send fragment by copy in/out */
|
||||
mca_pml_bfo_send_request_copy_in_out(sendreq,
|
||||
@ -1313,14 +1318,11 @@ int mca_pml_bfo_send_request_put_frag( mca_pml_bfo_rdma_frag_t* frag )
|
||||
orte_errmgr.abort(-1, NULL);
|
||||
}
|
||||
}
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||
mca_pml_bfo_send_request_t *sendreq =
|
||||
(mca_pml_bfo_send_request_t*)frag->rdma_req;
|
||||
sendreq->req_events++;
|
||||
((mca_pml_bfo_send_request_t*)frag->rdma_req)->req_events++;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1341,18 +1343,20 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
|
||||
size_t i, size = 0;
|
||||
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) {
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
/* Handle the failover case where a RNDV request may
|
||||
* have turned into a RGET and therefore the state
|
||||
* is not being tracked. */
|
||||
if (sendreq->req_state != 0) {
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#else
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
#endif
|
||||
}
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
sendreq->req_recv = hdr->hdr_dst_req; /* only needed once, but it is OK */
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
MCA_PML_BFO_RDMA_FRAG_ALLOC(frag, rc);
|
||||
|
||||
@ -1380,21 +1384,10 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
|
||||
}
|
||||
|
||||
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
frag->rdma_btl = btl;
|
||||
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
|
||||
opal_output(0, "[%s:%d] invalid bml for rdma put", __FILE__, __LINE__);
|
||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag);
|
||||
sendreq->req_error++;
|
||||
if (0 == sendreq->req_events) {
|
||||
opal_output(0, "[%s:%d] Issuing rndvrestartnotify", __FILE__, __LINE__);
|
||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false,
|
||||
MCA_PML_BFO_HDR_TYPE_PUT,
|
||||
OMPI_ERROR, btl);
|
||||
}
|
||||
return;
|
||||
}
|
||||
/* BFO FAILOVER CODE - end */
|
||||
MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl);
|
||||
#endif
|
||||
frag->rdma_hdr.hdr_rdma = *hdr;
|
||||
frag->rdma_req = sendreq;
|
||||
frag->rdma_ep = bml_endpoint;
|
||||
|
@ -42,12 +42,12 @@ struct mca_pml_bfo_send_request_t {
|
||||
mca_pml_base_send_request_t req_send;
|
||||
mca_bml_base_endpoint_t* req_endpoint;
|
||||
ompi_ptr_t req_recv;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
int32_t req_events; /* number of outstanding events on request */
|
||||
int32_t req_restartseq; /* sequence number of restarted request */
|
||||
int32_t req_restart; /* state of restarted request */
|
||||
int32_t req_error; /* non-zero when error has occurred on request */
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
int32_t req_state;
|
||||
int32_t req_lock;
|
||||
bool req_throttle_sends;
|
||||
@ -249,7 +249,7 @@ send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq)
|
||||
MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true);
|
||||
}
|
||||
sendreq->req_send.req_base.req_pml_complete = true;
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
assert(0 == sendreq->req_events);
|
||||
sendreq->req_restartseq = 0;
|
||||
/* Since sequence numbers increase monotonically and
|
||||
@ -258,7 +258,7 @@ send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq)
|
||||
* as that is not within the valid range. */
|
||||
sendreq->req_send.req_base.req_sequence =
|
||||
sendreq->req_send.req_base.req_sequence - 10;
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
if(sendreq->req_send.req_base.req_free_called) {
|
||||
MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq);
|
||||
@ -437,12 +437,12 @@ mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq )
|
||||
sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE;
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
|
||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
|
||||
/* BFO FAILOVER CODE - begin */
|
||||
#ifdef PML_BFO
|
||||
sendreq->req_restartseq = 0; /* counts up restarts */
|
||||
sendreq->req_restart = 0; /* reset in case we restart again */
|
||||
sendreq->req_error = 0; /* clear error state */
|
||||
sendreq->req_events = 0; /* clear events, probably 0 anyways */
|
||||
/* BFO FAILOVER CODE - end */
|
||||
#endif
|
||||
|
||||
MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base );
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user