Some more refactoring in the BFO PML. Getting it
as close to OB1 PML as possible. This commit was SVN r23920.
Этот коммит содержится в:
родитель
1766bf271a
Коммит
148ed00dd1
@ -48,9 +48,9 @@
|
|||||||
#include "pml_bfo_sendreq.h"
|
#include "pml_bfo_sendreq.h"
|
||||||
#include "pml_bfo_recvreq.h"
|
#include "pml_bfo_recvreq.h"
|
||||||
#include "pml_bfo_rdmafrag.h"
|
#include "pml_bfo_rdmafrag.h"
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
#include "pml_bfo_failover.h"
|
#include "pml_bfo_failover.h"
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
mca_pml_bfo_t mca_pml_bfo = {
|
mca_pml_bfo_t mca_pml_bfo = {
|
||||||
{
|
{
|
||||||
@ -409,13 +409,12 @@ int mca_pml_bfo_add_procs(ompi_proc_t** procs, size_t nprocs)
|
|||||||
NULL );
|
NULL );
|
||||||
if(OMPI_SUCCESS != rc)
|
if(OMPI_SUCCESS != rc)
|
||||||
goto cleanup_and_return;
|
goto cleanup_and_return;
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
rc = mca_pml_bfo_register_callbacks();
|
rc = mca_pml_bfo_register_callbacks();
|
||||||
if(OMPI_SUCCESS != rc)
|
if(OMPI_SUCCESS != rc)
|
||||||
goto cleanup_and_return;
|
goto cleanup_and_return;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
/* register error handlers */
|
/* register error handlers */
|
||||||
rc = mca_bml.bml_register_error(mca_pml_bfo_error_handler);
|
rc = mca_bml.bml_register_error(mca_pml_bfo_error_handler);
|
||||||
if(OMPI_SUCCESS != rc)
|
if(OMPI_SUCCESS != rc)
|
||||||
@ -472,14 +471,13 @@ static void mca_pml_bfo_fin_completion( mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||||
mca_pml_bfo_repost_fin(des);
|
mca_pml_bfo_repost_fin(des);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des);
|
MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des);
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
/* check for pending requests */
|
/* check for pending requests */
|
||||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||||
}
|
}
|
||||||
@ -494,12 +492,14 @@ int mca_pml_bfo_send_fin( ompi_proc_t* proc,
|
|||||||
mca_bml_base_btl_t* bml_btl,
|
mca_bml_base_btl_t* bml_btl,
|
||||||
ompi_ptr_t hdr_des,
|
ompi_ptr_t hdr_des,
|
||||||
uint8_t order,
|
uint8_t order,
|
||||||
|
#ifdef PML_BFO
|
||||||
uint32_t status,
|
uint32_t status,
|
||||||
/* BFO FAILOVER CODE - begin */
|
|
||||||
uint16_t seq,
|
uint16_t seq,
|
||||||
uint8_t restartseq,
|
uint8_t restartseq,
|
||||||
uint16_t ctx, uint32_t src)
|
uint16_t ctx, uint32_t src)
|
||||||
/* BFO FAILOVER CODE - end */
|
#else
|
||||||
|
uint32_t status )
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
mca_btl_base_descriptor_t* fin;
|
mca_btl_base_descriptor_t* fin;
|
||||||
mca_pml_bfo_fin_hdr_t* hdr;
|
mca_pml_bfo_fin_hdr_t* hdr;
|
||||||
@ -521,13 +521,13 @@ int mca_pml_bfo_send_fin( ompi_proc_t* proc,
|
|||||||
hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN;
|
hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN;
|
||||||
hdr->hdr_des = hdr_des;
|
hdr->hdr_des = hdr_des;
|
||||||
hdr->hdr_fail = status;
|
hdr->hdr_fail = status;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
fin->des_cbdata = proc;
|
fin->des_cbdata = proc;
|
||||||
hdr->hdr_match.hdr_seq = seq;
|
hdr->hdr_match.hdr_seq = seq;
|
||||||
hdr->hdr_match.hdr_ctx = ctx;
|
hdr->hdr_match.hdr_ctx = ctx;
|
||||||
hdr->hdr_match.hdr_src = src;
|
hdr->hdr_match.hdr_src = src;
|
||||||
hdr->hdr_match.hdr_common.hdr_flags = restartseq; /* use unused hdr_flags field */
|
hdr->hdr_match.hdr_common.hdr_flags = restartseq; /* use unused hdr_flags field */
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FIN, proc);
|
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FIN, proc);
|
||||||
|
|
||||||
@ -594,11 +594,15 @@ void mca_pml_bfo_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
|||||||
rc = mca_pml_bfo_send_fin(pckt->proc, send_dst,
|
rc = mca_pml_bfo_send_fin(pckt->proc, send_dst,
|
||||||
pckt->hdr.hdr_fin.hdr_des,
|
pckt->hdr.hdr_fin.hdr_des,
|
||||||
pckt->order,
|
pckt->order,
|
||||||
|
#ifdef PML_BFO
|
||||||
pckt->hdr.hdr_fin.hdr_fail,
|
pckt->hdr.hdr_fin.hdr_fail,
|
||||||
pckt->hdr.hdr_fin.hdr_match.hdr_seq,
|
pckt->hdr.hdr_fin.hdr_match.hdr_seq,
|
||||||
pckt->hdr.hdr_fin.hdr_match.hdr_common.hdr_flags,
|
pckt->hdr.hdr_fin.hdr_match.hdr_common.hdr_flags,
|
||||||
pckt->hdr.hdr_fin.hdr_match.hdr_ctx,
|
pckt->hdr.hdr_fin.hdr_match.hdr_ctx,
|
||||||
pckt->hdr.hdr_fin.hdr_match.hdr_src);
|
pckt->hdr.hdr_fin.hdr_match.hdr_src);
|
||||||
|
#else
|
||||||
|
pckt->hdr.hdr_fin.hdr_fail);
|
||||||
|
#endif
|
||||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) ) {
|
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) ) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -640,14 +644,13 @@ void mca_pml_bfo_process_pending_rdma(void)
|
|||||||
void mca_pml_bfo_error_handler(
|
void mca_pml_bfo_error_handler(
|
||||||
struct mca_btl_base_module_t* btl, int32_t flags,
|
struct mca_btl_base_module_t* btl, int32_t flags,
|
||||||
ompi_proc_t* errproc, char* btlinfo ) {
|
ompi_proc_t* errproc, char* btlinfo ) {
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* If we get a non-fatal error, try to failover */
|
|
||||||
if (flags & MCA_BTL_ERROR_FLAGS_NONFATAL) {
|
if (flags & MCA_BTL_ERROR_FLAGS_NONFATAL) {
|
||||||
mca_pml_bfo_failover_error_handler(btl, flags, errproc, btlinfo);
|
mca_pml_bfo_failover_error_handler(btl, flags, errproc, btlinfo);
|
||||||
/* BFO FAILOVER CODE - end */
|
return;
|
||||||
} else {
|
|
||||||
orte_errmgr.abort(-1, NULL);
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
orte_errmgr.abort(-1, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if OPAL_ENABLE_FT_CR == 0
|
#if OPAL_ENABLE_FT_CR == 0
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#ifndef MCA_PML_BFO_H
|
#ifndef MCA_PML_BFO_H
|
||||||
#define MCA_PML_BFO_H
|
#define MCA_PML_BFO_H
|
||||||
|
|
||||||
|
#define PML_BFO 1
|
||||||
#include "ompi_config.h"
|
#include "ompi_config.h"
|
||||||
#include "ompi/class/ompi_free_list.h"
|
#include "ompi/class/ompi_free_list.h"
|
||||||
#include "ompi/request/request.h"
|
#include "ompi/request/request.h"
|
||||||
@ -225,8 +226,12 @@ do { \
|
|||||||
|
|
||||||
|
|
||||||
int mca_pml_bfo_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
int mca_pml_bfo_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||||
|
#ifdef PML_BFO
|
||||||
ompi_ptr_t hdr_des, uint8_t order, uint32_t status,
|
ompi_ptr_t hdr_des, uint8_t order, uint32_t status,
|
||||||
uint16_t seq, uint8_t reqseq, uint16_t ctx, uint32_t src);
|
uint16_t seq, uint8_t reqseq, uint16_t ctx, uint32_t src);
|
||||||
|
#else
|
||||||
|
ompi_ptr_t hdr_des, uint8_t order, uint32_t status);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* This function tries to resend FIN/ACK packets from pckt_pending queue.
|
/* This function tries to resend FIN/ACK packets from pckt_pending queue.
|
||||||
* Packets are added to the queue when sending of FIN or ACK is failed due to
|
* Packets are added to the queue when sending of FIN or ACK is failed due to
|
||||||
|
@ -2103,3 +2103,102 @@ void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The completion event for the RNDV message has returned with an
|
||||||
|
* error. We know that the send request we are looking at is valid
|
||||||
|
* because it cannot be completed until the sendreq->req_state value
|
||||||
|
* reaches 0. And for the sendreq->req_state to reach 0, the
|
||||||
|
* completion event on the RNDV message must occur. So, we do not
|
||||||
|
* bother checking whether the send request is valid, because we know
|
||||||
|
* it is, but we put a few asserts in for good measure. We then check
|
||||||
|
* a few fields in the request to decide what to do. If the
|
||||||
|
* sendreq->req_error is set, that means that something has happend
|
||||||
|
* already to the request and we do not want to restart it.
|
||||||
|
* Presumably, we may have received a RECVERRNOTIFY message from the
|
||||||
|
* receiver. We also check the sendreq->req_acked field to see if it
|
||||||
|
* has been acked. If it has, then again we do not restart everything
|
||||||
|
* because obviously the RNDV message has made it to the other side.
|
||||||
|
*/
|
||||||
|
bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des,
|
||||||
|
mca_pml_bfo_send_request_t* sendreq)
|
||||||
|
{
|
||||||
|
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_ctx ==
|
||||||
|
(sendreq)->req_send.req_base.req_comm->c_contextid);
|
||||||
|
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_src ==
|
||||||
|
(sendreq)->req_send.req_base.req_comm->c_my_rank);
|
||||||
|
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_seq ==
|
||||||
|
(uint16_t)(sendreq)->req_send.req_base.req_sequence);
|
||||||
|
if ((!(sendreq)->req_error) && (NULL == (sendreq)->req_recv.pval)) {
|
||||||
|
(sendreq)->req_events--;
|
||||||
|
/* Assume RNDV did not make it, so restart from the beginning. */
|
||||||
|
mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RNDV);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check to see if an error has occurred on this send request. If it has
|
||||||
|
* and there are no outstanding events, then we can start the restart dance.
|
||||||
|
*/
|
||||||
|
void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq,
|
||||||
|
int status,
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
int type,
|
||||||
|
char *description)
|
||||||
|
{
|
||||||
|
opal_output_verbose(30, mca_pml_bfo_output,
|
||||||
|
"%s: completion: sendreq has error, outstanding events=%d, "
|
||||||
|
"PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d",
|
||||||
|
description,
|
||||||
|
sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||||
|
sendreq->req_restartseq, (void *)sendreq,
|
||||||
|
sendreq->req_recv.pval,
|
||||||
|
status, sendreq->req_send.req_base.req_peer);
|
||||||
|
if (0 == sendreq->req_events) {
|
||||||
|
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false,
|
||||||
|
type, status, btl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we get an error on the RGET message, then first make sure that
|
||||||
|
* header matches the send request that we are pointing to. This is
|
||||||
|
* necessary, because even though the sending side got an error, the
|
||||||
|
* RGET may have made it to the receiving side and the message transfer
|
||||||
|
* may have completed. This would then mean the send request has been
|
||||||
|
* completed and perhaps in use by another communication. So there is
|
||||||
|
* no need to restart this request. Therefore, ensure that we are
|
||||||
|
* looking at the same request that the header thinks we are looking
|
||||||
|
* at. If not, then there is nothing else to be done. */
|
||||||
|
void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des)
|
||||||
|
{
|
||||||
|
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
||||||
|
mca_pml_bfo_hdr_t* hdr = des->des_src->seg_addr.pval;
|
||||||
|
switch (hdr->hdr_common.hdr_type) {
|
||||||
|
case MCA_PML_BFO_HDR_TYPE_RGET:
|
||||||
|
if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) ||
|
||||||
|
(hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_comm->c_my_rank) ||
|
||||||
|
(hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) {
|
||||||
|
opal_output_verbose(30, mca_pml_bfo_output,
|
||||||
|
"RGET: completion event: dropping because no valid request "
|
||||||
|
"PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d "
|
||||||
|
"RQS:exp=%d,act=%d, dst_req=%p",
|
||||||
|
(uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||||
|
hdr->hdr_match.hdr_seq,
|
||||||
|
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||||
|
hdr->hdr_match.hdr_ctx,
|
||||||
|
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||||
|
hdr->hdr_match.hdr_src,
|
||||||
|
sendreq->req_restartseq, hdr->hdr_rndv.hdr_restartseq,
|
||||||
|
(void *)sendreq);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RGET);
|
||||||
|
return;
|
||||||
|
default:
|
||||||
|
opal_output(0, "%s:%d FATAL ERROR, unknown header (hdr=%d)",
|
||||||
|
__FILE__, __LINE__, hdr->hdr_common.hdr_type);
|
||||||
|
orte_errmgr.abort(-1, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -105,6 +105,16 @@ void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl,
|
|||||||
mca_pml_bfo_recv_request_t* recvreq,
|
mca_pml_bfo_recv_request_t* recvreq,
|
||||||
char* type);
|
char* type);
|
||||||
|
|
||||||
|
bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des,
|
||||||
|
mca_pml_bfo_send_request_t* sendreq);
|
||||||
|
void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des);
|
||||||
|
|
||||||
|
|
||||||
|
void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq,
|
||||||
|
int status,
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
int type,
|
||||||
|
char *description);
|
||||||
/**
|
/**
|
||||||
* Four new callbacks for the four new message types.
|
* Four new callbacks for the four new message types.
|
||||||
*/
|
*/
|
||||||
@ -243,57 +253,33 @@ extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t
|
|||||||
* Macros for pml_bfo_sendreq.c file.
|
* Macros for pml_bfo_sendreq.c file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* The completion event for the RNDV message has returned with an
|
|
||||||
* error. We know that the send request we are looking at is valid
|
|
||||||
* because it cannot be completed until the sendreq->req_state value
|
|
||||||
* reaches 0. And for the sendreq->req_state to reach 0, the
|
|
||||||
* completion event on the RNDV message must occur. So, we do not
|
|
||||||
* bother checking whether the send request is valid, because we know
|
|
||||||
* it is, but we put a few asserts in for good measure. We then check
|
|
||||||
* a few fields in the request to decide what to do. If the
|
|
||||||
* sendreq->req_error is set, that means that something has happend
|
|
||||||
* already to the request and we do not want to restart it.
|
|
||||||
* Presumably, we may have received a RECVERRNOTIFY message from the
|
|
||||||
* receiver. We also check the sendreq->req_acked field to see if it
|
|
||||||
* has been acked. If it has, then again we do not restart everything
|
|
||||||
* because obviously the RNDV message has made it to the other side. */
|
|
||||||
#define MCA_PML_BFO_ERROR_ON_RNDV_COMPLETION(sendreq, des) \
|
|
||||||
do { \
|
|
||||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_ctx == \
|
|
||||||
(sendreq)->req_send.req_base.req_comm->c_contextid); \
|
|
||||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_src == \
|
|
||||||
(sendreq)->req_send.req_base.req_comm->c_my_rank); \
|
|
||||||
assert(((mca_pml_bfo_hdr_t*)((des)->des_src->seg_addr.pval))->hdr_match.hdr_seq == \
|
|
||||||
(uint16_t)(sendreq)->req_send.req_base.req_sequence); \
|
|
||||||
if ((!(sendreq)->req_error) && (NULL == (sendreq)->req_recv.pval)) { \
|
|
||||||
(sendreq)->req_events--; \
|
|
||||||
/* Assume RNDV did not make it, so restart from the beginning. */ \
|
|
||||||
mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RNDV); \
|
|
||||||
return; \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/* Now check the error state. This request can be in error if the
|
/* Now check the error state. This request can be in error if the
|
||||||
* RNDV message made it over, but the receiver got an error trying to
|
* RNDV message made it over, but the receiver got an error trying to
|
||||||
* send the ACK back and therefore sent a RECVERRNOTIFY message. In
|
* send the ACK back and therefore sent a RECVERRNOTIFY message. In
|
||||||
* that case, we want to start the restart dance as the receiver has
|
* that case, we want to start the restart dance as the receiver has
|
||||||
* matched this message already. Only restart if there are no
|
* matched this message already. Only restart if there are no
|
||||||
* outstanding events on send request. */
|
* outstanding events on send request. */
|
||||||
#define MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_RNDV_COMPLETION(sendreq, status, btl) \
|
#define MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \
|
||||||
if ((sendreq)->req_error) { \
|
if( OPAL_UNLIKELY ((sendreq)->req_error)) { \
|
||||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \
|
||||||
"RNDV: completion: sendreq has error, outstanding events=%d, " \
|
btl, type, description); \
|
||||||
"PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, status=%d, peer=%d", \
|
return; \
|
||||||
(sendreq)->req_events, \
|
}
|
||||||
(uint16_t)(sendreq)->req_send.req_base.req_sequence, \
|
|
||||||
(sendreq)->req_restartseq, (unsigned long)(sendreq), \
|
/**
|
||||||
(unsigned long)(sendreq)->req_recv.pval, \
|
* This macro is called within the frag completion function in two
|
||||||
status, (sendreq)->req_send.req_base.req_peer); \
|
* places. It is called to see if any errors occur prior to the
|
||||||
if (0 == (sendreq)->req_events) { \
|
* completion event on the frag. It is then called a second time
|
||||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \
|
* after the scheduling routine is called as the scheduling routine
|
||||||
MCA_PML_BFO_HDR_TYPE_RNDV, \
|
* may have detected that a BTL that was cached on the request had
|
||||||
status, btl); \
|
* been removed and therefore marked the request in error. In that
|
||||||
} \
|
* case, the scheduling of fragments can no longer proceed properly,
|
||||||
|
* and if there are no outstanding events, iniated the restart dance.
|
||||||
|
*/
|
||||||
|
#define MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \
|
||||||
|
if( OPAL_UNLIKELY((sendreq)->req_error)) { \
|
||||||
|
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \
|
||||||
|
btl, type, description); \
|
||||||
return; \
|
return; \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,7 +288,7 @@ do {
|
|||||||
* field is not checked here. That is because that is the value
|
* field is not checked here. That is because that is the value
|
||||||
* returned in the FIN hdr.hdr_fail field and may be used for other
|
* returned in the FIN hdr.hdr_fail field and may be used for other
|
||||||
* things. */
|
* things. */
|
||||||
#define MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_RGET_COMPLETION(sendreq, btl, des) \
|
#define MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des) \
|
||||||
if( OPAL_UNLIKELY(sendreq->req_error)) { \
|
if( OPAL_UNLIKELY(sendreq->req_error)) { \
|
||||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||||
"FIN: received on broken request, skipping, " \
|
"FIN: received on broken request, skipping, " \
|
||||||
@ -314,66 +300,15 @@ do {
|
|||||||
return; \
|
return; \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we get an error on the RGET message, then first make sure that
|
|
||||||
* header matches the send request that we are pointing to. This is
|
|
||||||
* necessary, because even though the sending side got an error, the
|
|
||||||
* RGET may have made it to the receiving side and the message transfer
|
|
||||||
* may have completed. This would then mean the send request has been
|
|
||||||
* completed and perhaps in use by another communication. So there is
|
|
||||||
* no need to restart this request. Therefore, ensure that we are
|
|
||||||
* looking at the same request that the header thinks we are looking
|
|
||||||
* at. If not, then there is nothing else to be done. */
|
|
||||||
#define MCA_PML_BFO_ERROR_ON_SEND_CTL_COMPLETION(sendreq, des) \
|
|
||||||
do { \
|
|
||||||
mca_pml_bfo_hdr_t* hdr = des->des_src->seg_addr.pval; \
|
|
||||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata; \
|
|
||||||
switch (hdr->hdr_common.hdr_type) { \
|
|
||||||
case MCA_PML_BFO_HDR_TYPE_RGET: \
|
|
||||||
if ((hdr->hdr_match.hdr_ctx != sendreq->req_send.req_base.req_comm->c_contextid) || \
|
|
||||||
(hdr->hdr_match.hdr_src != sendreq->req_send.req_base.req_comm->c_my_rank) || \
|
|
||||||
(hdr->hdr_match.hdr_seq != (uint16_t)sendreq->req_send.req_base.req_sequence)) { \
|
|
||||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
|
||||||
"RGET: completion event: dropping because no valid request " \
|
|
||||||
"PML:exp=%d,act=%d CTX:exp=%d,act=%d SRC:exp=%d,act=%d " \
|
|
||||||
"RQS:exp=%d,act=%d, dst_req=%p", \
|
|
||||||
(uint16_t)sendreq->req_send.req_base.req_sequence, \
|
|
||||||
hdr->hdr_match.hdr_seq, \
|
|
||||||
sendreq->req_send.req_base.req_comm->c_contextid, \
|
|
||||||
hdr->hdr_match.hdr_ctx, \
|
|
||||||
sendreq->req_send.req_base.req_comm->c_my_rank, \
|
|
||||||
hdr->hdr_match.hdr_src, \
|
|
||||||
sendreq->req_restartseq, hdr->hdr_rndv.hdr_restartseq, \
|
|
||||||
(void *)sendreq); \
|
|
||||||
return; \
|
|
||||||
} \
|
|
||||||
mca_pml_bfo_send_request_restart(sendreq, true, MCA_PML_BFO_HDR_TYPE_RGET); \
|
|
||||||
return; \
|
|
||||||
default: \
|
|
||||||
opal_output(0, "%s:%d FATAL ERROR, unknown header (hdr=%d)", \
|
|
||||||
__FILE__, __LINE__, hdr->hdr_common.hdr_type); \
|
|
||||||
orte_errmgr.abort(-1, NULL); \
|
|
||||||
} \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/* Check if there has been an error on the send request when we get
|
/* Check if there has been an error on the send request when we get
|
||||||
* a completion event on the RDMA write. */
|
* a completion event on the RDMA write. */
|
||||||
#define MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_PUT_COMPLETION(sendreq, status, btl) \
|
#define MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl) \
|
||||||
if ( OPAL_UNLIKELY(sendreq->req_error)) { \
|
if ( OPAL_UNLIKELY(sendreq->req_error)) { \
|
||||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, btl, \
|
||||||
"RDMA write: completion: sendreq has error, outstanding events=%d," \
|
MCA_PML_BFO_HDR_TYPE_PUT, "RDMA write"); \
|
||||||
" PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d", \
|
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
||||||
sendreq->req_events, \
|
return; \
|
||||||
(uint16_t)sendreq->req_send.req_base.req_sequence, \
|
|
||||||
sendreq->req_restartseq, (void *)sendreq, \
|
|
||||||
sendreq->req_recv.pval, \
|
|
||||||
status, sendreq->req_send.req_base.req_peer); \
|
|
||||||
if (0 == sendreq->req_events) { \
|
|
||||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \
|
|
||||||
MCA_PML_BFO_HDR_TYPE_PUT, \
|
|
||||||
status, btl); \
|
|
||||||
} \
|
|
||||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
|
||||||
return; \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, type) \
|
#define MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, type) \
|
||||||
@ -416,6 +351,28 @@ do {
|
|||||||
mca_pml_bfo_update_eager_bml_btl_recv_ctl(&bml_btl, btl, des); \
|
mca_pml_bfo_update_eager_bml_btl_recv_ctl(&bml_btl, btl, des); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl) \
|
||||||
|
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { \
|
||||||
|
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||||
|
"PUT received: no matching BTL to RDMA write to, oustanding " \
|
||||||
|
"events=%d, PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \
|
||||||
|
sendreq->req_events, \
|
||||||
|
(uint16_t)sendreq->req_send.req_base.req_sequence, \
|
||||||
|
sendreq->req_restartseq, (void *)sendreq, \
|
||||||
|
sendreq->req_recv.pval, sendreq->req_send.req_base.req_peer); \
|
||||||
|
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
||||||
|
sendreq->req_error++; \
|
||||||
|
if (0 == sendreq->req_events) { \
|
||||||
|
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \
|
||||||
|
MCA_PML_BFO_HDR_TYPE_PUT, \
|
||||||
|
OMPI_ERROR, btl); \
|
||||||
|
} \
|
||||||
|
return; \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -36,7 +36,9 @@ typedef enum {
|
|||||||
struct mca_pml_bfo_rdma_frag_t {
|
struct mca_pml_bfo_rdma_frag_t {
|
||||||
ompi_free_list_item_t super;
|
ompi_free_list_item_t super;
|
||||||
mca_bml_base_btl_t* rdma_bml;
|
mca_bml_base_btl_t* rdma_bml;
|
||||||
|
#ifdef PML_BFO
|
||||||
mca_btl_base_module_t* rdma_btl;
|
mca_btl_base_module_t* rdma_btl;
|
||||||
|
#endif
|
||||||
mca_pml_bfo_hdr_t rdma_hdr;
|
mca_pml_bfo_hdr_t rdma_hdr;
|
||||||
mca_pml_bfo_rdma_state_t rdma_state;
|
mca_pml_bfo_rdma_state_t rdma_state;
|
||||||
size_t rdma_length;
|
size_t rdma_length;
|
||||||
|
@ -41,9 +41,9 @@
|
|||||||
#include "pml_bfo_recvreq.h"
|
#include "pml_bfo_recvreq.h"
|
||||||
#include "pml_bfo_sendreq.h"
|
#include "pml_bfo_sendreq.h"
|
||||||
#include "pml_bfo_hdr.h"
|
#include "pml_bfo_hdr.h"
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
#include "pml_bfo_failover.h"
|
#include "pml_bfo_failover.h"
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
OBJ_CLASS_INSTANCE( mca_pml_bfo_buffer_t,
|
OBJ_CLASS_INSTANCE( mca_pml_bfo_buffer_t,
|
||||||
ompi_free_list_item_t,
|
ompi_free_list_item_t,
|
||||||
@ -242,13 +242,13 @@ void mca_pml_bfo_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
slow_path:
|
slow_path:
|
||||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* Check for duplicate messages. If message is duplicate, then just
|
/* Check for duplicate messages. If message is duplicate, then just
|
||||||
* return as that essentially drops the message. */
|
* return as that essentially drops the message. */
|
||||||
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
mca_pml_bfo_recv_frag_match(btl, hdr, segments,
|
mca_pml_bfo_recv_frag_match(btl, hdr, segments,
|
||||||
num_segments, MCA_PML_BFO_HDR_TYPE_MATCH);
|
num_segments, MCA_PML_BFO_HDR_TYPE_MATCH);
|
||||||
}
|
}
|
||||||
@ -306,9 +306,9 @@ void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
|||||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_ACK);
|
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_ACK);
|
||||||
sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_ack.hdr_src_req.pval;
|
sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_ack.hdr_src_req.pval;
|
||||||
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
|
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq)
|
MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq)
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
/* if the request should be delivered entirely by copy in/out
|
/* if the request should be delivered entirely by copy in/out
|
||||||
* then throttle sends */
|
* then throttle sends */
|
||||||
@ -352,9 +352,9 @@ void mca_pml_bfo_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
|||||||
}
|
}
|
||||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FRAG);
|
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FRAG);
|
||||||
recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
|
recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq)
|
MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq)
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
mca_pml_bfo_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt);
|
mca_pml_bfo_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
@ -375,9 +375,9 @@ void mca_pml_bfo_recv_frag_callback_put(mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_PUT);
|
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_PUT);
|
||||||
sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_rdma.hdr_req.pval;
|
sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_rdma.hdr_req.pval;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq)
|
MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq)
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
mca_pml_bfo_send_request_put(sendreq,btl,&hdr->hdr_rdma);
|
mca_pml_bfo_send_request_put(sendreq,btl,&hdr->hdr_rdma);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
@ -398,11 +398,11 @@ void mca_pml_bfo_recv_frag_callback_fin(mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FIN);
|
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FIN);
|
||||||
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
|
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if (true == mca_pml_bfo_is_duplicate_fin(hdr, rdma, btl)) {
|
if (true == mca_pml_bfo_is_duplicate_fin(hdr, rdma, btl)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
rdma->des_cbfunc(btl, NULL, rdma,
|
rdma->des_cbfunc(btl, NULL, rdma,
|
||||||
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);
|
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);
|
||||||
|
|
||||||
@ -626,7 +626,7 @@ static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl,
|
|||||||
* the fragment.
|
* the fragment.
|
||||||
*/
|
*/
|
||||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* In case of network failover, we may get a message telling us to
|
/* In case of network failover, we may get a message telling us to
|
||||||
* restart. In that case, we already have a pointer to the receive
|
* restart. In that case, we already have a pointer to the receive
|
||||||
* request in the header itself. */
|
* request in the header itself. */
|
||||||
@ -635,8 +635,8 @@ static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl,
|
|||||||
if (NULL == match) {
|
if (NULL == match) {
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
|
||||||
} else {
|
} else {
|
||||||
|
#endif
|
||||||
|
|
||||||
/* get sequence number of next message that can be processed */
|
/* get sequence number of next message that can be processed */
|
||||||
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
|
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
|
||||||
@ -673,8 +673,9 @@ out_of_order_match:
|
|||||||
|
|
||||||
/* release matching lock before processing fragment */
|
/* release matching lock before processing fragment */
|
||||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||||
|
#ifdef PML_BFO
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if(OPAL_LIKELY(match)) {
|
if(OPAL_LIKELY(match)) {
|
||||||
switch(type) {
|
switch(type) {
|
||||||
case MCA_PML_BFO_HDR_TYPE_MATCH:
|
case MCA_PML_BFO_HDR_TYPE_MATCH:
|
||||||
@ -716,14 +717,13 @@ wrong_seq:
|
|||||||
* This message comes after the next expected, so it
|
* This message comes after the next expected, so it
|
||||||
* is ahead of sequence. Save it for later.
|
* is ahead of sequence. Save it for later.
|
||||||
*/
|
*/
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* Check for duplicate messages. If message is duplicate, then just
|
/* Check for duplicate messages. If message is duplicate, then just
|
||||||
* return as that essentially drops the message. */
|
* return as that essentially drops the message. */
|
||||||
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments,
|
append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments,
|
||||||
num_segments, NULL);
|
num_segments, NULL);
|
||||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||||
|
@ -33,9 +33,9 @@
|
|||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "opal/util/arch.h"
|
#include "opal/util/arch.h"
|
||||||
#include "ompi/memchecker.h"
|
#include "ompi/memchecker.h"
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
#include "pml_bfo_failover.h"
|
#include "pml_bfo_failover.h"
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
void mca_pml_bfo_recv_request_process_pending(void)
|
void mca_pml_bfo_recv_request_process_pending(void)
|
||||||
{
|
{
|
||||||
@ -170,13 +170,13 @@ static void mca_pml_bfo_recv_ctl_completion( mca_btl_base_module_t* btl,
|
|||||||
{
|
{
|
||||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) {
|
if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) {
|
||||||
mca_pml_bfo_check_recv_ctl_completion_status(btl, des, status);
|
mca_pml_bfo_check_recv_ctl_completion_status(btl, des, status);
|
||||||
}
|
}
|
||||||
|
|
||||||
MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des);
|
MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des);
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -199,12 +199,13 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
|
|||||||
}
|
}
|
||||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
|
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
|
||||||
|
|
||||||
|
#ifdef PML_BFO
|
||||||
btl->btl_free(btl, des);
|
btl->btl_free(btl, des);
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
|
||||||
MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq);
|
MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq);
|
||||||
MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, "PUT");
|
MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, "PUT");
|
||||||
/* BFO FAILOVER CODE - end */
|
#else
|
||||||
|
mca_bml_base_free(bml_btl, des);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* check completion status */
|
/* check completion status */
|
||||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||||
@ -249,18 +250,18 @@ int mca_pml_bfo_recv_request_ack_send_btl(
|
|||||||
|
|
||||||
/* initialize descriptor */
|
/* initialize descriptor */
|
||||||
des->des_cbfunc = mca_pml_bfo_recv_ctl_completion;
|
des->des_cbfunc = mca_pml_bfo_recv_ctl_completion;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
des->des_cbdata = (void *)proc;
|
des->des_cbdata = (void *)proc;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_ACK);
|
rc = mca_bml_base_send(bml_btl, des, MCA_PML_BFO_HDR_TYPE_ACK);
|
||||||
if( OPAL_LIKELY( rc >= 0 ) ) {
|
if( OPAL_LIKELY( rc >= 0 ) ) {
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
||||||
(des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
(des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
||||||
((mca_pml_bfo_recv_request_t *)hdr_dst_req)->req_events++;
|
((mca_pml_bfo_recv_request_t *)hdr_dst_req)->req_events++;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
mca_bml_base_free(bml_btl, des);
|
mca_bml_base_free(bml_btl, des);
|
||||||
@ -346,29 +347,39 @@ static void mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
|
|||||||
mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata;
|
mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata;
|
||||||
mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req;
|
mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)frag->rdma_req;
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) {
|
if (btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) {
|
||||||
recvreq->req_events--;
|
recvreq->req_events--;
|
||||||
assert(recvreq->req_events >= 0);
|
assert(recvreq->req_events >= 0);
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
/* check completion status */
|
/* check completion status */
|
||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||||
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq);
|
MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq);
|
||||||
|
#else
|
||||||
|
/* TSW - FIX */
|
||||||
|
ORTE_ERROR_LOG(status);
|
||||||
|
orte_errmgr.abort(-1, NULL);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl);
|
MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl);
|
||||||
MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, "RDMA write");
|
MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, "RDMA write");
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
mca_pml_bfo_send_fin(recvreq->req_recv.req_base.req_proc,
|
mca_pml_bfo_send_fin(recvreq->req_recv.req_base.req_proc,
|
||||||
bml_btl,
|
bml_btl,
|
||||||
frag->rdma_hdr.hdr_rget.hdr_des,
|
frag->rdma_hdr.hdr_rget.hdr_des,
|
||||||
des->order, 0, (uint16_t)recvreq->req_msgseq, recvreq->req_restartseq,
|
#ifdef PML_BFO
|
||||||
recvreq->req_recv.req_base.req_comm->c_contextid,
|
des->order, 0, (uint16_t)recvreq->req_msgseq, recvreq->req_restartseq,
|
||||||
recvreq->req_recv.req_base.req_comm->c_my_rank);
|
recvreq->req_recv.req_base.req_comm->c_contextid,
|
||||||
|
recvreq->req_recv.req_base.req_comm->c_my_rank);
|
||||||
|
#else
|
||||||
|
des->order, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* is receive request complete */
|
/* is receive request complete */
|
||||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||||
@ -432,12 +443,12 @@ int mca_pml_bfo_recv_request_get_frag( mca_pml_bfo_rdma_frag_t* frag )
|
|||||||
orte_errmgr.abort(-1, NULL);
|
orte_errmgr.abort(-1, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
if ((bml_btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
||||||
(descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
(descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
||||||
recvreq->req_events++;
|
recvreq->req_events++;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -519,9 +530,9 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
|
|||||||
0, bytes_received );
|
0, bytes_received );
|
||||||
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
|
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
|
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match);
|
MCA_PML_BFO_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match);
|
||||||
|
|
||||||
/* if receive buffer is not contiguous we can't just RDMA read into it, so
|
/* if receive buffer is not contiguous we can't just RDMA read into it, so
|
||||||
@ -556,9 +567,9 @@ void mca_pml_bfo_recv_request_progress_rget( mca_pml_bfo_recv_request_t* recvreq
|
|||||||
size += hdr->hdr_segs[i].seg_len;
|
size += hdr->hdr_segs[i].seg_len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
frag->rdma_btl = btl;
|
frag->rdma_btl = btl;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||||
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
|
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
|
||||||
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
|
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
|
||||||
@ -828,9 +839,9 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
ctl->des_cbfunc = mca_pml_bfo_recv_ctl_completion;
|
ctl->des_cbfunc = mca_pml_bfo_recv_ctl_completion;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
ctl->des_cbdata = recvreq;
|
ctl->des_cbdata = recvreq;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
/* fill in rdma header */
|
/* fill in rdma header */
|
||||||
hdr = (mca_pml_bfo_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
|
hdr = (mca_pml_bfo_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
|
||||||
@ -838,9 +849,9 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
|
|||||||
hdr->hdr_common.hdr_flags =
|
hdr->hdr_common.hdr_flags =
|
||||||
(!recvreq->req_ack_sent) ? MCA_PML_BFO_HDR_TYPE_ACK : 0;
|
(!recvreq->req_ack_sent) ? MCA_PML_BFO_HDR_TYPE_ACK : 0;
|
||||||
hdr->hdr_req = recvreq->remote_req_send;
|
hdr->hdr_req = recvreq->remote_req_send;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
hdr->hdr_dst_req.pval = recvreq; /* only needed in the first put message */
|
hdr->hdr_dst_req.pval = recvreq; /* only needed in the first put message */
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
hdr->hdr_des.pval = dst;
|
hdr->hdr_des.pval = dst;
|
||||||
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
||||||
hdr->hdr_seg_cnt = dst->des_dst_cnt;
|
hdr->hdr_seg_cnt = dst->des_dst_cnt;
|
||||||
@ -862,12 +873,12 @@ int mca_pml_bfo_recv_request_schedule_once( mca_pml_bfo_recv_request_t* recvreq,
|
|||||||
/* send rdma request to peer */
|
/* send rdma request to peer */
|
||||||
rc = mca_bml_base_send(bml_btl, ctl, MCA_PML_BFO_HDR_TYPE_PUT);
|
rc = mca_bml_base_send(bml_btl, ctl, MCA_PML_BFO_HDR_TYPE_PUT);
|
||||||
if( OPAL_LIKELY( rc >= 0 ) ) {
|
if( OPAL_LIKELY( rc >= 0 ) ) {
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if ((btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
if ((btl->btl_flags & MCA_BTL_FLAGS_FAILOVER_SUPPORT) &&
|
||||||
(ctl->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
(ctl->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
|
||||||
recvreq->req_events++;
|
recvreq->req_events++;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
/* update request state */
|
/* update request state */
|
||||||
recvreq->req_rdma_offset += size;
|
recvreq->req_rdma_offset += size;
|
||||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||||
@ -993,12 +1004,12 @@ void mca_pml_bfo_recv_req_start(mca_pml_bfo_recv_request_t *req)
|
|||||||
req->req_bytes_received = 0;
|
req->req_bytes_received = 0;
|
||||||
req->req_bytes_expected = 0;
|
req->req_bytes_expected = 0;
|
||||||
/* What about req_rdma_cnt ? */
|
/* What about req_rdma_cnt ? */
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
req->req_rdma_cnt = 0;
|
req->req_rdma_cnt = 0;
|
||||||
req->req_events = 0;
|
req->req_events = 0;
|
||||||
req->req_restartseq = 0;
|
req->req_restartseq = 0;
|
||||||
req->req_errstate = 0;
|
req->req_errstate = 0;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
req->req_rdma_idx = 0;
|
req->req_rdma_idx = 0;
|
||||||
req->req_pending = false;
|
req->req_pending = false;
|
||||||
req->req_ack_sent = false;
|
req->req_ack_sent = false;
|
||||||
|
@ -30,24 +30,23 @@
|
|||||||
#include "ompi/mca/pml/bfo/pml_bfo_comm.h"
|
#include "ompi/mca/pml/bfo/pml_bfo_comm.h"
|
||||||
#include "ompi/mca/mpool/base/base.h"
|
#include "ompi/mca/mpool/base/base.h"
|
||||||
#include "ompi/mca/pml/base/pml_base_recvreq.h"
|
#include "ompi/mca/pml/base/pml_base_recvreq.h"
|
||||||
|
#ifdef PML_BFO
|
||||||
/* BFO FAILOVER CODE - begin */
|
|
||||||
#define RECVREQ_RECVERRSENT 0x01
|
#define RECVREQ_RECVERRSENT 0x01
|
||||||
#define RECVREQ_RNDVRESTART_RECVED 0x02
|
#define RECVREQ_RNDVRESTART_RECVED 0x02
|
||||||
#define RECVREQ_RNDVRESTART_ACKED 0x04
|
#define RECVREQ_RNDVRESTART_ACKED 0x04
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
BEGIN_C_DECLS
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
struct mca_pml_bfo_recv_request_t {
|
struct mca_pml_bfo_recv_request_t {
|
||||||
mca_pml_base_recv_request_t req_recv;
|
mca_pml_base_recv_request_t req_recv;
|
||||||
ompi_ptr_t remote_req_send;
|
ompi_ptr_t remote_req_send;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
int32_t req_msgseq; /* PML sequence number */
|
int32_t req_msgseq; /* PML sequence number */
|
||||||
int32_t req_events; /* number of outstanding events on request */
|
int32_t req_events; /* number of outstanding events on request */
|
||||||
int32_t req_restartseq; /* sequence number of restarted request */
|
int32_t req_restartseq; /* sequence number of restarted request */
|
||||||
int32_t req_errstate; /* state of request if in error */
|
int32_t req_errstate; /* state of request if in error */
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
int32_t req_lock;
|
int32_t req_lock;
|
||||||
size_t req_pipeline_depth;
|
size_t req_pipeline_depth;
|
||||||
size_t req_bytes_received; /**< amount of data transferred into the user buffer */
|
size_t req_bytes_received; /**< amount of data transferred into the user buffer */
|
||||||
@ -169,10 +168,10 @@ recv_request_pml_complete(mca_pml_bfo_recv_request_t *recvreq)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
recvreq->req_rdma_cnt = 0;
|
recvreq->req_rdma_cnt = 0;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* Initialize to a value that we indicate it is invalid */
|
/* Reset to a value that to indicate it is invalid. */
|
||||||
recvreq->req_msgseq = 42;
|
recvreq->req_msgseq = recvreq->req_msgseq - 100;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||||
if(true == recvreq->req_recv.req_base.req_free_called) {
|
if(true == recvreq->req_recv.req_base.req_free_called) {
|
||||||
@ -201,7 +200,11 @@ recv_request_pml_complete_check(mca_pml_bfo_recv_request_t *recvreq)
|
|||||||
#endif
|
#endif
|
||||||
if(recvreq->req_match_received &&
|
if(recvreq->req_match_received &&
|
||||||
recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed &&
|
recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed &&
|
||||||
|
#ifdef PML_BFO
|
||||||
(0 == recvreq->req_events) && lock_recv_request(recvreq)) {
|
(0 == recvreq->req_events) && lock_recv_request(recvreq)) {
|
||||||
|
#else
|
||||||
|
lock_recv_request(recvreq)) {
|
||||||
|
#endif
|
||||||
recv_request_pml_complete(recvreq);
|
recv_request_pml_complete(recvreq);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -236,9 +239,9 @@ static inline void recv_req_matched(mca_pml_bfo_recv_request_t *req,
|
|||||||
req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;
|
req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;
|
||||||
req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
|
req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
|
||||||
req->req_match_received = true;
|
req->req_match_received = true;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
req->req_msgseq = hdr->hdr_seq;
|
req->req_msgseq = hdr->hdr_seq;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
#if OPAL_HAVE_THREAD_SUPPORT
|
#if OPAL_HAVE_THREAD_SUPPORT
|
||||||
opal_atomic_wmb();
|
opal_atomic_wmb();
|
||||||
#endif
|
#endif
|
||||||
|
@ -31,9 +31,9 @@
|
|||||||
#include "pml_bfo_sendreq.h"
|
#include "pml_bfo_sendreq.h"
|
||||||
#include "pml_bfo_rdmafrag.h"
|
#include "pml_bfo_rdmafrag.h"
|
||||||
#include "pml_bfo_recvreq.h"
|
#include "pml_bfo_recvreq.h"
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
#include "pml_bfo_failover.h"
|
#include "pml_bfo_failover.h"
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
#include "ompi/mca/bml/base/base.h"
|
#include "ompi/mca/bml/base/base.h"
|
||||||
#include "ompi/memchecker.h"
|
#include "ompi/memchecker.h"
|
||||||
|
|
||||||
@ -183,12 +183,18 @@ mca_pml_bfo_match_completion_free( struct mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
/* check completion status */
|
/* check completion status */
|
||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
mca_pml_bfo_repost_match_fragment(des);
|
mca_pml_bfo_repost_match_fragment(des);
|
||||||
return;
|
return;
|
||||||
/* BFO FAILOVER CODE - end */
|
#else
|
||||||
|
/* TSW - FIX */
|
||||||
|
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
|
||||||
|
orte_errmgr.abort(-1, NULL);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "MATCH");
|
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "MATCH");
|
||||||
|
#endif
|
||||||
mca_pml_bfo_match_completion_free_request( bml_btl, sendreq );
|
mca_pml_bfo_match_completion_free_request( bml_btl, sendreq );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -229,13 +235,20 @@ mca_pml_bfo_rndv_completion( mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
/* check completion status */
|
/* check completion status */
|
||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||||
MCA_PML_BFO_ERROR_ON_RNDV_COMPLETION(sendreq, des);
|
#ifdef PML_BFO
|
||||||
|
if (true == mca_pml_bfo_rndv_completion_status_error(des, sendreq))
|
||||||
|
return;
|
||||||
|
#else
|
||||||
|
/* TSW - FIX */
|
||||||
|
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
|
||||||
|
orte_errmgr.abort(-1, NULL);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
#ifdef PML_BFO
|
||||||
/* BFO FAILOVER CODE - begin */
|
|
||||||
sendreq->req_events--;
|
sendreq->req_events--;
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_RNDV_COMPLETION(sendreq, status, btl);
|
MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
|
||||||
/* BFO FAILOVER CODE - end */
|
MCA_PML_BFO_HDR_TYPE_RNDV, "RNDV");
|
||||||
|
#endif
|
||||||
|
|
||||||
/* count bytes of user data actually delivered. As the rndv completion only
|
/* count bytes of user data actually delivered. As the rndv completion only
|
||||||
* happens in one thread, the increase of the req_bytes_delivered does not
|
* happens in one thread, the increase of the req_bytes_delivered does not
|
||||||
@ -246,7 +259,9 @@ mca_pml_bfo_rndv_completion( mca_btl_base_module_t* btl,
|
|||||||
sizeof(mca_pml_bfo_rendezvous_hdr_t),
|
sizeof(mca_pml_bfo_rendezvous_hdr_t),
|
||||||
req_bytes_delivered );
|
req_bytes_delivered );
|
||||||
|
|
||||||
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RNDV");
|
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RNDV");
|
||||||
|
#endif
|
||||||
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered );
|
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -264,8 +279,9 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
|
|||||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
||||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||||
size_t req_bytes_delivered = 0;
|
size_t req_bytes_delivered = 0;
|
||||||
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_RGET_COMPLETION(sendreq, btl, des);
|
MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* count bytes of user data actually delivered and check for request completion */
|
/* count bytes of user data actually delivered and check for request completion */
|
||||||
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( des->des_src, des->des_src_cnt,
|
MCA_PML_BFO_COMPUTE_SEGMENT_LENGTH( des->des_src, des->des_src_cnt,
|
||||||
@ -274,8 +290,12 @@ mca_pml_bfo_rget_completion( mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
send_request_pml_complete_check(sendreq);
|
send_request_pml_complete_check(sendreq);
|
||||||
/* free the descriptor */
|
/* free the descriptor */
|
||||||
|
#ifdef PML_BFO
|
||||||
btl->btl_free(btl, des);
|
btl->btl_free(btl, des);
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, "RGET");
|
MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, "RGET");
|
||||||
|
#else
|
||||||
|
mca_bml_base_free(bml_btl, des);
|
||||||
|
#endif
|
||||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -292,15 +312,15 @@ mca_pml_bfo_send_ctl_completion( mca_btl_base_module_t* btl,
|
|||||||
{
|
{
|
||||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
if(OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||||
if(OPAL_LIKELY(OMPI_SUCCESS == status)) {
|
mca_pml_bfo_send_ctl_completion_status_error(des);
|
||||||
/* check for pending requests */
|
return;
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "ACK or RGET");
|
|
||||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
|
||||||
} else {
|
|
||||||
MCA_PML_BFO_ERROR_ON_SEND_CTL_COMPLETION(sendreq, des);
|
|
||||||
}
|
}
|
||||||
|
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, des->des_cbdata, "RGET");
|
||||||
|
#endif
|
||||||
|
/* check for pending requests */
|
||||||
|
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -317,15 +337,19 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
|
|||||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)des->des_cbdata;
|
||||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||||
size_t req_bytes_delivered = 0;
|
size_t req_bytes_delivered = 0;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
sendreq->req_events--;
|
sendreq->req_events--;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
/* check completion status */
|
/* check completion status */
|
||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
sendreq->req_error++;
|
sendreq->req_error++;
|
||||||
/* BFO FAILOVER CODE - end */
|
#else
|
||||||
|
/* TSW - FIX */
|
||||||
|
opal_output(0, "%s:%d FATAL", __FILE__, __LINE__);
|
||||||
|
orte_errmgr.abort(-1, NULL);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* count bytes of user data actually delivered */
|
/* count bytes of user data actually delivered */
|
||||||
@ -337,52 +361,23 @@ mca_pml_bfo_frag_completion( mca_btl_base_module_t* btl,
|
|||||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1);
|
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1);
|
||||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||||
|
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* note we check error after bytes delivered computation in case frag made it */
|
MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
|
||||||
if( OPAL_UNLIKELY(sendreq->req_error)) {
|
MCA_PML_BFO_HDR_TYPE_FRAG, "FRAG");
|
||||||
opal_output_verbose(30, mca_pml_bfo_output,
|
#endif
|
||||||
"FRAG: completion: sendreq has error, outstanding events=%d, "
|
|
||||||
"PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d",
|
|
||||||
sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
|
||||||
sendreq->req_restartseq, (void *)sendreq,
|
|
||||||
sendreq->req_recv.pval,
|
|
||||||
status, sendreq->req_send.req_base.req_peer);
|
|
||||||
if (0 == sendreq->req_events) {
|
|
||||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false,
|
|
||||||
MCA_PML_BFO_HDR_TYPE_FRAG,
|
|
||||||
status, btl);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* BFO FAILOVER CODE - end */
|
|
||||||
if(send_request_pml_complete_check(sendreq) == false) {
|
if(send_request_pml_complete_check(sendreq) == false) {
|
||||||
mca_pml_bfo_send_request_schedule(sendreq);
|
mca_pml_bfo_send_request_schedule(sendreq);
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if( OPAL_UNLIKELY(sendreq->req_error)) {
|
MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl,
|
||||||
/* This situation can happen if the scheduling function
|
MCA_PML_BFO_HDR_TYPE_FRAG,
|
||||||
* determined that a BTL was removed from underneath us
|
"FRAG (BTL removal)");
|
||||||
* and therefore marked the request in error. In that
|
#endif
|
||||||
* case, the scheduling of fragments can no longer proceed
|
|
||||||
* properly. Therefore, if no outstanding events, initiate
|
|
||||||
* the restart dance. */
|
|
||||||
opal_output_verbose(30, mca_pml_bfo_output,
|
|
||||||
"FRAG: completion: BTL has been removed, outstanding events=%d, "
|
|
||||||
"PML=%d, RQS=%d, src_req=%p, dst_req=%p, status=%d, peer=%d",
|
|
||||||
sendreq->req_events, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
|
||||||
sendreq->req_restartseq, (void *)sendreq,
|
|
||||||
sendreq->req_recv.pval,
|
|
||||||
status, sendreq->req_send.req_base.req_peer);
|
|
||||||
if (0 == sendreq->req_events) {
|
|
||||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false,
|
|
||||||
MCA_PML_BFO_HDR_TYPE_FRAG,
|
|
||||||
status, btl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* BFO FAILOVER CODE - end */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check for pending requests */
|
/* check for pending requests */
|
||||||
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "FRAG");
|
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "FRAG");
|
||||||
|
#endif
|
||||||
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -438,7 +433,9 @@ int mca_pml_bfo_send_request_start_buffered(
|
|||||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||||
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV(buffered)");
|
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV(buffered)");
|
||||||
|
#endif
|
||||||
|
|
||||||
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
||||||
sendreq->req_send.req_base.req_proc);
|
sendreq->req_send.req_base.req_proc);
|
||||||
@ -487,11 +484,11 @@ int mca_pml_bfo_send_request_start_buffered(
|
|||||||
if( OPAL_LIKELY( 1 == rc ) ) {
|
if( OPAL_LIKELY( 1 == rc ) ) {
|
||||||
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered);
|
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered);
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||||
sendreq->req_events++;
|
sendreq->req_events++;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
mca_bml_base_free(bml_btl, des );
|
mca_bml_base_free(bml_btl, des );
|
||||||
@ -537,14 +534,13 @@ int mca_pml_bfo_send_request_start_copy( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
MCA_PML_BFO_HDR_TYPE_MATCH,
|
MCA_PML_BFO_HDR_TYPE_MATCH,
|
||||||
&des);
|
&des);
|
||||||
if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) {
|
if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) {
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* Needed for failover */
|
/* Needed in case of failover */
|
||||||
if (NULL != des) {
|
if (NULL != des) {
|
||||||
des->des_cbfunc = mca_pml_bfo_match_completion_free;
|
des->des_cbfunc = mca_pml_bfo_match_completion_free;
|
||||||
des->des_cbdata = sendreq->req_endpoint;
|
des->des_cbdata = sendreq->req_endpoint;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
/* signal request completion */
|
/* signal request completion */
|
||||||
send_request_pml_complete(sendreq);
|
send_request_pml_complete(sendreq);
|
||||||
|
|
||||||
@ -774,9 +770,9 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RGET");
|
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RGET");
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
hdr->hdr_rget.hdr_des.pval = src;
|
hdr->hdr_rget.hdr_des.pval = src;
|
||||||
hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
|
hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
|
||||||
|
|
||||||
@ -826,9 +822,9 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV");
|
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV");
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
||||||
sendreq->req_send.req_base.req_proc);
|
sendreq->req_send.req_base.req_proc);
|
||||||
@ -852,12 +848,12 @@ int mca_pml_bfo_send_request_start_rdma( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
if( OPAL_LIKELY( 1 == rc ) && (true == need_local_cb)) {
|
if( OPAL_LIKELY( 1 == rc ) && (true == need_local_cb)) {
|
||||||
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, 0 );
|
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, 0 );
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if ((des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) &&
|
if ((des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) &&
|
||||||
(MCA_PML_BFO_HDR_TYPE_RNDV == hdr->hdr_common.hdr_type)) {
|
(MCA_PML_BFO_HDR_TYPE_RNDV == hdr->hdr_common.hdr_type)) {
|
||||||
sendreq->req_events++;
|
sendreq->req_events++;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
mca_bml_base_free(bml_btl, des);
|
mca_bml_base_free(bml_btl, des);
|
||||||
@ -925,9 +921,9 @@ int mca_pml_bfo_send_request_start_rndv( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV");
|
MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, "RNDV");
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_RNDV,
|
||||||
sendreq->req_send.req_base.req_proc);
|
sendreq->req_send.req_base.req_proc);
|
||||||
@ -945,11 +941,11 @@ int mca_pml_bfo_send_request_start_rndv( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
if( OPAL_LIKELY( 1 == rc ) ) {
|
if( OPAL_LIKELY( 1 == rc ) ) {
|
||||||
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, size );
|
mca_pml_bfo_rndv_completion_request( bml_btl, sendreq, size );
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||||
sendreq->req_events++;
|
sendreq->req_events++;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
mca_bml_base_free(bml_btl, des );
|
mca_bml_base_free(bml_btl, des );
|
||||||
@ -1062,7 +1058,7 @@ mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq)
|
|||||||
mca_bml_base_btl_t* bml_btl;
|
mca_bml_base_btl_t* bml_btl;
|
||||||
|
|
||||||
assert(range->range_send_length != 0);
|
assert(range->range_send_length != 0);
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* Failover code. If this is true, this means the request thinks we
|
/* Failover code. If this is true, this means the request thinks we
|
||||||
* have more BTLs than there really are. This can happen because
|
* have more BTLs than there really are. This can happen because
|
||||||
* a BTL was removed from the available list. In this case, we
|
* a BTL was removed from the available list. In this case, we
|
||||||
@ -1072,7 +1068,7 @@ mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t* sendreq)
|
|||||||
sendreq->req_error++;
|
sendreq->req_error++;
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
if(prev_bytes_remaining == range->range_send_length)
|
if(prev_bytes_remaining == range->range_send_length)
|
||||||
num_fail++;
|
num_fail++;
|
||||||
@ -1181,11 +1177,11 @@ cannot_pack:
|
|||||||
range = get_next_send_range(sendreq, range);
|
range = get_next_send_range(sendreq, range);
|
||||||
prev_bytes_remaining = 0;
|
prev_bytes_remaining = 0;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||||
sendreq->req_events++;
|
sendreq->req_events++;
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
} else {
|
} else {
|
||||||
mca_bml_base_free(bml_btl,des);
|
mca_bml_base_free(bml_btl,des);
|
||||||
}
|
}
|
||||||
@ -1209,28 +1205,33 @@ static void mca_pml_bfo_put_completion( mca_btl_base_module_t* btl,
|
|||||||
mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata;
|
mca_pml_bfo_rdma_frag_t* frag = (mca_pml_bfo_rdma_frag_t*)des->des_cbdata;
|
||||||
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)frag->rdma_req;
|
mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)frag->rdma_req;
|
||||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||||
/* BFO FAILOVER CODE - begin */
|
|
||||||
sendreq->req_events--;
|
|
||||||
/* BFO FAILOVER CODE - end */
|
|
||||||
|
|
||||||
/* check completion status */
|
/* check completion status */
|
||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
sendreq->req_error++;
|
sendreq->req_error++;
|
||||||
/* BFO FAILOVER CODE - end */
|
#else
|
||||||
|
/* TSW - FIX */
|
||||||
|
ORTE_ERROR_LOG(status);
|
||||||
|
orte_errmgr.abort(-1, NULL);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
#ifdef PML_BFO
|
||||||
/* BFO FAILOVER CODE - begin */
|
sendreq->req_events--;
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_ERROR_ON_PUT_COMPLETION(sendreq, status, btl);
|
MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl);
|
||||||
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RDMA write");
|
MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, "RDMA write");
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc,
|
mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc,
|
||||||
bml_btl,
|
bml_btl,
|
||||||
frag->rdma_hdr.hdr_rdma.hdr_des,
|
frag->rdma_hdr.hdr_rdma.hdr_des,
|
||||||
des->order, 0, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
#ifdef PML_BFO
|
||||||
sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid,
|
des->order, 0, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||||
sendreq->req_send.req_base.req_comm->c_my_rank);
|
sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid,
|
||||||
|
sendreq->req_send.req_base.req_comm->c_my_rank);
|
||||||
|
#else
|
||||||
|
des->order, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* check for request completion */
|
/* check for request completion */
|
||||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||||
@ -1275,9 +1276,13 @@ int mca_pml_bfo_send_request_put_frag( mca_pml_bfo_rdma_frag_t* frag )
|
|||||||
/* tell receiver to unregister memory */
|
/* tell receiver to unregister memory */
|
||||||
mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc,
|
mca_pml_bfo_send_fin(sendreq->req_send.req_base.req_proc,
|
||||||
bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des,
|
bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des,
|
||||||
MCA_BTL_NO_ORDER, 1, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
#ifdef PML_BFO
|
||||||
sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid,
|
MCA_BTL_NO_ORDER, 1, (uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||||
sendreq->req_send.req_base.req_comm->c_my_rank);
|
sendreq->req_restartseq, sendreq->req_send.req_base.req_comm->c_contextid,
|
||||||
|
sendreq->req_send.req_base.req_comm->c_my_rank);
|
||||||
|
#else
|
||||||
|
MCA_BTL_NO_ORDER, 1);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* send fragment by copy in/out */
|
/* send fragment by copy in/out */
|
||||||
mca_pml_bfo_send_request_copy_in_out(sendreq,
|
mca_pml_bfo_send_request_copy_in_out(sendreq,
|
||||||
@ -1313,14 +1318,11 @@ int mca_pml_bfo_send_request_put_frag( mca_pml_bfo_rdma_frag_t* frag )
|
|||||||
orte_errmgr.abort(-1, NULL);
|
orte_errmgr.abort(-1, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||||
mca_pml_bfo_send_request_t *sendreq =
|
((mca_pml_bfo_send_request_t*)frag->rdma_req)->req_events++;
|
||||||
(mca_pml_bfo_send_request_t*)frag->rdma_req;
|
|
||||||
sendreq->req_events++;
|
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1341,18 +1343,20 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
size_t i, size = 0;
|
size_t i, size = 0;
|
||||||
|
|
||||||
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) {
|
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_TYPE_ACK) {
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
/* Handle the failover case where a RNDV request may
|
/* Handle the failover case where a RNDV request may
|
||||||
* have turned into a RGET and therefore the state
|
* have turned into a RGET and therefore the state
|
||||||
* is not being tracked. */
|
* is not being tracked. */
|
||||||
if (sendreq->req_state != 0) {
|
if (sendreq->req_state != 0) {
|
||||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - end */
|
#else
|
||||||
|
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
sendreq->req_recv = hdr->hdr_dst_req; /* only needed once, but it is OK */
|
sendreq->req_recv = hdr->hdr_dst_req; /* only needed once, but it is OK */
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
MCA_PML_BFO_RDMA_FRAG_ALLOC(frag, rc);
|
MCA_PML_BFO_RDMA_FRAG_ALLOC(frag, rc);
|
||||||
|
|
||||||
@ -1380,21 +1384,10 @@ void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
|
|||||||
}
|
}
|
||||||
|
|
||||||
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
frag->rdma_btl = btl;
|
frag->rdma_btl = btl;
|
||||||
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) {
|
MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl);
|
||||||
opal_output(0, "[%s:%d] invalid bml for rdma put", __FILE__, __LINE__);
|
#endif
|
||||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag);
|
|
||||||
sendreq->req_error++;
|
|
||||||
if (0 == sendreq->req_events) {
|
|
||||||
opal_output(0, "[%s:%d] Issuing rndvrestartnotify", __FILE__, __LINE__);
|
|
||||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false,
|
|
||||||
MCA_PML_BFO_HDR_TYPE_PUT,
|
|
||||||
OMPI_ERROR, btl);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* BFO FAILOVER CODE - end */
|
|
||||||
frag->rdma_hdr.hdr_rdma = *hdr;
|
frag->rdma_hdr.hdr_rdma = *hdr;
|
||||||
frag->rdma_req = sendreq;
|
frag->rdma_req = sendreq;
|
||||||
frag->rdma_ep = bml_endpoint;
|
frag->rdma_ep = bml_endpoint;
|
||||||
|
@ -42,12 +42,12 @@ struct mca_pml_bfo_send_request_t {
|
|||||||
mca_pml_base_send_request_t req_send;
|
mca_pml_base_send_request_t req_send;
|
||||||
mca_bml_base_endpoint_t* req_endpoint;
|
mca_bml_base_endpoint_t* req_endpoint;
|
||||||
ompi_ptr_t req_recv;
|
ompi_ptr_t req_recv;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
int32_t req_events; /* number of outstanding events on request */
|
int32_t req_events; /* number of outstanding events on request */
|
||||||
int32_t req_restartseq; /* sequence number of restarted request */
|
int32_t req_restartseq; /* sequence number of restarted request */
|
||||||
int32_t req_restart; /* state of restarted request */
|
int32_t req_restart; /* state of restarted request */
|
||||||
int32_t req_error; /* non-zero when error has occurred on request */
|
int32_t req_error; /* non-zero when error has occurred on request */
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
int32_t req_state;
|
int32_t req_state;
|
||||||
int32_t req_lock;
|
int32_t req_lock;
|
||||||
bool req_throttle_sends;
|
bool req_throttle_sends;
|
||||||
@ -249,7 +249,7 @@ send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq)
|
|||||||
MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true);
|
MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true);
|
||||||
}
|
}
|
||||||
sendreq->req_send.req_base.req_pml_complete = true;
|
sendreq->req_send.req_base.req_pml_complete = true;
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
assert(0 == sendreq->req_events);
|
assert(0 == sendreq->req_events);
|
||||||
sendreq->req_restartseq = 0;
|
sendreq->req_restartseq = 0;
|
||||||
/* Since sequence numbers increase monotonically and
|
/* Since sequence numbers increase monotonically and
|
||||||
@ -258,7 +258,7 @@ send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq)
|
|||||||
* as that is not within the valid range. */
|
* as that is not within the valid range. */
|
||||||
sendreq->req_send.req_base.req_sequence =
|
sendreq->req_send.req_base.req_sequence =
|
||||||
sendreq->req_send.req_base.req_sequence - 10;
|
sendreq->req_send.req_base.req_sequence - 10;
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
if(sendreq->req_send.req_base.req_free_called) {
|
if(sendreq->req_send.req_base.req_free_called) {
|
||||||
MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq);
|
MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq);
|
||||||
@ -437,12 +437,12 @@ mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq )
|
|||||||
sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE;
|
sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE;
|
||||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
|
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
|
||||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
|
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
|
||||||
/* BFO FAILOVER CODE - begin */
|
#ifdef PML_BFO
|
||||||
sendreq->req_restartseq = 0; /* counts up restarts */
|
sendreq->req_restartseq = 0; /* counts up restarts */
|
||||||
sendreq->req_restart = 0; /* reset in case we restart again */
|
sendreq->req_restart = 0; /* reset in case we restart again */
|
||||||
sendreq->req_error = 0; /* clear error state */
|
sendreq->req_error = 0; /* clear error state */
|
||||||
sendreq->req_events = 0; /* clear events, probably 0 anyways */
|
sendreq->req_events = 0; /* clear events, probably 0 anyways */
|
||||||
/* BFO FAILOVER CODE - end */
|
#endif
|
||||||
|
|
||||||
MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base );
|
MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base );
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user