PERUSE support for OB1. There we go, now the trunk has a partial peruse implementation.
We support all the events in the PERUSE specification, but right now only one event of each type can be attached to a communicator. This will be worked out in the future. The events were placed in such a way that we will be able to measure the overhead of our threading implementation (the cost of the synchronization objects). This commit was SVN r9500.
Этот коммит содержится в:
родитель
1226d452bf
Коммит
58cd591d3b
@ -19,7 +19,7 @@
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "pml_ob1_recvreq.h"
|
||||
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
|
||||
int mca_pml_ob1_irecv_init(void *addr,
|
||||
size_t count,
|
||||
|
@ -22,7 +22,7 @@
|
||||
#include "pml_ob1_proc.h"
|
||||
#include "pml_ob1_sendreq.h"
|
||||
#include "pml_ob1_recvreq.h"
|
||||
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
|
||||
int mca_pml_ob1_isend_init(void *buf,
|
||||
size_t count,
|
||||
@ -130,6 +130,7 @@ int mca_pml_ob1_send(void *buf,
|
||||
ompi_request_waiting--;
|
||||
}
|
||||
}
|
||||
|
||||
rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
ompi_request_free( (ompi_request_t**)&sendreq );
|
||||
return rc;
|
||||
|
@ -34,8 +34,7 @@
|
||||
#include "pml_ob1_sendreq.h"
|
||||
#include "pml_ob1_hdr.h"
|
||||
#include "ompi/datatype/dt_arch.h"
|
||||
|
||||
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_pml_ob1_buffer_t,
|
||||
@ -310,6 +309,11 @@ do { \
|
||||
/* remove this recv from the wild receive queue */ \
|
||||
opal_list_remove_item(&comm->wild_receives, \
|
||||
(opal_list_item_t *)wild_recv); \
|
||||
\
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
|
||||
&(wild_recv->req_recv.req_base), \
|
||||
PERUSE_RECV); \
|
||||
\
|
||||
break; \
|
||||
} \
|
||||
\
|
||||
@ -351,6 +355,11 @@ do { \
|
||||
/* remove descriptor from specific receive list */ \
|
||||
opal_list_remove_item(&(proc)->specific_receives, \
|
||||
(opal_list_item_t *)specific_recv); \
|
||||
\
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
|
||||
&(specific_recv->req_recv.req_base), \
|
||||
PERUSE_RECV); \
|
||||
\
|
||||
break; \
|
||||
} \
|
||||
\
|
||||
@ -444,6 +453,15 @@ int mca_pml_ob1_recv_frag_match(
|
||||
frag_msg_seq = hdr->hdr_seq;
|
||||
proc = comm->procs + hdr->hdr_src;
|
||||
|
||||
/**
|
||||
* We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
|
||||
* fragment arrival. Independing if it is received on the correct order or not.
|
||||
* This will allow the tools to figure out if the messages are not received in the
|
||||
* correct order (if multiple network interfaces).
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_ARRIVED, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* get next expected message sequence number - if threaded
|
||||
* run, lock to make sure that if another thread is processing
|
||||
* a frag from the same message a match is made only once.
|
||||
@ -465,6 +483,15 @@ int mca_pml_ob1_recv_frag_match(
|
||||
|
||||
/* We're now expecting the next sequence number. */
|
||||
(proc->expected_sequence)++;
|
||||
|
||||
/**
|
||||
* We generate the SEARCH_POSTED_QUEUE only when the message is received
|
||||
* in the correct sequence. Otherwise, we delay the event generation until
|
||||
* we reach the correct sequence number.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
rematch:
|
||||
|
||||
/*
|
||||
@ -512,12 +539,26 @@ rematch:
|
||||
MCA_PML_OB1_RECV_FRAG_ALLOC(frag, rc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
/**
|
||||
* As we return from the match function, we should generate the expected event.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
return rc;
|
||||
}
|
||||
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
|
||||
opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
|
||||
}
|
||||
|
||||
/**
|
||||
* The match is over. We generate the SEARCH_POSTED_Q_END here, before going
|
||||
* into the mca_pml_ob1_check_cantmatch_for_match so we can make a difference
|
||||
* for the searching time for all messages.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/*
|
||||
* Now that new message has arrived, check to see if
|
||||
* any fragments on the c_c_frags_cant_match list
|
||||
@ -548,12 +589,18 @@ rematch:
|
||||
|
||||
if(match != NULL) {
|
||||
mca_pml_ob1_recv_request_progress(match,btl,segments,num_segments);
|
||||
#if OMPI_WANT_PERUSE
|
||||
} else {
|
||||
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
#endif /* OMPI_WANT_PERUSE */
|
||||
}
|
||||
if(additional_match) {
|
||||
opal_list_item_t* item;
|
||||
while(NULL != (item = opal_list_remove_first(&additional_matches))) {
|
||||
mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
|
||||
mca_pml_ob1_recv_request_progress(frag->request,frag->btl,frag->segments,frag->num_segments);
|
||||
mca_pml_ob1_recv_request_progress( frag->request, frag->btl, frag->segments,
|
||||
frag->num_segments );
|
||||
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
|
||||
}
|
||||
}
|
||||
|
@ -46,6 +46,10 @@ static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request)
|
||||
if( true == recvreq->req_recv.req_base.req_pml_complete ) {
|
||||
MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq );
|
||||
}
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY,
|
||||
&(recvreq->req_recv.req_base), PERUSE_RECV );
|
||||
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||
|
||||
*request = MPI_REQUEST_NULL;
|
||||
@ -70,6 +74,8 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
|
||||
mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer;
|
||||
opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request);
|
||||
}
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
@ -79,7 +85,7 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
|
||||
* on this request will be able to complete. As the status is marked as
|
||||
* cancelled the cancel state will be detected.
|
||||
*/
|
||||
MCA_PML_BASE_REQUEST_MPI_COMPLETE(ompi_request);
|
||||
MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(request);
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -108,7 +114,7 @@ OBJ_CLASS_INSTANCE(
|
||||
* Release resources.
|
||||
*/
|
||||
|
||||
static void mca_pml_ob1_ctl_completion(
|
||||
static void mca_pml_ob1_recv_ctl_completion(
|
||||
mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
@ -267,7 +273,7 @@ static void mca_pml_ob1_recv_request_ack(
|
||||
|
||||
/* initialize descriptor */
|
||||
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
des->des_cbfunc = mca_pml_ob1_ctl_completion;
|
||||
des->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
|
||||
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
@ -701,7 +707,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
||||
break;
|
||||
}
|
||||
ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
ctl->des_cbfunc = mca_pml_ob1_ctl_completion;
|
||||
ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
|
||||
|
||||
/* fill in rdma header */
|
||||
hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
|
||||
@ -767,6 +773,12 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
|
||||
|
||||
/* check for a specific match */
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
/**
|
||||
* The laps of time between the ACTIVATE event and the SEARCH_UNEX one include
|
||||
* the cost of the request lock.
|
||||
*/
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
|
||||
/* assign sequence number */
|
||||
request->req_recv.req_base.req_sequence = comm->recv_sequence++;
|
||||
@ -775,6 +787,9 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
|
||||
(frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
|
||||
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
||||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
||||
mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
|
||||
@ -785,11 +800,18 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
|
||||
return; /* match found */
|
||||
}
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
|
||||
/* We didn't find any matches. Record this irecv so we can match
|
||||
* it when the message comes in.
|
||||
*/
|
||||
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
|
||||
opal_list_append(&proc->specific_receives, (opal_list_item_t*)request);
|
||||
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) {
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_INSERT_IN_POSTED_Q,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
}
|
||||
@ -814,6 +836,12 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
|
||||
* process.
|
||||
*/
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
/**
|
||||
* The laps of time between the ACTIVATE event and the SEARCH_UNEX one include
|
||||
* the cost of the request lock.
|
||||
*/
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
|
||||
/* assign sequence number */
|
||||
request->req_recv.req_base.req_sequence = comm->recv_sequence++;
|
||||
@ -831,6 +859,9 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
|
||||
if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
|
||||
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
||||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
||||
mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
|
||||
@ -843,12 +874,25 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
|
||||
proc++;
|
||||
}
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
|
||||
/* We didn't find any matches. Record this irecv so we can match to
|
||||
* it when the message comes in.
|
||||
*/
|
||||
|
||||
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE)
|
||||
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
|
||||
opal_list_append(&comm->wild_receives, (opal_list_item_t*)request);
|
||||
/**
|
||||
* We don't want to generate this kind of event for MPI_Probe. Hopefully,
|
||||
* the compiler will optimize out the empty if loop in the case where PERUSE
|
||||
* support is not required by the user.
|
||||
*/
|
||||
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) {
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_INSERT_IN_POSTED_Q,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
}
|
||||
|
||||
@ -897,9 +941,14 @@ static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
|
||||
request->req_recv.req_base.req_proc = proc->proc_ompi;
|
||||
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
||||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
||||
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_REMOVE_FROM_UNEX_Q,
|
||||
request->req_recv.req_base.req_comm,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV );
|
||||
opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag);
|
||||
frag->request = request;
|
||||
}
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_MATCH_UNEX,
|
||||
&(request->req_recv.req_base), PERUSE_RECV );
|
||||
return frag;
|
||||
}
|
||||
|
||||
|
@ -100,10 +100,22 @@ do { \
|
||||
persistent); \
|
||||
} while(0)
|
||||
|
||||
/**
|
||||
* Mark the request as completed at MPI level for internal purposes.
|
||||
*
|
||||
* @param recvreq (IN) Receive request.
|
||||
*/
|
||||
#define MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq ) \
|
||||
do { \
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
|
||||
&(recvreq->req_recv.req_base), PERUSE_RECV ); \
|
||||
MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) ); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Return a recv request to the modules free list.
|
||||
*
|
||||
* @param request (IN) Receive request.
|
||||
* @param recvreq (IN) Receive request.
|
||||
*/
|
||||
#define MCA_PML_OB1_RECV_REQUEST_PML_COMPLETE(recvreq) \
|
||||
do { \
|
||||
@ -111,6 +123,9 @@ do {
|
||||
\
|
||||
assert( false == recvreq->req_recv.req_base.req_pml_complete ); \
|
||||
\
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, \
|
||||
&(recvreq->req_recv.req_base), PERUSE_RECV ); \
|
||||
\
|
||||
for( r = 0; r < recvreq->req_rdma_cnt; r++ ) { \
|
||||
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[r].btl_reg; \
|
||||
if( NULL != btl_reg ) { \
|
||||
@ -129,7 +144,7 @@ do {
|
||||
recvreq->req_recv.req_base.req_ompi.req_status._count = \
|
||||
(recvreq->req_bytes_received < recvreq->req_bytes_delivered ? \
|
||||
recvreq->req_bytes_received : recvreq->req_bytes_delivered); \
|
||||
MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) ); \
|
||||
MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq ); \
|
||||
} \
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock); \
|
||||
} while(0)
|
||||
@ -178,17 +193,8 @@ do {
|
||||
(request)->req_lock = 0; \
|
||||
(request)->req_pipeline_depth = 0; \
|
||||
(request)->req_rdma_idx = 0; \
|
||||
(request)->req_recv.req_base.req_pml_complete = false; \
|
||||
(request)->req_recv.req_base.req_ompi.req_complete = false; \
|
||||
(request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
||||
\
|
||||
/* always set the req_status.MPI_TAG to ANY_TAG before starting the \
|
||||
* request. This field is used if cancelled to find out if the request \
|
||||
* has been matched or not. \
|
||||
*/ \
|
||||
(request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG; \
|
||||
(request)->req_recv.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \
|
||||
(request)->req_recv.req_base.req_ompi.req_status._cancelled = 0; \
|
||||
MCA_PML_BASE_RECV_START( &(request)->req_recv.req_base ); \
|
||||
\
|
||||
/* attempt to match posted recv */ \
|
||||
if((request)->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { \
|
||||
@ -207,6 +213,10 @@ do {
|
||||
do { \
|
||||
(request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag; \
|
||||
(request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = (hdr)->hdr_src; \
|
||||
\
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_MSG_MATCH_POSTED_REQ, \
|
||||
&((request)->req_recv.req_base), PERUSE_RECV ); \
|
||||
\
|
||||
if((request)->req_recv.req_bytes_packed != 0) { \
|
||||
ompi_convertor_copy_and_prepare_for_recv( \
|
||||
(request)->req_recv.req_base.req_proc->proc_convertor, \
|
||||
@ -218,6 +228,8 @@ do {
|
||||
ompi_convertor_get_unpacked_size( &(request)->req_recv.req_convertor, \
|
||||
&(request)->req_bytes_delivered ); \
|
||||
} \
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_XFER_BEGIN, \
|
||||
&(recvreq->req_recv.req_base), PERUSE_RECV); \
|
||||
} while (0)
|
||||
|
||||
|
||||
|
@ -52,6 +52,10 @@ static int mca_pml_ob1_send_request_free(struct ompi_request_t** request)
|
||||
if( true == sendreq->req_send.req_base.req_pml_complete ) {
|
||||
MCA_PML_OB1_SEND_REQUEST_RETURN( sendreq );
|
||||
}
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND );
|
||||
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||
|
||||
*request = MPI_REQUEST_NULL;
|
||||
@ -97,6 +101,9 @@ void mca_pml_ob1_match_completion_cache(
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND );
|
||||
|
||||
/* check completion status */
|
||||
if(OMPI_SUCCESS != status) {
|
||||
/* TSW - FIX */
|
||||
@ -124,6 +131,9 @@ void mca_pml_ob1_match_completion_free(
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND );
|
||||
|
||||
/* check completion status */
|
||||
if(OMPI_SUCCESS != status) {
|
||||
/* TSW - FIX */
|
||||
@ -151,6 +161,9 @@ static void mca_pml_ob1_rndv_completion(
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND );
|
||||
|
||||
/* check completion status */
|
||||
if(OMPI_SUCCESS != status) {
|
||||
/* TSW - FIX */
|
||||
@ -208,7 +221,7 @@ static void mca_pml_ob1_rget_completion(
|
||||
* Completion of a control message - return resources.
|
||||
*/
|
||||
|
||||
static void mca_pml_ob1_ctl_completion(
|
||||
static void mca_pml_ob1_send_ctl_completion(
|
||||
mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
@ -624,7 +637,15 @@ int mca_pml_ob1_send_request_start_rdma(
|
||||
|
||||
for(i=0; i<src->des_src_cnt; i++)
|
||||
hdr->hdr_rget.hdr_segs[i] = src->des_src[i];
|
||||
des->des_cbfunc = mca_pml_ob1_ctl_completion;
|
||||
des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
|
||||
|
||||
/**
|
||||
* Well, it's a get so we will not know when the peer get the data anyway.
|
||||
* If we generate the PERUSE event here, at least we will know when do we
|
||||
* sent the GET message ...
|
||||
*/
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND );
|
||||
|
||||
} else {
|
||||
|
||||
@ -882,6 +903,13 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if OMPI_WANT_PERUSE
|
||||
if( 0 != sendreq->req_send_offset ) {
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND );
|
||||
}
|
||||
#endif /* OMPI_WANT_PERUSE */
|
||||
|
||||
/* update state */
|
||||
sendreq->req_send_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1);
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -144,10 +144,6 @@ do {
|
||||
sendreq->req_bytes_delivered = 0; \
|
||||
sendreq->req_state = 0; \
|
||||
sendreq->req_send_offset = 0; \
|
||||
sendreq->req_send.req_base.req_pml_complete = false; \
|
||||
sendreq->req_send.req_base.req_ompi.req_complete = false; \
|
||||
sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
||||
sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0; \
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( \
|
||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \
|
||||
sendreq->req_endpoint = endpoint; \
|
||||
@ -156,6 +152,8 @@ do {
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); \
|
||||
eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t); \
|
||||
\
|
||||
MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); \
|
||||
\
|
||||
/* shortcut for zero byte */ \
|
||||
if(size <= eager_limit) { \
|
||||
switch(sendreq->req_send.req_send_mode) { \
|
||||
@ -251,6 +249,10 @@ do {
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status._count = \
|
||||
(sendreq)->req_send.req_bytes_packed; \
|
||||
MCA_PML_BASE_REQUEST_MPI_COMPLETE( &((sendreq)->req_send.req_base.req_ompi) ); \
|
||||
\
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND); \
|
||||
\
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
@ -267,6 +269,10 @@ do {
|
||||
\
|
||||
assert( false == sendreq->req_send.req_base.req_pml_complete ); \
|
||||
\
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, \
|
||||
&(sendreq->req_send.req_base), \
|
||||
PERUSE_SEND ); \
|
||||
\
|
||||
/* return mpool resources */ \
|
||||
for( r = 0; r < sendreq->req_rdma_cnt; r++ ) { \
|
||||
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg; \
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user