1
1

PERUSE support for OB1. There we go, now the trunk has a partial peruse implementation.

We support all the events in the PERUSE specifications, but right now only one event
of each type can be attached to a communicator. This will be worked out in the future.
The events were placed in such a way that we will be able to measure the overhead
for our threading implementation (the cost of the synchronization objects).

This commit was SVN r9500.
Этот коммит содержится в:
George Bosilca 2006-03-31 17:09:09 +00:00
родитель 1226d452bf
Коммит 58cd591d3b
7 изменённых файлов: 200 добавлений и 57 удалений

Просмотреть файл

@ -19,7 +19,7 @@
#include "ompi_config.h"
#include "ompi/request/request.h"
#include "pml_ob1_recvreq.h"
#include "ompi/peruse/peruse-internal.h"
int mca_pml_ob1_irecv_init(void *addr,
size_t count,

Просмотреть файл

@ -22,7 +22,7 @@
#include "pml_ob1_proc.h"
#include "pml_ob1_sendreq.h"
#include "pml_ob1_recvreq.h"
#include "ompi/peruse/peruse-internal.h"
int mca_pml_ob1_isend_init(void *buf,
size_t count,
@ -130,6 +130,7 @@ int mca_pml_ob1_send(void *buf,
ompi_request_waiting--;
}
}
rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
ompi_request_free( (ompi_request_t**)&sendreq );
return rc;

Просмотреть файл

@ -33,9 +33,8 @@
#include "pml_ob1_recvreq.h"
#include "pml_ob1_sendreq.h"
#include "pml_ob1_hdr.h"
#include "ompi/datatype/dt_arch.h"
#include "ompi/datatype/dt_arch.h"
#include "ompi/peruse/peruse-internal.h"
OBJ_CLASS_INSTANCE(
mca_pml_ob1_buffer_t,
@ -310,6 +309,11 @@ do { \
/* remove this recv from the wild receive queue */ \
opal_list_remove_item(&comm->wild_receives, \
(opal_list_item_t *)wild_recv); \
\
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
&(wild_recv->req_recv.req_base), \
PERUSE_RECV); \
\
break; \
} \
\
@ -351,6 +355,11 @@ do { \
/* remove descriptor from specific receive list */ \
opal_list_remove_item(&(proc)->specific_receives, \
(opal_list_item_t *)specific_recv); \
\
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
&(specific_recv->req_recv.req_base), \
PERUSE_RECV); \
\
break; \
} \
\
@ -444,6 +453,15 @@ int mca_pml_ob1_recv_frag_match(
frag_msg_seq = hdr->hdr_seq;
proc = comm->procs + hdr->hdr_src;
/**
* We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
* fragment arrival. Independing if it is received on the correct order or not.
* This will allow the tools to figure out if the messages are not received in the
* correct order (if multiple network interfaces).
*/
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_ARRIVED, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
/* get next expected message sequence number - if threaded
* run, lock to make sure that if another thread is processing
* a frag from the same message a match is made only once.
@ -465,6 +483,15 @@ int mca_pml_ob1_recv_frag_match(
/* We're now expecting the next sequence number. */
(proc->expected_sequence)++;
/**
* We generate the SEARCH_POSTED_QUEUE only when the message is received
* in the correct sequence. Otherwise, we delay the event generation until
* we reach the correct sequence number.
*/
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
rematch:
/*
@ -512,12 +539,26 @@ rematch:
MCA_PML_OB1_RECV_FRAG_ALLOC(frag, rc);
if(OMPI_SUCCESS != rc) {
OPAL_THREAD_UNLOCK(&comm->matching_lock);
/**
* As we return from the match function, we should generate the expected event.
*/
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
return rc;
}
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
}
/**
* The match is over. We generate the SEARCH_POSTED_Q_END here, before going
* into the mca_pml_ob1_check_cantmatch_for_match so we can make a difference
* for the searching time for all messages.
*/
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
/*
* Now that new message has arrived, check to see if
* any fragments on the c_c_frags_cant_match list
@ -548,12 +589,18 @@ rematch:
if(match != NULL) {
mca_pml_ob1_recv_request_progress(match,btl,segments,num_segments);
#if OMPI_WANT_PERUSE
} else {
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
#endif /* OMPI_WANT_PERUSE */
}
if(additional_match) {
opal_list_item_t* item;
while(NULL != (item = opal_list_remove_first(&additional_matches))) {
mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
mca_pml_ob1_recv_request_progress(frag->request,frag->btl,frag->segments,frag->num_segments);
mca_pml_ob1_recv_request_progress( frag->request, frag->btl, frag->segments,
frag->num_segments );
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
}
}

Просмотреть файл

@ -46,6 +46,10 @@ static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request)
if( true == recvreq->req_recv.req_base.req_pml_complete ) {
MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq );
}
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY,
&(recvreq->req_recv.req_base), PERUSE_RECV );
OPAL_THREAD_UNLOCK(&ompi_request_lock);
*request = MPI_REQUEST_NULL;
@ -70,6 +74,8 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer;
opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request);
}
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
&(request->req_recv.req_base), PERUSE_RECV );
}
OPAL_THREAD_UNLOCK(&comm->matching_lock);
@ -79,7 +85,7 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
* on this request will be able to complete. As the status is marked as
* cancelled the cancel state will be detected.
*/
MCA_PML_BASE_REQUEST_MPI_COMPLETE(ompi_request);
MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(request);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
return OMPI_SUCCESS;
}
@ -108,7 +114,7 @@ OBJ_CLASS_INSTANCE(
* Release resources.
*/
static void mca_pml_ob1_ctl_completion(
static void mca_pml_ob1_recv_ctl_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
@ -267,7 +273,7 @@ static void mca_pml_ob1_recv_request_ack(
/* initialize descriptor */
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
des->des_cbfunc = mca_pml_ob1_ctl_completion;
des->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
if(rc != OMPI_SUCCESS) {
@ -701,7 +707,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
break;
}
ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
ctl->des_cbfunc = mca_pml_ob1_ctl_completion;
ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
/* fill in rdma header */
hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
@ -767,6 +773,12 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
/* check for a specific match */
OPAL_THREAD_LOCK(&comm->matching_lock);
/**
* The laps of time between the ACTIVATE event and the SEARCH_UNEX one include
* the cost of the request lock.
*/
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
&(request->req_recv.req_base), PERUSE_RECV );
/* assign sequence number */
request->req_recv.req_base.req_sequence = comm->recv_sequence++;
@ -774,7 +786,10 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
if (opal_list_get_size(&proc->unexpected_frags) > 0 &&
(frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
OPAL_THREAD_UNLOCK(&comm->matching_lock);
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
&(request->req_recv.req_base), PERUSE_RECV );
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
@ -785,11 +800,18 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
return; /* match found */
}
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
&(request->req_recv.req_base), PERUSE_RECV );
/* We didn't find any matches. Record this irecv so we can match
* it when the message comes in.
*/
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
opal_list_append(&proc->specific_receives, (opal_list_item_t*)request);
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_INSERT_IN_POSTED_Q,
&(request->req_recv.req_base), PERUSE_RECV );
}
}
OPAL_THREAD_UNLOCK(&comm->matching_lock);
}
@ -814,6 +836,12 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
* process.
*/
OPAL_THREAD_LOCK(&comm->matching_lock);
/**
* The laps of time between the ACTIVATE event and the SEARCH_UNEX one include
* the cost of the request lock.
*/
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
&(request->req_recv.req_base), PERUSE_RECV );
/* assign sequence number */
request->req_recv.req_base.req_sequence = comm->recv_sequence++;
@ -831,6 +859,9 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
OPAL_THREAD_UNLOCK(&comm->matching_lock);
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
&(request->req_recv.req_base), PERUSE_RECV );
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
@ -843,12 +874,25 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
proc++;
}
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
&(request->req_recv.req_base), PERUSE_RECV );
/* We didn't find any matches. Record this irecv so we can match to
* it when the message comes in.
*/
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE)
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
opal_list_append(&comm->wild_receives, (opal_list_item_t*)request);
/**
* We don't want to generate this kind of event for MPI_Probe. Hopefully,
* the compiler will optimize out the empty if loop in the case where PERUSE
* support is not required by the user.
*/
if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_INSERT_IN_POSTED_Q,
&(request->req_recv.req_base), PERUSE_RECV );
}
}
OPAL_THREAD_UNLOCK(&comm->matching_lock);
}
@ -897,9 +941,14 @@ static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
request->req_recv.req_base.req_proc = proc->proc_ompi;
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_REMOVE_FROM_UNEX_Q,
request->req_recv.req_base.req_comm,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV );
opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag);
frag->request = request;
}
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_MATCH_UNEX,
&(request->req_recv.req_base), PERUSE_RECV );
return frag;
}

Просмотреть файл

@ -100,38 +100,53 @@ do { \
persistent); \
} while(0)
/**
 * Mark the request as completed at the MPI level for internal purposes.
 * Generates the PERUSE REQ_COMPLETE event before signalling MPI-level
 * completion, so a tool observes the event while the request is still
 * pending from MPI's point of view.
 *
 * @param recvreq (IN) Receive request.
 */
#define MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq ) \
do { \
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
&(recvreq->req_recv.req_base), PERUSE_RECV ); \
MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) ); \
} while (0)
/**
* Return a recv request to the modules free list.
*
* @param request (IN) Receive request.
* @param recvreq (IN) Receive request.
*/
#define MCA_PML_OB1_RECV_REQUEST_PML_COMPLETE(recvreq) \
do { \
size_t r; \
\
assert( false == recvreq->req_recv.req_base.req_pml_complete ); \
\
for( r = 0; r < recvreq->req_rdma_cnt; r++ ) { \
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[r].btl_reg; \
if( NULL != btl_reg ) { \
btl_reg->mpool->mpool_release( btl_reg->mpool, btl_reg ); \
} \
} \
recvreq->req_rdma_cnt = 0; \
\
OPAL_THREAD_LOCK(&ompi_request_lock); \
\
if( true == recvreq->req_recv.req_base.req_free_called ) { \
MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq ); \
} else { \
/* initialize request status */ \
recvreq->req_recv.req_base.req_pml_complete = true; \
recvreq->req_recv.req_base.req_ompi.req_status._count = \
(recvreq->req_bytes_received < recvreq->req_bytes_delivered ? \
recvreq->req_bytes_received : recvreq->req_bytes_delivered); \
MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) ); \
} \
OPAL_THREAD_UNLOCK(&ompi_request_lock); \
/**
 * Complete a receive request at the PML level.  Generates the PERUSE
 * REQ_XFER_END event, releases any RDMA mpool registrations held by the
 * request, then, under ompi_request_lock, either returns the request to
 * the free list (if MPI_Request_free was already called on it) or fills
 * in the status byte count and marks it complete at the MPI level via
 * MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE.
 *
 * @param recvreq (IN) Receive request.
 */
#define MCA_PML_OB1_RECV_REQUEST_PML_COMPLETE(recvreq) \
do { \
size_t r; \
\
assert( false == recvreq->req_recv.req_base.req_pml_complete ); \
\
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, \
&(recvreq->req_recv.req_base), PERUSE_RECV ); \
\
for( r = 0; r < recvreq->req_rdma_cnt; r++ ) { \
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[r].btl_reg; \
if( NULL != btl_reg ) { \
btl_reg->mpool->mpool_release( btl_reg->mpool, btl_reg ); \
} \
} \
recvreq->req_rdma_cnt = 0; \
\
OPAL_THREAD_LOCK(&ompi_request_lock); \
\
if( true == recvreq->req_recv.req_base.req_free_called ) { \
MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq ); \
} else { \
/* initialize request status */ \
recvreq->req_recv.req_base.req_pml_complete = true; \
recvreq->req_recv.req_base.req_ompi.req_status._count = \
(recvreq->req_bytes_received < recvreq->req_bytes_delivered ? \
recvreq->req_bytes_received : recvreq->req_bytes_delivered); \
MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq ); \
} \
OPAL_THREAD_UNLOCK(&ompi_request_lock); \
} while(0)
/*
@ -178,17 +193,8 @@ do {
(request)->req_lock = 0; \
(request)->req_pipeline_depth = 0; \
(request)->req_rdma_idx = 0; \
(request)->req_recv.req_base.req_pml_complete = false; \
(request)->req_recv.req_base.req_ompi.req_complete = false; \
(request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
\
/* always set the req_status.MPI_TAG to ANY_TAG before starting the \
* request. This field is used if cancelled to find out if the request \
* has been matched or not. \
*/ \
(request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG; \
(request)->req_recv.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \
(request)->req_recv.req_base.req_ompi.req_status._cancelled = 0; \
MCA_PML_BASE_RECV_START( &(request)->req_recv.req_base ); \
\
/* attempt to match posted recv */ \
if((request)->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { \
@ -207,6 +213,10 @@ do {
do { \
(request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag; \
(request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = (hdr)->hdr_src; \
\
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_MSG_MATCH_POSTED_REQ, \
&((request)->req_recv.req_base), PERUSE_RECV ); \
\
if((request)->req_recv.req_bytes_packed != 0) { \
ompi_convertor_copy_and_prepare_for_recv( \
(request)->req_recv.req_base.req_proc->proc_convertor, \
@ -218,6 +228,8 @@ do {
ompi_convertor_get_unpacked_size( &(request)->req_recv.req_convertor, \
&(request)->req_bytes_delivered ); \
} \
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_XFER_BEGIN, \
&(recvreq->req_recv.req_base), PERUSE_RECV); \
} while (0)

Просмотреть файл

@ -52,6 +52,10 @@ static int mca_pml_ob1_send_request_free(struct ompi_request_t** request)
if( true == sendreq->req_send.req_base.req_pml_complete ) {
MCA_PML_OB1_SEND_REQUEST_RETURN( sendreq );
}
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY,
&(sendreq->req_send.req_base), PERUSE_SEND );
OPAL_THREAD_UNLOCK(&ompi_request_lock);
*request = MPI_REQUEST_NULL;
@ -97,6 +101,9 @@ void mca_pml_ob1_match_completion_cache(
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
&(sendreq->req_send.req_base), PERUSE_SEND );
/* check completion status */
if(OMPI_SUCCESS != status) {
/* TSW - FIX */
@ -124,6 +131,9 @@ void mca_pml_ob1_match_completion_free(
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
&(sendreq->req_send.req_base), PERUSE_SEND );
/* check completion status */
if(OMPI_SUCCESS != status) {
/* TSW - FIX */
@ -151,6 +161,9 @@ static void mca_pml_ob1_rndv_completion(
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
&(sendreq->req_send.req_base), PERUSE_SEND );
/* check completion status */
if(OMPI_SUCCESS != status) {
/* TSW - FIX */
@ -208,7 +221,7 @@ static void mca_pml_ob1_rget_completion(
* Completion of a control message - return resources.
*/
static void mca_pml_ob1_ctl_completion(
static void mca_pml_ob1_send_ctl_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
@ -624,7 +637,15 @@ int mca_pml_ob1_send_request_start_rdma(
for(i=0; i<src->des_src_cnt; i++)
hdr->hdr_rget.hdr_segs[i] = src->des_src[i];
des->des_cbfunc = mca_pml_ob1_ctl_completion;
des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
/**
* Well, it's a get so we will not know when the peer get the data anyway.
* If we generate the PERUSE event here, at least we will know when do we
* sent the GET message ...
*/
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
&(sendreq->req_send.req_base), PERUSE_SEND );
} else {
@ -882,6 +903,13 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
#endif
#endif
#if OMPI_WANT_PERUSE
if( 0 != sendreq->req_send_offset ) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
&(sendreq->req_send.req_base), PERUSE_SEND );
}
#endif /* OMPI_WANT_PERUSE */
/* update state */
sendreq->req_send_offset += size;
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1);

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -144,10 +144,6 @@ do {
sendreq->req_bytes_delivered = 0; \
sendreq->req_state = 0; \
sendreq->req_send_offset = 0; \
sendreq->req_send.req_base.req_pml_complete = false; \
sendreq->req_send.req_base.req_ompi.req_complete = false; \
sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0; \
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( \
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \
sendreq->req_endpoint = endpoint; \
@ -156,6 +152,8 @@ do {
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); \
eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t); \
\
MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); \
\
/* shortcut for zero byte */ \
if(size <= eager_limit) { \
switch(sendreq->req_send.req_send_mode) { \
@ -251,6 +249,10 @@ do {
(sendreq)->req_send.req_base.req_ompi.req_status._count = \
(sendreq)->req_send.req_bytes_packed; \
MCA_PML_BASE_REQUEST_MPI_COMPLETE( &((sendreq)->req_send.req_base.req_ompi) ); \
\
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
&(sendreq->req_send.req_base), PERUSE_SEND); \
\
} while(0)
/*
@ -267,6 +269,10 @@ do {
\
assert( false == sendreq->req_send.req_base.req_pml_complete ); \
\
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, \
&(sendreq->req_send.req_base), \
PERUSE_SEND ); \
\
/* return mpool resources */ \
for( r = 0; r < sendreq->req_rdma_cnt; r++ ) { \
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg; \