From 58cd591d3bfea98103680a5ebbcb9616e55a7aa0 Mon Sep 17 00:00:00 2001
From: George Bosilca
Date: Fri, 31 Mar 2006 17:09:09 +0000
Subject: [PATCH] PERUSE support for OB1.

There we go, now the trunk has a partial PERUSE implementation. We support
all the events in the PERUSE specification, but right now only one event of
each type can be attached to a communicator. This will be worked out in the
future.

The events were placed in such a way that we will be able to measure the
overhead of our threading implementation (the cost of the synchronization
objects).

This commit was SVN r9500.
---
 ompi/mca/pml/ob1/pml_ob1_irecv.c    |  2 +-
 ompi/mca/pml/ob1/pml_ob1_isend.c    |  3 +-
 ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 55 ++++++++++++++++--
 ompi/mca/pml/ob1/pml_ob1_recvreq.c  | 61 ++++++++++++++++++--
 ompi/mca/pml/ob1/pml_ob1_recvreq.h  | 88 ++++++++++++++++-------------
 ompi/mca/pml/ob1/pml_ob1_sendreq.c  | 32 ++++++++++-
 ompi/mca/pml/ob1/pml_ob1_sendreq.h  | 16 ++++--
 7 files changed, 200 insertions(+), 57 deletions(-)

diff --git a/ompi/mca/pml/ob1/pml_ob1_irecv.c b/ompi/mca/pml/ob1/pml_ob1_irecv.c
index 04c9061188..f84e0e0c38 100644
--- a/ompi/mca/pml/ob1/pml_ob1_irecv.c
+++ b/ompi/mca/pml/ob1/pml_ob1_irecv.c
@@ -19,7 +19,7 @@
 #include "ompi_config.h"
 #include "ompi/request/request.h"
 #include "pml_ob1_recvreq.h"
-
+#include "ompi/peruse/peruse-internal.h"
 
 int mca_pml_ob1_irecv_init(void *addr,
                            size_t count,
diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c
index 0d406b312e..5cebf9bce4 100644
--- a/ompi/mca/pml/ob1/pml_ob1_isend.c
+++ b/ompi/mca/pml/ob1/pml_ob1_isend.c
@@ -22,7 +22,7 @@
 #include "pml_ob1_proc.h"
 #include "pml_ob1_sendreq.h"
 #include "pml_ob1_recvreq.h"
-
+#include "ompi/peruse/peruse-internal.h"
 
 int mca_pml_ob1_isend_init(void *buf,
                            size_t count,
@@ -130,6 +130,7 @@ int mca_pml_ob1_send(void *buf,
             ompi_request_waiting--;
         }
     }
+    rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
     ompi_request_free( (ompi_request_t**)&sendreq );
     return rc;
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c
index e61d5b8bba..b1c09ddcba 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c
+++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c
@@ -33,9 +33,8 @@
 #include "pml_ob1_recvreq.h"
 #include "pml_ob1_sendreq.h"
 #include "pml_ob1_hdr.h"
-#include "ompi/datatype/dt_arch.h"
-
-
+#include "ompi/datatype/dt_arch.h"
+#include "ompi/peruse/peruse-internal.h"
 
 OBJ_CLASS_INSTANCE(
     mca_pml_ob1_buffer_t,
@@ -310,6 +309,11 @@ do { \
             /* remove this recv from the wild receive queue */ \
             opal_list_remove_item(&comm->wild_receives, \
                                   (opal_list_item_t *)wild_recv); \
+\
+            PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
+                                     &(wild_recv->req_recv.req_base), \
+                                     PERUSE_RECV); \
+\
             break; \
         } \
 \
@@ -351,6 +355,11 @@ do { \
             /* remove descriptor from specific receive list */ \
             opal_list_remove_item(&(proc)->specific_receives, \
                                   (opal_list_item_t *)specific_recv); \
+\
+            PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
+                                     &(specific_recv->req_recv.req_base), \
+                                     PERUSE_RECV); \
+\
             break; \
         } \
 \
@@ -444,6 +453,15 @@ int mca_pml_ob1_recv_frag_match(
     frag_msg_seq = hdr->hdr_seq;
     proc = comm->procs + hdr->hdr_src;
 
+    /**
+     * We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
+     * fragment arrival, independent of whether it is received in the correct order
+     * or not. This will allow the tools to figure out if the messages are received
+     * out of order (e.g. over multiple network interfaces).
+     */
+    PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_ARRIVED, comm_ptr,
+                            hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+
     /* get next expected message sequence number - if threaded
      * run, lock to make sure that if another thread is processing
      * a frag from the same message a match is made only once.
@@ -465,6 +483,15 @@ int mca_pml_ob1_recv_frag_match(
 
         /* We're now expecting the next sequence number. */
         (proc->expected_sequence)++;
+
+        /**
+         * We generate the SEARCH_POSTED_QUEUE only when the message is received
+         * in the correct sequence. Otherwise, we delay the event generation until
+         * we reach the correct sequence number.
+         */
+        PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
+                                hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+
 rematch:
 
     /*
@@ -512,12 +539,26 @@ rematch:
             MCA_PML_OB1_RECV_FRAG_ALLOC(frag, rc);
             if(OMPI_SUCCESS != rc) {
                 OPAL_THREAD_UNLOCK(&comm->matching_lock);
+                /**
+                 * As we return from the match function, we should generate the expected event.
+                 */
+                PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
+                                        hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+
                 return rc;
             }
             MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
             opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
         }
 
+    /**
+     * The match is over. We generate the SEARCH_POSTED_Q_END here, before going
+     * into mca_pml_ob1_check_cantmatch_for_match, so that the posted-queue search
+     * time can be measured separately for each message.
+     */
+    PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
+                            hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+
     /*
      * Now that new message has arrived, check to see if
      * any fragments on the c_c_frags_cant_match list
@@ -548,12 +589,18 @@ rematch:
 
     if(match != NULL) {
         mca_pml_ob1_recv_request_progress(match,btl,segments,num_segments);
+#if OMPI_WANT_PERUSE
+    } else {
+        PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr,
+                                hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+#endif  /* OMPI_WANT_PERUSE */
     }
     if(additional_match) {
         opal_list_item_t* item;
         while(NULL != (item = opal_list_remove_first(&additional_matches))) {
             mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
-            mca_pml_ob1_recv_request_progress(frag->request,frag->btl,frag->segments,frag->num_segments);
+            mca_pml_ob1_recv_request_progress( frag->request, frag->btl, frag->segments,
+                                               frag->num_segments );
             MCA_PML_OB1_RECV_FRAG_RETURN(frag);
         }
     }
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
index 0accaa7e68..0f209c5209 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c
@@ -46,6 +46,10 @@ static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request)
     if( true == recvreq->req_recv.req_base.req_pml_complete ) {
         MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq );
     }
+
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY,
+                             &(recvreq->req_recv.req_base), PERUSE_RECV );
+
     OPAL_THREAD_UNLOCK(&ompi_request_lock);
 
     *request = MPI_REQUEST_NULL;
@@ -70,6 +74,8 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
             mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer;
             opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request);
         }
+        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
+                                 &(request->req_recv.req_base), PERUSE_RECV );
     }
     OPAL_THREAD_UNLOCK(&comm->matching_lock);
 
@@ -79,7 +85,7 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
      * on this request will be able to complete. As the status is marked as
      * cancelled the cancel state will be detected.
      */
-    MCA_PML_BASE_REQUEST_MPI_COMPLETE(ompi_request);
+    MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(request);
     OPAL_THREAD_UNLOCK(&ompi_request_lock);
     return OMPI_SUCCESS;
 }
@@ -108,7 +114,7 @@ OBJ_CLASS_INSTANCE(
  * Release resources.
  */
 
-static void mca_pml_ob1_ctl_completion(
+static void mca_pml_ob1_recv_ctl_completion(
     mca_btl_base_module_t* btl,
     struct mca_btl_base_endpoint_t* ep,
     struct mca_btl_base_descriptor_t* des,
@@ -267,7 +273,7 @@ static void mca_pml_ob1_recv_request_ack(
 
     /* initialize descriptor */
     des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
-    des->des_cbfunc = mca_pml_ob1_ctl_completion;
+    des->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
 
     rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
     if(rc != OMPI_SUCCESS) {
@@ -701,7 +707,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
             break;
         }
         ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
-        ctl->des_cbfunc = mca_pml_ob1_ctl_completion;
+        ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
 
         /* fill in rdma header */
         hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
@@ -767,6 +773,12 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request)
 
     /* check for a specific match */
     OPAL_THREAD_LOCK(&comm->matching_lock);
+    /**
+     * The lapse of time between the ACTIVATE event and the SEARCH_UNEX one
+     * includes the cost of the request lock.
+     */
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
+                             &(request->req_recv.req_base), PERUSE_RECV );
 
     /* assign sequence number */
     request->req_recv.req_base.req_sequence = comm->recv_sequence++;
@@ -774,7 +786,10 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request)
     if (opal_list_get_size(&proc->unexpected_frags) > 0 &&
         (frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
         OPAL_THREAD_UNLOCK(&comm->matching_lock);
-
+
+        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
+                                 &(request->req_recv.req_base), PERUSE_RECV );
+
         if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
               (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
             mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
@@ -785,11 +800,18 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request)
         return; /* match found */
     }
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
+                             &(request->req_recv.req_base), PERUSE_RECV );
+
     /* We didn't find any matches.  Record this irecv so we can match
      * it when the message comes in.
      */
     if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
         opal_list_append(&proc->specific_receives, (opal_list_item_t*)request);
+        if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) {
+            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_INSERT_IN_POSTED_Q,
+                                     &(request->req_recv.req_base), PERUSE_RECV );
+        }
     }
     OPAL_THREAD_UNLOCK(&comm->matching_lock);
 }
@@ -814,6 +836,12 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
      * process.
      */
     OPAL_THREAD_LOCK(&comm->matching_lock);
+    /**
+     * The lapse of time between the ACTIVATE event and the SEARCH_UNEX one
+     * includes the cost of the request lock.
+     */
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
+                             &(request->req_recv.req_base), PERUSE_RECV );
 
     /* assign sequence number */
     request->req_recv.req_base.req_sequence = comm->recv_sequence++;
@@ -831,6 +859,9 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
         if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
             OPAL_THREAD_UNLOCK(&comm->matching_lock);
 
+            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
+                                     &(request->req_recv.req_base), PERUSE_RECV );
+
             if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
                   (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
                 mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
@@ -843,12 +874,25 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
         proc++;
     }
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
+                             &(request->req_recv.req_base), PERUSE_RECV );
+
     /* We didn't find any matches.  Record this irecv so we can match to
      * it when the message comes in.
      */
-    if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE)
+    if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
         opal_list_append(&comm->wild_receives, (opal_list_item_t*)request);
+        /**
+         * We don't want to generate this kind of event for MPI_Probe. Hopefully,
+         * the compiler will optimize out the empty if block when PERUSE
+         * support is not required by the user.
+         */
+        if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) {
+            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_INSERT_IN_POSTED_Q,
+                                     &(request->req_recv.req_base), PERUSE_RECV );
+        }
+    }
     OPAL_THREAD_UNLOCK(&comm->matching_lock);
 }
 
@@ -897,9 +941,14 @@ static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
             request->req_recv.req_base.req_proc = proc->proc_ompi;
             if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
                   (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
+                PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_REMOVE_FROM_UNEX_Q,
+                                        request->req_recv.req_base.req_comm,
+                                        hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV );
                 opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag);
                 frag->request = request;
             }
+            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_MATCH_UNEX,
+                                     &(request->req_recv.req_base), PERUSE_RECV );
             return frag;
         }
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h
index 599a0fcf9d..ce413e65a6 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h
+++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h
@@ -100,38 +100,53 @@ do {
         persistent);                                                                  \
 } while(0)
 
+/**
+ * Mark the request as completed at MPI level for internal purposes.
+ *
+ * @param recvreq (IN)  Receive request.
+ */
+#define MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq )                              \
+    do {                                                                              \
+        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE,                            \
+                                 &(recvreq->req_recv.req_base), PERUSE_RECV );        \
+        MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) );  \
+    } while (0)
+
 /**
  *  Return a recv request to the modules free list.
  *
- *  @param request (IN)  Receive request.
+ *  @param recvreq (IN)  Receive request.
  */
-#define MCA_PML_OB1_RECV_REQUEST_PML_COMPLETE(recvreq)                                \
-do {                                                                                  \
-    size_t r;                                                                         \
-                                                                                      \
-    assert( false == recvreq->req_recv.req_base.req_pml_complete );                   \
-                                                                                      \
-    for( r = 0; r < recvreq->req_rdma_cnt; r++ ) {                                    \
-        mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[r].btl_reg;        \
-        if( NULL != btl_reg ) {                                                       \
-            btl_reg->mpool->mpool_release( btl_reg->mpool, btl_reg );                 \
-        }                                                                             \
-    }                                                                                 \
-    recvreq->req_rdma_cnt = 0;                                                        \
-                                                                                      \
-    OPAL_THREAD_LOCK(&ompi_request_lock);                                             \
-                                                                                      \
-    if( true == recvreq->req_recv.req_base.req_free_called ) {                        \
-        MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq );                                   \
-    } else {                                                                          \
-        /* initialize request status */                                               \
-        recvreq->req_recv.req_base.req_pml_complete = true;                           \
-        recvreq->req_recv.req_base.req_ompi.req_status._count =                       \
-            (recvreq->req_bytes_received < recvreq->req_bytes_delivered ?             \
-             recvreq->req_bytes_received : recvreq->req_bytes_delivered);             \
-        MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) );  \
-    }                                                                                 \
-    OPAL_THREAD_UNLOCK(&ompi_request_lock);                                           \
+#define MCA_PML_OB1_RECV_REQUEST_PML_COMPLETE(recvreq)                                \
+do {                                                                                  \
+    size_t r;                                                                         \
+                                                                                      \
+    assert( false == recvreq->req_recv.req_base.req_pml_complete );                   \
+                                                                                      \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,                                \
+                             &(recvreq->req_recv.req_base), PERUSE_RECV );            \
+                                                                                      \
+    for( r = 0; r < recvreq->req_rdma_cnt; r++ ) {                                    \
+        mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[r].btl_reg;        \
+        if( NULL != btl_reg ) {                                                       \
+            btl_reg->mpool->mpool_release( btl_reg->mpool, btl_reg );                 \
+        }                                                                             \
+    }                                                                                 \
+    recvreq->req_rdma_cnt = 0;                                                        \
+                                                                                      \
+    OPAL_THREAD_LOCK(&ompi_request_lock);                                             \
+                                                                                      \
+    if( true == recvreq->req_recv.req_base.req_free_called ) {                        \
+        MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq );                                   \
+    } else {                                                                          \
+        /* initialize request status */                                               \
+        recvreq->req_recv.req_base.req_pml_complete = true;                           \
+        recvreq->req_recv.req_base.req_ompi.req_status._count =                       \
+            (recvreq->req_bytes_received < recvreq->req_bytes_delivered ?             \
+             recvreq->req_bytes_received : recvreq->req_bytes_delivered);             \
+        MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq );                             \
+    }                                                                                 \
+    OPAL_THREAD_UNLOCK(&ompi_request_lock);                                           \
 } while(0)
 
 /*
@@ -178,17 +193,8 @@ do {
     (request)->req_lock = 0;                                                 \
     (request)->req_pipeline_depth = 0;                                       \
     (request)->req_rdma_idx = 0;                                             \
-    (request)->req_recv.req_base.req_pml_complete = false;                   \
-    (request)->req_recv.req_base.req_ompi.req_complete = false;              \
-    (request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE;   \
                                                                              \
-    /* always set the req_status.MPI_TAG to ANY_TAG before starting the      \
-     * request. This field is used if cancelled to find out if the request   \
-     * has been matched or not.                                              \
-     */                                                                     \
-    (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG; \
-    (request)->req_recv.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \
-    (request)->req_recv.req_base.req_ompi.req_status._cancelled = 0;        \
+    MCA_PML_BASE_RECV_START( &(request)->req_recv.req_base );               \
                                                                             \
     /* attempt to match posted recv */                                      \
     if((request)->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) {          \
@@ -207,6 +213,10 @@
 do {                                                                        \
     (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag; \
     (request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = (hdr)->hdr_src; \
+                                                                            \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_MSG_MATCH_POSTED_REQ,              \
+                             &((request)->req_recv.req_base), PERUSE_RECV ); \
+                                                                            \
     if((request)->req_recv.req_bytes_packed != 0) {                         \
         ompi_convertor_copy_and_prepare_for_recv(                           \
                 (request)->req_recv.req_base.req_proc->proc_convertor,      \
@@ -218,6 +228,8 @@ do {
         ompi_convertor_get_unpacked_size( &(request)->req_recv.req_convertor, \
                                           &(request)->req_bytes_delivered ); \
     }                                                                       \
+    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_XFER_BEGIN,                    \
+                             &((request)->req_recv.req_base), PERUSE_RECV); \
 } while (0)
 
diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
index 131b50b1e6..270e7e5c75 100644
--- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c
+++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c
@@ -52,6 +52,10 @@ static int mca_pml_ob1_send_request_free(struct ompi_request_t** request)
     if( true == sendreq->req_send.req_base.req_pml_complete ) {
         MCA_PML_OB1_SEND_REQUEST_RETURN( sendreq );
     }
+
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY,
+                             &(sendreq->req_send.req_base), PERUSE_SEND );
+
     OPAL_THREAD_UNLOCK(&ompi_request_lock);
 
     *request = MPI_REQUEST_NULL;
@@ -97,6 +101,9 @@ void mca_pml_ob1_match_completion_cache(
     mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
     mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                             &(sendreq->req_send.req_base), PERUSE_SEND );
+
     /* check completion status */
     if(OMPI_SUCCESS != status) {
         /* TSW - FIX */
@@ -124,6 +131,9 @@ void mca_pml_ob1_match_completion_free(
     mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
     mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                             &(sendreq->req_send.req_base), PERUSE_SEND );
+
     /* check completion status */
     if(OMPI_SUCCESS != status) {
         /* TSW - FIX */
@@ -151,6 +161,9 @@ static void mca_pml_ob1_rndv_completion(
     mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
     mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                             &(sendreq->req_send.req_base), PERUSE_SEND );
+
     /* check completion status */
     if(OMPI_SUCCESS != status) {
         /* TSW - FIX */
@@ -208,7 +221,7 @@ static void mca_pml_ob1_rget_completion(
  * Completion of a control message - return resources.
  */
 
-static void mca_pml_ob1_ctl_completion(
+static void mca_pml_ob1_send_ctl_completion(
     mca_btl_base_module_t* btl,
     struct mca_btl_base_endpoint_t* ep,
     struct mca_btl_base_descriptor_t* descriptor,
@@ -624,7 +637,15 @@ int mca_pml_ob1_send_request_start_rdma(
         for(i=0; i<src->des_src_cnt; i++)
             hdr->hdr_rget.hdr_segs[i] = src->des_src[i];
 
-        des->des_cbfunc = mca_pml_ob1_ctl_completion;
+        des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
+
+        /**
+         * Well, it's a get so we will not know when the peer gets the data anyway.
+         * If we generate the PERUSE event here, at least we will know when we
+         * sent the GET message ...
+         */
+        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                                 &(sendreq->req_send.req_base), PERUSE_SEND );
 
     } else {
 
@@ -882,6 +903,13 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
 #endif
 #endif
 
+#if OMPI_WANT_PERUSE
+            if( 0 != sendreq->req_send_offset ) {
+                PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
+                                         &(sendreq->req_send.req_base), PERUSE_SEND );
+            }
+#endif  /* OMPI_WANT_PERUSE */
+
             /* update state */
             sendreq->req_send_offset += size;
             OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1);
diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h
index 35841d4ed2..036e45522f 100644
--- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h
+++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h
@@ -2,7 +2,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
  *                         of Tennessee Research Foundation.  All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -144,10 +144,6 @@ do {
     sendreq->req_bytes_delivered = 0;                                       \
     sendreq->req_state = 0;                                                 \
     sendreq->req_send_offset = 0;                                           \
-    sendreq->req_send.req_base.req_pml_complete = false;                    \
-    sendreq->req_send.req_base.req_ompi.req_complete = false;               \
-    sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE;    \
-    sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0;          \
     sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(            \
         &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \
     sendreq->req_endpoint = endpoint;                                       \
@@ -156,6 +152,8 @@ do {
     bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);        \
     eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);     \
                                                                             \
+    MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base );                 \
+                                                                            \
     /* shortcut for zero byte */                                            \
     if(size <= eager_limit) {                                               \
         switch(sendreq->req_send.req_send_mode) {                           \
@@ -251,6 +249,10 @@ do {
     (sendreq)->req_send.req_base.req_ompi.req_status._count =               \
         (sendreq)->req_send.req_bytes_packed;                               \
     MCA_PML_BASE_REQUEST_MPI_COMPLETE( &((sendreq)->req_send.req_base.req_ompi) ); \
+                                                                            \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE,                      \
+                             &(sendreq->req_send.req_base), PERUSE_SEND);   \
+                                                                            \
 } while(0)
 
 /*
@@ -267,6 +269,10 @@ do {
                                                                             \
     assert( false == sendreq->req_send.req_base.req_pml_complete );         \
                                                                             \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,                      \
+                             &(sendreq->req_send.req_base),                 \
+                             PERUSE_SEND );                                 \
+\
     /* return mpool resources */                                            \
     for( r = 0; r < sendreq->req_rdma_cnt; r++ ) {                          \
         mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg;  \
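
Note: the PERUSE_TRACE_COMM_EVENT / PERUSE_TRACE_MSG_EVENT macros used above
come from ompi/peruse/peruse-internal.h, which is not part of this diff. A
plausible shape for such a macro, assuming the compile-out behavior implied
by the OMPI_WANT_PERUSE guards in this patch (peruse_comm_event_notify is a
hypothetical helper named only for illustration, not the file's actual
contents):

    #if OMPI_WANT_PERUSE
    /* Forward the event to whatever handler is attached to the communicator. */
    #define PERUSE_TRACE_COMM_EVENT(event, base, op) \
        peruse_comm_event_notify( (event), (base), (op) )  /* hypothetical */
    #else
    /* Expands to nothing: the instrumented paths cost nothing without PERUSE. */
    #define PERUSE_TRACE_COMM_EVENT(event, base, op)
    #endif  /* OMPI_WANT_PERUSE */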
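
On the consumer side, tools subscribe to these events through the PERUSE user
API rather than through the PML directly. The following minimal sketch
(assuming the PERUSE 2.0 interface as implemented in ompi/peruse/peruse.h;
return codes are unchecked for brevity, and the exact callback behavior is
illustrative) registers a handler for the MSG_ARRIVED event that this patch
generates in mca_pml_ob1_recv_frag_match():

    #include <stdio.h>
    #include <mpi.h>
    #include <peruse.h>

    /* Invoked by the PML each time the traced event fires. */
    static int msg_arrived_cb( peruse_event_h event_h, MPI_Aint unique_id,
                               peruse_comm_spec_t* spec, void* param )
    {
        printf( "MSG_ARRIVED: peer=%d tag=%d\n", spec->peer, spec->tag );
        return MPI_SUCCESS;
    }

    int main( int argc, char* argv[] )
    {
        int event;
        peruse_event_h eh;

        MPI_Init( &argc, &argv );
        PERUSE_Init();

        /* Resolve the event name, attach it to a communicator (only one
         * event of each type per communicator for now, as noted in the
         * commit message), then activate it. */
        PERUSE_Query_event( "PERUSE_COMM_MSG_ARRIVED", &event );
        PERUSE_Event_comm_register( event, MPI_COMM_WORLD, msg_arrived_cb,
                                    NULL, &eh );
        PERUSE_Event_activate( eh );

        /* ... exchange messages; the callback fires on each arrival ... */

        PERUSE_Event_deactivate( eh );
        PERUSE_Event_release( &eh );
        MPI_Finalize();
        return 0;
    }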