PERUSE support for OB1. There we go: the trunk now has a partial PERUSE implementation.
We support all the events in the PERUSE specification, but right now only one event of each type can be attached to a communicator; this will be worked out in the future. The events were placed in such a way that we will be able to measure the overhead of our threading implementation (the cost of the synchronization objects). This commit was SVN r9500.
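For readers unfamiliar with PERUSE: a profiling tool asks the library for a named event, registers a callback for it on a specific communicator, and then activates the handle. The sketch below shows that consumer side. It follows the PERUSE draft specification rather than anything in this commit, so the function names, return codes, and the callback signature are assumptions to be checked against peruse.h.

    #include <stdio.h>
    #include <mpi.h>
    #include <peruse.h>

    /* Callback fired by the MPI library each time the event triggers.
     * Signature taken from the PERUSE draft spec; treat as an assumption. */
    static int req_activate_cb( peruse_event_h event_h, MPI_Aint unique_id,
                                peruse_comm_spec_t *spec, void *param )
    {
        printf( "request activated: peer=%d tag=%d\n", spec->peer, spec->tag );
        return MPI_SUCCESS;
    }

    /* Attach one handler for one event type to one communicator, which is
     * exactly the granularity the current OB1 implementation supports. */
    static void trace_req_activate( void )
    {
        int event;
        peruse_event_h handle;

        PERUSE_Init();
        PERUSE_Query_event( "PERUSE_COMM_REQ_ACTIVATE", &event );
        PERUSE_Event_comm_register( event, MPI_COMM_WORLD,
                                    req_activate_cb, NULL, &handle );
        PERUSE_Event_activate( handle );
    }

By registering pairs of such callbacks and timestamping them, a tool can measure, for example, the gap between PERUSE_COMM_REQ_ACTIVATE and PERUSE_COMM_SEARCH_UNEX_Q_BEGIN; as the comments in the diff below point out, that interval includes the acquisition of the matching lock, which is what makes the synchronization cost observable.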
This commit is contained in:
parent 1226d452bf
commit 58cd591d3b
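The hunks below emit events through the PERUSE_TRACE_COMM_EVENT and PERUSE_TRACE_MSG_EVENT macros pulled in via ompi/peruse/peruse-internal.h, whose definitions are not part of this page. The following is only a plausible shape for such a macro, with invented helper names (the ompi_peruse_* functions are hypothetical), shown to explain why most trace points need no #if guard of their own:

    /* Hypothetical sketch, not the real peruse-internal.h definition.
     * The point is the compile-time guard: builds configured without
     * PERUSE support expand every trace point to nothing. */
    #if OMPI_WANT_PERUSE
    #define PERUSE_TRACE_COMM_EVENT( event, base, op )                   \
        do {                                                             \
            if( ompi_peruse_event_is_active( (base)->req_comm, event ) ) \
                ompi_peruse_invoke_callback( event, (base), (op) );      \
        } while (0)
    #else
    #define PERUSE_TRACE_COMM_EVENT( event, base, op )
    #endif  /* OMPI_WANT_PERUSE */

The explicit #if OMPI_WANT_PERUSE blocks that do appear in the diff are the cases where the macro alone is not enough, such as tracing hung off an `} else {` branch that must vanish entirely from untraced builds.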
@@ -19,7 +19,7 @@
 #include "ompi_config.h"
 #include "ompi/request/request.h"
 #include "pml_ob1_recvreq.h"
-
+#include "ompi/peruse/peruse-internal.h"
 
 int mca_pml_ob1_irecv_init(void *addr,
                            size_t count,

@@ -22,7 +22,7 @@
 #include "pml_ob1_proc.h"
 #include "pml_ob1_sendreq.h"
 #include "pml_ob1_recvreq.h"
-
+#include "ompi/peruse/peruse-internal.h"
 
 int mca_pml_ob1_isend_init(void *buf,
                            size_t count,

@@ -130,6 +130,7 @@ int mca_pml_ob1_send(void *buf,
             ompi_request_waiting--;
         }
     }
+
     rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
     ompi_request_free( (ompi_request_t**)&sendreq );
     return rc;

@@ -34,8 +34,7 @@
 #include "pml_ob1_sendreq.h"
 #include "pml_ob1_hdr.h"
 #include "ompi/datatype/dt_arch.h"
-
-
+#include "ompi/peruse/peruse-internal.h"
 
 OBJ_CLASS_INSTANCE(
     mca_pml_ob1_buffer_t,

@@ -310,6 +309,11 @@ do { \
         /* remove this recv from the wild receive queue */             \
         opal_list_remove_item(&comm->wild_receives,                    \
                               (opal_list_item_t *)wild_recv);          \
+                                                                       \
+        PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
+                                 &(wild_recv->req_recv.req_base),      \
+                                 PERUSE_RECV);                         \
+                                                                       \
         break;                                                         \
     }                                                                  \
                                                                        \

@@ -351,6 +355,11 @@ do { \
             /* remove descriptor from specific receive list */             \
             opal_list_remove_item(&(proc)->specific_receives,              \
                                   (opal_list_item_t *)specific_recv);      \
+                                                                           \
+            PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, \
+                                     &(specific_recv->req_recv.req_base),  \
+                                     PERUSE_RECV);                         \
+                                                                           \
             break;                                                         \
         }                                                                  \
                                                                            \

@@ -444,6 +453,15 @@ int mca_pml_ob1_recv_frag_match(
     frag_msg_seq = hdr->hdr_seq;
     proc = comm->procs + hdr->hdr_src;
 
+    /**
+     * We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
+     * fragment arrival. Independing if it is received on the correct order or not.
+     * This will allow the tools to figure out if the messages are not received in the
+     * correct order (if multiple network interfaces).
+     */
+    PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_ARRIVED, comm_ptr,
+                            hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+
     /* get next expected message sequence number - if threaded
      * run, lock to make sure that if another thread is processing
      * a frag from the same message a match is made only once.

@@ -465,6 +483,15 @@ int mca_pml_ob1_recv_frag_match(
 
         /* We're now expecting the next sequence number. */
         (proc->expected_sequence)++;
+
+        /**
+         * We generate the SEARCH_POSTED_QUEUE only when the message is received
+         * in the correct sequence. Otherwise, we delay the event generation until
+         * we reach the correct sequence number.
+         */
+        PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
+                                hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+
 rematch:
 
         /*

@@ -512,12 +539,26 @@ rematch:
         MCA_PML_OB1_RECV_FRAG_ALLOC(frag, rc);
         if(OMPI_SUCCESS != rc) {
             OPAL_THREAD_UNLOCK(&comm->matching_lock);
+            /**
+             * As we return from the match function, we should generate the expected event.
+             */
+            PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
+                                    hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+
             return rc;
         }
         MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
         opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
     }
 
+    /**
+     * The match is over. We generate the SEARCH_POSTED_Q_END here, before going
+     * into the mca_pml_ob1_check_cantmatch_for_match so we can make a difference
+     * for the searching time for all messages.
+     */
+    PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
+                            hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+
     /*
      * Now that new message has arrived, check to see if
      * any fragments on the c_c_frags_cant_match list

@@ -548,12 +589,18 @@ rematch:
 
     if(match != NULL) {
         mca_pml_ob1_recv_request_progress(match,btl,segments,num_segments);
+#if OMPI_WANT_PERUSE
+    } else {
+        PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr,
+                                hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
+#endif  /* OMPI_WANT_PERUSE */
     }
     if(additional_match) {
         opal_list_item_t* item;
         while(NULL != (item = opal_list_remove_first(&additional_matches))) {
             mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
-            mca_pml_ob1_recv_request_progress(frag->request,frag->btl,frag->segments,frag->num_segments);
+            mca_pml_ob1_recv_request_progress( frag->request, frag->btl, frag->segments,
+                                               frag->num_segments );
             MCA_PML_OB1_RECV_FRAG_RETURN(frag);
         }
     }

@@ -46,6 +46,10 @@ static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request)
     if( true == recvreq->req_recv.req_base.req_pml_complete ) {
         MCA_PML_OB1_RECV_REQUEST_RETURN( recvreq );
     }
+
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY,
+                             &(recvreq->req_recv.req_base), PERUSE_RECV );
+
     OPAL_THREAD_UNLOCK(&ompi_request_lock);
 
     *request = MPI_REQUEST_NULL;

@@ -70,6 +74,8 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
             mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer;
             opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request);
         }
+        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
+                                 &(request->req_recv.req_base), PERUSE_RECV );
     }
     OPAL_THREAD_UNLOCK(&comm->matching_lock);
 

@@ -79,7 +85,7 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
      * on this request will be able to complete. As the status is marked as
      * cancelled the cancel state will be detected.
      */
-    MCA_PML_BASE_REQUEST_MPI_COMPLETE(ompi_request);
+    MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(request);
     OPAL_THREAD_UNLOCK(&ompi_request_lock);
     return OMPI_SUCCESS;
 }

@@ -108,7 +114,7 @@ OBJ_CLASS_INSTANCE(
  * Release resources.
  */
 
-static void mca_pml_ob1_ctl_completion(
+static void mca_pml_ob1_recv_ctl_completion(
     mca_btl_base_module_t* btl,
     struct mca_btl_base_endpoint_t* ep,
     struct mca_btl_base_descriptor_t* des,

@@ -267,7 +273,7 @@ static void mca_pml_ob1_recv_request_ack(
 
     /* initialize descriptor */
     des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
-    des->des_cbfunc = mca_pml_ob1_ctl_completion;
+    des->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
 
     rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
     if(rc != OMPI_SUCCESS) {

@@ -701,7 +707,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
             break;
         }
         ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
-        ctl->des_cbfunc = mca_pml_ob1_ctl_completion;
+        ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
 
         /* fill in rdma header */
         hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval;

@@ -767,6 +773,12 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
 
     /* check for a specific match */
     OPAL_THREAD_LOCK(&comm->matching_lock);
+    /**
+     * The laps of time between the ACTIVATE event and the SEARCH_UNEX one include
+     * the cost of the request lock.
+     */
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
+                             &(request->req_recv.req_base), PERUSE_RECV );
 
     /* assign sequence number */
     request->req_recv.req_base.req_sequence = comm->recv_sequence++;

@@ -775,6 +787,9 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
         (frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
         OPAL_THREAD_UNLOCK(&comm->matching_lock);
 
+        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
+                                 &(request->req_recv.req_base), PERUSE_RECV );
+
         if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
               (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
             mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);

@@ -785,11 +800,18 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
         return; /* match found */
     }
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
+                             &(request->req_recv.req_base), PERUSE_RECV );
+
     /* We didn't find any matches. Record this irecv so we can match
      * it when the message comes in.
      */
     if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
         opal_list_append(&proc->specific_receives, (opal_list_item_t*)request);
+        if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) {
+            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_INSERT_IN_POSTED_Q,
+                                     &(request->req_recv.req_base), PERUSE_RECV );
+        }
     }
     OPAL_THREAD_UNLOCK(&comm->matching_lock);
 }

@@ -814,6 +836,12 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
      * process.
      */
     OPAL_THREAD_LOCK(&comm->matching_lock);
+    /**
+     * The laps of time between the ACTIVATE event and the SEARCH_UNEX one include
+     * the cost of the request lock.
+     */
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_BEGIN,
+                             &(request->req_recv.req_base), PERUSE_RECV );
 
     /* assign sequence number */
     request->req_recv.req_base.req_sequence = comm->recv_sequence++;

@@ -831,6 +859,9 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
         if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
             OPAL_THREAD_UNLOCK(&comm->matching_lock);
 
+            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
+                                     &(request->req_recv.req_base), PERUSE_RECV );
+
             if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
                   (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
                 mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);

@@ -843,12 +874,25 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
         proc++;
     }
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_SEARCH_UNEX_Q_END,
+                             &(request->req_recv.req_base), PERUSE_RECV );
+
     /* We didn't find any matches. Record this irecv so we can match to
      * it when the message comes in.
      */
 
-    if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE)
+    if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) {
         opal_list_append(&comm->wild_receives, (opal_list_item_t*)request);
+        /**
+         * We don't want to generate this kind of event for MPI_Probe. Hopefully,
+         * the compiler will optimize out the empty if loop in the case where PERUSE
+         * support is not required by the user.
+         */
+        if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) {
+            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_INSERT_IN_POSTED_Q,
+                                     &(request->req_recv.req_base), PERUSE_RECV );
+        }
+    }
     OPAL_THREAD_UNLOCK(&comm->matching_lock);
 }
 

@@ -897,9 +941,14 @@ static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
             request->req_recv.req_base.req_proc = proc->proc_ompi;
             if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
                   (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
+                PERUSE_TRACE_MSG_EVENT( PERUSE_COMM_MSG_REMOVE_FROM_UNEX_Q,
+                                        request->req_recv.req_base.req_comm,
+                                        hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV );
                 opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag);
                 frag->request = request;
             }
+            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_MATCH_UNEX,
+                                     &(request->req_recv.req_base), PERUSE_RECV );
             return frag;
         }
 

@@ -100,10 +100,22 @@ do { \
                               persistent);                                      \
 } while(0)
 
+/**
+ * Mark the request as completed at MPI level for internal purposes.
+ *
+ * @param recvreq (IN)  Receive request.
+ */
+#define MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq )                         \
+do {                                                                             \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE,                           \
+                             &(recvreq->req_recv.req_base), PERUSE_RECV );       \
+    MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) ); \
+} while (0)
+
 /**
  * Return a recv request to the modules free list.
  *
- * @param request (IN)  Receive request.
+ * @param recvreq (IN)  Receive request.
  */
 #define MCA_PML_OB1_RECV_REQUEST_PML_COMPLETE(recvreq)                           \
 do {                                                                             \

@@ -111,6 +123,9 @@ do {
                                                                                  \
     assert( false == recvreq->req_recv.req_base.req_pml_complete );              \
                                                                                  \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,                           \
+                             &(recvreq->req_recv.req_base), PERUSE_RECV );       \
+                                                                                 \
     for( r = 0; r < recvreq->req_rdma_cnt; r++ ) {                               \
         mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[r].btl_reg;   \
         if( NULL != btl_reg ) {                                                  \

@@ -129,7 +144,7 @@ do {
         recvreq->req_recv.req_base.req_ompi.req_status._count =                  \
             (recvreq->req_bytes_received < recvreq->req_bytes_delivered ?        \
              recvreq->req_bytes_received : recvreq->req_bytes_delivered);        \
-        MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) ); \
+        MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq );                        \
     }                                                                            \
     OPAL_THREAD_UNLOCK(&ompi_request_lock);                                      \
 } while(0)

@@ -178,17 +193,8 @@ do {
     (request)->req_lock = 0;                                                     \
     (request)->req_pipeline_depth = 0;                                           \
     (request)->req_rdma_idx = 0;                                                 \
-    (request)->req_recv.req_base.req_pml_complete = false;                       \
-    (request)->req_recv.req_base.req_ompi.req_complete = false;                  \
-    (request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE;       \
                                                                                  \
-    /* always set the req_status.MPI_TAG to ANY_TAG before starting the          \
-     * request. This field is used if cancelled to find out if the request       \
-     * has been matched or not.                                                  \
-     */                                                                          \
-    (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG;     \
-    (request)->req_recv.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;   \
-    (request)->req_recv.req_base.req_ompi.req_status._cancelled = 0;             \
+    MCA_PML_BASE_RECV_START( &(request)->req_recv.req_base );                    \
                                                                                  \
     /* attempt to match posted recv */                                           \
     if((request)->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) {               \

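A note on the hunk above: the per-field reinitialization is consolidated into MCA_PML_BASE_RECV_START (the sendreq.h hunks near the end of the diff do the same with MCA_PML_BASE_SEND_START). The base macro's definition is not shown on this page; the sketch below is a hypothetical reconstruction built from the inline lines the commit removes, and the trailing ACTIVATE trace point is an assumption about the motivation for the consolidation:

    /* Hypothetical reconstruction, inferred from the removed inline code;
     * the real macro lives in the PML base and may differ. */
    #define MCA_PML_BASE_RECV_START( request )                                \
        do {                                                                  \
            (request)->req_pml_complete = false;                              \
            (request)->req_ompi.req_complete = false;                         \
            (request)->req_ompi.req_state = OMPI_REQUEST_ACTIVE;              \
            /* MPI_TAG doubles as the matched/unmatched flag on cancel */     \
            (request)->req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG;            \
            (request)->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;          \
            (request)->req_ompi.req_status._cancelled = 0;                    \
            /* assumed: one shared place to raise the ACTIVATE event */       \
            PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_ACTIVATE,                \
                                     (request), PERUSE_RECV );                \
        } while (0)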
@@ -207,6 +213,10 @@ do {
 do {                                                                             \
     (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag;   \
     (request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = (hdr)->hdr_src; \
+                                                                                 \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_MSG_MATCH_POSTED_REQ,                   \
+                             &((request)->req_recv.req_base), PERUSE_RECV );     \
+                                                                                 \
     if((request)->req_recv.req_bytes_packed != 0) {                              \
         ompi_convertor_copy_and_prepare_for_recv(                                \
             (request)->req_recv.req_base.req_proc->proc_convertor,               \

@@ -218,6 +228,8 @@ do {
         ompi_convertor_get_unpacked_size( &(request)->req_recv.req_convertor,    \
                                           &(request)->req_bytes_delivered );     \
     }                                                                            \
+    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_XFER_BEGIN,                         \
+                             &(recvreq->req_recv.req_base), PERUSE_RECV);        \
 } while (0)
 
 

@@ -52,6 +52,10 @@ static int mca_pml_ob1_send_request_free(struct ompi_request_t** request)
     if( true == sendreq->req_send.req_base.req_pml_complete ) {
         MCA_PML_OB1_SEND_REQUEST_RETURN( sendreq );
     }
+
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY,
+                             &(sendreq->req_send.req_base), PERUSE_SEND );
+
     OPAL_THREAD_UNLOCK(&ompi_request_lock);
 
     *request = MPI_REQUEST_NULL;

@@ -97,6 +101,9 @@ void mca_pml_ob1_match_completion_cache(
     mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
     mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                             &(sendreq->req_send.req_base), PERUSE_SEND );
+
     /* check completion status */
     if(OMPI_SUCCESS != status) {
         /* TSW - FIX */

@@ -124,6 +131,9 @@ void mca_pml_ob1_match_completion_free(
     mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
     mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                             &(sendreq->req_send.req_base), PERUSE_SEND );
+
     /* check completion status */
     if(OMPI_SUCCESS != status) {
         /* TSW - FIX */

@@ -151,6 +161,9 @@ static void mca_pml_ob1_rndv_completion(
     mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)descriptor->des_cbdata;
     mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
 
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                             &(sendreq->req_send.req_base), PERUSE_SEND );
+
     /* check completion status */
     if(OMPI_SUCCESS != status) {
         /* TSW - FIX */

@@ -208,7 +221,7 @@ static void mca_pml_ob1_rget_completion(
  * Completion of a control message - return resources.
  */
 
-static void mca_pml_ob1_ctl_completion(
+static void mca_pml_ob1_send_ctl_completion(
     mca_btl_base_module_t* btl,
     struct mca_btl_base_endpoint_t* ep,
     struct mca_btl_base_descriptor_t* descriptor,

@@ -624,7 +637,15 @@ int mca_pml_ob1_send_request_start_rdma(
 
         for(i=0; i<src->des_src_cnt; i++)
             hdr->hdr_rget.hdr_segs[i] = src->des_src[i];
-        des->des_cbfunc = mca_pml_ob1_ctl_completion;
+        des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
+
+        /**
+         * Well, it's a get so we will not know when the peer get the data anyway.
+         * If we generate the PERUSE event here, at least we will know when do we
+         * sent the GET message ...
+         */
+        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN,
+                                 &(sendreq->req_send.req_base), PERUSE_SEND );
 
     } else {
 

@@ -882,6 +903,13 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
 #endif
 #endif
 
+#if OMPI_WANT_PERUSE
+            if( 0 != sendreq->req_send_offset ) {
+                PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
+                                         &(sendreq->req_send.req_base), PERUSE_SEND );
+            }
+#endif  /* OMPI_WANT_PERUSE */
+
             /* update state */
             sendreq->req_send_offset += size;
             OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1);

@@ -2,7 +2,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
  *                         of Tennessee Research Foundation.  All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,

@@ -144,10 +144,6 @@ do {
     sendreq->req_bytes_delivered = 0;                                            \
     sendreq->req_state = 0;                                                      \
     sendreq->req_send_offset = 0;                                                \
-    sendreq->req_send.req_base.req_pml_complete = false;                         \
-    sendreq->req_send.req_base.req_ompi.req_complete = false;                    \
-    sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE;         \
-    sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0;               \
     sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(                 \
         &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);      \
     sendreq->req_endpoint = endpoint;                                            \

@@ -156,6 +152,8 @@ do {
     bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);             \
     eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);          \
                                                                                  \
+    MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base );                      \
+                                                                                 \
     /* shortcut for zero byte */                                                 \
     if(size <= eager_limit) {                                                    \
         switch(sendreq->req_send.req_send_mode) {                                \

@@ -251,6 +249,10 @@ do {
     (sendreq)->req_send.req_base.req_ompi.req_status._count =                    \
         (sendreq)->req_send.req_bytes_packed;                                    \
     MCA_PML_BASE_REQUEST_MPI_COMPLETE( &((sendreq)->req_send.req_base.req_ompi) ); \
+                                                                                 \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE,                           \
+                             &(sendreq->req_send.req_base), PERUSE_SEND);        \
+                                                                                 \
 } while(0)
 
 /*

@@ -267,6 +269,10 @@ do {
                                                                                  \
     assert( false == sendreq->req_send.req_base.req_pml_complete );              \
                                                                                  \
+    PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,                           \
+                             &(sendreq->req_send.req_base),                      \
+                             PERUSE_SEND );                                      \
+                                                                                 \
     /* return mpool resources */                                                 \
     for( r = 0; r < sendreq->req_rdma_cnt; r++ ) {                               \
         mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg;       \