added timer profiling to trace rdma protocol
This commit was SVN r6080.
Этот коммит содержится в:
родитель
f886160f7c
Коммит
2bc9d9c657
@ -262,14 +262,15 @@ extern int mca_pml_ob1_start(
|
||||
*(request) = MPI_REQUEST_NULL; \
|
||||
}
|
||||
|
||||
|
||||
#if defined(__DEBUG__)
|
||||
#define get_profiler_timestamp() \
|
||||
( { \
|
||||
register unsigned long long __res; \
|
||||
asm volatile ("rdtsc" : "=A"(__res)); \
|
||||
__res; \
|
||||
} )
|
||||
#define MCA_PML_OB1_TIMESTAMPS 0
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
#define MCA_PML_OB1_NUM_TSTAMPS 256
|
||||
/**
 * Read the x86 time-stamp counter (RDTSC).
 *
 * RDTSC delivers the 64-bit counter split across EDX (high half) and EAX
 * (low half). The halves are captured in separate outputs and recombined
 * here: the "=A" constraint denotes the EDX:EAX pair only on 32-bit x86,
 * while on x86-64 it lets the compiler choose either RAX or RDX, which
 * returns just one half of the counter.
 *
 * x86 / x86-64 only; the value is in counter ticks, not seconds.
 *
 * @return Current time-stamp counter value.
 */
static inline unsigned long long get_profiler_timestamp(void)
{
    unsigned int tsc_lo;
    unsigned int tsc_hi;

    __asm__ __volatile__ ("rdtsc" : "=a"(tsc_lo), "=d"(tsc_hi));
    return ((unsigned long long)tsc_hi << 32) | tsc_lo;
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -80,6 +80,9 @@ void mca_pml_ob1_recv_frag_callback(
|
||||
sendreq->req_state = MCA_PML_OB1_SR_ACKED;
|
||||
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
|
||||
sendreq->req_rdma_offset = hdr->hdr_ack.hdr_rdma_offset;
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
sendreq->t_send1 = get_profiler_timestamp();
|
||||
#endif
|
||||
mca_pml_ob1_send_request_schedule(sendreq);
|
||||
break;
|
||||
}
|
||||
@ -102,8 +105,16 @@ void mca_pml_ob1_recv_frag_callback(
|
||||
mca_bmi_base_descriptor_t* dst = (mca_bmi_base_descriptor_t*)
|
||||
hdr->hdr_fin.hdr_dst.pval;
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)dst->des_cbdata;
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
recvreq->fin1[recvreq->fin_index] = get_profiler_timestamp();
|
||||
bmi->bmi_free(bmi,dst);
|
||||
recvreq->fin2[recvreq->fin_index] = get_profiler_timestamp();
|
||||
recvreq->fin_index++;
|
||||
mca_pml_ob1_recv_request_progress(recvreq,bmi,segments,des->des_dst_cnt);
|
||||
#else
|
||||
mca_pml_ob1_recv_request_progress(recvreq,bmi,segments,des->des_dst_cnt);
|
||||
bmi->bmi_free(bmi,dst);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -525,9 +536,7 @@ int mca_pml_ob1_recv_frag_match(
|
||||
if(match != NULL) {
|
||||
MCA_PML_OB1_RECV_REQUEST_MATCHED(match, hdr);
|
||||
mca_pml_ob1_recv_request_progress(match,bmi,segments,num_segments);
|
||||
} else {
|
||||
ompi_output(0, "match not found\n");
|
||||
}
|
||||
}
|
||||
if(additional_match) {
|
||||
ompi_list_item_t* item;
|
||||
while(NULL != (item = ompi_list_remove_first(&additional_matches))) {
|
||||
|
@ -124,7 +124,6 @@ static void mca_pml_ob1_recv_request_ack(
|
||||
mca_bmi_base_descriptor_t* des;
|
||||
mca_pml_ob1_recv_frag_t* frag;
|
||||
mca_pml_ob1_ack_hdr_t* ack;
|
||||
bool schedule;
|
||||
int rc;
|
||||
|
||||
/* allocate descriptor */
|
||||
@ -151,11 +150,9 @@ static void mca_pml_ob1_recv_request_ack(
|
||||
&recvreq->req_recv.req_convertor,
|
||||
&recvreq->req_rdma_offset);
|
||||
ack->hdr_rdma_offset = recvreq->req_rdma_offset;
|
||||
schedule = true;
|
||||
} else {
|
||||
recvreq->req_rdma_offset = recvreq->req_recv.req_bytes_packed;
|
||||
ack->hdr_rdma_offset = recvreq->req_recv.req_bytes_packed;
|
||||
schedule = false;
|
||||
}
|
||||
|
||||
ack->hdr_common.hdr_flags = 0;
|
||||
@ -172,10 +169,6 @@ static void mca_pml_ob1_recv_request_ack(
|
||||
ep->bmi_free(ep->bmi,des);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* after sending ack - attempt to schedule rdma */
|
||||
if(schedule)
|
||||
mca_pml_ob1_recv_request_schedule(recvreq);
|
||||
return;
|
||||
|
||||
/* queue request to retry later */
|
||||
@ -222,6 +215,9 @@ void mca_pml_ob1_recv_request_progress(
|
||||
|
||||
case MCA_PML_OB1_HDR_TYPE_RNDV:
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
recvreq->ack = get_profiler_timestamp();
|
||||
#endif
|
||||
recvreq->req_send = hdr->hdr_rndv.hdr_src_req;
|
||||
mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv);
|
||||
bytes_received = hdr->hdr_rndv.hdr_frag_length;
|
||||
@ -251,7 +247,7 @@ void mca_pml_ob1_recv_request_progress(
|
||||
|
||||
case MCA_PML_OB1_HDR_TYPE_FIN:
|
||||
|
||||
bytes_delivered = bytes_received = hdr->hdr_fin.hdr_rdma_length;
|
||||
bytes_delivered = bytes_received = hdr->hdr_fin.hdr_rdma_length;
|
||||
OMPI_THREAD_ADD32(&recvreq->req_pipeline_depth,-1);
|
||||
break;
|
||||
|
||||
@ -264,10 +260,27 @@ void mca_pml_ob1_recv_request_progress(
|
||||
recvreq->req_bytes_received += bytes_received;
|
||||
recvreq->req_bytes_delivered += bytes_delivered;
|
||||
if (recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed) {
|
||||
|
||||
/* initialize request status */
|
||||
recvreq->req_recv.req_base.req_ompi.req_status._count = recvreq->req_bytes_delivered;
|
||||
recvreq->req_recv.req_base.req_pml_complete = true;
|
||||
recvreq->req_recv.req_base.req_ompi.req_complete = true;
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
if(recvreq->req_bytes_received > 0) {
|
||||
int i;
|
||||
ompi_output(0, "[%d,%d,%d] dst ack: %llu",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->ack);
|
||||
for(i=0; i<recvreq->pin_index; i++) {
|
||||
ompi_output(0, "[%d,%d,%d] dst pin, %llu %llu",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->pin1[i], recvreq->pin2[i] - recvreq->pin1[i]);
|
||||
}
|
||||
for(i=0; i<recvreq->fin_index; i++) {
|
||||
ompi_output(0, "[%d,%d,%d] dst fin: %llu %llu",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->fin1[i], recvreq->fin2[i] - recvreq->fin1[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if(ompi_request_waiting) {
|
||||
ompi_condition_broadcast(&ompi_request_cond);
|
||||
}
|
||||
@ -327,12 +340,18 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
||||
|
||||
/* prepare a descriptor for RDMA */
|
||||
ompi_convertor_set_position(&recvreq->req_recv.req_convertor, &recvreq->req_rdma_offset);
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
recvreq->pin1[recvreq->pin_index] = get_profiler_timestamp();
|
||||
#endif
|
||||
dst = ep->bmi_prepare_dst(
|
||||
ep->bmi,
|
||||
ep->bmi_endpoint,
|
||||
&recvreq->req_recv.req_convertor,
|
||||
0,
|
||||
&size);
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
recvreq->pin2[recvreq->pin_index] = get_profiler_timestamp();
|
||||
#endif
|
||||
if(dst == NULL) {
|
||||
OMPI_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
ompi_list_append(&mca_pml_ob1.recv_pending, (ompi_list_item_t*)recvreq);
|
||||
@ -340,6 +359,9 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
||||
break;
|
||||
}
|
||||
dst->des_cbdata = recvreq;
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
recvreq->pin_index++;
|
||||
#endif
|
||||
|
||||
/* prepare a descriptor for rdma control message */
|
||||
hdr_size = sizeof(mca_pml_ob1_rdma_hdr_t);
|
||||
|
@ -27,6 +27,7 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
struct mca_pml_ob1_recv_request_t {
|
||||
mca_pml_base_recv_request_t req_recv;
|
||||
struct mca_pml_proc_t *req_proc;
|
||||
@ -37,9 +38,15 @@ struct mca_pml_ob1_recv_request_t {
|
||||
size_t req_bytes_delivered;
|
||||
size_t req_rdma_offset;
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
unsigned long long ack;
|
||||
unsigned long long fin_first;
|
||||
unsigned long long fin_last;
|
||||
unsigned long long pin1[MCA_PML_OB1_NUM_TSTAMPS];
|
||||
unsigned long long pin2[MCA_PML_OB1_NUM_TSTAMPS];
|
||||
unsigned long long fin1[MCA_PML_OB1_NUM_TSTAMPS];
|
||||
unsigned long long fin2[MCA_PML_OB1_NUM_TSTAMPS];
|
||||
int pin_index;
|
||||
int fin_index;
|
||||
#endif
|
||||
};
|
||||
typedef struct mca_pml_ob1_recv_request_t mca_pml_ob1_recv_request_t;
|
||||
|
||||
@ -121,7 +128,18 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request);
|
||||
* @param request (IN) Request to match.
|
||||
*/
|
||||
void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request);
|
||||
|
||||
|
||||
/**
|
||||
* Initialize diagnostic code for tracing rdma protocol timing
|
||||
*/
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
/* Reset the fin/pin timestamp cursors of a receive request.
 * The body refers only to the macro's own argument (it previously used the
 * caller's variable name) and is braced so it expands to one statement. */
#define MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(recvreq) \
{ \
    (recvreq)->fin_index = 0; \
    (recvreq)->pin_index = 0; \
}
|
||||
#else
|
||||
#define MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(recvreq)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Start an initialized request.
|
||||
*
|
||||
@ -138,6 +156,7 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
|
||||
(request)->req_recv.req_base.req_pml_complete = false; \
|
||||
(request)->req_recv.req_base.req_ompi.req_complete = false; \
|
||||
(request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
||||
MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(request); \
|
||||
\
|
||||
/* always set the req_status.MPI_TAG to ANY_TAG before starting the \
|
||||
* request. This field is used if cancelled to find out if the request \
|
||||
|
@ -18,6 +18,7 @@
|
||||
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <sched.h>
|
||||
#include "include/constants.h"
|
||||
#include "mca/pml/pml.h"
|
||||
#include "mca/bmi/bmi.h"
|
||||
@ -143,6 +144,12 @@ static void mca_pml_ob1_send_completion(
|
||||
break;
|
||||
}
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
if(sendreq->req_pipeline_depth == 1) {
|
||||
sendreq->t_send2 = get_profiler_timestamp();
|
||||
}
|
||||
#endif
|
||||
|
||||
/* check for request completion */
|
||||
OMPI_THREAD_LOCK(&ompi_request_lock);
|
||||
if (OMPI_THREAD_ADD32(&sendreq->req_pipeline_depth,-1) == 0 &&
|
||||
@ -151,6 +158,7 @@ static void mca_pml_ob1_send_completion(
|
||||
}
|
||||
OMPI_THREAD_UNLOCK(&ompi_request_lock);
|
||||
|
||||
|
||||
/* return the descriptor */
|
||||
bmi_ep->bmi_free(bmi_ep->bmi, descriptor);
|
||||
|
||||
@ -328,6 +336,9 @@ int mca_pml_ob1_send_request_start(
|
||||
OMPI_THREAD_ADD32(&sendreq->req_pipeline_depth,1);
|
||||
|
||||
/* send */
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
sendreq->t_start = get_profiler_timestamp();
|
||||
#endif
|
||||
rc = endpoint->bmi_send(
|
||||
endpoint->bmi,
|
||||
endpoint->bmi_endpoint,
|
||||
@ -430,6 +441,10 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
|
||||
OMPI_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
break;
|
||||
}
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
if(bytes_remaining == 0)
|
||||
sendreq->t_scheduled = get_profiler_timestamp();
|
||||
#endif
|
||||
}
|
||||
} while (OMPI_THREAD_ADD32(&sendreq->req_lock,-1) > 0);
|
||||
}
|
||||
@ -479,6 +494,13 @@ static void mca_pml_ob1_put_completion(
|
||||
orte_errmgr.abort();
|
||||
}
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
/* update statistics */
|
||||
sendreq->t_fin[sendreq->t_fin_index++] = get_profiler_timestamp();
|
||||
if(sendreq->t_fin_index >= MCA_PML_OB1_NUM_TSTAMPS)
|
||||
sendreq->t_fin_index = 0;
|
||||
#endif
|
||||
|
||||
/* check for request completion */
|
||||
OMPI_THREAD_LOCK(&ompi_request_lock);
|
||||
sendreq->req_bytes_delivered += frag->rdma_length;
|
||||
@ -580,7 +602,13 @@ void mca_pml_ob1_send_request_put(
|
||||
frag->rdma_ep = ep;
|
||||
frag->rdma_state = MCA_PML_OB1_RDMA_PREPARE;
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
/* setup descriptor */
|
||||
sendreq->t_pin[sendreq->t_pin_index++] = get_profiler_timestamp();
|
||||
if(sendreq->t_pin_index >= MCA_PML_OB1_NUM_TSTAMPS)
|
||||
sendreq->t_pin_index = 0;
|
||||
#endif
|
||||
|
||||
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
|
||||
des = bmi->bmi_prepare_src(
|
||||
bmi,
|
||||
@ -601,7 +629,13 @@ void mca_pml_ob1_send_request_put(
|
||||
des->des_cbfunc = mca_pml_ob1_put_completion;
|
||||
des->des_cbdata = frag;
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
/* queue put */
|
||||
sendreq->t_put[sendreq->t_put_index++] = get_profiler_timestamp();
|
||||
if(sendreq->t_put_index >= MCA_PML_OB1_NUM_TSTAMPS)
|
||||
sendreq->t_put_index = 0;
|
||||
#endif
|
||||
|
||||
if(OMPI_SUCCESS != (rc = bmi->bmi_put(bmi, ep->bmi_endpoint, des))) {
|
||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE) {
|
||||
OMPI_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
@ -613,6 +647,7 @@ void mca_pml_ob1_send_request_put(
|
||||
orte_errmgr.abort();
|
||||
}
|
||||
}
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
|
||||
|
@ -49,6 +49,19 @@ struct mca_pml_ob1_send_request_t {
|
||||
size_t req_bytes_delivered;
|
||||
size_t req_send_offset;
|
||||
size_t req_rdma_offset;
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
unsigned long long t_start;
|
||||
unsigned long long t_send1;
|
||||
unsigned long long t_send2;
|
||||
unsigned long long t_scheduled;
|
||||
unsigned long long t_pin[MCA_PML_OB1_NUM_TSTAMPS];
|
||||
unsigned long long t_put[MCA_PML_OB1_NUM_TSTAMPS];
|
||||
unsigned long long t_fin[MCA_PML_OB1_NUM_TSTAMPS];
|
||||
int t_pin_index;
|
||||
int t_put_index;
|
||||
int t_fin_index;
|
||||
#endif
|
||||
};
|
||||
typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t;
|
||||
|
||||
@ -99,6 +112,53 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
|
||||
persistent); \
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Diagnostic output to trace rdma protocol timing
|
||||
*/
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
/* Emit every timestamp recorded on a send request through ompi_output():
 * the four scalar marks first (start, send start, scheduled, send complete),
 * then one line per recorded pin, put and fin entry. Pin lines also print
 * t_put - t_pin and put lines print t_fin - t_put for the same slot. */
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq) \
{ \
    int tstamp_idx; \
    \
    /* scalar marks */ \
    ompi_output(0, "[%d,%d,%d] src start, %llu\n", \
        ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_start); \
    ompi_output(0, "[%d,%d,%d] src send start, %llu\n", \
        ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_send1); \
    ompi_output(0, "[%d,%d,%d] src scheduled, %llu\n", \
        ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_scheduled); \
    ompi_output(0, "[%d,%d,%d] src send complete, %llu\n", \
        ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_send2); \
    \
    /* per-fragment marks: pin, then put, then fin */ \
    for(tstamp_idx = 0; tstamp_idx < (sendreq)->t_pin_index; tstamp_idx++) { \
        ompi_output(0, "[%d,%d,%d] src pin, %llu %llu\n", \
            ORTE_NAME_ARGS(orte_process_info.my_name), \
            (sendreq)->t_pin[tstamp_idx], \
            (sendreq)->t_put[tstamp_idx] - (sendreq)->t_pin[tstamp_idx]); \
    } \
    for(tstamp_idx = 0; tstamp_idx < (sendreq)->t_put_index; tstamp_idx++) { \
        ompi_output(0, "[%d,%d,%d] src put, %llu %llu\n", \
            ORTE_NAME_ARGS(orte_process_info.my_name), \
            (sendreq)->t_put[tstamp_idx], \
            (sendreq)->t_fin[tstamp_idx] - (sendreq)->t_put[tstamp_idx]); \
    } \
    for(tstamp_idx = 0; tstamp_idx < (sendreq)->t_fin_index; tstamp_idx++) { \
        ompi_output(0, "[%d,%d,%d] src fin, %llu\n", \
            ORTE_NAME_ARGS(orte_process_info.my_name), \
            (sendreq)->t_fin[tstamp_idx]); \
    } \
}
|
||||
|
||||
/* Reset the pin/put/fin timestamp cursors of a send request.
 * The argument is parenthesized at every use so that any pointer
 * expression (e.g. &req) expands correctly. */
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq) \
{ \
    (sendreq)->t_pin_index = 0; \
    (sendreq)->t_put_index = 0; \
    (sendreq)->t_fin_index = 0; \
}
|
||||
|
||||
#else
|
||||
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq)
|
||||
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq)
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Start a send request.
|
||||
*/
|
||||
@ -111,6 +171,7 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
|
||||
/* select next endpoint */ \
|
||||
endpoint = mca_pml_ob1_ep_array_get_next(&proc->bmi_eager); \
|
||||
sendreq->req_lock = 0; \
|
||||
MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq); \
|
||||
sendreq->req_pipeline_depth = 0; \
|
||||
sendreq->req_bytes_delivered = 0; \
|
||||
sendreq->req_send_offset = 0; \
|
||||
@ -124,10 +185,10 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
|
||||
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
|
||||
mca_pml_base_bsend_request_start(&sendreq->req_send.req_base.req_ompi); \
|
||||
} \
|
||||
\
|
||||
rc = mca_pml_ob1_send_request_start(sendreq, endpoint); \
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Complete a send request
|
||||
*/
|
||||
@ -144,12 +205,13 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
|
||||
(sendreq)->req_send.req_bytes_packed; \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_complete = true; \
|
||||
(sendreq)->req_state = MCA_PML_OB1_SR_COMPLETE; \
|
||||
MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq); \
|
||||
if(ompi_request_waiting) { \
|
||||
ompi_condition_broadcast(&ompi_request_cond); \
|
||||
} \
|
||||
} else if(sendreq->req_send.req_base.req_free_called) { \
|
||||
} else if((sendreq)->req_send.req_base.req_free_called) { \
|
||||
MCA_PML_OB1_FREE((ompi_request_t**)&sendreq); \
|
||||
} else if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
|
||||
} else if ((sendreq)->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
|
||||
mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); \
|
||||
sendreq->req_state = MCA_PML_OB1_SR_COMPLETE; \
|
||||
} \
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user