Working down the latency (0.2 microseconds on a 2 GHz Xeon) by removing the
second instance of the ompi_proc pointer from the send and receive requests. This information is already available on the base request, so there is no need to duplicate it. The drawback is that, in order to avoid a second lookup in the communicator's array of procs, we now have to set the base request's proc in the PML's _ALLOC macro. This commit was SVN r8900.
Этот коммит содержится в:
родитель
b7fa1f4664
Коммит
eb1d2dd290
@ -91,7 +91,7 @@ typedef struct mca_pml_base_send_request_t mca_pml_base_send_request_t;
|
||||
(request)->req_base.req_peer = (int32_t)peer; \
|
||||
(request)->req_base.req_tag = (int32_t)tag; \
|
||||
(request)->req_base.req_comm = comm; \
|
||||
(request)->req_base.req_proc = ompi_comm_peer_lookup(comm,peer); \
|
||||
/* (request)->req_base.req_proc is set on request allocation */ \
|
||||
(request)->req_base.req_persistent = persistent; \
|
||||
(request)->req_base.req_pml_complete = (persistent ? true : false); \
|
||||
(request)->req_base.req_free_called = false; \
|
||||
|
@ -173,7 +173,7 @@ static void mca_pml_ob1_recv_request_ack(
|
||||
mca_pml_ob1_rendezvous_hdr_t* hdr,
|
||||
size_t bytes_received)
|
||||
{
|
||||
ompi_proc_t* proc = (ompi_proc_t*) recvreq->req_proc;
|
||||
ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc;
|
||||
mca_bml_base_endpoint_t* bml_endpoint = NULL;
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
@ -185,7 +185,7 @@ static void mca_pml_ob1_recv_request_ack(
|
||||
if(NULL == proc) {
|
||||
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(
|
||||
recvreq->req_recv.req_base.req_comm, hdr->hdr_match.hdr_src);
|
||||
proc = recvreq->req_proc = ompi_proc;
|
||||
proc = recvreq->req_recv.req_base.req_proc = ompi_proc;
|
||||
}
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager);
|
||||
@ -363,7 +363,7 @@ static void mca_pml_ob1_recv_request_rget(
|
||||
int rc;
|
||||
|
||||
/* lookup bml datastructures */
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*) recvreq->req_proc->proc_pml;
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_pml;
|
||||
bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl);
|
||||
if(NULL == bml_btl) {
|
||||
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
|
||||
@ -569,7 +569,7 @@ void mca_pml_ob1_recv_request_matched_probe(
|
||||
void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
||||
{
|
||||
if(OPAL_THREAD_ADD32(&recvreq->req_lock,1) == 1) {
|
||||
ompi_proc_t* proc = (ompi_proc_t*) recvreq->req_proc;
|
||||
ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc;
|
||||
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
do {
|
||||
|
@ -34,7 +34,6 @@ extern "C" {
|
||||
|
||||
struct mca_pml_ob1_recv_request_t {
|
||||
mca_pml_base_recv_request_t req_recv;
|
||||
struct ompi_proc_t *req_proc;
|
||||
ompi_ptr_t req_send;
|
||||
#if OMPI_HAVE_THREAD_SUPPORT
|
||||
volatile int32_t req_lock;
|
||||
@ -193,14 +192,14 @@ do {
|
||||
ompi_comm_peer_lookup( \
|
||||
(request)->req_recv.req_base.req_comm, (hdr)->hdr_src); \
|
||||
\
|
||||
(request)->req_proc = proc; \
|
||||
(request)->req_recv.req_base.req_proc = proc; \
|
||||
ompi_convertor_copy_and_prepare_for_recv( proc->proc_convertor, \
|
||||
(request)->req_recv.req_base.req_datatype, \
|
||||
(request)->req_recv.req_base.req_count, \
|
||||
(request)->req_recv.req_base.req_addr, \
|
||||
&(request)->req_recv.req_convertor ); \
|
||||
} else { \
|
||||
(request)->req_proc = NULL; \
|
||||
(request)->req_recv.req_base.req_proc = NULL; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
@ -38,7 +38,6 @@ extern "C" {
|
||||
|
||||
struct mca_pml_ob1_send_request_t {
|
||||
mca_pml_base_send_request_t req_send;
|
||||
ompi_proc_t* req_proc;
|
||||
mca_bml_base_endpoint_t* req_endpoint;
|
||||
ompi_ptr_t req_recv;
|
||||
#if OMPI_HAVE_THREAD_SUPPORT
|
||||
@ -77,7 +76,7 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
|
||||
rc = OMPI_SUCCESS; \
|
||||
OMPI_FREE_LIST_WAIT(&mca_pml_ob1.send_requests, item, rc); \
|
||||
sendreq = (mca_pml_ob1_send_request_t*)item; \
|
||||
sendreq->req_proc = proc; \
|
||||
sendreq->req_send.req_base.req_proc = proc; \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -117,7 +116,8 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
|
||||
#define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \
|
||||
do { \
|
||||
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
|
||||
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)sendreq->req_proc->proc_pml; \
|
||||
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) \
|
||||
sendreq->req_send.req_base.req_proc->proc_pml; \
|
||||
mca_bml_base_btl_t* bml_btl; \
|
||||
size_t size = sendreq->req_send.req_bytes_packed; \
|
||||
\
|
||||
|
@ -51,7 +51,7 @@ OBJ_CLASS_DECLARATION(mca_pml_teg_send_request_t);
|
||||
if(NULL == proc) { \
|
||||
return OMPI_ERR_OUT_OF_RESOURCE; \
|
||||
} \
|
||||
OPAL_THREAD_SCOPED_LOCK(&proc->base.proc_lock, \
|
||||
OPAL_THREAD_SCOPED_LOCK(&proc->base.proc_lock, \
|
||||
(ptl_proc = mca_ptl_array_get_next(&proc->proc_ptl_first))); \
|
||||
ptl_base = ptl_proc->ptl_base; \
|
||||
/* \
|
||||
@ -99,6 +99,7 @@ OBJ_CLASS_DECLARATION(mca_pml_teg_send_request_t);
|
||||
} \
|
||||
/* update request to point to current peer */ \
|
||||
sendreq->req_peer = ptl_proc->ptl_peer; \
|
||||
sendreq->req_send.req_base.req_proc = proc->base.proc_ompi; \
|
||||
}
|
||||
|
||||
|
||||
|
@ -95,6 +95,7 @@ OBJ_CLASS_DECLARATION(mca_pml_uniq_send_request_t);
|
||||
} \
|
||||
/* update request to point to current peer */ \
|
||||
sendreq->req_peer = proc->proc_ptl_first.ptl_peer; \
|
||||
sendreq->req_send.req_base.req_proc = proc->base.proc_ompi; \
|
||||
}
|
||||
|
||||
#define MCA_PML_UNIQ_SEND_REQUEST_INIT( request, \
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user