1
1

Reducing the latency (0.2 microseconds on a 2 GHz Xeon) by removing the
second instance of the ompi_proc from the send and receive requests. This
information is already available on the base request, so there is no
need to duplicate it. The drawback is that now, in order to avoid a
second lookup in the communicator's array of procs, we have to set the
base proc in the PML's _ALLOC macro.

This commit was SVN r8900.
Этот коммит содержится в:
George Bosilca 2006-02-05 06:13:07 +00:00
родитель b7fa1f4664
Коммит eb1d2dd290
6 изменённых файлов: 13 добавлений и 12 удалений

Просмотреть файл

@ -91,7 +91,7 @@ typedef struct mca_pml_base_send_request_t mca_pml_base_send_request_t;
(request)->req_base.req_peer = (int32_t)peer; \
(request)->req_base.req_tag = (int32_t)tag; \
(request)->req_base.req_comm = comm; \
(request)->req_base.req_proc = ompi_comm_peer_lookup(comm,peer); \
/* (request)->req_base.req_proc is set on request allocation */ \
(request)->req_base.req_persistent = persistent; \
(request)->req_base.req_pml_complete = (persistent ? true : false); \
(request)->req_base.req_free_called = false; \

Просмотреть файл

@ -173,7 +173,7 @@ static void mca_pml_ob1_recv_request_ack(
mca_pml_ob1_rendezvous_hdr_t* hdr,
size_t bytes_received)
{
ompi_proc_t* proc = (ompi_proc_t*) recvreq->req_proc;
ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc;
mca_bml_base_endpoint_t* bml_endpoint = NULL;
mca_btl_base_descriptor_t* des;
mca_bml_base_btl_t* bml_btl;
@ -185,7 +185,7 @@ static void mca_pml_ob1_recv_request_ack(
if(NULL == proc) {
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(
recvreq->req_recv.req_base.req_comm, hdr->hdr_match.hdr_src);
proc = recvreq->req_proc = ompi_proc;
proc = recvreq->req_recv.req_base.req_proc = ompi_proc;
}
bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager);
@ -363,7 +363,7 @@ static void mca_pml_ob1_recv_request_rget(
int rc;
/* lookup bml datastructures */
bml_endpoint = (mca_bml_base_endpoint_t*) recvreq->req_proc->proc_pml;
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_pml;
bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl);
if(NULL == bml_btl) {
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
@ -569,7 +569,7 @@ void mca_pml_ob1_recv_request_matched_probe(
void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
{
if(OPAL_THREAD_ADD32(&recvreq->req_lock,1) == 1) {
ompi_proc_t* proc = (ompi_proc_t*) recvreq->req_proc;
ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc;
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
mca_bml_base_btl_t* bml_btl;
do {

Просмотреть файл

@ -34,7 +34,6 @@ extern "C" {
struct mca_pml_ob1_recv_request_t {
mca_pml_base_recv_request_t req_recv;
struct ompi_proc_t *req_proc;
ompi_ptr_t req_send;
#if OMPI_HAVE_THREAD_SUPPORT
volatile int32_t req_lock;
@ -193,14 +192,14 @@ do {
ompi_comm_peer_lookup( \
(request)->req_recv.req_base.req_comm, (hdr)->hdr_src); \
\
(request)->req_proc = proc; \
(request)->req_recv.req_base.req_proc = proc; \
ompi_convertor_copy_and_prepare_for_recv( proc->proc_convertor, \
(request)->req_recv.req_base.req_datatype, \
(request)->req_recv.req_base.req_count, \
(request)->req_recv.req_base.req_addr, \
&(request)->req_recv.req_convertor ); \
} else { \
(request)->req_proc = NULL; \
(request)->req_recv.req_base.req_proc = NULL; \
} \
} while (0)

Просмотреть файл

@ -38,7 +38,6 @@ extern "C" {
struct mca_pml_ob1_send_request_t {
mca_pml_base_send_request_t req_send;
ompi_proc_t* req_proc;
mca_bml_base_endpoint_t* req_endpoint;
ompi_ptr_t req_recv;
#if OMPI_HAVE_THREAD_SUPPORT
@ -77,7 +76,7 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
rc = OMPI_SUCCESS; \
OMPI_FREE_LIST_WAIT(&mca_pml_ob1.send_requests, item, rc); \
sendreq = (mca_pml_ob1_send_request_t*)item; \
sendreq->req_proc = proc; \
sendreq->req_send.req_base.req_proc = proc; \
} \
}
@ -117,7 +116,8 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
#define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \
do { \
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)sendreq->req_proc->proc_pml; \
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) \
sendreq->req_send.req_base.req_proc->proc_pml; \
mca_bml_base_btl_t* bml_btl; \
size_t size = sendreq->req_send.req_bytes_packed; \
\

Просмотреть файл

@ -51,7 +51,7 @@ OBJ_CLASS_DECLARATION(mca_pml_teg_send_request_t);
if(NULL == proc) { \
return OMPI_ERR_OUT_OF_RESOURCE; \
} \
OPAL_THREAD_SCOPED_LOCK(&proc->base.proc_lock, \
OPAL_THREAD_SCOPED_LOCK(&proc->base.proc_lock, \
(ptl_proc = mca_ptl_array_get_next(&proc->proc_ptl_first))); \
ptl_base = ptl_proc->ptl_base; \
/* \
@ -99,6 +99,7 @@ OBJ_CLASS_DECLARATION(mca_pml_teg_send_request_t);
} \
/* update request to point to current peer */ \
sendreq->req_peer = ptl_proc->ptl_peer; \
sendreq->req_send.req_base.req_proc = proc->base.proc_ompi; \
}

Просмотреть файл

@ -95,6 +95,7 @@ OBJ_CLASS_DECLARATION(mca_pml_uniq_send_request_t);
} \
/* update request to point to current peer */ \
sendreq->req_peer = proc->proc_ptl_first.ptl_peer; \
sendreq->req_send.req_base.req_proc = proc->base.proc_ompi; \
}
#define MCA_PML_UNIQ_SEND_REQUEST_INIT( request, \