-- Commit some code from experimentation
-- Will update the code with the tree soon. BTW, Jeff, it seems to me that adding three NULL entries to a dummy module does not matter, but letting them hold the spots can be a little more informative. [yuw@p3 src]$ diff ptl_elan.c src/ptl_elan.c 46,49c46 < mca_ptl_elan_req_fini, < NULL, < NULL, < NULL --- > mca_ptl_elan_req_fini This commit was SVN r3033.
Этот коммит содержится в:
родитель
31f6c00f3f
Коммит
508dbc4453
@ -262,7 +262,8 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl,
|
||||
}
|
||||
|
||||
#if OMPI_PTL_ELAN_ZERO_FFRAG
|
||||
if (size > (OMPI_PTL_ELAN_MAX_QSIZE - sizeof(mca_ptl_base_header_t)))
|
||||
if (sendreq->req_bytes_packed >
|
||||
(OMPI_PTL_ELAN_MAX_QSIZE - sizeof(mca_ptl_base_header_t)))
|
||||
size = 0;
|
||||
#endif
|
||||
|
||||
@ -437,7 +438,10 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl,
|
||||
* if the recv descriptor is not posted (for too long) (TODO).
|
||||
* We then need to copy from unex_buffer to application buffer */
|
||||
if (header->hdr_frag.hdr_frag_length > 0) {
|
||||
|
||||
#if !OMPI_PTL_ELAN_USE_DTP
|
||||
memcpy(request->req_base.req_addr,
|
||||
frag->frag_base.frag_addr, frag->frag_base.frag_size);
|
||||
#else
|
||||
struct iovec iov;
|
||||
ompi_proc_t *proc;
|
||||
|
||||
@ -458,7 +462,9 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl,
|
||||
request->req_base.req_addr,
|
||||
header->hdr_frag.hdr_frag_offset);
|
||||
ompi_convertor_unpack(&frag->frag_base.frag_convertor, &iov, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* XXX: progress the request based on the status of this recv frag
|
||||
* It is possible to employ a scheduling logic here.
|
||||
* Then Done with this fragment, i.e., data */
|
||||
|
@ -21,6 +21,7 @@
|
||||
struct mca_ptl_elan_state_t;
|
||||
struct ompi_ptl_elan_queue_ctrl_t;
|
||||
extern struct mca_ptl_elan_state_t mca_ptl_elan_global_state;
|
||||
extern struct ompi_ptl_elan_cmdq_space_t ptl_elan_cmdq_space;
|
||||
|
||||
/**
|
||||
* ELAN PTL Interface
|
||||
|
@ -42,6 +42,17 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
rail = (RAIL *) ptl->ptl_elan_rail;
|
||||
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
|
||||
|
||||
#if OMPI_PTL_ELAN_CMQ_REUSE
|
||||
|
||||
#define OMPI_PTL_ELAN_CMQ_ENTRIES 1024
|
||||
{
|
||||
ptl_elan_cmdq_space.total = OMPI_PTL_ELAN_CMQ_ENTRIES;
|
||||
ptl_elan_cmdq_space.free = OMPI_PTL_ELAN_CMQ_ENTRIES;
|
||||
ptl_elan_cmdq_space.space = elan4_alloccq_space(ctx,
|
||||
8*OMPI_PTL_ELAN_CMQ_ENTRIES, CQ_Size8K);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* initialize list */
|
||||
OBJ_CONSTRUCT (&queue->tx_desc_free, ompi_free_list_t);
|
||||
flist = &queue->tx_desc_free;
|
||||
@ -99,8 +110,13 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
||||
|
||||
/* XXX: If completion is to be detected from the Queue
|
||||
* there is no need to trigger a local event */
|
||||
#if OMPI_PTL_ELAN_ONE_QUEUE
|
||||
desc->comp_dma.dma_dstEvent = elan4_main2elan (ctx,
|
||||
(void *) ptl->queue->input);
|
||||
#else
|
||||
desc->comp_dma.dma_dstEvent = elan4_main2elan (ctx,
|
||||
(void *) ptl->comp->input);
|
||||
#endif
|
||||
desc->comp_dma.dma_srcEvent = 0x0ULL;
|
||||
desc->comp_dma.dma_typeSize |= RUN_DMA_CMD;
|
||||
desc->comp_dma.dma_pad = NOP_CMD;
|
||||
@ -199,8 +215,13 @@ mca_ptl_elan_putget_desc_construct (
|
||||
|
||||
/* XXX: If completion is to be detected from the Queue
|
||||
* there is no need to trigger a local event */
|
||||
#if OMPI_PTL_ELAN_ONE_QUEUE
|
||||
desc->comp_dma.dma_dstEvent = elan4_main2elan (ctx,
|
||||
(void *) ptl->queue->input);
|
||||
#else
|
||||
desc->comp_dma.dma_dstEvent = elan4_main2elan (ctx,
|
||||
(void *) ptl->comp->input);
|
||||
#endif
|
||||
desc->comp_dma.dma_srcEvent = 0x0ULL;
|
||||
desc->comp_dma.dma_typeSize |= RUN_DMA_CMD;
|
||||
desc->comp_dma.dma_pad = NOP_CMD;
|
||||
|
@ -284,8 +284,12 @@ mca_ptl_elan_component_progress (mca_ptl_tstamp_t tstamp)
|
||||
|
||||
/* Iterate over all the PTL input Queues */
|
||||
for (i = 0; i < no_ptls; i++) {
|
||||
#if OMPI_PTL_ELAN_ONE_QUEUE
|
||||
mca_ptl_elan_lookup(elan_mp->modules[i]);
|
||||
#else
|
||||
mca_ptl_elan_update_desc(elan_mp->modules[i]);
|
||||
mca_ptl_elan_drain_recv(elan_mp->modules[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -154,7 +154,9 @@ mca_ptl_elan_send_desc_done (
|
||||
{
|
||||
mca_ptl_elan_module_t *ptl;
|
||||
mca_ptl_base_header_t *header;
|
||||
int dtype;
|
||||
|
||||
dtype = frag->desc->desc_type;
|
||||
ptl = ((ompi_ptl_elan_qdma_desc_t *)frag->desc)->ptl;
|
||||
header = &frag->frag_base.frag_header;
|
||||
|
||||
@ -166,7 +168,7 @@ mca_ptl_elan_send_desc_done (
|
||||
frag->frag_base.frag_size,
|
||||
frag->frag_base.frag_size);
|
||||
}
|
||||
elan4_freecq_space (ptl->ptl_elan_ctx,
|
||||
PTL_ELAN4_FREE_QBUFF (ptl->ptl_elan_ctx,
|
||||
((ompi_ptl_elan_putget_desc_t *) frag->desc)
|
||||
->chain_event->ev_Params[1], 8);
|
||||
OMPI_FREE_LIST_RETURN (&ptl->putget->get_desc_free,
|
||||
@ -184,9 +186,7 @@ mca_ptl_elan_send_desc_done (
|
||||
if(NULL == req) { /* An ack descriptor */
|
||||
OMPI_FREE_LIST_RETURN (&ptl->queue->tx_desc_free,
|
||||
(ompi_list_item_t *) frag);
|
||||
}
|
||||
#if 1
|
||||
else if (0 == (header->hdr_common.hdr_flags
|
||||
} else if (0 == (header->hdr_common.hdr_flags
|
||||
& MCA_PTL_FLAGS_ACK_MATCHED)
|
||||
|| mca_pml_base_send_request_matched(req)) {
|
||||
if(ompi_atomic_fetch_and_set_int (&frag->frag_progressed, 1) == 0)
|
||||
@ -203,7 +203,7 @@ mca_ptl_elan_send_desc_done (
|
||||
ompi_free_list_t *flist;
|
||||
if (frag->desc->desc_type == MCA_PTL_ELAN_DESC_PUT) {
|
||||
flist = &ptl->putget->put_desc_free;
|
||||
elan4_freecq_space (ptl->ptl_elan_ctx,
|
||||
PTL_ELAN4_FREE_QBUFF (ptl->ptl_elan_ctx,
|
||||
((ompi_ptl_elan_putget_desc_t *) frag->desc)
|
||||
->chain_event->ev_Params[1], 8);
|
||||
} else {
|
||||
@ -216,37 +216,7 @@ mca_ptl_elan_send_desc_done (
|
||||
&ptl->queue->tx_desc_free,
|
||||
ptl->queue->tx_desc_free.super.ompi_list_length);
|
||||
}
|
||||
}
|
||||
#else
|
||||
else {
|
||||
/* XXX:
|
||||
* Why the release of this send fragment is dependent
|
||||
* on the receiving of an acknowledgement
|
||||
* There are two drawbacks,
|
||||
* a) Send fragment is not immediately returned to the free pool
|
||||
* b) Some list is needed to hold on this fragment and
|
||||
* later on find an time slot to process it.
|
||||
* c) If ever local completion happens later then the receive
|
||||
* of the acknowledgement. The following will happen
|
||||
* 1) The receiving of an acknoledgement can not immediatly
|
||||
* trigger the scheduling the followup fragment since it
|
||||
* is dependent on the send fragment to complete.
|
||||
* 2) Later, the local send completeion cannot trigger
|
||||
* the start of following fragments. As the logic is not there.
|
||||
*/
|
||||
|
||||
if(ompi_atomic_fetch_and_set_int (&frag->frag_progressed, 1) == 0) {
|
||||
ptl->super.ptl_send_progress(ptl, req,
|
||||
header->hdr_frag.hdr_frag_length);
|
||||
}
|
||||
|
||||
/* Return a frag or if not cached, or it is a follow up */
|
||||
if((header->hdr_frag.hdr_frag_offset != 0) || (frag->desc->desc_status
|
||||
!= MCA_PTL_ELAN_DESC_CACHED))
|
||||
OMPI_FREE_LIST_RETURN (&queue->tx_desc_free,
|
||||
(ompi_list_item_t *) frag);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -12,6 +12,7 @@
|
||||
#define _ELAN4
|
||||
|
||||
mca_ptl_elan_state_t mca_ptl_elan_global_state;
|
||||
struct ompi_ptl_elan_cmdq_space_t ptl_elan_cmdq_space;
|
||||
|
||||
static int
|
||||
ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
|
||||
@ -630,6 +631,22 @@ mca_ptl_elan_thread_init (mca_ptl_elan_component_t * emp)
|
||||
|
||||
num_rails = emp->num_modules;
|
||||
|
||||
#if OMPI_PTL_ELAN_ONE_QUEUE
|
||||
emp->recv_threads = (struct ompi_ptl_elan_thread_t **)
|
||||
malloc (num_rails * sizeof(struct ompi_ptl_elan_thread_t*));
|
||||
|
||||
for (i = 0; i < num_rails; i ++) {
|
||||
ompi_ptl_elan_thread_t * t;
|
||||
t = (struct ompi_ptl_elan_thread_t *)
|
||||
malloc (sizeof(struct ompi_ptl_elan_thread_t));
|
||||
OBJ_CONSTRUCT(&t->thread, ompi_thread_t);
|
||||
t->thread.t_run = (ompi_thread_fn_t) mca_ptl_elan_lookup;
|
||||
t->ptl = emp->modules[i];
|
||||
pthread_create(&t->thread.t_handle, NULL,
|
||||
(void *)t->thread.t_run, (void*)t->ptl);
|
||||
emp->recv_threads[i] = t;
|
||||
}
|
||||
#else
|
||||
/*struct ompi_ptl_elan_thread_t **threads; */
|
||||
emp->send_threads = (struct ompi_ptl_elan_thread_t **)
|
||||
malloc (num_rails * sizeof(struct ompi_ptl_elan_thread_t*));
|
||||
@ -660,6 +677,7 @@ mca_ptl_elan_thread_init (mca_ptl_elan_component_t * emp)
|
||||
(void *)t->thread.t_run, (void*)t->ptl);
|
||||
emp->recv_threads[i] = t;
|
||||
}
|
||||
#endif
|
||||
|
||||
return (OMPI_SUCCESS);
|
||||
}
|
||||
@ -715,12 +733,14 @@ mca_ptl_elan_thread_close (mca_ptl_elan_component_t * emp)
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & desc->main_dma);
|
||||
elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);
|
||||
|
||||
#if !OMPI_PTL_ELAN_ONE_QUEUE
|
||||
/* finish the send thread */
|
||||
desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, ptl->comp->input);
|
||||
MEMBAR_VISIBLE ();
|
||||
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & desc->main_dma);
|
||||
elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);
|
||||
MEMBAR_VISIBLE ();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Join all threads */
|
||||
@ -728,10 +748,11 @@ mca_ptl_elan_thread_close (mca_ptl_elan_component_t * emp)
|
||||
ompi_ptl_elan_thread_t * tsend, *trecv;
|
||||
int *ptr = (int *)malloc(sizeof(int));
|
||||
|
||||
tsend = emp->send_threads[i];
|
||||
trecv = emp->recv_threads[i];
|
||||
|
||||
#if !OMPI_PTL_ELAN_ONE_QUEUE
|
||||
tsend = emp->send_threads[i];
|
||||
ompi_thread_join(&tsend->thread, &ptr);
|
||||
#endif
|
||||
ompi_thread_join(&trecv->thread, &ptr);
|
||||
}
|
||||
|
||||
|
@ -267,6 +267,11 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
|
||||
/* initialize convertor */
|
||||
if(size_in > 0) {
|
||||
#if !OMPI_PTL_ELAN_USE_DTP
|
||||
memcpy(&desc->buff[header_length],
|
||||
pml_req->req_base.req_addr, size_in);
|
||||
size_out = size_in;
|
||||
#else
|
||||
struct iovec iov;
|
||||
ompi_convertor_t *convertor;
|
||||
|
||||
@ -296,6 +301,7 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
return;
|
||||
}
|
||||
size_out = iov.iov_len;
|
||||
#endif
|
||||
} else {
|
||||
size_out = size_in;
|
||||
}
|
||||
@ -317,11 +323,16 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
* so we need addend the command queue control bits.
|
||||
* Allocate space from command queues hanged off the CTX.
|
||||
*/
|
||||
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
PTL_ELAN4_GET_QBUFF (desc->comp_event->ev_Params[1], ctx, 8, CQ_Size8K);
|
||||
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
||||
|
||||
#if OMPI_PTL_ELAN_ONE_QUEUE
|
||||
frag->frag_base.frag_header.hdr_common.hdr_type += 8;
|
||||
#endif
|
||||
|
||||
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx,
|
||||
(void *) &frag->frag_base.frag_header);
|
||||
memcpy ((void *)desc->comp_buff, (void *)&desc->comp_dma,
|
||||
@ -484,14 +495,21 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
*/
|
||||
desc->comp_event->ev_Params[0] = elan4_main2elan (ctx,
|
||||
(void *)desc->comp_buff);
|
||||
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
PTL_ELAN4_GET_QBUFF (desc->comp_event->ev_Params[1], ctx, 8, CQ_Size8K);
|
||||
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||
|
||||
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
||||
|
||||
#if OMPI_PTL_ELAN_ONE_QUEUE
|
||||
*((mca_ptl_base_header_t *) desc->buff) = *hdr;
|
||||
((mca_ptl_base_header_t *) desc->buff)->hdr_common.hdr_type += 8;
|
||||
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *)desc->buff);
|
||||
#else
|
||||
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx,
|
||||
(void *) &frag->frag_base.frag_header);
|
||||
#endif
|
||||
memcpy ((void *)desc->comp_buff, (void *)&desc->comp_dma,
|
||||
sizeof (E4_DMA64));
|
||||
|
||||
@ -519,14 +537,15 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
|
||||
/* XXX: The chain dma will go directly into a command stream
|
||||
* so we need addend the command queue control bits.
|
||||
* Allocate space from command queues hanged off the CTX. */
|
||||
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
||||
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
||||
PTL_ELAN4_GET_QBUFF (desc->chain_event->ev_Params[1], ctx, 8, CQ_Size8K);
|
||||
|
||||
/* Chain an event */
|
||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->chain_event);
|
||||
|
||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
||||
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
|
||||
|
||||
flags = 0;
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
|
||||
DMA_DataTypeByte, flags, ptl->putget->pg_retryCount);
|
||||
@ -613,13 +632,20 @@ mca_ptl_elan_init_get_desc (mca_ptl_elan_module_t *ptl,
|
||||
*/
|
||||
frag->frag_base.frag_header = *hdr;
|
||||
((mca_ptl_elan_ack_header_t *) &frag->frag_base.frag_header)->frag = frag;
|
||||
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
PTL_ELAN4_GET_QBUFF (desc->comp_event->ev_Params[1], ctx, 8, CQ_Size8K);
|
||||
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
||||
|
||||
#if OMPI_PTL_ELAN_ONE_QUEUE
|
||||
*((mca_ptl_base_header_t *) desc->buff) = *hdr;
|
||||
((mca_ptl_base_header_t *) desc->buff)->hdr_common.hdr_type += 8;
|
||||
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *)desc->buff);
|
||||
#else
|
||||
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx,
|
||||
(void *) &frag->frag_base.frag_header);
|
||||
#endif
|
||||
memcpy ((void *)desc->comp_buff, (void *)&desc->comp_dma,
|
||||
sizeof (E4_DMA64));
|
||||
|
||||
@ -653,14 +679,15 @@ mca_ptl_elan_init_get_desc (mca_ptl_elan_module_t *ptl,
|
||||
/* XXX: The chain dma will go directly into a command stream
|
||||
* so we need addend the command queue control bits.
|
||||
* Allocate space from command queues hanged off the CTX. */
|
||||
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
PTL_ELAN4_GET_QBUFF (desc->chain_event->ev_Params[1], ctx, 8, CQ_Size8K);
|
||||
|
||||
/* Chain an event */
|
||||
desc->main_dma.dma_dstEvent= elan4_main2elan(ctx,
|
||||
(E4_Event *)desc->chain_event);
|
||||
|
||||
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
|
||||
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
|
||||
desc->main_dma.dma_srcEvent= 0x0ULL; /*disable remote event */
|
||||
/* Chain an event */
|
||||
desc->main_dma.dma_dstEvent= elan4_main2elan(ctx,
|
||||
(E4_Event *)desc->chain_event);
|
||||
|
||||
flags = 0;
|
||||
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
|
||||
@ -956,14 +983,21 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
|
||||
|
||||
/* XXX: Need to have a way to differentiate different frag */
|
||||
((mca_ptl_elan_ack_header_t *) hdr)->frag = desc;
|
||||
qdma->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
||||
PTL_ELAN4_GET_QBUFF (qdma->comp_event->ev_Params[1], ctx, 8, CQ_Size8K);
|
||||
qdma->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||
qdma->comp_dma.dma_cookie = elan4_local_cookie(
|
||||
elan_ptl->queue->tx_cpool,
|
||||
E4_COOKIE_TYPE_LOCAL_DMA,
|
||||
elan_ptl->elan_vp);
|
||||
|
||||
#if OMPI_PTL_ELAN_ONE_QUEUE
|
||||
*((mca_ptl_base_header_t *) qdma->buff) = *hdr;
|
||||
((mca_ptl_base_header_t *) qdma->buff)->hdr_common.hdr_type += 8;
|
||||
qdma->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *)qdma->buff);
|
||||
#else
|
||||
qdma->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *) hdr);
|
||||
#endif
|
||||
memcpy ((void *)qdma->comp_buff, (void *)&qdma->comp_dma,
|
||||
sizeof (E4_DMA64));
|
||||
|
||||
@ -1154,7 +1188,7 @@ ptl_elan_send_comp:
|
||||
(mca_pml_base_send_request_t *) basic->req);
|
||||
|
||||
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||
elan4_freecq_space (ctx, frag->desc->comp_event->ev_Params[1], 8);
|
||||
PTL_ELAN4_FREE_QBUFF (ctx, frag->desc->comp_event->ev_Params[1], 8);
|
||||
#endif
|
||||
|
||||
/* Work out the new front pointer */
|
||||
@ -1222,3 +1256,129 @@ ptl_elan_send_comp:
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
mca_ptl_elan_lookup(struct mca_ptl_elan_module_t *ptl)
|
||||
{
|
||||
struct ompi_ptl_elan_queue_ctrl_t *queue;
|
||||
ompi_ptl_elan_recv_queue_t *rxq;
|
||||
ELAN_CTX *ctx;
|
||||
int rc;
|
||||
|
||||
queue = ptl->queue;
|
||||
rxq = queue->rxq;
|
||||
ctx = ptl->ptl_elan_ctx;
|
||||
|
||||
ptl_elan_recv_comp:
|
||||
OMPI_LOCK (&queue->rx_lock);
|
||||
#if OMPI_PTL_ELAN_THREADING
|
||||
rc = mca_ptl_elan_wait_queue(ptl, rxq, 1);
|
||||
#else
|
||||
rc = (*(int *) (&rxq->qr_doneWord));
|
||||
#endif
|
||||
if (rc) {
|
||||
mca_ptl_base_header_t *header;
|
||||
|
||||
header = (mca_ptl_base_header_t *) rxq->qr_fptr;
|
||||
|
||||
if (header->hdr_common.hdr_type >= 8) {
|
||||
mca_ptl_elan_send_frag_t *frag;
|
||||
ompi_ptl_elan_base_desc_t *basic;
|
||||
|
||||
header->hdr_common.hdr_type = header->hdr_common.hdr_type - 8;
|
||||
|
||||
#if OMPI_PTL_ELAN_THREADING
|
||||
if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_STOP) {
|
||||
/* XXX: release the lock and quit the thread */
|
||||
OMPI_UNLOCK (&queue->rx_lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_ACK
|
||||
|| header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_FIN_ACK) {
|
||||
frag = ((mca_ptl_elan_ack_header_t*)header)->frag;
|
||||
} else {
|
||||
frag = (mca_ptl_elan_send_frag_t *)
|
||||
header->hdr_frag.hdr_src_ptr.pval;
|
||||
}
|
||||
basic = (ompi_ptl_elan_base_desc_t*)frag->desc;
|
||||
|
||||
LOG_PRINT(PTL_ELAN_DEBUG_SEND, "frag %p desc %p \n", frag, basic);
|
||||
|
||||
/* XXX: please reset additional chained event for put/get desc */
|
||||
mca_ptl_elan_send_desc_done (frag,
|
||||
(mca_pml_base_send_request_t *) basic->req);
|
||||
|
||||
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||
PTL_ELAN4_FREE_QBUFF (ctx, frag->desc->comp_event->ev_Params[1], 8);
|
||||
#endif
|
||||
|
||||
} else {
|
||||
|
||||
#if OMPI_PTL_ELAN_THREADING
|
||||
if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_STOP) {
|
||||
/* XXX: release the lock and quit the thread */
|
||||
OMPI_UNLOCK (&queue->rx_lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
switch (header->hdr_common.hdr_type) {
|
||||
case MCA_PTL_HDR_TYPE_MATCH:
|
||||
case MCA_PTL_HDR_TYPE_FRAG:
|
||||
/* a data fragment */
|
||||
mca_ptl_elan_data_frag (ptl, header);
|
||||
break;
|
||||
case MCA_PTL_HDR_TYPE_ACK:
|
||||
case MCA_PTL_HDR_TYPE_NACK:
|
||||
mca_ptl_elan_ctrl_frag (ptl, header);
|
||||
break;
|
||||
case MCA_PTL_HDR_TYPE_FIN:
|
||||
mca_ptl_elan_last_frag (ptl, header);
|
||||
break;
|
||||
case MCA_PTL_HDR_TYPE_FIN_ACK:
|
||||
mca_ptl_elan_last_frag_ack (ptl, header);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "[%s:%d] unknow fragment type %d\n",
|
||||
__FILE__, __LINE__,
|
||||
header->hdr_common.hdr_type);
|
||||
fflush(stderr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Work out the new front pointer */
|
||||
if (rxq->qr_fptr == rxq->qr_top) {
|
||||
rxq->qr_fptr = rxq->qr_base;
|
||||
rxq->qr_efptr = rxq->qr_efitem;
|
||||
} else {
|
||||
rxq->qr_fptr = (void *) ((uintptr_t) rxq->qr_fptr
|
||||
+ queue->rx_slotsize);
|
||||
rxq->qr_efptr += queue->rx_slotsize;
|
||||
}
|
||||
|
||||
/* PCI Write, Reset the event
|
||||
* Order RESETEVENT wrt to wait_event_cmd */
|
||||
queue->input->q_fptr = rxq->qr_efptr;
|
||||
RESETEVENT_WORD (&rxq->qr_doneWord);
|
||||
MEMBAR_STORESTORE ();
|
||||
|
||||
/* Re-prime queue event by issuing a waitevent(1) on it */
|
||||
elan4_wait_event_cmd (rxq->qr_cmdq,
|
||||
/* Is qr_elanDone really a main memory address? */
|
||||
MAIN2ELAN (ctx, rxq->qr_elanDone),
|
||||
E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE,
|
||||
E4_EVENT_DTYPE_LONG, 0),
|
||||
MAIN2ELAN (ctx, (void *) &rxq->qr_doneWord),
|
||||
0xfeedfacedeadbeef);
|
||||
elan4_flush_cmdq_reorder (rxq->qr_cmdq);
|
||||
}
|
||||
OMPI_UNLOCK (&queue->rx_lock);
|
||||
|
||||
#if OMPI_PTL_ELAN_THREADING
|
||||
goto ptl_elan_recv_comp;
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -52,7 +52,7 @@
|
||||
#define PTL_ELAN_DEBUG_GET (0x400)
|
||||
#define PTL_ELAN_DEBUG_CHAIN (0x800)
|
||||
|
||||
#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_NONE)
|
||||
#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_NONE)
|
||||
|
||||
#define LOG_PRINT(flag, args...) \
|
||||
do { \
|
||||
@ -64,6 +64,8 @@ do { \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define OMPI_PTL_ELAN_CMQ_REUSE (1)
|
||||
|
||||
#define OMPI_PTL_ELAN_MAX_QSIZE (2048)
|
||||
#define OMPI_PTL_ELAN_MAX_QSLOTS (128)
|
||||
#define OMPI_PTL_ELAN_LOST_QSLOTS (1)
|
||||
@ -81,11 +83,14 @@ do { \
|
||||
|
||||
/* XXX: Potentially configurable parameters */
|
||||
#define OMPI_PTL_ELAN_NUM_QDESCS (16)
|
||||
#define OMPI_PTL_ELAN_NUM_PUTGET (8)
|
||||
#define OMPI_PTL_ELAN_ZERO_FFRAG (0)
|
||||
#define OMPI_PTL_ELAN_NUM_PUTGET (16)
|
||||
#define OMPI_PTL_ELAN_ZERO_FFRAG (1)
|
||||
|
||||
#define OMPI_PTL_ELAN_USE_DTP (0)
|
||||
#define OMPI_PTL_ELAN_ENABLE_GET (0)
|
||||
#define OMPI_PTL_ELAN_COMP_QUEUE (0)
|
||||
#define OMPI_PTL_ELAN_COMP_QUEUE (1)
|
||||
#define OMPI_PTL_ELAN_ONE_QUEUE (OMPI_PTL_ELAN_COMP_QUEUE && 1)
|
||||
|
||||
#define OMPI_PTL_ELAN_THREADING \
|
||||
(OMPI_PTL_ELAN_COMP_QUEUE && OMPI_HAVE_POSIX_THREADS)
|
||||
|
||||
@ -109,12 +114,43 @@ do { \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*
 * Command-queue buffer acquire/release helpers.
 *
 * With OMPI_PTL_ELAN_CMQ_REUSE set, callers share the single region
 * recorded in the global ptl_elan_cmdq_space: GET hands out
 * ptl_elan_cmdq_space.space and decrements the free counter, FREE checks
 * that the buffer being returned is that same region and increments the
 * counter.  On failure both only log through ompi_output(); GET then
 * leaves `dspace` untouched.
 *
 * Without reuse the helpers map directly onto elan4_alloccq_space() and
 * elan4_freecq_space().
 *
 * Both GET variants are single statements (do { } while (0)), so they can
 * be used safely as the body of an unbraced if/else, and all macro
 * arguments are parenthesized.
 */
#if OMPI_PTL_ELAN_CMQ_REUSE
#define PTL_ELAN4_GET_QBUFF(dspace, ctx, bsize, csize)                  \
do {                                                                    \
    if (ptl_elan_cmdq_space.free == 0) {                                \
        ompi_output(0,                                                  \
                    "[%s:%d] error acquiring cmdq space \n",            \
                    __FILE__, __LINE__);                                \
    } else {                                                            \
        ptl_elan_cmdq_space.free --;                                    \
        (dspace) = ptl_elan_cmdq_space.space;                           \
    }                                                                   \
} while (0)

#define PTL_ELAN4_FREE_QBUFF(ctx, buff, bsize)                          \
do {                                                                    \
    if (ptl_elan_cmdq_space.free >= ptl_elan_cmdq_space.total ||        \
        ptl_elan_cmdq_space.space != (buff)) {                          \
        ompi_output(0,                                                  \
                    "[%s:%d] error releasing cmdq space \n",            \
                    __FILE__, __LINE__);                                \
    } else {                                                            \
        ptl_elan_cmdq_space.free ++;                                    \
    }                                                                   \
} while (0)
#else
#define PTL_ELAN4_GET_QBUFF(dspace, ctx, bsize, csize)                  \
do {                                                                    \
    (dspace) = elan4_alloccq_space((ctx), (bsize), (csize));            \
} while (0)

#define PTL_ELAN4_FREE_QBUFF elan4_freecq_space
#endif
|
||||
|
||||
enum {
|
||||
/* the first four bits for type */
|
||||
MCA_PTL_ELAN_DESC_NULL = 0x00,
|
||||
MCA_PTL_ELAN_DESC_QDMA = 0x01,
|
||||
MCA_PTL_ELAN_DESC_PUT = 0x02,
|
||||
MCA_PTL_ELAN_DESC_GET = 0x04,
|
||||
MCA_PTL_ELAN_DESC_PUT = 0x02, /* QDMA + PUT */
|
||||
MCA_PTL_ELAN_DESC_GET = 0x04, /* QDMA + GET */
|
||||
/* next first four bits for status */
|
||||
MCA_PTL_ELAN_DESC_LOCAL = 0x10,
|
||||
MCA_PTL_ELAN_DESC_CACHED = 0x20
|
||||
@ -125,6 +161,14 @@ enum {
|
||||
MCA_PTL_HDR_TYPE_STOP = 0xFF /* Only a character */
|
||||
};
|
||||
|
||||
/* To set up a component-wise list of free cmdq space */
|
||||
struct ompi_ptl_elan_cmdq_space_t {
|
||||
int total;
|
||||
int free;
|
||||
E4_Addr space;
|
||||
};
|
||||
typedef struct ompi_ptl_elan_cmdq_space_t ompi_ptl_elan_cmdq_space_t;
|
||||
|
||||
struct ompi_ptl_elan_thread_t
|
||||
{
|
||||
ompi_thread_t thread;
|
||||
@ -266,6 +310,7 @@ struct ompi_ptl_elan_putget_desc_t {
|
||||
E4_Addr src_elan_addr;
|
||||
E4_Addr dst_elan_addr;
|
||||
/* 8 byte aligned */
|
||||
uint8_t buff[sizeof(mca_ptl_base_header_t)];
|
||||
};
|
||||
typedef struct ompi_ptl_elan_putget_desc_t ompi_ptl_elan_putget_desc_t;
|
||||
|
||||
@ -384,6 +429,7 @@ int mca_ptl_elan_wait_queue(mca_ptl_elan_module_t * ptl,
|
||||
/* control, synchronization and state prototypes */
|
||||
int mca_ptl_elan_drain_recv(mca_ptl_elan_module_t * ptl);
|
||||
int mca_ptl_elan_update_desc(mca_ptl_elan_module_t * ptl);
|
||||
int mca_ptl_elan_lookup(mca_ptl_elan_module_t * ptl);
|
||||
|
||||
int
|
||||
mca_ptl_elan_start_get (mca_ptl_elan_send_frag_t * frag,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user