1
1

-- Commit some code from experimentation

-- Will update the code with the tree soon

BTW, Jeff, it seems to me that adding three NULL entries to a dummy module
does not matter. But letting them hold the spots can be a little more
informative, though.

[yuw@p3 src]$ diff ptl_elan.c src/ptl_elan.c
46,49c46
<         mca_ptl_elan_req_fini,
<         NULL,
<         NULL,
<         NULL
---
>         mca_ptl_elan_req_fini

This commit was SVN r3033.
Этот коммит содержится в:
Weikuan Yu 2004-10-11 00:14:39 +00:00
родитель 31f6c00f3f
Коммит 508dbc4453
8 изменённых файлов: 288 добавлений и 59 удалений

Просмотреть файл

@ -262,7 +262,8 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl,
}
#if OMPI_PTL_ELAN_ZERO_FFRAG
if (size > (OMPI_PTL_ELAN_MAX_QSIZE - sizeof(mca_ptl_base_header_t)))
if (sendreq->req_bytes_packed >
(OMPI_PTL_ELAN_MAX_QSIZE - sizeof(mca_ptl_base_header_t)))
size = 0;
#endif
@ -437,7 +438,10 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl,
* if the recv descriptor is not posted (for too long) (TODO).
* We then need to copy from unex_buffer to application buffer */
if (header->hdr_frag.hdr_frag_length > 0) {
#if !OMPI_PTL_ELAN_USE_DTP
memcpy(request->req_base.req_addr,
frag->frag_base.frag_addr, frag->frag_base.frag_size);
#else
struct iovec iov;
ompi_proc_t *proc;
@ -458,7 +462,9 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl,
request->req_base.req_addr,
header->hdr_frag.hdr_frag_offset);
ompi_convertor_unpack(&frag->frag_base.frag_convertor, &iov, 1);
#endif
}
/* XXX: progress the request based on the status of this recv frag
* It is possible to employ a scheduling logic here.
* Then Done with this fragment, i.e., data */

Просмотреть файл

@ -21,6 +21,7 @@
struct mca_ptl_elan_state_t;
struct ompi_ptl_elan_queue_ctrl_t;
extern struct mca_ptl_elan_state_t mca_ptl_elan_global_state;
extern struct ompi_ptl_elan_cmdq_space_t ptl_elan_cmdq_space;
/**
* ELAN PTL Interface

Просмотреть файл

@ -42,6 +42,17 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
rail = (RAIL *) ptl->ptl_elan_rail;
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
#if OMPI_PTL_ELAN_CMQ_REUSE
#define OMPI_PTL_ELAN_CMQ_ENTRIES 1024
{
ptl_elan_cmdq_space.total = OMPI_PTL_ELAN_CMQ_ENTRIES;
ptl_elan_cmdq_space.free = OMPI_PTL_ELAN_CMQ_ENTRIES;
ptl_elan_cmdq_space.space = elan4_alloccq_space(ctx,
8*OMPI_PTL_ELAN_CMQ_ENTRIES, CQ_Size8K);
}
#endif
/* initialize list */
OBJ_CONSTRUCT (&queue->tx_desc_free, ompi_free_list_t);
flist = &queue->tx_desc_free;
@ -99,8 +110,13 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
/* XXX: If completion is to be detected from the Queue
* there is no need to trigger a local event */
#if OMPI_PTL_ELAN_ONE_QUEUE
desc->comp_dma.dma_dstEvent = elan4_main2elan (ctx,
(void *) ptl->queue->input);
#else
desc->comp_dma.dma_dstEvent = elan4_main2elan (ctx,
(void *) ptl->comp->input);
#endif
desc->comp_dma.dma_srcEvent = 0x0ULL;
desc->comp_dma.dma_typeSize |= RUN_DMA_CMD;
desc->comp_dma.dma_pad = NOP_CMD;
@ -199,8 +215,13 @@ mca_ptl_elan_putget_desc_construct (
/* XXX: If completion is to be detected from the Queue
* there is no need to trigger a local event */
#if OMPI_PTL_ELAN_ONE_QUEUE
desc->comp_dma.dma_dstEvent = elan4_main2elan (ctx,
(void *) ptl->queue->input);
#else
desc->comp_dma.dma_dstEvent = elan4_main2elan (ctx,
(void *) ptl->comp->input);
#endif
desc->comp_dma.dma_srcEvent = 0x0ULL;
desc->comp_dma.dma_typeSize |= RUN_DMA_CMD;
desc->comp_dma.dma_pad = NOP_CMD;

Просмотреть файл

@ -284,8 +284,12 @@ mca_ptl_elan_component_progress (mca_ptl_tstamp_t tstamp)
/* Iterate over all the PTL input Queues */
for (i = 0; i < no_ptls; i++) {
#if OMPI_PTL_ELAN_ONE_QUEUE
mca_ptl_elan_lookup(elan_mp->modules[i]);
#else
mca_ptl_elan_update_desc(elan_mp->modules[i]);
mca_ptl_elan_drain_recv(elan_mp->modules[i]);
#endif
}
return OMPI_SUCCESS;

Просмотреть файл

@ -154,7 +154,9 @@ mca_ptl_elan_send_desc_done (
{
mca_ptl_elan_module_t *ptl;
mca_ptl_base_header_t *header;
int dtype;
dtype = frag->desc->desc_type;
ptl = ((ompi_ptl_elan_qdma_desc_t *)frag->desc)->ptl;
header = &frag->frag_base.frag_header;
@ -166,7 +168,7 @@ mca_ptl_elan_send_desc_done (
frag->frag_base.frag_size,
frag->frag_base.frag_size);
}
elan4_freecq_space (ptl->ptl_elan_ctx,
PTL_ELAN4_FREE_QBUFF (ptl->ptl_elan_ctx,
((ompi_ptl_elan_putget_desc_t *) frag->desc)
->chain_event->ev_Params[1], 8);
OMPI_FREE_LIST_RETURN (&ptl->putget->get_desc_free,
@ -184,9 +186,7 @@ mca_ptl_elan_send_desc_done (
if(NULL == req) { /* An ack descriptor */
OMPI_FREE_LIST_RETURN (&ptl->queue->tx_desc_free,
(ompi_list_item_t *) frag);
}
#if 1
else if (0 == (header->hdr_common.hdr_flags
} else if (0 == (header->hdr_common.hdr_flags
& MCA_PTL_FLAGS_ACK_MATCHED)
|| mca_pml_base_send_request_matched(req)) {
if(ompi_atomic_fetch_and_set_int (&frag->frag_progressed, 1) == 0)
@ -203,7 +203,7 @@ mca_ptl_elan_send_desc_done (
ompi_free_list_t *flist;
if (frag->desc->desc_type == MCA_PTL_ELAN_DESC_PUT) {
flist = &ptl->putget->put_desc_free;
elan4_freecq_space (ptl->ptl_elan_ctx,
PTL_ELAN4_FREE_QBUFF (ptl->ptl_elan_ctx,
((ompi_ptl_elan_putget_desc_t *) frag->desc)
->chain_event->ev_Params[1], 8);
} else {
@ -216,37 +216,7 @@ mca_ptl_elan_send_desc_done (
&ptl->queue->tx_desc_free,
ptl->queue->tx_desc_free.super.ompi_list_length);
}
}
#else
else {
/* XXX:
* Why the release of this send fragment is dependent
* on the receiving of an acknowledgement
* There are two drawbacks,
* a) Send fragment is not immediately returned to the free pool
* b) Some list is needed to hold on this fragment and
* later on find an time slot to process it.
* c) If ever local completion happens later then the receive
* of the acknowledgement. The following will happen
* 1) The receiving of an acknoledgement can not immediatly
* trigger the scheduling the followup fragment since it
* is dependent on the send fragment to complete.
* 2) Later, the local send completeion cannot trigger
* the start of following fragments. As the logic is not there.
*/
if(ompi_atomic_fetch_and_set_int (&frag->frag_progressed, 1) == 0) {
ptl->super.ptl_send_progress(ptl, req,
header->hdr_frag.hdr_frag_length);
}
/* Return a frag or if not cached, or it is a follow up */
if((header->hdr_frag.hdr_frag_offset != 0) || (frag->desc->desc_status
!= MCA_PTL_ELAN_DESC_CACHED))
OMPI_FREE_LIST_RETURN (&queue->tx_desc_free,
(ompi_list_item_t *) frag);
}
#endif
}
}
void

Просмотреть файл

@ -12,6 +12,7 @@
#define _ELAN4
mca_ptl_elan_state_t mca_ptl_elan_global_state;
struct ompi_ptl_elan_cmdq_space_t ptl_elan_cmdq_space;
static int
ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems)
@ -630,6 +631,22 @@ mca_ptl_elan_thread_init (mca_ptl_elan_component_t * emp)
num_rails = emp->num_modules;
#if OMPI_PTL_ELAN_ONE_QUEUE
emp->recv_threads = (struct ompi_ptl_elan_thread_t **)
malloc (num_rails * sizeof(struct ompi_ptl_elan_thread_t*));
for (i = 0; i < num_rails; i ++) {
ompi_ptl_elan_thread_t * t;
t = (struct ompi_ptl_elan_thread_t *)
malloc (sizeof(struct ompi_ptl_elan_thread_t));
OBJ_CONSTRUCT(&t->thread, ompi_thread_t);
t->thread.t_run = (ompi_thread_fn_t) mca_ptl_elan_lookup;
t->ptl = emp->modules[i];
pthread_create(&t->thread.t_handle, NULL,
(void *)t->thread.t_run, (void*)t->ptl);
emp->recv_threads[i] = t;
}
#else
/*struct ompi_ptl_elan_thread_t **threads; */
emp->send_threads = (struct ompi_ptl_elan_thread_t **)
malloc (num_rails * sizeof(struct ompi_ptl_elan_thread_t*));
@ -660,6 +677,7 @@ mca_ptl_elan_thread_init (mca_ptl_elan_component_t * emp)
(void *)t->thread.t_run, (void*)t->ptl);
emp->recv_threads[i] = t;
}
#endif
return (OMPI_SUCCESS);
}
@ -715,12 +733,14 @@ mca_ptl_elan_thread_close (mca_ptl_elan_component_t * emp)
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & desc->main_dma);
elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);
#if !OMPI_PTL_ELAN_ONE_QUEUE
/* finish the send thread */
desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, ptl->comp->input);
MEMBAR_VISIBLE ();
elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & desc->main_dma);
elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq);
MEMBAR_VISIBLE ();
#endif
}
/* Join all threads */
@ -728,10 +748,11 @@ mca_ptl_elan_thread_close (mca_ptl_elan_component_t * emp)
ompi_ptl_elan_thread_t * tsend, *trecv;
int *ptr = (int *)malloc(sizeof(int));
tsend = emp->send_threads[i];
trecv = emp->recv_threads[i];
#if !OMPI_PTL_ELAN_ONE_QUEUE
tsend = emp->send_threads[i];
ompi_thread_join(&tsend->thread, &ptr);
#endif
ompi_thread_join(&trecv->thread, &ptr);
}

Просмотреть файл

@ -267,6 +267,11 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
/* initialize convertor */
if(size_in > 0) {
#if !OMPI_PTL_ELAN_USE_DTP
memcpy(&desc->buff[header_length],
pml_req->req_base.req_addr, size_in);
size_out = size_in;
#else
struct iovec iov;
ompi_convertor_t *convertor;
@ -296,6 +301,7 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
return;
}
size_out = iov.iov_len;
#endif
} else {
size_out = size_in;
}
@ -317,11 +323,16 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag,
* so we need addend the command queue control bits.
* Allocate space from command queues hanged off the CTX.
*/
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
PTL_ELAN4_GET_QBUFF (desc->comp_event->ev_Params[1], ctx, 8, CQ_Size8K);
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
#if OMPI_PTL_ELAN_ONE_QUEUE
frag->frag_base.frag_header.hdr_common.hdr_type += 8;
#endif
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx,
(void *) &frag->frag_base.frag_header);
memcpy ((void *)desc->comp_buff, (void *)&desc->comp_dma,
@ -484,14 +495,21 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
*/
desc->comp_event->ev_Params[0] = elan4_main2elan (ctx,
(void *)desc->comp_buff);
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
PTL_ELAN4_GET_QBUFF (desc->comp_event->ev_Params[1], ctx, 8, CQ_Size8K);
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
#if OMPI_PTL_ELAN_ONE_QUEUE
*((mca_ptl_base_header_t *) desc->buff) = *hdr;
((mca_ptl_base_header_t *) desc->buff)->hdr_common.hdr_type += 8;
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *)desc->buff);
#else
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx,
(void *) &frag->frag_base.frag_header);
#endif
memcpy ((void *)desc->comp_buff, (void *)&desc->comp_dma,
sizeof (E4_DMA64));
@ -519,14 +537,15 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag,
/* XXX: The chain dma will go directly into a command stream
* so we need addend the command queue control bits.
* Allocate space from command queues hanged off the CTX. */
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
PTL_ELAN4_GET_QBUFF (desc->chain_event->ev_Params[1], ctx, 8, CQ_Size8K);
/* Chain an event */
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->chain_event);
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */
flags = 0;
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
DMA_DataTypeByte, flags, ptl->putget->pg_retryCount);
@ -613,13 +632,20 @@ mca_ptl_elan_init_get_desc (mca_ptl_elan_module_t *ptl,
*/
frag->frag_base.frag_header = *hdr;
((mca_ptl_elan_ack_header_t *) &frag->frag_base.frag_header)->frag = frag;
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
PTL_ELAN4_GET_QBUFF (desc->comp_event->ev_Params[1], ctx, 8, CQ_Size8K);
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
#if OMPI_PTL_ELAN_ONE_QUEUE
*((mca_ptl_base_header_t *) desc->buff) = *hdr;
((mca_ptl_base_header_t *) desc->buff)->hdr_common.hdr_type += 8;
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *)desc->buff);
#else
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx,
(void *) &frag->frag_base.frag_header);
#endif
memcpy ((void *)desc->comp_buff, (void *)&desc->comp_dma,
sizeof (E4_DMA64));
@ -653,14 +679,15 @@ mca_ptl_elan_init_get_desc (mca_ptl_elan_module_t *ptl,
/* XXX: The chain dma will go directly into a command stream
* so we need addend the command queue control bits.
* Allocate space from command queues hanged off the CTX. */
desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
PTL_ELAN4_GET_QBUFF (desc->chain_event->ev_Params[1], ctx, 8, CQ_Size8K);
/* Chain an event */
desc->main_dma.dma_dstEvent= elan4_main2elan(ctx,
(E4_Event *)desc->chain_event);
desc->main_dma.dma_srcAddr = desc->src_elan_addr;
desc->main_dma.dma_dstAddr = desc->dst_elan_addr;
desc->main_dma.dma_srcEvent= 0x0ULL; /*disable remote event */
/* Chain an event */
desc->main_dma.dma_dstEvent= elan4_main2elan(ctx,
(E4_Event *)desc->chain_event);
flags = 0;
desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (size_out,
@ -956,14 +983,21 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl,
/* XXX: Need to have a way to differentiate different frag */
((mca_ptl_elan_ack_header_t *) hdr)->frag = desc;
qdma->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
PTL_ELAN4_GET_QBUFF (qdma->comp_event->ev_Params[1], ctx, 8, CQ_Size8K);
qdma->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
qdma->comp_dma.dma_cookie = elan4_local_cookie(
elan_ptl->queue->tx_cpool,
E4_COOKIE_TYPE_LOCAL_DMA,
elan_ptl->elan_vp);
#if OMPI_PTL_ELAN_ONE_QUEUE
*((mca_ptl_base_header_t *) qdma->buff) = *hdr;
((mca_ptl_base_header_t *) qdma->buff)->hdr_common.hdr_type += 8;
qdma->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *)qdma->buff);
#else
qdma->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *) hdr);
#endif
memcpy ((void *)qdma->comp_buff, (void *)&qdma->comp_dma,
sizeof (E4_DMA64));
@ -1154,7 +1188,7 @@ ptl_elan_send_comp:
(mca_pml_base_send_request_t *) basic->req);
#if OMPI_PTL_ELAN_COMP_QUEUE
elan4_freecq_space (ctx, frag->desc->comp_event->ev_Params[1], 8);
PTL_ELAN4_FREE_QBUFF (ctx, frag->desc->comp_event->ev_Params[1], 8);
#endif
/* Work out the new front pointer */
@ -1222,3 +1256,129 @@ ptl_elan_send_comp:
return OMPI_SUCCESS;
}
int
mca_ptl_elan_lookup(struct mca_ptl_elan_module_t *ptl)
{
struct ompi_ptl_elan_queue_ctrl_t *queue;
ompi_ptl_elan_recv_queue_t *rxq;
ELAN_CTX *ctx;
int rc;
queue = ptl->queue;
rxq = queue->rxq;
ctx = ptl->ptl_elan_ctx;
ptl_elan_recv_comp:
OMPI_LOCK (&queue->rx_lock);
#if OMPI_PTL_ELAN_THREADING
rc = mca_ptl_elan_wait_queue(ptl, rxq, 1);
#else
rc = (*(int *) (&rxq->qr_doneWord));
#endif
if (rc) {
mca_ptl_base_header_t *header;
header = (mca_ptl_base_header_t *) rxq->qr_fptr;
if (header->hdr_common.hdr_type >= 8) {
mca_ptl_elan_send_frag_t *frag;
ompi_ptl_elan_base_desc_t *basic;
header->hdr_common.hdr_type = header->hdr_common.hdr_type - 8;
#if OMPI_PTL_ELAN_THREADING
if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_STOP) {
/* XXX: release the lock and quit the thread */
OMPI_UNLOCK (&queue->rx_lock);
return OMPI_SUCCESS;
}
#endif
if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_ACK
|| header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_FIN_ACK) {
frag = ((mca_ptl_elan_ack_header_t*)header)->frag;
} else {
frag = (mca_ptl_elan_send_frag_t *)
header->hdr_frag.hdr_src_ptr.pval;
}
basic = (ompi_ptl_elan_base_desc_t*)frag->desc;
LOG_PRINT(PTL_ELAN_DEBUG_SEND, "frag %p desc %p \n", frag, basic);
/* XXX: please reset additional chained event for put/get desc */
mca_ptl_elan_send_desc_done (frag,
(mca_pml_base_send_request_t *) basic->req);
#if OMPI_PTL_ELAN_COMP_QUEUE
PTL_ELAN4_FREE_QBUFF (ctx, frag->desc->comp_event->ev_Params[1], 8);
#endif
} else {
#if OMPI_PTL_ELAN_THREADING
if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_STOP) {
/* XXX: release the lock and quit the thread */
OMPI_UNLOCK (&queue->rx_lock);
return OMPI_SUCCESS;
}
#endif
switch (header->hdr_common.hdr_type) {
case MCA_PTL_HDR_TYPE_MATCH:
case MCA_PTL_HDR_TYPE_FRAG:
/* a data fragment */
mca_ptl_elan_data_frag (ptl, header);
break;
case MCA_PTL_HDR_TYPE_ACK:
case MCA_PTL_HDR_TYPE_NACK:
mca_ptl_elan_ctrl_frag (ptl, header);
break;
case MCA_PTL_HDR_TYPE_FIN:
mca_ptl_elan_last_frag (ptl, header);
break;
case MCA_PTL_HDR_TYPE_FIN_ACK:
mca_ptl_elan_last_frag_ack (ptl, header);
break;
default:
fprintf(stderr, "[%s:%d] unknow fragment type %d\n",
__FILE__, __LINE__,
header->hdr_common.hdr_type);
fflush(stderr);
break;
}
}
/* Work out the new front pointer */
if (rxq->qr_fptr == rxq->qr_top) {
rxq->qr_fptr = rxq->qr_base;
rxq->qr_efptr = rxq->qr_efitem;
} else {
rxq->qr_fptr = (void *) ((uintptr_t) rxq->qr_fptr
+ queue->rx_slotsize);
rxq->qr_efptr += queue->rx_slotsize;
}
/* PCI Write, Reset the event
* Order RESETEVENT wrt to wait_event_cmd */
queue->input->q_fptr = rxq->qr_efptr;
RESETEVENT_WORD (&rxq->qr_doneWord);
MEMBAR_STORESTORE ();
/* Re-prime queue event by issuing a waitevent(1) on it */
elan4_wait_event_cmd (rxq->qr_cmdq,
/* Is qr_elanDone really a main memory address? */
MAIN2ELAN (ctx, rxq->qr_elanDone),
E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE,
E4_EVENT_DTYPE_LONG, 0),
MAIN2ELAN (ctx, (void *) &rxq->qr_doneWord),
0xfeedfacedeadbeef);
elan4_flush_cmdq_reorder (rxq->qr_cmdq);
}
OMPI_UNLOCK (&queue->rx_lock);
#if OMPI_PTL_ELAN_THREADING
goto ptl_elan_recv_comp;
#endif
return OMPI_SUCCESS;
}

Просмотреть файл

@ -52,7 +52,7 @@
#define PTL_ELAN_DEBUG_GET (0x400)
#define PTL_ELAN_DEBUG_CHAIN (0x800)
#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_NONE)
#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_NONE)
#define LOG_PRINT(flag, args...) \
do { \
@ -64,6 +64,8 @@ do { \
} \
} while (0)
#define OMPI_PTL_ELAN_CMQ_REUSE (1)
#define OMPI_PTL_ELAN_MAX_QSIZE (2048)
#define OMPI_PTL_ELAN_MAX_QSLOTS (128)
#define OMPI_PTL_ELAN_LOST_QSLOTS (1)
@ -81,11 +83,14 @@ do { \
/* XXX: Potentially configurable parameters */
#define OMPI_PTL_ELAN_NUM_QDESCS (16)
#define OMPI_PTL_ELAN_NUM_PUTGET (8)
#define OMPI_PTL_ELAN_ZERO_FFRAG (0)
#define OMPI_PTL_ELAN_NUM_PUTGET (16)
#define OMPI_PTL_ELAN_ZERO_FFRAG (1)
#define OMPI_PTL_ELAN_USE_DTP (0)
#define OMPI_PTL_ELAN_ENABLE_GET (0)
#define OMPI_PTL_ELAN_COMP_QUEUE (0)
#define OMPI_PTL_ELAN_COMP_QUEUE (1)
#define OMPI_PTL_ELAN_ONE_QUEUE (OMPI_PTL_ELAN_COMP_QUEUE && 1)
#define OMPI_PTL_ELAN_THREADING \
(OMPI_PTL_ELAN_COMP_QUEUE && OMPI_HAVE_POSIX_THREADS)
@ -109,12 +114,43 @@ do { \
} \
} while (0)
#if OMPI_PTL_ELAN_CMQ_REUSE
#define PTL_ELAN4_GET_QBUFF(dspace, ctx, bsize, csize) \
do { \
if (ptl_elan_cmdq_space.free == 0) { \
ompi_output(0, \
"[%s:%d] error acquiring cmdq space \n", \
__FILE__, __LINE__); \
} else { \
ptl_elan_cmdq_space.free --; \
dspace = ptl_elan_cmdq_space.space; \
} \
} while (0)
#define PTL_ELAN4_FREE_QBUFF(ctx, buff, bsize) \
do { \
if (ptl_elan_cmdq_space.free >= ptl_elan_cmdq_space.total || \
ptl_elan_cmdq_space.space != buff ) { \
ompi_output(0, \
"[%s:%d] error releasing cmdq space \n", \
__FILE__, __LINE__); \
} else { \
ptl_elan_cmdq_space.free ++; \
} \
} while (0)
#else
#define PTL_ELAN4_GET_QBUFF(dspace, ctx, bsize, csize) \
dspace = elan4_alloccq_space(ctx, bsize, csize);
#define PTL_ELAN4_FREE_QBUFF elan4_freecq_space
#endif
enum {
/* the first four bits for type */
MCA_PTL_ELAN_DESC_NULL = 0x00,
MCA_PTL_ELAN_DESC_QDMA = 0x01,
MCA_PTL_ELAN_DESC_PUT = 0x02,
MCA_PTL_ELAN_DESC_GET = 0x04,
MCA_PTL_ELAN_DESC_PUT = 0x02, /* QDMA + PUT */
MCA_PTL_ELAN_DESC_GET = 0x04, /* QDMA + GET */
/* next first four bits for status */
MCA_PTL_ELAN_DESC_LOCAL = 0x10,
MCA_PTL_ELAN_DESC_CACHED = 0x20
@ -125,6 +161,14 @@ enum {
MCA_PTL_HDR_TYPE_STOP = 0xFF /* Only a character */
};
/* To set up a component-wise list of free cmdq space */
struct ompi_ptl_elan_cmdq_space_t {
int total;
int free;
E4_Addr space;
};
typedef struct ompi_ptl_elan_cmdq_space_t ompi_ptl_elan_cmdq_space_t;
struct ompi_ptl_elan_thread_t
{
ompi_thread_t thread;
@ -266,6 +310,7 @@ struct ompi_ptl_elan_putget_desc_t {
E4_Addr src_elan_addr;
E4_Addr dst_elan_addr;
/* 8 byte aligned */
uint8_t buff[sizeof(mca_ptl_base_header_t)];
};
typedef struct ompi_ptl_elan_putget_desc_t ompi_ptl_elan_putget_desc_t;
@ -384,6 +429,7 @@ int mca_ptl_elan_wait_queue(mca_ptl_elan_module_t * ptl,
/* control, synchronization and state prototypes */
int mca_ptl_elan_drain_recv(mca_ptl_elan_module_t * ptl);
int mca_ptl_elan_update_desc(mca_ptl_elan_module_t * ptl);
int mca_ptl_elan_lookup(mca_ptl_elan_module_t * ptl);
int
mca_ptl_elan_start_get (mca_ptl_elan_send_frag_t * frag,