-- Got shared completion queue working for QDMA, will have it work for
put/get -- last step: thread to check completion. Sounds straightforward? :-( This commit was SVN r2367.
Этот коммит содержится в:
родитель
64418c2a7e
Коммит
1eda2aa2c6
@ -37,7 +37,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
|
|
||||||
ompi_free_list_t *flist;
|
ompi_free_list_t *flist;
|
||||||
ompi_ptl_elan_qdma_desc_t *desc;
|
ompi_ptl_elan_qdma_desc_t *desc;
|
||||||
E4_Event *elan_ptr;
|
E4_Event32 *elan_ptr;
|
||||||
|
|
||||||
START_FUNC(PTL_ELAN_DEBUG_INIT);
|
START_FUNC(PTL_ELAN_DEBUG_INIT);
|
||||||
|
|
||||||
@ -52,7 +52,13 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
elan_align = OMPI_PTL_ELAN_GET_MAX (sizeof (int *), ELAN_BLOCK_ALIGN);
|
elan_align = OMPI_PTL_ELAN_GET_MAX (sizeof (int *), ELAN_BLOCK_ALIGN);
|
||||||
main_size = OMPI_PTL_ELAN_ALIGNUP (sizeof (ompi_ptl_elan_qdma_desc_t),
|
main_size = OMPI_PTL_ELAN_ALIGNUP (sizeof (ompi_ptl_elan_qdma_desc_t),
|
||||||
main_align);
|
main_align);
|
||||||
elan_size = OMPI_PTL_ELAN_ALIGNUP (sizeof (E4_Event), elan_align);
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
|
elan_size = OMPI_PTL_ELAN_ALIGNUP (
|
||||||
|
(2*sizeof (E4_Event32) + ELAN_BLOCK_SIZE), elan_align);
|
||||||
|
#else
|
||||||
|
elan_size = OMPI_PTL_ELAN_ALIGNUP (sizeof (E4_Event32), elan_align);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
OBJ_CONSTRUCT(&flist->fl_lock, ompi_mutex_t);
|
OBJ_CONSTRUCT(&flist->fl_lock, ompi_mutex_t);
|
||||||
flist->fl_elem_size = flist->fl_max_to_alloc = OMPI_PTL_ELAN_MAX_QDESCS;
|
flist->fl_elem_size = flist->fl_max_to_alloc = OMPI_PTL_ELAN_MAX_QDESCS;
|
||||||
@ -72,7 +78,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
OMPI_PTL_ELAN_CHECK_UNEX (desc, NULL, OMPI_ERROR, 0);
|
OMPI_PTL_ELAN_CHECK_UNEX (desc, NULL, OMPI_ERROR, 0);
|
||||||
|
|
||||||
/* Allocating elan related structures */
|
/* Allocating elan related structures */
|
||||||
elan_ptr = (E4_Event *) elan4_allocElan (rail->r_alloc,
|
elan_ptr = (E4_Event32 *) elan4_allocElan (rail->r_alloc,
|
||||||
elan_align, elan_size * count);
|
elan_align, elan_size * count);
|
||||||
OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0);
|
OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0);
|
||||||
|
|
||||||
@ -80,25 +86,19 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
ompi_list_item_t *item;
|
ompi_list_item_t *item;
|
||||||
|
|
||||||
desc->ptl = ptl;
|
desc->ptl = ptl;
|
||||||
desc->elan_event = elan_ptr;
|
desc->elan_event = (E4_Event *) elan_ptr;
|
||||||
frag->desc = (ompi_ptl_elan_base_desc_t *)desc;
|
frag->desc = (ompi_ptl_elan_base_desc_t *)desc;
|
||||||
|
|
||||||
/* Initialize some of the dma structures */
|
|
||||||
desc->main_dma.dma_dstAddr = 0;
|
|
||||||
|
|
||||||
#if OMPI_PTL_ELAN_COMP_QUEUE
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
/* XXX: Chain a QDMA to each queue and
|
/* XXX: provide a DMA structure for each chained event */
|
||||||
* Have all the srcEvent fired to the Queue */
|
|
||||||
|
|
||||||
/* Setup the chain dma */
|
|
||||||
desc->comp_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
desc->comp_dma.dma_typeSize = E4_DMA_TYPE_SIZE (
|
||||||
sizeof(mca_ptl_base_frag_header_t),
|
sizeof(mca_ptl_base_frag_header_t),
|
||||||
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
DMA_DataTypeByte, DMA_QueueWrite, 8);
|
||||||
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpoll,
|
desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool,
|
||||||
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp);
|
||||||
desc->comp_dma.dma_vproc = ptl->elan_vp;
|
desc->comp_dma.dma_vproc = ptl->elan_vp;
|
||||||
desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *) hdr);
|
desc->comp_dma.dma_srcAddr = 0x0ULL; /* To be filled in */
|
||||||
desc->comp_dma.dma_dstAddr = 0x0ULL;
|
desc->comp_dma.dma_dstAddr = 0x0ULL; /* To be filled in */
|
||||||
|
|
||||||
/* XXX: If completion is to be detected from the Queue
|
/* XXX: If completion is to be detected from the Queue
|
||||||
* there is no need to trigger a local event */
|
* there is no need to trigger a local event */
|
||||||
@ -106,26 +106,24 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
(void *) ptl->comp->input);
|
(void *) ptl->comp->input);
|
||||||
/*desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event);*/
|
/*desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event);*/
|
||||||
desc->comp_dma.dma_srcEvent = 0x0ULL;
|
desc->comp_dma.dma_srcEvent = 0x0ULL;
|
||||||
|
|
||||||
desc->comp_dma.dma_typeSize |= RUN_DMA_CMD;
|
desc->comp_dma.dma_typeSize |= RUN_DMA_CMD;
|
||||||
desc->comp_dma.dma_pad = NOP_CMD;
|
desc->comp_dma.dma_pad = NOP_CMD;
|
||||||
memcpy ((void *)desc->comp_buff, (void *)&desc->comp_dma,
|
desc->comp_event = (E4_Event *) (elan_ptr + 1);
|
||||||
sizeof (E4_DMA64));
|
desc->comp_buff = (E4_Addr *) (elan_ptr + 2);
|
||||||
|
|
||||||
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32,
|
||||||
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8);
|
||||||
desc->comp_event->ev_Params[0] = elan4_main2elan (ctx,
|
desc->comp_event->ev_Params[0] = elan4_main2elan (ctx,
|
||||||
(void *)desc->comp_buff);
|
(void *)desc->comp_buff);
|
||||||
|
desc->comp_event->ev_Params[1] = 0x0ULL;
|
||||||
|
|
||||||
/* XXX: The chain dma will go directly into a command stream
|
/* Initialize some of the dma structures */
|
||||||
* so we need addend the command queue control bits.
|
desc->main_dma.dma_dstAddr = 0;
|
||||||
* Allocate space from command queues hanged off the CTX.
|
|
||||||
*/
|
|
||||||
desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K);
|
|
||||||
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
desc->main_dma.dma_srcEvent= elan4_main2elan(ctx,
|
||||||
(E4_Event *)desc->comp_event);
|
(E4_Event *)desc->comp_event);
|
||||||
desc->main_dma.dma_dstEvent= SDRAM2ELAN (ctx, queue->input);
|
desc->main_dma.dma_dstEvent= SDRAM2ELAN (ctx, queue->input);
|
||||||
#else
|
#else
|
||||||
|
/* Initialize some of the dma structures */
|
||||||
|
desc->main_dma.dma_dstAddr = 0;
|
||||||
desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event);
|
desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event);
|
||||||
desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input);
|
desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input);
|
||||||
INITEVENT_WORD (ctx, desc->elan_event, &desc->main_doneWord);
|
INITEVENT_WORD (ctx, desc->elan_event, &desc->main_doneWord);
|
||||||
@ -138,7 +136,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl,
|
|||||||
|
|
||||||
/* Progress to the next element */
|
/* Progress to the next element */
|
||||||
desc = (ompi_ptl_elan_qdma_desc_t *) ((char *) desc + main_size);
|
desc = (ompi_ptl_elan_qdma_desc_t *) ((char *) desc + main_size);
|
||||||
elan_ptr = (E4_Event *) ((char *) elan_ptr + elan_size);
|
elan_ptr = (E4_Event32 *) ((char *) elan_ptr + elan_size);
|
||||||
frag ++;
|
frag ++;
|
||||||
}
|
}
|
||||||
flist->fl_num_allocated += flist->fl_num_per_alloc;
|
flist->fl_num_allocated += flist->fl_num_per_alloc;
|
||||||
@ -162,23 +160,35 @@ mca_ptl_elan_putget_desc_contruct (
|
|||||||
memset(desc, 0, sizeof(desc));
|
memset(desc, 0, sizeof(desc));
|
||||||
desc->ptl = ptl;
|
desc->ptl = ptl;
|
||||||
desc->req = NULL;
|
desc->req = NULL;
|
||||||
desc->main_dma.dma_typeSize = 0;
|
|
||||||
desc->main_dma.dma_cookie = 0;
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
desc->main_dma.dma_vproc = 0;
|
|
||||||
|
/* Allocate elan memory for chained event and buff */
|
||||||
|
desc->chain_buff = (E4_Addr *) ((char *)elan_event);
|
||||||
|
desc->comp_buff = (E4_Addr *) ((char *)elan_event + ELAN_BLOCK_SIZE );
|
||||||
|
desc->elan_event = (E4_Event *) ((char *)elan_event + 2*ELAN_BLOCK_SIZE );
|
||||||
|
desc->chain_event= (E4_Event *) ((char *)elan_event
|
||||||
|
+ 2 * ELAN_BLOCK_SIZE + sizeof (E4_Event32));
|
||||||
|
desc->comp_event= (E4_Event *) ((char *)elan_event
|
||||||
|
+ 2 * ELAN_BLOCK_SIZE + 2 * sizeof (E4_Event32));
|
||||||
|
#else
|
||||||
desc->elan_event = elan_event;
|
desc->elan_event = elan_event;
|
||||||
desc->chain_event= (E4_Event32 *)
|
desc->chain_event= (E4_Event32 *)
|
||||||
((char *)elan_event + sizeof (E4_Event32));
|
((char *)elan_event + sizeof (E4_Event32));
|
||||||
desc->chain_buff = (E4_Addr *)
|
desc->chain_buff = (E4_Addr *)
|
||||||
((char *)elan_event + 2*sizeof (E4_Event32));
|
((char *)elan_event + 2*sizeof (E4_Event32));
|
||||||
|
|
||||||
/* XXX: Remember all the address needs to be converted
|
#if 0
|
||||||
|
desc->main_dma.dma_typeSize = 0;
|
||||||
|
desc->main_dma.dma_cookie = 0;
|
||||||
|
desc->main_dma.dma_vproc = 0;
|
||||||
|
/* XXX + TODO: To remove this block after verification
|
||||||
|
* Remember all the address needs to be converted
|
||||||
* before assigning to DMA descritpor */
|
* before assigning to DMA descritpor */
|
||||||
desc->main_dma.dma_srcAddr = src_elan4_addr;
|
desc->main_dma.dma_srcAddr = src_elan4_addr;
|
||||||
desc->main_dma.dma_dstAddr = dst_elan4_addr;
|
desc->main_dma.dma_dstAddr = dst_elan4_addr;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if OMPI_PTL_ELAN_COMP_QUEUE
|
|
||||||
/* Have all the source event fired to the Queue */
|
|
||||||
#else
|
|
||||||
if (local) {
|
if (local) {
|
||||||
desc->main_dma.dma_srcEvent = elan4_main2elan(ctx, elan_event);
|
desc->main_dma.dma_srcEvent = elan4_main2elan(ctx, elan_event);
|
||||||
} else {
|
} else {
|
||||||
@ -243,10 +253,13 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl,
|
|||||||
main_size = OMPI_PTL_ELAN_ALIGNUP(sizeof(ompi_ptl_elan_putget_desc_t),
|
main_size = OMPI_PTL_ELAN_ALIGNUP(sizeof(ompi_ptl_elan_putget_desc_t),
|
||||||
main_align);
|
main_align);
|
||||||
|
|
||||||
/* FIXME: Get the correct number of elan_size,
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
* Contain elan_event, chain_event and a chain_buff */
|
elan_size = OMPI_PTL_ELAN_ALIGNUP(
|
||||||
|
(ELAN_BLOCK_SIZE + sizeof(E4_Event32)*2 ), elan_align);
|
||||||
|
#else
|
||||||
elan_size = OMPI_PTL_ELAN_ALIGNUP(
|
elan_size = OMPI_PTL_ELAN_ALIGNUP(
|
||||||
(ELAN_BLOCK_SIZE * 2 + sizeof(E4_Event32)*3 ), elan_align);
|
(ELAN_BLOCK_SIZE * 2 + sizeof(E4_Event32)*3 ), elan_align);
|
||||||
|
#endif
|
||||||
|
|
||||||
rail = (RAIL *) ptl->ptl_elan_rail;
|
rail = (RAIL *) ptl->ptl_elan_rail;
|
||||||
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
|
ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx;
|
||||||
@ -309,13 +322,14 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
|
|||||||
int num_rails)
|
int num_rails)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int nslots = OMPI_PTL_ELAN_MAX_QSLOTS;
|
int nslots;
|
||||||
int slotsize = OMPI_PTL_ELAN_MAX_QSIZE;
|
int slotsize;
|
||||||
RAIL *rail;
|
RAIL *rail;
|
||||||
ELAN4_CTX *ctx;
|
ELAN4_CTX *ctx;
|
||||||
struct mca_ptl_elan_module_t *ptl;
|
struct mca_ptl_elan_module_t *ptl;
|
||||||
|
|
||||||
START_FUNC(PTL_ELAN_DEBUG_INIT);
|
START_FUNC(PTL_ELAN_DEBUG_INIT);
|
||||||
|
nslots = OMPI_PTL_ELAN_MAX_QSLOTS;
|
||||||
|
|
||||||
#if OMPI_PTL_ELAN_COMP_QUEUE
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
/* Create a complete queue here, later use the queue above directly */
|
/* Create a complete queue here, later use the queue above directly */
|
||||||
@ -343,9 +357,10 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
|
|||||||
/* Init the Receive Queue structure */
|
/* Init the Receive Queue structure */
|
||||||
comp->rx_nslots = nslots;
|
comp->rx_nslots = nslots;
|
||||||
nslots += OMPI_PTL_ELAN_LOST_QSLOTS;
|
nslots += OMPI_PTL_ELAN_LOST_QSLOTS;
|
||||||
|
slotsize = sizeof(mca_ptl_base_header_t);
|
||||||
|
comp->rx_slotsize = ELAN_ALIGNUP (slotsize, OMPI_PTL_ELAN_SLOT_ALIGN);
|
||||||
comp->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ?
|
comp->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ?
|
||||||
INPUT_QUEUE_MAX : slotsize;
|
INPUT_QUEUE_MAX : slotsize;
|
||||||
comp->rx_slotsize = ELAN_ALIGNUP (slotsize, OMPI_PTL_ELAN_SLOT_ALIGN);
|
|
||||||
rxq = comp->rxq = (ompi_ptl_elan_recv_queue_t *)
|
rxq = comp->rxq = (ompi_ptl_elan_recv_queue_t *)
|
||||||
elan4_allocMain (rail->r_alloc, 64,
|
elan4_allocMain (rail->r_alloc, 64,
|
||||||
sizeof (ompi_ptl_elan_recv_queue_t));
|
sizeof (ompi_ptl_elan_recv_queue_t));
|
||||||
@ -374,7 +389,6 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
|
|||||||
|
|
||||||
/* Event to wait/block on, Bug here for the event */
|
/* Event to wait/block on, Bug here for the event */
|
||||||
rxq->qr_qEvent = rxq->qr_elanDone;
|
rxq->qr_qEvent = rxq->qr_elanDone;
|
||||||
|
|
||||||
comp->input->q_event= SDRAM2ELAN (ctx, (void *) rxq->qr_elanDone);
|
comp->input->q_event= SDRAM2ELAN (ctx, (void *) rxq->qr_elanDone);
|
||||||
comp->input->q_fptr = rxq->qr_efitem;
|
comp->input->q_fptr = rxq->qr_efitem;
|
||||||
comp->input->q_bptr = rxq->qr_efitem;
|
comp->input->q_bptr = rxq->qr_efitem;
|
||||||
@ -430,29 +444,19 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
|
|||||||
CQ_WriteEnableBit |
|
CQ_WriteEnableBit |
|
||||||
CQ_DmaStartEnableBit |
|
CQ_DmaStartEnableBit |
|
||||||
CQ_STENEnableBit, NULL);
|
CQ_STENEnableBit, NULL);
|
||||||
|
|
||||||
OMPI_PTL_ELAN_CHECK_UNEX (queue->tx_cmdq, NULL, OMPI_ERROR, 0);
|
OMPI_PTL_ELAN_CHECK_UNEX (queue->tx_cmdq, NULL, OMPI_ERROR, 0);
|
||||||
|
|
||||||
/*
|
|
||||||
* Elan4 has a hierarchical event mechanism.
|
|
||||||
* It is easy to use but nontrivial to manipulate
|
|
||||||
* We implement a simpler event control mechanism, which
|
|
||||||
* should also provide us the capability to chain event,
|
|
||||||
* dma and IRQ etc but more open to update.
|
|
||||||
*
|
|
||||||
* Initialize a new event list managing this queue */
|
|
||||||
ompi_init_elan_queue_events (ptl, queue);
|
|
||||||
|
|
||||||
/* Allocate a cookie pool */
|
/* Allocate a cookie pool */
|
||||||
queue->tx_cpool = elan4_allocCookiePool (ctx, ptl->elan_vp);
|
queue->tx_cpool = elan4_allocCookiePool (ctx, ptl->elan_vp);
|
||||||
|
ompi_init_elan_queue_events (ptl, queue);
|
||||||
|
|
||||||
/* Init the Receive Queue structure */
|
/* Init the Receive Queue structure */
|
||||||
queue->rx_nslots = nslots;
|
queue->rx_nslots = nslots;
|
||||||
nslots += OMPI_PTL_ELAN_LOST_QSLOTS;
|
nslots += OMPI_PTL_ELAN_LOST_QSLOTS;
|
||||||
|
slotsize = OMPI_PTL_ELAN_MAX_QSIZE;
|
||||||
|
queue->rx_slotsize = ELAN_ALIGNUP (slotsize, OMPI_PTL_ELAN_SLOT_ALIGN);
|
||||||
queue->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ?
|
queue->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ?
|
||||||
INPUT_QUEUE_MAX : slotsize;
|
INPUT_QUEUE_MAX : slotsize;
|
||||||
queue->rx_slotsize = ELAN_ALIGNUP (slotsize, OMPI_PTL_ELAN_SLOT_ALIGN);
|
|
||||||
|
|
||||||
rxq = queue->rxq = (ompi_ptl_elan_recv_queue_t *)
|
rxq = queue->rxq = (ompi_ptl_elan_recv_queue_t *)
|
||||||
elan4_allocMain (rail->r_alloc, 64,
|
elan4_allocMain (rail->r_alloc, 64,
|
||||||
@ -470,7 +474,7 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
|
|||||||
OMPI_PTL_ELAN_SLOT_ALIGN, sizeof (EVENT32));
|
OMPI_PTL_ELAN_SLOT_ALIGN, sizeof (EVENT32));
|
||||||
OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_elanDone, NULL, OMPI_ERROR, 0);
|
OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_elanDone, NULL, OMPI_ERROR, 0);
|
||||||
|
|
||||||
/* Set the top et al */
|
/* Set the top etc */
|
||||||
rxq->qr_efitem = (E4_uint64) elan4_main2elan (ctx, rxq->qr_fptr);
|
rxq->qr_efitem = (E4_uint64) elan4_main2elan (ctx, rxq->qr_fptr);
|
||||||
assert(rxq->qr_efitem != ELAN_BAD_ADDR);
|
assert(rxq->qr_efitem != ELAN_BAD_ADDR);
|
||||||
rxq->qr_base = rxq->qr_fptr;
|
rxq->qr_base = rxq->qr_fptr;
|
||||||
@ -480,9 +484,8 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
|
|||||||
rxq->qr_elitem = rxq->qr_efitem +
|
rxq->qr_elitem = rxq->qr_efitem +
|
||||||
(queue->rx_slotsize * (nslots - OMPI_PTL_ELAN_LOST_QSLOTS));
|
(queue->rx_slotsize * (nslots - OMPI_PTL_ELAN_LOST_QSLOTS));
|
||||||
|
|
||||||
/* Event to wait/block on, Bug here for the event */
|
/* XXX: Event to wait/block on, was buggy here for the event */
|
||||||
rxq->qr_qEvent = rxq->qr_elanDone;
|
rxq->qr_qEvent = rxq->qr_elanDone;
|
||||||
|
|
||||||
queue->input->q_event= SDRAM2ELAN (ctx, (void *) rxq->qr_elanDone);
|
queue->input->q_event= SDRAM2ELAN (ctx, (void *) rxq->qr_elanDone);
|
||||||
queue->input->q_fptr = rxq->qr_efitem;
|
queue->input->q_fptr = rxq->qr_efitem;
|
||||||
queue->input->q_bptr = rxq->qr_efitem;
|
queue->input->q_bptr = rxq->qr_efitem;
|
||||||
@ -510,7 +513,6 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp,
|
|||||||
OBJ_CONSTRUCT (&queue->rx_lock, ompi_mutex_t);
|
OBJ_CONSTRUCT (&queue->rx_lock, ompi_mutex_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
END_FUNC(PTL_ELAN_DEBUG_INIT);
|
END_FUNC(PTL_ELAN_DEBUG_INIT);
|
||||||
return (OMPI_SUCCESS);
|
return (OMPI_SUCCESS);
|
||||||
}
|
}
|
||||||
|
@ -1027,20 +1027,20 @@ mca_ptl_elan_drain_recv (struct mca_ptl_elan_module_t *ptl)
|
|||||||
int
|
int
|
||||||
mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl)
|
mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl)
|
||||||
{
|
{
|
||||||
struct ompi_ptl_elan_comp_queue_t *comp;
|
|
||||||
mca_ptl_elan_send_frag_t *frag;
|
|
||||||
ELAN4_CTX *ctx;
|
ELAN4_CTX *ctx;
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
|
||||||
ctx = ptl->ptl_elan_ctx;
|
|
||||||
|
|
||||||
#if OMPI_PTL_ELAN_COMP_QUEUE
|
#if OMPI_PTL_ELAN_COMP_QUEUE
|
||||||
|
struct ompi_ptl_elan_comp_queue_t *comp;
|
||||||
|
|
||||||
|
ompi_ptl_elan_recv_queue_t *rxq;
|
||||||
|
|
||||||
comp = ptl->comp;
|
comp = ptl->comp;
|
||||||
|
ctx = ptl->ptl_elan_ctx;
|
||||||
rxq = comp->rxq;
|
rxq = comp->rxq;
|
||||||
#if 1
|
#if 1
|
||||||
/* XXX: Just test and go */
|
/* XXX: Just test and go */
|
||||||
rc = (*(int *) (&rxq->qr_doneWord));
|
rc = (*(int *) (&rxq->qr_doneWord));
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/* XXX: block on the event without holding a lock */
|
/* XXX: block on the event without holding a lock */
|
||||||
rc = elan4_pollevent_word (ctx, &rxq->qr_doneWord, 1);
|
rc = elan4_pollevent_word (ctx, &rxq->qr_doneWord, 1);
|
||||||
@ -1052,6 +1052,12 @@ mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl)
|
|||||||
OMPI_LOCK (&comp->rx_lock);
|
OMPI_LOCK (&comp->rx_lock);
|
||||||
header = (mca_ptl_base_header_t *) rxq->qr_fptr;
|
header = (mca_ptl_base_header_t *) rxq->qr_fptr;
|
||||||
|
|
||||||
|
LOG_PRINT(PTL_ELAN_DEBUG_MAC,
|
||||||
|
"[comp...] type %d flag %d size %d\n",
|
||||||
|
header->hdr_common.hdr_type,
|
||||||
|
header->hdr_common.hdr_flags,
|
||||||
|
header->hdr_common.hdr_size);
|
||||||
|
|
||||||
/* FIXME: please fix this completion queue processing */
|
/* FIXME: please fix this completion queue processing */
|
||||||
|
|
||||||
/* FIXME: please reorganize this into a common routine */
|
/* FIXME: please reorganize this into a common routine */
|
||||||
@ -1084,7 +1090,10 @@ mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl)
|
|||||||
OMPI_UNLOCK (&comp->rx_lock);
|
OMPI_UNLOCK (&comp->rx_lock);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
ctx = ptl->ptl_elan_ctx;
|
||||||
while (ompi_list_get_size (&ptl->send_frags) > 0) {
|
while (ompi_list_get_size (&ptl->send_frags) > 0) {
|
||||||
|
mca_ptl_elan_send_frag_t *frag;
|
||||||
|
|
||||||
frag = (mca_ptl_elan_send_frag_t *)
|
frag = (mca_ptl_elan_send_frag_t *)
|
||||||
ompi_list_get_first (&ptl->send_frags);
|
ompi_list_get_first (&ptl->send_frags);
|
||||||
rc = * ((int *) (&frag->desc->main_doneWord));
|
rc = * ((int *) (&frag->desc->main_doneWord));
|
||||||
@ -1105,8 +1114,8 @@ mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl)
|
|||||||
/* XXX: Stop at any incomplete send desc */
|
/* XXX: Stop at any incomplete send desc */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
} /* end of the while loop */
|
} /* end of the while loop */
|
||||||
|
#endif
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -52,7 +52,7 @@
|
|||||||
#define PTL_ELAN_DEBUG_GET (0x400)
|
#define PTL_ELAN_DEBUG_GET (0x400)
|
||||||
#define PTL_ELAN_DEBUG_CHAIN (0x800)
|
#define PTL_ELAN_DEBUG_CHAIN (0x800)
|
||||||
|
|
||||||
#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_GET|PTL_ELAN_DEBUG_ACK|PTL_ELAN_DEBUG_SEND)
|
#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_MAC|PTL_ELAN_DEBUG_SEND)
|
||||||
|
|
||||||
#define START_FUNC(flag) \
|
#define START_FUNC(flag) \
|
||||||
do { \
|
do { \
|
||||||
@ -83,8 +83,8 @@ do { \
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
/* PTL_ELAN related MACROS, expose some as configurable options if needed */
|
/* PTL_ELAN related MACROS, expose some as configurable options if needed */
|
||||||
#define OMPI_PTL_ELAN_ENABLE_GET (1)
|
#define OMPI_PTL_ELAN_ENABLE_GET (0)
|
||||||
#define OMPI_PTL_ELAN_COMP_QUEUE (0)
|
#define OMPI_PTL_ELAN_COMP_QUEUE (1)
|
||||||
#define OMPI_PTL_ELAN_THREADING (OMPI_HAVE_POSIX_THREADS)
|
#define OMPI_PTL_ELAN_THREADING (OMPI_HAVE_POSIX_THREADS)
|
||||||
|
|
||||||
#define OMPI_PTL_ELAN_MAX_QSIZE (2048)
|
#define OMPI_PTL_ELAN_MAX_QSIZE (2048)
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user