From 2478c6bc37f2c9856b773e1623fba525b8122452 Mon Sep 17 00:00:00 2001 From: Weikuan Yu Date: Wed, 4 Aug 2004 23:18:45 +0000 Subject: [PATCH] setup put. To do a chained QDMA for notification. Also PML changes is needed for this stuff to work This commit was SVN r1883. --- src/mca/ptl/elan/src/ptl_elan.c | 29 +- src/mca/ptl/elan/src/ptl_elan.h | 3 +- src/mca/ptl/elan/src/ptl_elan_comm_init.c | 340 ++++++++++++++-------- src/mca/ptl/elan/src/ptl_elan_frag.c | 75 ++--- src/mca/ptl/elan/src/ptl_elan_frag.h | 9 +- src/mca/ptl/elan/src/ptl_elan_init.c | 2 +- src/mca/ptl/elan/src/ptl_elan_priv.c | 135 ++++++++- src/mca/ptl/elan/src/ptl_elan_priv.h | 54 +++- 8 files changed, 462 insertions(+), 185 deletions(-) diff --git a/src/mca/ptl/elan/src/ptl_elan.c b/src/mca/ptl/elan/src/ptl_elan.c index 77d7c03299..8074076b57 100644 --- a/src/mca/ptl/elan/src/ptl_elan.c +++ b/src/mca/ptl/elan/src/ptl_elan.c @@ -168,7 +168,7 @@ mca_ptl_elan_req_init (struct mca_ptl_base_module_t *ptl, START_FUNC(); - desc = mca_ptl_elan_alloc_send_desc(ptl, request); + desc = mca_ptl_elan_alloc_send_desc(ptl, request, 0); if (NULL == desc) { ompi_output(0, "[%s:%d] Unable to allocate an elan send descriptors \n", @@ -245,8 +245,7 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl, if (offset == 0) { /* The first fragment uses a cached desc */ desc = ((mca_ptl_elan_send_request_t*)sendreq)->req_frag; } else { - - desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq); + desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, 0); if (NULL == desc) { ompi_output(0, "[%s:%d] Unable to allocate an elan send descriptors \n", @@ -280,6 +279,28 @@ mca_ptl_elan_put (struct mca_ptl_base_module_t *ptl, int flags) { int rc = OMPI_SUCCESS; + mca_ptl_elan_send_frag_t *desc; + + /* XXX: + * Since the address passed down from PML does not provide + * elan information, so there needs to be a change + */ + + START_FUNC(); + + desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, 1); + if (NULL == desc) { + ompi_output(0, + "[%s:%d] Unable to allocate an elan send descriptors \n", + __FILE__, __LINE__); + } + + rc = mca_ptl_elan_start_desc(desc, + (struct mca_ptl_elan_peer_t *)ptl_peer, + sendreq, offset, &size, flags); + + /* Update all the sends until the put is done */ + END_FUNC(); return rc; } @@ -325,7 +346,7 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl, mca_ptl_elan_send_frag_t *desc; /* Get a frag desc and allocate a send desc */ - desc = mca_ptl_elan_alloc_send_desc(ptl, NULL); + desc = mca_ptl_elan_alloc_send_desc(ptl, NULL, 0); if (NULL == desc) { ompi_output(0, diff --git a/src/mca/ptl/elan/src/ptl_elan.h b/src/mca/ptl/elan/src/ptl_elan.h index 7078f604f6..a87c96ecfa 100644 --- a/src/mca/ptl/elan/src/ptl_elan.h +++ b/src/mca/ptl/elan/src/ptl_elan.h @@ -41,7 +41,8 @@ struct mca_ptl_elan_module_t { unsigned int elan_vp; /**< elan vpid, not ompi vpid */ unsigned int elan_nvp; /**< total # of elan vpid */ - struct ompi_ptl_elan_queue_ctrl_t *queue; /**< Queue control structures */ + struct ompi_ptl_elan_queue_ctrl_t *queue; /**< Queue ctrl struct*/ + struct ompi_ptl_elan_putget_ctrl_t *putget; /**< putget ctrl struct */ int max_num_dmas; /**< total rdma descriptors */ }; diff --git a/src/mca/ptl/elan/src/ptl_elan_comm_init.c b/src/mca/ptl/elan/src/ptl_elan_comm_init.c index 9e60bd157b..44b232b5f6 100644 --- a/src/mca/ptl/elan/src/ptl_elan_comm_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_comm_init.c @@ -15,6 +15,18 @@ #define MAX(a,b) ((a>b)? a:b) #define ALIGNUP(x,a) (((unsigned int)(x) + ((a)-1)) & (-(a))) +#define OMPI_PTL_ELAN_CTRL_LIST(flist, init_num, inc_num, max_num) \ +do { \ + OBJ_CONSTRUCT (flist, ompi_free_list_t); \ + OBJ_CONSTRUCT(&flist->fl_lock, ompi_mutex_t); \ + flist->fl_elem_size = flist->fl_max_to_alloc = max_num; \ + flist->fl_num_allocated = init_num; \ + flist->fl_num_per_alloc = inc_num; \ + flist->fl_elem_class = NULL; /* leave it null */ \ + flist->fl_mpool = NULL; /* leave it null */ \ +} while (0) + + static int ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, ompi_ptl_elan_queue_ctrl_t * queue) @@ -24,13 +36,13 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, int main_align, main_size; int elan_align, elan_size; - mca_ptl_elan_send_frag_t *desc; + mca_ptl_elan_send_frag_t *frag; RAIL *rail; ELAN4_CTX *ctx; ompi_free_list_t *flist; - ompi_ptl_elan_qdma_desc_t *ptr; + ompi_ptl_elan_qdma_desc_t *desc; ompi_elan_event_t *elan_ptr; START_FUNC(); @@ -45,7 +57,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, flist = &queue->tx_desc_free; main_align = MAX (sizeof (void *), 8); - elan_align = MAX (sizeof (int *), 128); + elan_align = MAX (sizeof (int *), ELAN_BLOCK_ALIGN); main_size = ALIGNUP (sizeof (ompi_ptl_elan_qdma_desc_t), main_align); elan_size = ALIGNUP (sizeof (ompi_elan_event_t), elan_align); @@ -58,50 +70,48 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, /* Allocate the elements */ - desc = (mca_ptl_elan_send_frag_t *) - malloc(sizeof(mca_ptl_elan_send_frag_t) * (count + 1)); - OMPI_PTL_ELAN_CHECK_UNEX (desc, NULL, OMPI_ERROR, 0); + frag = (mca_ptl_elan_send_frag_t *) + malloc(sizeof(mca_ptl_elan_send_frag_t) * count); + OMPI_PTL_ELAN_CHECK_UNEX (frag, NULL, OMPI_ERROR, 0); - ptr = (ompi_ptl_elan_qdma_desc_t *) elan4_allocMain (rail->r_alloc, + desc = (ompi_ptl_elan_qdma_desc_t *) elan4_allocMain (rail->r_alloc, main_align, - main_size * - (count + 1)); - OMPI_PTL_ELAN_CHECK_UNEX (ptr, NULL, OMPI_ERROR, 0); + main_size * count); + OMPI_PTL_ELAN_CHECK_UNEX (desc, NULL, OMPI_ERROR, 0); /* Allocating elan related structures */ elan_ptr = (ompi_elan_event_t *) elan4_allocElan (rail->r_alloc, elan_align, - elan_size * (count + - 1)); + elan_size * count); OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0); for (i = 0; i < flist->fl_num_per_alloc; i++) { ompi_list_item_t *item; - ptr->rail = rail; - ptr->ptl = ptl; - ptr->elan_data_event = elan_ptr; - desc->desc = (ompi_ptl_elan_base_desc_t *)ptr; + desc->rail = rail; + desc->ptl = ptl; + desc->elan_data_event = elan_ptr; + frag->desc = (ompi_ptl_elan_base_desc_t *)desc; /* Initialize some of the dma structures */ { - ptr->main_dma.dma_dstAddr = 0; - ptr->main_dma.dma_srcEvent = + desc->main_dma.dma_dstAddr = 0; + desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, &elan_ptr->event32); - ptr->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input); + desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input); INITEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32, - &ptr->main_doneWord); - RESETEVENT_WORD (&ptr->main_doneWord); + &desc->main_doneWord); + RESETEVENT_WORD (&desc->main_doneWord); PRIMEEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32, 1); } - item = (ompi_list_item_t *) desc; + item = (ompi_list_item_t *) frag; ompi_list_append (&flist->super, item); /* Progress to the next element */ - ptr = (ompi_ptl_elan_qdma_desc_t *) ((char *) ptr + main_size); + desc = (ompi_ptl_elan_qdma_desc_t *) ((char *) desc + main_size); elan_ptr = (ompi_elan_event_t *) ((char *) elan_ptr + elan_size); - desc ++; + frag ++; } flist->fl_num_allocated += flist->fl_num_per_alloc; @@ -109,6 +119,144 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, return OMPI_SUCCESS; } +static void +mca_ptl_elan_putget_desc_contruct ( + ELAN4_CTX *ctx, + ompi_ptl_elan_putget_desc_t *desc, + EVENT *elan_event, + E4_Addr src_elan4_addr, + E4_Addr dst_elan4_addr, + int local /* dma_src is local */ ) +{ + /* Zero this descriptor */ + memset(desc, 0, sizeof(desc)); + + desc->dma_typeSize = 0; + desc->dma_cookie = 0; + desc->dma_vproc = 0; + + /* Remember all the address needs to be converted + * before assigning to DMA descritpor */ + desc->main_dma.dma_srcAddr = src_elan4_addr; + desc->main_dma.dma_dstAddr = dst_elan4_addr; + + if (local) { + desc->main_dma.dma_srcEvent = elan4_main2elan(ctx, elan_event); + } else { + desc->main_dma.dma_dstEvent = elan4_main2elan(ctx, elan_event); + } + + INITEVENT_WORD (ctx, elan_event, &desc->main_doneWord); + RESETEVENT_WORD (&desc->main_doneWord); + PRIMEEVENT_WORD (ctx, elan_event, 1); + + /* Make PCI write visable */ + mb(); +} + +#define OMPI_ELAN_DESC_LIST(ctx, flist, frag, desc, eptr, msize, esize, local)\ +do { \ + int i; \ + for (i = 0; i < flist->fl_num_per_alloc; i++) { \ + ompi_list_item_t *item; \ + \ + desc->elan_data_event = eptr; \ + frag->desc = (ompi_ptl_elan_base_desc_t *)desc; \ + \ + /* Initialize some of the dma structures */ \ + mca_ptl_elan_putget_desc_contruct (ctx, desc, \ + eptr, 0, 0, local); \ + \ + item = (ompi_list_item_t *) frag; \ + ompi_list_append (&flist->super, item); \ + \ + /* Progress to the next element */ \ + desc = (ompi_ptl_elan_putget_desc_t *) \ + ((char *)desc + msize); \ + eptr = (ompi_elan_event_t *) ((char *) eptr + esize); \ + frag ++; \ + } \ + flist->fl_num_allocated += flist->fl_num_per_alloc; \ +} while (0) + + +static int +ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl, + RAIL *rail, + ompi_ptl_elan_putget_ctrl_t * putget, + int init_num, int inc_num, int max_num) +{ + int i; + int main_size; + int main_align; + int elan_size; + int elan_align; + + RAIL *rail; + ELAN4_CTX *ctx; + ompi_elan_event_t *elan_ptr; + mca_ptl_elan_send_frag_t *frag; + ompi_free_list_t *put_list, *get_list; + ompi_ptl_elan_putget_desc_t *put_desc, *get_desc; + + START_FUNC(); + + main_align = MAX (sizeof (void *), ELAN_ALIGN); + elan_align = MAX (sizeof (int *), ELAN_BLOCK_ALIGN); + main_size = ALIGNUP(sizeof(ompi_ptl_elan_putget_desc_t), main_align); + elan_size = ALIGNUP(sizeof(ompi_elan_event_t), elan_align); + + rail = (RAIL *) ptl->ptl_elan_rail; + ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx; + + /* initialize list */ + OBJ_CONSTRUCT (&putget->put_desc, ompi_list_t); + OBJ_CONSTRUCT (&putget->put_desc_free, ompi_free_list_t); + put_list = &putget->put_desc_free; + OMPI_PTL_ELAN_CTRL_LIST(put_list, 0, inc_num, max_num); + + /* Allocate the elements */ + frag = (mca_ptl_elan_send_frag_t *) + malloc(sizeof(mca_ptl_elan_send_frag_t) * inc_num); + OMPI_PTL_ELAN_CHECK_UNEX (frag, NULL, OMPI_ERROR, 0); + + /* Allocating elan related structures */ + elan_ptr = (ompi_elan_event_t *) elan4_allocElan (rail->r_alloc, + elan_align, elan_size * inc_num); + OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0); + + put_desc = (ompi_ptl_elan_putget_desc_t *) elan4_allocMain ( + rail->r_alloc, main_align, main_size * inc_num); + OMPI_PTL_ELAN_CHECK_UNEX (put_desc, NULL, OMPI_ERROR, 0); + OMPI_PTL_ELAN_GROW_DESC_LIST(ctx, put_list, frag, + put_desc, elan_ptr, main_size, elan_size, 1) + + OBJ_CONSTRUCT (&putget->get_desc, ompi_list_t); + OBJ_CONSTRUCT (&putget->get_desc_free, ompi_free_list_t); + get_list = &putget->get_desc_free; + OMPI_PTL_ELAN_CTRL_LIST(get_list, 0, inc_num, max_num); + + /* Allocate the elements */ + frag = (mca_ptl_elan_send_frag_t *) + malloc(sizeof(mca_ptl_elan_send_frag_t) * inc_num); + OMPI_PTL_ELAN_CHECK_UNEX (frag, NULL, OMPI_ERROR, 0); + + /* Allocating elan related structures */ + elan_ptr = (ompi_elan_event_t *) elan4_allocElan (rail->r_alloc, + elan_align, elan_size * inc_num); + OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0); + + get_desc = (ompi_ptl_elan_putget_desc_t *) elan4_allocMain ( + rail->r_alloc, main_align, main_size * inc_num); + OMPI_PTL_ELAN_CHECK_UNEX (get_desc, NULL, OMPI_ERROR, 0); + OMPI_PTL_ELAN_GROW_DESC_LIST(ctx, get_list, frag, + get_desc, elan_ptr, main_size, elan_size, 0) + + END_FUNC(); + return OMPI_SUCCESS; +} + + int ompi_init_elan_stat (mca_ptl_elan_component_t * emp, int num_rails) @@ -245,131 +393,79 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp, } int -ompi_init_elan_rdma (mca_ptl_elan_component_t * emp, +ompi_init_elan_putget (mca_ptl_elan_component_t * emp, int num_rails) { -#if 0 int i; int nslots = 128; - int slotsize = 32*1024; + int slotsize = 2048; RAIL *rail; ELAN4_CTX *ctx; - struct mca_ptl_elan_t *ptl; + struct mca_ptl_elan_module_t *ptl; START_FUNC(); /* Init the Transmit Queue structure */ for (i = 0; i < num_rails; i++) { - ompi_ptl_elan_recv_queue_t *rxq; - ompi_ptl_elan_queue_ctrl_t *queue; + E4_CmdQParams *cqp; + ompi_ptl_elan_putget_ctrl_t *putget; - ptl = emp->elan_ptls[i]; + ptl = emp->elan_ptl_modules[i]; rail = (RAIL *) ptl->ptl_elan_rail; ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx; - queue = ptl->queue = (ompi_ptl_elan_queue_ctrl_t *) - malloc (sizeof (ompi_ptl_elan_queue_ctrl_t)); - OMPI_PTL_ELAN_CHECK_UNEX (queue, NULL, OMPI_ERROR, 0); - memset (queue, 0, sizeof (ompi_ptl_elan_queue_ctrl_t)); + putget = ptl->putget = (ompi_ptl_elan_putget_ctrl_t *) + malloc (sizeof (ompi_ptl_elan_putget_ctrl_t)); + OMPI_PTL_ELAN_CHECK_UNEX (putget, NULL, OMPI_ERROR, 0); + memset (putget, 0, sizeof (ompi_ptl_elan_putget_ctrl_t)); - /* Allocate input queue */ - queue->input = (E4_InputQueue *) elan4_allocElan (rail->r_alloc, - INPUT_QUEUE_ALIGN, - INPUT_QUEUE_SIZE); - OMPI_PTL_ELAN_CHECK_UNEX (queue->input, NULL, OMPI_ERROR, 0); + putget->pg_throttle = PUTGET_THROTTLE; + putget->pg_flags = ELAN_PUT_FASTPATH; + putget->pg_retryCount = 16; + putget->pg_evictCache = TRUE; + putget->pg_waitType = ELAN_POLL_EVENT; + + /* construct the lock variable */ + OBJ_CONSTRUCT (&putget->pg_lock, ompi_mutex_t); - queue->tx_cmdq = elan4_alloc_cmdq (ctx, - rail->r_alloc, - CQ_Size8K, - CQ_WriteEnableBit | - CQ_DmaStartEnableBit | - CQ_STENEnableBit, NULL); + *cqp = elan4_probe_cmdq(ctx, rail->r_alloc, 0x10, CQ_AutoCtrlFlowOn); + putget->put_cmdq = elan4_alloc_cmdq(ctx, + rail>r_alloc, + CQ_Size8K, + CQ_WriteEnableBit | + CQ_DmaStartEnableBit | + CQ_SetEventEnableBit | + CQ_STENEnableBit, cqp); + OMPI_PTL_ELAN_CHECK_UNEX (putget->put_cmdq, NULL, OMPI_ERROR, 0); - OMPI_PTL_ELAN_CHECK_UNEX (queue->tx_cmdq, NULL, OMPI_ERROR, 0); + putget->get_cmdq = elan4_alloc_cmdq(ctx, + rail->r_alloc, + CQ_Size8K, + CQ_WriteEnableBit | + CQ_STENEnableBit | + CQ_SetEventEnableBit, cqp); + OMPI_PTL_ELAN_CHECK_UNEX (putget->get_cmdq, NULL, OMPI_ERROR, 0); - /* - * Elan4 has a hierarchical event mechanism. - * It is easy to use but nontrivial to manipulate - * We implement a simpler event control mechanism, which - * should also provide us the capability to chain event, - * dma and IRQ etc but more open to update. - * - * Initialize a new event list managing this queue */ + /* Simple report on the command queue parameters */ + elan4_disp_cmdq_params (ptl->putget->put_cmdq); + elan4_disp_cmdq_params (ptl->putget->get_cmdq); - ompi_init_elan_queue_events (ptl, queue); + putget->pg_cmdStream = malloc(PAGESIZE); + OMPI_PTL_ELAN_CHECK_UNEX (putget->pg_cmdStream, NULL, OMPI_ERROR, 0); - /* Allocate a cookie pool */ - queue->tx_cpool = elan4_allocCookiePool (ctx, ptl->elan_vp); + /* Allocate a per vp counter to throttle outstanding get DMAs */ + putget->pg_pendingGetCount = malloc(sizeof(u_int)*state->nvp); + OMPI_PTL_ELAN_CHECK_UNEX (putget->pg_pendingGetCount, + NULL, OMPI_ERROR, 0); + memset(putget->pg_pendingGetCount, 0, sizeof(u_int)*state->nvp); - /* Init the Receive Queue structure */ - queue->rx_nslots = 128; - nslots += ELAN_QUEUE_LOST_SLOTS; + putget->pg_cpool = elan4_allocCookiePool(ctx, state->vp); - queue->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ? - INPUT_QUEUE_MAX : slotsize; - queue->rx_slotsize = ELAN_ALIGNUP (slotsize, SLOT_ALIGN); - - rxq = queue->rxq = (ompi_ptl_elan_recv_queue_t *) - elan4_allocMain (rail->r_alloc, 64, - sizeof (ompi_ptl_elan_recv_queue_t)); - OMPI_PTL_ELAN_CHECK_UNEX (rxq, NULL, OMPI_ERROR, 0); - memset (rxq, 0, sizeof (ompi_ptl_elan_recv_queue_t)); - - rxq->qr_rail = rail; - rxq->qr_fptr = elan4_allocMain (rail->r_alloc, - 128, nslots * queue->rx_slotsize); - OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_fptr, NULL, OMPI_ERROR, 0); - memset (rxq->qr_fptr, 0xeb, nslots * queue->rx_slotsize); - - rxq->qr_elanDone = ALLOC_ELAN (rail, SLOT_ALIGN, sizeof (EVENT32)); - OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_elanDone, NULL, OMPI_ERROR, 0); - - /* Set the top et al */ - rxq->qr_efitem = (E4_uint64) elan4_main2elan (ctx, rxq->qr_fptr); - assert(rxq->qr_efitem != ELAN_BAD_ADDR); - rxq->qr_base = rxq->qr_fptr; - rxq->qr_top = (void *) ((uintptr_t) rxq->qr_base - + (queue->rx_slotsize * (nslots - 1))); - rxq->qr_efptr = rxq->qr_efitem; - rxq->qr_elitem = - rxq->qr_efitem + (queue->rx_slotsize * (nslots - 1)); - - /* Event to wait/block on, Bug here for the event */ - rxq->qr_qEvent = rxq->qr_elanDone; - - queue->input->q_event = - SDRAM2ELAN (ctx, (void *) rxq->qr_elanDone); - queue->input->q_fptr = rxq->qr_efitem; - queue->input->q_bptr = rxq->qr_efitem; - queue->input->q_control = - E4_InputQueueControl (rxq->qr_efitem, rxq->qr_elitem, - queue->rx_slotsize); - - /* The event */ - INITEVENT_WORD (ctx, (EVENT *) rxq->qr_elanDone, - &rxq->qr_doneWord); - RESETEVENT_WORD (&rxq->qr_doneWord); - PRIMEEVENT_WORD (ctx, (EVENT *) rxq->qr_elanDone, 1); - - rxq->qr_cmdq = elan4_alloc_cmdq (ctx, rail->r_alloc, - CQ_Size1K, - CQ_WriteEnableBit | - CQ_WaitEventEnableBit, NULL); - - /*elan4_disp_cmdq_params (rxq->qr_cmdq);*/ - OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_cmdq, NULL, OMPI_ERROR, 0); - - /* Allocate a sleepDesc for threads to block on */ - rxq->qr_es = ompi_init_elan_sleepdesc (&mca_ptl_elan_global_state, - rxq->qr_rail); - OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_es, NULL, OMPI_ERROR, 0); - - OBJ_CONSTRUCT (&queue->rx_lock, ompi_mutex_t); + ompi_ptl_elan_init_putget_ctrl (ptl, rail, putget, 0, 16, 32) } END_FUNC(); -#endif return (OMPI_SUCCESS); } diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.c b/src/mca/ptl/elan/src/ptl_elan_frag.c index 447aaff7f1..2a0c154284 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.c +++ b/src/mca/ptl/elan/src/ptl_elan_frag.c @@ -82,10 +82,9 @@ extern mca_ptl_elan_state_t mca_ptl_elan_global_state; mca_ptl_elan_send_frag_t * mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr, - struct mca_pml_base_send_request_t *sendreq) + struct mca_pml_base_send_request_t *sendreq, + int oneside) { - struct ompi_ptl_elan_queue_ctrl_t *queue; - /*struct mca_ptl_elan_peer_t *peer;*/ ompi_free_list_t *flist; ompi_list_item_t *item; @@ -94,48 +93,52 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr, START_FUNC(); /* For now, bind to queue DMA directly */ - { - queue = ((mca_ptl_elan_module_t *) ptl_ptr)->queue; - flist = &queue->tx_desc_free; + if (oneside) { + /*struct mca_ptl_elan_peer_t *peer;*/ + flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->tx_desc_free; + } else { + flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->queue)->tx_desc_free; + } - if (ompi_using_threads ()) { + if (ompi_using_threads ()) { - ompi_mutex_lock(&flist->fl_lock); + ompi_mutex_lock(&flist->fl_lock); + item = ompi_list_remove_first (&((flist)->super)); - item = ompi_list_remove_first (&((flist)->super)); + /* Progress this PTL module to get back a descriptor, + * Is it OK to progress with ptl->ptl_send_progress? */ + while (NULL == item) { + mca_ptl_tstamp_t tstamp = 0; - /* Progress this PTL module to get back a descriptor, - * Is it OK to progress with ptl->ptl_send_progress? */ - while (NULL == item) { - mca_ptl_tstamp_t tstamp = 0; + ptl_ptr->ptl_component->ptlm_progress (tstamp); + item = ompi_list_remove_first (&((flist)->super)); + } + ompi_mutex_unlock(&flist->fl_lock); + } else { + item = ompi_list_remove_first (&((flist)->super)); - ptl_ptr->ptl_component->ptlm_progress (tstamp); - item = ompi_list_remove_first (&((flist)->super)); - } - ompi_mutex_unlock(&flist->fl_lock); - } else { - item = ompi_list_remove_first (&((flist)->super)); + /* Progress this PTL module to get back a descriptor, + * Is it OK to progress with ptl->ptl_send_progress()? */ + while (NULL == item) { + mca_ptl_tstamp_t tstamp = 0; - /* Progress this PTL module to get back a descriptor, - * Is it OK to progress with ptl->ptl_send_progress()? */ - while (NULL == item) { - mca_ptl_tstamp_t tstamp = 0; + /* XXX: + * Well, this still does not trigger the progress on + * PTL's from other modules. Wait for PML to change. + * Otherwise have to trigger PML progress from PTL. Ouch.. + */ + ptl_ptr->ptl_component->ptlm_progress (tstamp); + item = ompi_list_remove_first (&((flist)->super)); + } + } + desc = (mca_ptl_elan_send_frag_t *) item; + desc->desc->req = (struct mca_ptl_elan_send_request_t *) sendreq; - /* - * Well, this still does not trigger the progress on - * PTL's from other modules. Wait for PML to change. - * Otherwise have to trigger PML progress from PTL. Ouch.. - */ - ptl_ptr->ptl_component->ptlm_progress (tstamp); - item = ompi_list_remove_first (&((flist)->super)); - } - } - desc = (mca_ptl_elan_send_frag_t *) item; + if (oneside) { + desc->desc->desc_type = MCA_PTL_ELAN_DESC_PUTGET; + } else { desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA; } - desc->desc->req = - (struct mca_ptl_elan_send_request_t *) sendreq; - /*(struct mca_pml_base_send_request_t *)sendreq;*/ END_FUNC(); return desc; diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.h b/src/mca/ptl/elan/src/ptl_elan_frag.h index dfaf8d5a02..55e462d28b 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.h +++ b/src/mca/ptl/elan/src/ptl_elan_frag.h @@ -37,10 +37,12 @@ struct mca_ptl_elan_recv_frag_t { size_t frag_msg_cnt; volatile int frag_progressed; /* Is it record to request */ bool frag_ack_pending; /* Is there an ack to send */ +#if 0 union { - struct ompi_ptl_elan_qdma_frag_t *qdma; - struct ompi_ptl_elan_putget_frag_t *putget; + struct ompi_ptl_elan_qdma_desc_t *qdma; + struct ompi_ptl_elan_putget_desc_t *putget; } frag; +#endif char *alloc_buff; char *unex_buff; }; @@ -51,7 +53,8 @@ extern ompi_class_t mca_ptl_elan_recv_frag_t_class; mca_ptl_elan_send_frag_t * mca_ptl_elan_alloc_send_desc( struct mca_ptl_base_module_t *ptl, - struct mca_pml_base_send_request_t *sendreq); + struct mca_pml_base_send_request_t *sendreq, + int oneside); mca_ptl_elan_recv_frag_t * mca_ptl_elan_alloc_recv_desc(struct mca_pml_base_recv_request_t *req); diff --git a/src/mca/ptl/elan/src/ptl_elan_init.c b/src/mca/ptl/elan/src/ptl_elan_init.c index 0823c496e2..078ca8f4e9 100644 --- a/src/mca/ptl/elan/src/ptl_elan_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_init.c @@ -83,7 +83,7 @@ ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) /* * XXX: Leave the following later after testing of QDMA is done */ - if (OMPI_SUCCESS != ompi_init_elan_rdma (emp, rail_count)) { + if (OMPI_SUCCESS != ompi_init_elan_putget (emp, rail_count)) { return OMPI_ERROR; } diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.c b/src/mca/ptl/elan/src/ptl_elan_priv.c index 5e2b61ca02..e8f0207ed8 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.c +++ b/src/mca/ptl/elan/src/ptl_elan_priv.c @@ -88,7 +88,7 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl, } static void -mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc, +mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, mca_ptl_elan_module_t * ptl, struct mca_ptl_elan_peer_t *ptl_peer, mca_pml_base_send_request_t *pml_req, @@ -97,15 +97,17 @@ mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc, int flags) { int header_length; - mca_ptl_base_header_t *hdr; - int destvp; int size_out; int size_in; int rc = OMPI_SUCCESS; + mca_ptl_base_header_t *hdr; + struct ompi_ptl_elan_qdma_desc_t * desc; + START_FUNC(); + desc = (ompi_ptl_elan_qdma_desc_t *)frag->desc; destvp = ptl_peer->peer_vp; size_in = *size; @@ -127,7 +129,6 @@ mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc, hdr->hdr_match.hdr_tag = pml_req->req_base.req_tag; hdr->hdr_match.hdr_msg_length = pml_req->req_bytes_packed; hdr->hdr_match.hdr_msg_seq = pml_req->req_base.req_sequence; - header_length = sizeof (mca_ptl_base_match_header_t); } else { hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; @@ -149,7 +150,7 @@ mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc, if( offset <= mca_ptl_elan_module.super.ptl_first_frag_size ) { convertor = &pml_req->req_convertor; } else { - convertor = &desc->frag_convertor; + convertor = &frag->frag_base->frag_convertor; ompi_convertor_copy(&pml_req->req_convertor, convertor); ompi_convertor_init_for_send( convertor, @@ -209,6 +210,108 @@ mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc, END_FUNC(); } +static void +mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag, + mca_ptl_elan_module_t * ptl, + struct mca_ptl_elan_peer_t *ptl_peer, + mca_pml_base_send_request_t *pml_req, + size_t offset, + size_t *size, + int flags) +{ + int header_length; + int destvp; + int size_out; + int size_in; + int flags; + int rc = OMPI_SUCCESS; + + struct ompi_ptl_elan_putget_desc_t * desc; + + START_FUNC(); + + desc = (ompi_ptl_elan_putget_desc_t *)frag->desc; + destvp = ptl_peer->peer_vp; + size_in = *size; + + desc->src_elan_addr = MAIN2ELAN (desc->rail->r_ctx, + pml_req->req_base.req_addr); + desc->dst_elan_addr = (E4_Addr)pml_req->req_peer_addr; + + /* initialize convertor */ + if(size_in > 0) { + struct iovec iov; + ompi_convertor_t *convertor; + + if( offset <= mca_ptl_elan_module.super.ptl_first_frag_size ) { + convertor = &pml_req->req_convertor; + } else { + convertor = &frag->frag_base.frag_convertor; + ompi_convertor_copy(&pml_req->req_convertor, convertor); + ompi_convertor_init_for_send( + convertor, + 0, + pml_req->req_base.req_datatype, + pml_req->req_base.req_count, + pml_req->req_base.req_addr, + offset); + } + + /* For now, eager sends are always packed into the descriptor + * TODO: Inline up to 256 bytes (including the header), then + * do a chained send for mesg < first_frag_size */ + iov.iov_base = &desc->buff[header_length]; + iov.iov_len = size_in; + rc = ompi_convertor_pack(convertor, &iov, 1); + if (rc < 0) { + ompi_output (0, "[%s:%d] Unable to pack data\n", + __FILE__, __LINE__); + return; + } + size_out = iov.iov_len; + } else { + size_out = size_in; + } + + *size = size_out; + + desc->main_dma.dma_srcAddr = desc->src_elan_addr; + desc->main_dma.dma_srcAddr = desc->dst_elan_addr; + + /* XXX: no additional flags for the DMA, remote, shmem, qwrite, + * broadcast, etc */ + flags = 0; + + /* XXX: Hardcoded DMA retry count */ + desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE ( + (header_length + size_out), DMA_DataTypeByte, flags, + putget->pg_retryCount); + + /* Just a normal DMA, no need to have additional flags */ + desc->main_dma.dma_cookie = elan4_local_cookie ( + ptl->putget->pg_cpool, + E4_COOKIE_TYPE_LOCAL_DMA, + destvp); + desc->main_dma.dma_vproc = destvp; + + if (CHECK_ELAN) { + char hostname[32]; + + gethostname(hostname, 32); + fprintf(stderr, "[%s send...] destvp %d type %d flag %d size %d\n", + hostname, destvp, hdr->hdr_common.hdr_type, + hdr->hdr_common.hdr_flags, + hdr->hdr_common.hdr_size); + } + + + /* Make main memory coherent with IO domain (IA64) */ + MEMBAR_VISIBLE (); + /*elan4_run_dma_cmd(cmdq, (E4_DMA *)&pd->pd_dma);*/ + END_FUNC(); +} + + int mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc, struct mca_ptl_elan_peer_t *ptl_peer, @@ -219,33 +322,43 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc, { mca_ptl_elan_module_t *ptl; + ptl = &ptl_peer->peer_ptl; + START_FUNC(); if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_QDMA) { struct ompi_ptl_elan_qdma_desc_t *qdma; qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc; - ptl = qdma->ptl; - mca_ptl_elan_init_qdma_ack (qdma, ptl, ptl_peer, sendreq, + mca_ptl_elan_init_qdma_desc (qdma, ptl, ptl_peer, sendreq, offset, size, flags); - elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma); - /*ptl->queue->tx_cmdq->cmdq_flush */ elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq); /* Insert desc into the list of outstanding DMA's */ ompi_list_append (&ptl->queue->tx_desc, (ompi_list_item_t *) desc); + } else if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_PUTGET) { + + struct ompi_ptl_elan_putget_desc_t *pdesc; + + pdesc = (ompi_ptl_elan_putget_desc_t *)desc->desc; + mca_ptl_elan_init_putget_desc (pdesc, ptl, ptl_peer, sendreq, + offset, size, flags); + elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & pdesc->main_dma); + /*ptl->queue->tx_cmdq->cmdq_flush */ + elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq); + + /* Insert desc into the list of outstanding DMA's */ + ompi_list_append (&ptl->queue->put_desc, (ompi_list_item_t *) desc); } else { ompi_output (0, "Other types of DMA are not supported right now \n"); return OMPI_ERROR; } - /*mca_ptl_base_frag_t frag_base; */ - /* fragment state */ desc->frag_base.frag_owner = &ptl_peer->peer_ptl->super; desc->frag_base.frag_peer = ptl_peer; diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.h b/src/mca/ptl/elan/src/ptl_elan_priv.h index 5d8ca64960..52704d591c 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.h +++ b/src/mca/ptl/elan/src/ptl_elan_priv.h @@ -113,8 +113,9 @@ struct ompi_ptl_elan_recv_queue_t { typedef struct ompi_ptl_elan_recv_queue_t ompi_ptl_elan_recv_queue_t; typedef struct { - /* SHOULD BE 128-byte aligned */ - uint8_t data[INPUT_QUEUE_MAX]; /* queue req data packet */ + /* SHOULD BE 128-byte aligned + * queue req data packet */ + /*uint8_t data[INPUT_QUEUE_MAX]; For NIC-based tag-matching*/ /* SHOULD be 32-byte aligned */ E4_Event32 event32; /* Local elan completion event */ } ompi_elan_event_t; @@ -128,7 +129,7 @@ typedef struct { volatile E4_uint64 main_doneWord; \ /* 8 byte aligned */ \ ompi_elan_event_t *elan_data_event; \ - mca_ptl_elan_send_request_t *req; \ + mca_pml_base_send_request_t *req; \ /* 8 byte aligned */ \ int desc_type; \ int desc_status; \ @@ -151,11 +152,12 @@ struct ompi_ptl_elan_qdma_desc_t { uint8_t buff[INPUT_QUEUE_MAX]; /**< queue data */ /* 8 byte aligned */ - ompi_convertor_t frag_convertor; /**< datatype convertor */ + //ompi_convertor_t frag_convertor; /**< datatype convertor */ }; typedef struct ompi_ptl_elan_qdma_desc_t ompi_ptl_elan_qdma_desc_t; struct ompi_ptl_elan_queue_ctrl_t { + /* Transmit Queues */ /** < elan located INPUT_QUEUE_ALIGN'ed with INPUT_QUEUE_SIZE */ E4_InputQueue *input; @@ -184,6 +186,45 @@ struct ompi_ptl_elan_queue_ctrl_t { }; typedef struct ompi_ptl_elan_queue_ctrl_t ompi_ptl_elan_queue_ctrl_t; +struct ompi_ptl_elan_putget_desc_t { + + ELAN_BASE_DESC_FIELDS + /* 8 byte aligned */ + + mca_ptl_elan_module_t *ptl; + RAIL *rail; + /* 8 byte aligned */ + + uint8_t *src_elan_addr; + uint8_t *dst_elan_addr; + /* 8 byte aligned */ +}; +typedef struct ompi_ptl_elan_putget_desc_t ompi_ptl_elan_putget_desc_t; + +struct ompi_ptl_elan_putget_ctrl_t { + + /**