From 7f1815f5b1cfdac85356ce1e3d9563b48fa25e37 Mon Sep 17 00:00:00 2001 From: Weikuan Yu Date: Wed, 1 Sep 2004 17:13:10 +0000 Subject: [PATCH] -- Add a ctrl QDMA to queue structure to avoid using buff-equipped desc. This commit was SVN r2430. --- src/mca/ptl/elan/src/ptl_elan_comm_init.c | 13 ++++++ src/mca/ptl/elan/src/ptl_elan_init.c | 49 ++++++++++++++++++++++- src/mca/ptl/elan/src/ptl_elan_priv.c | 27 ++++++++++++- src/mca/ptl/elan/src/ptl_elan_priv.h | 17 ++++++++ 4 files changed, 103 insertions(+), 3 deletions(-) diff --git a/src/mca/ptl/elan/src/ptl_elan_comm_init.c b/src/mca/ptl/elan/src/ptl_elan_comm_init.c index 298c61c053..7606d60bf7 100644 --- a/src/mca/ptl/elan/src/ptl_elan_comm_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_comm_init.c @@ -142,6 +142,19 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, elan_ptr = (E4_Event32 *) ((char *) elan_ptr + elan_size); frag ++; } + +#if OMPI_PTL_ELAN_THREADING + /* Allocating a DMA:event pair for threads notification */ + queue->last = (ompi_ptl_elan_ctrl_desc_t *) elan4_allocMain ( + rail->r_alloc, main_align, sizeof(ompi_ptl_elan_ctrl_desc_t)); + OMPI_PTL_ELAN_CHECK_UNEX (queue->last, NULL, OMPI_ERROR, 0); + queue->last->elan_event = (E4_Event *) elan4_allocElan ( + rail->r_alloc, elan_align, sizeof(E4_Event32)); + OMPI_PTL_ELAN_CHECK_UNEX (queue->last->elan_event, NULL, OMPI_ERROR, 0); + queue->last->mesg = malloc(sizeof(mca_ptl_base_header_t)); + OMPI_PTL_ELAN_CHECK_UNEX (queue->last->mesg, NULL, OMPI_ERROR, 0); +#endif + flist->fl_num_allocated += flist->fl_num_per_alloc; END_FUNC(PTL_ELAN_DEBUG_INIT); diff --git a/src/mca/ptl/elan/src/ptl_elan_init.c b/src/mca/ptl/elan/src/ptl_elan_init.c index 4a75532cac..ba13f97020 100644 --- a/src/mca/ptl/elan/src/ptl_elan_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_init.c @@ -66,6 +66,7 @@ ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) ptl->ptl_elan_ctx = ems->elan_rail[emp->num_modules]->rail_ctx; ptl->elan_vp = ems->elan_vp; ptl->elan_nvp = 
ems->elan_nvp; + OBJ_CONSTRUCT (&ptl->recv_frags, ompi_list_t); OBJ_CONSTRUCT (&ptl->send_frags, ompi_list_t); OBJ_CONSTRUCT (&ptl->pending_acks, ompi_list_t); @@ -651,7 +652,53 @@ mca_ptl_elan_thread_close (mca_ptl_elan_component_t * emp) num_rails = emp->num_modules; for (i = 0; i < num_rails; i ++) { - /* FIXME: Generate a QUEUE DMA to each thread */ + int header_length; + int destvp; + + mca_ptl_elan_module_t *ptl; + mca_ptl_base_header_t *hdr; + struct ompi_ptl_elan_ctrl_desc_t * desc; + ELAN4_CTX *ctx; + + ptl = mca_ptl_elan_component.modules[i]; + + header_length = sizeof(mca_ptl_base_header_t); + destvp = ptl->elan_vp; + ctx = ptl->ptl_elan_ctx, + desc = ptl->queue->last; + + hdr = (mca_ptl_base_header_t *) desc->mesg; + hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_STOP; + hdr->hdr_common.hdr_flags = 0; /* XXX: to change if needed */ + hdr->hdr_common.hdr_size = sizeof (mca_ptl_base_header_t); + + INITEVENT_WORD (ctx, desc->elan_event, &desc->main_doneWord); + RESETEVENT_WORD (&desc->main_doneWord); + PRIMEEVENT_WORD (ctx, desc->elan_event, 2); + + /* Initialize some of the dma structures */ + desc->main_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event); + desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, ptl->queue->input); + desc->main_dma.dma_dstAddr = 0x0ULL; + desc->main_dma.dma_srcAddr = MAIN2ELAN (ctx, hdr); + desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE (header_length, + DMA_DataTypeByte, + DMA_QueueWrite, 16); + desc->main_dma.dma_cookie = elan4_local_cookie (ptl->queue->tx_cpool, + E4_COOKIE_TYPE_LOCAL_DMA, destvp); + desc->main_dma.dma_vproc = destvp; + + /* finish the recv thread */ + MEMBAR_VISIBLE (); + elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & desc->main_dma); + elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq); + + /* finish the send thread */ + desc->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, ptl->comp->input); + MEMBAR_VISIBLE (); + elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & desc->main_dma); + elan4_flush_cmdq_reorder 
(ptl->queue->tx_cmdq); + MEMBAR_VISIBLE (); } /* Join all threads */ diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.c b/src/mca/ptl/elan/src/ptl_elan_priv.c index 7b561ac56f..11c118297d 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.c +++ b/src/mca/ptl/elan/src/ptl_elan_priv.c @@ -1062,6 +1062,7 @@ mca_ptl_elan_drain_recv (struct mca_ptl_elan_module_t *ptl) rxq = queue->rxq; ctx = ptl->ptl_elan_ctx; +ptl_elan_recv_comp: OMPI_LOCK (&queue->rx_lock); #if OMPI_PTL_ELAN_THREADING rc = mca_ptl_elan_wait_queue(ptl, rxq, 1); @@ -1073,6 +1074,13 @@ mca_ptl_elan_drain_recv (struct mca_ptl_elan_module_t *ptl) header = (mca_ptl_base_header_t *) rxq->qr_fptr; +#if OMPI_PTL_ELAN_THREADING + if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_STOP) { + /* XXX: release the lock and quit the thread */ + OMPI_UNLOCK (&queue->rx_lock); + return OMPI_SUCCESS; + } +#endif switch (header->hdr_common.hdr_type) { case MCA_PTL_HDR_TYPE_MATCH: case MCA_PTL_HDR_TYPE_FRAG: @@ -1125,6 +1133,10 @@ mca_ptl_elan_drain_recv (struct mca_ptl_elan_module_t *ptl) } OMPI_UNLOCK (&queue->rx_lock); +#if OMPI_PTL_ELAN_THREADING + goto ptl_elan_recv_comp; +#endif + END_FUNC(PTL_ELAN_DEBUG_THREAD); return OMPI_SUCCESS; } @@ -1144,6 +1156,7 @@ mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl) comp = ptl->comp; ctx = ptl->ptl_elan_ctx; rxq = comp->rxq; +ptl_elan_send_comp: OMPI_LOCK (&comp->rx_lock); #if OMPI_PTL_ELAN_THREADING /* XXX: block on the recv queue without holding a lock */ @@ -1166,8 +1179,13 @@ mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl) header->hdr_common.hdr_flags, header->hdr_common.hdr_size); - /* FIXME: To handle other different types of headers - * and use a simplied way checking completion */ +#if OMPI_PTL_ELAN_THREADING + if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_STOP) { + /* XXX: release the lock and quit the thread */ + OMPI_UNLOCK (&comp->rx_lock); + return OMPI_SUCCESS; + } +#endif if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_ACK) { frag 
= ((mca_ptl_elan_ack_header_t*)header)->frag; } else { @@ -1207,6 +1225,11 @@ mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl) elan4_flush_cmdq_reorder (rxq->qr_cmdq); } OMPI_UNLOCK (&comp->rx_lock); + +#if OMPI_PTL_ELAN_THREADING + goto ptl_elan_send_comp; +#endif + #else ctx = ptl->ptl_elan_ctx; while (ompi_list_get_size (&ptl->send_frags) > 0) { diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.h b/src/mca/ptl/elan/src/ptl_elan_priv.h index 6522ccf215..17dd68fd57 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.h +++ b/src/mca/ptl/elan/src/ptl_elan_priv.h @@ -139,6 +139,11 @@ enum { MCA_PTL_ELAN_DESC_CACHED = 0x20 }; +/* XXX: Temporary header type used to tell the PTL threads to stop */ +enum { + MCA_PTL_HDR_TYPE_STOP = 0xFFFF +}; + struct ompi_ptl_elan_thread_t { ompi_thread_t thread; @@ -217,6 +222,17 @@ typedef struct ompi_ptl_elan_comp_queue_t ompi_ptl_elan_comp_queue_t; E4_Addr comp_dstAddr; \ /* 8 byte aligned */ +struct ompi_ptl_elan_ctrl_desc_t { + E4_DMA64 main_dma; + /* 8 byte aligned */ + volatile E4_uint64 main_doneWord; + /* 8 byte aligned */ + E4_Event *elan_event; + void *mesg; + /* 8 byte aligned */ +}; +typedef struct ompi_ptl_elan_ctrl_desc_t ompi_ptl_elan_ctrl_desc_t; + struct ompi_ptl_elan_base_desc_t { ELAN_BASE_DESC_FIELDS /* 8 byte aligned */ @@ -251,6 +267,7 @@ struct ompi_ptl_elan_queue_ctrl_t { /* Recv Queue has to be well-aligned */ ompi_ptl_elan_recv_queue_t *rxq; + ompi_ptl_elan_ctrl_desc_t *last; }; typedef struct ompi_ptl_elan_queue_ctrl_t ompi_ptl_elan_queue_ctrl_t;