From c9badff0c94118feda657502f440b1692214a08d Mon Sep 17 00:00:00 2001 From: Weikuan Yu Date: Mon, 30 Aug 2004 05:37:14 +0000 Subject: [PATCH] -- Extend ptl_base_ack_header to be mca_ptl_elan_ack_header with a pointer to the fragment. -- Done with shared send completion queue support -- To test thread-based send/recv completion checking Points to be aware of: i) Expand control message header type and optimize completion checking. ii) Introduce additional control message for management functionality. This commit was SVN r2389. --- src/mca/ptl/elan/src/ptl_elan.c | 49 +--------- src/mca/ptl/elan/src/ptl_elan_comm_init.c | 19 +++- src/mca/ptl/elan/src/ptl_elan_frag.h | 15 +-- src/mca/ptl/elan/src/ptl_elan_priv.c | 107 +++++++++++++++------- src/mca/ptl/elan/src/ptl_elan_priv.h | 10 +- 5 files changed, 106 insertions(+), 94 deletions(-) diff --git a/src/mca/ptl/elan/src/ptl_elan.c b/src/mca/ptl/elan/src/ptl_elan.c index 52e64fbf0c..a727386b62 100644 --- a/src/mca/ptl/elan/src/ptl_elan.c +++ b/src/mca/ptl/elan/src/ptl_elan.c @@ -246,9 +246,10 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl, */ START_FUNC(PTL_ELAN_DEBUG_SEND); + { /* FIXME: YUW, remove this block */ - fprintf(stderr, "[proc%s:%s:%d] here\n", + fprintf(stderr, "[proc%s:%s:%d] here \n", getenv("RMS_RANK"), __FILE__, __LINE__); } @@ -366,7 +367,6 @@ mca_ptl_elan_get (struct mca_ptl_base_module_t *ptl, __FILE__, __LINE__); } - /*rc = mca_ptl_elan_start_desc(desc, */ rc = mca_ptl_elan_start_get(desc, (struct mca_ptl_elan_peer_t *)ptl_peer, req, offset, &size, flags); @@ -400,19 +400,6 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl, request = frag->frag_request; recv_frag = (mca_ptl_elan_recv_frag_t * ) frag; - /* FIXME + TODO: Optimized processing fragments - * Pseudocode, for additional processing of fragments - * a) (ACK:no, Get:No) - * Remove the frag. 
no need for further processing - * b) (ACK:yes, Get:No) - * Send an ACK only - * c) (ACK:yes, Get:yes) - * Get a message, update the fragment descriptor and - * then send an ACK, - * d) Consider moving time-consuming tasks to some BH-like - * mechanisms. - */ - if (header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) { int desc_type ; /* Basic ACK scheme following TCP cases */ @@ -450,7 +437,8 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl, #if 1 set = fetchNset (&((mca_ptl_elan_recv_frag_t *)frag)->frag_progressed, 1); #else - set = ompi_atomic_fetch_and_set_int (&((mca_ptl_elan_recv_frag_t *)frag)->frag_progressed, 1); + set = ompi_atomic_fetch_and_set_int ( + &((mca_ptl_elan_recv_frag_t *)frag)->frag_progressed, 1); #endif if (!set) { /* IN TCP case, IO_VEC is first allocated. @@ -482,39 +470,10 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl, header->hdr_frag.hdr_frag_offset); ompi_convertor_unpack(&frag->frag_base.frag_convertor, &iov, 1); } -#if 0 - if (header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) { - /* FIXME: Pseudocode, for additional processing of fragments - * a) (ACK:no, Get:No) - * Remove the frag. no need for further processing - * b) (ACK:yes, Get:No) - * Send an ACK only - * c) (ACK:yes, Get:yes) - * Get a message, update the fragment descriptor and - * then send an ACK, - * d) Consider moving time-consuming tasks to some BH-like - * mechanisms. - */ - } - - frag->frag_base.frag_owner->ptl_recv_progress ( - frag->frag_base.frag_owner, - request, - frag->frag_base.frag_size, - frag->frag_base.frag_size); - - /* FIXME: - * To support the required ACK, do not return - * until the ack is out */ - if (((mca_ptl_elan_recv_frag_t *) frag)->frag_ack_pending == false) - mca_ptl_elan_recv_frag_return (frag->frag_base.frag_owner, - (mca_ptl_elan_recv_frag_t *) frag); -#else /* XXX: progress the request based on the status of this recv frag * It is possible to employ a scheduling logic here. 
* Then Done with this fragment, i.e., data */ mca_ptl_elan_recv_frag_done (header, frag, request); -#endif } } diff --git a/src/mca/ptl/elan/src/ptl_elan_comm_init.c b/src/mca/ptl/elan/src/ptl_elan_comm_init.c index 13bcf93e34..298c61c053 100644 --- a/src/mca/ptl/elan/src/ptl_elan_comm_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_comm_init.c @@ -92,7 +92,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, #if OMPI_PTL_ELAN_COMP_QUEUE /* XXX: provide a DMA structure for each chained event */ desc->comp_dma.dma_typeSize = E4_DMA_TYPE_SIZE ( - sizeof(mca_ptl_base_frag_header_t), + sizeof(mca_ptl_base_header_t), DMA_DataTypeByte, DMA_QueueWrite, 8); desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool, E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp); @@ -120,6 +120,10 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, (E4_Event *)desc->comp_event); desc->main_dma.dma_dstEvent= SDRAM2ELAN (ctx, queue->input); + + LOG_PRINT(PTL_ELAN_DEBUG_NONE, + "desc %p comp_buff %p elan_event %p comp_event %p \n", + desc, desc->comp_buff, desc->elan_event, desc->comp_event); #else /* Initialize some of the dma structures */ desc->main_dma.dma_dstAddr = 0; @@ -145,7 +149,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, } static void -mca_ptl_elan_putget_desc_contruct ( +mca_ptl_elan_putget_desc_construct ( mca_ptl_elan_module_t * ptl, ompi_ptl_elan_putget_desc_t *desc, EVENT *elan_event, @@ -170,9 +174,15 @@ mca_ptl_elan_putget_desc_contruct ( desc->comp_event= (E4_Event *) ((char *)elan_event + 2 * ELAN_BLOCK_SIZE + 2 * sizeof (E4_Event32)); + LOG_PRINT(PTL_ELAN_DEBUG_NONE, + "desc %p chain_buff %p comp_buff %p elan_event %p " + " chain_event %p comp_event %p \n", + desc, desc->chain_buff, desc->comp_buff, desc->elan_event, + desc->chain_event, desc->comp_event); + /* XXX: provide a DMA structure for each chained event */ desc->comp_dma.dma_typeSize = E4_DMA_TYPE_SIZE ( - 
sizeof(mca_ptl_base_frag_header_t), + sizeof(mca_ptl_base_header_t), DMA_DataTypeByte, DMA_QueueWrite, 8); desc->comp_dma.dma_vproc = ptl->elan_vp; desc->comp_dma.dma_srcAddr = 0x0ULL; /* To be filled in */ @@ -196,6 +206,7 @@ mca_ptl_elan_putget_desc_contruct ( desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, (E4_Event *)desc->chain_event); desc->main_dma.dma_dstEvent= 0x0ULL; + #else desc->elan_event = elan_event; desc->chain_event= (E4_Event32 *) @@ -228,7 +239,7 @@ do { \ frag->desc = (ompi_ptl_elan_base_desc_t *)dp; \ \ /* Initialize some of the dma structures */ \ - mca_ptl_elan_putget_desc_contruct (ptl, dp, \ + mca_ptl_elan_putget_desc_construct (ptl, dp, \ eptr, 0, 0, local); \ \ item = (ompi_list_item_t *) frag; \ diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.h b/src/mca/ptl/elan/src/ptl_elan_frag.h index f8d3d04789..908f097935 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.h +++ b/src/mca/ptl/elan/src/ptl_elan_frag.h @@ -22,12 +22,19 @@ struct ompi_ptl_elan_base_desc_t; struct mca_ptl_elan_send_frag_t { mca_ptl_base_frag_t frag_base; + struct ompi_ptl_elan_base_desc_t *desc; volatile int frag_progressed; bool frag_ack_pending; /* Is there an ack to recv */ - struct ompi_ptl_elan_base_desc_t *desc; }; typedef struct mca_ptl_elan_send_frag_t mca_ptl_elan_send_frag_t; +/* XXX: Extend the header a bit with an pointer to frag */ +struct mca_ptl_elan_ack_header_t { + struct mca_ptl_base_ack_header_t base_ack; /* 32 bytes */ + struct mca_ptl_elan_send_frag_t *frag; +}; +typedef struct mca_ptl_elan_ack_header_t mca_ptl_elan_ack_header_t; + /** * ELAN received fragment derived type. 
*/ @@ -37,12 +44,6 @@ struct mca_ptl_elan_recv_frag_t { size_t frag_msg_cnt; volatile int frag_progressed; /* Is it record to request */ bool frag_ack_pending; /* Is there an ack to send */ -#if 0 - union { - struct ompi_ptl_elan_qdma_desc_t *qdma; - struct ompi_ptl_elan_putget_desc_t *putget; - } frag; -#endif char *alloc_buff; char *unex_buff; }; diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.c b/src/mca/ptl/elan/src/ptl_elan_priv.c index 67cf78ded9..57b0fbbc6a 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.c +++ b/src/mca/ptl/elan/src/ptl_elan_priv.c @@ -354,6 +354,16 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, E4_COOKIE_TYPE_LOCAL_DMA, destvp); desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->comp_event); desc->main_dma.dma_srcAddr = MAIN2ELAN (ctx, &desc->buff[0]); + + LOG_PRINT (PTL_ELAN_DEBUG_SEND, + " desc %p comp_buff %p comp_event %p " + "comp src_addr %x main dst_addr %x size %d\n", + desc, + (void *)desc->comp_buff, + (void *)desc->comp_event, + (int)desc->comp_dma.dma_srcAddr, + (int)desc->main_dma.dma_srcAddr, + size_out); #else desc->main_dma.dma_srcAddr = MAIN2ELAN (ctx, &desc->buff[0]); /* XXX: Hardcoded DMA retry count */ @@ -366,6 +376,10 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, desc->main_dma.dma_vproc = destvp; #endif + LOG_PRINT (PTL_ELAN_DEBUG_SEND, + "dest events main %lx \n", + desc->main_dma.dma_dstEvent); + /* Make main memory coherent with IO domain (IA64) */ MEMBAR_VISIBLE (); END_FUNC(PTL_ELAN_DEBUG_SEND); @@ -471,6 +485,12 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag, desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx, (void *) ptl->queue->input); + LOG_PRINT (PTL_ELAN_DEBUG_PUT, + "dest events main %lx chain %lx comp %lx \n", + desc->main_dma.dma_dstEvent, + desc->chain_dma.dma_dstEvent, + desc->comp_dma.dma_dstEvent); + #if OMPI_PTL_ELAN_COMP_QUEUE /* XXX: Chain a QDMA to each queue and * Have all the srcEvent fired to the Queue @@ -484,6 
+504,7 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag, desc->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K); desc->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8); + desc->comp_dma.dma_cookie = elan4_local_cookie(ptl->queue->tx_cpool, E4_COOKIE_TYPE_LOCAL_DMA, ptl->elan_vp); desc->comp_dma.dma_srcAddr = elan4_main2elan (ctx, @@ -503,13 +524,6 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag, desc->chain_dma.dma_typeSize |= RUN_DMA_CMD; desc->chain_dma.dma_pad = NOP_CMD; - LOG_PRINT (PTL_ELAN_DEBUG_FLAG, - " desc %p chain_buff %p chain_event %p " - "src_addr %x dst_addr %x size %d\n", - desc, (void *)desc->chain_buff, (void *)desc->chain_event, - (int)desc->src_elan_addr, - (int)desc->dst_elan_addr, size_out); - /* Copy down the chain dma to the chain buffer in elan sdram */ memcpy ((void *)desc->chain_buff, (void *)&desc->chain_dma, sizeof (E4_DMA64)); @@ -521,16 +535,14 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag, /* XXX: The chain dma will go directly into a command stream * so we need addend the command queue control bits. - * Allocate space from command queues hanged off the CTX. - */ + * Allocate space from command queues hanged off the CTX. */ desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K); desc->main_dma.dma_srcAddr = desc->src_elan_addr; desc->main_dma.dma_dstAddr = desc->dst_elan_addr; desc->main_dma.dma_dstEvent= 0x0ULL; /*disable remote event */ /* Chain an event */ - desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, - (E4_Event *)desc->chain_event); + desc->main_dma.dma_srcEvent= elan4_main2elan(ctx, desc->chain_event); /* FIXME: no additional flags for the DMA, remote, shmem, qwrite, * broadcast, etc. Be sure to correctly setup a chained DMA. 
*/ @@ -543,11 +555,11 @@ mca_ptl_elan_init_put_desc (struct mca_ptl_elan_send_frag_t *frag, destvp); desc->main_dma.dma_vproc = destvp; - LOG_PRINT(PTL_ELAN_DEBUG_MAC, - "destvp %d type %d flag %d size %d\n", - destvp, hdr->hdr_common.hdr_type, - hdr->hdr_common.hdr_flags, - hdr->hdr_common.hdr_size); + LOG_PRINT (PTL_ELAN_DEBUG_PUT, + "chain_event %p param0 %lx param1 %lx \n", + desc->chain_event, + desc->chain_event->ev_Params[0], + desc->chain_event->ev_Params[1]); /* Make main memory coherent with IO domain (IA64) */ MEMBAR_VISIBLE (); @@ -878,6 +890,7 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl, mca_ptl_base_header_t *hdr; mca_pml_base_recv_request_t* request; mca_ptl_elan_module_t *elan_ptl; + ELAN4_CTX *ctx; int destvp; @@ -889,7 +902,8 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl, elan_ptl = (mca_ptl_elan_module_t *) ptl; desc->desc->desc_type = MCA_PTL_ELAN_DESC_QDMA; qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc; - hdr = (mca_ptl_base_header_t *) & qdma->buff[0]; + ctx = elan_ptl->ptl_elan_ctx; + hdr = &desc->frag_base.frag_header; request = recv_frag->frag_recv.frag_request; hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK; @@ -904,27 +918,34 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl, hdr->hdr_ack.hdr_dst_match.lval = 0; hdr->hdr_ack.hdr_dst_match.pval = request; - /* FIXME: this needs to be some offsete from the base addr */ + /* FIXME: this needs to be some offsete from the base addr + * posted buffer size is the leftover */ hdr->hdr_ack.hdr_dst_addr.pval = 0; - hdr->hdr_ack.hdr_dst_addr.lval = elan4_main2elan( - elan_ptl->ptl_elan_ctx, request->req_base.req_addr); - - /* FIXME: posted buffer size is the leftover */ + hdr->hdr_ack.hdr_dst_addr.lval = elan4_main2elan(ctx, + request->req_base.req_addr); hdr->hdr_ack.hdr_dst_size = request->req_bytes_packed - request->req_bytes_received; - LOG_PRINT(PTL_ELAN_DEBUG_ACK, - "remote frag %p local req %p buffer %p size %d \n", - hdr->hdr_ack.hdr_src_ptr.pval, - 
hdr->hdr_ack.hdr_dst_match.pval, - hdr->hdr_ack.hdr_dst_addr.pval, - hdr->hdr_ack.hdr_dst_size); + /* FIXME: save frag descriptor somewhere in the header */ + ((mca_ptl_elan_ack_header_t *) hdr)->frag = desc; - /* Filling up QDMA descriptor */ - qdma->main_dma.dma_srcAddr = elan4_main2elan( - elan_ptl->ptl_elan_ctx, &qdma->buff[0]); + LOG_PRINT(PTL_ELAN_DEBUG_ACK, "desc %p hdr %p \n", desc, hdr); - /* XXX: Hardcoded DMA retry count */ +#if OMPI_PTL_ELAN_COMP_QUEUE + /* XXX: Need to have a way to differentiate different frag */ + qdma->comp_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K); + qdma->comp_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, + E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8); + qdma->comp_dma.dma_cookie = elan4_local_cookie( + elan_ptl->queue->tx_cpool, + E4_COOKIE_TYPE_LOCAL_DMA, + elan_ptl->elan_vp); + qdma->comp_dma.dma_srcAddr = elan4_main2elan (ctx, (void *) hdr); + memcpy ((void *)qdma->comp_buff, (void *)&qdma->comp_dma, + sizeof (E4_DMA64)); + + /* XXX: Hardcoded DMA retry count + * Initialize some of the dma structures */ qdma->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE ( sizeof(mca_ptl_base_ack_header_t), DMA_DataTypeByte, DMA_QueueWrite, 16); @@ -932,6 +953,19 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl, qdma->main_dma.dma_cookie = elan4_local_cookie ( elan_ptl->queue->tx_cpool, E4_COOKIE_TYPE_LOCAL_DMA, destvp); + qdma->main_dma.dma_srcAddr = elan4_main2elan(ctx, (void *) hdr); + qdma->main_dma.dma_srcEvent= elan4_main2elan(ctx, qdma->comp_event); +#else + /* Filling up QDMA descriptor */ + qdma->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE ( + sizeof(mca_ptl_base_ack_header_t), + DMA_DataTypeByte, DMA_QueueWrite, 16); + qdma->main_dma.dma_vproc = destvp; + qdma->main_dma.dma_cookie = elan4_local_cookie ( + elan_ptl->queue->tx_cpool, + E4_COOKIE_TYPE_LOCAL_DMA, destvp); + qdma->main_dma.dma_srcAddr = elan4_main2elan(ctx, (void*)hdr); +#endif /* Make main memory coherent with IO domain (IA64) */ MEMBAR_VISIBLE (); @@ 
-1068,7 +1102,14 @@ mca_ptl_elan_update_desc (struct mca_ptl_elan_module_t *ptl) header->hdr_common.hdr_flags, header->hdr_common.hdr_size); - frag = (mca_ptl_elan_send_frag_t *)header->hdr_frag.hdr_src_ptr.pval; + /* FIXME: To handle other different types of headers + * and use a simplied way checking completion */ + if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_ACK) { + frag = ((mca_ptl_elan_ack_header_t*)header)->frag; + } else { + frag = (mca_ptl_elan_send_frag_t *) + header->hdr_frag.hdr_src_ptr.pval; + } basic = (ompi_ptl_elan_base_desc_t*)frag->desc; /* XXX: please reset additional chained event for put/get desc */ diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.h b/src/mca/ptl/elan/src/ptl_elan_priv.h index db5f484aea..93003749df 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.h +++ b/src/mca/ptl/elan/src/ptl_elan_priv.h @@ -53,7 +53,7 @@ #define PTL_ELAN_DEBUG_CHAIN (0x800) #define PTL_ELAN_DEBUG_FLAG \ -(PTL_ELAN_DEBUG_MAC|PTL_ELAN_DEBUG_SEND|PTL_ELAN_DEBUG_RECV|PTL_ELAN_DEBUG_PUT|PTL_ELAN_DEBUG_GET) +(PTL_ELAN_DEBUG_PUT|PTL_ELAN_DEBUG_GET) #define START_FUNC(flag) \ do { \ @@ -93,11 +93,11 @@ do { \ #define OMPI_PTL_ELAN_LOST_QSLOTS (1) #define OMPI_PTL_ELAN_MAX_QDESCS (128) -#define OMPI_PTL_ELAN_NUM_QDESCS (4) +#define OMPI_PTL_ELAN_NUM_QDESCS (2) #define OMPI_PTL_ELAN_QDMA_RETRY (16) #define OMPI_PTL_ELAN_MAX_PUTGET (32) -#define OMPI_PTL_ELAN_NUM_PUTGET (8) +#define OMPI_PTL_ELAN_NUM_PUTGET (1) #define OMPI_PTL_ELAN_MAX_PGDESC (8) #define OMPI_PTL_ELAN_FASTPATH (0x1) @@ -120,7 +120,7 @@ do { \ do { \ if (value == unexp) { \ ompi_output(output, \ - "[%s:%d] allocate received unexpect value \n", \ + "[%s:%d] alloc received unexpect value \n", \ __FILE__, __LINE__); \ return errno; \ } \ @@ -260,7 +260,7 @@ struct ompi_ptl_elan_putget_desc_t { /* 8 byte aligned */ volatile E4_uint64 chain_doneWord; /* 8 byte aligned */ - E4_Event32 *chain_event; /* E4_Event plus pad */ + E4_Event *chain_event; /* E4_Event plus pad */ E4_Addr *chain_buff; E4_Addr 
src_elan_addr;