diff --git a/src/mca/ptl/elan/src/ptl_elan.c b/src/mca/ptl/elan/src/ptl_elan.c index cda0fbdbdf..383bd52dd4 100644 --- a/src/mca/ptl/elan/src/ptl_elan.c +++ b/src/mca/ptl/elan/src/ptl_elan.c @@ -11,6 +11,7 @@ #include "mca/ptl/base/ptl_base_sendfrag.h" #include "mca/pml/base/pml_base_sendreq.h" #include "mca/pml/base/pml_base_recvreq.h" +#include "mca/pml/teg/src/pml_teg_proc.h" #include "mca/ptl/base/ptl_base_recvfrag.h" #include "mca/base/mca_base_module_exchange.h" #include "ptl_elan.h" @@ -170,9 +171,9 @@ mca_ptl_elan_req_init (struct mca_ptl_base_module_t *ptl, { mca_ptl_elan_send_frag_t *desc; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); - desc = mca_ptl_elan_alloc_send_desc(ptl, request, 0); + desc = mca_ptl_elan_alloc_send_desc(ptl, request, MCA_PTL_ELAN_DESC_QDMA); if (NULL == desc) { ompi_output(0, "[%s:%d] Unable to allocate an elan send descriptors \n", @@ -184,7 +185,7 @@ mca_ptl_elan_req_init (struct mca_ptl_base_module_t *ptl, } desc->desc->desc_status = MCA_PTL_ELAN_DESC_CACHED; - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return OMPI_SUCCESS; } @@ -242,9 +243,10 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl, * PML extract an request from PTL component and then use this * a request to ask for a fragment * Is it too deep across stacks to get a request and - * correspondingly multiple LOCKS to go through*/ + * correspondingly multiple LOCKS to go through + */ - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); if (offset == 0) { /* The first fragment uses a cached desc */ desc = ((mca_ptl_elan_send_request_t*)sendreq)->req_frag; @@ -267,7 +269,7 @@ mca_ptl_elan_isend (struct mca_ptl_base_module_t *ptl, /* Update offset */ sendreq->req_offset += size; - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return rc; } @@ -286,12 +288,25 @@ mca_ptl_elan_put (struct mca_ptl_base_module_t *ptl, int rc = OMPI_SUCCESS; mca_ptl_elan_send_frag_t *desc; + + /* PML still utilize this interface the same as a send option. + * So we need to generate a QDMA to the remote side for completion + * notification */ + /* XXX: * Since the address passed down from PML does not provide * elan information, so there needs to be a change */ - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_PUT); + + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_PUT) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] ptl %p ptl_peer %p req %p offset %d" + " size %d flags %d \n", + hostname, __FUNCTION__, __LINE__, + ptl, ptl_peer, sendreq, offset, size, flags); + } desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, MCA_PTL_ELAN_DESC_PUT); if (NULL == desc) { @@ -305,7 +320,7 @@ mca_ptl_elan_put (struct mca_ptl_base_module_t *ptl, sendreq, offset, &size, flags); /* Update all the sends until the put is done */ - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return rc; } @@ -331,7 +346,7 @@ mca_ptl_elan_get (struct mca_ptl_base_module_t *ptl, * elan information, so there needs to be a change */ - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); desc = mca_ptl_elan_alloc_send_desc(ptl, sendreq, MCA_PTL_ELAN_DESC_GET); if (NULL == desc) { @@ -345,7 +360,7 @@ mca_ptl_elan_get (struct mca_ptl_base_module_t *ptl, sendreq, offset, &size, flags); /* Update all the sends until the put is done */ - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); #endif return rc; } @@ -370,13 +385,34 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl, request = frag->frag_request; recv_frag = (mca_ptl_elan_recv_frag_t * ) frag; +#if 1 + /* XXX: If to change in PML and PTL/base + * Two places to setting the frag_peer after match + * teg_recvreq.c:157 or ptl_base_match.c:131 + */ + + /* Makeup for setting up peer information + for elan's connectionless nature */ + { + mca_pml_proc_t* proc; + mca_ptl_proc_t* ptl_proc; + + proc = mca_pml_teg_proc_lookup_remote(request->req_base.req_comm, + header->hdr_match.hdr_src); + + THREAD_SCOPED_LOCK(&proc->proc_lock, + (ptl_proc = mca_ptl_array_get_next(&proc->proc_ptl_first))); + frag->frag_base.frag_peer = ptl_proc->ptl_peer; + } +#endif + if (header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) #if 1 /* Basic ACK scheme following TCP cases */ { mca_ptl_elan_send_frag_t *desc; /* Get a frag desc and allocate a send desc */ - desc = mca_ptl_elan_alloc_send_desc(ptl, NULL, 0); + desc = mca_ptl_elan_alloc_send_desc(ptl, NULL, MCA_PTL_ELAN_DESC_QDMA); if (NULL == desc) { ompi_output(0, diff --git a/src/mca/ptl/elan/src/ptl_elan.h b/src/mca/ptl/elan/src/ptl_elan.h index a87c96ecfa..be85d4dafa 100644 --- a/src/mca/ptl/elan/src/ptl_elan.h +++ b/src/mca/ptl/elan/src/ptl_elan.h @@ -70,9 +70,7 @@ struct mca_ptl_elan_component_t { struct mca_ptl_elan_module_t **elan_ptl_modules; /**< array of available PTL modules */ struct mca_ptl_elan_proc_t *elan_local; ompi_mutex_t elan_lock; /**< lock for module state */ - ompi_list_t elan_procs; /**< elan proc's */ - ompi_list_t elan_send_frags; /**< outstanding send/put/get */ ompi_list_t elan_recv_frags; /**< outstanding recv's */ ompi_list_t elan_pending_acks; /**< recv's with ack to send */ diff --git a/src/mca/ptl/elan/src/ptl_elan_comm_init.c b/src/mca/ptl/elan/src/ptl_elan_comm_init.c index 1d7fed76e4..857774d294 100644 --- a/src/mca/ptl/elan/src/ptl_elan_comm_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_comm_init.c @@ -47,7 +47,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, ompi_ptl_elan_qdma_desc_t *desc; E4_Event *elan_ptr; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); rail = (RAIL *) ptl->ptl_elan_rail; ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx; @@ -113,12 +113,13 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, } flist->fl_num_allocated += flist->fl_num_per_alloc; - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return OMPI_SUCCESS; } static void mca_ptl_elan_putget_desc_contruct ( + mca_ptl_elan_send_frag_t *frag, ELAN4_CTX *ctx, ompi_ptl_elan_putget_desc_t *desc, EVENT *elan_event, @@ -133,6 +134,19 @@ mca_ptl_elan_putget_desc_contruct ( desc->main_dma.dma_cookie = 0; desc->main_dma.dma_vproc = 0; + desc->elan_event = elan_event; + desc->chain_event= (E4_Event32 *) + ((char *)elan_event + sizeof (E4_Event)); + desc->chain_buff = (E4_Addr *) + ((char *)elan_event + 2*sizeof (E4_Event)); + + if (PTL_ELAN_DEBUG_PUT & PTL_ELAN_DEBUG_FLAG) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] frag %p desc %p chain_buff %p chain_event %p \n", + hostname, __FUNCTION__, __LINE__, + frag, desc, desc->chain_buff, desc->chain_event); + } + /* Remember all the address needs to be converted * before assigning to DMA descritpor */ desc->main_dma.dma_srcAddr = src_elan4_addr; @@ -152,25 +166,23 @@ mca_ptl_elan_putget_desc_contruct ( mb(); } -#define OMPI_ELAN_DESC_LIST(ctx, flist, frag, dp, eptr, msize, esize, local)\ +#define OMPI_ELAN_PUTGET_GROW(ctx, flist, frag, dp, eptr, msize, esize, local)\ do { \ int i; \ for (i = 0; i < flist->fl_num_per_alloc; i++) { \ ompi_list_item_t *item; \ \ - dp->elan_event = eptr; \ frag->desc = (ompi_ptl_elan_base_desc_t *)dp; \ \ /* Initialize some of the dma structures */ \ - mca_ptl_elan_putget_desc_contruct (ctx, dp, \ + mca_ptl_elan_putget_desc_contruct (frag, ctx, dp, \ eptr, 0, 0, local); \ \ item = (ompi_list_item_t *) frag; \ ompi_list_append (&flist->super, item); \ \ /* Progress to the next element */ \ - dp= (ompi_ptl_elan_putget_desc_t *) \ - ((char *)dp + msize); \ + dp= (ompi_ptl_elan_putget_desc_t *) ((char *)dp + msize); \ eptr = (E4_Event *) ((char *) eptr + esize); \ frag ++; \ } \ @@ -195,12 +207,14 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl, ompi_free_list_t *put_list, *get_list; ompi_ptl_elan_putget_desc_t *put_desc, *get_desc; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); main_align = MAX (sizeof (void *), ELAN_ALIGN); elan_align = MAX (sizeof (int *), ELAN_BLOCK_ALIGN); main_size = ALIGNUP(sizeof(ompi_ptl_elan_putget_desc_t), main_align); - elan_size = ALIGNUP(sizeof(E4_Event), elan_align); + + /* Contain elan_event, chain_event and a chain_buff */ + elan_size = ALIGNUP((sizeof(E4_Event32)*2 + ELAN_BLOCK_SIZE), elan_align); rail = (RAIL *) ptl->ptl_elan_rail; ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx; @@ -224,7 +238,7 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl, put_desc = (ompi_ptl_elan_putget_desc_t *) elan4_allocMain ( rail->r_alloc, main_align, main_size * inc_num); OMPI_PTL_ELAN_CHECK_UNEX (put_desc, NULL, OMPI_ERROR, 0); - OMPI_ELAN_DESC_LIST(ctx, put_list, frag, put_desc, elan_ptr, + OMPI_ELAN_PUTGET_GROW(ctx, put_list, frag, put_desc, elan_ptr, main_size, elan_size, 1); OBJ_CONSTRUCT (&putget->get_desc, ompi_list_t); @@ -245,10 +259,10 @@ ompi_ptl_elan_init_putget_ctrl (mca_ptl_elan_module_t * ptl, get_desc = (ompi_ptl_elan_putget_desc_t *) elan4_allocMain ( rail->r_alloc, main_align, main_size * inc_num); OMPI_PTL_ELAN_CHECK_UNEX (get_desc, NULL, OMPI_ERROR, 0); - OMPI_ELAN_DESC_LIST(ctx, get_list, frag, get_desc, elan_ptr, + OMPI_ELAN_PUTGET_GROW(ctx, get_list, frag, get_desc, elan_ptr, main_size, elan_size, 0); - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return OMPI_SUCCESS; } @@ -272,7 +286,7 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp, ELAN4_CTX *ctx; struct mca_ptl_elan_module_t *ptl; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); /* Init the Transmit Queue structure */ for (i = 0; i < num_rails; i++) { @@ -384,7 +398,7 @@ ompi_init_elan_qdma (mca_ptl_elan_component_t * emp, OBJ_CONSTRUCT (&queue->rx_lock, ompi_mutex_t); } - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return (OMPI_SUCCESS); } @@ -399,7 +413,7 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp, ELAN4_CTX *ctx; struct mca_ptl_elan_module_t *ptl; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); /* Init the Transmit Queue structure */ for (i = 0; i < num_rails; i++) { @@ -444,9 +458,10 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp, CQ_SetEventEnableBit, cqp); OMPI_PTL_ELAN_CHECK_UNEX (putget->get_cmdq, NULL, OMPI_ERROR, 0); - /* Simple report on the command queue parameters */ - elan4_disp_cmdq_params (ptl->putget->put_cmdq); - elan4_disp_cmdq_params (ptl->putget->get_cmdq); + /* XXX: With elan4_disp_cmdq_params(), + * put_cmdq->cmd_flush == elan4_flush_cmdq_reorder + * get_cmdq->cmd_flush == elan4_flush_cmdq_reorder + */ putget->pg_cmdStream = malloc(PAGESIZE); OMPI_PTL_ELAN_CHECK_UNEX (putget->pg_cmdStream, NULL, OMPI_ERROR, 0); @@ -462,7 +477,7 @@ ompi_init_elan_putget (mca_ptl_elan_component_t * emp, ompi_ptl_elan_init_putget_ctrl (ptl, rail, putget, 0, 16, 32); } - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return (OMPI_SUCCESS); } diff --git a/src/mca/ptl/elan/src/ptl_elan_component.c b/src/mca/ptl/elan/src/ptl_elan_component.c index dc92c7f18c..7923dd905f 100644 --- a/src/mca/ptl/elan/src/ptl_elan_component.c +++ b/src/mca/ptl/elan/src/ptl_elan_component.c @@ -26,11 +26,6 @@ #include "ptl_elan_frag.h" #include "ptl_elan_priv.h" -#ifdef CHECK_ELAN -#undef CHECK_ELAN -#define CHECK_ELAN 0 -#endif - extern ompi_proc_t *ompi_proc_local_proc; mca_ptl_elan_component_t mca_ptl_elan_component = { @@ -99,23 +94,25 @@ static int mca_ptl_elan_component_register (mca_ptl_elan_component_t *emp) int mca_ptl_elan_component_open (void) { - /* register super module parameters */ + /* FIXME: register the default super module parameters, + * Some sanity checking is needed to ensure that user + * would not provide unrealistic parameters.*/ mca_ptl_elan_module.super.ptl_exclusivity = mca_ptl_elan_param_register_int ("exclusivity", 0); mca_ptl_elan_module.super.ptl_first_frag_size = mca_ptl_elan_param_register_int ("first_frag_size", - (2048 - sizeof(mca_ptl_base_header_t))/*magic*/); + (PTL_ELAN_INPUT_QUEUE_MAX - sizeof(mca_ptl_base_header_t))); mca_ptl_elan_module.super.ptl_min_frag_size = mca_ptl_elan_param_register_int ("min_frag_size", - (2048 - sizeof(mca_ptl_base_header_t))/*magic*/); + (PTL_ELAN_INPUT_QUEUE_MAX - sizeof(mca_ptl_base_header_t))); mca_ptl_elan_module.super.ptl_max_frag_size = - mca_ptl_elan_param_register_int ("max_frag_size", 2<<30); + mca_ptl_elan_param_register_int ("max_frag_size", 2<<31); /* register ELAN module parameters */ elan_mp->elan_free_list_num = mca_ptl_elan_param_register_int ("free_list_num", 32); elan_mp->elan_free_list_max = - mca_ptl_elan_param_register_int ("free_list_max", 1024); + mca_ptl_elan_param_register_int ("free_list_max", 128); elan_mp->elan_free_list_inc = mca_ptl_elan_param_register_int ("free_list_inc", 32); @@ -129,7 +126,6 @@ mca_ptl_elan_component_open (void) OBJ_CONSTRUCT (&elan_mp->elan_pending_acks, ompi_list_t); OBJ_CONSTRUCT (&elan_mp->elan_recv_frags, ompi_list_t); OBJ_CONSTRUCT (&elan_mp->elan_send_frags, ompi_list_t); - OBJ_CONSTRUCT (&elan_mp->elan_recv_frags_free, ompi_free_list_t); /* initialize other objects */ @@ -203,9 +199,9 @@ mca_ptl_elan_component_init (int *num_ptl_modules, *num_ptl_modules = 0; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); - if (CHECK_ELAN) { + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_INIT) { char hostname[32]; gethostname(hostname, 32); fprintf(stderr, "[%s:%s:%d] debugging ...\n", hostname, __FUNCTION__, __LINE__); @@ -216,19 +212,6 @@ mca_ptl_elan_component_init (int *num_ptl_modules, *allow_multi_user_threads = true; *have_hidden_threads = OMPI_HAVE_THREADS; - if (CHECK_ELAN) { - char hostname[32]; gethostname(hostname, 32); - fprintf(stderr, "[%s:%s:%d] before list init...\n", - hostname, __FUNCTION__, __LINE__); - } - - if (CHECK_ELAN) { - char hostname[32]; gethostname(hostname, 32); - fprintf(stderr, "[%s:%s:%d] after list init...\n", - hostname, __FUNCTION__, __LINE__); - } - - ompi_free_list_init (&(elan_mp->elan_recv_frags_free), sizeof (mca_ptl_elan_recv_frag_t), OBJ_CLASS (mca_ptl_elan_recv_frag_t), @@ -236,12 +219,6 @@ mca_ptl_elan_component_init (int *num_ptl_modules, elan_mp->elan_free_list_max, elan_mp->elan_free_list_inc, NULL); - if (CHECK_ELAN) { - char hostname[32]; gethostname(hostname, 32); - fprintf(stderr, "[%s:%s:%d] after list init...\n", - hostname, __FUNCTION__, __LINE__); - } - /* open basic elan device */ if (OMPI_SUCCESS != ompi_mca_ptl_elan_init(&mca_ptl_elan_component)) { ompi_output(0, @@ -250,7 +227,8 @@ mca_ptl_elan_component_init (int *num_ptl_modules, return NULL; } - if (OMPI_SUCCESS != mca_ptl_elan_component_register(&mca_ptl_elan_component)) { + if (OMPI_SUCCESS != + mca_ptl_elan_component_register(&mca_ptl_elan_component)) { ompi_output(0, "[%s:%d] error in registering with Runtime/OOB \n", __FILE__, __LINE__); @@ -271,7 +249,7 @@ mca_ptl_elan_component_init (int *num_ptl_modules, *num_ptl_modules = elan_mp->elan_num_ptl_modules; mca_ptl_elan_component_initialized = true; - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return ptls; } @@ -296,8 +274,8 @@ static int times = 0; int mca_ptl_elan_component_progress (mca_ptl_tstamp_t tstamp) { - START_FUNC(); - /*if (times <= -1000)*/ + START_FUNC(PTL_ELAN_DEBUG_NONE); +#if 0 if (times <= -1) { char hostname[32]; gethostname(hostname, 32); @@ -307,8 +285,9 @@ mca_ptl_elan_component_progress (mca_ptl_tstamp_t tstamp) } else { times ++; } +#endif mca_ptl_elan_drain_recv(elan_mp); mca_ptl_elan_update_desc(elan_mp); - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return OMPI_SUCCESS; } diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.c b/src/mca/ptl/elan/src/ptl_elan_frag.c index 829d5e6757..cc576de216 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.c +++ b/src/mca/ptl/elan/src/ptl_elan_frag.c @@ -90,14 +90,14 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr, ompi_list_item_t *item; mca_ptl_elan_send_frag_t *desc; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); /* For now, bind to queue DMA directly */ - if (MCA_PTL_ELAN_DESC_QDMA) { + if (MCA_PTL_ELAN_DESC_QDMA == desc_type) { flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->queue)->tx_desc_free; - } else if (MCA_PTL_ELAN_DESC_PUT) { + } else if (MCA_PTL_ELAN_DESC_PUT == desc_type) { flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->put_desc_free; - } else if (MCA_PTL_ELAN_DESC_GET) { + } else if (MCA_PTL_ELAN_DESC_GET == desc_type) { /*struct mca_ptl_elan_peer_t *peer;*/ flist = &(((mca_ptl_elan_module_t *) ptl_ptr)->putget)->get_desc_free; } else { @@ -143,7 +143,7 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_base_module_t *ptl_ptr, desc->desc->desc_type = desc_type; - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return desc; } @@ -167,6 +167,16 @@ mca_ptl_elan_send_desc_done ( header = &desc->frag_base.frag_header; queue = ptl->queue; + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_SEND) { + char hostname[32]; + gethostname(hostname, 32); + + fprintf(stderr, "req %p flag %d, length %d\n", + req, + header->hdr_common.hdr_flags, + header->hdr_frag.hdr_frag_length); + } + if(NULL == req) { /* An ack descriptor */ OMPI_FREE_LIST_RETURN (&queue->tx_desc_free, (ompi_list_item_t *) desc); diff --git a/src/mca/ptl/elan/src/ptl_elan_init.c b/src/mca/ptl/elan/src/ptl_elan_init.c index 078ca8f4e9..5692432628 100644 --- a/src/mca/ptl/elan/src/ptl_elan_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_init.c @@ -24,7 +24,7 @@ ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) mca_ptl_elan_component_t *emp; int rail_count; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); rail_count = ems->elan_nrails; emp = ems->elan_component; @@ -94,7 +94,7 @@ ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) return OMPI_ERROR; } - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return (OMPI_SUCCESS); } @@ -248,12 +248,6 @@ ompi_module_elan_close_procs (mca_ptl_elan_component_t * emp, /* TODO: find the ones that are still there and free them */ } -static void -ompi_init_elan_queue_events (ompi_ptl_elan_queue_ctrl_t * queue) -{ - -} - ELAN_SLEEP * ompi_init_elan_sleepdesc (mca_ptl_elan_state_t * ems, RAIL * rail) @@ -316,7 +310,7 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_component_t * emp) mca_ptl_elan_state_t *ems; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); ems = &mca_ptl_elan_global_state; @@ -534,7 +528,7 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_component_t * emp) return OMPI_ERROR; } - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return (OMPI_SUCCESS); } diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.c b/src/mca/ptl/elan/src/ptl_elan_priv.c index 78e3bbb0f7..fd40075613 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.c +++ b/src/mca/ptl/elan/src/ptl_elan_priv.c @@ -22,11 +22,12 @@ mca_ptl_elan_data_frag (struct mca_ptl_elan_module_t *ptl, /* Allocate a recv frag descriptor */ mca_ptl_elan_recv_frag_t *recv_frag; ompi_list_item_t *item; - /*mca_pml_base_recv_request_t *request;*/ bool matched; int rc = OMPI_SUCCESS; + START_FUNC(PTL_ELAN_DEBUG_RECV); + OMPI_FREE_LIST_GET (&mca_ptl_elan_component.elan_recv_frags_free, item, rc); @@ -42,10 +43,22 @@ mca_ptl_elan_data_frag (struct mca_ptl_elan_module_t *ptl, recv_frag = (mca_ptl_elan_recv_frag_t *) item; recv_frag->frag_recv.frag_base.frag_owner = (mca_ptl_base_module_t *) ptl; - /* XXX: - * Since elan is not connection oriented, - * No information about which peer until checking the header + * Another problem caused by TCP oriented PML. + * a) Since elan is not connection oriented, + * No information about which peer until checking the header + * Somewhere after the frag is matched, this peer information needs + * to be filled in so that ACK can be sent out. + * + * b) Possibly, another drawback of hooking the ack to the particular + * recv fragment. If the ack fragment is not hooked this way, + * PML will provide the peer information when the ack is requested. + * + * c) What if the recv request specifies MPI_ANY_SOURCE, then + * for the handshaking to complete, peer should be fixed the + * handshaking. Then in this case, PML needs information from + * PTL to know about which peer this data is from. + * So PTL has to provide the peer information to PML. */ recv_frag->frag_recv.frag_base.frag_peer = NULL; recv_frag->frag_recv.frag_request = NULL; @@ -125,7 +138,7 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, mca_ptl_base_header_t *hdr; struct ompi_ptl_elan_qdma_desc_t * desc; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); desc = (ompi_ptl_elan_qdma_desc_t *)frag->desc; destvp = ptl_peer->peer_vp; @@ -140,7 +153,8 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, hdr->hdr_frag.hdr_frag_offset = offset; hdr->hdr_frag.hdr_frag_seq = 0; hdr->hdr_frag.hdr_src_ptr.lval = 0; - hdr->hdr_frag.hdr_src_ptr.pval = desc; + /* Frag descriptor, so that incoming ack will locate it */ + hdr->hdr_frag.hdr_src_ptr.pval = frag; hdr->hdr_frag.hdr_dst_ptr.lval = 0; hdr->hdr_match.hdr_contextid = pml_req->req_base.req_comm->c_contextid; @@ -157,11 +171,19 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, hdr->hdr_frag.hdr_frag_offset = offset; hdr->hdr_frag.hdr_frag_seq = 0; hdr->hdr_frag.hdr_src_ptr.lval = 0; - hdr->hdr_frag.hdr_src_ptr.pval = desc; + hdr->hdr_frag.hdr_src_ptr.pval = frag; /* Frag descriptor */ hdr->hdr_frag.hdr_dst_ptr = pml_req->req_peer_match; header_length = sizeof (mca_ptl_base_frag_header_t); } + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_SEND) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] frag %p req %p \n", + hostname, __FUNCTION__, __LINE__, + hdr->hdr_frag.hdr_src_ptr.pval, + hdr->hdr_frag.hdr_dst_ptr.pval); + } + /* initialize convertor */ if(size_in > 0) { struct iovec iov; @@ -200,6 +222,11 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, *size = size_out; hdr->hdr_frag.hdr_frag_length = size_out; + /* TODO: + * For now just save the information to the provided header + * Later will use the inline header to report the progress */ + frag->frag_base.frag_header = *hdr; + desc->main_dma.dma_srcAddr = MAIN2ELAN (desc->ptl->ptl_elan_ctx, &desc->buff[0]); @@ -213,7 +240,7 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, elan4_local_cookie (ptl->queue->tx_cpool, E4_COOKIE_TYPE_LOCAL_DMA, destvp); - if (CHECK_ELAN) { + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_SEND) { char hostname[32]; gethostname(hostname, 32); @@ -227,7 +254,7 @@ mca_ptl_elan_init_qdma_desc (struct mca_ptl_elan_send_frag_t *frag, /* Make main memory coherent with IO domain (IA64) */ MEMBAR_VISIBLE (); - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); } static void @@ -249,7 +276,15 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag, mca_ptl_base_header_t *hdr; - START_FUNC(); + START_FUNC((PTL_ELAN_DEBUG_PUT | PTL_ELAN_DEBUG_GET)); + + if (PTL_ELAN_DEBUG_FLAG & (PTL_ELAN_DEBUG_PUT|PTL_ELAN_DEBUG_GET)) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] frag %p ptl %p ptl_peer %p req %p " + "offset %d size %d flags %d \n", + hostname, __FUNCTION__, __LINE__, + frag, ptl, ptl_peer, pml_req, offset, *size, flags); + } hdr = &frag->frag_base.frag_header; desc = (ompi_ptl_elan_putget_desc_t *)frag->desc; @@ -316,9 +351,6 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag, * broadcast, etc */ flags = 0; -#define MCA_PTL_ELAN_USE_CHAINED_DMA 0 - -#if defined(MCA_PTL_ELAN_USE_CHAINED_DMA) /* Setup a chained DMA * FIXME: remember */ @@ -333,6 +365,7 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag, elan4_main2elan (ctx, (void *) hdr); desc->chain_dma.dma_dstAddr = 0x0ULL; desc->chain_dma.dma_srcEvent = SDRAM2ELAN (ctx, desc->elan_event); + /* causes the inputter to redirect the dma to the inputq */ desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx, (void *) ptl->queue->input); @@ -346,9 +379,17 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag, desc->chain_dma.dma_typeSize |= RUN_DMA_CMD; desc->chain_dma.dma_pad = NOP_CMD; + + if (PTL_ELAN_DEBUG_PUT & PTL_ELAN_DEBUG_FLAG) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] desc %p chain_buff %p chain_event %p \n", + hostname, __FUNCTION__, __LINE__, + desc, desc->chain_buff, desc->chain_event); + } /* Copy down the chain dma to the chain buffer in elan sdram */ memcpy ((void *)desc->chain_buff, (void *)&desc->chain_dma, sizeof (E4_DMA64)); + desc->chain_event->ev_CountAndType = E4_EVENT_INIT_VALUE(-32, E4_EVENT_COPY, E4_EVENT_DTYPE_LONG, 8); desc->chain_event->ev_Params[0] = elan4_main2elan (ctx, @@ -359,7 +400,6 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag, * Allocate space from command queues hanged off the CTX. */ desc->chain_event->ev_Params[1] = elan4_alloccq_space (ctx, 8, CQ_Size8K); -#endif desc->main_dma.dma_srcAddr = desc->src_elan_addr; desc->main_dma.dma_dstAddr = desc->dst_elan_addr; @@ -380,7 +420,7 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag, destvp); desc->main_dma.dma_vproc = destvp; - if (CHECK_ELAN) { + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_PUT) { char hostname[32]; gethostname(hostname, 32); @@ -392,7 +432,7 @@ mca_ptl_elan_init_putget_desc (struct mca_ptl_elan_send_frag_t *frag, /* Make main memory coherent with IO domain (IA64) */ MEMBAR_VISIBLE (); - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); } int @@ -407,7 +447,7 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc, ptl = ptl_peer->peer_ptl; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_QDMA) { struct ompi_ptl_elan_qdma_desc_t *qdma; @@ -454,7 +494,7 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc, desc->frag_progressed = 0; desc->frag_ack_pending = 0; /* this is ack for internal elan */ - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return OMPI_SUCCESS; } @@ -471,7 +511,7 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl, int destvp; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_ACK); destvp = ((mca_ptl_elan_peer_t *) recv_frag->frag_recv.frag_base.frag_peer)->peer_vp; @@ -488,14 +528,34 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl, hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_size = sizeof(mca_ptl_base_ack_header_t); + /* Remote send fragment descriptor */ hdr->hdr_ack.hdr_src_ptr = recv_frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_src_ptr; + + /* Matched request from recv side */ hdr->hdr_ack.hdr_dst_match.lval = 0; hdr->hdr_ack.hdr_dst_match.pval = request; hdr->hdr_ack.hdr_dst_addr.lval = 0; + + /* FIXME: this needs to be some offsete from the base addr */ hdr->hdr_ack.hdr_dst_addr.pval = request->req_base.req_addr; - hdr->hdr_ack.hdr_dst_size = request->req_bytes_packed; - hdr->hdr_frag.hdr_frag_length = sizeof(mca_ptl_base_ack_header_t); + + /* FIXME: posted buffer size is the leftover */ + hdr->hdr_ack.hdr_dst_size = + request->req_bytes_packed - request->req_bytes_received; + + /* XXX: No need to set the fragment size */ + /*hdr->hdr_common.hdr_frag_length = sizeof(mca_ptl_base_ack_header_t);*/ + + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_ACK) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] frag %p req %p buffer %p size %d \n", + hostname, __FUNCTION__, __LINE__, + hdr->hdr_ack.hdr_src_ptr.pval, + hdr->hdr_ack.hdr_dst_match.pval, + hdr->hdr_ack.hdr_dst_addr.pval, + hdr->hdr_ack.hdr_dst_size); + } /* Filling up QDMA descriptor */ qdma->main_dma.dma_srcAddr = elan4_main2elan( @@ -530,61 +590,10 @@ mca_ptl_elan_start_ack ( mca_ptl_base_module_t * ptl, desc->frag_progressed = 0; desc->desc->desc_status = MCA_PTL_ELAN_DESC_LOCAL; - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return OMPI_SUCCESS; } -#if 0 -int -mca_ptl_elan_start_ack (mca_ptl_elan_send_frag_t * desc, - struct mca_ptl_elan_peer_t *ptl_peer, - struct mca_pml_base_send_request_t *sendreq, - size_t offset, - size_t *size, - int flags) -{ - mca_ptl_elan_module_t *ptl; - - START_FUNC(); - - if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_QDMA) { - struct ompi_ptl_elan_qdma_desc_t *qdma; - - qdma = (ompi_ptl_elan_qdma_desc_t *)desc->desc; - ptl = qdma->ptl; - - mca_ptl_elan_init_qdma_desc (qdma, ptl, ptl_peer, sendreq, - offset, size, flags); - - elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma); - - /*ptl->queue->tx_cmdq->cmdq_flush */ - elan4_flush_cmdq_reorder (ptl->queue->tx_cmdq); - - /* Insert desc into the list of outstanding DMA's */ - ompi_list_append (&ptl->queue->tx_desc, (ompi_list_item_t *) desc); - - } else { - ompi_output (0, - "Other types of DMA are not supported right now \n"); - return OMPI_ERROR; - } - - /*mca_ptl_base_frag_t frag_base; */ - - /* fragment state */ - desc->frag_base.frag_owner = &ptl_peer->peer_ptl->super; - desc->frag_base.frag_peer = ptl_peer; - desc->frag_base.frag_addr = NULL; - desc->frag_base.frag_size = *size; - desc->frag_progressed = 0; - desc->frag_ack_pending = 0; /* this is ack for internal elan */ - - END_FUNC(); - return OMPI_SUCCESS; -} -#endif - int mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp) { @@ -597,7 +606,7 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp) int i; int rc; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); num_ptl_modules = emp->elan_num_ptl_modules; /* Iterate over all the PTL input Queues */ @@ -622,7 +631,7 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp) header = (mca_ptl_base_header_t *) rxq->qr_fptr; - if (CHECK_ELAN) { + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_RECV) { char hostname[32]; gethostname(hostname, 32); @@ -688,7 +697,7 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_component_t * emp) OMPI_UNLOCK (&queue->rx_lock); } - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return OMPI_SUCCESS; } @@ -704,7 +713,7 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp) int i; int rc = 0; - START_FUNC(); + START_FUNC(PTL_ELAN_DEBUG_NONE); num_ptl_modules = emp->elan_num_ptl_modules; @@ -737,7 +746,7 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp) req = (mca_ptl_elan_send_request_t *)qdma->req; header = (mca_ptl_base_header_t *)&qdma->buff[0]; - if (CHECK_ELAN) { + if (PTL_ELAN_DEBUG_FLAG & PTL_ELAN_DEBUG_SEND) { char hostname[32]; gethostname(hostname, 32); @@ -762,6 +771,6 @@ mca_ptl_elan_update_desc (mca_ptl_elan_component_t * emp) } /* end of the while loop */ } /* end of the for loop */ - END_FUNC(); + END_FUNC(PTL_ELAN_DEBUG_NONE); return OMPI_SUCCESS; } diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.h b/src/mca/ptl/elan/src/ptl_elan_priv.h index 3df8602977..ec19fb4941 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.h +++ b/src/mca/ptl/elan/src/ptl_elan_priv.h @@ -38,6 +38,26 @@ #include "init_sys.h" #include "elan4/events.h" +#define PTL_ELAN_DEBUG_NONE (0x000) +#define PTL_ELAN_DEBUG_INIT (0x001) +#define PTL_ELAN_DEBUG_FIN (0x002) +#define PTL_ELAN_DEBUG_QDESC (0x004) +#define PTL_ELAN_DEBUG_RDESC (0x008) + +#define PTL_ELAN_DEBUG_SEND (0x010) +#define PTL_ELAN_DEBUG_RECV (0x020) +#define PTL_ELAN_DEBUG_ACK (0x040) +#define PTL_ELAN_DEBUG_PROG (0x080) + +#define PTL_ELAN_DEBUG_QDMA (0x100) +#define PTL_ELAN_DEBUG_PUT (0x200) +#define PTL_ELAN_DEBUG_GET (0x400) +#define PTL_ELAN_DEBUG_CHAIN (0x800) + +/* For now only debug send's */ +#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_NONE \ + | PTL_ELAN_DEBUG_SEND | PTL_ELAN_DEBUG_PUT) + #define OMPI_PTL_ELAN_CHECK_UNEX(value, unexp, errno, output) \ do { \ if (value == unexp) { \ @@ -48,29 +68,25 @@ } \ } while (0) -#define CHECK_ELAN 1 - -#if CHECK_ELAN && 0 -#define START_FUNC() \ +#define START_FUNC(flag) \ do { \ - char hostname[32]; gethostname(hostname, 32); \ - fprintf(stderr, "[%s:%s:%d] Entering ...\n", \ - hostname, __FUNCTION__, __LINE__); \ - } while (0); + if (PTL_ELAN_DEBUG_FLAG & flag) { \ + char hostname[32]; gethostname(hostname, 32); \ + fprintf(stderr, "[%s:%s:%d] Entering ...\n", \ + hostname, __FUNCTION__, __LINE__); \ + } \ + } while (0) -#define END_FUNC() \ +#define END_FUNC(flag) \ do { \ - char hostname[32]; gethostname(hostname, 32); \ - fprintf(stderr, "[%s:%s:%d] Completes ...\n", \ - hostname, __FUNCTION__, __LINE__); \ - } while (0); + if (PTL_ELAN_DEBUG_FLAG & flag) { \ + char hostname[32]; gethostname(hostname, 32); \ + fprintf(stderr, "[%s:%s:%d] Completes ...\n", \ + hostname, __FUNCTION__, __LINE__); \ + } \ + } while (0) -#else - -#define START_FUNC() -#define END_FUNC() - -#endif +#define PTL_ELAN_INPUT_QUEUE_MAX (2048) enum { /* the first four bits for type */ diff --git a/src/mca/ptl/elan/tests/Makefile.am b/src/mca/ptl/elan/tests/Makefile.am index 03692b11b6..aa478f23bf 100644 --- a/src/mca/ptl/elan/tests/Makefile.am +++ b/src/mca/ptl/elan/tests/Makefile.am @@ -12,7 +12,7 @@ AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include \ LDFLAGS += -L$(prefix)/lib -L../src/.libs -EXECS = qsnet_init qsnet_qdma qsnet_rdma mpitest +EXECS = qsnet_init qsnet_qdma qsnet_rdma mpitest lat units: $(EXECS) @@ -28,5 +28,8 @@ qsnet_rdma: qsnet_rdma.c mpitest: mpi_test.c ${HOME}/installs/openmpi/bin/mpicc -g -o mpitest mpi_test.c +lat: % : %.c + ${HOME}/installs/openmpi/bin/mpicc -g -o lat lat.c + clean: -rm -rf $(EXECS) diff --git a/src/mca/ptl/elan/tests/mpi_test.c b/src/mca/ptl/elan/tests/mpi_test.c index 8095b97769..01bcf24c9f 100644 --- a/src/mca/ptl/elan/tests/mpi_test.c +++ b/src/mca/ptl/elan/tests/mpi_test.c @@ -14,7 +14,7 @@ int main (int argc, char ** argv) MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &proc); MPI_Comm_size(MPI_COMM_WORLD, &nproc); - /*MPI_Barrier(MPI_COMM_WORLD);*/ + MPI_Barrier(MPI_COMM_WORLD); fprintf(stdout, "[%s:%s:%d] done with init \n", hostname, __FUNCTION__, __LINE__); fflush(stdout);