diff --git a/src/mca/ptl/elan/src/Makefile.am b/src/mca/ptl/elan/src/Makefile.am index 0675209141..8c6b569ab5 100644 --- a/src/mca/ptl/elan/src/Makefile.am +++ b/src/mca/ptl/elan/src/Makefile.am @@ -9,8 +9,6 @@ AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include \ -I$(top_ompi_srcdir)/src -I$(top_ompi_srcdir)/src/include \ -I/usr/lib/qsnet/elan4/include -#AM_LDFLAGS = -lelan -lelanctrl -lrms -lrmscall -lelan4 - noinst_LTLIBRARIES = libmca_ptl_elan.la libmca_ptl_elan_la_SOURCES = \ elan_config.h \ @@ -25,6 +23,7 @@ libmca_ptl_elan_la_SOURCES = \ ptl_elan_proc.c \ ptl_elan_module.c \ ptl_elan_priv.c \ + ptl_elan_peer.c \ ptl_elan_init.c \ ptl_elan_comm_init.c \ ptl_elan.c diff --git a/src/mca/ptl/elan/src/ptl_elan.c b/src/mca/ptl/elan/src/ptl_elan.c index 459e7015c0..166dcf9174 100644 --- a/src/mca/ptl/elan/src/ptl_elan.c +++ b/src/mca/ptl/elan/src/ptl_elan.c @@ -20,9 +20,8 @@ #include "ptl_elan_frag.h" #include "ptl_elan_priv.h" - /* XXX: There must be multiple PTL's. This could be the template */ -mca_ptl_elan_t mca_ptl_elan = { +mca_ptl_elan_t mca_ptl_elan = { { &mca_ptl_elan_module.super, 0, /* ptl_exclusivity */ @@ -45,72 +44,92 @@ mca_ptl_elan_t mca_ptl_elan = { } }; -int mca_ptl_elan_add_procs (struct mca_ptl_t *ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers, - ompi_bitmap_t* reachable) +int +mca_ptl_elan_add_procs (struct mca_ptl_t *ptl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_ptl_base_peer_t **peers, + ompi_bitmap_t * reachable) { - struct ompi_proc_t *ompi_proc; + struct ompi_proc_t *ompi_proc; mca_ptl_elan_proc_t *ptl_proc; mca_ptl_elan_peer_t *ptl_peer; - int rc; - int i; + int rc; + int i; - for(i=0; iproc_lock); + OMPI_THREAD_LOCK (&ptl_proc->proc_lock); - if(ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) { - OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock); - ompi_output(0, "all peers are taken already\n"); + if (ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) { + OMPI_THREAD_UNLOCK 
(&ptl_proc->proc_lock); + ompi_output (0, "all peers are taken already\n"); return OMPI_ERR_UNREACH; } - /* The ptl_proc datastructure is shared by all TCP PTL - * instances that are trying to reach this destination. - * Cache the peer instance on the ptl_proc. + /* The ptl_proc datastructure is shared by all PTL + * instances that are trying to reach this destination. + * Cache the peer instance on the ptl_proc. */ - ptl_peer = OBJ_NEW(mca_ptl_elan_peer_t); - if(NULL == ptl_peer) { - OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock); + + ptl_peer = OBJ_NEW (mca_ptl_elan_peer_t); + if (NULL == ptl_peer) { + OMPI_THREAD_UNLOCK (&ptl_proc->proc_lock); + ompi_output (0, "[%s:%d] unabled to allocate ptl_peer \n", + __FILE__, __LINE__); return OMPI_ERR_OUT_OF_RESOURCE; } - ptl_peer->peer_ptl = (mca_ptl_elan_t*)ptl; - rc = mca_ptl_elan_proc_insert(ptl_proc, ptl_peer); - if(rc != OMPI_SUCCESS) { - OBJ_RELEASE(ptl_peer); - OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock); + ptl_peer->peer_ptl = (mca_ptl_elan_t *) ptl; + + rc = mca_ptl_elan_proc_insert (ptl_proc, ptl_peer); + if (rc != OMPI_SUCCESS) { + OBJ_RELEASE (ptl_peer); + OMPI_THREAD_UNLOCK (&ptl_proc->proc_lock); + ompi_output (0, "[%s:%d] unabled to insert ptl_peer \n", + __FILE__, __LINE__); return rc; } - ompi_bitmap_set_bit(reachable, i); - OMPI_THREAD_UNLOCK(&ptl_proc->proc_lock); - peers[i] = ptl_peer; + ompi_bitmap_set_bit (reachable, i); + + OMPI_THREAD_UNLOCK (&ptl_proc->proc_lock); + peers[i] = (struct mca_ptl_base_peer_t *) ptl_peer; } return OMPI_SUCCESS; } -int mca_ptl_elan_del_procs (struct mca_ptl_t *ptl, - size_t nprocs, - struct ompi_proc_t ** procs, - struct mca_ptl_base_peer_t **peers) +int +mca_ptl_elan_del_procs (struct mca_ptl_t *ptl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_ptl_base_peer_t **peers) { + int i; + for (i = 0; i < nprocs; i++) { + OBJ_RELEASE (peers[i]); + } return OMPI_SUCCESS; } -int mca_ptl_elan_finalize (struct mca_ptl_t *ptl) +int +mca_ptl_elan_finalize (struct mca_ptl_t 
*ptl) { - int rail_index; - struct mca_ptl_elan_t * elan_ptl ; + int rail_index; + struct mca_ptl_elan_t *elan_ptl; elan_ptl = (struct mca_ptl_elan_t *) ptl; @@ -118,59 +137,58 @@ int mca_ptl_elan_finalize (struct mca_ptl_t *ptl) /* Free the PTL */ rail_index = elan_ptl->ptl_ni_local; - free(elan_ptl); + free (elan_ptl); /* Record the missing of this entry */ mca_ptl_elan_module.elan_ptls[rail_index] = NULL; - mca_ptl_elan_module.elan_num_ptls -- ; + mca_ptl_elan_module.elan_num_ptls--; return OMPI_SUCCESS; } -int mca_ptl_elan_req_alloc (struct mca_ptl_t *ptl, - struct mca_pml_base_send_request_t **request) +int +mca_ptl_elan_req_alloc (struct mca_ptl_t *ptl, + struct mca_pml_base_send_request_t **request) { - int rc = OMPI_SUCCESS; - mca_pml_base_send_request_t* sendreq; - ompi_list_item_t* item; + int rc = OMPI_SUCCESS; + mca_pml_base_send_request_t *sendreq; + ompi_list_item_t *item; -#if 0 - /* PTL_TCP allocate request from the module - * But PTL_ELAN have to allocate from each PTL since - * all the descriptors are bound to the PTL related command queue, etc - * for their functions. 
- */ - OMPI_FREE_LIST_GET(&mca_ptl_elan_module.elan_send_requests, item, rc); + /* FIXME, Error here, rc is passed in by value + * Which will not bring any output from this allocation request */ + OMPI_FREE_LIST_GET (&mca_ptl_elan_module.elan_reqs_free, item, rc); - if(NULL != (sendreq = (mca_pml_base_send_request_t*)item)) + if (NULL != (sendreq = (mca_pml_base_send_request_t *) item)) sendreq->req_owner = ptl; *request = sendreq; -#endif + return rc; } -void mca_ptl_elan_req_return (struct mca_ptl_t *ptl, - struct mca_pml_base_send_request_t *request) +void +mca_ptl_elan_req_return (struct mca_ptl_t *ptl, + struct mca_pml_base_send_request_t *request) { - /*OBJ_DESTRUCT(&request->req_convertor);*/ -#if 0 - OMPI_FREE_LIST_RETURN(&mca_ptl_elan_module.elan_send_requests, - (ompi_list_item_t*)request); -#endif + OMPI_FREE_LIST_RETURN (&mca_ptl_elan_module.elan_reqs_free, + (ompi_list_item_t *) request); return; } -void mca_ptl_elan_recv_frag_return (struct mca_ptl_t *ptl, - struct mca_ptl_elan_recv_frag_t *frag) +void +mca_ptl_elan_recv_frag_return (struct mca_ptl_t *ptl, + struct mca_ptl_elan_recv_frag_t *frag) { + OMPI_FREE_LIST_RETURN(&mca_ptl_elan_module.elan_recv_frags_free, + (ompi_list_item_t*)frag); return; } -void mca_ptl_elan_send_frag_return (struct mca_ptl_t *ptl, - struct mca_ptl_elan_send_frag_t *frag) +void +mca_ptl_elan_send_frag_return (struct mca_ptl_t *ptl, + struct mca_ptl_elan_send_frag_t *frag) { return; } @@ -179,26 +197,60 @@ void mca_ptl_elan_send_frag_return (struct mca_ptl_t *ptl, * Initiate a put operation. 
*/ -int mca_ptl_elan_put (struct mca_ptl_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_pml_base_send_request_t* request, - size_t offset, - size_t size, - int flags) +int +mca_ptl_elan_put (struct mca_ptl_t *ptl, + struct mca_ptl_base_peer_t *ptl_peer, + struct mca_pml_base_send_request_t *sendreq, + size_t offset, + size_t size, + int flags) { - return OMPI_SUCCESS; + mca_ptl_elan_desc_item_t *sd; + + if (size <= 0) { + sendreq->super.req_mpi_done = true; + sendreq->super.req_pml_done = true; + return OMPI_SUCCESS; + } + + /* XXX: fix pml_send? + * Why presenting so many arguments while each of them is already + * contained in the request descriptors, + * + * XXX: + * PML extract an request from PTL module and then use this + * a request to ask for a fragment + * Is it too deep across stacks to get a request and + * correspondingly multiple LOCKS to go through*/ + + sd = mca_ptl_elan_alloc_send_desc(sendreq); + if (NULL == sd) { + ompi_output(0, + "[%s:%d] Unable to allocate an elan send descriptors \n", + __FILE__, __LINE__); + } + + /* Update offset, in TCP case, this is a must. + * XXX: Not sure how it is going to be here */ + sendreq->req_offset += size; + ((struct mca_ptl_elan_send_request_t *)sendreq)->req_frag = sd; + + return mca_ptl_elan_start_desc( + ((struct mca_ptl_elan_send_request_t *)sendreq)->desc_type, sd); } /* - * Initiate a get. + * Get routine. We need an interface that provides one more argument, + * describing the source of the data. 
*/ -int mca_ptl_elan_get (struct mca_ptl_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_pml_base_recv_request_t* request, - size_t offset, - size_t size, - int flags) +int +mca_ptl_elan_get (struct mca_ptl_t *ptl, + struct mca_ptl_base_peer_t *ptl_base_peer, + struct mca_pml_base_recv_request_t *request, + size_t offset, + size_t size, + int flags) { return OMPI_SUCCESS; } @@ -208,8 +260,9 @@ int mca_ptl_elan_get (struct mca_ptl_t* ptl, * ack back to the peer and process the fragment. */ -void mca_ptl_elan_matched (mca_ptl_t * ptl, - mca_ptl_base_recv_frag_t * frag) +void +mca_ptl_elan_matched (mca_ptl_t * ptl, + mca_ptl_base_recv_frag_t * frag) { return; } diff --git a/src/mca/ptl/elan/src/ptl_elan.h b/src/mca/ptl/elan/src/ptl_elan.h index 6d6711e9ed..aa9bb3f6a5 100644 --- a/src/mca/ptl/elan/src/ptl_elan.h +++ b/src/mca/ptl/elan/src/ptl_elan.h @@ -31,8 +31,8 @@ struct mca_ptl_elan_t { mca_ptl_t super; /**< base PTL interface */ /**< The following are elan-related control structures */ - ELAN_RAIL *ptl_elan_rail; /**< Pointer to this Rail */ - ELAN_CTX *ptl_elan_ctx; /**< Elan ctx of this rail */ + ELAN_RAIL *ptl_elan_rail; /**< Pointer to this Rail */ + ELAN_CTX *ptl_elan_ctx; /**< Elan ctx of this rail */ int ptl_ni_local; /**< PTL NI local rank */ int ptl_ni_total; /**< PTL NI total */ @@ -43,30 +43,7 @@ struct mca_ptl_elan_t { struct ompi_ptl_elan_queue_ctrl_t *queue; /**< Queue control structures */ - int elan_sten_size; /**< sten packet len */ - int elan_rdma_size; /**< qdma packet length */ - int elan_qdma_size; /**< qdma packet length */ - - int sten_total; /**< total sten descriptors */ - int rdma_total; /**< total rdma descriptors */ - int qdma_total; /**< total rdma descriptors */ - - int sten_num; /**< num of outstanding sten packets */ - int rdma_num; /**< num of outstanding rdma packets */ - int qdma_num; /**< num of outstanding rdma packets */ - int max_num_dmas; /**< total rdma descriptors */ - - ompi_list_t elan_stens; /**< used 
elan sten descriptors*/ - ompi_list_t elan_dmas; /**< used elan dma descriptors*/ - ompi_list_t elan_rdmas; /**< used elan rdma descriptors */ - ompi_list_t elan_frags; /**< used elan fragments */ - - ompi_free_list_t elan_dmas_free; /**< free elan dma descriptors*/ - ompi_free_list_t elan_stens_free; /**< free elan sten descriptors*/ - ompi_free_list_t elan_rdmas_free; /**< free elan rdma descriptors */ - ompi_free_list_t elan_frags_free; /**< free elan rdma fragments */ - }; typedef struct mca_ptl_elan_t mca_ptl_elan_t; extern mca_ptl_elan_t mca_ptl_elan; @@ -91,17 +68,13 @@ struct mca_ptl_elan_module_1_0_0_t { struct mca_ptl_elan_t **elan_ptls; /**< array of available PTLs */ size_t elan_num_ptls; /**< number of ptls activated */ - ompi_list_t elan_reqs; /**< all elan requests */ - ompi_list_t elan_prog_events; /**< events in progress */ - ompi_list_t elan_comp_events; /**< events completed, but to reclaim */ ompi_list_t elan_procs; /**< elan proc's */ + ompi_list_t elan_reqs; /**< all elan requests */ + ompi_list_t elan_recv_frags; ompi_list_t elan_pending_acks; - ompi_free_list_t elan_events_free;/**< free events */ ompi_free_list_t elan_reqs_free; /**< all elan requests */ - - ompi_event_t elan_send_event; /**< event structure for sends */ - ompi_event_t elan_recv_event; /**< event structure for recvs */ + ompi_free_list_t elan_recv_frags_free; struct mca_ptl_elan_proc_t *elan_local; diff --git a/src/mca/ptl/elan/src/ptl_elan_comm_init.c b/src/mca/ptl/elan/src/ptl_elan_comm_init.c index b8d7943106..b36b089771 100644 --- a/src/mca/ptl/elan/src/ptl_elan_comm_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_comm_init.c @@ -13,79 +13,91 @@ #define ELAN_QUEUE_LOST_SLOTS 1 #define SLOT_ALIGN 128 #define MAX(a,b) ((a>b)? 
a:b) -#define ALIGNUP(x,a) (((unsigned int)(x) + ((a)-1)) & (-(a))) +#define ALIGNUP(x,a) (((unsigned int)(x) + ((a)-1)) & (-(a))) static int -ompi_init_elan_queue_events (mca_ptl_elan_t *ptl, - ompi_ptl_elan_queue_ctrl_t *queue) +ompi_init_elan_queue_events (mca_ptl_elan_t * ptl, + ompi_ptl_elan_queue_ctrl_t * queue) { - int i; - int count; - int main_align, main_size; - int elan_align, elan_size; + int i; + int count; + int main_align, main_size; + int elan_align, elan_size; - RAIL *rail; - ELAN4_CTX *ctx; + mca_ptl_elan_desc_item_t *desc; + + RAIL *rail; + ELAN4_CTX *ctx; ompi_free_list_t *flist; - ompi_ptl_elan_queue_send_t *ptr; - ompi_elan_event_t *elan_ptr; + ompi_ptl_elan_qdma_desc_t *ptr; + ompi_elan_event_t *elan_ptr; rail = (RAIL *) ptl->ptl_elan_rail; - ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx; + ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx; flist = &queue->tx_desc_free; - /* initialize list */ + /* initialize list */ OBJ_CONSTRUCT (&queue->tx_desc, ompi_list_t); OBJ_CONSTRUCT (&queue->tx_desc_free, ompi_free_list_t); - main_align = MAX(sizeof(void *), 8); - elan_align = MAX(sizeof(int *), 128); - main_size = ALIGNUP(sizeof (ompi_ptl_elan_queue_send_t), main_align); - elan_size = ALIGNUP(sizeof (ompi_ptl_elan_queue_send_t), elan_align); + main_align = MAX (sizeof (void *), 8); + elan_align = MAX (sizeof (int *), 128); + main_size = ALIGNUP (sizeof (ompi_ptl_elan_qdma_desc_t), main_align); + elan_size = ALIGNUP (sizeof (ompi_elan_event_t), elan_align); - flist->fl_elem_size = - flist->fl_max_to_alloc = 128; + flist->fl_elem_size = flist->fl_max_to_alloc = 128; flist->fl_num_allocated = 0; flist->fl_num_per_alloc = count = 16; - flist->fl_elem_class = NULL; /* leave it null */ - flist->fl_mpool = NULL; /* leave it null */ + flist->fl_elem_class = NULL; /* leave it null */ + flist->fl_mpool = NULL; /* leave it null */ /* Allocate the elements */ - ptr = (ompi_ptl_elan_queue_send_t*) elan4_allocMain(rail->r_alloc, - main_align, main_size*(count + 1)); - 
OMPI_PTL_ELAN_CHECK_UNEX(ptr, NULL, OMPI_ERROR, 0); + desc = (mca_ptl_elan_desc_item_t *) + malloc(sizeof(mca_ptl_elan_desc_item_t) * (count + 1)); + OMPI_PTL_ELAN_CHECK_UNEX (desc, NULL, OMPI_ERROR, 0); + + ptr = (ompi_ptl_elan_qdma_desc_t *) elan4_allocMain (rail->r_alloc, + main_align, + main_size * + (count + 1)); + OMPI_PTL_ELAN_CHECK_UNEX (ptr, NULL, OMPI_ERROR, 0); /* Allocating elan related structures */ - elan_ptr = (ompi_elan_event_t *) elan4_allocElan(rail->r_alloc, - elan_align, elan_size * (count + 1)); - OMPI_PTL_ELAN_CHECK_UNEX(elan_ptr, NULL, OMPI_ERROR, 0); + elan_ptr = (ompi_elan_event_t *) elan4_allocElan (rail->r_alloc, + elan_align, + elan_size * (count + + 1)); + OMPI_PTL_ELAN_CHECK_UNEX (elan_ptr, NULL, OMPI_ERROR, 0); - for(i=0; i< flist->fl_num_per_alloc; i++) { - ompi_list_item_t* item; - ptr->rail = rail; + for (i = 0; i < flist->fl_num_per_alloc; i++) { + ompi_list_item_t *item; + + ptr->rail = rail; ptr->elan_data_event = elan_ptr; - - item = (ompi_list_item_t*)ptr; - ompi_list_append(&flist->super, item); + desc->item = (mca_ptl_elan_send_desc_t)ptr; /* Initialize some of the dma structures */ { ptr->main_dma.dma_dstAddr = 0; - ptr->main_dma.dma_srcEvent = SDRAM2ELAN(ctx, &elan_ptr->event32); - ptr->main_dma.dma_dstEvent = SDRAM2ELAN(ctx, queue->input); - INITEVENT_WORD(ctx, (EVENT *)&elan_ptr->event32, - &ptr->main_doneWord); - RESETEVENT_WORD(&ptr->main_doneWord); - PRIMEEVENT_WORD(ctx, (EVENT *)&elan_ptr->event32, 1); - + ptr->main_dma.dma_srcEvent = + SDRAM2ELAN (ctx, &elan_ptr->event32); + ptr->main_dma.dma_dstEvent = SDRAM2ELAN (ctx, queue->input); + INITEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32, + &ptr->main_doneWord); + RESETEVENT_WORD (&ptr->main_doneWord); + PRIMEEVENT_WORD (ctx, (EVENT *) & elan_ptr->event32, 1); } + item = (ompi_list_item_t *) desc; + ompi_list_append (&flist->super, item); + /* Progress to the next element */ - ptr = (ompi_ptl_elan_queue_send_t*) ((char *) ptr + main_size); + ptr = 
(ompi_ptl_elan_qdma_desc_t *) ((char *) ptr + main_size); elan_ptr = (ompi_elan_event_t *) ((char *) elan_ptr + elan_size); + desc ++; } flist->fl_num_allocated += flist->fl_num_per_alloc; @@ -93,50 +105,54 @@ ompi_init_elan_queue_events (mca_ptl_elan_t *ptl, } int -ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) +ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t * emp, + int num_rails) { return (OMPI_SUCCESS); } -int -ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) +int +ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t * emp, + int num_rails) { - int i; - int nslots = 128; - int slotsize = 2048; - RAIL *rail; - ELAN4_CTX *ctx; - struct mca_ptl_elan_t * ptl; + int i; + int nslots = 128; + int slotsize = 2048; + RAIL *rail; + ELAN4_CTX *ctx; + struct mca_ptl_elan_t *ptl; /* Init the Transmit Queue structure */ - for ( i = 0 ; i < num_rails; i++ ) { + for (i = 0; i < num_rails; i++) { ompi_ptl_elan_recv_queue_t *rxq; ompi_ptl_elan_queue_ctrl_t *queue; - ptl = emp->elan_ptls[i]; - rail = (RAIL *) ptl->ptl_elan_rail; - ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx; + ptl = emp->elan_ptls[i]; + rail = (RAIL *) ptl->ptl_elan_rail; + ctx = (ELAN4_CTX *) ptl->ptl_elan_ctx; queue = ptl->queue = (ompi_ptl_elan_queue_ctrl_t *) - malloc(sizeof(ompi_ptl_elan_queue_ctrl_t)); - OMPI_PTL_ELAN_CHECK_UNEX(queue, NULL, OMPI_ERROR, 0); - memset(queue, 0, sizeof(ompi_ptl_elan_queue_ctrl_t)); + malloc (sizeof (ompi_ptl_elan_queue_ctrl_t)); + OMPI_PTL_ELAN_CHECK_UNEX (queue, NULL, OMPI_ERROR, 0); + memset (queue, 0, sizeof (ompi_ptl_elan_queue_ctrl_t)); /* Allocate input queue */ - queue->input = (E4_InputQueue *) elan4_allocElan(rail->r_alloc, - INPUT_QUEUE_ALIGN, INPUT_QUEUE_SIZE); - OMPI_PTL_ELAN_CHECK_UNEX(queue->input, NULL, OMPI_ERROR, 0); + queue->input = (E4_InputQueue *) elan4_allocElan (rail->r_alloc, + INPUT_QUEUE_ALIGN, + INPUT_QUEUE_SIZE); + OMPI_PTL_ELAN_CHECK_UNEX (queue->input, NULL, OMPI_ERROR, 0); - queue->tx_cmdq = elan4_alloc_cmdq 
(ctx, - rail->r_alloc, - CQ_Size8K, - CQ_WriteEnableBit | CQ_DmaStartEnableBit | CQ_STENEnableBit, - NULL); - OMPI_PTL_ELAN_CHECK_UNEX(queue->tx_cmdq, NULL, OMPI_ERROR, 0); + queue->tx_cmdq = elan4_alloc_cmdq (ctx, + rail->r_alloc, + CQ_Size8K, + CQ_WriteEnableBit | + CQ_DmaStartEnableBit | + CQ_STENEnableBit, NULL); + OMPI_PTL_ELAN_CHECK_UNEX (queue->tx_cmdq, NULL, OMPI_ERROR, 0); - /* + /* * Elan4 has a rather complicated hierarchical event mechanism. * It is easy to use but nontrivial to manipulate * We implement a simpler event control mechanism, which @@ -145,10 +161,10 @@ ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) * * Initialize a new event list managing this queue */ - ompi_init_elan_queue_events(ptl, queue); + ompi_init_elan_queue_events (ptl, queue); /* Allocate a cookie pool */ - queue->tx_cpool = elan4_allocCookiePool(ctx, ptl->elan_vp); + queue->tx_cpool = elan4_allocCookiePool (ctx, ptl->elan_vp); /* Init the Receive Queue structure */ queue->rx_nslots = 128; @@ -156,56 +172,59 @@ ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) queue->rx_buffsize = (slotsize > INPUT_QUEUE_MAX) ? 
INPUT_QUEUE_MAX : slotsize; - queue->rx_slotsize = ELAN_ALIGNUP(slotsize, SLOT_ALIGN); + queue->rx_slotsize = ELAN_ALIGNUP (slotsize, SLOT_ALIGN); rxq = queue->rxq = (ompi_ptl_elan_recv_queue_t *) - elan4_allocMain(rail->r_alloc, 64, - sizeof(ompi_ptl_elan_recv_queue_t)); - OMPI_PTL_ELAN_CHECK_UNEX(rxq, NULL, OMPI_ERROR, 0); - memset(rxq,0,sizeof(ompi_ptl_elan_recv_queue_t)); + elan4_allocMain (rail->r_alloc, 64, + sizeof (ompi_ptl_elan_recv_queue_t)); + OMPI_PTL_ELAN_CHECK_UNEX (rxq, NULL, OMPI_ERROR, 0); + memset (rxq, 0, sizeof (ompi_ptl_elan_recv_queue_t)); rxq->qr_rail = rail; - rxq->qr_fptr = elan4_allocMain(rail->r_alloc, - 128, nslots * queue->rx_slotsize); - OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_fptr, NULL, OMPI_ERROR, 0); - memset(rxq->qr_fptr, 0xeb, nslots * queue->rx_slotsize); + rxq->qr_fptr = elan4_allocMain (rail->r_alloc, + 128, nslots * queue->rx_slotsize); + OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_fptr, NULL, OMPI_ERROR, 0); + memset (rxq->qr_fptr, 0xeb, nslots * queue->rx_slotsize); + + rxq->qr_elanDone = ALLOC_ELAN (rail, SLOT_ALIGN, sizeof (EVENT32)); + OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_elanDone, NULL, OMPI_ERROR, 0); - rxq->qr_elanDone = ALLOC_ELAN(rail, SLOT_ALIGN, sizeof(EVENT32)); - OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_elanDone, NULL, OMPI_ERROR, 0); - /* Set the top et al */ - rxq->qr_efitem = (E4_uint64)elan4_main2elan(ctx, rxq->qr_fptr); - rxq->qr_base = rxq->qr_fptr; - rxq->qr_top = (void *)((uintptr_t)rxq->qr_base - + (queue->rx_slotsize * (nslots-1))); - rxq->qr_efptr = rxq->qr_efitem; - rxq->qr_elitem = rxq->qr_efitem + (queue->rx_slotsize * (nslots-1)); + rxq->qr_efitem = (E4_uint64) elan4_main2elan (ctx, rxq->qr_fptr); + rxq->qr_base = rxq->qr_fptr; + rxq->qr_top = (void *) ((uintptr_t) rxq->qr_base + + (queue->rx_slotsize * (nslots - 1))); + rxq->qr_efptr = rxq->qr_efitem; + rxq->qr_elitem = + rxq->qr_efitem + (queue->rx_slotsize * (nslots - 1)); /* Event to wait/block on */ rxq->qr_qEvent = &rxq->qr_elanDone; - queue->input->q_event = 
SDRAM2ELAN(ctx, (void *)&rxq->qr_elanDone); - queue->input->q_fptr = rxq->qr_efitem; - queue->input->q_bptr = rxq->qr_efitem; - queue->input->q_control = E4_InputQueueControl( - rxq->qr_efitem, rxq->qr_elitem, queue->rx_slotsize); - - /* The event */ - INITEVENT_WORD(ctx, (EVENT *)&rxq->qr_elanDone, - &rxq->qr_doneWord); - RESETEVENT_WORD(&rxq->qr_doneWord); - PRIMEEVENT_WORD(ctx, (EVENT *)&rxq->qr_elanDone, 1); + queue->input->q_event = + SDRAM2ELAN (ctx, (void *) &rxq->qr_elanDone); + queue->input->q_fptr = rxq->qr_efitem; + queue->input->q_bptr = rxq->qr_efitem; + queue->input->q_control = + E4_InputQueueControl (rxq->qr_efitem, rxq->qr_elitem, + queue->rx_slotsize); - rxq->qr_cmdq = elan4_alloc_cmdq( - ctx, rail->r_alloc, - CQ_Size1K, CQ_WriteEnableBit | CQ_WaitEventEnableBit, - NULL); - OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_cmdq, NULL, OMPI_ERROR, 0); + /* The event */ + INITEVENT_WORD (ctx, (EVENT *) & rxq->qr_elanDone, + &rxq->qr_doneWord); + RESETEVENT_WORD (&rxq->qr_doneWord); + PRIMEEVENT_WORD (ctx, (EVENT *) & rxq->qr_elanDone, 1); + + rxq->qr_cmdq = elan4_alloc_cmdq (ctx, rail->r_alloc, + CQ_Size1K, + CQ_WriteEnableBit | + CQ_WaitEventEnableBit, NULL); + OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_cmdq, NULL, OMPI_ERROR, 0); /* Allocate a sleepDesc for threads to block on */ - rxq->qr_es = ompi_init_elan_sleepdesc(&mca_ptl_elan_global_state, - rxq->qr_rail); - OMPI_PTL_ELAN_CHECK_UNEX(rxq->qr_es, NULL, OMPI_ERROR, 0); + rxq->qr_es = ompi_init_elan_sleepdesc (&mca_ptl_elan_global_state, + rxq->qr_rail); + OMPI_PTL_ELAN_CHECK_UNEX (rxq->qr_es, NULL, OMPI_ERROR, 0); OBJ_CONSTRUCT (&queue->rx_lock, ompi_mutex_t); } @@ -213,12 +232,9 @@ ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) return (OMPI_SUCCESS); } -int ompi_init_elan_rdma (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) -{ - return (OMPI_SUCCESS); -} - -int ompi_init_elan_sten (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) +int +ompi_init_elan_rdma (mca_ptl_elan_module_1_0_0_t * emp, + 
int num_rails) { return (OMPI_SUCCESS); } diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.c b/src/mca/ptl/elan/src/ptl_elan_frag.c index de1476658c..c55881c40a 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.c +++ b/src/mca/ptl/elan/src/ptl_elan_frag.c @@ -12,163 +12,20 @@ #include "ptl_elan_peer.h" #include "ptl_elan_proc.h" #include "ptl_elan_frag.h" +#include "ptl_elan_req.h" +#include "ptl_elan_priv.h" -#define frag_header super.super.frag_header -#define frag_owner super.super.frag_owner -#define frag_peer super.super.frag_peer -#define frag_convertor super.super.frag_convertor +#if 0 +static void +mca_ptl_elan_send_frag_construct (mca_ptl_elan_send_frag_t * frag) +{ - -static void mca_ptl_elan_send_frag_construct (mca_ptl_elan_send_frag_t * - frag); -static void mca_ptl_elan_send_frag_destruct (mca_ptl_elan_send_frag_t * - frag); - -static void mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t * - frag); -static void mca_ptl_elan_recv_frag_destruct (mca_ptl_elan_recv_frag_t * - frag); -static bool mca_ptl_elan_recv_frag_header (mca_ptl_elan_recv_frag_t * frag, - int sd, - size_t); -static bool -mca_ptl_elan_recv_frag_ack (mca_ptl_elan_recv_frag_t * frag, - int sd); -static bool -mca_ptl_elan_recv_frag_frag (mca_ptl_elan_recv_frag_t * frag, - int sd); -static bool -mca_ptl_elan_recv_frag_match (mca_ptl_elan_recv_frag_t * frag, - int sd); -static bool -mca_ptl_elan_recv_frag_data (mca_ptl_elan_recv_frag_t * frag, - int sd); -static bool -mca_ptl_elan_recv_frag_discard (mca_ptl_elan_recv_frag_t * frag, - int sd); - - -ompi_class_t mca_ptl_elan_recv_frag_t_class = { - "mca_ptl_elan_recv_frag_t", - OBJ_CLASS (mca_ptl_base_recv_frag_t), - (ompi_construct_t) mca_ptl_elan_recv_frag_construct, - (ompi_destruct_t) mca_ptl_elan_recv_frag_destruct -}; - -/* - * ELAN fragment constructor - */ +} static void -mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t * frag) +mca_ptl_elan_send_frag_destruct (mca_ptl_elan_send_frag_t * frag) { -} - -/* - * ELAN 
fragment destructor - */ - -static void -mca_ptl_elan_recv_frag_destruct (mca_ptl_elan_recv_frag_t * frag) -{ -} - - -/* - * Initialize a ELAN receive fragment for a specific peer. - */ - -void -mca_ptl_elan_recv_frag_init (mca_ptl_elan_recv_frag_t * frag, - mca_ptl_elan_peer_t * peer) -{ -} - -/* - * Callback from event library when socket has data available - * for receive. - */ - -bool -mca_ptl_elan_recv_frag_handler (mca_ptl_elan_recv_frag_t * frag, - int sd) -{ - return false; -} - -/* - * Receive fragment header - */ - -static bool -mca_ptl_elan_recv_frag_header (mca_ptl_elan_recv_frag_t * frag, - int sd, - size_t size) -{ - return true; -} - - -/* - * Receive and process an ack. - */ - -static bool -mca_ptl_elan_recv_frag_ack (mca_ptl_elan_recv_frag_t * frag, - int sd) -{ - return true; -} - - -/* - * Receive and process a match request - first fragment. - */ - -static bool -mca_ptl_elan_recv_frag_match (mca_ptl_elan_recv_frag_t * frag, - int sd) -{ - return true; -} - - -/* - * Receive and process 2nd+ fragments of a multi-fragment message. - */ - -static bool -mca_ptl_elan_recv_frag_frag (mca_ptl_elan_recv_frag_t * frag, - int sd) -{ - return true; -} - - -/* - * Continue with non-blocking recv() calls until the entire - * fragment is received. - */ - -static bool -mca_ptl_elan_recv_frag_data (mca_ptl_elan_recv_frag_t * frag, - int sd) -{ - return true; -} - - -/* - * If the app posted a receive buffer smaller than the - * fragment, receive and discard remaining bytes. -*/ - -static bool -mca_ptl_elan_recv_frag_discard (mca_ptl_elan_recv_frag_t * frag, - int sd) -{ - return true; } ompi_class_t mca_ptl_elan_send_frag_t_class = { @@ -177,48 +34,112 @@ ompi_class_t mca_ptl_elan_send_frag_t_class = { (ompi_construct_t) mca_ptl_elan_send_frag_construct, (ompi_destruct_t) mca_ptl_elan_send_frag_destruct }; - -/* - * Placeholders for send fragment constructor/destructors. 
- */ +#endif static void -mca_ptl_elan_send_frag_construct (mca_ptl_elan_send_frag_t * frag) +mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t * frag) { + OBJ_CONSTRUCT (frag, mca_ptl_elan_recv_frag_t); + frag->frag_hdr_cnt = 0; + frag->frag_msg_cnt = 0; + frag->frag.qdma = NULL; + frag->alloc_buff = (char *) malloc (sizeof (char) * 2048 + 32); + if (NULL == frag->alloc_buff) { + ompi_output (0, + "[%s:%d] Fatal error, unable to allocate recv buff \n", + __FILE__, __LINE__); + } + frag->unex_buff = (char *) (((int) frag->alloc_buff + 32) >> 5 << 5); } - static void -mca_ptl_elan_send_frag_destruct (mca_ptl_elan_send_frag_t * frag) +mca_ptl_elan_recv_frag_destruct (mca_ptl_elan_recv_frag_t * frag) { + /* Does this destruct free the memory? since OBJ_DESTRUCT, + * works only for non-dynamically allocated objects */ + frag->frag_hdr_cnt = 0; + frag->frag_msg_cnt = 0; + frag->frag.qdma = NULL; + free (frag->alloc_buff); + frag->alloc_buff = NULL; + frag->unex_buff = NULL; + OBJ_DESTRUCT (frag); } +ompi_class_t mca_ptl_elan_recv_frag_t_class = { + "mca_ptl_elan_recv_frag_t", + OBJ_CLASS (mca_ptl_base_recv_frag_t), + (ompi_construct_t) mca_ptl_elan_recv_frag_construct, + (ompi_destruct_t) mca_ptl_elan_recv_frag_destruct +}; -/* - * Initialize the fragment based on the current offset into the users - * data buffer, and the indicated size. 
- */ +extern mca_ptl_elan_state_t mca_ptl_elan_global_state; -int -mca_ptl_elan_send_frag_init (mca_ptl_elan_send_frag_t * sendfrag, - mca_ptl_elan_peer_t * ptl_peer, - mca_pml_base_send_request_t * sendreq, - size_t offset, - size_t * size, - int flags) +mca_ptl_elan_desc_item_t * +mca_ptl_elan_alloc_send_desc (struct mca_pml_base_send_request_t *req) { - return OMPI_SUCCESS; + struct ompi_ptl_elan_queue_ctrl_t *queue; + mca_ptl_elan_t *ptl; + struct mca_ptl_elan_peer_t *peer; + size_t offset; + size_t size; + + ompi_free_list_t *flist; + ompi_list_item_t *item; + mca_ptl_elan_desc_item_t *desc; + + ptl = (mca_ptl_elan_t *) req->req_owner; + peer = (mca_ptl_elan_peer_t *) req->req_peer; + offset = req->req_offset; + size = ptl->super.ptl_first_frag_size; + + /* For now, bind to queue DMA directly */ + { + queue = ptl->queue; + flist = &queue->tx_desc_free; + + if (ompi_using_threads ()) { + + ompi_mutex_lock (&((flist)->fl_lock)); + item = ompi_list_remove_first (&((flist)->super)); + + /* Progress this PTL module to get back a descriptor, + * Is it OK to progress with ptl->ptl_send_progress? */ + while (NULL == item) { + mca_ptl_tstamp_t tstamp = 0; + + ptl->super.ptl_module->ptlm_progress (tstamp); + item = ompi_list_remove_first (&((flist)->super)); + } + ompi_mutex_unlock (&((flist)->fl_lock)); + } else { + item = ompi_list_remove_first (&((flist)->super)); + + /* Progress this PTL module to get back a descriptor, + * Is it OK to progress with ptl->ptl_send_progress? */ + while (NULL == item) { + mca_ptl_tstamp_t tstamp = 0; + + /* + * Well, this still does not trigger the progress on + * PTL's from other modules. Wait for PML to change. + * Otherwise have to trigger PML progress from PTL. Ouch.. 
+ */ + ptl->super.ptl_module->ptlm_progress (tstamp); + item = ompi_list_remove_first (&((flist)->super)); + } + } + + ((struct mca_ptl_elan_send_request_t *) req)->desc_type + = MCA_PTL_ELAN_QDMA_DESC; + desc = (mca_ptl_elan_desc_item_t *) item; + } + + return desc; } - -/* - * The socket is setup as non-blocking, writes are handled asynchronously, - * with event callbacks when the socket is ready for writes. - */ - -bool -mca_ptl_elan_send_frag_handler (mca_ptl_elan_send_frag_t * frag, - int sd) +mca_ptl_elan_recv_frag_t * +mca_ptl_elan_alloc_recv_desc (struct mca_pml_base_recv_request_t * req) { - return true; + return NULL; } diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.h b/src/mca/ptl/elan/src/ptl_elan_frag.h index 2a4592a187..e2fb2a58f4 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.h +++ b/src/mca/ptl/elan/src/ptl_elan_frag.h @@ -17,96 +17,63 @@ #include "mca/ptl/base/ptl_base_recvfrag.h" #include "ptl_elan.h" -extern ompi_class_t mca_ptl_elan_send_frag_t_class; +extern ompi_class_t mca_ptl_elan_recv_frag_t_class; + struct mca_ptl_elan_peer_t; +struct ompi_ptl_elan_qdma_frag_t; +struct ompi_ptl_elan_putget_frag_t; /** - * ELAN send fragment derived type. 
+ * ELAN descriptor for send */ -struct mca_ptl_elan_send_frag_t { - mca_ptl_base_send_frag_t super; /**< base send fragment descriptor */ - size_t frag_vec_cnt; - volatile int frag_progressed; +union mca_ptl_elan_send_desc_t { + struct ompi_ptl_elan_qdma_desc_t *qdma; + struct ompi_ptl_elan_putget_desc_t *putget; }; -typedef struct mca_ptl_elan_send_frag_t mca_ptl_elan_send_frag_t; +typedef union mca_ptl_elan_send_desc_t mca_ptl_elan_send_desc_t; - -#define MCA_PTL_ELAN_SEND_FRAG_ALLOC(item, rc) \ - OMPI_FREE_LIST_GET(&mca_ptl_elan_module.elan_send_frags, item, rc); - - -#define MCA_PTL_ELAN_RECV_FRAG_ALLOC(frag, rc) \ -{ \ - ompi_list_item_t* item; \ - OMPI_FREE_LIST_GET(&mca_ptl_elan_module.elan_recv_frags, item, rc); \ - frag = (mca_ptl_elan_recv_frag_t*)item; \ -} - -bool -mca_ptl_elan_send_frag_handler (mca_ptl_elan_send_frag_t *, int sd); - - -int -mca_ptl_elan_send_frag_init (mca_ptl_elan_send_frag_t *, - struct mca_ptl_elan_peer_t *, - struct mca_pml_base_send_request_t *, - size_t offset, size_t * size, int flags); - -extern ompi_class_t mca_ptl_elan_recv_frag_t_class; +struct mca_ptl_elan_desc_item_t { + ompi_list_item_t super; + mca_ptl_elan_send_desc_t item; +}; +typedef struct mca_ptl_elan_desc_item_t mca_ptl_elan_desc_item_t; /** * ELAN received fragment derived type. 
*/ struct mca_ptl_elan_recv_frag_t { - mca_ptl_base_recv_frag_t super; /**< base receive fragment descriptor */ - size_t frag_hdr_cnt; /**< number of header bytes received */ - size_t frag_msg_cnt; /**< number of message bytes received */ - bool frag_ack_pending; /**< an ack pending for this fragment */ - volatile int frag_progressed; /**< flag to atomically progress */ + mca_ptl_base_recv_frag_t super; + size_t frag_hdr_cnt; + size_t frag_msg_cnt; + union { + struct ompi_ptl_elan_qdma_frag_t *qdma; + struct ompi_ptl_elan_putget_frag_t *putget; + } frag; + char *alloc_buff; + char *unex_buff; }; typedef struct mca_ptl_elan_recv_frag_t mca_ptl_elan_recv_frag_t; -bool -mca_ptl_elan_recv_frag_handler (mca_ptl_elan_recv_frag_t *, int sd); +mca_ptl_elan_desc_item_t * +mca_ptl_elan_alloc_send_desc(struct mca_pml_base_send_request_t *req); -void -mca_ptl_elan_recv_frag_init (mca_ptl_elan_recv_frag_t * frag, - struct mca_ptl_elan_peer_t *peer); - -bool -mca_ptl_elan_recv_frag_send_ack (mca_ptl_elan_recv_frag_t * frag); - -/* - * For fragments that require an acknowledgment, this routine will be called - * twice, once when the send completes, and again when the acknowledgment is - * returned. Only the last caller should update the request status, so we - * add a lock w/ the frag_progressed flag. 
- */ -static inline void -mca_ptl_elan_send_frag_progress (mca_ptl_elan_send_frag_t * frag) -{ - return; -} - -static inline void -mca_ptl_elan_send_frag_init_ack (mca_ptl_elan_send_frag_t * ack, - struct mca_ptl_t *ptl, - struct mca_ptl_elan_peer_t *ptl_peer, - mca_ptl_elan_recv_frag_t * frag) -{ - return; -} +mca_ptl_elan_recv_frag_t * +mca_ptl_elan_alloc_recv_desc(struct mca_pml_base_recv_request_t *req); static inline void -mca_ptl_elan_recv_frag_matched (mca_ptl_elan_recv_frag_t * frag) -{ - return; +mca_ptl_elan_recv_frag_progress(mca_ptl_elan_recv_frag_t* frag) +{ + /* make sure this only happens once for threaded case */ + mca_pml_base_recv_request_t* request; + mca_ptl_base_recv_progress_fn_t progress; + + progress = (frag)->super.super.frag_owner->ptl_recv_progress; + request = (frag)->super.frag_request; + + /* progress the request */ + progress((frag)->super.super.frag_owner, request, &(frag)->super); + mca_ptl_elan_recv_frag_return((frag)->super.super.frag_owner, (frag)); } -static inline void -mca_ptl_elan_recv_frag_progress (mca_ptl_elan_recv_frag_t * frag) -{ - return; -} #endif diff --git a/src/mca/ptl/elan/src/ptl_elan_init.c b/src/mca/ptl/elan/src/ptl_elan_init.c index 502db915d3..ba87fe0976 100644 --- a/src/mca/ptl/elan/src/ptl_elan_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_init.c @@ -4,7 +4,7 @@ #include #define _ELAN4 -#define __elan4__ +#define __elan4 #include "ptl_elan.h" #include "ptl_elan_priv.h" @@ -15,21 +15,12 @@ mca_ptl_elan_state_t mca_ptl_elan_global_state; -static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) +static int +ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) { - /* TODO: - * a) create elan PTL instances - * b) init transport structure for all rails - * including STEN, QDMA and RDMA - * c) For each possible transport mechanism, allocate: - * send/recv descriptors; - * system buffer; - * event structure for transport control - * d) initialize STAT (including SYNC) structures. 
- */ - mca_ptl_elan_module_1_0_0_t * emp; + mca_ptl_elan_module_1_0_0_t *emp; int rail_count; - + emp = ems->elan_module; emp->elan_ptls = malloc (rail_count * sizeof (mca_ptl_elan_t *)); if (NULL == emp->elan_ptls) { @@ -41,43 +32,42 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) /* Initialiaze PTL's */ do { - char param[256]; - mca_ptl_elan_t *ptl; - - ptl = malloc(sizeof(mca_ptl_elan_t)); - if(NULL == ptl) { - ompi_output (0, - "[%s:%d] error in malloc for ptl structures \n", - __FILE__, __LINE__); - return OMPI_ERR_OUT_OF_RESOURCE; - } + char param[256]; + mca_ptl_elan_t *ptl; - memcpy(ptl, &mca_ptl_elan, sizeof(mca_ptl_elan)); - emp->elan_ptls[emp->elan_num_ptls] = ptl; + ptl = malloc (sizeof (mca_ptl_elan_t)); + if (NULL == ptl) { + ompi_output (0, + "[%s:%d] error in malloc for ptl structures \n", + __FILE__, __LINE__); + return OMPI_ERR_OUT_OF_RESOURCE; + } - /* MCA related structures */ + memcpy (ptl, &mca_ptl_elan, sizeof (mca_ptl_elan)); + emp->elan_ptls[emp->elan_num_ptls] = ptl; - ptl->ptl_ni_local = emp->elan_num_ptls; - ptl->ptl_ni_total = rail_count; - emp->elan_num_ptls ++; + /* MCA related structures */ - /* allow user to specify per rail bandwidth and latency */ - sprintf(param, "bandwidth_elanrail%d", emp->elan_num_ptls); - ptl->super.ptl_bandwidth = - mca_ptl_elan_param_register_int(param, 1000); - sprintf(param, "latency_elanrail%d", emp->elan_num_ptls); - ptl->super.ptl_latency = mca_ptl_elan_param_register_int(param, 1); + ptl->ptl_ni_local = emp->elan_num_ptls; + ptl->ptl_ni_total = rail_count; + emp->elan_num_ptls++; - /* Setup elan related structures such as ctx, rail */ - ptl->ptl_elan_rail = ems->elan_rail[rail_count]; - ptl->ptl_elan_ctx = ems->elan_rail[rail_count]->rail_ctx; - ptl->elan_vp = ems->elan_vp; - ptl->elan_nvp = ems->elan_nvp; + /* allow user to specify per rail bandwidth and latency */ + sprintf (param, "bandwidth_elanrail%d", emp->elan_num_ptls); + ptl->super.ptl_bandwidth = + 
mca_ptl_elan_param_register_int (param, 1000); + sprintf (param, "latency_elanrail%d", emp->elan_num_ptls); + ptl->super.ptl_latency = + mca_ptl_elan_param_register_int (param, 1); + + /* Setup elan related structures such as ctx, rail */ + ptl->ptl_elan_rail = ems->elan_rail[rail_count]; + ptl->ptl_elan_ctx = ems->elan_rail[rail_count]->rail_ctx; + ptl->elan_vp = ems->elan_vp; + ptl->elan_nvp = ems->elan_nvp; } while (emp->elan_num_ptls < rail_count); - /* Allocating all the communication strcutures for PTL's, - * XXX: Leave it later after finalization is done - */ + /* Allocating all the communication strcutures for PTL's, */ if (OMPI_SUCCESS != ompi_init_elan_qdma (emp, rail_count)) { return OMPI_ERROR; } @@ -89,10 +79,9 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) return OMPI_ERROR; } - if (OMPI_SUCCESS != ompi_init_elan_sten (emp, rail_count)) { - return OMPI_ERROR; - } - + /* + * XXX: initialize STAT (including SYNC) structures. + */ if (OMPI_SUCCESS != ompi_init_elan_stat (emp, rail_count)) { return OMPI_ERROR; } @@ -100,17 +89,7 @@ static int ompi_mca_ptl_elan_setup (mca_ptl_elan_state_t * ems) return (OMPI_SUCCESS); } -/* Attach to the network: - * a) First add myself into the capability - * b) For each rail, - * Block the inputter for any incoming traffic - * attach to the device - * Find out the location - * Fill out the vp in main and elan structures. 
- * create a cookiePool for threading control - * c) Allocate a cookiePool for export Oth Rail - * d) Find out total vpids, localVpids, localId and number of locals - */ +/* Attach to the network */ static int ompi_elan_attach_network (mca_ptl_elan_state_t * ems) { @@ -236,62 +215,67 @@ ompi_elan_attach_network (mca_ptl_elan_state_t * ems) return (OMPI_SUCCESS); } -static void -ompi_module_elan_close_ptls (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) +static void +ompi_module_elan_close_ptls (mca_ptl_elan_module_1_0_0_t * emp, + int num_rails) { - + /* TODO: find the ones that are still there and free them */ } -static void -ompi_module_elan_close_procs (mca_ptl_elan_module_1_0_0_t* emp, int num_rails) +static void +ompi_module_elan_close_procs (mca_ptl_elan_module_1_0_0_t * emp, + int num_rails) { - + /* TODO: find the ones that are still there and free them */ } -static void ompi_init_elan_queue_events(ompi_ptl_elan_queue_ctrl_t *queue) +static void +ompi_init_elan_queue_events (ompi_ptl_elan_queue_ctrl_t * queue) { } ELAN_SLEEP * -ompi_init_elan_sleepdesc(mca_ptl_elan_state_t * ems, RAIL *rail) +ompi_init_elan_sleepdesc (mca_ptl_elan_state_t * ems, + RAIL * rail) { ELAN_SLEEP *es; /* XXX: asking the caller to hold the lock */ - es = MALLOC(sizeof(ELAN_SLEEP)); - OMPI_PTL_ELAN_CHECK_UNEX(es, NULL, NULL, 0); - memset(es, 0, sizeof(ELAN_SLEEP)); + es = MALLOC (sizeof (ELAN_SLEEP)); + OMPI_PTL_ELAN_CHECK_UNEX (es, NULL, NULL, 0); + memset (es, 0, sizeof (ELAN_SLEEP)); /* Assign next interrupt cookie value */ es->es_cookie = ems->intcookie++; - + /* XXX, rail[0] is choosen instead this rail */ - if (elan4_alloc_intcookie(ems->elan_rail[0]->rail_ctx, - es->es_cookie) < 0) { - ompi_output(0, - "[%s:%d] Failed to allocate IRQ cookie \n", - __FILE__, __LINE__); + if (elan4_alloc_intcookie (ems->elan_rail[0]->rail_ctx, + es->es_cookie) < 0) { + ompi_output (0, + "[%s:%d] Failed to allocate IRQ cookie \n", + __FILE__, __LINE__); } - es->es_cmdBlk = ALLOC_ELAN(rail, 
E4_EVENTBLOCK_SIZE, - E4_EVENTBLOCK_SIZE); + es->es_cmdBlk = ALLOC_ELAN (rail, E4_EVENTBLOCK_SIZE, + E4_EVENTBLOCK_SIZE); - OMPI_PTL_ELAN_CHECK_UNEX(es->es_cmdBlk, 0, NULL, 0); + OMPI_PTL_ELAN_CHECK_UNEX (es->es_cmdBlk, 0, NULL, 0); - /*Allocate a pair of command queues for blocking waits with*/ - es->es_cmdq = elan4_alloc_cmdq(rail->r_ctx, rail->r_alloc, - CQ_Size1K, CQ_WriteEnableBit | CQ_WaitEventEnableBit, - NULL); - OMPI_PTL_ELAN_CHECK_UNEX(es->es_cmdq, NULL, NULL, 0); + /*Allocate a pair of command queues for blocking waits with */ + es->es_cmdq = elan4_alloc_cmdq (rail->r_ctx, rail->r_alloc, + CQ_Size1K, + CQ_WriteEnableBit | + CQ_WaitEventEnableBit, NULL); + OMPI_PTL_ELAN_CHECK_UNEX (es->es_cmdq, NULL, NULL, 0); /* This command queue used to fire the IRQ via a cmd port copy event */ - es->es_ecmdq = elan4_alloc_cmdq(rail->r_ctx, - rail->r_alloc, CQ_Size1K, /* CQ_EnableAllBits, */ - CQ_WriteEnableBit | CQ_InterruptEnableBit, NULL); - OMPI_PTL_ELAN_CHECK_UNEX(es->es_ecmdq, NULL, NULL, 0); + es->es_ecmdq = elan4_alloc_cmdq (rail->r_ctx, rail->r_alloc, CQ_Size1K, /* CQ_EnableAllBits, */ + CQ_WriteEnableBit | + CQ_InterruptEnableBit, NULL); + OMPI_PTL_ELAN_CHECK_UNEX (es->es_ecmdq, NULL, NULL, 0); es->es_next = NULL; /* XXX: asking the caller to release the lock */ @@ -299,7 +283,6 @@ ompi_init_elan_sleepdesc(mca_ptl_elan_state_t * ems, RAIL *rail) return es; } - int ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) { @@ -333,7 +316,7 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) /* Default allocator parameters */ ems->elan_flags = 0; - ems->elan_waittype = ELAN_POLL_EVENT; /* or ELAN_WAIT_EVENT */ + ems->elan_waittype = ELAN_POLL_EVENT; /* or ELAN_WAIT_EVENT */ ems->main_size = ELAN_ALLOC_SIZE; ems->elan_size = ELAN_ALLOCELAN_SIZE; ems->elan_flags |= (EXCEPTIONCORE | EXCEPTIONTRACE | EXCEPTIONDBGDUMP); @@ -342,16 +325,16 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) #ifdef ELAN_VERSION if (!elan_checkVersion 
(ELAN_VERSION)) { - ompi_output (0, - "Elan version is not compatible with %s \n", - ELAN_VERSION); + ompi_output (0, + "Elan version is not compatible with %s \n", + ELAN_VERSION); return OMPI_ERROR; } #endif /* Allocate elan capability from the heap */ ems->elan_cap = (ELAN_CAPABILITY *) malloc (sizeof (ELAN_CAPABILITY)); - OMPI_PTL_ELAN_CHECK_UNEX(ems->elan_cap, NULL, OMPI_ERROR, 0); + OMPI_PTL_ELAN_CHECK_UNEX (ems->elan_cap, NULL, OMPI_ERROR, 0); memset (ems->elan_cap, 0, sizeof (ELAN_CAPABILITY)); /* Process the capability info supplied by RMS */ @@ -373,22 +356,22 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) } ems->all_rails = (RAIL **) malloc (sizeof (RAIL *) * num_rails); - OMPI_PTL_ELAN_CHECK_UNEX(ems->all_rails, NULL, - OMPI_ERR_OUT_OF_RESOURCE, 0); + OMPI_PTL_ELAN_CHECK_UNEX (ems->all_rails, NULL, + OMPI_ERR_OUT_OF_RESOURCE, 0); ems->all_estates = (ADDR_SDRAM *) malloc (sizeof (ELAN_ESTATE *) * num_rails); - OMPI_PTL_ELAN_CHECK_UNEX(ems->all_estates, NULL, - OMPI_ERR_OUT_OF_RESOURCE, 0); + OMPI_PTL_ELAN_CHECK_UNEX (ems->all_estates, NULL, + OMPI_ERR_OUT_OF_RESOURCE, 0); rails = (int *) malloc (sizeof (int) * num_rails); - OMPI_PTL_ELAN_CHECK_UNEX(rails, NULL, OMPI_ERR_OUT_OF_RESOURCE, 0); + OMPI_PTL_ELAN_CHECK_UNEX (rails, NULL, OMPI_ERR_OUT_OF_RESOURCE, 0); (void) elan_rails (ems->elan_cap, rails); - ems->elan_rail = (ELAN_RAIL **) malloc (sizeof (ELAN_RAIL **) - * (num_rails + 1)); - OMPI_PTL_ELAN_CHECK_UNEX(ems->elan_rail, NULL, - OMPI_ERR_OUT_OF_RESOURCE, 0); + ems->elan_rail = (ELAN_RAIL **) malloc (sizeof (ELAN_RAIL **) + * (num_rails + 1)); + OMPI_PTL_ELAN_CHECK_UNEX (ems->elan_rail, NULL, + OMPI_ERR_OUT_OF_RESOURCE, 0); ems->elan_rail[num_rails] = NULL; alloc_mainsize = ELAN_ALIGNUP (ems->main_size, ems->elan_pagesize); @@ -398,11 +381,11 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) /* Magic quadrics number for the starting cookie value */ ems->intcookie = 42; - ems->rail_intcookie = (int*) malloc (sizeof 
(int)*(num_rails + 1)); - OMPI_PTL_ELAN_CHECK_UNEX(ems->rail_intcookie, NULL, - OMPI_ERR_OUT_OF_RESOURCE, 0); - memset (ems->elan_cap, 0, (num_rails + 1) * sizeof (int)); - ems->rail_intcookie[num_rails] = NULL; + ems->rail_intcookie = (int *) malloc (sizeof (int) * (num_rails + 1)); + OMPI_PTL_ELAN_CHECK_UNEX (ems->rail_intcookie, NULL, + OMPI_ERR_OUT_OF_RESOURCE, 0); + memset ((void*)ems->elan_cap, 0, (num_rails + 1) * sizeof (int)); + ems->rail_intcookie[num_rails] = 0; for (i = 0; i < num_rails; i++) { @@ -413,18 +396,19 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) /* Allocate the Main memory control structure for this rail */ rail = ems->all_rails[i] = (RAIL *) malloc (sizeof (RAIL)); - OMPI_PTL_ELAN_CHECK_UNEX(rail, NULL, OMPI_ERROR, 0); + OMPI_PTL_ELAN_CHECK_UNEX (rail, NULL, OMPI_ERROR, 0); memset (rail, 0, sizeof (RAIL)); rail->r_ctx = elan4_init (rails[i]); - OMPI_PTL_ELAN_CHECK_UNEX(rail->r_ctx, NULL, OMPI_ERROR, 0); + OMPI_PTL_ELAN_CHECK_UNEX (rail->r_ctx, NULL, OMPI_ERROR, 0); rail->r_sdram = elan4_open_sdram (rails[i], 0, alloc_elansize); - OMPI_PTL_ELAN_CHECK_UNEX(rail->r_sdram, NULL, OMPI_ERROR, 0); + OMPI_PTL_ELAN_CHECK_UNEX (rail->r_sdram, NULL, OMPI_ERROR, 0); - rail->r_alloc = elan4_createAllocator (ems->main_size, - rail->r_sdram, 0, ems->elan_size); - OMPI_PTL_ELAN_CHECK_UNEX(rail->r_alloc, NULL, OMPI_ERROR, 0); + rail->r_alloc = elan4_createAllocator (ems->main_size, + rail->r_sdram, 0, + ems->elan_size); + OMPI_PTL_ELAN_CHECK_UNEX (rail->r_alloc, NULL, OMPI_ERROR, 0); if (elan4_set_standard_mappings (rail->r_ctx) < 0 || elan4_set_required_mappings (rail->r_ctx) < 0) { @@ -434,46 +418,42 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) return OMPI_ERROR; } -#if 0 /* Is this only needed for TPORT support? 
*/ - if (elan4_register_trap_handler(rail->r_ctx, UTS_UNIMP_INSTR, - UTS_TPROC, elan_unimp_handler, NULL) < 0) { - ompi_output(0, "elan_init(%d): Failed elan4_register_unimp()", i); - return OMPI_ERROR; - } -#endif - /* Now allocate the SDRAM Elan control structure for this rail */ - estate = ems->all_estates[i] = elan4_allocElan (rail->r_alloc, - ELAN_ALIGN, sizeof (ELAN_EPRIVSTATE)); - OMPI_PTL_ELAN_CHECK_UNEX(estate, NULL, OMPI_ERROR, 0); + estate = ems->all_estates[i] = elan4_allocElan (rail->r_alloc, + ELAN_ALIGN, + sizeof + (ELAN_EPRIVSTATE)); + OMPI_PTL_ELAN_CHECK_UNEX (estate, NULL, OMPI_ERROR, 0); priv_estate = (ELAN_EPRIVSTATE *) estate; memset (priv_estate, 0, sizeof (ELAN_EPRIVSTATE)); /* Allocate a command port for non sten functions etc */ rail->r_cmdq = elan4_alloc_cmdq (rail->r_ctx, - rail->r_alloc, - CQ_Size8K, - CQ_ModifyEnableBit | CQ_WriteEnableBit | CQ_WaitEventEnableBit - | CQ_SetEventEnableBit | CQ_ThreadStartEnableBit, - NULL); - OMPI_PTL_ELAN_CHECK_UNEX(rail->r_cmdq, NULL, OMPI_ERROR, 0); + rail->r_alloc, + CQ_Size8K, + CQ_ModifyEnableBit | + CQ_WriteEnableBit | + CQ_WaitEventEnableBit | + CQ_SetEventEnableBit | + CQ_ThreadStartEnableBit, NULL); + OMPI_PTL_ELAN_CHECK_UNEX (rail->r_cmdq, NULL, OMPI_ERROR, 0); /* Allocate a command port for thread rescheduling etc */ rail->r_ecmdq = elan4_alloc_cmdq (rail->r_ctx, rail->r_alloc, - CQ_Size8K, CQ_EnableAllBits, NULL); - OMPI_PTL_ELAN_CHECK_UNEX(rail->r_ecmdq, NULL, OMPI_ERROR, 0); + CQ_Size8K, CQ_EnableAllBits, + NULL); + OMPI_PTL_ELAN_CHECK_UNEX (rail->r_ecmdq, NULL, OMPI_ERROR, 0); priv_estate->cport = MAIN2ELAN (rail->r_ctx, rail->r_ecmdq->cmdq_mapping); - - /* save the rail pointers */ + + /* Save the rail pointers */ ems->elan_rail[i] = (ELAN_RAIL *) rail; ems->rail_intcookie[i] = ems->intcookie; /* Allocate a Sleep Desc */ - - es = ompi_init_elan_sleepdesc(ems, rail); + es = ompi_init_elan_sleepdesc (ems, rail); /* XXX: put a lock and hold a lock */ es->es_next = rail->r_sleepDescs; @@ 
-492,7 +472,7 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) /*ompi_elan_railtable_t *rt; */ struct railtable *rt; rt = (struct railtable *) malloc (sizeof (struct railtable)); - OMPI_PTL_ELAN_CHECK_UNEX(rt, NULL, OMPI_ERROR, 0); + OMPI_PTL_ELAN_CHECK_UNEX (rt, NULL, OMPI_ERROR, 0); memset (rt, 0, sizeof (struct railtable)); rt->rt_nrails = 1; @@ -539,8 +519,8 @@ ompi_mca_ptl_elan_init (mca_ptl_elan_module_1_0_0_t * emp) int ompi_mca_ptl_elan_finalize (mca_ptl_elan_module_1_0_0_t * emp) { - int i; - int num_rails; + int i; + int num_rails; mca_ptl_elan_state_t *ems; ems = &mca_ptl_elan_global_state; @@ -548,44 +528,37 @@ ompi_mca_ptl_elan_finalize (mca_ptl_elan_module_1_0_0_t * emp) ompi_module_elan_close_ptls (emp, num_rails); ompi_module_elan_close_procs (emp, num_rails); - - /* Cleanup the global state - * Free per rail structures, then the references to them */ for (i = 0; i < num_rails; i++) { RAIL *rail; - rail = ems->all_rails[i]; + rail = ems->all_rails[i]; - free (rail->r_railTable); + free (rail->r_railTable); - /* Free the memory from the rail allocator */ - elan4_freeMain(rail->r_alloc, rail->r_ecmdq); - elan4_freeMain(rail->r_alloc, rail->r_cmdq); - elan4_freeElan(rail->r_alloc, ems->all_estates[i]); - - /* Free cookie value, Destroy allocator and SDRAM handler and - then close device */ + /* Free the memory from the rail allocator */ + elan4_freeMain (rail->r_alloc, rail->r_ecmdq); + elan4_freeMain (rail->r_alloc, rail->r_cmdq); + elan4_freeElan (rail->r_alloc, ems->all_estates[i]); /* Since the cookie allocated from rail[0], be consistent here */ - elan4_free_intcookie (ems->all_rails[0]->r_ctx, - ems->rail_intcookie[i]); + elan4_free_intcookie (ems->all_rails[0]->r_ctx, + ems->rail_intcookie[i]); - elan4_destroyAllocator (rail->r_alloc); - elan4_close_sdram (rail->r_sdram); + elan4_destroyAllocator (rail->r_alloc); + elan4_close_sdram (rail->r_sdram); - /*elan4_fini (rail->r_ctx); Not working yet */ + /*elan4_fini (rail->r_ctx); Not 
working yet */ - - /* Free the rail structure, why two pointers are used to - * point to the same RAIL, all_rails and elan_rails */ - /*free (ems->elan_rail[i]);*/ + /* Free the rail structure via one of the arrays of pointers + * to the RAILs, either all_rails or elan_rails */ free (ems->all_rails[i]); } - free(ems->elan_rail); - free(ems->all_estates); - free(ems->all_rails); - free(ems->elan_cap); + free (ems->elan_rail); + free (ems->all_estates); + free (ems->all_rails); + free (ems->elan_cap); return (OMPI_SUCCESS); } diff --git a/src/mca/ptl/elan/src/ptl_elan_module.c b/src/mca/ptl/elan/src/ptl_elan_module.c index efdef1df95..0684f56420 100644 --- a/src/mca/ptl/elan/src/ptl_elan_module.c +++ b/src/mca/ptl/elan/src/ptl_elan_module.c @@ -94,19 +94,21 @@ mca_ptl_elan_module_open (void) mca_ptl_elan.super.ptl_exclusivity = mca_ptl_elan_param_register_int ("exclusivity", 0); mca_ptl_elan.super.ptl_first_frag_size = - mca_ptl_elan_param_register_int ("first_frag_size", 2048/*magic*/); + mca_ptl_elan_param_register_int ("first_frag_size", + (2048 - sizeof(mca_ptl_base_header_t))/*magic*/); mca_ptl_elan.super.ptl_min_frag_size = - mca_ptl_elan_param_register_int ("min_frag_size", 320); + mca_ptl_elan_param_register_int ("min_frag_size", + (2048 - sizeof(mca_ptl_base_header_t))/*magic*/); mca_ptl_elan.super.ptl_max_frag_size = - mca_ptl_elan_param_register_int ("max_frag_size", -1); + mca_ptl_elan_param_register_int ("max_frag_size", 2<<30); /* register ELAN module parameters */ elan_mp->elan_free_list_num = - mca_ptl_elan_param_register_int ("free_list_num", 64); + mca_ptl_elan_param_register_int ("free_list_num", 32); elan_mp->elan_free_list_max = - mca_ptl_elan_param_register_int ("free_list_max", -1); + mca_ptl_elan_param_register_int ("free_list_max", 1024); elan_mp->elan_free_list_inc = - mca_ptl_elan_param_register_int ("free_list_inc", 64); + mca_ptl_elan_param_register_int ("free_list_inc", 32); /* initialize state */ elan_mp->elan_ptls = NULL; @@ -115,14
+117,13 @@ mca_ptl_elan_module_open (void) /* initialize list */ OBJ_CONSTRUCT (&elan_mp->elan_reqs, ompi_list_t); - OBJ_CONSTRUCT (&elan_mp->elan_prog_events, ompi_list_t); - OBJ_CONSTRUCT (&elan_mp->elan_comp_events, ompi_list_t); OBJ_CONSTRUCT (&elan_mp->elan_procs, ompi_list_t); OBJ_CONSTRUCT (&elan_mp->elan_pending_acks, ompi_list_t); + OBJ_CONSTRUCT (&elan_mp->elan_recv_frags, ompi_list_t); /* initialize free list */ - OBJ_CONSTRUCT (&elan_mp->elan_events_free, ompi_free_list_t); - OBJ_CONSTRUCT (&elan_mp->elan_reqs, ompi_free_list_t); + OBJ_CONSTRUCT (&elan_mp->elan_reqs_free, ompi_free_list_t); + OBJ_CONSTRUCT (&elan_mp->elan_recv_frags_free, ompi_free_list_t); /* initialize other objects */ OBJ_CONSTRUCT (&elan_mp->elan_lock, ompi_mutex_t); @@ -161,24 +162,27 @@ mca_ptl_elan_module_close (void) elan_mp->elan_reqs_free.super.ompi_list_length); } - if (elan_mp->elan_events_free.fl_num_allocated != - elan_mp->elan_events_free.super.ompi_list_length) { - ompi_output (0, "elan events: %d allocated %d returned\n", - elan_mp->elan_reqs_free.fl_num_allocated, - elan_mp->elan_reqs_free.super.ompi_list_length); + if (elan_mp->elan_recv_frags_free.fl_num_allocated != + elan_mp->elan_recv_frags_free.super.ompi_list_length) { + ompi_output (0, "elan requests: %d allocated %d returned\n", + elan_mp->elan_recv_frags_free.fl_num_allocated, + elan_mp->elan_recv_frags_free.super.ompi_list_length); } + /* FIXME: free free list entries before destructing lists */ /* Free the empty list holders */ OBJ_DESTRUCT (&(elan_mp->elan_reqs)); - OBJ_DESTRUCT (&(elan_mp->elan_prog_events)); - OBJ_DESTRUCT (&(elan_mp->elan_comp_events)); OBJ_DESTRUCT (&(elan_mp->elan_procs)); OBJ_DESTRUCT (&(elan_mp->elan_pending_acks)); + OBJ_DESTRUCT (&(elan_mp->elan_recv_frags)); - /* Destruct the free lists */ - OBJ_DESTRUCT (&(elan_mp->elan_events_free)); + /* TODO: + * We need free all the memory allocated for this list + * before desctructing this free_list */ + OBJ_DESTRUCT 
(&(elan_mp->elan_reqs_free)); + OBJ_DESTRUCT (&(elan_mp->elan_recv_frags_free)); /* Destruct other structures */ OBJ_DESTRUCT (&elan_mp->elan_lock); @@ -198,15 +202,14 @@ mca_ptl_elan_module_init (int *num_ptls, bool * have_hidden_threads) { mca_ptl_t **ptls; - int rc; *num_ptls = 0; + + /* TODO: support multiple threads */ + *allow_multi_user_threads = true; *have_hidden_threads = OMPI_HAVE_THREADS; - /* Leave the thread related setting to PML:PTL(TCP) to decide */ - - /* initialize free lists */ ompi_free_list_init (&(elan_mp->elan_reqs_free), sizeof (mca_ptl_elan_send_request_t), OBJ_CLASS (mca_ptl_elan_send_request_t), @@ -214,15 +217,13 @@ mca_ptl_elan_module_init (int *num_ptls, elan_mp->elan_free_list_max, elan_mp->elan_free_list_inc, NULL); - ompi_free_list_init (&elan_mp->elan_events_free, - sizeof (mca_ptl_elan_send_frag_t), - OBJ_CLASS (mca_ptl_elan_send_frag_t), + ompi_free_list_init (&(elan_mp->elan_recv_frags_free), + sizeof (mca_ptl_elan_recv_frag_t), + OBJ_CLASS (mca_ptl_elan_recv_frag_t), elan_mp->elan_free_list_num, elan_mp->elan_free_list_max, elan_mp->elan_free_list_inc, NULL); - /* use default allocator */ - /* open basic elan device */ if (OMPI_SUCCESS != ompi_mca_ptl_elan_init(&mca_ptl_elan_module)) { ompi_output(0, @@ -231,14 +232,11 @@ mca_ptl_elan_module_init (int *num_ptls, return NULL; } - /* - * we need to publish some information for elan. - */ if (OMPI_SUCCESS != mca_ptl_elan_module_register(&mca_ptl_elan_module)) { ompi_output(0, "[%s:%d] error in malloc for elan PTL references\n", __FILE__, __LINE__); - return OMPI_ERROR; + return NULL; } ptls = (mca_ptl_t **) malloc (elan_mp->elan_num_ptls * @@ -250,7 +248,6 @@ mca_ptl_elan_module_init (int *num_ptls, return NULL; } - /* Will coherency on two replicas be a potential problem? 
*/ memcpy (ptls, elan_mp->elan_ptls, elan_mp->elan_num_ptls * sizeof (mca_ptl_elan_t *)); *num_ptls = elan_mp->elan_num_ptls; @@ -259,28 +256,14 @@ mca_ptl_elan_module_init (int *num_ptls, return ptls; } -/* - * FIXME: to support ELAN module control - */ - +/* support ELAN module control */ int mca_ptl_elan_module_control (int param, void *value, size_t size) { switch (param) { -#if 0 case MCA_PTL_ENABLE: - if (*(int *) value) { - /* Trying to trigger the thread progress engine, - * Here the elan PTL does not have this capability - * for now. So we skip this function. */ - ompi_event_add (&elan_mp->elan_recv_event, 0); - } else { - ompi_event_del (&elan_mp->elan_recv_event); - } - break; -#endif default: break; } @@ -288,13 +271,12 @@ mca_ptl_elan_module_control (int param, } -/* - * FIXME: to support event-based module progress. - */ +/* TODO: to support event-based module progress later. */ int mca_ptl_elan_module_progress (mca_ptl_tstamp_t tstamp) { + mca_ptl_elan_drain_recv(elan_mp); + mca_ptl_elan_update_send(elan_mp); return OMPI_SUCCESS; } - diff --git a/src/mca/ptl/elan/src/ptl_elan_peer.h b/src/mca/ptl/elan/src/ptl_elan_peer.h index 8647628017..9b005385ff 100644 --- a/src/mca/ptl/elan/src/ptl_elan_peer.h +++ b/src/mca/ptl/elan/src/ptl_elan_peer.h @@ -22,27 +22,30 @@ typedef enum { MCA_PTL_ELAN_FAILED, NUM_MCA_PTL_ELAN_STAT } mca_ptl_elan_status_t; + + /** * An abstraction that represents a connection to a peer process. - * Peers are always connected unless they are in different LAN or died. 
*/ struct mca_ptl_elan_peer_t { ompi_list_item_t super; - struct mca_ptl_elan_t* peer_ptl; + struct mca_ptl_elan_t* peer_ptl; + /*struct mca_ptl_elan_remote_t *peer_ptl; */ struct mca_ptl_elan_proc_t* peer_proc; - struct mca_ptl_elan_addr_t* peer_addr; /**< address of peer */ + struct mca_ptl_elan_addr_t* peer_addr; /**< address of peer */ - int resending; /* A resending stage, no more new dma's */ - int num_resend; /* How many times I have retried */ - double open_time; - double close_time; + int peer_state; + int num_credits; + int max_credits; + int resending; + int num_resend; double known_alive_time; }; typedef struct mca_ptl_elan_peer_t mca_ptl_elan_peer_t; -extern ompi_class_t mca_ptl_elan_peer_t_class; +OBJ_CLASS_DECLARATION(mca_ptl_elan_peer_t); #endif diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.c b/src/mca/ptl/elan/src/ptl_elan_priv.c index e69de29bb2..76d78dfb8b 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.c +++ b/src/mca/ptl/elan/src/ptl_elan_priv.c @@ -0,0 +1,331 @@ +#include +#include +#include +#include "types.h" +#include "datatype/datatype.h" +#include "mca/pml/base/pml_base_sendreq.h" +#include "mca/pml/base/pml_base_recvreq.h" +#include "ptl_elan.h" +#include "ptl_elan_peer.h" +#include "ptl_elan_proc.h" +#include "ptl_elan_frag.h" +#include "ptl_elan_req.h" +#include "ptl_elan_priv.h" + +static void +mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc, + mca_ptl_elan_t * ptl, + mca_ptl_elan_send_request_t * req) +{ + char *app_buff; + int header_length; + int mesg_length; + int flags = 0; /* FIXME: now */ + mca_ptl_base_header_t *hdr; + + int destvp = ptl->elan_vp; + + /* TODO: For now, assume data are contiguous and less than eager size */ + app_buff = (char *) req->super.super.req_addr; + + header_length = sizeof (mca_ptl_base_match_header_t); + mesg_length = req->super.req_bytes_packed; + + hdr = (mca_ptl_base_header_t *) & desc->buff[0]; + + hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH; + hdr->hdr_common.hdr_flags 
= flags; + hdr->hdr_common.hdr_size = sizeof (mca_ptl_base_match_header_t); + hdr->hdr_frag.hdr_frag_offset = 0; + hdr->hdr_frag.hdr_frag_seq = 0; + hdr->hdr_frag.hdr_src_ptr.pval = 0; + hdr->hdr_frag.hdr_dst_ptr.lval = 0; + + hdr->hdr_match.hdr_contextid = req->super.super.req_comm->c_contextid; + hdr->hdr_match.hdr_src = req->super.super.req_comm->c_my_rank; + hdr->hdr_match.hdr_dst = req->super.super.req_peer; + hdr->hdr_match.hdr_tag = req->super.super.req_tag; + hdr->hdr_match.hdr_msg_length = mesg_length; + hdr->hdr_match.hdr_msg_seq = req->super.super.req_sequence; + hdr->hdr_frag.hdr_frag_length = mesg_length; + + /* Fill up all the necessary fields */ + memcpy (&desc->buff[header_length], app_buff, mesg_length); + desc->main_dma.dma_srcAddr = MAIN2ELAN (desc->rail->r_ctx, + &desc->buff[0]); + + /* XXXX Hardwired DMA retry count */ + desc->main_dma.dma_typeSize = E4_DMA_TYPE_SIZE ((header_length + + mesg_length), + DMA_DataTypeByte, + DMA_QueueWrite, 16); + desc->main_dma.dma_cookie = + elan4_local_cookie (ptl->queue->tx_cpool, E4_COOKIE_TYPE_LOCAL_DMA, + destvp); + desc->main_dma.dma_vproc = destvp; + + ompi_output (0, + "elan_queueTx(%p): DMA: typeSize %Lx vproc %lx srcAddr %Lx " + "dstAddr %Lx srcEvent %Lx dstEvent %Lx\n", + desc->rail, + desc->main_dma.dma_typeSize, + desc->main_dma.dma_vproc, + desc->main_dma.dma_srcAddr, + desc->main_dma.dma_dstAddr, + desc->main_dma.dma_srcEvent, desc->main_dma.dma_dstEvent); + + /* Make main memory coherent with IO domain (IA64) */ + MEMBAR_VISIBLE (); +} + + +int +mca_ptl_elan_start_desc (int type, + mca_ptl_elan_desc_item_t * desc) +{ + mca_ptl_elan_t *ptl; + mca_ptl_elan_send_request_t *req; + + if (type == MCA_PTL_ELAN_QDMA_DESC) { + struct ompi_ptl_elan_qdma_desc_t *qdma; + + qdma = desc->item.qdma; + ptl = qdma->ptl; + req = qdma->req; + + mca_ptl_elan_init_qdma_desc (qdma, ptl, req); + elan4_run_dma_cmd (ptl->queue->tx_cmdq, (DMA *) & qdma->main_dma); + ptl->queue->tx_cmdq->cmdq_flush (ptl->queue->tx_cmdq); + 
ompi_output (0, "elan_queueTx(%p) returning %p\n", + ptl->queue, desc); + + /* Insert desc into the list of outstanding DMA's */ + ompi_list_append (&ptl->queue->tx_desc, (ompi_list_item_t *) desc); + + } else { + ompi_output (0, + "Other types of DMA are not supported right now \n"); + return OMPI_ERROR; + } + return OMPI_SUCCESS; +} + + +static void +mca_ptl_elan_data_frag (struct mca_ptl_elan_t *ptl, + mca_ptl_base_header_t * header) +{ + /* Allocate a recv frag descriptor */ + mca_ptl_elan_recv_frag_t *recv_frag; + ompi_list_item_t *item; + mca_pml_base_recv_request_t *request; + + int rc; + + OMPI_FREE_LIST_GET (&mca_ptl_elan_module.elan_recv_frags_free, + item, rc); + + if (OMPI_SUCCESS != rc) { + ompi_output (0, + "[%s:%d] Unable to allocate a recv fragment", + __FILE__, __LINE__); + return; + /* TODO: progress the recv state machine */ + } + + recv_frag = (mca_ptl_elan_recv_frag_t *) item; + + recv_frag->super.super.frag_owner = (mca_ptl_t *) ptl; + recv_frag->super.super.frag_addr = NULL; + recv_frag->super.super.frag_size = 0; + recv_frag->super.super.frag_peer = NULL; /* FIXME: peer; */ + recv_frag->super.frag_request = 0; + recv_frag->super.frag_is_buffered = false; + recv_frag->frag_hdr_cnt = 0; + recv_frag->frag_msg_cnt = 0; + + /* Take the header */ + recv_frag->super.super.frag_header = *header; + + /* match with preposted requests */ + if (mca_ptl_base_recv_frag_match (recv_frag->super.super.frag_owner, + &recv_frag->super, + &recv_frag->super.super.frag_header. 
+ hdr_match)) { + /* copy into the request buffer */ + request = recv_frag->super.frag_request; + memcpy (request->super.req_addr, + (char *) header + sizeof (mca_ptl_base_header_t), + header->hdr_frag.hdr_frag_length); + } else { + recv_frag->super.frag_is_buffered = true; + recv_frag->super.super.frag_addr = recv_frag->unex_buff; + recv_frag->super.super.frag_size = + header->hdr_frag.hdr_frag_length; + memcpy (recv_frag->unex_buff, + (char *) header + sizeof (mca_ptl_base_header_t), + header->hdr_frag.hdr_frag_length); + } + + /* Complete the fragment */ + if (NULL != recv_frag->super.frag_request) { + mca_ptl_base_recv_progress_fn_t progress; + + progress = recv_frag->super.super.frag_owner->ptl_recv_progress; + request = recv_frag->super.frag_request; + + /* progress the request */ + progress (recv_frag->super.super.frag_owner, request, + &recv_frag->super); + mca_ptl_elan_recv_frag_return (recv_frag->super.super.frag_owner, + recv_frag); + + } +} + +static void +mca_ptl_elan_ctrl_frag (struct mca_ptl_elan_t *ptl, + mca_ptl_base_header_t * header) +{ + /* TODO: + * 0) First of all, no need to allocate frag descriptors, + * since control packet does not contain data. + * 1) Start successive fragments if it is an ACK + * 2) Resend the original fragment if it is a NACK + * 3) Update the request if no more fragment. 
+ */ +} + +int +mca_ptl_elan_drain_recv (mca_ptl_elan_module_1_0_0_t * emp) +{ + struct ompi_ptl_elan_queue_ctrl_t *queue; + ompi_ptl_elan_recv_queue_t *rxq; + struct mca_ptl_elan_t *ptl; + ELAN_CTX *ctx; + + int num_ptls; + int i; + int rc; + + num_ptls = emp->elan_num_ptls; + + /* Iterate over all the PTL input Queues */ + for (i = 0; i < num_ptls; i++) { + ptl = emp->elan_ptls[i]; + queue = emp->elan_ptls[i]->queue; + rxq = queue->rxq; + ctx = ptl->ptl_elan_ctx; + + OMPI_LOCK (&queue->rx_lock); + + rc = elan4_pollevent_word (ctx, &rxq->qr_doneWord, 1); + + if (rc) { + + mca_ptl_base_header_t *header; + + header = (mca_ptl_base_header_t *) rxq->qr_fptr; + + switch (header->hdr_common.hdr_type) { + case MCA_PTL_HDR_TYPE_MATCH: + case MCA_PTL_HDR_TYPE_FRAG: + /* a data fragment */ + mca_ptl_elan_data_frag (ptl, header); + break; + case MCA_PTL_HDR_TYPE_ACK: + case MCA_PTL_HDR_TYPE_NACK: + /* a control fragment for a message */ + mca_ptl_elan_ctrl_frag (ptl, header); + break; + default: + ompi_output (0, "[%s:%d] unknown fragment type %d\n", + __FILE__, __LINE__, + header->hdr_common.hdr_type); + break; + } + + /* Work out the new front pointer */ + if (rxq->qr_fptr == rxq->qr_top) { + rxq->qr_fptr = rxq->qr_base; + rxq->qr_efptr = rxq->qr_efitem; + } else { + rxq->qr_fptr = (void *) ((uintptr_t) rxq->qr_fptr + + queue->rx_slotsize); + rxq->qr_efptr += queue->rx_slotsize; + } + + /* PCI Write */ + queue->input->q_fptr = rxq->qr_efptr; + MEMBAR_STORESTORE (); + + /* Reset the event */ + RESETEVENT_WORD (&rxq->qr_doneWord); + + /* Order RESETEVENT wrt to wait_event_cmd */ + MEMBAR_STORESTORE (); + + /* Re-prime queue event by issuing a waitevent(1) on it */ + elan4_wait_event_cmd (rxq->qr_cmdq, + /* Is qr_elanDone really a main memory address? 
*/ + MAIN2ELAN (ctx, &rxq->qr_elanDone), + E4_EVENT_INIT_VALUE (-32, E4_EVENT_WRITE, + E4_EVENT_DTYPE_LONG, 0), + MAIN2ELAN (ctx, (void *) &rxq-> qr_doneWord), + 0xfeedfacedeadbeef); + rxq->qr_cmdq->cmdq_flush (rxq->qr_cmdq); + + } + OMPI_UNLOCK (&queue->rx_lock); + } + + return OMPI_SUCCESS; +} + +int +mca_ptl_elan_update_send (mca_ptl_elan_module_1_0_0_t * emp) +{ + struct mca_ptl_elan_t *ptl; + ompi_ptl_elan_queue_ctrl_t *queue; + mca_ptl_elan_desc_item_t *desc; + ELAN4_CTX *ctx; + + int num_ptls; + int i; + + num_ptls = emp->elan_num_ptls; + + /* Update the send request if any of send's is completed */ + for (i = 0; i < num_ptls; i++) { + ptl = emp->elan_ptls[i]; + queue = ptl->queue; + ctx = ptl->ptl_elan_ctx; + + while (ompi_list_get_size (&queue->tx_desc) > 0) { + desc = (mca_ptl_elan_desc_item_t *) + ompi_list_get_first (&queue->tx_desc); +#if 1 + if (desc->item.qdma->main_doneWord) +#else + /* Poll the completion event for 1usec */ + if (elan4_pollevent_word + (ctx, &desc->item.qdma->main_doneWord, 1)) +#endif + { + mca_ptl_elan_send_request_t *req; + /* Remove the desc, update the request, put back to free list */ + desc = (mca_ptl_elan_desc_item_t *) + ompi_list_remove_first (&queue->tx_desc); + req = desc->item.qdma->req; + req->super.super.req_mpi_done = true; + req->super.super.req_pml_done = true; + OMPI_FREE_LIST_RETURN (&queue->tx_desc_free, + (ompi_list_item_t *) desc); + } else { + break; + } + } + } + + return OMPI_SUCCESS; +} diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.h b/src/mca/ptl/elan/src/ptl_elan_priv.h index 0d384bab9b..5e0fc9fe6c 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.h +++ b/src/mca/ptl/elan/src/ptl_elan_priv.h @@ -34,7 +34,7 @@ #include #include - + #include #include "misc_sys.h" #include "init_sys.h" @@ -54,76 +54,79 @@ * Structure used to publish elan information to peers. 
*/ struct mca_ptl_elan_addr_t { - int elan_vp; /* Right now only elan_vp is needed */ - int addr_inuse; + int elan_vp; /* Right now only elan_vp is needed */ + int addr_inuse; }; typedef struct mca_ptl_elan_addr_t mca_ptl_elan_addr_t; -struct ompi_ptl_elan_recv_queue_t -{ +struct ompi_ptl_elan_recv_queue_t { /* Events needs to be aligned */ - EVENT_WORD qr_doneWord; - ADDR_SDRAM qr_qEvent; - EVENT32 *qr_elanDone; + EVENT_WORD qr_doneWord; + ADDR_SDRAM qr_qEvent; + EVENT32 *qr_elanDone; /* The one don't care */ - E4_uint64 qr_efitem; - E4_uint64 qr_efptr; - E4_uint64 qr_elitem; - void *qr_base; - void *qr_fptr; - void *qr_top; + E4_uint64 qr_efitem; + E4_uint64 qr_efptr; + E4_uint64 qr_elitem; + void *qr_base; + void *qr_fptr; + void *qr_top; - E4_CmdQ *qr_cmdq; - ELAN_SLEEP *qr_es; - RAIL *qr_rail; + E4_CmdQ *qr_cmdq; + ELAN_SLEEP *qr_es; + RAIL *qr_rail; }; -typedef struct ompi_ptl_elan_recv_queue_t ompi_ptl_elan_recv_queue_t; +typedef struct ompi_ptl_elan_recv_queue_t ompi_ptl_elan_recv_queue_t; -typedef struct -{ +typedef struct { /* SHOULD BE 128-byte aligned */ - uint8_t data[INPUT_QUEUE_MAX]; /* queue req data packet */ + uint8_t data[INPUT_QUEUE_MAX]; /* queue req data packet */ /* SHOULD be 32-byte aligned */ - E4_Event32 event32; /* Local elan completion event */ + E4_Event32 event32; /* Local elan completion event */ } ompi_elan_event_t; -struct ompi_ptl_elan_queue_send_t -{ - E4_DMA64 main_dma; /**< Must be 8-byte aligned */ +struct ompi_ptl_elan_qdma_desc_t { + E4_DMA64 main_dma; /**< Must be 8-byte aligned */ /* 8 byte aligned */ + volatile E4_uint64 main_doneWord; /**< main memory location to poll */ ompi_elan_event_t *elan_data_event; /**< 128-byte aligned copy event */ - RAIL *rail; + RAIL *rail; /* 8 byte aligned */ - uint8_t buff[INPUT_QUEUE_MAX]; /**< queue data */ -}; -typedef struct ompi_ptl_elan_queue_send_t ompi_ptl_elan_queue_send_t; -struct ompi_ptl_elan_queue_ctrl_t -{ + mca_ptl_elan_t *ptl; + mca_ptl_elan_send_request_t *req; + /* 8 byte 
aligned */ + + uint8_t buff[INPUT_QUEUE_MAX]; /**< queue data */ + /* 8 byte aligned */ +}; +typedef struct ompi_ptl_elan_qdma_desc_t ompi_ptl_elan_qdma_desc_t; + +struct ompi_ptl_elan_queue_ctrl_t { /* Transmit Queues */ /** < elan located INPUT_QUEUE_ALIGN'ed with INPUT_QUEUE_SIZE */ - E4_InputQueue *input; + E4_InputQueue *input; /** proc_ompi = NULL; proc->proc_addrs = NULL; @@ -38,12 +39,12 @@ void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc) proc->proc_guid.jobid = 0; proc->proc_guid.procid = 0; - OBJ_CONSTRUCT(&proc->proc_lock, ompi_mutex_t); + OBJ_CONSTRUCT (&proc->proc_lock, ompi_mutex_t); /* add to list of all proc instance */ - OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock); - ompi_list_append(&mca_ptl_elan_module.elan_procs, &proc->super); - OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock); + OMPI_THREAD_LOCK (&mca_ptl_elan_module.elan_lock); + ompi_list_append (&mca_ptl_elan_module.elan_procs, &proc->super); + OMPI_THREAD_UNLOCK (&mca_ptl_elan_module.elan_lock); return; } @@ -53,16 +54,17 @@ void mca_ptl_elan_proc_construct (mca_ptl_elan_proc_t * proc) * Cleanup elan proc instance */ -void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc) +void +mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc) { /* remove from list of all proc instances */ - OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock); - ompi_list_remove_item(&mca_ptl_elan_module.elan_procs, &proc->super); - OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock); + OMPI_THREAD_LOCK (&mca_ptl_elan_module.elan_lock); + ompi_list_remove_item (&mca_ptl_elan_module.elan_procs, &proc->super); + OMPI_THREAD_UNLOCK (&mca_ptl_elan_module.elan_lock); /* release resources */ - if(NULL != proc->proc_peers) - free(proc->proc_peers); + if (NULL != proc->proc_peers) + free (proc->proc_peers); return; } @@ -75,47 +77,55 @@ void mca_ptl_elan_proc_destruct (mca_ptl_elan_proc_t * proc) * addresses) associated w/ a given destination on this datastructure. 
*/ -mca_ptl_elan_proc_t *mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc) +mca_ptl_elan_proc_t * +mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc) { - int rc; - size_t size; - mca_ptl_elan_proc_t* ptl_proc; - - ptl_proc = mca_ptl_elan_proc_lookup_ompi(ompi_proc); - if(ptl_proc != NULL) + int rc; + size_t size; + mca_ptl_elan_proc_t *ptl_proc; + + ptl_proc = mca_ptl_elan_proc_lookup_ompi (ompi_proc); + if (ptl_proc != NULL) return ptl_proc; - ptl_proc = OBJ_NEW(mca_ptl_elan_proc_t); + ptl_proc = OBJ_NEW (mca_ptl_elan_proc_t); ptl_proc->proc_ompi = ompi_proc; ptl_proc->proc_guid = ompi_proc->proc_name; - rc = mca_base_modex_recv( &mca_ptl_elan_module.super.ptlm_version, - ompi_proc, (void**)&ptl_proc->proc_addrs, &size); + /* Extract exposed addresses from remote proc */ + rc = mca_base_modex_recv (&mca_ptl_elan_module.super.ptlm_version, + ompi_proc, (void **) &ptl_proc->proc_addrs, + &size); - if(rc != OMPI_SUCCESS) { - ompi_output(0, "[%s:%d] mca_base_modex_recv failed to recv data \n", - __FILE__, __LINE__); - OBJ_RELEASE(ptl_proc); + if (rc != OMPI_SUCCESS) { + ompi_output (0, + "[%s:%d] mca_base_modex_recv failed to recv data \n", + __FILE__, __LINE__); + OBJ_RELEASE (ptl_proc); return NULL; } - if(0 != (size % sizeof(mca_ptl_elan_addr_t))) { - ompi_output(0, "[%s:%d] invalid received data size %d\n", size); + if (0 != (size % sizeof (mca_ptl_elan_addr_t))) { + ompi_output (0, "[%s:%d] invalid received data size %d\n", + __FILE__, __LINE__, size); return NULL; } - ptl_proc->proc_addr_count = size / sizeof(mca_ptl_elan_addr_t); + ptl_proc->proc_addr_count = size / sizeof (mca_ptl_elan_addr_t); /* allocate space for peer array - one for each exported address */ - ptl_proc->proc_peers = (mca_ptl_elan_peer_t**) - malloc(ptl_proc->proc_addr_count * sizeof(mca_ptl_elan_peer_t*)); + ptl_proc->proc_peers = (mca_ptl_elan_peer_t **) + malloc (ptl_proc->proc_addr_count * + sizeof (mca_ptl_elan_peer_t *)); - if(NULL == ptl_proc->proc_peers) { - OBJ_RELEASE(ptl_proc); + 
if (NULL == ptl_proc->proc_peers) { + OBJ_RELEASE (ptl_proc); + ompi_output (0, "[%s:%d] unable to allocate peer procs \n", + __FILE__, __LINE__); + return NULL; + } - if(NULL == mca_ptl_elan_module.elan_local - && ompi_proc == ompi_proc_local()) { + if (NULL == mca_ptl_elan_module.elan_local + && ompi_proc == ompi_proc_local ()) { mca_ptl_elan_module.elan_local = ptl_proc; } return ptl_proc; } @@ -126,24 +136,25 @@ mca_ptl_elan_proc_t *mca_ptl_elan_proc_create (ompi_proc_t * ompi_proc) * ompi_proc_t instance. */ static mca_ptl_elan_proc_t * -mca_ptl_elan_proc_lookup_ompi (ompi_proc_t *ompi_proc) +mca_ptl_elan_proc_lookup_ompi (ompi_proc_t * ompi_proc) { - mca_ptl_elan_proc_t* elan_proc; + mca_ptl_elan_proc_t *elan_proc; - OMPI_THREAD_LOCK(&mca_ptl_elan_module.elan_lock); + OMPI_THREAD_LOCK (&mca_ptl_elan_module.elan_lock); - elan_proc = (mca_ptl_elan_proc_t*) - ompi_list_get_first(&mca_ptl_elan_module.elan_procs); + elan_proc = (mca_ptl_elan_proc_t *) + ompi_list_get_first (&mca_ptl_elan_module.elan_procs); - for( ; elan_proc != (mca_ptl_elan_proc_t*) - ompi_list_get_end(&mca_ptl_elan_module.elan_procs); - elan_proc = (mca_ptl_elan_proc_t*)ompi_list_get_next(elan_proc)) { - if(elan_proc->proc_ompi == ompi_proc) { - OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock); + for (; elan_proc != (mca_ptl_elan_proc_t *) + ompi_list_get_end (&mca_ptl_elan_module.elan_procs); + elan_proc = + (mca_ptl_elan_proc_t *) ompi_list_get_next (elan_proc)) { + if (elan_proc->proc_ompi == ompi_proc) { + OMPI_THREAD_UNLOCK (&mca_ptl_elan_module.elan_lock); return elan_proc; } } - OMPI_THREAD_UNLOCK(&mca_ptl_elan_module.elan_lock); + OMPI_THREAD_UNLOCK (&mca_ptl_elan_module.elan_lock); return NULL; } @@ -153,7 +164,9 @@ mca_ptl_elan_proc_lookup_ompi (ompi_proc_t *ompi_proc) * Look for an existing ELAN process instance based on the globally unique * process identifier. 
*/ -mca_ptl_elan_proc_t *mca_ptl_elan_proc_lookup (void *guid, size_t size) +mca_ptl_elan_proc_t * +mca_ptl_elan_proc_lookup (void *guid, + size_t size) { return NULL; } @@ -163,12 +176,13 @@ mca_ptl_elan_proc_t *mca_ptl_elan_proc_lookup (void *guid, size_t size) * Note that this routine must be called with the lock on the process already * held. Insert a ptl instance into the proc array and assign it an address. */ -int mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc, - mca_ptl_elan_peer_t * ptl_peer) +int +mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc, + mca_ptl_elan_peer_t * ptl_peer) { - int i; + int i; struct mca_ptl_elan_t *ptl_elan; - + ptl_elan = ptl_peer->peer_ptl; ptl_peer->peer_proc = ptl_proc; ptl_proc->proc_peers[ptl_proc->proc_peer_count++] = ptl_peer; @@ -177,24 +191,24 @@ int mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc, * directly attached network. If we don't find one, pick the first * unused address. */ - for(i=0; iproc_addr_count; i++) { + for (i = 0; i < ptl_proc->proc_addr_count; i++) { - unsigned vp_local; - unsigned vp_remote; - mca_ptl_elan_addr_t* peer_addr; + unsigned vp_local; + unsigned vp_remote; + mca_ptl_elan_addr_t *peer_addr; peer_addr = ptl_proc->proc_addrs + i; - if(peer_addr->addr_inuse != 0) { + if (peer_addr->addr_inuse != 0) { continue; } - vp_local = ptl_elan->elan_vp; + vp_local = ptl_elan->elan_vp; vp_remote = peer_addr->elan_vp; - if(vp_local = vp_remote) { + if (vp_local == vp_remote) { ptl_peer->peer_addr = peer_addr; break; - } else if(ptl_peer->peer_addr != 0) { + } else if (ptl_peer->peer_addr != 0) { ptl_peer->peer_addr = peer_addr; } } @@ -209,8 +223,9 @@ int mca_ptl_elan_proc_insert (mca_ptl_elan_proc_t * ptl_proc, * no longer in use. 
*/ -int mca_ptl_elan_proc_remove (mca_ptl_elan_proc_t * ptl_proc, - mca_ptl_elan_peer_t * ptl_peer) +int +mca_ptl_elan_proc_remove (mca_ptl_elan_proc_t * ptl_proc, + mca_ptl_elan_peer_t * ptl_peer) { return OMPI_SUCCESS; } @@ -220,8 +235,10 @@ int mca_ptl_elan_proc_remove (mca_ptl_elan_proc_t * ptl_proc, * loop through all available PTLs for one matching the source address * of the request. */ -bool mca_ptl_elan_proc_accept (mca_ptl_elan_proc_t * ptl_proc, - struct sockaddr_in * addr, int sd) +bool +mca_ptl_elan_proc_accept (mca_ptl_elan_proc_t * ptl_proc, + struct sockaddr_in * addr, + int sd) { return false; } diff --git a/src/mca/ptl/elan/src/ptl_elan_req.c b/src/mca/ptl/elan/src/ptl_elan_req.c index e9c17e91fc..f6b66e100d 100644 --- a/src/mca/ptl/elan/src/ptl_elan_req.c +++ b/src/mca/ptl/elan/src/ptl_elan_req.c @@ -9,10 +9,22 @@ #include "ptl_elan_req.h" #include "ptl_elan.h" -static void -mca_ptl_elan_send_request_construct (mca_ptl_elan_send_request_t *); -static void -mca_ptl_elan_send_request_destruct (mca_ptl_elan_send_request_t *); +void +mca_ptl_elan_send_request_construct (mca_ptl_elan_send_request_t * request) +{ + OBJ_CONSTRUCT (&request->super, mca_pml_base_send_request_t); + request->desc_type = 0; + request->req_frag = NULL; +} + + +void +mca_ptl_elan_send_request_destruct (mca_ptl_elan_send_request_t * request) +{ + OBJ_DESTRUCT (&request->super); + request->desc_type = 0; + request->req_frag = NULL; +} ompi_class_t mca_ptl_elan_send_request_t_class = { "mca_ptl_elan_send_request_t", @@ -20,17 +32,3 @@ ompi_class_t mca_ptl_elan_send_request_t_class = { (ompi_construct_t) mca_ptl_elan_send_request_construct, (ompi_destruct_t) mca_ptl_elan_send_request_destruct }; - - -void -mca_ptl_elan_send_request_construct (mca_ptl_elan_send_request_t * request) -{ - OBJ_CONSTRUCT (&request->req_frag, mca_ptl_elan_send_frag_t); -} - - -void -mca_ptl_elan_send_request_destruct (mca_ptl_elan_send_request_t * request) -{ - OBJ_DESTRUCT (&request->req_frag); -} diff 
--git a/src/mca/ptl/elan/src/ptl_elan_req.h b/src/mca/ptl/elan/src/ptl_elan_req.h index 6c6b0a51c3..640b020395 100644 --- a/src/mca/ptl/elan/src/ptl_elan_req.h +++ b/src/mca/ptl/elan/src/ptl_elan_req.h @@ -25,20 +25,23 @@ #include "ptl_elan.h" #include "ptl_elan_frag.h" +enum { + MCA_PTL_ELAN_NULL_DESC, + MCA_PTL_ELAN_QDMA_DESC, + MCA_PTL_ELAN_PUTGET_DESC +}; + OBJ_CLASS_DECLARATION(mca_ptl_elan_send_request_t); OBJ_CLASS_DECLARATION(mca_ptl_elan_recv_request_t); -/*extern ompi_class_t mca_ptl_elan_send_request_t_class;*/ -/*extern ompi_class_t mca_ptl_elan_recv_request_t_class;*/ /** - * ELAN send request derived type. The send request contains both the - * base send request, and space for the first ELAN send fragment descriptor. - * This avoids the overhead of a second allocation for the initial send - * fragment on every send request. + * ELAN send request derived type. The send request contains + * the base send request and a point to the elan fragment descriptor */ struct mca_ptl_elan_send_request_t { - mca_pml_base_send_request_t super; - mca_ptl_elan_send_frag_t req_frag; /* first fragment */ + mca_pml_base_send_request_t super; + int desc_type; + mca_ptl_elan_desc_item_t *req_frag; }; typedef struct mca_ptl_elan_send_request_t mca_ptl_elan_send_request_t; #endif