diff --git a/src/mca/ptl/elan/src/ptl_elan.c b/src/mca/ptl/elan/src/ptl_elan.c index 076f9e6dd1..e9fce7449e 100644 --- a/src/mca/ptl/elan/src/ptl_elan.c +++ b/src/mca/ptl/elan/src/ptl_elan.c @@ -24,7 +24,7 @@ mca_ptl_elan_t mca_ptl_elan = { { &mca_ptl_elan_module.super, 4, - sizeof(mca_ptl_elan_desc_item_t), + sizeof(mca_ptl_elan_send_frag_t), 0, /* ptl_exclusivity */ 0, /* ptl_latency */ 0, /* ptl_bandwidth */ @@ -165,7 +165,7 @@ mca_ptl_elan_req_init (struct mca_ptl_t *ptl, struct mca_pml_base_send_request_t *request) { int rc = OMPI_SUCCESS; - mca_ptl_elan_desc_item_t *sd; + mca_ptl_elan_send_frag_t *sd; mca_ptl_elan_send_request_t * elan_req; START_FUNC(); @@ -233,7 +233,7 @@ mca_ptl_elan_isend (struct mca_ptl_t *ptl, int flags) { int rc = OMPI_SUCCESS; - mca_ptl_elan_desc_item_t *sd; + mca_ptl_elan_send_frag_t *sd; /* XXX: * PML extract an request from PTL module and then use this @@ -246,6 +246,18 @@ mca_ptl_elan_isend (struct mca_ptl_t *ptl, if (offset == 0) /* The first fragment uses a cached desc */ sd = ((mca_ptl_elan_send_request_t*)sendreq)->req_frag; } else { + + /* Get a frag desc and allocate a send desc */ + ompi_free_list_t * frag_list; + + frag_list = &mca_ptl_elan_module.elan_send_frags_free; + + /* More sendfrag then descritpors, no need to block */ + ompi_mutex_lock(&frag_list->fl_lock); + item = ompi_list_remove_first (&((flist)->super)); + ompi_mutex_unlock(&flist->fl_lock); + OMPI_PTL_ELAN_CHECK_UNEX (item, NULL, OMPI_ERROR, 0); + sd = mca_ptl_elan_alloc_send_desc(ptl, sendreq); if (NULL == sd) { ompi_output(0, diff --git a/src/mca/ptl/elan/src/ptl_elan.h b/src/mca/ptl/elan/src/ptl_elan.h index 49b91655db..a74d7a4f71 100644 --- a/src/mca/ptl/elan/src/ptl_elan.h +++ b/src/mca/ptl/elan/src/ptl_elan.h @@ -55,9 +55,10 @@ struct mca_ptl_elan_module_1_0_0_t { mca_ptl_base_module_1_0_0_t super; /**< base PTL module */ - int elan_free_list_num; /**< initial size of free lists */ - int elan_free_list_max; /**< maximum size of free lists */ - int elan_free_list_inc; /**< # to alloc when growing lists */ + size_t elan_free_list_num; /**< initial size of free lists */ + size_t elan_free_list_max; /**< maximum size of free lists */ + size_t elan_free_list_inc; /**< # to alloc when growing lists */ + size_t elan_num_ptls; /**< number of ptls activated */ /* * We create our own simplified structure for managing elan state @@ -66,16 +67,16 @@ struct mca_ptl_elan_module_1_0_0_t { */ struct mca_ptl_elan_state_t *elan_ctrl; struct mca_ptl_elan_t **elan_ptls; /**< array of available PTLs */ - size_t elan_num_ptls; /**< number of ptls activated */ - - ompi_list_t elan_procs; /**< elan proc's */ - ompi_list_t elan_recv_frags; - ompi_list_t elan_pending_acks; - ompi_free_list_t elan_recv_frags_free; - struct mca_ptl_elan_proc_t *elan_local; + ompi_mutex_t elan_lock; /**< lock for module state */ - ompi_mutex_t elan_lock; /**< lock for module state */ + ompi_list_t elan_procs; /**< elan proc's */ + ompi_list_t elan_send_frags; + ompi_list_t elan_pending_acks; + ompi_list_t elan_recv_frags; + + ompi_free_list_t elan_send_frags_free; + ompi_free_list_t elan_recv_frags_free; }; typedef struct mca_ptl_elan_module_1_0_0_t mca_ptl_elan_module_1_0_0_t; diff --git a/src/mca/ptl/elan/src/ptl_elan_comm_init.c b/src/mca/ptl/elan/src/ptl_elan_comm_init.c index 45e42ed68a..c77981fbc6 100644 --- a/src/mca/ptl/elan/src/ptl_elan_comm_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_comm_init.c @@ -21,7 +21,7 @@ ompi_init_elan_queue_events (mca_ptl_elan_t * ptl, int main_align, main_size; int elan_align, elan_size; - mca_ptl_elan_desc_item_t *desc; + mca_ptl_elan_send_frag_t *desc; RAIL *rail; ELAN4_CTX *ctx; @@ -55,8 +55,8 @@ ompi_init_elan_queue_events (mca_ptl_elan_t * ptl, /* Allocate the elements */ - desc = (mca_ptl_elan_desc_item_t *) - malloc(sizeof(mca_ptl_elan_desc_item_t) * (count + 1)); + desc = (mca_ptl_elan_send_frag_t *) + malloc(sizeof(mca_ptl_elan_send_frag_t) * (count + 1)); OMPI_PTL_ELAN_CHECK_UNEX (desc, NULL, OMPI_ERROR, 0); ptr = (ompi_ptl_elan_qdma_desc_t *) elan4_allocMain (rail->r_alloc, @@ -158,7 +158,7 @@ ompi_init_elan_qdma (mca_ptl_elan_module_1_0_0_t * emp, OMPI_PTL_ELAN_CHECK_UNEX (queue->tx_cmdq, NULL, OMPI_ERROR, 0); /* - * Elan4 has a rather complicated hierarchical event mechanism. + * Elan4 has a hierarchical event mechanism. * It is easy to use but nontrivial to manipulate * We implement a simpler event control mechanism, which * should also provide us the capability to chain event, diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.c b/src/mca/ptl/elan/src/ptl_elan_frag.c index 4adf078536..9b80d2f840 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.c +++ b/src/mca/ptl/elan/src/ptl_elan_frag.c @@ -14,6 +14,26 @@ #include "ptl_elan_frag.h" #include "ptl_elan_priv.h" +static void +mca_ptl_elan_send_frag_construct (mca_ptl_elan_send_frag_t * frag) +{ + frag->frag_progressed = 0; + frag->desc = 0; +} + +static void +mca_ptl_elan_send_frag_destruct (mca_ptl_elan_send_frag_t * frag) +{ + /* Nothing to do then */ +} + +ompi_class_t mca_ptl_elan_send_frag_t_class = { + "mca_ptl_elan_send_frag_t", + OBJ_CLASS (mca_ptl_base_frag_t), + (ompi_construct_t) mca_ptl_elan_send_frag_construct, + (ompi_destruct_t) mca_ptl_elan_send_frag_destruct +}; + static void mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t * frag) { @@ -34,8 +54,6 @@ mca_ptl_elan_recv_frag_construct (mca_ptl_elan_recv_frag_t * frag) static void mca_ptl_elan_recv_frag_destruct (mca_ptl_elan_recv_frag_t * frag) { - /* Does this destruct free the memory? since OBJ_DESTRUCT, - * works only for non-dynamically allocated objects */ frag->frag_hdr_cnt = 0; frag->frag_msg_cnt = 0; frag->frag_progressed = 0; @@ -55,7 +73,7 @@ ompi_class_t mca_ptl_elan_recv_frag_t_class = { extern mca_ptl_elan_state_t mca_ptl_elan_global_state; -mca_ptl_elan_desc_item_t * +mca_ptl_elan_send_frag_t * mca_ptl_elan_alloc_send_desc (struct mca_ptl_t *ptl_ptr, struct mca_pml_base_send_request_t *sendreq) { @@ -64,7 +82,7 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_t *ptl_ptr, ompi_free_list_t *flist; ompi_list_item_t *item; - mca_ptl_elan_desc_item_t *desc; + mca_ptl_elan_send_frag_t *desc; START_FUNC(); @@ -105,7 +123,7 @@ mca_ptl_elan_alloc_send_desc (struct mca_ptl_t *ptl_ptr, item = ompi_list_remove_first (&((flist)->super)); } } - desc = (mca_ptl_elan_desc_item_t *) item; + desc = (mca_ptl_elan_send_frag_t *) item; desc->desc->desc_type = MCA_PTL_ELAN_QDMA_DESC; } desc->desc->req = (struct mca_pml_base_send_request_t *)sendreq; diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.h b/src/mca/ptl/elan/src/ptl_elan_frag.h index adfe4b8294..679d87b8db 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.h +++ b/src/mca/ptl/elan/src/ptl_elan_frag.h @@ -22,19 +22,12 @@ extern ompi_class_t mca_ptl_elan_recv_frag_t_class; struct mca_ptl_elan_peer_t; struct ompi_ptl_elan_base_desc_t; -struct mca_ptl_elan_desc_item_t { -#if 0 - mca_ptl_base_send_frag_t frag_send; - struct iovec *frag_vec_ptr; - size_t frag_vec_cnt; - struct iovec frag_vec[2]; - volatile int frag_progressed; -#endif - ompi_list_item_t super; - volatile int frag_progressed; +struct mca_ptl_elan_send_frag_t { + mca_ptl_base_frag_t frag_base; + volatile int frag_progressed; struct ompi_ptl_elan_base_desc_t *desc; }; -typedef struct mca_ptl_elan_desc_item_t mca_ptl_elan_desc_item_t; +typedef struct mca_ptl_elan_send_frag_t mca_ptl_elan_send_frag_t; /** * ELAN received fragment derived type. @@ -54,7 +47,7 @@ struct mca_ptl_elan_recv_frag_t { }; typedef struct mca_ptl_elan_recv_frag_t mca_ptl_elan_recv_frag_t; -mca_ptl_elan_desc_item_t * +mca_ptl_elan_send_frag_t * mca_ptl_elan_alloc_send_desc( struct mca_ptl_t *ptl, struct mca_pml_base_send_request_t *sendreq); @@ -84,5 +77,4 @@ mca_ptl_elan_recv_frag_progress(mca_ptl_elan_recv_frag_t* frag) } #endif - #endif diff --git a/src/mca/ptl/elan/src/ptl_elan_module.c b/src/mca/ptl/elan/src/ptl_elan_module.c index 563d02ae9c..57da209639 100644 --- a/src/mca/ptl/elan/src/ptl_elan_module.c +++ b/src/mca/ptl/elan/src/ptl_elan_module.c @@ -152,6 +152,14 @@ mca_ptl_elan_module_close (void) } } + if (elan_mp->elan_send_frags_free.fl_num_allocated != + elan_mp->elan_send_frags_free.super.ompi_list_length) { + ompi_output (0, "[%s:%d] send_frags : %d allocated %d returned\n", + __FILE__, __LINE__, + elan_mp->elan_send_frags_free.fl_num_allocated, + elan_mp->elan_send_frags_free.super.ompi_list_length); + } + if (elan_mp->elan_recv_frags_free.fl_num_allocated != elan_mp->elan_recv_frags_free.super.ompi_list_length) { ompi_output (0, "[%s:%d] recv_frags : %d allocated %d returned\n", @@ -165,11 +173,13 @@ mca_ptl_elan_module_close (void) /* Free the empty list holders */ OBJ_DESTRUCT (&(elan_mp->elan_procs)); OBJ_DESTRUCT (&(elan_mp->elan_pending_acks)); + OBJ_DESTRUCT (&(elan_mp->elan_send_frags)); OBJ_DESTRUCT (&(elan_mp->elan_recv_frags)); /* TODO: * We need free all the memory allocated for this list * before desctructing this free_list */ + OBJ_DESTRUCT (&(elan_mp->elan_send_frags_free)); OBJ_DESTRUCT (&(elan_mp->elan_recv_frags_free)); /* Destruct other structures */ @@ -206,6 +216,13 @@ mca_ptl_elan_module_init (int *num_ptls, *allow_multi_user_threads = true; *have_hidden_threads = OMPI_HAVE_THREADS; + ompi_free_list_init (&(elan_mp->elan_send_frags_free), + sizeof (mca_ptl_elan_send_frag_t), + OBJ_CLASS (mca_ptl_elan_recv_frag_t), + elan_mp->elan_free_list_num, + elan_mp->elan_free_list_max, + elan_mp->elan_free_list_inc, NULL); + ompi_free_list_init (&(elan_mp->elan_recv_frags_free), sizeof (mca_ptl_elan_recv_frag_t), OBJ_CLASS (mca_ptl_elan_recv_frag_t), diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.c b/src/mca/ptl/elan/src/ptl_elan_priv.c index 2fb03d340b..a12a56db8e 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.c +++ b/src/mca/ptl/elan/src/ptl_elan_priv.c @@ -102,18 +102,6 @@ mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc, *size = size_out; hdr->hdr_frag.hdr_frag_length = size_out; - /* fragment state */ -#if 0 - sendfrag->frag_owner = &ptl_peer->peer_ptl->super; - sendfrag->frag_send.frag_request = sendreq; - sendfrag->frag_send.frag_base.frag_addr = sendfrag->frag_vec[1].iov_base; - sendfrag->frag_send.frag_base.frag_size = size_out; - sendfrag->frag_peer = ptl_peer; - - /* XXX: Fragment state, is this going to be set anywhere in PML */ - sendfrag->frag_progressed = 0; -#endif - desc->main_dma.dma_srcAddr = MAIN2ELAN (desc->rail->r_ctx, &desc->buff[0]); @@ -146,7 +134,7 @@ mca_ptl_elan_init_qdma_desc (struct ompi_ptl_elan_qdma_desc_t *desc, int -mca_ptl_elan_start_desc (mca_ptl_elan_desc_item_t * desc, +mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc, struct mca_ptl_elan_peer_t *ptl_peer, struct mca_pml_base_send_request_t *sendreq, size_t offset, @@ -157,6 +145,19 @@ mca_ptl_elan_start_desc (mca_ptl_elan_desc_item_t * desc, START_FUNC(); + /* fragment state */ +#if 0 + sendfrag->frag_owner = &ptl_peer->peer_ptl->super; + sendfrag->frag_send.frag_request = sendreq; + sendfrag->frag_send.frag_base.frag_addr = sendfrag->frag_vec[1].iov_base; + sendfrag->frag_send.frag_base.frag_size = size_out; + sendfrag->frag_peer = ptl_peer; + + /* XXX: Fragment state, is this going to be set anywhere in PML */ + sendfrag->frag_progressed = 0; +#endif + + if (desc->desc->desc_type == MCA_PTL_ELAN_QDMA_DESC) { struct ompi_ptl_elan_qdma_desc_t *qdma; @@ -379,7 +380,7 @@ mca_ptl_elan_update_send (mca_ptl_elan_module_1_0_0_t * emp) { struct mca_ptl_elan_t *ptl; ompi_ptl_elan_queue_ctrl_t *queue; - mca_ptl_elan_desc_item_t *desc; + mca_ptl_elan_send_frag_t *desc; ELAN4_CTX *ctx; int num_ptls; @@ -398,7 +399,7 @@ mca_ptl_elan_update_send (mca_ptl_elan_module_1_0_0_t * emp) ctx = ptl->ptl_elan_ctx; while (ompi_list_get_size (&queue->tx_desc) > 0) { - desc = (mca_ptl_elan_desc_item_t *) + desc = (mca_ptl_elan_send_frag_t *) ompi_list_get_first (&queue->tx_desc); #if 1 rc = (int *) (&desc->desc->main_doneWord); @@ -410,7 +411,7 @@ mca_ptl_elan_update_send (mca_ptl_elan_module_1_0_0_t * emp) mca_ptl_base_header_t *header; mca_ptl_elan_send_request_t *req; /* Remove the desc, update the request, put back to free list */ - desc = (mca_ptl_elan_desc_item_t *) + desc = (mca_ptl_elan_send_frag_t *) ompi_list_remove_first (&queue->tx_desc); req = desc->desc->req; header = (mca_ptl_base_header_t *)&desc->desc->buff[0]; diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.h b/src/mca/ptl/elan/src/ptl_elan_priv.h index 7bf047cb98..2295af1a5b 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.h +++ b/src/mca/ptl/elan/src/ptl_elan_priv.h @@ -125,7 +125,7 @@ typedef struct { */ struct mca_ptl_elan_send_request_t { mca_pml_base_send_request_t super; - mca_ptl_elan_desc_item_t *req_frag; + mca_ptl_elan_send_frag_t *req_frag; }; typedef struct mca_ptl_elan_send_request_t mca_ptl_elan_send_request_t; @@ -262,15 +262,15 @@ int ompi_init_elan_stat (mca_ptl_elan_module_1_0_0_t * emp, int num_rails); /* communication prototypes */ -int mca_ptl_elan_start_desc(mca_ptl_elan_desc_item_t *desc, +int mca_ptl_elan_start_desc(mca_ptl_elan_send_frag_t *desc, struct mca_ptl_elan_peer_t *ptl_peer, struct mca_pml_base_send_request_t *sendreq, size_t offset, size_t *size, int flags); -int mca_ptl_elan_poll_desc(mca_ptl_elan_desc_item_t *desc); -int mca_ptl_elan_wait_desc(mca_ptl_elan_desc_item_t *desc); +int mca_ptl_elan_poll_desc(mca_ptl_elan_send_frag_t *desc); +int mca_ptl_elan_wait_desc(mca_ptl_elan_send_frag_t *desc); /* control, synchronization and state prototypes */ int mca_ptl_elan_drain_recv(mca_ptl_elan_module_1_0_0_t *emp);