diff --git a/src/mca/ptl/elan/src/ptl_elan.c b/src/mca/ptl/elan/src/ptl_elan.c index c5c38e0487..c23c89641d 100644 --- a/src/mca/ptl/elan/src/ptl_elan.c +++ b/src/mca/ptl/elan/src/ptl_elan.c @@ -417,6 +417,8 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl, #endif /* Get a frag desc and allocate a send desc */ desc = mca_ptl_elan_alloc_desc(ptl, NULL, desc_type); + LOG_PRINT(PTL_ELAN_DEBUG_GET, "Get desc %p type %d\n", + desc, desc->desc->desc_type); if (NULL == desc) { ompi_output(0, @@ -432,13 +434,14 @@ mca_ptl_elan_matched (mca_ptl_base_module_t * ptl, * pay more attention to timing of the release */ #if OMPI_PTL_ELAN_ENABLE_GET mca_ptl_elan_get_with_ack (ptl, desc, recv_frag); + LOG_PRINT(PTL_ELAN_DEBUG_GET, "Get desc %p type %d\n", + desc, desc->desc->desc_type); #else mca_ptl_elan_start_ack (ptl, desc, recv_frag); #endif } } - /* Process the fragment */ set = ompi_atomic_fetch_and_set_int ( &((mca_ptl_elan_recv_frag_t *)frag)->frag_progressed, 1); if (!set) { diff --git a/src/mca/ptl/elan/src/ptl_elan_comm_init.c b/src/mca/ptl/elan/src/ptl_elan_comm_init.c index 2ac849da53..9360eb3d3a 100644 --- a/src/mca/ptl/elan/src/ptl_elan_comm_init.c +++ b/src/mca/ptl/elan/src/ptl_elan_comm_init.c @@ -57,7 +57,6 @@ ompi_init_elan_queue_events (mca_ptl_elan_module_t * ptl, (2*sizeof (E4_Event32) + ELAN_BLOCK_SIZE), elan_align); #else elan_size = OMPI_PTL_ELAN_ALIGNUP (sizeof (E4_Event32), elan_align); - #endif OBJ_CONSTRUCT(&flist->fl_lock, ompi_mutex_t); diff --git a/src/mca/ptl/elan/src/ptl_elan_frag.c b/src/mca/ptl/elan/src/ptl_elan_frag.c index 9cd3158d8b..8c7aaf7b60 100644 --- a/src/mca/ptl/elan/src/ptl_elan_frag.c +++ b/src/mca/ptl/elan/src/ptl_elan_frag.c @@ -152,7 +152,7 @@ mca_ptl_elan_alloc_recv_desc (struct mca_pml_base_recv_request_t * req) void mca_ptl_elan_send_desc_done ( - mca_ptl_elan_send_frag_t *desc, + mca_ptl_elan_send_frag_t *frag, mca_pml_base_send_request_t *req) { mca_ptl_elan_module_t *ptl; @@ -160,21 +160,45 @@ mca_ptl_elan_send_desc_done ( START_FUNC(PTL_ELAN_DEBUG_SEND); - ptl = ((ompi_ptl_elan_qdma_desc_t *)desc->desc)->ptl; - header = &desc->frag_base.frag_header; + ptl = ((ompi_ptl_elan_qdma_desc_t *)frag->desc)->ptl; + header = &frag->frag_base.frag_header; - LOG_PRINT(PTL_ELAN_DEBUG_SEND, "req %p done frag %p \n", req, desc); + if (frag->desc->desc_type == MCA_PTL_ELAN_DESC_GET) { + LOG_PRINT(PTL_ELAN_DEBUG_SEND, + "req %p done frag %p desc_status %d desc_type %d length %d\n", + req, frag, frag->desc->desc_status, + frag->desc->desc_type, frag->frag_base.frag_size); + + if(ompi_atomic_fetch_and_set_int (&frag->frag_progressed, 1) == 0) { + ptl->super.ptl_recv_progress(ptl, + (mca_pml_base_recv_request_t *) req, + frag->frag_base.frag_size, + frag->frag_base.frag_size); + } + elan4_freecq_space (ptl->ptl_elan_ctx, + ((ompi_ptl_elan_putget_desc_t *) frag->desc) + ->chain_event->ev_Params[1], 8); + OMPI_FREE_LIST_RETURN (&ptl->putget->get_desc_free, + (ompi_list_item_t *) frag); + END_FUNC(PTL_ELAN_DEBUG_SEND); + return; + } + + LOG_PRINT(PTL_ELAN_DEBUG_SEND, + "req %p done frag %p desc_status %d desc_type %d length %d\n", + req, frag, frag->desc->desc_status, + frag->desc->desc_type, + header->hdr_frag.hdr_frag_length); if(NULL == req) { /* An ack descriptor */ OMPI_FREE_LIST_RETURN (&ptl->queue->tx_desc_free, - (ompi_list_item_t *) desc); + (ompi_list_item_t *) frag); } #if 1 else if (0 == (header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) || mca_pml_base_send_request_matched(req)) { - - if(ompi_atomic_fetch_and_set_int (&desc->frag_progressed, 1) == 0) + if(ompi_atomic_fetch_and_set_int (&frag->frag_progressed, 1) == 0) { ptl->super.ptl_send_progress(ptl, req, header->hdr_frag.hdr_frag_length); @@ -182,14 +206,17 @@ mca_ptl_elan_send_desc_done ( /* Return a frag or if not cached, or it is a follow up */ if ( /*(header->hdr_frag.hdr_frag_offset != 0) || */ - (desc->desc->desc_status != MCA_PTL_ELAN_DESC_CACHED)){ - if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_PUT) { - OMPI_FREE_LIST_RETURN (&ptl->putget->put_desc_free, - (ompi_list_item_t *) desc); + (frag->desc->desc_status != MCA_PTL_ELAN_DESC_CACHED)){ + ompi_free_list_t *flist; + if (frag->desc->desc_type == MCA_PTL_ELAN_DESC_PUT) { + flist = &ptl->putget->put_desc_free; + elan4_freecq_space (ptl->ptl_elan_ctx, + ((ompi_ptl_elan_putget_desc_t *) frag->desc) + ->chain_event->ev_Params[1], 8); } else { - OMPI_FREE_LIST_RETURN (&ptl->queue->tx_desc_free, - (ompi_list_item_t *) desc); + flist = &ptl->queue->tx_desc_free; } + OMPI_FREE_LIST_RETURN (flist, (ompi_list_item_t *) frag); } else { LOG_PRINT(PTL_ELAN_DEBUG_ACK, "PML will return frag to list %p, length %d\n", @@ -217,16 +244,16 @@ mca_ptl_elan_send_desc_done ( * the start of following fragments. As the logic is not there. */ - if(ompi_atomic_fetch_and_set_int (&desc->frag_progressed, 1) == 0) { + if(ompi_atomic_fetch_and_set_int (&frag->frag_progressed, 1) == 0) { ptl->super.ptl_send_progress(ptl, req, header->hdr_frag.hdr_frag_length); } /* Return a frag or if not cached, or it is a follow up */ - if((header->hdr_frag.hdr_frag_offset != 0) || (desc->desc->desc_status + if((header->hdr_frag.hdr_frag_offset != 0) || (frag->desc->desc_status != MCA_PTL_ELAN_DESC_CACHED)) OMPI_FREE_LIST_RETURN (&queue->tx_desc_free, - (ompi_list_item_t *) desc); + (ompi_list_item_t *) frag); } #endif diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.c b/src/mca/ptl/elan/src/ptl_elan_priv.c index e2e9efacd4..c9db03eb10 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.c +++ b/src/mca/ptl/elan/src/ptl_elan_priv.c @@ -623,7 +623,7 @@ mca_ptl_elan_init_get_desc (mca_ptl_elan_module_t *ptl, desc->chain_dma.dma_dstEvent = elan4_main2elan (ctx, (void *) ptl->queue->input); - LOG_PRINT(PTL_ELAN_DEBUG_ACK, + LOG_PRINT(PTL_ELAN_DEBUG_GET, "remote frag %p local req %p buffer %p size %d len %d\n", hdr->hdr_ack.hdr_src_ptr.pval, hdr->hdr_ack.hdr_dst_match.pval, @@ -700,12 +700,12 @@ mca_ptl_elan_init_get_desc (mca_ptl_elan_module_t *ptl, E4_COOKIE_TYPE_REMOTE_DMA, destvp); desc->main_dma.dma_vproc = ptl->elan_vp; /* target is self */ - LOG_PRINT(PTL_ELAN_DEBUG_MAC, + LOG_PRINT(PTL_ELAN_DEBUG_GET, "destvp %d type %d flag %d size %d\n", destvp, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, hdr->hdr_common.hdr_size); - END_FUNC(PTL_ELAN_DEBUG_SEND); + END_FUNC(PTL_ELAN_DEBUG_GET); } #endif /* End of OMPI_PTL_ELAN_ENABLE_GET */ @@ -890,7 +890,7 @@ mca_ptl_elan_get_with_ack ( mca_ptl_base_module_t * ptl, request = recv_frag->frag_recv.frag_request; destvp = ((mca_ptl_elan_peer_t *) recv_frag->frag_recv.frag_base.frag_peer)->peer_vp; - frag->desc->desc_type = MCA_PTL_ELAN_DESC_PUT; + frag->desc->desc_type = MCA_PTL_ELAN_DESC_GET; gdesc = (ompi_ptl_elan_putget_desc_t *)frag->desc; hdr = (mca_ptl_base_header_t *) &frag->frag_base.frag_header; recv_len = @@ -925,7 +925,7 @@ mca_ptl_elan_get_with_ack ( mca_ptl_base_module_t * ptl, MEMBAR_DRAIN(); ompi_list_append (&elan_ptl->send_frags, (ompi_list_item_t *) frag); - /* XXX: fragment state, remember the recv_frag may be gone */ + /* XXX: fragment state, remember recv_frag may be gone */ frag->desc->req = (mca_pml_base_request_t *) request ; /*recv req*/ frag->desc->desc_status = MCA_PTL_ELAN_DESC_LOCAL; frag->frag_base.frag_owner= ptl; @@ -935,8 +935,8 @@ mca_ptl_elan_get_with_ack ( mca_ptl_base_module_t * ptl, frag->frag_base.frag_size = remain_len; frag->frag_progressed = 0; - LOG_PRINT(PTL_ELAN_DEBUG_ACK, - "remote frag %p local req %p buffer %p size %d len %d\n", + LOG_PRINT(PTL_ELAN_DEBUG_GET, "remote frag %p" + " local req %p buffer %p size %d len %d\n", hdr->hdr_ack.hdr_src_ptr.pval, hdr->hdr_ack.hdr_dst_match.pval, hdr->hdr_ack.hdr_dst_addr.pval, @@ -1186,12 +1186,6 @@ ptl_elan_send_comp: header = (mca_ptl_base_header_t *) rxq->qr_fptr; - LOG_PRINT(PTL_ELAN_DEBUG_MAC, - "[comp...] type %d flag %d size %d\n", - header->hdr_common.hdr_type, - header->hdr_common.hdr_flags, - header->hdr_common.hdr_size); - #if OMPI_PTL_ELAN_THREADING if (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_STOP) { /* XXX: release the lock and quit the thread */ @@ -1212,6 +1206,10 @@ ptl_elan_send_comp: mca_ptl_elan_send_desc_done (frag, (mca_pml_base_send_request_t *) basic->req); +#if OMPI_PTL_ELAN_COMP_QUEUE + elan4_freecq_space (ctx, frag->desc->comp_event->ev_Params[1], 8); +#endif + /* Work out the new front pointer */ if (rxq->qr_fptr == rxq->qr_top) { rxq->qr_fptr = rxq->qr_base; diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.h b/src/mca/ptl/elan/src/ptl_elan_priv.h index c4c1ef83c5..f9b622eedb 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.h +++ b/src/mca/ptl/elan/src/ptl_elan_priv.h @@ -52,7 +52,7 @@ #define PTL_ELAN_DEBUG_GET (0x400) #define PTL_ELAN_DEBUG_CHAIN (0x800) -#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_NONE) +#define PTL_ELAN_DEBUG_FLAG (PTL_ELAN_DEBUG_NONE) #define START_FUNC(flag) \ do { \ @@ -102,7 +102,7 @@ do { \ #define OMPI_PTL_ELAN_NUM_PUTGET (8) #define OMPI_PTL_ELAN_ZERO_FFRAG (0) -#define OMPI_PTL_ELAN_ENABLE_GET (0) +#define OMPI_PTL_ELAN_ENABLE_GET (1) #define OMPI_PTL_ELAN_COMP_QUEUE (0) #define OMPI_PTL_ELAN_THREADING \ (OMPI_PTL_ELAN_COMP_QUEUE && OMPI_HAVE_POSIX_THREADS) diff --git a/src/mca/ptl/elan/tests/check.c b/src/mca/ptl/elan/tests/check.c index c54df7a34f..0bec734067 100644 --- a/src/mca/ptl/elan/tests/check.c +++ b/src/mca/ptl/elan/tests/check.c @@ -9,7 +9,7 @@ #define MYBUFSIZE (4*1024*1024) #define CHECK 1 -#define PONG 1 +#define PONG 0 char s_buf[MYBUFSIZE]; char r_buf[MYBUFSIZE]; diff --git a/src/mca/ptl/elan/tests/test_util.h b/src/mca/ptl/elan/tests/test_util.h index 95948eb46a..4d51dafb43 100644 --- a/src/mca/ptl/elan/tests/test_util.h +++ b/src/mca/ptl/elan/tests/test_util.h @@ -10,6 +10,7 @@ static void env_init_for_elan() setenv("OMPI_MCA_pcm_cofs_jobid", "1", 1); setenv("OMPI_MCA_pcm_cofs_num_procs", "2", 1); setenv("OMPI_MCA_ptl_base_exclude", "tcp", 1); + setenv("OMPI_MCA_oob_base_exclude", "tcp", 1); if (NULL != (rms_rank = getenv("RMS_RANK"))) { /* RMS_JOBID:RMS_NNODES:RMS_NPROCS:RMS_NODEID:RMS_RESOURCEID */