diff --git a/src/mca/ptl/elan/src/ptl_elan.h b/src/mca/ptl/elan/src/ptl_elan.h index 8c55a2a8d7..1423fad4f1 100644 --- a/src/mca/ptl/elan/src/ptl_elan.h +++ b/src/mca/ptl/elan/src/ptl_elan.h @@ -72,8 +72,8 @@ struct mca_ptl_elan_module_1_0_0_t { ompi_list_t elan_procs; /**< elan proc's */ ompi_list_t elan_send_frags; - ompi_list_t elan_pending_acks; ompi_list_t elan_recv_frags; + ompi_list_t elan_pending_acks; ompi_free_list_t elan_send_frags_free; ompi_free_list_t elan_recv_frags_free; diff --git a/src/mca/ptl/elan/src/ptl_elan_module.c b/src/mca/ptl/elan/src/ptl_elan_module.c index 57da209639..a7961e4e6a 100644 --- a/src/mca/ptl/elan/src/ptl_elan_module.c +++ b/src/mca/ptl/elan/src/ptl_elan_module.c @@ -25,6 +25,11 @@ #include "ptl_elan_frag.h" #include "ptl_elan_priv.h" +#ifdef CHECK_ELAN +#undef CHECK_ELAN +#define CHECK_ELAN 0 +#endif + extern ompi_proc_t *ompi_proc_local_proc; mca_ptl_elan_module_1_0_0_t mca_ptl_elan_module = { @@ -121,6 +126,9 @@ mca_ptl_elan_module_open (void) OBJ_CONSTRUCT (&elan_mp->elan_procs, ompi_list_t); OBJ_CONSTRUCT (&elan_mp->elan_pending_acks, ompi_list_t); OBJ_CONSTRUCT (&elan_mp->elan_recv_frags, ompi_list_t); + OBJ_CONSTRUCT (&elan_mp->elan_send_frags, ompi_list_t); + + OBJ_CONSTRUCT (&elan_mp->elan_send_frags_free, ompi_free_list_t); OBJ_CONSTRUCT (&elan_mp->elan_recv_frags_free, ompi_free_list_t); /* initialize other objects */ @@ -216,6 +224,12 @@ mca_ptl_elan_module_init (int *num_ptls, *allow_multi_user_threads = true; *have_hidden_threads = OMPI_HAVE_THREADS; + if (CHECK_ELAN) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] before list init...\n", + hostname, __FUNCTION__, __LINE__); + } + ompi_free_list_init (&(elan_mp->elan_send_frags_free), sizeof (mca_ptl_elan_send_frag_t), OBJ_CLASS (mca_ptl_elan_recv_frag_t), @@ -223,6 +237,13 @@ mca_ptl_elan_module_init (int *num_ptls, elan_mp->elan_free_list_max, elan_mp->elan_free_list_inc, NULL); + if (CHECK_ELAN) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] after list init...\n", + hostname, __FUNCTION__, __LINE__); + } + + ompi_free_list_init (&(elan_mp->elan_recv_frags_free), sizeof (mca_ptl_elan_recv_frag_t), OBJ_CLASS (mca_ptl_elan_recv_frag_t), @@ -230,6 +251,12 @@ mca_ptl_elan_module_init (int *num_ptls, elan_mp->elan_free_list_max, elan_mp->elan_free_list_inc, NULL); + if (CHECK_ELAN) { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] after list init...\n", + hostname, __FUNCTION__, __LINE__); + } + /* open basic elan device */ if (OMPI_SUCCESS != ompi_mca_ptl_elan_init(&mca_ptl_elan_module)) { ompi_output(0, @@ -285,6 +312,16 @@ int mca_ptl_elan_module_progress (mca_ptl_tstamp_t tstamp) { START_FUNC(); + /*if (times <= -1000)*/ + if (times <= -1) + { + char hostname[32]; gethostname(hostname, 32); + fprintf(stderr, "[%s:%s:%d] debugging ...\n", + hostname, __FUNCTION__, __LINE__); + exit(1); + } else { + times ++; + } mca_ptl_elan_drain_recv(elan_mp); mca_ptl_elan_update_send(elan_mp); END_FUNC(); diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.c b/src/mca/ptl/elan/src/ptl_elan_priv.c index e50b4fd386..86100d66f6 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.c +++ b/src/mca/ptl/elan/src/ptl_elan_priv.c @@ -146,19 +146,6 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc, START_FUNC(); - /* fragment state */ -#if 0 - sendfrag->frag_owner = &ptl_peer->peer_ptl->super; - sendfrag->frag_send.frag_request = sendreq; - sendfrag->frag_send.frag_base.frag_addr = sendfrag->frag_vec[1].iov_base; - sendfrag->frag_send.frag_base.frag_size = size_out; - sendfrag->frag_peer = ptl_peer; - - /* XXX: Fragment state, is this going to be set anywhere in PML */ - sendfrag->frag_progressed = 0; -#endif - - if (desc->desc->desc_type == MCA_PTL_ELAN_DESC_QDMA) { struct ompi_ptl_elan_qdma_desc_t *qdma; @@ -182,6 +169,15 @@ mca_ptl_elan_start_desc (mca_ptl_elan_send_frag_t * desc, return OMPI_ERROR; } + /*mca_ptl_base_frag_t frag_base; */ + + /* fragment state */ + desc->frag_base.frag_owner = &ptl_peer->peer_ptl->super; + desc->frag_base.frag_peer = ptl_peer; + desc->frag_base.frag_addr = NULL; + desc->frag_base.frag_size = *size; + desc->frag_progressed = 0; + END_FUNC(); return OMPI_SUCCESS; } @@ -293,10 +289,10 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_module_1_0_0_t * emp) OMPI_LOCK (&queue->rx_lock); -#if 1 +#if 0 rc = (*(int *) (&rxq->qr_doneWord)); #else - rc = elan4_pollevent_word (ctx, &rxq->qr_doneWord, 1); + rc = elan4_pollevent_word (ctx, &rxq->qr_doneWord, 2000); #endif if (rc) { @@ -310,7 +306,7 @@ mca_ptl_elan_drain_recv (mca_ptl_elan_module_1_0_0_t * emp) gethostname(hostname, 32); fprintf(stderr, - "[%s recv...] type %x flag %x size %x\n", + "[%s recv...] type %d flag %d size %d\n", hostname, header->hdr_common.hdr_type, header->hdr_common.hdr_flags, @@ -401,11 +397,11 @@ mca_ptl_elan_update_send (mca_ptl_elan_module_1_0_0_t * emp) while (ompi_list_get_size (&queue->tx_desc) > 0) { desc = (mca_ptl_elan_send_frag_t *) ompi_list_get_first (&queue->tx_desc); -#if 1 +#if 0 rc = * ((int *) (&desc->desc->main_doneWord)); #else /* Poll the completion event for 1usec */ - rc = elan4_pollevent_word(ctx, &desc->desc->main_doneWord, 1); + rc = elan4_pollevent_word(ctx, &desc->desc->main_doneWord, 2000); #endif if (rc) { mca_ptl_base_header_t *header; @@ -417,6 +413,18 @@ mca_ptl_elan_update_send (mca_ptl_elan_module_1_0_0_t * emp) header = (mca_ptl_base_header_t *)& ((ompi_ptl_elan_qdma_desc_t *)desc->desc)->buff[0]; + if (CHECK_ELAN) { + char hostname[32]; + gethostname(hostname, 32); + + fprintf(stderr, + "[%s comp sending...] type %d flag %d size %d\n", + hostname, + header->hdr_common.hdr_type, + header->hdr_common.hdr_flags, + header->hdr_common.hdr_size); + } + if(NULL == req) { /* An ack descriptor */ OMPI_FREE_LIST_RETURN (&queue->tx_desc_free, (ompi_list_item_t *) desc); diff --git a/src/mca/ptl/elan/src/ptl_elan_priv.h b/src/mca/ptl/elan/src/ptl_elan_priv.h index 2295af1a5b..7e27b946e5 100644 --- a/src/mca/ptl/elan/src/ptl_elan_priv.h +++ b/src/mca/ptl/elan/src/ptl_elan_priv.h @@ -48,9 +48,9 @@ } \ } while (0) -#define CHECK_ELAN 0 +#define CHECK_ELAN 1 -#if CHECK_ELAN +#if CHECK_ELAN && 0 #define START_FUNC() \ do { \ char hostname[32]; gethostname(hostname, 32); \ diff --git a/src/mca/ptl/elan/tests/mpi_test.c b/src/mca/ptl/elan/tests/mpi_test.c index 01bcf24c9f..8095b97769 100644 --- a/src/mca/ptl/elan/tests/mpi_test.c +++ b/src/mca/ptl/elan/tests/mpi_test.c @@ -14,7 +14,7 @@ int main (int argc, char ** argv) MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &proc); MPI_Comm_size(MPI_COMM_WORLD, &nproc); - MPI_Barrier(MPI_COMM_WORLD); + /*MPI_Barrier(MPI_COMM_WORLD);*/ fprintf(stdout, "[%s:%s:%d] done with init \n", hostname, __FUNCTION__, __LINE__); fflush(stdout);