From fdf4c3b9002205fcb744ecb90cd78dd4d83fbac6 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 28 Mar 2014 22:06:16 +0000 Subject: [PATCH] osc/rdma: really fix active message support The last fix prevented a hang but had some cases where the results were wrong. Fixed. Tested with armci, openmpi/ibm, openmpi/onesided. cmr=v1.8:reviewer=jsquyres This commit was SVN r31284. --- ompi/mca/osc/rdma/osc_rdma_active_target.c | 6 +++++- ompi/mca/osc/rdma/osc_rdma_data_move.c | 5 ++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index 8c44a2529c..b31141563d 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -242,7 +242,6 @@ ompi_osc_rdma_complete(ompi_win_t *win) complete_req.base.type = OMPI_OSC_RDMA_HDR_TYPE_COMPLETE; complete_req.base.flags = OMPI_OSC_RDMA_HDR_FLAG_VALID; complete_req.frag_count = module->epoch_outgoing_frag_count[ranks[i]]; - module->epoch_outgoing_frag_count[ranks[i]] = 0; ret = ompi_osc_rdma_control_send(module, ranks[i], @@ -256,6 +255,11 @@ ompi_osc_rdma_complete(ompi_win_t *win) ret = ompi_osc_rdma_frag_flush_all(module); if (OMPI_SUCCESS != ret) goto cleanup; + /* zero the fragment counts here to ensure they are zeroed */ + for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) { + module->epoch_outgoing_frag_count[ranks[i]] = 0; + } + /* wait for outgoing requests to complete. 
Don't wait for incoming, as we're only completing the access epoch, not the exposure epoch */ while (module->outgoing_frag_count != module->outgoing_frag_signal_count) { diff --git a/ompi/mca/osc/rdma/osc_rdma_data_move.c b/ompi/mca/osc/rdma/osc_rdma_data_move.c index d5f4e9ffff..6471fa408b 100644 --- a/ompi/mca/osc/rdma/osc_rdma_data_move.c +++ b/ompi/mca/osc/rdma/osc_rdma_data_move.c @@ -1328,9 +1328,8 @@ static inline int process_complete (ompi_osc_rdma_module_t *module, int source, OPAL_THREAD_LOCK(&module->lock); - /* the current fragment is not part of the frag_count but it doesn't need be be adjusted - * for here */ - module->active_incoming_frag_signal_count += complete_header->frag_count; + /* the current fragment is not part of the frag_count so we need to add it here */ + module->active_incoming_frag_signal_count += complete_header->frag_count + 1; module->num_complete_msgs++; if (0 == module->num_complete_msgs) {