From 1ebf9fd3a4db18f51d3cb42619ec9518ccbe103e Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Thu, 15 Dec 2016 16:51:29 -0500 Subject: [PATCH 1/3] osc/pt2pt: Fix PSCW after Fence wrong answer. * If the user uses PSCW synchronization after a Fence then the previous epoch is not reset which can cause the PSCW to transfer data before it is ready leading to wrong answers. * This commit resets the `eager_send_active` in the start call. Signed-off-by: Joshua Hursey --- ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c index 3c086a42f2..55917ca65a 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c @@ -10,7 +10,7 @@ * All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2016 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -227,6 +227,12 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) /* haven't processed any post messages yet */ sync->sync_expected = sync->num_peers; + /* If the previous epoch was from Fence, then eager_send_active is still + * set to true at this time, but it shouldn't be true until we get our + * incoming Posts. So reset to 'false' for this new epoch.
+ */ + sync->eager_send_active = false; + OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_start entering with group size %d...", sync->num_peers)); From 0d1336b4a8f7a7d6cf48c372720846808e534a9f Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Thu, 15 Dec 2016 16:59:13 -0500 Subject: [PATCH 2/3] osc/pt2pt: Fix Lock/Unlock and Get wrong answer * When using `MPI_Lock`/`MPI_Unlock` with `MPI_Get` and non-contiguous datatypes it is possible that the unlock finishes too early before the data is actually present in the recv buffer. * We need to wait for the irecv to complete before unlocking the target. This commit waits for the outgoing fragment counts to become equal before unlocking. Signed-off-by: Joshua Hursey --- ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 2ba0aee7ef..e49ba1a384 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -10,7 +10,7 @@ * All rights reserved. * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2016 IBM Corporation. All rights reserved. * Copyright (c) 2012-2013 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science @@ -421,6 +421,16 @@ static int ompi_osc_pt2pt_unlock_internal (int target, ompi_win_t *win) /* wait for unlock acks. this signals remote completion of fragments */ ompi_osc_pt2pt_sync_wait_expected (lock); + /* It is possible for the unlock to finish too early before the data + * is actually present in the recv buffer (for non-contiguous datatypes) + * So make sure to wait for all of the fragments to arrive.
+ */ + OPAL_THREAD_LOCK(&module->lock); + while (module->outgoing_frag_count < module->outgoing_frag_signal_count) { + opal_condition_wait(&module->cond, &module->lock); + } + OPAL_THREAD_UNLOCK(&module->lock); + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_unlock: unlock of %d complete", target)); } else { From eec1d5bf2ebf2048d484d4a59a864270a4757a00 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Thu, 15 Dec 2016 17:13:31 -0500 Subject: [PATCH 3/3] osc/pt2pt: Fix hang with Put and Win_lock_all * When using `MPI_Put` with `MPI_Win_lock_all` a hang is possible since the `put` is waiting on `eager_send_active` to become `true` but that variable might not be reset in the case of `MPI_Win_lock_all` depending on other incoming events (e.g., `post` or ACKs of lock requests). Signed-off-by: Joshua Hursey --- ompi/mca/osc/pt2pt/osc_pt2pt_comm.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c index 4d16f1a8ce..3973cf88d9 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c @@ -15,6 +15,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -336,7 +337,16 @@ static inline int ompi_osc_pt2pt_put_w_req (const void *origin_addr, int origin_ if (is_long_msg) { /* wait for eager sends to be active before starting a long put */ - ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); + if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) { + OPAL_THREAD_LOCK(&pt2pt_sync->lock); + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); + while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) { + opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock); + } + OPAL_THREAD_UNLOCK(&pt2pt_sync->lock); + } else { + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); + } } OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, @@ -495,7 +505,16 @@ ompi_osc_pt2pt_accumulate_w_req (const void *origin_addr, int origin_count, if (is_long_msg) { /* wait for synchronization before posting a long message */ - ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); + if (pt2pt_sync->type == OMPI_OSC_PT2PT_SYNC_TYPE_LOCK) { + OPAL_THREAD_LOCK(&pt2pt_sync->lock); + ompi_osc_pt2pt_peer_t *peer = ompi_osc_pt2pt_peer_lookup (module, target); + while (!(peer->flags & OMPI_OSC_PT2PT_PEER_FLAG_EAGER)) { + opal_condition_wait(&pt2pt_sync->cond, &pt2pt_sync->lock); + } + OPAL_THREAD_UNLOCK(&pt2pt_sync->lock); + } else { + ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); + } } header = (ompi_osc_pt2pt_header_acc_t*) ptr;