From 95c0a17b9a1de8422e38052b080071c08ea6f497 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 17 Jan 2007 21:13:12 +0000 Subject: [PATCH] Send the unlock request before starting the requests. We won't unlock until we get an ack from the remote side, so there's no longer a race there (I used to do the unlock request last, after local completion of all the requests completed, to try to avoid having the passive side reply to the active side, but I don't do that anymore). The unlock side will not "unlock" the window until it actually receives the correct number of results, so we're good there. This fixes an issue where we would receive data on the remote side we weren't expecting that could cause us to release a lock before it really should have been released to the requesting peer. It could also cause a deadlock if one of the processes trying to unlock was "self", as that would result in the active unlock never sending the unlock request, even though it sent the payload, which could cause a counter that should always be positive to hit -1, causing an infinite loop that could only be solved by popping up the stack, which was an impossibility. Refs trac:785 This commit was SVN r13160. The following Trac tickets were found above: Ticket 785 --> https://svn.open-mpi.org/trac/ompi/ticket/785 --- ompi/mca/osc/pt2pt/osc_pt2pt_sync.c | 31 +++++++++++++---------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c index e77e867279..009b48e49f 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c @@ -465,7 +465,20 @@ ompi_osc_pt2pt_module_unlock(int target, out of pending_sendreqs, so don't need the lock here */ out_count = opal_list_get_size(&(P2P_MODULE(win)->p2p_copy_pending_sendreqs)); - OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out), out_count); + /* we want to send all the requests, plus we wait for one more + completion event for the control message ack from the unlocker + saying we're done */ + OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out), out_count + 1); + + /* send the unlock request */ + opal_output_verbose(50, ompi_osc_base_output, + "%d sending unlock request to %d", + P2P_MODULE(win)->p2p_comm->c_my_rank, target); + ompi_osc_pt2pt_control_send(P2P_MODULE(win), + proc, + OMPI_OSC_PT2PT_HDR_UNLOCK_REQ, + P2P_MODULE(win)->p2p_comm->c_my_rank, + out_count); while (NULL != (item = opal_list_remove_first(&(P2P_MODULE(win)->p2p_copy_pending_sendreqs)))) { @@ -487,22 +500,6 @@ ompi_osc_pt2pt_module_unlock(int target, ompi_osc_pt2pt_progress_long(P2P_MODULE(win)); } - /* send the unlock request */ - opal_output_verbose(50, ompi_osc_base_output, - "%d sending unlock request to %d", - P2P_MODULE(win)->p2p_comm->c_my_rank, target); - ompi_osc_pt2pt_control_send(P2P_MODULE(win), - proc, - OMPI_OSC_PT2PT_HDR_UNLOCK_REQ, - P2P_MODULE(win)->p2p_comm->c_my_rank, - out_count); - - /* wait for ack */ - OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out), 1); - while (0 != P2P_MODULE(win)->p2p_num_pending_out) { - ompi_osc_pt2pt_progress_long(P2P_MODULE(win)); - } - /* set our mode on the window */ ompi_win_remove_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_LOCK_ACCESS);