pml/ob1: fix deadlock with communicator flag ALLOW_OVERTAKE.
The receive path did not check whether the ALLOW_OVERTAKE communicator assertion is set before validating the sequence number, which could cause a deadlock. Signed-off-by: Thananon Patinyasakdikul <tpatinya@utk.edu>
Этот коммит содержится в:
родитель
d1fd1f4cce
Коммит
0263456cf4
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2018 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
||||||
@ -963,6 +963,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
|
|||||||
frag_msg_seq = hdr->hdr_seq;
|
frag_msg_seq = hdr->hdr_seq;
|
||||||
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
|
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
|
||||||
|
|
||||||
|
if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm_ptr)) {
|
||||||
/* If the sequence number is wrong, queue it up for later. */
|
/* If the sequence number is wrong, queue it up for later. */
|
||||||
if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected)) {
|
if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected)) {
|
||||||
mca_pml_ob1_recv_frag_t* frag;
|
mca_pml_ob1_recv_frag_t* frag;
|
||||||
@ -977,6 +978,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
|
|||||||
OB1_MATCHING_UNLOCK(&comm->matching_lock);
|
OB1_MATCHING_UNLOCK(&comm->matching_lock);
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* mca_pml_ob1_recv_frag_match_proc() will release the lock. */
|
/* mca_pml_ob1_recv_frag_match_proc() will release the lock. */
|
||||||
return mca_pml_ob1_recv_frag_match_proc(btl, comm_ptr, proc, hdr,
|
return mca_pml_ob1_recv_frag_match_proc(btl, comm_ptr, proc, hdr,
|
||||||
@ -1011,6 +1013,10 @@ mca_pml_ob1_recv_frag_match_proc( mca_btl_base_module_t *btl,
|
|||||||
|
|
||||||
match_this_frag:
|
match_this_frag:
|
||||||
/* We're now expecting the next sequence number. */
|
/* We're now expecting the next sequence number. */
|
||||||
|
/* NOTE: Ideally we would check the ALLOW_OVERTAKE comm flag here,
|
||||||
|
* but adding a branch in this critical path is not ideal for performance.
|
||||||
|
* We decided to keep advancing the sequence number even though we are not doing
|
||||||
|
* anything with it. */
|
||||||
proc->expected_sequence++;
|
proc->expected_sequence++;
|
||||||
|
|
||||||
/* We generate the SEARCH_POSTED_QUEUE only when the message is
|
/* We generate the SEARCH_POSTED_QUEUE only when the message is
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user