From fd0e1b7261a3aa9cbe8e3568c1c6ba8faf57154a Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 10 Jun 2014 16:24:13 +0000 Subject: [PATCH] If we detect an error on a request that has already been released at the MPI level, we should call abort on MPI_COMM_WORLD. Fixes ticket #1943. cmr=v1.8.2:reviewer=jsquyres This commit was SVN r31982. --- ompi/mca/pml/bfo/pml_bfo_recvreq.h | 5 ++++- ompi/mca/pml/bfo/pml_bfo_sendreq.h | 4 ++++ ompi/mca/pml/ob1/pml_ob1_recvreq.h | 3 +++ ompi/mca/pml/ob1/pml_ob1_sendreq.h | 4 ++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/ompi/mca/pml/bfo/pml_bfo_recvreq.h b/ompi/mca/pml/bfo/pml_bfo_recvreq.h index 518aa5bfd4..ca12b0baee 100644 --- a/ompi/mca/pml/bfo/pml_bfo_recvreq.h +++ b/ompi/mca/pml/bfo/pml_bfo_recvreq.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, @@ -175,6 +175,9 @@ recv_request_pml_complete(mca_pml_bfo_recv_request_t *recvreq) OPAL_THREAD_LOCK(&ompi_request_lock); if(true == recvreq->req_recv.req_base.req_free_called) { + if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) { + ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST, true); + } MCA_PML_BFO_RECV_REQUEST_RETURN(recvreq); } else { /* initialize request status */ diff --git a/ompi/mca/pml/bfo/pml_bfo_sendreq.h b/ompi/mca/pml/bfo/pml_bfo_sendreq.h index e8ee31c0af..a438d0ab32 100644 --- a/ompi/mca/pml/bfo/pml_bfo_sendreq.h +++ b/ompi/mca/pml/bfo/pml_bfo_sendreq.h @@ -247,6 +247,10 @@ send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq) if(false == sendreq->req_send.req_base.req_ompi.req_complete) { /* Should only be called for long messages (maybe synchronous) */ MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true); + } else { + if( MPI_SUCCESS != sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR ) { + ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST, true); + } } sendreq->req_send.req_base.req_pml_complete = true; #if PML_BFO diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h index 40288160c3..d548b43665 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h @@ -163,6 +163,9 @@ recv_request_pml_complete(mca_pml_ob1_recv_request_t *recvreq) OPAL_THREAD_LOCK(&ompi_request_lock); if(true == recvreq->req_recv.req_base.req_free_called) { + if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) { + ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST, true); + } MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq); } else { /* initialize request status */ diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 710aeeca62..b833806ac4 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ 
b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -256,6 +256,10 @@ send_request_pml_complete(mca_pml_ob1_send_request_t *sendreq) if(false == sendreq->req_send.req_base.req_ompi.req_complete) { /* Should only be called for long messages (maybe synchronous) */ MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq, true); + } else { + if( MPI_SUCCESS != sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR ) { + ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST, true); + } } sendreq->req_send.req_base.req_pml_complete = true;