From c5b1bdd57c39daf509d88dc5ca831557a330e6e4 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 31 Mar 2009 23:44:59 +0000 Subject: [PATCH] Correctly deal with the error case. The problem is tricky: the MPI standard doesn't allow MPI_ERR_IN_STATUS to be returned from any function that returns only one completed request (the few exceptions are wait_some, wait_all and their test versions). As we use a wait_all in these send_receive functions we should convert the MPI_ERR_IN_STATUS to the real error, i.e. the one coming from the MPI_ERROR field in the status corresponding to the failed request. This commit was SVN r20907. --- ompi/mca/coll/tuned/coll_tuned_util.c | 64 +++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/ompi/mca/coll/tuned/coll_tuned_util.c b/ompi/mca/coll/tuned/coll_tuned_util.c index 8b1408c11f..dd73cef6fb 100644 --- a/ompi/mca/coll/tuned/coll_tuned_util.c +++ b/ompi/mca/coll/tuned/coll_tuned_util.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -55,15 +55,39 @@ int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount, err = ompi_request_wait_all( 2, reqs, statuses ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } - if (MPI_STATUS_IGNORE!=status) { + if (MPI_STATUS_IGNORE != status) { *status = statuses[0]; } return (MPI_SUCCESS); error_handler: - OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n", - __FILE__,line,err)); + /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error + * code that we can propagate up the stack. 
Instead, look for the real + * error code from the MPI_ERROR in the status. + */ + if( MPI_ERR_IN_STATUS == err ) { + /* At least we know he error was detected during the wait_all */ + int err_index = 0; + if( MPI_SUCCESS != statuses[1].MPI_ERROR ) { + err_index = 1; + } + if (MPI_STATUS_IGNORE != status) { + *status = statuses[err_index]; + } + err = statuses[err_index].MPI_ERROR; + OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n", + __FILE__, line, err, err_index)); + } else { + /* Error discovered during the posting of the irecv or isend, + * and no status is available. + */ + OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n", + __FILE__, line, err)); + if (MPI_STATUS_IGNORE != status) { + status->MPI_ERROR = err; + } + } return (err); } @@ -85,7 +109,7 @@ int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount, { /* post receive first, then [local] sync send, then wait... should be fast (I hope) */ int err, line = 0; ompi_request_t* req[2]; - ompi_status_public_t tmpstatus[2]; + ompi_status_public_t statuses[2]; /* post new irecv */ err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, @@ -97,17 +121,39 @@ int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount, MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &(req[1]))); if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } - err = ompi_request_wait_all( 2, req, tmpstatus ); + err = ompi_request_wait_all( 2, req, statuses ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } - if (MPI_STATUS_IGNORE!=status) { - *status = tmpstatus[0]; + if (MPI_STATUS_IGNORE != status) { + *status = statuses[0]; } return (MPI_SUCCESS); error_handler: - OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err)); + /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error + * code that we can propagate up the stack. 
Instead, look for the real + * error code from the MPI_ERROR in the status. + */ + if( MPI_ERR_IN_STATUS == err ) { + int err_index = 0; + if( MPI_SUCCESS != statuses[1].MPI_ERROR ) { + err_index = 1; + } + if (MPI_STATUS_IGNORE != status) { + *status = statuses[err_index]; + } + err = statuses[err_index].MPI_ERROR; + OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n", + __FILE__,line,err, err_index)); + } else { + OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n", + __FILE__,line,err)); + if (MPI_STATUS_IGNORE != status) { + status->MPI_ERROR = err; + } + } + return (err); }