Correctly deal with the error case. The problem is tricky: the MPI standard doesn't allow

MPI_ERR_IN_STATUS to be returned from any function that returns only one completed request (the few exceptions are wait_some and wait_all, and their test counterparts). As we use a wait_all in these send_receive functions, we should convert MPI_ERR_IN_STATUS to the real error, i.e. the one coming from the MPI_ERROR field in the status corresponding to the failed request. This commit was SVN r20907.
2009-03-31 23:44:59 +00:00 · 2009-03-31 23:44:59 +00:00 · c5b1bdd57c
--- a/ompi/mca/coll/tuned/coll_tuned_util.c
+++ b/ompi/mca/coll/tuned/coll_tuned_util.c
@ -2,7 +2,7 @@
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2007 The University of Tennessee and The University
+ * Copyright (c) 2004-2009 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 
@ -55,15 +55,39 @@ int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount,
    err = ompi_request_wait_all( 2, reqs, statuses );
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
-    if (MPI_STATUS_IGNORE!=status) {
+    if (MPI_STATUS_IGNORE != status) {
        *status = statuses[0];
    }
    return (MPI_SUCCESS);
 error_handler:
-    OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
+    /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
-                  __FILE__,line,err));
+     * code that we can propagate up the stack. Instead, look for the real
     * error code from the MPI_ERROR in the status.
     */
    if( MPI_ERR_IN_STATUS == err ) {
        /* At least we know the error was detected during the wait_all */
        int err_index = 0;
        if( MPI_SUCCESS != statuses[1].MPI_ERROR ) {
            err_index = 1;
        }
        if (MPI_STATUS_IGNORE != status) {
            *status = statuses[err_index];
        }
        err = statuses[err_index].MPI_ERROR;
        OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
                      __FILE__, line, err, err_index));
    } else {
        /* Error discovered during the posting of the irecv or isend,
         * and no status is available.
         */
        OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
                      __FILE__, line, err));
        if (MPI_STATUS_IGNORE != status) {
            status->MPI_ERROR = err;
        }
    }
    return (err);
 }
@ -85,7 +109,7 @@ int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount,
 { /* post receive first, then [local] sync send, then wait... should be fast (I hope) */
    int err, line = 0;
    ompi_request_t* req[2];
-    ompi_status_public_t tmpstatus[2];
+    ompi_status_public_t statuses[2];
    /* post new irecv */
    err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, 
@ -97,17 +121,39 @@ int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount,
                              MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &(req[1])));
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
-    err = ompi_request_wait_all( 2, req, tmpstatus );
+    err = ompi_request_wait_all( 2, req, statuses );
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
-    if (MPI_STATUS_IGNORE!=status) {
+    if (MPI_STATUS_IGNORE != status) {
-        *status = tmpstatus[0];
+        *status = statuses[0];
    }
    return (MPI_SUCCESS);
 error_handler:
-    OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err));
+    /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
     * code that we can propagate up the stack. Instead, look for the real
     * error code from the MPI_ERROR in the status.
     */
    if( MPI_ERR_IN_STATUS == err ) { 
        int err_index = 0;
        if( MPI_SUCCESS != statuses[1].MPI_ERROR ) {
            err_index = 1;
        }
        if (MPI_STATUS_IGNORE != status) {
            *status = statuses[err_index];
        }
        err = statuses[err_index].MPI_ERROR;
        OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
                      __FILE__,line,err, err_index));
    } else {
        OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
                      __FILE__,line,err));
        if (MPI_STATUS_IGNORE != status) {
            status->MPI_ERROR = err;
        }
    }
    return (err);
 }