1
1

Correctly deal with the error case. The problem is tricky: the MPI standard doesn't allow

MPI_ERR_IN_STATUS to be returned from any functions that return only one completed request
(with a few exceptions: wait_some and wait_all and their test versions). As we use a wait_all
in these send_receive functions we should convert the MPI_ERR_IN_STATUS to the real
error, i.e. the one coming from the MPI_ERROR field in the status corresponding to the
failed request.

This commit was SVN r20907.
Этот коммит содержится в:
George Bosilca 2009-03-31 23:44:59 +00:00
родитель 12ce14ec8c
Коммит c5b1bdd57c

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University * Copyright (c) 2004-2009 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -55,15 +55,39 @@ int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount,
err = ompi_request_wait_all( 2, reqs, statuses ); err = ompi_request_wait_all( 2, reqs, statuses );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
if (MPI_STATUS_IGNORE!=status) { if (MPI_STATUS_IGNORE != status) {
*status = statuses[0]; *status = statuses[0];
} }
return (MPI_SUCCESS); return (MPI_SUCCESS);
error_handler: error_handler:
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n", /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
__FILE__,line,err)); * code that we can propagate up the stack. Instead, look for the real
* error code from the MPI_ERROR in the status.
*/
if( MPI_ERR_IN_STATUS == err ) {
/* At least we know he error was detected during the wait_all */
int err_index = 0;
if( MPI_SUCCESS != statuses[1].MPI_ERROR ) {
err_index = 1;
}
if (MPI_STATUS_IGNORE != status) {
*status = statuses[err_index];
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
__FILE__, line, err, err_index));
} else {
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
__FILE__, line, err));
if (MPI_STATUS_IGNORE != status) {
status->MPI_ERROR = err;
}
}
return (err); return (err);
} }
@ -85,7 +109,7 @@ int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount,
{ /* post receive first, then [local] sync send, then wait... should be fast (I hope) */ { /* post receive first, then [local] sync send, then wait... should be fast (I hope) */
int err, line = 0; int err, line = 0;
ompi_request_t* req[2]; ompi_request_t* req[2];
ompi_status_public_t tmpstatus[2]; ompi_status_public_t statuses[2];
/* post new irecv */ /* post new irecv */
err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
@ -97,17 +121,39 @@ int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount,
MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &(req[1]))); MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &(req[1])));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
err = ompi_request_wait_all( 2, req, tmpstatus ); err = ompi_request_wait_all( 2, req, statuses );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
if (MPI_STATUS_IGNORE!=status) { if (MPI_STATUS_IGNORE != status) {
*status = tmpstatus[0]; *status = statuses[0];
} }
return (MPI_SUCCESS); return (MPI_SUCCESS);
error_handler: error_handler:
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err)); /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
* code that we can propagate up the stack. Instead, look for the real
* error code from the MPI_ERROR in the status.
*/
if( MPI_ERR_IN_STATUS == err ) {
int err_index = 0;
if( MPI_SUCCESS != statuses[1].MPI_ERROR ) {
err_index = 1;
}
if (MPI_STATUS_IGNORE != status) {
*status = statuses[err_index];
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
__FILE__,line,err, err_index));
} else {
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
__FILE__,line,err));
if (MPI_STATUS_IGNORE != status) {
status->MPI_ERROR = err;
}
}
return (err); return (err);
} }