Correctly deal with the error case. The problem is tricky: the MPI standard does not allow MPI_ERR_IN_STATUS to be returned by functions that complete only a single request (the few exceptions are wait_some and wait_all, and their test versions). As we use a wait_all in these send/receive functions, we must convert MPI_ERR_IN_STATUS into the real error, i.e. the one coming from the MPI_ERROR field of the status corresponding to the failed request.

This commit was SVN r20907.
This commit is contained in:
parent 12ce14ec8c
commit c5b1bdd57c
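
For readers who want the pattern in isolation, here is a minimal, self-contained sketch of the conversion this commit performs, written against the standard MPI API rather than Open MPI's internal ompi_request_wait_all. It is not taken from the commit; the helper name wait_both is illustrative.

#include <mpi.h>

/* Illustrative helper (hypothetical name): wait on a recv/send request
 * pair and, if MPI_Waitall reports MPI_ERR_IN_STATUS, dig the real error
 * code out of the MPI_ERROR field of the failed request's status. */
int wait_both(MPI_Request reqs[2], MPI_Status *status_out)
{
    MPI_Status statuses[2];
    int err = MPI_Waitall(2, reqs, statuses);

    if (MPI_ERR_IN_STATUS == err) {
        /* MPI_ERR_IN_STATUS only says "some request failed"; callers need
         * a real error class, so take it from the failed status. Requests
         * that completed successfully have MPI_ERROR == MPI_SUCCESS. */
        int err_index = (MPI_SUCCESS != statuses[1].MPI_ERROR) ? 1 : 0;
        if (MPI_STATUS_IGNORE != status_out) {
            *status_out = statuses[err_index];
        }
        err = statuses[err_index].MPI_ERROR;
    } else if (MPI_SUCCESS == err && MPI_STATUS_IGNORE != status_out) {
        *status_out = statuses[0];  /* report the receive status, as the patch does */
    }
    return err;
}
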
@@ -2,7 +2,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation. All rights reserved.
- * Copyright (c) 2004-2007 The University of Tennessee and The University
+ * Copyright (c) 2004-2009 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -55,15 +55,39 @@ int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount,
     err = ompi_request_wait_all( 2, reqs, statuses );
     if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
 
-    if (MPI_STATUS_IGNORE!=status) {
+    if (MPI_STATUS_IGNORE != status) {
         *status = statuses[0];
     }
 
     return (MPI_SUCCESS);
 
  error_handler:
-    OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
-                  __FILE__,line,err));
+    /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
+     * code that we can propagate up the stack. Instead, look for the real
+     * error code from the MPI_ERROR in the status.
+     */
+    if( MPI_ERR_IN_STATUS == err ) {
+        /* At least we know the error was detected during the wait_all */
+        int err_index = 0;
+        if( MPI_SUCCESS != statuses[1].MPI_ERROR ) {
+            err_index = 1;
+        }
+        if (MPI_STATUS_IGNORE != status) {
+            *status = statuses[err_index];
+        }
+        err = statuses[err_index].MPI_ERROR;
+        OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
+                      __FILE__, line, err, err_index));
+    } else {
+        /* Error discovered during the posting of the irecv or isend,
+         * and no status is available.
+         */
+        OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
+                      __FILE__, line, err));
+        if (MPI_STATUS_IGNORE != status) {
+            status->MPI_ERROR = err;
+        }
+    }
     return (err);
 }
 
@@ -85,7 +109,7 @@ int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount,
 { /* post receive first, then [local] sync send, then wait... should be fast (I hope) */
     int err, line = 0;
     ompi_request_t* req[2];
-    ompi_status_public_t tmpstatus[2];
+    ompi_status_public_t statuses[2];
 
     /* post new irecv */
     err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
@@ -97,17 +121,39 @@ int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount,
                               MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &(req[1])));
     if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
 
-    err = ompi_request_wait_all( 2, req, tmpstatus );
+    err = ompi_request_wait_all( 2, req, statuses );
     if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
 
-    if (MPI_STATUS_IGNORE!=status) {
-        *status = tmpstatus[0];
+    if (MPI_STATUS_IGNORE != status) {
+        *status = statuses[0];
     }
 
     return (MPI_SUCCESS);
 
  error_handler:
     OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err));
+    /* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
+     * code that we can propagate up the stack. Instead, look for the real
+     * error code from the MPI_ERROR in the status.
+     */
+    if( MPI_ERR_IN_STATUS == err ) {
+        int err_index = 0;
+        if( MPI_SUCCESS != statuses[1].MPI_ERROR ) {
+            err_index = 1;
+        }
+        if (MPI_STATUS_IGNORE != status) {
+            *status = statuses[err_index];
+        }
+        err = statuses[err_index].MPI_ERROR;
+        OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred (req index %d)\n",
+                      __FILE__, line, err, err_index));
+    } else {
+        OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
+                      __FILE__, line, err));
+        if (MPI_STATUS_IGNORE != status) {
+            status->MPI_ERROR = err;
+        }
+    }
 
     return (err);
 }
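
To see the conversion end to end, a hedged standalone usage sketch (again not part of the commit, and assuming the wait_both helper above is in the same translation unit): with MPI_ERRORS_RETURN installed, a deliberately truncated receive makes MPI_Waitall report MPI_ERR_IN_STATUS, and wait_both returns the underlying error class instead.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    /* Without MPI_ERRORS_RETURN the default handler would abort the job
     * before Waitall could return an error code at all. */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    int rank, sendval = 42, recvval;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Request reqs[2];
    MPI_Status status;

    /* Self send/receive with a zero-count receive: the 1-int message does
     * not fit, so the receive request should fail with MPI_ERR_TRUNCATE. */
    MPI_Irecv(&recvval, 0, MPI_INT, rank, 0, MPI_COMM_WORLD, &reqs[0]);
    MPI_Isend(&sendval, 1, MPI_INT, rank, 0, MPI_COMM_WORLD, &reqs[1]);

    int err = wait_both(reqs, &status);
    if (MPI_SUCCESS != err) {
        /* err is a real error class (MPI_ERR_TRUNCATE here), never the
         * un-propagatable MPI_ERR_IN_STATUS. */
        printf("request failed with error code %d\n", err);
    }

    MPI_Finalize();
    return 0;
}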