Ensure we assign "err" properly when invoking MCA_PML_CALLs. Although
technically this is a necessary thing to do, it wasn't a tragedy that we didn't have it because err was initialized to 0 at the beginning of the functions where this problem occurred. Also, OMPI will likely abort if one of the MCA_PML_CALLs actually incurs an error (or, even if it doesn't, MPI doesn't define the behavior anyway ;-) ). But looking forward to an FT-aware world, fixing this issue is a Good Thing. Many thanks to Hristo Iliev for pointing out the issue. This commit was SVN r27070.
Этот коммит содержится в:
родитель
335c0eafcf
Коммит
a4e97fb4c0
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -124,9 +125,9 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
|
||||
5) Send the last segment to children
|
||||
*/
|
||||
req_index = 0;
|
||||
MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype,
|
||||
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, &recv_reqs[req_index]));
|
||||
err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype,
|
||||
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, &recv_reqs[req_index]));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
for( segindex = 1; segindex < num_segments; segindex++ ) {
|
||||
@ -134,10 +135,10 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
|
||||
req_index = req_index ^ 0x1;
|
||||
|
||||
/* post new irecv */
|
||||
MCA_PML_CALL(irecv( tmpbuf + realsegsize, count_by_segment,
|
||||
datatype, tree->tree_prev,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, &recv_reqs[req_index]));
|
||||
err = MCA_PML_CALL(irecv( tmpbuf + realsegsize, count_by_segment,
|
||||
datatype, tree->tree_prev,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, &recv_reqs[req_index]));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
/* wait for and forward the previous segment to children */
|
||||
@ -493,9 +494,9 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
* and we disseminating the data to all children.
|
||||
*/
|
||||
sendcount[lr] = segcount[lr];
|
||||
MCA_PML_CALL(irecv(tmpbuf[lr], sendcount[lr], datatype,
|
||||
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, &base_req));
|
||||
err = MCA_PML_CALL(irecv(tmpbuf[lr], sendcount[lr], datatype,
|
||||
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, &base_req));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
for( segindex = 1; segindex < num_segments[lr]; segindex++ ) {
|
||||
@ -503,17 +504,17 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
if( segindex == (num_segments[lr] - 1))
|
||||
sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr];
|
||||
/* post new irecv */
|
||||
MCA_PML_CALL(irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr],
|
||||
datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, &new_req));
|
||||
err = MCA_PML_CALL(irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr],
|
||||
datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, &new_req));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
/* wait for and forward current segment */
|
||||
err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE );
|
||||
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children (segcount[lr]) */
|
||||
MCA_PML_CALL(send( tmpbuf[lr], segcount[lr], datatype,
|
||||
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
err = MCA_PML_CALL(send( tmpbuf[lr], segcount[lr], datatype,
|
||||
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
} /* end of for each child */
|
||||
|
||||
@ -526,9 +527,9 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
/* wait for the last segment and forward current segment */
|
||||
err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE );
|
||||
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */
|
||||
MCA_PML_CALL(send(tmpbuf[lr], sendcount[lr], datatype,
|
||||
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
err = MCA_PML_CALL(send(tmpbuf[lr], sendcount[lr], datatype,
|
||||
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
} /* end of for each child */
|
||||
}
|
||||
@ -542,9 +543,9 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
if (segindex == (num_segments[lr] - 1))
|
||||
sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr];
|
||||
/* receive segments */
|
||||
MCA_PML_CALL(recv(tmpbuf[lr], sendcount[lr], datatype,
|
||||
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
err = MCA_PML_CALL(recv(tmpbuf[lr], sendcount[lr], datatype,
|
||||
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
/* update the initial pointer to the buffer */
|
||||
tmpbuf[lr] += realsegsize[lr];
|
||||
@ -581,17 +582,17 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
} else if ( (size%2) == 0 ) {
|
||||
/* root sends right buffer to the last node */
|
||||
if( rank == root ) {
|
||||
MCA_PML_CALL(send(tmpbuf[1], counts[1], datatype,
|
||||
(root+size-1)%size, MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
err = MCA_PML_CALL(send(tmpbuf[1], counts[1], datatype,
|
||||
(root+size-1)%size, MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
}
|
||||
/* last node receives right buffer from the root */
|
||||
else if (rank == (root+size-1)%size) {
|
||||
MCA_PML_CALL(recv(tmpbuf[1], counts[1], datatype,
|
||||
root, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
err = MCA_PML_CALL(recv(tmpbuf[1], counts[1], datatype,
|
||||
root, MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
}
|
||||
/* everyone else exchanges buffers */
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user