
coll/libnbc: do not recursively call opal_progress()

instead of invoking ompi_request_test_all(), which would end up
calling opal_progress() recursively, manually check the status
of the requests.

the same method is used in ompi_comm_request_progress().

Refs open-mpi/ompi#3901

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
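For reference, the fix boils down to polling each outstanding subrequest for completion and freeing it as it completes, so the progress engine is never re-entered. A minimal sketch, assuming Open MPI's internal request API (the helper name poll_subrequests is hypothetical; ompi_request_t, REQUEST_COMPLETE() and ompi_request_free() are the internals that appear in the diff below):

#include <stdbool.h>
#include <stdint.h>
#include "ompi/request/request.h"  /* ompi_request_t, REQUEST_COMPLETE, ompi_request_free */

/* Poll the outstanding subrequests from the back of the array.
 * Returns true once every subrequest has completed and been freed,
 * false if at least one is still pending (retry on the next call). */
static bool poll_subrequests (ompi_request_t **req_array, int32_t *req_count)
{
    while (*req_count > 0) {
        ompi_request_t *subreq = req_array[*req_count - 1];
        if (!REQUEST_COMPLETE(subreq)) {
            /* still pending: bail out without re-entering opal_progress() */
            return false;
        }
        ompi_request_free (&subreq);   /* completed: release it */
        (*req_count)--;
    }
    return true;
}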
This commit is contained in:
Gilles Gouaillardet 2018-01-14 13:13:53 +09:00 committed by Nathan Hjelm
parent 9a8797a6be
commit 1a41482720


@@ -10,7 +10,7 @@
  * rights reserved.
  * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
  * reserved.
- * Copyright (c) 2015-2017 Research Organization for Information Science
+ * Copyright (c) 2015-2018 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
  *
  * Author(s): Torsten Hoefler <htor@cs.indiana.edu>
@@ -315,7 +315,8 @@ static inline void NBC_Free (NBC_Handle* handle) {
  *
  * to be called *only* from the progress thread !!! */
 int NBC_Progress(NBC_Handle *handle) {
-  int flag, res, ret=NBC_CONTINUE;
+  int res, ret=NBC_CONTINUE;
+  bool flag;
   unsigned long size = 0;
   char *delim;
   int i;
@@ -325,43 +326,27 @@ int NBC_Progress(NBC_Handle *handle) {
     return NBC_OK;
   }
+  flag = true;
   if ((handle->req_count > 0) && (handle->req_array != NULL)) {
     NBC_DEBUG(50, "NBC_Progress: testing for %i requests\n", handle->req_count);
 #ifdef NBC_TIMING
     Test_time -= MPI_Wtime();
 #endif
-    res = ompi_request_test_all(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE);
-    if(res != OMPI_SUCCESS) {
-      // Attempt to cancel outstanding requests
-      for(i = 0; i < handle->req_count; ++i ) {
-        // If the request is complete, then try to report the error code
-        if( handle->req_array[i]->req_complete ) {
-          if( OMPI_SUCCESS != handle->req_array[i]->req_status.MPI_ERROR ) {
-            NBC_Error ("MPI Error in MPI_Testall() (req %d = %d)", i, handle->req_array[i]->req_status.MPI_ERROR);
-          }
+    /* don't call ompi_request_test_all as it causes a recursive call into opal_progress */
+    while (handle->req_count) {
+        ompi_request_t *subreq = handle->req_array[handle->req_count - 1];
+        if (REQUEST_COMPLETE(subreq)) {
+            ompi_request_free(&subreq);
+            handle->req_count--;
+        } else {
+            flag = false;
+            break;
        }
-        else {
-          ompi_request_cancel(handle->req_array[i]);
-          // If the PML actually canceled the request, then wait on it
-          if( handle->req_array[i]->req_status._cancelled) {
-            ompi_request_wait(&handle->req_array[i], &status);
-          }
-          // Warn the user that we had to leave a PML message outstanding so
-          // bad things could happen if they continue using nonblocking collectives
-          else {
-            NBC_Error ("MPI Error: Not able to cancel the internal request %d. "
-                       "Be aware that continuing to use nonblocking collectives on this communicator may result in undefined behavior.", i);
-          }
-        }
-      }
-      return OMPI_ERROR;
    }
 #ifdef NBC_TIMING
     Test_time += MPI_Wtime();
 #endif
-  } else {
-    flag = 1; /* we had no open requests -> proceed to next round */
  }
  /* a round is finished */