diff --git a/ompi/request/req_test.c b/ompi/request/req_test.c index 494308eda7..c46a316768 100644 --- a/ompi/request/req_test.c +++ b/ompi/request/req_test.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -27,11 +28,12 @@ #include "ompi/mca/crcp/crcp.h" -int ompi_request_default_test( ompi_request_t ** rptr, - int *completed, - ompi_status_public_t * status ) +int ompi_request_default_test(ompi_request_t ** rptr, + int *completed, + ompi_status_public_t * status ) { ompi_request_t *request = *rptr; + #if OPAL_ENABLE_PROGRESS_THREADS == 0 int do_it_once = 0; @@ -46,7 +48,7 @@ int ompi_request_default_test( ompi_request_t ** rptr, return OMPI_SUCCESS; } - if (request->req_complete) { + if( REQUEST_COMPLETE(request) ) { OMPI_CRCP_REQUEST_COMPLETE(request); *completed = true; @@ -117,7 +119,7 @@ int ompi_request_default_test_any( continue; } - if( request->req_complete ) { + if( REQUEST_COMPLETE(request) ) { OMPI_CRCP_REQUEST_COMPLETE(request); *index = i; @@ -193,8 +195,7 @@ int ompi_request_default_test_all( request = *rptr; if( request->req_state == OMPI_REQUEST_INACTIVE || - request->req_complete) { - OMPI_CRCP_REQUEST_COMPLETE(request); + REQUEST_COMPLETE(request) ) { num_completed++; } } @@ -225,6 +226,7 @@ int ompi_request_default_test_all( if (OMPI_REQUEST_GEN == request->req_type) { ompi_grequest_invoke_query(request, &request->req_status); } + OMPI_CRCP_REQUEST_COMPLETE(request); statuses[i] = request->req_status; if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; @@ -255,6 +257,7 @@ int ompi_request_default_test_all( if (OMPI_REQUEST_GEN == request->req_type) { ompi_grequest_invoke_query(request, &request->req_status); } + OMPI_CRCP_REQUEST_COMPLETE(request); if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; continue; diff --git a/ompi/request/req_wait.c b/ompi/request/req_wait.c index c9f7d12bf1..e098fa7106 100644 --- a/ompi/request/req_wait.c +++ b/ompi/request/req_wait.c @@ -28,10 +28,7 @@ #include "ompi/request/request_default.h" #include "ompi/request/grequest.h" -#include "opal/runtime/opal_cr.h" #include "ompi/mca/crcp/crcp.h" -#include "ompi/mca/pml/base/pml_base_request.h" - int ompi_request_default_wait( ompi_request_t ** req_ptr, @@ -41,9 +38,7 @@ int ompi_request_default_wait( ompi_request_wait_completion(req); -#if OPAL_ENABLE_FT_CR == 1 OMPI_CRCP_REQUEST_COMPLETE(req); -#endif /* return status. If it's a generalized request, we *have* to invoke the query_fn, even if the user procided STATUS_IGNORE. @@ -89,7 +84,7 @@ int ompi_request_default_wait_any(size_t count, ompi_status_public_t * status) { size_t completed = count, num_requests_null_inactive = 0; - int rc = OMPI_SUCCESS; + int i, rc = OMPI_SUCCESS; ompi_request_t **rptr=NULL; ompi_request_t *request=NULL; ompi_wait_sync_t sync; @@ -98,13 +93,8 @@ int ompi_request_default_wait_any(size_t count, rptr = requests; num_requests_null_inactive = 0; - for (size_t i = 0; i < count; i++, rptr++) { - request = *rptr; - - /* Sanity test */ - if( NULL == request) { - continue; - } + for (i = 0; i < count; i++) { + request = requests[i]; /* * Check for null or completed persistent request. @@ -115,11 +105,11 @@ int ompi_request_default_wait_any(size_t count, continue; } - if(!OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) { + if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) { assert(REQUEST_COMPLETE(request)); - wait_sync_update( &sync, 1, request->req_status.MPI_ERROR); completed = i; - break; + *index = i; + goto after_sync_wait; } } @@ -133,11 +123,12 @@ int ompi_request_default_wait_any(size_t count, } SYNC_WAIT(&sync); - - /* recheck the complete status and clean up the sync primitives */ - rptr = requests; - for(size_t i = 0, pending_count = completed; i < pending_count ; i++, rptr++) { - request = *rptr; + + after_sync_wait: + /* recheck the complete status and clean up the sync primitives. Do it backward to + * return the earliest complete request to the user. */ + for(i = completed-1; i >= 0; i--) { + request = requests[i]; if( request->req_state == OMPI_REQUEST_INACTIVE ) { continue; @@ -147,15 +138,12 @@ int ompi_request_default_wait_any(size_t count, * the request has been completed meanwhile, and it has been atomically * marked as REQUEST_COMPLETE. */ - OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING); - if (REQUEST_COMPLETE(request)) { - completed = i; + if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { + *index = i; } } - rptr = requests + completed; - request = *rptr; - + request = requests[*index]; assert( REQUEST_COMPLETE(request) ); /* Per note above, we have to call gen request query_fn even if STATUS_IGNORE was provided */ @@ -177,9 +165,8 @@ int ompi_request_default_wait_any(size_t count, /* If there's an error while freeing the request, assume that the request is still there. Otherwise, Bad Things will happen later! */ - rc = ompi_request_free(rptr); + rc = ompi_request_free(&requests[*index]); } - *index = completed; #if OPAL_ENABLE_FT_CR == 1 if( opal_cr_is_enabled) { @@ -201,10 +188,10 @@ int ompi_request_default_wait_all( size_t count, ompi_request_t ** requests, ompi_status_public_t * statuses ) { - size_t completed = 0, i, failed = 0; + size_t completed = 0, failed = 0; ompi_request_t **rptr; ompi_request_t *request; - int mpi_error = OMPI_SUCCESS; + int i, mpi_error = OMPI_SUCCESS; ompi_wait_sync_t sync; WAIT_SYNC_INIT(&sync, count); @@ -212,6 +199,11 @@ int ompi_request_default_wait_all( size_t count, for (i = 0; i < count; i++) { request = *rptr++; + if( request->req_state == OMPI_REQUEST_INACTIVE ) { + completed++; + continue; + } + if (!OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync)) { if( OPAL_UNLIKELY( MPI_SUCCESS != request->req_status.MPI_ERROR ) ) { failed++; @@ -219,31 +211,21 @@ int ompi_request_default_wait_all( size_t count, completed++; } } - if( 0 != completed ) { - wait_sync_update(&sync, completed, OPAL_SUCCESS); - } - if( failed > 0 ) { goto finish; } - /* - * acquire lock and test for completion - if all requests are - * not completed pend on condition variable until a request - * completes - */ - SYNC_WAIT(&sync); -#if OPAL_ENABLE_FT_CR == 1 - if( opal_cr_is_enabled) { - rptr = requests; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; - if( REQUEST_COMPLETE(request) ) { - OMPI_CRCP_REQUEST_COMPLETE(request); - } - } + if( 0 != completed ) { + wait_sync_update(&sync, completed, OPAL_SUCCESS); + } + + /* wait until all requests complete or until an error is triggered. */ + mpi_error = SYNC_WAIT(&sync); + if( OPAL_SUCCESS != mpi_error ) { + /* if we are in an error case, increase the failed to ensure + proper cleanup during the requests completion. */ + failed++; } -#endif finish: rptr = requests; @@ -252,50 +234,54 @@ int ompi_request_default_wait_all( size_t count, for( i = 0; i < count; i++, rptr++ ) { request = *rptr; - /* - * Assert only if no requests were failed. - * Since some may still be pending. - */ - if( 0 >= failed ) { - assert( REQUEST_COMPLETE(request) ); - } - if( request->req_state == OMPI_REQUEST_INACTIVE ) { statuses[i] = ompi_status_empty; continue; } - if (OMPI_REQUEST_GEN == request->req_type) { - ompi_grequest_invoke_query(request, &request->req_status); - } - statuses[i] = request->req_status; - /* - * Per MPI 2.2 p 60: - * Allows requests to be marked as MPI_ERR_PENDING if they are - * "neither failed nor completed." Which can only happen if - * there was an error in one of the other requests. - */ if( OPAL_UNLIKELY(0 < failed) ) { - if( !request->req_complete ) { + /* if we have failed requests we skipped the waiting on the sync. Thus, + * some of the requests might not be properly completed, in which case + * we must detach all requests from the sync. However, if we can succesfully + * mark the request as pending then it is neither failed nor complete, and + * we must stop altering it. + */ + if( OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + /* + * Per MPI 2.2 p 60: + * Allows requests to be marked as MPI_ERR_PENDING if they are + * "neither failed nor completed." Which can only happen if + * there was an error in one of the other requests. + */ statuses[i].MPI_ERROR = MPI_ERR_PENDING; mpi_error = MPI_ERR_IN_STATUS; continue; } } + assert( REQUEST_COMPLETE(request) ); + + if( opal_cr_is_enabled) { + OMPI_CRCP_REQUEST_COMPLETE(request); + } + + if (OMPI_REQUEST_GEN == request->req_type) { + ompi_grequest_invoke_query(request, &request->req_status); + } + + statuses[i] = request->req_status; if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; continue; - } else { - /* Only free the request if there is no error on it */ - if (MPI_SUCCESS == request->req_status.MPI_ERROR) { - /* If there's an error while freeing the request, - assume that the request is still there. - Otherwise, Bad Things will happen later! */ - int tmp = ompi_request_free(rptr); - if (OMPI_SUCCESS == mpi_error && OMPI_SUCCESS != tmp) { - mpi_error = tmp; - } + } + /* Only free the request if there is no error on it */ + if (MPI_SUCCESS == request->req_status.MPI_ERROR) { + /* If there's an error while freeing the request, + assume that the request is still there. + Otherwise, Bad Things will happen later! */ + int tmp = ompi_request_free(rptr); + if (OMPI_SUCCESS == mpi_error && OMPI_SUCCESS != tmp) { + mpi_error = tmp; } } if( statuses[i].MPI_ERROR != OMPI_SUCCESS) { @@ -303,24 +289,38 @@ int ompi_request_default_wait_all( size_t count, } } } else { + int rc; /* free request if required */ for( i = 0; i < count; i++, rptr++ ) { - int rc; request = *rptr; + if( request->req_state == OMPI_REQUEST_INACTIVE ) { + rc = ompi_status_empty.MPI_ERROR; + goto absorb_error_and_continue; + } /* * Assert only if no requests were failed. * Since some may still be pending. */ - if( 0 >= failed ) { - assert( REQUEST_COMPLETE(request) ); - } else { + if( OPAL_UNLIKELY(0 < failed) ) { /* If the request is still pending due to a failed request * then skip it in this loop. */ - if( !REQUEST_COMPLETE(request) ) { - continue; - } + if( OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING ) ) { + /* + * Per MPI 2.2 p 60: + * Allows requests to be marked as MPI_ERR_PENDING if they are + * "neither failed nor completed." Which can only happen if + * there was an error in one of the other requests. + */ + rc = MPI_ERR_PENDING; + goto absorb_error_and_continue; + } + } + assert( REQUEST_COMPLETE(request) ); + + if( opal_cr_is_enabled) { + OMPI_CRCP_REQUEST_COMPLETE(request); } /* Per note above, we have to call gen request query_fn @@ -328,11 +328,9 @@ int ompi_request_default_wait_all( size_t count, if (OMPI_REQUEST_GEN == request->req_type) { rc = ompi_grequest_invoke_query(request, &request->req_status); } - if( request->req_state == OMPI_REQUEST_INACTIVE ) { - rc = ompi_status_empty.MPI_ERROR; - } else { - rc = request->req_status.MPI_ERROR; - } + + rc = request->req_status.MPI_ERROR; + if( request->req_persistent ) { request->req_state = OMPI_REQUEST_INACTIVE; } else if (MPI_SUCCESS == rc) { @@ -342,6 +340,7 @@ int ompi_request_default_wait_all( size_t count, mpi_error = tmp; } } + absorb_error_and_continue: /* * Per MPI 2.2 p34: * "It is possible for an MPI function to return MPI_ERR_IN_STATUS @@ -375,10 +374,6 @@ int ompi_request_default_wait_some(size_t count, *outcount = 0; - /* - * We only get here when outcount still is 0. - * give up and sleep until completion - */ rptr = requests; num_requests_null_inactive = 0; num_requests_done = 0; @@ -394,33 +389,22 @@ int ompi_request_default_wait_some(size_t count, } if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) { + /* If the request is completed go ahead and mark it as such */ assert( REQUEST_COMPLETE(request) ); num_requests_done++; } } + if(num_requests_null_inactive == count) { *outcount = MPI_UNDEFINED; WAIT_SYNC_RELEASE(&sync); return rc; } - if( 0 != num_requests_done ) { - /* As we only expect one trigger update the sync with count 1 */ - wait_sync_update(&sync, 1, request->req_status.MPI_ERROR); + if( 0 == num_requests_done ) { + /* One completed request is enough to satisfy the some condition */ + SYNC_WAIT(&sync); } - SYNC_WAIT(&sync); - -#if OPAL_ENABLE_FT_CR == 1 - if( opal_cr_is_enabled) { - rptr = requests; - for (i = 0; i < count; i++, rptr++) { - request = *rptr; - if( REQUEST_COMPLETE(request) ) { - OMPI_CRCP_REQUEST_COMPLETE(request); - } - } - } -#endif /* Do the final counting and */ /* Clean up the synchronization primitives */ @@ -433,9 +417,13 @@ int ompi_request_default_wait_some(size_t count, if( request->req_state == OMPI_REQUEST_INACTIVE ) { continue; } - + /* Atomically mark the request as pending. If this succeed + * then the request was not completed, and it is now marked as + * pending. Otherwise, the request is complete )either it was + * before or it has been meanwhile). The major drawback here + * is that we will do all the atomics operations in all cases. + */ if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) { - assert(REQUEST_COMPLETE(request)); indices[num_requests_done] = i; num_requests_done++; } @@ -448,6 +436,13 @@ int ompi_request_default_wait_some(size_t count, for (size_t i = 0; i < num_requests_done; i++) { request = requests[indices[i]]; assert( REQUEST_COMPLETE(request) ); + +#if OPAL_ENABLE_FT_CR == 1 + if( opal_cr_is_enabled) { + OMPI_CRCP_REQUEST_COMPLETE(request); + } +#endif + /* Per note above, we have to call gen request query_fn even if STATUS_IGNORE was provided */ if (OMPI_REQUEST_GEN == request->req_type) {