MPI_Waitsome performance improvement
by avoiding extra atomic exchanges. Use indices array to mark already completed connections in the pre-wait loop to avoid extra atomic exchanges in the after-wait loop.
Этот коммит содержится в:
родитель
955269b4f1
Коммит
732d89095b
@ -391,8 +391,8 @@ int ompi_request_default_wait_some(size_t count,
|
||||
num_requests_null_inactive++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) {
|
||||
indices[i] = OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync);
|
||||
if( !indices[i] ) {
|
||||
/* If the request is completed go ahead and mark it as such */
|
||||
assert( REQUEST_COMPLETE(request) );
|
||||
num_requests_done++;
|
||||
@ -423,15 +423,23 @@ int ompi_request_default_wait_some(size_t count,
|
||||
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
|
||||
continue;
|
||||
}
|
||||
/* Atomically mark the request as pending. If this succeed
|
||||
* then the request was not completed, and it is now marked as
|
||||
* pending. Otherwise, the request is complete )either it was
|
||||
* before or it has been meanwhile). The major drawback here
|
||||
* is that we will do all the atomics operations in all cases.
|
||||
/* Here we have 3 possibilities:
|
||||
* a) request was found completed in the first loop
|
||||
* => ( indices[i] == 0 )
|
||||
* b) request was completed between first loop and this check
|
||||
* => ( indices[i] == 1 ) and we can NOT atomically mark the
|
||||
* request as pending.
|
||||
* c) request wasn't finished yet
|
||||
* => ( indices[i] == 1 ) and we CAN atomically mark the
|
||||
* request as pending.
|
||||
* NOTE that in any case (i >= num_requests_done) as latter grows
|
||||
* either slowly (in case of partial completion)
|
||||
* OR in parallel with `i` (in case of full set completion)
|
||||
*/
|
||||
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
|
||||
indices[num_requests_done] = i;
|
||||
num_requests_done++;
|
||||
if( !indices[i] ){
|
||||
indices[num_requests_done++] = i;
|
||||
} else if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
|
||||
indices[num_requests_done++] = i;
|
||||
}
|
||||
}
|
||||
sync_unsets = count - num_requests_null_inactive - num_requests_done;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user