MPI_Waitsome performance improvement
by avoiding extra atomic exchanges. Use the indices array to mark requests that were already found completed in the pre-wait loop, so that the after-wait loop can skip the extra atomic exchange for them.
Этот коммит содержится в:
родитель
955269b4f1
Коммит
732d89095b
@ -391,8 +391,8 @@ int ompi_request_default_wait_some(size_t count,
|
|||||||
num_requests_null_inactive++;
|
num_requests_null_inactive++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
indices[i] = OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync);
|
||||||
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, REQUEST_PENDING, &sync) ) {
|
if( !indices[i] ) {
|
||||||
/* If the request is completed go ahead and mark it as such */
|
/* If the request is completed go ahead and mark it as such */
|
||||||
assert( REQUEST_COMPLETE(request) );
|
assert( REQUEST_COMPLETE(request) );
|
||||||
num_requests_done++;
|
num_requests_done++;
|
||||||
@ -423,15 +423,23 @@ int ompi_request_default_wait_some(size_t count,
|
|||||||
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
|
if( request->req_state == OMPI_REQUEST_INACTIVE ) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* Atomically mark the request as pending. If this succeeds
|
/* Here we have 3 possibilities:
|
||||||
* then the request was not completed, and it is now marked as
|
* a) request was found completed in the first loop
|
||||||
* pending. Otherwise, the request is complete (either it was
|
* => ( indices[i] == 0 )
|
||||||
* before or it has been meanwhile). The major drawback here
|
* b) request was completed between first loop and this check
|
||||||
* is that we will do all the atomic operations in all cases.
|
* => ( indices[i] == 1 ) and we can NOT atomically mark the
|
||||||
|
* request as pending.
|
||||||
|
* c) request wasn't finished yet
|
||||||
|
* => ( indices[i] == 1 ) and we CAN atomically mark the
|
||||||
|
* request as pending.
|
||||||
|
* NOTE that in any case (i >= num_requests_done) as the latter grows
|
||||||
|
* either slowly (in case of partial completion)
|
||||||
|
* OR in parallel with `i` (in case of full set completion)
|
||||||
*/
|
*/
|
||||||
if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
|
if( !indices[i] ){
|
||||||
indices[num_requests_done] = i;
|
indices[num_requests_done++] = i;
|
||||||
num_requests_done++;
|
} else if( !OPAL_ATOMIC_CMPSET_PTR(&request->req_complete, &sync, REQUEST_PENDING) ) {
|
||||||
|
indices[num_requests_done++] = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sync_unsets = count - num_requests_null_inactive - num_requests_done;
|
sync_unsets = count - num_requests_null_inactive - num_requests_done;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user