ompi/request: fix performance regression
This commit fixes a performance regression introduced by the request rework. We were always using the multi-thread path because OPAL_ENABLE_MULTI_THREADS is either not defined or always defined to 1, depending on the Open MPI version. To fix this, I removed the preprocessor conditional and added a conditional on opal_using_threads() instead. The multi-thread path will be optimized out in 2.0.0 in a non-thread-multiple build, as opal_using_threads is #defined to false in that case. Fixes open-mpi/ompi#1806. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
46406914c5
Коммит
544adb9aed
@ -375,27 +375,25 @@ static inline int ompi_request_free(ompi_request_t** request)
|
||||
* Wait for a particular request to complete
|
||||
*/
|
||||
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
static inline void ompi_request_wait_completion(ompi_request_t *req)
|
||||
{
|
||||
ompi_wait_sync_t sync;
|
||||
WAIT_SYNC_INIT(&sync, 1);
|
||||
if (opal_using_threads ()) {
|
||||
ompi_wait_sync_t sync;
|
||||
WAIT_SYNC_INIT(&sync, 1);
|
||||
|
||||
if(OPAL_ATOMIC_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) {
|
||||
SYNC_WAIT(&sync);
|
||||
}
|
||||
if(OPAL_ATOMIC_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) {
|
||||
SYNC_WAIT(&sync);
|
||||
}
|
||||
|
||||
assert(REQUEST_COMPLETE(req));
|
||||
WAIT_SYNC_RELEASE(&sync);
|
||||
}
|
||||
#else
|
||||
static inline void ompi_request_wait_completion(ompi_request_t *req)
|
||||
{
|
||||
while(!REQUEST_COMPLETE(req)) {
|
||||
opal_progress();
|
||||
assert(REQUEST_COMPLETE(req));
|
||||
WAIT_SYNC_RELEASE(&sync);
|
||||
} else {
|
||||
while(!REQUEST_COMPLETE(req)) {
|
||||
opal_progress();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Signal or mark a request as complete. If with_signal is true this will
|
||||
* wake any thread pending on the request. If with_signal is false, the
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user