1
1

ompi/request: fix performance regression

This commit fixes a performance regression introduced by the request
rework. We were always taking the multi-threaded path because
OPAL_ENABLE_MULTI_THREADS is either not defined or always defined to 1,
depending on the Open MPI version. To fix this, I removed the
preprocessor conditional and added a run-time check on
opal_using_threads(). The multi-threaded path will be optimized out in
2.0.0 in a non-thread-multiple build, as opal_using_threads is #defined
to false in that case.

Fixes open-mpi/ompi#1806

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2016-06-21 11:45:32 -06:00
родитель 46406914c5
Коммит 544adb9aed

Просмотреть файл

@ -375,27 +375,25 @@ static inline int ompi_request_free(ompi_request_t** request)
* Wait for a particular request to complete
*/
static inline void ompi_request_wait_completion(ompi_request_t *req)
{
    /*
     * Select the waiting strategy at run time rather than with the
     * OPAL_ENABLE_MULTI_THREADS preprocessor guard, so that a process
     * that is not using threads does not take the synchronization-object
     * path.
     */
    if (opal_using_threads ()) {
        ompi_wait_sync_t sync;
        WAIT_SYNC_INIT(&sync, 1);

        /*
         * Try to swap req_complete from REQUEST_PENDING to the address of
         * our sync object. Only when the swap succeeds (the request was
         * still pending) do we wait on the sync object; otherwise the
         * wait is skipped.
         */
        if (OPAL_ATOMIC_CMPSET_PTR(&req->req_complete, REQUEST_PENDING, &sync)) {
            SYNC_WAIT(&sync);
        }

        assert(REQUEST_COMPLETE(req));
        WAIT_SYNC_RELEASE(&sync);
    } else {
        /* No threads in use: drive progress until the request completes. */
        while (!REQUEST_COMPLETE(req)) {
            opal_progress();
        }
    }
}
/**
* Signal or mark a request as complete. If with_signal is true this will
* wake any thread pending on the request. If with_signal is false, the