Fix performance regression caused by enabling opal thread support
This commit adds opal_using_threads() protection around the atomic operation in OBJ_RETAIN/OBJ_RELEASE. This resolves the performance issues seen when running psm with MPI_THREAD_SINGLE. To avoid issues with header dependencies opal_using_threads() has been moved to a new header (thread_usage.h). The OPAL_THREAD_ADD* and OPAL_THREAD_CMPSET* macros have also been relocated to this header. This commit is cherry-picked off a fix that was submitted for the v1.8 release series but never applied to master. This fixes part of the problem reported by @nysal in #1902. (cherry picked from commit open-mpi/ompi-release@ce91307918) Signed-off-by: Nathan Hjelm <hjelmn@me.com>
Этот коммит содержится в:
родитель
b8a1ffb87e
Коммит
a8c3699484
@ -11,6 +11,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -121,7 +123,7 @@
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "opal/threads/thread_usage.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
@ -508,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls)
|
||||
static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__;
|
||||
static inline int opal_obj_update(opal_object_t *object, int inc)
|
||||
{
|
||||
return opal_atomic_add_32(&(object->obj_reference_count), inc);
|
||||
return OPAL_THREAD_ADD32(&object->obj_reference_count, inc);
|
||||
}
|
||||
|
||||
END_C_DECLS
|
||||
|
@ -29,7 +29,8 @@ headers += \
|
||||
threads/mutex_unix.h \
|
||||
threads/threads.h \
|
||||
threads/tsd.h \
|
||||
threads/wait_sync.h
|
||||
threads/wait_sync.h \
|
||||
threads/thread_usage.h
|
||||
|
||||
lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \
|
||||
threads/condition.c \
|
||||
|
@ -28,8 +28,7 @@
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "opal/prefetch.h"
|
||||
#include "opal/threads/thread_usage.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
@ -41,11 +40,6 @@ BEGIN_C_DECLS
|
||||
* Functions for locking of critical sections.
|
||||
*/
|
||||
|
||||
/*
|
||||
* declaring this here so that CL does not complain
|
||||
*/
|
||||
OPAL_DECLSPEC extern bool opal_uses_threads;
|
||||
|
||||
/**
|
||||
* Opaque mutex object
|
||||
*/
|
||||
@ -107,67 +101,6 @@ END_C_DECLS
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* Check and see if the process is using multiple threads.
|
||||
*
|
||||
* @retval true If the process may have more than one thread.
|
||||
* @retval false If the process only has a single thread.
|
||||
*
|
||||
* The value that this function returns is influenced by:
|
||||
*
|
||||
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
|
||||
* - what the final MPI thread level was determined to be,
|
||||
* - whether the OMPI or MPI libraries are multi-threaded (Jan 2003:
|
||||
* they're not),
|
||||
* - whether configure determined if we have thread support or not
|
||||
*
|
||||
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
|
||||
* functions) invoke opal_set_using_threads() to influence the value of
|
||||
* this function, depending on their situation. Some examples:
|
||||
*
|
||||
* - if configure determined that we do not have threads, then this
|
||||
* value will always be false.
|
||||
*
|
||||
* - if MPI_INIT is invoked, and the ompi libraries are [still]
|
||||
* single-threaded, this value will be false.
|
||||
*
|
||||
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
|
||||
* thread support, and the final thread level is determined to be
|
||||
* MPI_THREAD_MULTIPLE, this value will be true.
|
||||
*
|
||||
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
|
||||
* this value will be false.
|
||||
*
|
||||
* Hence, this function will return false if there is guaranteed to
|
||||
* only be one thread in the process. If there is even the
|
||||
* possibility that we may have multiple threads, true will be
|
||||
* returned.
|
||||
*/
|
||||
#define opal_using_threads() opal_uses_threads
|
||||
|
||||
/**
|
||||
* Set whether the process is using multiple threads or not.
|
||||
*
|
||||
* @param have Boolean indicating whether the process is using
|
||||
* multiple threads or not.
|
||||
*
|
||||
* @retval opal_using_threads The new return value from
|
||||
* opal_using_threads().
|
||||
*
|
||||
* This function is used to influence the return value of
|
||||
* opal_using_threads(). If configure detected that we have thread
|
||||
* support, the return value of future invocations of
|
||||
* opal_using_threads() will be the parameter's value. If configure
|
||||
* detected that we have no thread support, then the retuen from
|
||||
* opal_using_threads() will always be false.
|
||||
*/
|
||||
static inline bool opal_set_using_threads(bool have)
|
||||
{
|
||||
opal_uses_threads = have;
|
||||
return opal_using_threads();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Lock a mutex if opal_using_threads() says that multiple threads may
|
||||
* be active in the process.
|
||||
@ -254,90 +187,6 @@ static inline bool opal_set_using_threads(bool have)
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Use an atomic operation for increment/decrement if opal_using_threads()
|
||||
* indicates that threads are in use by the application or library.
|
||||
*/
|
||||
|
||||
static inline int32_t
|
||||
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
|
||||
{
|
||||
int32_t ret;
|
||||
|
||||
if (OPAL_UNLIKELY(opal_using_threads())) {
|
||||
ret = opal_atomic_add_32(addr, delta);
|
||||
} else {
|
||||
ret = (*addr += delta);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_MATH_64
|
||||
static inline int64_t
|
||||
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
|
||||
{
|
||||
int64_t ret;
|
||||
|
||||
if (OPAL_UNLIKELY(opal_using_threads())) {
|
||||
ret = opal_atomic_add_64(addr, delta);
|
||||
} else {
|
||||
ret = (*addr += delta);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline size_t
|
||||
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
|
||||
{
|
||||
size_t ret;
|
||||
|
||||
if (OPAL_UNLIKELY(opal_using_threads())) {
|
||||
ret = opal_atomic_add_size_t(addr, delta);
|
||||
} else {
|
||||
ret = (*addr += delta);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* BWB: FIX ME: remove if possible */
|
||||
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
|
||||
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
|
||||
#endif
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
|
||||
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
|
||||
#endif
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#define OPAL_ATOMIC_CMPSET(x, y, z) \
|
||||
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
|
||||
#endif
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
|
||||
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
|
||||
#endif
|
||||
|
||||
|
||||
static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
|
||||
{
|
||||
if (opal_using_threads ()) {
|
||||
return opal_atomic_swap_ptr (ptr, newvalue);
|
||||
}
|
||||
|
||||
void *old = ((void **) ptr)[0];
|
||||
((void **) ptr)[0] = newvalue;
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OPAL_MUTEX_H */
|
||||
|
174
opal/threads/thread_usage.h
Обычный файл
174
opal/threads/thread_usage.h
Обычный файл
@ -0,0 +1,174 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#if !defined(OPAL_THREAD_USAGE_H)
|
||||
#define OPAL_THREAD_USAGE_H
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "opal/prefetch.h"
|
||||
|
||||
OPAL_DECLSPEC extern bool opal_uses_threads;
|
||||
|
||||
/**
|
||||
* Check and see if the process is using multiple threads.
|
||||
*
|
||||
* @retval true If the process may have more than one thread.
|
||||
* @retval false If the process only has a single thread.
|
||||
*
|
||||
* The value that this function returns is influenced by:
|
||||
*
|
||||
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
|
||||
* - what the final MPI thread level was determined to be,
|
||||
* - whether the OMPI or MPI libraries are multi-threaded
|
||||
*
|
||||
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
|
||||
* functions) invoke opal_set_using_threads() to influence the value of
|
||||
* this function, depending on their situation. Some examples:
|
||||
*
|
||||
* - if MPI_INIT is invoked, and the ompi components in use are
|
||||
* single-threaded, this value will be false.
|
||||
*
|
||||
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
|
||||
* thread support, and the final thread level is determined to be
|
||||
* MPI_THREAD_MULTIPLE, this value will be true.
|
||||
*
|
||||
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
|
||||
* this value will be false.
|
||||
*
|
||||
* Hence, this function will return false if there is guaranteed to
|
||||
* only be one thread in the process. If there is even the
|
||||
* possibility that we may have multiple threads, true will be
|
||||
* returned.
|
||||
*/
|
||||
#define opal_using_threads() opal_uses_threads
|
||||
|
||||
/**
|
||||
* Set whether the process is using multiple threads or not.
|
||||
*
|
||||
* @param have Boolean indicating whether the process is using
|
||||
* multiple threads or not.
|
||||
*
|
||||
* @retval opal_using_threads The new return value from
|
||||
* opal_using_threads().
|
||||
*
|
||||
* This function is used to influence the return value of
|
||||
* opal_using_threads(). If configure detected that we have thread
|
||||
* support, the return value of future invocations of
|
||||
* opal_using_threads() will be the parameter's value. If configure
|
||||
* detected that we have no thread support, then the retuen from
|
||||
* opal_using_threads() will always be false.
|
||||
*/
|
||||
static inline bool opal_set_using_threads(bool have)
|
||||
{
|
||||
opal_uses_threads = have;
|
||||
return opal_using_threads();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Use an atomic operation for increment/decrement if opal_using_threads()
|
||||
* indicates that threads are in use by the application or library.
|
||||
*/
|
||||
|
||||
static inline int32_t
|
||||
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
|
||||
{
|
||||
int32_t ret;
|
||||
|
||||
if (OPAL_UNLIKELY(opal_using_threads())) {
|
||||
ret = opal_atomic_add_32(addr, delta);
|
||||
} else {
|
||||
ret = (*addr += delta);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_MATH_64
|
||||
static inline int64_t
|
||||
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
|
||||
{
|
||||
int64_t ret;
|
||||
|
||||
if (OPAL_UNLIKELY(opal_using_threads())) {
|
||||
ret = opal_atomic_add_64(addr, delta);
|
||||
} else {
|
||||
ret = (*addr += delta);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline size_t
|
||||
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
|
||||
{
|
||||
size_t ret;
|
||||
|
||||
if (OPAL_UNLIKELY(opal_using_threads())) {
|
||||
ret = opal_atomic_add_size_t(addr, delta);
|
||||
} else {
|
||||
ret = (*addr += delta);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* BWB: FIX ME: remove if possible */
|
||||
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32
|
||||
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
|
||||
(opal_using_threads() ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
|
||||
#endif
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
|
||||
(opal_using_threads() ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
|
||||
#endif
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#define OPAL_ATOMIC_CMPSET(x, y, z) \
|
||||
(opal_using_threads() ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
|
||||
#endif
|
||||
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
|
||||
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
|
||||
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
|
||||
#endif
|
||||
|
||||
static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
|
||||
{
|
||||
if (opal_using_threads ()) {
|
||||
return opal_atomic_swap_ptr (ptr, newvalue);
|
||||
}
|
||||
|
||||
void *old = ((void **) ptr)[0];
|
||||
((void **) ptr)[0] = newvalue;
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)
|
||||
|
||||
#endif /* !defined(OPAL_THREAD_USAGE_H) */
|
Загрузка…
Ссылка в новой задаче
Block a user