1
1

Merge pull request #1909 from hjelmn/threads

Fix performance regression caused by enabling opal thread support
Этот коммит содержится в:
Nathan Hjelm 2016-07-28 08:41:13 -06:00 коммит произвёл GitHub
родитель a0a999e63d a8c3699484
Коммит 6f7e680c57
4 изменённых файлов: 181 добавлений и 155 удалений

Просмотреть файл

@ -11,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -121,7 +123,7 @@
#include <assert.h> #include <assert.h>
#include <stdlib.h> #include <stdlib.h>
#include "opal/sys/atomic.h" #include "opal/threads/thread_usage.h"
BEGIN_C_DECLS BEGIN_C_DECLS
@ -508,7 +510,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls)
static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__; static inline int opal_obj_update(opal_object_t *object, int inc) __opal_attribute_always_inline__;
static inline int opal_obj_update(opal_object_t *object, int inc) static inline int opal_obj_update(opal_object_t *object, int inc)
{ {
return opal_atomic_add_32(&(object->obj_reference_count), inc); return OPAL_THREAD_ADD32(&object->obj_reference_count, inc);
} }
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -29,7 +29,8 @@ headers += \
threads/mutex_unix.h \ threads/mutex_unix.h \
threads/threads.h \ threads/threads.h \
threads/tsd.h \ threads/tsd.h \
threads/wait_sync.h threads/wait_sync.h \
threads/thread_usage.h
lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \
threads/condition.c \ threads/condition.c \

Просмотреть файл

@ -28,8 +28,7 @@
#include "opal_config.h" #include "opal_config.h"
#include "opal/sys/atomic.h" #include "opal/threads/thread_usage.h"
#include "opal/prefetch.h"
BEGIN_C_DECLS BEGIN_C_DECLS
@ -41,11 +40,6 @@ BEGIN_C_DECLS
* Functions for locking of critical sections. * Functions for locking of critical sections.
*/ */
/*
* declaring this here so that CL does not complain
*/
OPAL_DECLSPEC extern bool opal_uses_threads;
/** /**
* Opaque mutex object * Opaque mutex object
*/ */
@ -107,67 +101,6 @@ END_C_DECLS
BEGIN_C_DECLS BEGIN_C_DECLS
/**
* Check and see if the process is using multiple threads.
*
* @retval true If the process may have more than one thread.
* @retval false If the process only has a single thread.
*
* The value that this function returns is influenced by:
*
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
* - what the final MPI thread level was determined to be,
* - whether the OMPI or MPI libraries are multi-threaded (Jan 2003:
* they're not),
* - whether configure determined if we have thread support or not
*
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
* functions) invoke opal_set_using_threads() to influence the value of
* this function, depending on their situation. Some examples:
*
* - if configure determined that we do not have threads, then this
* value will always be false.
*
* - if MPI_INIT is invoked, and the ompi libraries are [still]
* single-threaded, this value will be false.
*
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
* thread support, and the final thread level is determined to be
* MPI_THREAD_MULTIPLE, this value will be true.
*
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
* this value will be false.
*
* Hence, this function will return false if there is guaranteed to
* only be one thread in the process. If there is even the
* possibility that we may have multiple threads, true will be
* returned.
*/
#define opal_using_threads() opal_uses_threads
/**
* Set whether the process is using multiple threads or not.
*
* @param have Boolean indicating whether the process is using
* multiple threads or not.
*
* @retval opal_using_threads The new return value from
* opal_using_threads().
*
* This function is used to influence the return value of
* opal_using_threads(). If configure detected that we have thread
* support, the return value of future invocations of
* opal_using_threads() will be the parameter's value. If configure
* detected that we have no thread support, then the retuen from
* opal_using_threads() will always be false.
*/
static inline bool opal_set_using_threads(bool have)
{
opal_uses_threads = have;
return opal_using_threads();
}
/** /**
* Lock a mutex if opal_using_threads() says that multiple threads may * Lock a mutex if opal_using_threads() says that multiple threads may
* be active in the process. * be active in the process.
@ -254,90 +187,6 @@ static inline bool opal_set_using_threads(bool have)
} \ } \
} while (0) } while (0)
/**
* Use an atomic operation for increment/decrement if opal_using_threads()
* indicates that threads are in use by the application or library.
*/
static inline int32_t
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
{
int32_t ret;
if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_32(addr, delta);
} else {
ret = (*addr += delta);
}
return ret;
}
#if OPAL_HAVE_ATOMIC_MATH_64
static inline int64_t
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
{
int64_t ret;
if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_64(addr, delta);
} else {
ret = (*addr += delta);
}
return ret;
}
#endif
static inline size_t
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
{
size_t ret;
if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_size_t(addr, delta);
} else {
ret = (*addr += delta);
}
return ret;
}
/* BWB: FIX ME: remove if possible */
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)
#if OPAL_HAVE_ATOMIC_CMPSET_32
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET(x, y, z) \
(OPAL_UNLIKELY(opal_using_threads()) ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
{
if (opal_using_threads ()) {
return opal_atomic_swap_ptr (ptr, newvalue);
}
void *old = ((void **) ptr)[0];
((void **) ptr)[0] = newvalue;
return old;
}
#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)
END_C_DECLS END_C_DECLS
#endif /* OPAL_MUTEX_H */ #endif /* OPAL_MUTEX_H */

174
opal/threads/thread_usage.h Обычный файл
Просмотреть файл

@ -0,0 +1,174 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#if !defined(OPAL_THREAD_USAGE_H)
#define OPAL_THREAD_USAGE_H
#include "opal_config.h"
#include "opal/sys/atomic.h"
#include "opal/prefetch.h"
OPAL_DECLSPEC extern bool opal_uses_threads;
/**
* Check and see if the process is using multiple threads.
*
* @retval true If the process may have more than one thread.
* @retval false If the process only has a single thread.
*
* The value that this function returns is influenced by:
*
* - how MPI_INIT or MPI_INIT_THREAD was invoked,
* - what the final MPI thread level was determined to be,
* - whether the OMPI or MPI libraries are multi-threaded
*
* MPI_INIT and MPI_INIT_THREAD (specifically, back-end OMPI startup
* functions) invoke opal_set_using_threads() to influence the value of
* this function, depending on their situation. Some examples:
*
* - if MPI_INIT is invoked, and the ompi components in use are
* single-threaded, this value will be false.
*
* - if MPI_INIT_THREAD is invoked with MPI_THREAD_MULTIPLE, we have
* thread support, and the final thread level is determined to be
* MPI_THREAD_MULTIPLE, this value will be true.
*
* - if the process is a single-threaded OMPI executable (e.g., mpicc),
* this value will be false.
*
* Hence, this function will return false if there is guaranteed to
* only be one thread in the process. If there is even the
* possibility that we may have multiple threads, true will be
* returned.
*/
#define opal_using_threads() opal_uses_threads
/**
* Set whether the process is using multiple threads or not.
*
* @param have Boolean indicating whether the process is using
* multiple threads or not.
*
* @retval opal_using_threads The new return value from
* opal_using_threads().
*
* This function is used to influence the return value of
* opal_using_threads(). If configure detected that we have thread
* support, the return value of future invocations of
* opal_using_threads() will be the parameter's value. If configure
* detected that we have no thread support, then the retuen from
* opal_using_threads() will always be false.
*/
static inline bool opal_set_using_threads(bool have)
{
opal_uses_threads = have;
return opal_using_threads();
}
/**
* Use an atomic operation for increment/decrement if opal_using_threads()
* indicates that threads are in use by the application or library.
*/
static inline int32_t
OPAL_THREAD_ADD32(volatile int32_t *addr, int delta)
{
int32_t ret;
if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_32(addr, delta);
} else {
ret = (*addr += delta);
}
return ret;
}
#if OPAL_HAVE_ATOMIC_MATH_64
static inline int64_t
OPAL_THREAD_ADD64(volatile int64_t *addr, int delta)
{
int64_t ret;
if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_64(addr, delta);
} else {
ret = (*addr += delta);
}
return ret;
}
#endif
static inline size_t
OPAL_THREAD_ADD_SIZE_T(volatile size_t *addr, int delta)
{
size_t ret;
if (OPAL_UNLIKELY(opal_using_threads())) {
ret = opal_atomic_add_size_t(addr, delta);
} else {
ret = (*addr += delta);
}
return ret;
}
/* BWB: FIX ME: remove if possible */
#define OPAL_CMPSET(x, y, z) ((*(x) == (y)) ? ((*(x) = (z)), 1) : 0)
#if OPAL_HAVE_ATOMIC_CMPSET_32
#define OPAL_ATOMIC_CMPSET_32(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_32(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_64(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_64(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
#if OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64
#define OPAL_ATOMIC_CMPSET_PTR(x, y, z) \
(opal_using_threads() ? opal_atomic_cmpset_ptr(x, y, z) : OPAL_CMPSET(x, y, z))
#endif
static inline void *opal_thread_swap_ptr (volatile void *ptr, void *newvalue)
{
if (opal_using_threads ()) {
return opal_atomic_swap_ptr (ptr, newvalue);
}
void *old = ((void **) ptr)[0];
((void **) ptr)[0] = newvalue;
return old;
}
#define OPAL_ATOMIC_SWAP_PTR(x, y) opal_thread_swap_ptr (x, y)
#endif /* !defined(OPAL_THREAD_USAGE_H) */