opal/asm: change ll/sc atomics to macros
This commit fixes a hang that occurs with debug builds of Open MPI on aarch64 and power/powerpc systems. When the ll/sc atomics are inline functions the compiler emits load/store instructions for the function arguments with -O0. These extra load/store arguments can cause the ll reservation to be cancelled causing live-lock. Note that we did attempt to fix this with always_inline but the extra instructions are stil emitted by the compiler (gcc). There may be another fix but this has been tested and is working well. References #3697. Close when applied to v3.0.x and v3.1.x. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
50a9508e0f
Коммит
f8dbf62879
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
|
||||
* reseved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -183,9 +183,10 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
|
||||
opal_list_item_t *item)
|
||||
{
|
||||
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
|
||||
opal_list_item_t *tail_item;
|
||||
|
||||
item->opal_list_next = &fifo->opal_fifo_ghost;
|
||||
item->opal_list_next = (opal_list_item_t *) ghost;
|
||||
|
||||
opal_atomic_wmb ();
|
||||
|
||||
@ -194,7 +195,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
|
||||
|
||||
opal_atomic_wmb ();
|
||||
|
||||
if (&fifo->opal_fifo_ghost == tail_item) {
|
||||
if (ghost == tail_item) {
|
||||
/* update the head */
|
||||
fifo->opal_fifo_head.data.item = item;
|
||||
} else {
|
||||
@ -212,12 +213,22 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
|
||||
*/
|
||||
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
{
|
||||
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
|
||||
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
|
||||
|
||||
#if OPAL_HAVE_ATOMIC_LLSC_PTR
|
||||
register opal_list_item_t *item, *next;
|
||||
int attempt = 0, ret = 0;
|
||||
|
||||
/* use load-linked store-conditional to avoid ABA issues */
|
||||
do {
|
||||
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
|
||||
if (++attempt == 5) {
|
||||
/* deliberatly suspend this thread to allow other threads to run. this should
|
||||
* only occur during periods of contention on the lifo. */
|
||||
_opal_lifo_release_cpu ();
|
||||
attempt = 0;
|
||||
}
|
||||
|
||||
opal_atomic_ll_ptr(&fifo->opal_fifo_head.data.item, item);
|
||||
if (ghost == item) {
|
||||
if (ghost == fifo->opal_fifo_tail.data.item) {
|
||||
return NULL;
|
||||
@ -229,11 +240,12 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
}
|
||||
|
||||
next = (opal_list_item_t *) item->opal_list_next;
|
||||
if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) {
|
||||
break;
|
||||
}
|
||||
} while (1);
|
||||
opal_atomic_sc_ptr(&fifo->opal_fifo_head.data.item, next, ret);
|
||||
} while (!ret);
|
||||
|
||||
#else
|
||||
opal_list_item_t *item, *next;
|
||||
|
||||
/* protect against ABA issues by "locking" the head */
|
||||
do {
|
||||
if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) {
|
||||
@ -258,10 +270,10 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
|
||||
if (ghost == next) {
|
||||
void *tmp = item;
|
||||
|
||||
if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) {
|
||||
while (ghost == item->opal_list_next) {
|
||||
if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, (void *) ghost)) {
|
||||
do {
|
||||
opal_atomic_rmb ();
|
||||
}
|
||||
} while (ghost == item->opal_list_next);
|
||||
|
||||
fifo->opal_fifo_head.data.item = (opal_list_item_t *) item->opal_list_next;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
|
||||
* reseved.
|
||||
* Copyright (c) 2016-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -206,8 +206,8 @@ static inline void _opal_lifo_release_cpu (void)
|
||||
*/
|
||||
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
|
||||
{
|
||||
opal_list_item_t *item, *next;
|
||||
int attempt = 0;
|
||||
register opal_list_item_t *item, *next;
|
||||
int attempt = 0, ret;
|
||||
|
||||
do {
|
||||
if (++attempt == 5) {
|
||||
@ -217,13 +217,14 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
|
||||
attempt = 0;
|
||||
}
|
||||
|
||||
item = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item);
|
||||
opal_atomic_ll_ptr(&lifo->opal_lifo_head.data.item, item);
|
||||
if (&lifo->opal_lifo_ghost == item) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
next = (opal_list_item_t *) item->opal_list_next;
|
||||
} while (!opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, next));
|
||||
opal_atomic_sc_ptr(&lifo->opal_lifo_head.data.item, next, ret);
|
||||
} while (!ret);
|
||||
|
||||
opal_atomic_wmb ();
|
||||
|
||||
|
@ -162,28 +162,31 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
|
||||
{
|
||||
int32_t ret;
|
||||
#define opal_atomic_ll_32(addr, ret) \
|
||||
do { \
|
||||
volatile int32_t *_addr = (addr); \
|
||||
int32_t _ret; \
|
||||
\
|
||||
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
|
||||
: "=&r" (_ret) \
|
||||
: "r" (_addr)); \
|
||||
\
|
||||
ret = (typeof(ret)) _ret; \
|
||||
} while (0)
|
||||
|
||||
__asm__ __volatile__ ("ldaxr %w0, [%1] \n"
|
||||
: "=&r" (ret)
|
||||
: "r" (addr));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
|
||||
{
|
||||
int ret;
|
||||
|
||||
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n"
|
||||
: "=&r" (ret)
|
||||
: "r" (addr), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return ret == 0;
|
||||
}
|
||||
#define opal_atomic_sc_32(addr, newval, ret) \
|
||||
do { \
|
||||
volatile int32_t *_addr = (addr); \
|
||||
int32_t _newval = (int32_t) newval; \
|
||||
int _ret; \
|
||||
\
|
||||
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
|
||||
: "=&r" (_ret) \
|
||||
: "r" (_addr), "r" (_newval) \
|
||||
: "cc", "memory"); \
|
||||
\
|
||||
ret = (_ret == 0); \
|
||||
} while (0)
|
||||
|
||||
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
|
||||
{
|
||||
@ -269,28 +272,31 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
|
||||
{
|
||||
int64_t ret;
|
||||
#define opal_atomic_ll_64(addr, ret) \
|
||||
do { \
|
||||
volatile int64_t *_addr = (addr); \
|
||||
int64_t _ret; \
|
||||
\
|
||||
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
|
||||
: "=&r" (_ret) \
|
||||
: "r" (_addr)); \
|
||||
\
|
||||
ret = (typeof(ret)) _ret; \
|
||||
} while (0)
|
||||
|
||||
__asm__ __volatile__ ("ldaxr %0, [%1] \n"
|
||||
: "=&r" (ret)
|
||||
: "r" (addr));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
|
||||
{
|
||||
int ret;
|
||||
|
||||
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n"
|
||||
: "=&r" (ret)
|
||||
: "r" (addr), "r" (newval)
|
||||
: "cc", "memory");
|
||||
|
||||
return ret == 0;
|
||||
}
|
||||
#define opal_atomic_sc_64(addr, newval, ret) \
|
||||
do { \
|
||||
volatile int64_t *_addr = (addr); \
|
||||
int64_t _newval = (int64_t) newval; \
|
||||
int _ret; \
|
||||
\
|
||||
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
|
||||
: "=&r" (_ret) \
|
||||
: "r" (_addr), "r" (_newval) \
|
||||
: "cc", "memory"); \
|
||||
\
|
||||
ret = (_ret == 0); \
|
||||
} while (0)
|
||||
|
||||
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
|
||||
static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \
|
||||
|
@ -308,15 +308,15 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_)
|
||||
|
||||
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32
|
||||
|
||||
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr)
|
||||
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, (int32_t) newval)
|
||||
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((volatile int32_t *) (addr), ret)
|
||||
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((volatile int32_t *) (addr), (intptr_t) (value), ret)
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
|
||||
|
||||
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64
|
||||
|
||||
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr)
|
||||
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval)
|
||||
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((volatile int64_t *) (addr), ret)
|
||||
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((volatile int64_t *) (addr), (intptr_t) (value), ret)
|
||||
|
||||
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
|
||||
|
||||
|
@ -165,31 +165,35 @@ static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *add
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
|
||||
{
|
||||
int32_t ret;
|
||||
/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason
|
||||
* is that even with an always_inline attribute the compiler may still emit instructions to store then
|
||||
* load the arguments to/from the stack. This sequence may cause the ll reservation to be cancelled. */
|
||||
#define opal_atomic_ll_32(addr, ret) \
|
||||
do { \
|
||||
volatile int32_t *_addr = (addr); \
|
||||
int32_t _ret; \
|
||||
__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" \
|
||||
: "=&r" (_ret) \
|
||||
: "r" (_addr) \
|
||||
); \
|
||||
ret = (typeof(ret)) _ret; \
|
||||
} while (0)
|
||||
|
||||
__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t"
|
||||
: "=&r" (ret)
|
||||
: "r" (addr)
|
||||
);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
|
||||
{
|
||||
int32_t ret, foo;
|
||||
|
||||
__asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t"
|
||||
" li %0,0 \n\t"
|
||||
" bne- 1f \n\t"
|
||||
" ori %0,%0,1 \n\t"
|
||||
"1:"
|
||||
: "=r" (ret), "=m" (*addr), "=r" (foo)
|
||||
: "r" (addr), "r" (newval)
|
||||
: "cc", "memory");
|
||||
return ret;
|
||||
}
|
||||
#define opal_atomic_sc_32(addr, value, ret) \
|
||||
do { \
|
||||
volatile int32_t *_addr = (addr); \
|
||||
int32_t _ret, _foo, _newval = (int32_t) value; \
|
||||
\
|
||||
__asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" \
|
||||
" li %0,0 \n\t" \
|
||||
" bne- 1f \n\t" \
|
||||
" ori %0,%0,1 \n\t" \
|
||||
"1:" \
|
||||
: "=r" (_ret), "=m" (*_addr), "=r" (_foo) \
|
||||
: "r" (_addr), "r" (_newval) \
|
||||
: "cc", "memory"); \
|
||||
ret = _ret; \
|
||||
} while (0)
|
||||
|
||||
/* these two functions aren't inlined in the non-gcc case because then
|
||||
there would be two function calls (since neither cmpset_32 nor
|
||||
@ -278,32 +282,33 @@ static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *add
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
|
||||
{
|
||||
int64_t ret;
|
||||
|
||||
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t"
|
||||
: "=&r" (ret)
|
||||
: "r" (addr)
|
||||
);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
|
||||
{
|
||||
int32_t ret;
|
||||
|
||||
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t"
|
||||
" li %0,0 \n\t"
|
||||
" bne- 1f \n\t"
|
||||
" ori %0,%0,1 \n\t"
|
||||
"1:"
|
||||
: "=r" (ret)
|
||||
: "r" (addr), "r" (OPAL_ASM_VALUE64(newval))
|
||||
: "cc", "memory");
|
||||
return ret;
|
||||
}
|
||||
#define opal_atomic_ll_64(addr, ret) \
|
||||
do { \
|
||||
volatile int64_t *_addr = (addr); \
|
||||
int64_t _ret; \
|
||||
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \
|
||||
: "=&r" (_ret) \
|
||||
: "r" (_addr) \
|
||||
); \
|
||||
ret = (typeof(ret)) _ret; \
|
||||
} while (0)
|
||||
|
||||
#define opal_atomic_sc_64(addr, value, ret) \
|
||||
do { \
|
||||
volatile int64_t *_addr = (addr); \
|
||||
int64_t _foo, _newval = (int64_t) value; \
|
||||
int32_t _ret; \
|
||||
\
|
||||
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" \
|
||||
" li %0,0 \n\t" \
|
||||
" bne- 1f \n\t" \
|
||||
" ori %0,%0,1 \n\t" \
|
||||
"1:" \
|
||||
: "=r" (_ret) \
|
||||
: "r" (_addr), "r" (OPAL_ASM_VALUE64(_newval)) \
|
||||
: "cc", "memory"); \
|
||||
ret = _ret; \
|
||||
} while (0)
|
||||
|
||||
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
|
||||
{
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user