1
1

opal/asm: change ll/sc atomics to macros

This commit fixes a hang that occurs with debug builds of Open MPI on
aarch64 and power/powerpc systems. When the ll/sc atomics are inline
functions the compiler emits load/store instructions for the function
arguments with -O0. These extra load/store arguments can cause the ll
reservation to be cancelled causing live-lock.

Note that we did attempt to fix this with always_inline but the extra
instructions are stil emitted by the compiler (gcc). There may be
another fix but this has been tested and is working well.

References #3697. Close when applied to v3.0.x and v3.1.x.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2018-05-31 09:13:01 -06:00 коммит произвёл Nathan Hjelm
родитель 50a9508e0f
Коммит f8dbf62879
5 изменённых файлов: 136 добавлений и 112 удалений

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
* reseved.
* $COPYRIGHT$
*
@ -183,9 +183,10 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
opal_list_item_t *item)
{
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
opal_list_item_t *tail_item;
item->opal_list_next = &fifo->opal_fifo_ghost;
item->opal_list_next = (opal_list_item_t *) ghost;
opal_atomic_wmb ();
@ -194,7 +195,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
opal_atomic_wmb ();
if (&fifo->opal_fifo_ghost == tail_item) {
if (ghost == tail_item) {
/* update the head */
fifo->opal_fifo_head.data.item = item;
} else {
@ -212,12 +213,22 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
*/
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
#if OPAL_HAVE_ATOMIC_LLSC_PTR
register opal_list_item_t *item, *next;
int attempt = 0, ret = 0;
/* use load-linked store-conditional to avoid ABA issues */
do {
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
if (++attempt == 5) {
/* deliberatly suspend this thread to allow other threads to run. this should
* only occur during periods of contention on the lifo. */
_opal_lifo_release_cpu ();
attempt = 0;
}
opal_atomic_ll_ptr(&fifo->opal_fifo_head.data.item, item);
if (ghost == item) {
if (ghost == fifo->opal_fifo_tail.data.item) {
return NULL;
@ -229,11 +240,12 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
}
next = (opal_list_item_t *) item->opal_list_next;
if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) {
break;
}
} while (1);
opal_atomic_sc_ptr(&fifo->opal_fifo_head.data.item, next, ret);
} while (!ret);
#else
opal_list_item_t *item, *next;
/* protect against ABA issues by "locking" the head */
do {
if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) {
@ -258,10 +270,10 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
if (ghost == next) {
void *tmp = item;
if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) {
while (ghost == item->opal_list_next) {
if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, (void *) ghost)) {
do {
opal_atomic_rmb ();
}
} while (ghost == item->opal_list_next);
fifo->opal_fifo_head.data.item = (opal_list_item_t *) item->opal_list_next;
}

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
* reseved.
* Copyright (c) 2016-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -206,8 +206,8 @@ static inline void _opal_lifo_release_cpu (void)
*/
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
{
opal_list_item_t *item, *next;
int attempt = 0;
register opal_list_item_t *item, *next;
int attempt = 0, ret;
do {
if (++attempt == 5) {
@ -217,13 +217,14 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
attempt = 0;
}
item = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item);
opal_atomic_ll_ptr(&lifo->opal_lifo_head.data.item, item);
if (&lifo->opal_lifo_ghost == item) {
return NULL;
}
next = (opal_list_item_t *) item->opal_list_next;
} while (!opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, next));
opal_atomic_sc_ptr(&lifo->opal_lifo_head.data.item, next, ret);
} while (!ret);
opal_atomic_wmb ();

Просмотреть файл

@ -162,28 +162,31 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t
return ret;
}
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
{
int32_t ret;
#define opal_atomic_ll_32(addr, ret) \
do { \
volatile int32_t *_addr = (addr); \
int32_t _ret; \
\
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr)); \
\
ret = (typeof(ret)) _ret; \
} while (0)
__asm__ __volatile__ ("ldaxr %w0, [%1] \n"
: "=&r" (ret)
: "r" (addr));
return ret;
}
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
{
int ret;
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n"
: "=&r" (ret)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret == 0;
}
#define opal_atomic_sc_32(addr, newval, ret) \
do { \
volatile int32_t *_addr = (addr); \
int32_t _newval = (int32_t) newval; \
int _ret; \
\
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr), "r" (_newval) \
: "cc", "memory"); \
\
ret = (_ret == 0); \
} while (0)
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{
@ -269,28 +272,31 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t
return ret;
}
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr)
{
int64_t ret;
#define opal_atomic_ll_64(addr, ret) \
do { \
volatile int64_t *_addr = (addr); \
int64_t _ret; \
\
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr)); \
\
ret = (typeof(ret)) _ret; \
} while (0)
__asm__ __volatile__ ("ldaxr %0, [%1] \n"
: "=&r" (ret)
: "r" (addr));
return ret;
}
static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval)
{
int ret;
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n"
: "=&r" (ret)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret == 0;
}
#define opal_atomic_sc_64(addr, newval, ret) \
do { \
volatile int64_t *_addr = (addr); \
int64_t _newval = (int64_t) newval; \
int _ret; \
\
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr), "r" (_newval) \
: "cc", "memory"); \
\
ret = (_ret == 0); \
} while (0)
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \

Просмотреть файл

@ -308,15 +308,15 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_)
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr)
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, (int32_t) newval)
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((volatile int32_t *) (addr), ret)
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((volatile int32_t *) (addr), (intptr_t) (value), ret)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr)
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval)
#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((volatile int64_t *) (addr), ret)
#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((volatile int64_t *) (addr), (intptr_t) (value), ret)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

Просмотреть файл

@ -165,31 +165,35 @@ static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *add
return ret;
}
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
{
int32_t ret;
/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason
* is that even with an always_inline attribute the compiler may still emit instructions to store then
* load the arguments to/from the stack. This sequence may cause the ll reservation to be cancelled. */
#define opal_atomic_ll_32(addr, ret) \
do { \
volatile int32_t *_addr = (addr); \
int32_t _ret; \
__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" \
: "=&r" (_ret) \
: "r" (_addr) \
); \
ret = (typeof(ret)) _ret; \
} while (0)
__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t"
: "=&r" (ret)
: "r" (addr)
);
return ret;
}
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
{
int32_t ret, foo;
__asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t"
" li %0,0 \n\t"
" bne- 1f \n\t"
" ori %0,%0,1 \n\t"
"1:"
: "=r" (ret), "=m" (*addr), "=r" (foo)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret;
}
#define opal_atomic_sc_32(addr, value, ret) \
do { \
volatile int32_t *_addr = (addr); \
int32_t _ret, _foo, _newval = (int32_t) value; \
\
__asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" \
" li %0,0 \n\t" \
" bne- 1f \n\t" \
" ori %0,%0,1 \n\t" \
"1:" \
: "=r" (_ret), "=m" (*_addr), "=r" (_foo) \
: "r" (_addr), "r" (_newval) \
: "cc", "memory"); \
ret = _ret; \
} while (0)
/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_32 nor
@ -278,32 +282,33 @@ static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *add
return ret;
}
static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
{
int64_t ret;
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t"
: "=&r" (ret)
: "r" (addr)
);
return ret;
}
static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
{
int32_t ret;
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t"
" li %0,0 \n\t"
" bne- 1f \n\t"
" ori %0,%0,1 \n\t"
"1:"
: "=r" (ret)
: "r" (addr), "r" (OPAL_ASM_VALUE64(newval))
: "cc", "memory");
return ret;
}
#define opal_atomic_ll_64(addr, ret) \
do { \
volatile int64_t *_addr = (addr); \
int64_t _ret; \
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \
: "=&r" (_ret) \
: "r" (_addr) \
); \
ret = (typeof(ret)) _ret; \
} while (0)
#define opal_atomic_sc_64(addr, value, ret) \
do { \
volatile int64_t *_addr = (addr); \
int64_t _foo, _newval = (int64_t) value; \
int32_t _ret; \
\
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" \
" li %0,0 \n\t" \
" bne- 1f \n\t" \
" ori %0,%0,1 \n\t" \
"1:" \
: "=r" (_ret) \
: "r" (_addr), "r" (OPAL_ASM_VALUE64(_newval)) \
: "cc", "memory"); \
ret = _ret; \
} while (0)
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
{