1
1

opal/asm: change ll/sc atomics to macros

This commit fixes a hang that occurs with debug builds of Open MPI on
aarch64 and power/powerpc systems. When the ll/sc atomics are inline
functions the compiler emits load/store instructions for the function
arguments with -O0. These extra load/store instructions can cause the ll
reservation to be cancelled, causing live-lock.

Note that we did attempt to fix this with always_inline but the extra
instructions are still emitted by the compiler (gcc). There may be
another fix but this has been tested and is working well.

References #3697. Close when applied to v3.0.x and v3.1.x.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2018-05-31 09:13:01 -06:00 коммит произвёл Nathan Hjelm
родитель 50a9508e0f
Коммит f8dbf62879
5 изменённых файлов: 136 добавлений и 112 удалений

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
* reseved. * reseved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -183,9 +183,10 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
opal_list_item_t *item) opal_list_item_t *item)
{ {
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
opal_list_item_t *tail_item; opal_list_item_t *tail_item;
item->opal_list_next = &fifo->opal_fifo_ghost; item->opal_list_next = (opal_list_item_t *) ghost;
opal_atomic_wmb (); opal_atomic_wmb ();
@ -194,7 +195,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
opal_atomic_wmb (); opal_atomic_wmb ();
if (&fifo->opal_fifo_ghost == tail_item) { if (ghost == tail_item) {
/* update the head */ /* update the head */
fifo->opal_fifo_head.data.item = item; fifo->opal_fifo_head.data.item = item;
} else { } else {
@ -212,12 +213,22 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
*/ */
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{ {
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost; const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
#if OPAL_HAVE_ATOMIC_LLSC_PTR #if OPAL_HAVE_ATOMIC_LLSC_PTR
register opal_list_item_t *item, *next;
int attempt = 0, ret = 0;
/* use load-linked store-conditional to avoid ABA issues */ /* use load-linked store-conditional to avoid ABA issues */
do { do {
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item); if (++attempt == 5) {
/* deliberatly suspend this thread to allow other threads to run. this should
* only occur during periods of contention on the lifo. */
_opal_lifo_release_cpu ();
attempt = 0;
}
opal_atomic_ll_ptr(&fifo->opal_fifo_head.data.item, item);
if (ghost == item) { if (ghost == item) {
if (ghost == fifo->opal_fifo_tail.data.item) { if (ghost == fifo->opal_fifo_tail.data.item) {
return NULL; return NULL;
@ -229,11 +240,12 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
} }
next = (opal_list_item_t *) item->opal_list_next; next = (opal_list_item_t *) item->opal_list_next;
if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) { opal_atomic_sc_ptr(&fifo->opal_fifo_head.data.item, next, ret);
break; } while (!ret);
}
} while (1);
#else #else
opal_list_item_t *item, *next;
/* protect against ABA issues by "locking" the head */ /* protect against ABA issues by "locking" the head */
do { do {
if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) { if (!opal_atomic_swap_32 ((volatile int32_t *) &fifo->opal_fifo_head.data.counter, 1)) {
@ -258,10 +270,10 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
if (ghost == next) { if (ghost == next) {
void *tmp = item; void *tmp = item;
if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, ghost)) { if (!opal_atomic_compare_exchange_strong_ptr (&fifo->opal_fifo_tail.data.item, &tmp, (void *) ghost)) {
while (ghost == item->opal_list_next) { do {
opal_atomic_rmb (); opal_atomic_rmb ();
} } while (ghost == item->opal_list_next);
fifo->opal_fifo_head.data.item = (opal_list_item_t *) item->opal_list_next; fifo->opal_fifo_head.data.item = (opal_list_item_t *) item->opal_list_next;
} }

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
* reseved. * reseved.
* Copyright (c) 2016-2018 Research Organization for Information Science * Copyright (c) 2016-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -206,8 +206,8 @@ static inline void _opal_lifo_release_cpu (void)
*/ */
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
{ {
opal_list_item_t *item, *next; register opal_list_item_t *item, *next;
int attempt = 0; int attempt = 0, ret;
do { do {
if (++attempt == 5) { if (++attempt == 5) {
@ -217,13 +217,14 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
attempt = 0; attempt = 0;
} }
item = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item); opal_atomic_ll_ptr(&lifo->opal_lifo_head.data.item, item);
if (&lifo->opal_lifo_ghost == item) { if (&lifo->opal_lifo_ghost == item) {
return NULL; return NULL;
} }
next = (opal_list_item_t *) item->opal_list_next; next = (opal_list_item_t *) item->opal_list_next;
} while (!opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, next)); opal_atomic_sc_ptr(&lifo->opal_lifo_head.data.item, next, ret);
} while (!ret);
opal_atomic_wmb (); opal_atomic_wmb ();

Просмотреть файл

@ -162,28 +162,31 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t
return ret; return ret;
} }
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) #define opal_atomic_ll_32(addr, ret) \
{ do { \
int32_t ret; volatile int32_t *_addr = (addr); \
int32_t _ret; \
\
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr)); \
\
ret = (typeof(ret)) _ret; \
} while (0)
__asm__ __volatile__ ("ldaxr %w0, [%1] \n" #define opal_atomic_sc_32(addr, newval, ret) \
: "=&r" (ret) do { \
: "r" (addr)); volatile int32_t *_addr = (addr); \
int32_t _newval = (int32_t) newval; \
return ret; int _ret; \
} \
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" \
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) : "=&r" (_ret) \
{ : "r" (_addr), "r" (_newval) \
int ret; : "cc", "memory"); \
\
__asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" ret = (_ret == 0); \
: "=&r" (ret) } while (0)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret == 0;
}
static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval)
{ {
@ -269,28 +272,31 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t
return ret; return ret;
} }
static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr) #define opal_atomic_ll_64(addr, ret) \
{ do { \
int64_t ret; volatile int64_t *_addr = (addr); \
int64_t _ret; \
\
__asm__ __volatile__ ("ldaxr %0, [%1] \n" \
: "=&r" (_ret) \
: "r" (_addr)); \
\
ret = (typeof(ret)) _ret; \
} while (0)
__asm__ __volatile__ ("ldaxr %0, [%1] \n" #define opal_atomic_sc_64(addr, newval, ret) \
: "=&r" (ret) do { \
: "r" (addr)); volatile int64_t *_addr = (addr); \
int64_t _newval = (int64_t) newval; \
return ret; int _ret; \
} \
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \
static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval) : "=&r" (_ret) \
{ : "r" (_addr), "r" (_newval) \
int ret; : "cc", "memory"); \
\
__asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" ret = (_ret == 0); \
: "=&r" (ret) } while (0)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret == 0;
}
#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \
static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \ static inline type opal_atomic_fetch_ ## name ## _ ## bits (volatile type *addr, type value) \

Просмотреть файл

@ -308,15 +308,15 @@ OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_)
#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32 #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr) #define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((volatile int32_t *) (addr), ret)
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, (int32_t) newval) #define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((volatile int32_t *) (addr), (intptr_t) (value), ret)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1 #define OPAL_HAVE_ATOMIC_LLSC_PTR 1
#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64
#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr) #define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((volatile int64_t *) (addr), ret)
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval) #define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((volatile int64_t *) (addr), (intptr_t) (value), ret)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 1 #define OPAL_HAVE_ATOMIC_LLSC_PTR 1

Просмотреть файл

@ -165,31 +165,35 @@ static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *add
return ret; return ret;
} }
static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) /* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason
{ * is that even with an always_inline attribute the compiler may still emit instructions to store then
int32_t ret; * load the arguments to/from the stack. This sequence may cause the ll reservation to be cancelled. */
#define opal_atomic_ll_32(addr, ret) \
do { \
volatile int32_t *_addr = (addr); \
int32_t _ret; \
__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" \
: "=&r" (_ret) \
: "r" (_addr) \
); \
ret = (typeof(ret)) _ret; \
} while (0)
__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" #define opal_atomic_sc_32(addr, value, ret) \
: "=&r" (ret) do { \
: "r" (addr) volatile int32_t *_addr = (addr); \
); int32_t _ret, _foo, _newval = (int32_t) value; \
return ret; \
} __asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" \
" li %0,0 \n\t" \
static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) " bne- 1f \n\t" \
{ " ori %0,%0,1 \n\t" \
int32_t ret, foo; "1:" \
: "=r" (_ret), "=m" (*_addr), "=r" (_foo) \
__asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" : "r" (_addr), "r" (_newval) \
" li %0,0 \n\t" : "cc", "memory"); \
" bne- 1f \n\t" ret = _ret; \
" ori %0,%0,1 \n\t" } while (0)
"1:"
: "=r" (ret), "=m" (*addr), "=r" (foo)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret;
}
/* these two functions aren't inlined in the non-gcc case because then /* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_32 nor there would be two function calls (since neither cmpset_32 nor
@ -278,32 +282,33 @@ static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *add
return ret; return ret;
} }
static inline int64_t opal_atomic_ll_64(volatile int64_t *addr) #define opal_atomic_ll_64(addr, ret) \
{ do { \
int64_t ret; volatile int64_t *_addr = (addr); \
int64_t _ret; \
__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \
: "=&r" (ret) : "=&r" (_ret) \
: "r" (addr) : "r" (_addr) \
); ); \
return ret; ret = (typeof(ret)) _ret; \
} } while (0)
static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
{
int32_t ret;
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t"
" li %0,0 \n\t"
" bne- 1f \n\t"
" ori %0,%0,1 \n\t"
"1:"
: "=r" (ret)
: "r" (addr), "r" (OPAL_ASM_VALUE64(newval))
: "cc", "memory");
return ret;
}
#define opal_atomic_sc_64(addr, value, ret) \
do { \
volatile int64_t *_addr = (addr); \
int64_t _foo, _newval = (int64_t) value; \
int32_t _ret; \
\
__asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" \
" li %0,0 \n\t" \
" bne- 1f \n\t" \
" ori %0,%0,1 \n\t" \
"1:" \
: "=r" (_ret) \
: "r" (_addr), "r" (OPAL_ASM_VALUE64(_newval)) \
: "cc", "memory"); \
ret = _ret; \
} while (0)
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval) static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
{ {