Merge pull request #720 from hjelmn/ppc_fixes
PowerPC/Power lifo/fifo improvements
Commit d2b3c9d54b
@@ -216,6 +216,27 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{
    opal_list_item_t *item, *next;

#if OPAL_HAVE_ATOMIC_LLSC_PTR
    /* use load-linked store-conditional to avoid ABA issues */
    do {
        item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
        if (&fifo->opal_fifo_ghost == item) {
            if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) {
                return NULL;
            }

            /* fifo does not appear empty. wait for the fifo to be made
             * consistent by conflicting thread. */
            continue;
        }

        next = (opal_list_item_t *) item->opal_list_next;
        if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) {
            break;
        }
    } while (1);
#else
    /* protect against ABA issues by "locking" the head */
    do {
        if (opal_atomic_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) {
            break;
@@ -234,6 +255,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)

    next = (opal_list_item_t *) item->opal_list_next;
    fifo->opal_fifo_head.data.item = next;
#endif

    if (&fifo->opal_fifo_ghost == next) {
        if (!opal_atomic_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) {
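For context, the LL/SC branch added above follows one simple idiom: load-linked the head, compute the new head, and let the store-conditional fail whenever any other thread wrote to the head in between, which is what closes the ABA window that a bare compare-and-set leaves open. The sketch below is a hypothetical, minimal illustration of that idiom and is not part of the commit; it assumes the opal headers from this tree are on the include path, and detach_head is an invented name.

#include "opal/class/opal_list.h"   /* opal_list_item_t */
#include "opal/sys/atomic.h"        /* opal_atomic_ll_ptr / opal_atomic_sc_ptr */

/* Hypothetical helper (not in the commit): atomically detach the first item of a
 * NULL-terminated singly-linked list whose head pointer lives in *head. The
 * store-conditional fails if another thread wrote the head between the ll and
 * the sc, even if it wrote back the same value, so the classic ABA failure of a
 * bare compare-and-set cannot occur. */
static inline opal_list_item_t *detach_head (opal_list_item_t * volatile *head)
{
#if OPAL_HAVE_ATOMIC_LLSC_PTR
    opal_list_item_t *item;

    do {
        item = (opal_list_item_t *) opal_atomic_ll_ptr (head);
        if (NULL == item) {
            return NULL;                 /* list is empty */
        }
    } while (!opal_atomic_sc_ptr (head, item->opal_list_next));

    return item;
#else
    (void) head;
    return NULL;   /* a real fallback would retry with opal_atomic_cmpset_ptr */
#endif
}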
@@ -12,7 +12,7 @@
 * All rights reserved.
 * Copyright (c) 2007 Voltaire All rights reserved.
 * Copyright (c) 2010 IBM Corporation. All rights reserved.
 * Copyright (c) 2014 Los Alamos National Security, LLC. All rights
 * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
 * reseved.
 * $COPYRIGHT$
 *
@@ -25,6 +25,7 @@
#define OPAL_LIFO_H_HAS_BEEN_INCLUDED

#include "opal_config.h"
#include <time.h>
#include "opal/class/opal_list.h"

#include "opal/sys/atomic.h"
@@ -180,6 +181,52 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
    } while (1);
}

#if OPAL_HAVE_ATOMIC_LLSC_PTR

static inline void _opal_lifo_release_cpu (void)
{
    /* NTH: there are many ways to cause the current thread to be suspended. This one
     * should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
     * the interval will be forced to be in ms (instead of ns or us). Note that there
     * is a performance improvement for the lifo test when this call is made on detection
     * of contention but it may not translate into actually MPI or application performance
     * improvements. */
    static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 };
    nanosleep (&interval, NULL);
}

/* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO
 * is empty so we return NULL.
 */
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
{
    opal_list_item_t *item, *next;
    int attempt = 0;

    do {
        if (++attempt == 5) {
            /* deliberatly suspend this thread to allow other threads to run. this should
             * only occur during periods of contention on the lifo. */
            _opal_lifo_release_cpu ();
            attempt = 0;
        }

        item = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item);
        if (&lifo->opal_lifo_ghost == item) {
            return NULL;
        }

        next = (opal_list_item_t *) item->opal_list_next;
    } while (!opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, next));

    opal_atomic_wmb ();

    item->opal_list_next = NULL;
    return item;
}

#else

/* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO
 * is empty so we return NULL.
 */
@@ -216,6 +263,8 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
    return item;
}

#endif /* OPAL_HAVE_ATOMIC_LLSC_PTR */

#endif

/* single-threaded versions of the lifo functions */
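The comment inside _opal_lifo_release_cpu above mentions poll (NULL, 0, ...) as an alternative way to give up the CPU. A hedged sketch of that variant is shown below; it is hypothetical and not part of the commit, and because poll takes its timeout in milliseconds the shortest possible sleep is far coarser than the 100 ns nanosleep the commit settles on.

#include <poll.h>

/* Hypothetical alternative (not in the commit) to _opal_lifo_release_cpu:
 * poll with an empty descriptor set simply blocks the calling thread for the
 * given timeout. The granularity is milliseconds, which is why the commit
 * prefers nanosleep with a 100 ns interval. */
static inline void lifo_release_cpu_poll (void)
{
    (void) poll (NULL, 0, 1);   /* sleep for roughly one millisecond */
}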
@@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 * University Research and Technology
@@ -11,7 +12,7 @@
 * All rights reserved.
 * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
 * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
 * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
 * reserved.
 * $COPYRIGHT$
 *
@@ -38,7 +39,7 @@
 * - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
 * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
 * - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly"
 * - \c OPAL_HAVE_ATOMIC_MATH_64 if 32 bit add/sub/cmpset can be done "atomicly"
 * - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly"
 *
 * Note that for the Atomic math, atomic add/sub may be implemented as
 * C code using opal_atomic_cmpset. The appearance of atomic
@@ -177,6 +178,12 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
#ifndef OPAL_HAVE_ATOMIC_CMPSET_128
#define OPAL_HAVE_ATOMIC_CMPSET_128 0
#endif
#ifndef OPAL_HAVE_ATOMIC_LLSC_32
#define OPAL_HAVE_ATOMIC_LLSC_32 0
#endif
#ifndef OPAL_HAVE_ATOMIC_LLSC_64
#define OPAL_HAVE_ATOMIC_LLSC_64 0
#endif
#endif /* DOXYGEN */

/**********************************************************************
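The note above says that atomic add/sub may be implemented as C code using opal_atomic_cmpset. A minimal sketch of that fallback pattern follows; it is a hypothetical illustration rather than the library's actual implementation, add_32_via_cmpset is an invented name, and the opal headers from this tree are assumed to be on the include path.

#include "opal/sys/atomic.h"   /* opal_atomic_cmpset_32 */

/* Hypothetical illustration (not in the commit): a 32-bit atomic add written
 * purely in terms of compare-and-set. The loop re-reads the current value and
 * retries until the cmpset succeeds, i.e. until no other thread raced us
 * between the read and the update. */
static inline int32_t add_32_via_cmpset (volatile int32_t *addr, int32_t delta)
{
    int32_t oldval;

    do {
        oldval = *addr;                  /* snapshot the current value */
    } while (!opal_atomic_cmpset_32 (addr, oldval, oldval + delta));

    return oldval + delta;               /* value after the addition */
}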
@@ -11,7 +11,7 @@
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
 * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
 * reserved.
 * $COPYRIGHT$
 *
@@ -274,6 +274,30 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr,

#endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */

#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)

#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32

#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr)
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, (int32_t) newval)

#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64

#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr)
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval)

#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

#endif

#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/

#if !defined(OPAL_HAVE_ATOMIC_LLSC_PTR)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0
#endif

#if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64
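The block above adds pointer-sized ll/sc wrappers plus an OPAL_HAVE_ATOMIC_LLSC_PTR feature flag, so callers can choose the LL/SC path at compile time and fall back to compare-and-set elsewhere. Below is a hedged sketch of that dispatch; it is hypothetical, not part of the commit, and claim_slot is an invented name.

#include "opal/sys/atomic.h"   /* assumed include path: this source tree */

/* Hypothetical example (not in the commit): claim a slot by writing `mine`
 * into *slot only if the slot is currently NULL. Uses the new ll/sc pointer
 * macros when they are available and falls back to compare-and-set otherwise.
 * Returns 1 on success and 0 if another thread already owns the slot. */
static inline int claim_slot (void * volatile *slot, void *mine)
{
#if OPAL_HAVE_ATOMIC_LLSC_PTR
    do {
        if (NULL != opal_atomic_ll_ptr (slot)) {
            return 0;                            /* already claimed */
        }
    } while (!opal_atomic_sc_ptr (slot, mine));

    return 1;
#else
    return opal_atomic_cmpset_ptr (slot, NULL, mine);
#endif
}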
@@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 * University Research and Technology
@@ -10,6 +11,8 @@
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2010 IBM Corporation. All rights reserved.
 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
 * reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
@@ -40,6 +43,8 @@
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1

#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1

#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
@@ -48,6 +53,8 @@

#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_SWAP_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1
#endif
@@ -140,6 +147,32 @@ static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
    return (ret == oldval);
}

static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
{
    int32_t ret;

    __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t"
                          : "=&r" (ret)
                          : "r" (addr)
                          :);
    return ret;
}

static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
{
    int32_t ret, foo;

    __asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t"
                          " li %0,0 \n\t"
                          " bne- 1f \n\t"
                          " ori %0,%0,1 \n\t"
                          "1:"
                          : "=r" (ret), "=m" (*addr), "=r" (foo)
                          : "r" (addr), "r" (newval)
                          : "cc", "memory");
    return ret;
}

/* these two functions aren't inlined in the non-gcc case because then
   there would be two function calls (since neither cmpset_32 nor
   atomic_?mb can be inlined). Instead, we "inline" them by hand in
@@ -164,6 +197,20 @@ static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
    return opal_atomic_cmpset_32(addr, oldval, newval);
}

static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
{
    int32_t ret;

    __asm__ __volatile__ ("1: lwarx %0, 0, %2 \n\t"
                          " stwcx. %3, 0, %2 \n\t"
                          " bne- 1b \n\t"
                          : "=&r" (ret), "=m" (*addr)
                          : "r" (addr), "r" (newval)
                          : "cc", "memory");

    return ret;
}

#endif /* OPAL_GCC_INLINE_ASSEMBLY */

@@ -189,6 +236,32 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
    return (ret == oldval);
}

static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
{
    int64_t ret;

    __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t"
                          : "=&r" (ret)
                          : "r" (addr)
                          :);
    return ret;
}

static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
{
    int32_t ret, foo;

    __asm__ __volatile__ (" stdcx. %4, 0, %3 \n\t"
                          " li %0,0 \n\t"
                          " bne- 1f \n\t"
                          " ori %0,%0,1 \n\t"
                          "1:"
                          : "=r" (ret), "=m" (*addr), "=r" (foo)
                          : "r" (addr), "r" (newval)
                          : "cc", "memory");
    return ret;
}

/* these two functions aren't inlined in the non-gcc case because then
   there would be two function calls (since neither cmpset_64 nor
   atomic_?mb can be inlined). Instead, we "inline" them by hand in
@@ -213,6 +286,20 @@ static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
    return opal_atomic_cmpset_64(addr, oldval, newval);
}

static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
{
    int64_t ret;

    __asm__ __volatile__ ("1: ldarx %0, 0, %2 \n\t"
                          " stdcx. %3, 0, %2 \n\t"
                          " bne- 1b \n\t"
                          : "=&r" (ret), "=m" (*addr)
                          : "r" (addr), "r" (newval)
                          : "cc", "memory");

    return ret;
}

#endif /* OPAL_GCC_INLINE_ASSEMBLY */

#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) && OPAL_ASM_SUPPORT_64BIT
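To see how the new PowerPC primitives compose, here is a hedged sketch of a 32-bit fetch-and-add built from opal_atomic_ll_32 and opal_atomic_sc_32; it is hypothetical, not part of the commit, and fetch_add_32_llsc is an invented name. The lwarx in opal_atomic_ll_32 sets a reservation, and the stwcx. in opal_atomic_sc_32 only succeeds (returning 1) if no other processor wrote the location in the meantime, so the loop retries with a fresh value rather than updating a stale one. The 64-bit primitives and the swap functions above are used the same way; for example, opal_atomic_swap_64 (addr, 0) atomically drains a 64-bit accumulator and returns its previous contents.

#include "opal/sys/atomic.h"   /* opal_atomic_ll_32 / opal_atomic_sc_32 on PowerPC */

/* Hypothetical illustration (not in the commit): a 32-bit fetch-and-add as a
 * load-linked/store-conditional retry loop. If the reservation is lost the
 * store-conditional returns 0 and the loop retries, so the increment is
 * applied exactly once and never on top of a stale value. */
static inline int32_t fetch_add_32_llsc (volatile int32_t *addr, int32_t delta)
{
    int32_t old;

    do {
        old = opal_atomic_ll_32 (addr);                /* load and reserve */
    } while (!opal_atomic_sc_32 (addr, old + delta));  /* store if still reserved */

    return old;                                        /* value before the add */
}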