1
1

Update the atomic operations. Not all architectures provide the "fastest" atomic implementations. The only required atomic

operations are the 32-bit and 64-bit compare_and_set; for all other atomic operations there is a fallback in sys/atomic.c.

For ia32 I add a workaround for the ebx register. When compiling with -fPIC, ebx is reserved for the shared-object interface (it stores a global offset pointer). At the same time, cmpxchg8b uses ebx and ecx to hold the new value. The workaround saves the ebx contents before cmpxchg8b and restores them afterward. There is still a possible bug!!! If we get interrupted between the push and the pop of ebx, we will definitely crash ...

This commit was SVN r3387.
Этот коммит содержится в:
George Bosilca 2004-10-28 17:32:27 +00:00
родитель e5d9df23de
Коммит fdaaae5998
5 изменённых файлов: 385 добавлений и 584 удалений

Просмотреть файл

@ -37,77 +37,38 @@ static inline void ompi_atomic_wmb(void)
} }
static inline int ompi_atomic_cmpset_32(volatile uint32_t *addr, static inline int ompi_atomic_cmpset_32(volatile uint32_t *addr,
uint32_t oldval, uint32_t oldval,
uint32_t newval) uint32_t newval)
{ {
unsigned long prev; unsigned long prev;
__asm__ __volatile__(SMPLOCK "cmpxchgl %k1,%2" __asm__ __volatile__(SMPLOCK "cmpxchgl %k1,%2"
: "=a"(prev) : "=a"(prev)
: "q"(newval), "m"(*addr), "0"(oldval) : "q"(newval), "m"(*addr), "0"(oldval)
: "memory"); : "memory");
return prev == oldval; return prev == oldval;
#if 0
uint32_t ret = oldval;
__asm__ __volatile (
SMPLOCK "cmpxchgl %1,%2 \n\
setz %%al \n\
movzbl %%al,%0 \n"
: "+a" (ret)
: "r" (newval), "m" (*(addr))
: "memory");
return (ret == oldval);
#endif
} }
#define ompi_atomic_cmpset_acq_32 ompi_atomic_cmpset_32
static inline int ompi_atomic_cmpset_acq_32(volatile uint32_t *addr, #define ompi_atomic_cmpset_rel_32 ompi_atomic_cmpset_32
uint32_t oldval,
uint32_t newval)
{
return ompi_atomic_cmpset_32(addr, oldval, newval);
}
static inline int ompi_atomic_cmpset_rel_32(volatile uint32_t *addr,
uint32_t oldval,
uint32_t newval)
{
return ompi_atomic_cmpset_32(addr, oldval, newval);
}
static inline int ompi_atomic_cmpset_64(volatile uint64_t *addr, static inline int ompi_atomic_cmpset_64(volatile uint64_t *addr,
uint64_t oldval, uint64_t oldval,
uint64_t newval) uint64_t newval)
{ {
uint64_t ret = oldval; uint64_t ret = oldval;
__asm__ __volatile ( __asm__ __volatile (
SMPLOCK "cmpxchgq %1,%2 \n\ SMPLOCK "cmpxchgq %1,%2 \n\t"
setz %%al \n\ "setz %%al \n\t"
movzbl %%al,%0 \n" "movzbl %%al,%0 \n\t"
: "+a" (ret) : "+a" (ret)
: "r" (newval), "m" (*(addr)) : "r" (newval), "m" (*(addr))
: "memory"); : "memory");
return (ret == oldval); return (ret == oldval);
} }
#define ompi_atomic_cmpset_acq_64 ompi_atomic_cmpset_64
static inline int ompi_atomic_cmpset_acq_64(volatile uint64_t *addr, #define ompi_atomic_cmpset_rel_64 ompi_atomic_cmpset_64
uint64_t oldval,
uint64_t newval)
{
return ompi_atomic_cmpset_64( addr, oldval, newval );
}
static inline int ompi_atomic_cmpset_rel_64(volatile uint64_t *addr,
uint64_t oldval,
uint64_t newval)
{
return ompi_atomic_cmpset_64( addr, oldval, newval );
}
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */

Просмотреть файл

@ -42,234 +42,21 @@ typedef struct ompi_lock_t ompi_lock_t;
/** /**
* Memory barrier * Memory barrier
*/ */
STATIC_INLINE void ompi_atomic_mb(void); static inline void ompi_atomic_mb(void);
/** /**
* Read memory barrier * Read memory barrier
*/ */
STATIC_INLINE void ompi_atomic_rmb(void); static inline void ompi_atomic_rmb(void);
/** /**
* Write memory barrier. * Write memory barrier.
*/ */
STATIC_INLINE void ompi_atomic_wmb(void); static inline void ompi_atomic_wmb(void);
/**
* Atomic compare and set of unsigned 32-bit integer.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* Pseudo-code:
*
* @code
* int ompi_atomic_cmpset_32(addr, oldval, newval)
* {
* if (*addr == oldval) {
* *addr = newval;
* return 1; // success, set value
* } else {
* return 0; // failure, do not set value
* }
* }
* @endcode
*/
STATIC_INLINE int ompi_atomic_cmpset_32(volatile uint32_t *addr,
uint32_t oldval,
uint32_t newval);
/**
* Atomic compare and set of unsigned 32-bit integer with acquire
* semantics.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_acq_32(volatile uint32_t *addr,
uint32_t oldval,
uint32_t newval);
/**
* Atomic compare and set of unsigned 32-bit integer with release
* semantics.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_rel_32(volatile uint32_t *addr,
uint32_t oldval,
uint32_t newval);
/**
* Atomic compare and set of unsigned 64-bit integer.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_64(volatile uint64_t *addr,
uint64_t oldval,
uint64_t newval);
/**
* Atomic compare and set of unsigned 64-bit integer with acquire
* semantics.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_acq_64(volatile uint64_t *addr,
uint64_t oldval,
uint64_t newval);
/**
* Atomic compare and set of unsigned 64-bit integer with release
* semantics.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_rel_64(volatile uint64_t *addr,
uint64_t oldval,
uint64_t newval);
/**
* Atomic compare and set of integer.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_int(volatile int *addr,
int oldval,
int newval);
/**
* Atomic compare and set of integer with acquire semantics.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_acq_int(volatile int *addr,
int oldval,
int newval);
/**
* Atomic compare and set of integer with release semantics.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_rel_int(volatile int *addr,
int oldval,
int newval);
/**
* Atomic compare and set of pointer.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_ptr(volatile void *addr,
void *oldval,
void *newval);
/**
* Atomic compare and set of pointer with acquire semantics.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_acq_ptr(volatile void *addr,
void *oldval,
void *newval);
/**
* Atomic compare and set of pointer with release semantics.
*
* @param addr Address of integer.
* @param oldval Comparison value.
* @param newval New value to set if comparision is true.
*
* See ompi_atomic_cmpset_32 for pseudo-code.
*/
STATIC_INLINE int ompi_atomic_cmpset_rel_ptr(volatile void *addr,
void *oldval,
void *newval);
/**
* Atomically add to a 32-bit integer.
*
* @param addr Address of integer.
* @param delta Value to add.
* @return New value of integer.
*/
static inline uint32_t ompi_atomic_add_32(volatile uint32_t *addr, int delta);
/**
* Atomically add to a 64-bit integer.
*
* @param addr Address of integer.
* @param delta Value to add.
* @return New value of integer.
*/
static inline uint64_t ompi_atomic_add_64(volatile uint64_t *addr, int delta);
/**
* Atomically add to an integer.
*
* @param addr Address of integer.
* @param delta Value to add.
* @return New value of integer.
*/
static inline int ompi_atomic_add_int(volatile int *addr, int delta);
#if 0
/** /**
* Atomically add to an integer. * Atomically add to an integer.
* *
@ -277,8 +64,8 @@ static inline int ompi_atomic_add_int(volatile int *addr, int delta);
* @param newval Value to set. * @param newval Value to set.
* @return Old value of integer. * @return Old value of integer.
*/ */
static inline int ompi_atomic_fetch_and_set_int(volatile int *addr, int newval); static inline int ompi_atomic_fetch_and_set_int(volatile void *addr, int newval);
#endif
/** /**
* Try to acquire a lock. * Try to acquire a lock.
@ -347,189 +134,206 @@ static inline void ompi_atomic_unlock(ompi_lock_t *lock);
# ifdef __GNUC__ # ifdef __GNUC__
# include "sparc64/atomic.h" # include "sparc64/atomic.h"
# endif # endif
#else
#error No atomic operations defined yet
#endif #endif
#ifndef OMPI_HAVE_ATOMIC #ifndef OMPI_HAVE_ATOMIC
#define OMPI_HAVE_ATOMIC 0 #define OMPI_HAVE_ATOMIC 0
#endif #endif
#if OMPI_HAVE_ATOMIC /* All the architectures provide a compare_and_set atomic operations. If
* they dont provide atomic additions and/or substractions then we can
/* * define these operations using the atomic compare_and_set.
* derived operations
*/ */
#if SIZEOF_INT == 4 #if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32)
static inline int32_t ompi_atomic_add_32(volatile int32_t *addr, int delta)
static inline int ompi_atomic_cmpset_int(volatile int *addr,
int oldval,
int newval)
{ {
return ompi_atomic_cmpset_32((volatile uint32_t *) addr, int32_t oldval;
(uint32_t) oldval,
(uint32_t) newval); do {
oldval = *addr;
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval + delta));
return (oldval + delta);
} }
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32 */
static inline int ompi_atomic_cmpset_acq_int(volatile int *addr, #if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32)
int oldval, static inline int32_t ompi_atomic_sub_32(volatile int32_t *addr, int delta)
int newval)
{ {
return ompi_atomic_cmpset_acq_32((volatile uint32_t *) addr, int32_t oldval;
(uint32_t) oldval,
(uint32_t) newval); do {
oldval = *addr;
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval - delta));
return (oldval - delta);
} }
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32 */
static inline int ompi_atomic_cmpset_rel_int(volatile int *addr, #if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64)
int oldval, static inline int64_t ompi_atomic_add_64(volatile int64_t *addr, int delta)
int newval)
{ {
return ompi_atomic_cmpset_rel_32((volatile uint32_t *) addr, int64_t oldval;
(uint32_t) oldval,
(uint32_t) newval); do {
oldval = *addr;
} while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval + delta));
return (oldval + delta);
} }
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_64 */
#elif SIZEOF_INT == 8 #if !defined(OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64)
static inline int64_t ompi_atomic_sub_64(volatile int64_t *addr, int delta)
static inline int ompi_atomic_cmpset_int(volatile int *addr,
int oldval,
int newval)
{ {
return ompi_atomic_cmpset_64((volatile uint64_t *) addr, int64_t oldval;
(uint64_t) oldval,
(uint64_t) newval);
}
static inline int ompi_atomic_cmpset_acq_int(volatile int *addr,
int oldval,
int newval)
{
return ompi_atomic_cmpset_acq_64((volatile uint64_t *) addr,
(uint64_t) oldval,
(uint64_t) newval);
}
static inline int ompi_atomic_cmpset_rel_int(volatile int *addr,
int oldval,
int newval)
{
return ompi_atomic_cmpset_rel_64((volatile uint64_t *) addr,
(uint64_t) oldval,
(uint64_t) newval);
}
#else
#error
#endif
#if SIZEOF_VOID_P == 4
static inline int ompi_atomic_cmpset_ptr(volatile void *addr,
void *oldval,
void *newval)
{
return ompi_atomic_cmpset_32((volatile uint32_t *) addr,
(uint32_t) oldval, (uint32_t) newval);
}
static inline int ompi_atomic_cmpset_acq_ptr(volatile void *addr,
void *oldval,
void *newval)
{
return ompi_atomic_cmpset_acq_32((volatile uint32_t *) addr,
(uint32_t) oldval, (uint32_t) newval);
}
static inline int ompi_atomic_cmpset_rel_ptr(volatile void *addr,
void *oldval,
void *newval)
{
return ompi_atomic_cmpset_rel_32((volatile uint32_t *) addr,
(uint32_t) oldval, (uint32_t) newval);
}
#elif SIZEOF_VOID_P == 8
static inline int ompi_atomic_cmpset_ptr(volatile void *addr,
void *oldval,
void *newval)
{
return ompi_atomic_cmpset_64((volatile uint64_t *) addr,
(uint64_t) oldval,
(uint64_t) newval);
}
static inline int ompi_atomic_cmpset_acq_ptr(volatile void *addr,
void *oldval,
void *newval)
{
return ompi_atomic_cmpset_acq_64((volatile uint64_t *) addr,
(uint64_t) oldval,
(uint64_t) newval);
}
static inline int ompi_atomic_cmpset_rel_ptr(volatile void *addr,
void *oldval,
void *newval)
{
return ompi_atomic_cmpset_rel_64((volatile uint64_t *) addr,
(uint64_t) oldval,
(uint64_t) newval);
}
#else
#error
#endif
static inline uint32_t ompi_atomic_add_32(volatile uint32_t *addr, int delta)
{
uint32_t oldval;
do { do {
oldval = *addr; oldval = *addr;
} while (0 == ompi_atomic_cmpset_32(addr, oldval, oldval + delta)); } while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval - delta));
return (oldval + delta); return (oldval - delta);
} }
#endif /* OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_64 */
static inline int ompi_atomic_cmpset_xx( volatile void* addr, int64_t oldval,
static inline uint64_t ompi_atomic_add_64(volatile uint64_t *addr, int delta) int64_t newval, size_t length )
{ {
uint64_t oldval; switch( length ) {
case 4:
do { return ompi_atomic_cmpset_32( (volatile int32_t*)addr,
oldval = *addr; (int32_t)oldval, (int32_t)newval );
} while (0 == ompi_atomic_cmpset_64(addr, oldval, oldval + delta)); case 8:
return (oldval + delta); return ompi_atomic_cmpset_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
default:
*(int*)(NULL) = 0;
}
return 0; /* always fail */
} }
/**
* Atomic compare and set of pointer with relaxed semantics. This
* macro detect at compile time the type of the first argument
* and choose the correct function to be called.
*
* @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>.
* @param newval New value to set if comparision is true <TYPE>.
*
* See ompi_atomic_cmpset_* for pseudo-code.
*/
#define ompi_atomic_cmpset( ADDR, OLDVAL, NEWVAL ) \
ompi_atomic_cmpset_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
static inline int ompi_atomic_add_int(volatile int *addr, int delta) static inline int ompi_atomic_cmpset_acq_xx( volatile void* addr, int64_t oldval,
int64_t newval, size_t length )
{ {
int oldval; switch( length ) {
case 4:
do { return ompi_atomic_cmpset_acq_32( (volatile int32_t*)addr,
oldval = *addr; (int32_t)oldval, (int32_t)newval );
} while (0 == ompi_atomic_cmpset_int(addr, oldval, oldval + delta)); case 8:
return (oldval + delta); return ompi_atomic_cmpset_acq_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
default:
*(int*)(NULL) = 0;
}
return 0; /* always fail */
} }
/**
* Atomic compare and set of pointer with acquire semantics. This
* macro detect at compile time the type of the first argument
* and choose the correct function to be called.
*
* @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>.
* @param newval New value to set if comparision is true <TYPE>.
*
* See ompi_atomic_cmpset_acq_* for pseudo-code.
*/
#define ompi_atomic_cmpset_acq( ADDR, OLDVAL, NEWVAL ) \
ompi_atomic_cmpset_acq_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
static inline int ompi_atomic_fetch_and_set_int(volatile int *addr, int newval) static inline int ompi_atomic_cmpset_rel_xx( volatile void* addr, int64_t oldval,
int64_t newval, size_t length )
{ {
int oldval; switch( length ) {
case 4:
do { return ompi_atomic_cmpset_rel_32( (volatile int32_t*)addr,
oldval = *addr; (int32_t)oldval, (int32_t)newval );
} while (0 == ompi_atomic_cmpset_int(addr, oldval, newval)); case 8:
return (oldval); return ompi_atomic_cmpset_rel_64( (volatile int64_t*)addr,
(int64_t)oldval, (int64_t)newval );
default:
*(int*)(NULL) = 0;
}
return 0; /* always fail */
} }
#endif /* OMPI_HAVE_ATOMIC */ /**
* Atomic compare and set of pointer with release semantics. This
* macro detect at compile time the type of the first argument
* and choose the correct function to b
*
* @param addr Address of <TYPE>.
* @param oldval Comparison value <TYPE>.
* @param newval New value to set if comparision is true <TYPE>.
*
* See ompi_atomic_cmpsetrel_* for pseudo-code.
*/
#define ompi_atomic_cmpset_rel( ADDR, OLDVAL, NEWVAL ) \
ompi_atomic_cmpset_rel_xx( (volatile void*)(ADDR), (int64_t)(OLDVAL), (int64_t)(NEWVAL), sizeof(*(ADDR)) )
static inline void ompi_atomic_add_xx( volatile void* addr, int32_t value, size_t length )
{
switch( length ) {
case 4:
ompi_atomic_add_32( (volatile int32_t*)addr, (int32_t)value );
break;
case 8:
ompi_atomic_add_64( (volatile int64_t*)addr, (int64_t)value );
break;
default:
*(int*)(NULL) = 0;
}
}
/**
* Atomically increment the content depending on the type. This
* macro detect at compile time the type of the first argument
* and choose the correct function to be called.
*
* @param addr Address of <TYPE>
* @param delta Value to add (converted to <TYPE>).
*/
#define ompi_atomic_add( ADDR, VALUE ) \
ompi_atomic_add_xx( (volatile void*)(ADDR), (int32_t)(VALUE), sizeof(*(ADDR)) )
static inline void ompi_atomic_sub_xx( volatile void* addr, int32_t value, size_t length )
{
switch( length ) {
case 4:
ompi_atomic_sub_32( (volatile int32_t*)addr, (int32_t)value );
break;
case 8:
ompi_atomic_sub_64( (volatile int64_t*)addr, (int64_t)value );
break;
default:
*(int*)(NULL) = 0;
}
}
/**
* Atomically decrement the content depending on the type. This
* macro detect at compile time the type of the first argument
* and choose the correct function to be called.
*
* @param addr Address of <TYPE>
* @param delta Value to substract (converted to <TYPE>).
*/
#define ompi_atomic_sub( ADDR, VALUE ) \
ompi_atomic_sub_xx( (volatile void*)(ADDR), (int32_t)(VALUE), sizeof(*(ADDR)) )
#endif /* ifdef WIN32 */ #endif /* ifdef WIN32 */
@ -550,32 +354,28 @@ enum {
static inline int ompi_atomic_trylock(ompi_lock_t *lock) static inline int ompi_atomic_trylock(ompi_lock_t *lock)
{ {
return ompi_atomic_cmpset_acq_int((volatile int *) lock, ompi_atomic_cmpset_acq((volatile int*) lock,
OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_UNLOCKED,
OMPI_ATOMIC_LOCKED); OMPI_ATOMIC_LOCKED);
return lock->u.lock;
} }
static inline void ompi_atomic_lock(ompi_lock_t *lock) static inline void ompi_atomic_lock(ompi_lock_t *lock)
{ {
while (!ompi_atomic_cmpset_acq_int((volatile int *) lock, while( !ompi_atomic_cmpset_acq((volatile int *) lock,
OMPI_ATOMIC_UNLOCKED, OMPI_ATOMIC_UNLOCKED,
OMPI_ATOMIC_LOCKED)) { OMPI_ATOMIC_LOCKED) ) {
while (lock->u.lock == OMPI_ATOMIC_LOCKED) { while (lock->u.lock == OMPI_ATOMIC_LOCKED) {
/* spin */ ; /* spin */ ;
} }
} }
} }
static inline void ompi_atomic_unlock(ompi_lock_t *lock) static inline void ompi_atomic_unlock(ompi_lock_t *lock)
{ {
if (0) { ompi_atomic_cmpset_rel((volatile int *) lock,
ompi_atomic_cmpset_rel_int((volatile int *) lock, OMPI_ATOMIC_LOCKED,
OMPI_ATOMIC_LOCKED, OMPI_ATOMIC_UNLOCKED);
OMPI_ATOMIC_UNLOCKED);
} else {
ompi_atomic_wmb();
lock->u.lock = OMPI_ATOMIC_UNLOCKED;
}
} }
#endif /* OMPI_HAVE_ATOMIC || OMPI_HAVE_ATOMIC_WIN32 */ #endif /* OMPI_HAVE_ATOMIC || OMPI_HAVE_ATOMIC_WIN32 */

Просмотреть файл

@ -37,84 +37,113 @@ static inline void ompi_atomic_wmb(void)
} }
static inline int ompi_atomic_cmpset_32(volatile uint32_t *addr, static inline int ompi_atomic_cmpset_32(volatile int32_t *addr,
uint32_t oldval, int32_t oldval,
uint32_t newval) int32_t newval)
{ {
unsigned char ret;
__asm__ __volatile (
SMPLOCK "cmpxchgl %1,%2 \n\t"
"sete %0 \n\t"
: "=qm" (ret)
: "q"(newval), "m"(*((volatile long*)addr)), "a"(oldval)
: "memory");
return (int)ret;
}
#define ompi_atomic_cmpset_acq_32 ompi_atomic_cmpset_32
#define ompi_atomic_cmpset_rel_32 ompi_atomic_cmpset_32
typedef struct {
uint32_t lo;
uint32_t hi;
} lwords_t;
/* On Linux the EBX register is used by the shared libraries
* to keep the global offset. In same time this register is
* required by the cmpxchg8b instruction (as an input parameter).
* This conflict orce us to save the EBX before the cmpxchg8b
* and to restore it afterward.
*/
static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
int64_t oldval,
int64_t newval)
{
/*
* Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into
* m64. Else, clear ZF and load m64 into EDX:EAX.
*/
lwords_t *pnew = (lwords_t*)&newval;
#if 1 #if 1
uint32_t ret = *addr; int64_t prev;
if(ret == oldval) {
*addr = newval; __asm__ __volatile__(
return 1; "push %%ebx \n\t"
} "movl %3, %%ebx \n\t"
return 0; SMPLOCK "cmpxchg8b %4 \n\t"
"pop %%ebx \n\t"
: "=A" (prev)
: "0" (oldval), "c" ((unsigned long)pnew->lo),
"r" ((unsigned long)pnew->hi), "m" (addr)
: "cc", "memory");
return (prev == oldval);
#else #else
uint32_t ret = oldval; unsigned char realized;
__asm__ __volatile (
SMPLOCK "cmpxchgl %1,%2 \n\
setz %%al \n\
movzbl %%al,%0 \n"
: "+a" (ret)
: "a" (oldval) "r" (newval), "m" (*addr)
: "memory");
return (ret == oldval); lwords_t *pold = (lwords_t*)&oldval;
__asm__ __volatile(
"push %%ebx \n\t"
"movl %4, %%ebx \n\t"
SMPLOCK "cmpxchg8b %1 \n\t"
"sete %0 \n\t"
"pop %%ebx \n\t"
: "=qm" (realized)
: "m"(*((volatile long*)addr)), "a"(pold->hi), "d"(pold->lo),
"r"(pnew->hi), "c"(pnew->lo)
: "cc", "memory" );
return realized;
#endif #endif
} }
#define ompi_atomic_cmpset_acq_64 ompi_atomic_cmpset_64
#define ompi_atomic_cmpset_rel_64 ompi_atomic_cmpset_64
static inline int ompi_atomic_cmpset_acq_32(volatile uint32_t *addr, #define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32
uint32_t oldval, /**
uint32_t newval) * atomic_add - add integer to atomic variable
* @i: integer value to add
* @v: pointer of type int
*
* Atomically adds @i to @v.
*/
static inline int ompi_atomic_add_32(volatile int32_t* v, int i)
{ {
return ompi_atomic_cmpset_32(addr, oldval, newval); __asm__ __volatile__(
SMPLOCK "addl %1,%0"
:"=m" (*v)
:"ir" (i), "m" (*v));
return (*v); /* should be an atomic operation */
} }
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32
static inline int ompi_atomic_cmpset_rel_32(volatile uint32_t *addr, /**
uint32_t oldval, * atomic_sub - subtract the atomic variable
uint32_t newval) * @i: integer value to subtract
* @v: pointer of type int
*
* Atomically subtracts @i from @v.
*/
static inline int ompi_atomic_sub_32(volatile int32_t* v, int i)
{ {
return ompi_atomic_cmpset_32(addr, oldval, newval); __asm__ __volatile__(
SMPLOCK "subl %1,%0"
:"=m" (*v)
:"ir" (i), "m" (*v));
return (*v); /* should be an atomic operation */
} }
static inline int ompi_atomic_cmpset_64(volatile uint64_t *addr,
uint64_t oldval,
uint64_t newval)
{
/*
* Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into
* m64. Else, clear ZF and load m64 into EDX:EAX.
*/
uint64_t ret = oldval;
#if 0
struct { uint32_t lo; uint32_t hi; } *p = (struct lwords *) &newval;
__asm__ __volatile(
SMPLOCK "cmpxchg8b %1\n"
: "+A" (ret)
: "m" (*addr), "b" (p->lo), "c" (p->hi)
: "memory");
#endif
return (ret == oldval);
}
static inline int ompi_atomic_cmpset_acq_64(volatile uint64_t *addr,
uint64_t oldval,
uint64_t newval)
{
return ompi_atomic_cmpset_64(addr, oldval, newval);
}
static inline int ompi_atomic_cmpset_rel_64(volatile uint64_t *addr,
uint64_t oldval,
uint64_t newval)
{
return ompi_atomic_cmpset_64(addr, oldval, newval);
}
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */

Просмотреть файл

@ -34,7 +34,6 @@ static inline void ompi_atomic_wmb(void)
MB(); MB();
} }
static inline int ompi_atomic_cmpset_acq_32(volatile uint32_t *addr, static inline int ompi_atomic_cmpset_acq_32(volatile uint32_t *addr,
uint32_t oldval, uint32_t oldval,
uint32_t newval) uint32_t newval)
@ -42,80 +41,68 @@ static inline int ompi_atomic_cmpset_acq_32(volatile uint32_t *addr,
uint32_t ret; uint32_t ret;
__asm__ __volatile( __asm__ __volatile(
" mov ar.ccv=%2 \n\ "mov ar.ccv=%2 \n\t"
cmpxchg4.acq %0=%4,%3,ar.ccv \n" "cmpxchg4.acq %0=%4,%3,ar.ccv \n\t"
: "=r"(ret), "=m"(*addr) : "=r"(ret), "=m"(*addr)
: "r"(oldval), "r"(newval), "m"(*addr) : "r"(oldval), "r"(newval), "m"(*addr)
: "memory"); : "memory");
return (ret == oldval); return (ret == oldval);
} }
static inline int ompi_atomic_cmpset_rel_32(volatile uint32_t *addr, static inline int ompi_atomic_cmpset_rel_32(volatile uint32_t *addr,
uint32_t oldval, uint32_t oldval,
uint32_t newval) uint32_t newval)
{ {
uint32_t ret; uint32_t ret;
__asm__ __volatile( __asm__ __volatile(
" mov ar.ccv=%2 \n\ "mov ar.ccv=%2 \n\t"
cmpxchg4.rel %0=%4,%3,ar.ccv \n" "cmpxchg4.rel %0=%4,%3,ar.ccv \n\t"
: "=r"(ret), "=m"(*addr) : "=r"(ret), "=m"(*addr)
: "r"(oldval), "r"(newval), "m"(*addr) : "r"(oldval), "r"(newval), "m"(*addr)
: "memory"); : "memory");
return (ret == oldval); return (ret == oldval);
} }
static inline int ompi_atomic_cmpset_32(volatile uint32_t *addr, #define ompi_atomic_cmpset_32 ompi_atomic_cmpset_acq_32
uint32_t oldval,
uint32_t newval)
{
return ompi_atomic_cmpset_acq_32(addr, oldval, newval);
}
static inline int ompi_atomic_cmpset_acq_64(volatile uint64_t *addr, static inline int ompi_atomic_cmpset_acq_64(volatile uint64_t *addr,
uint64_t oldval, uint64_t oldval,
uint64_t newval) uint64_t newval)
{ {
uint64_t ret; uint64_t ret;
__asm__ __volatile( __asm__ __volatile(
" mov ar.ccv=%2 \n\ "mov ar.ccv=%2 \n\t"
cmpxchg8.acq %0=%4,%3,ar.ccv \n" "cmpxchg8.acq %0=%4,%3,ar.ccv \n\t"
: "=r"(ret), "=m"(*addr) : "=r"(ret), "=m"(*addr)
: "r"(oldval), "r"(newval), "m"(*addr) : "r"(oldval), "r"(newval), "m"(*addr)
: "memory"); : "memory");
return (ret == oldval); return (ret == oldval);
} }
static inline int ompi_atomic_cmpset_rel_64(volatile uint64_t *addr, static inline int ompi_atomic_cmpset_rel_64(volatile uint64_t *addr,
uint64_t oldval, uint64_t oldval,
uint64_t newval) uint64_t newval)
{ {
uint64_t ret; uint64_t ret;
__asm__ __volatile( __asm__ __volatile(
" mov ar.ccv=%2 \n\ "mov ar.ccv=%2 \n\t"
cmpxchg8.rel %0=%4,%3,ar.ccv \n" "cmpxchg8.rel %0=%4,%3,ar.ccv \n\t"
: "=r"(ret), "=m"(*addr) : "=r"(ret), "=m"(*addr)
: "r"(oldval), "r"(newval), "m"(*addr) : "r"(oldval), "r"(newval), "m"(*addr)
: "memory"); : "memory");
return (ret); return (ret);
} }
static inline int ompi_atomic_cmpset_64(volatile uint64_t *addr, #define ompi_atomic_cmpset_64 ompi_atomic_cmpset_acq_64
uint64_t oldval,
uint64_t newval)
{
return ompi_atomic_cmpset_acq_64(addr, oldval, newval);
}
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */

Просмотреть файл

@ -41,12 +41,11 @@ static inline void ompi_atomic_wmb(void)
WMB(); WMB();
} }
static inline int ompi_atomic_cmpset_32(volatile int32_t *addr,
static inline int ompi_atomic_cmpset_32(volatile uint32_t *addr, int32_t oldval,
uint32_t oldval, int32_t newval)
uint32_t newval)
{ {
uint32_t ret; int32_t ret;
__asm__ __volatile__ ( __asm__ __volatile__ (
"1: lwarx %0, 0, %2 \n\ "1: lwarx %0, 0, %2 \n\
@ -63,9 +62,9 @@ static inline int ompi_atomic_cmpset_32(volatile uint32_t *addr,
} }
static inline int ompi_atomic_cmpset_acq_32(volatile uint32_t *addr, static inline int ompi_atomic_cmpset_acq_32(volatile int32_t *addr,
uint32_t oldval, int32_t oldval,
uint32_t newval) int32_t newval)
{ {
int rc; int rc;
@ -76,20 +75,20 @@ static inline int ompi_atomic_cmpset_acq_32(volatile uint32_t *addr,
} }
static inline int ompi_atomic_cmpset_rel_32(volatile uint32_t *addr, static inline int ompi_atomic_cmpset_rel_32(volatile int32_t *addr,
uint32_t oldval, int32_t oldval,
uint32_t newval) int32_t newval)
{ {
ompi_atomic_wmb(); ompi_atomic_wmb();
return ompi_atomic_cmpset_32(addr, oldval, newval); return ompi_atomic_cmpset_32(addr, oldval, newval);
} }
static inline int ompi_atomic_cmpset_64(volatile uint64_t *addr, static inline int ompi_atomic_cmpset_64(volatile int64_t *addr,
uint64_t oldval, int64_t oldval,
uint64_t newval) int64_t newval)
{ {
uint64_t ret; int64_t ret;
__asm__ __volatile__ ( __asm__ __volatile__ (
"1: ldarx %0, 0, %2 \n\ "1: ldarx %0, 0, %2 \n\
@ -106,9 +105,9 @@ static inline int ompi_atomic_cmpset_64(volatile uint64_t *addr,
} }
static inline int ompi_atomic_cmpset_acq_64(volatile uint64_t *addr, static inline int ompi_atomic_cmpset_acq_64(volatile int64_t *addr,
uint64_t oldval, int64_t oldval,
uint64_t newval) int64_t newval)
{ {
int rc; int rc;
@ -119,13 +118,38 @@ static inline int ompi_atomic_cmpset_acq_64(volatile uint64_t *addr,
} }
static inline int ompi_atomic_cmpset_rel_64(volatile uint64_t *addr, static inline int ompi_atomic_cmpset_rel_64(volatile int64_t *addr,
uint64_t oldval, int64_t oldval,
uint64_t newval) int64_t newval)
{ {
ompi_atomic_wmb(); ompi_atomic_wmb();
return ompi_atomic_cmpset_64(addr, oldval, newval); return ompi_atomic_cmpset_64(addr, oldval, newval);
} }
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_ADD_32
static inline int ompi_atomic_add_32(volatile int32_t* v, int i)
{
__asm__ volatile("top1:\tlwarx r4, 0, %0\n\t" \
"addi r4, r4, 1\n\t" \
"stwcx. r4, 0, %0\n\t" \
"bne cr0, top1"
:
: "r" (ptr)
: "r4");
return *v;
}
#define OMPI_ARCHITECTURE_DEFINE_ATOMIC_SUB_32
static inline int ompi_atomic_sub_32(volatile int32_t* v, int i)
{
__asm__ volatile("top2:\tlwarx r4, 0, %0\n\t" \
"subi r4, r4, 1\n\t" \
"stwcx. r4, 0, %0\n\t" \
"bne cr0, top2"
:
: "r" (ptr)
: "r4");
return *v;
}
#endif /* ! OMPI_SYS_ARCH_ATOMIC_H */ #endif /* ! OMPI_SYS_ARCH_ATOMIC_H */