1
1

amd64 timers: use lfence instead of cpuid for serialization

Signed-off-by: Carlos Bederián <bc@famaf.unc.edu.ar>
Этот коммит содержится в:
Carlos Bederián 2017-02-04 18:33:25 -03:00
родитель 4009ba6b94
Коммит ccea3de44c

Просмотреть файл

@@ -31,31 +31,14 @@ typedef uint64_t opal_timer_t;
#if OPAL_GCC_INLINE_ASSEMBLY #if OPAL_GCC_INLINE_ASSEMBLY
/** /* TODO: add AMD mfence version and dispatch at init */
* http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
*/
static inline opal_timer_t static inline opal_timer_t
opal_sys_timer_get_cycles(void) opal_sys_timer_get_cycles(void)
{ {
unsigned l, h; uint32_t l, h;
#if !OPAL_ASSEMBLY_SUPPORTS_RDTSCP __asm__ __volatile__ ("lfence\n\t"
__asm__ __volatile__ ("cpuid\n\t"
"rdtsc\n\t" "rdtsc\n\t"
: "=a" (l), "=d" (h) : "=a" (l), "=d" (h));
:: "rbx", "rcx");
#else
/* If we need higher accuracy we should implement the algorithm proposed
* on the Intel document referenced above. However, in the context of MPI
* this function will be used as the backend for MPI_Wtime and as such
* can afford a small inaccuracy.
*/
__asm__ __volatile__ ("rdtscp\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t"
"cpuid\n\t"
: "=r" (h), "=r" (l)
:: "rax", "rbx", "rcx", "rdx");
#endif
return ((opal_timer_t)l) | (((opal_timer_t)h) << 32); return ((opal_timer_t)l) | (((opal_timer_t)h) << 32);
} }