From b5574c1b4f1dd717b1eeca5b2bd984482332b34c Mon Sep 17 00:00:00 2001
From: George Bosilca
Date: Mon, 24 Nov 2014 14:15:25 -0500
Subject: [PATCH] Per Dave suggestion add a serializing instruction bundled
 together with RDTSC.

It is still not perfect, but hopefully much better than before.
---
 opal/include/opal/sys/amd64/timer.h | 25 ++++++++++++++++++++++++-
 opal/include/opal/sys/ia32/timer.h  | 13 +++++++++++--
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/opal/include/opal/sys/amd64/timer.h b/opal/include/opal/sys/amd64/timer.h
index a0ea9c8520..39244f352d 100644
--- a/opal/include/opal/sys/amd64/timer.h
+++ b/opal/include/opal/sys/amd64/timer.h
@@ -25,11 +25,34 @@ typedef uint64_t opal_timer_t;
 
 #if OPAL_GCC_INLINE_ASSEMBLY
 
+/**
+ * http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
+ */
 static inline opal_timer_t
 opal_sys_timer_get_cycles(void)
 {
     unsigned a, d;
-    __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
+#if 0
+    __asm__ __volatile__ ("cpuid\n\t"
+                          "rdtsc\n\t"
+                          : "=a" (a), "=d" (d)
+                          :: "rbx", "rcx");
+#else
+    /* If we need higher accuracy we should implement the algorithm proposed
+     * on the Intel document referenced above. However, in the context of MPI
+     * this function will be used as the backend for MPI_Wtime and as such
+     * can afford a small inaccuracy.
+     *
+     * The counter is copied out of %edx:%eax before cpuid executes, because
+     * cpuid overwrites both registers.
+     */
+    __asm__ __volatile__ ("rdtscp\n\t"
+                          "mov %%edx, %0\n\t"
+                          "mov %%eax, %1\n\t"
+                          "cpuid\n\t"
+                          : "=r" (d), "=r" (a)
+                          :: "rax", "rbx", "rcx", "rdx");
+#endif
     return ((opal_timer_t)a) | (((opal_timer_t)d) << 32);
 }
 
diff --git a/opal/include/opal/sys/ia32/timer.h b/opal/include/opal/sys/ia32/timer.h
index 53e2375336..06f368e7d4 100644
--- a/opal/include/opal/sys/ia32/timer.h
+++ b/opal/include/opal/sys/ia32/timer.h
@@ -2,7 +2,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * Copyright (c) 2004-2014 The University of Tennessee and The University
  *                         of Tennessee Research Foundation.  All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -30,7 +30,16 @@ opal_sys_timer_get_cycles(void)
 {
     opal_timer_t ret;
+    int tmp;
 
-    __asm__ __volatile__("rdtsc" : "=A"(ret));
+    /* cpuid serializes execution before rdtsc reads the counter.  %ebx is
+     * saved and restored around cpuid because it may be the PIC register,
+     * in which case it cannot appear in the clobber list. */
+    __asm__ __volatile__("xchgl %%ebx, %1\n"
+                         "cpuid\n"
+                         "xchgl %%ebx, %1\n"
+                         "rdtsc\n"
+                         : "=A"(ret), "=r"(tmp)
+                         :: "ecx");
 
     return ret;
 }