From e27759956f0355609e5c785332ef0fad202568bb Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Sun, 23 Nov 2014 23:51:13 -0500 Subject: [PATCH 01/10] Allow the use of the optimized used timers --- opal/mca/timer/aix/timer_aix.h | 4 ++-- opal/mca/timer/altix/timer_altix.h | 4 ++-- opal/mca/timer/darwin/timer_darwin.h | 16 ++++++++++------ opal/mca/timer/darwin/timer_darwin_component.c | 6 ++---- opal/mca/timer/linux/timer_linux.h | 6 +++--- opal/mca/timer/solaris/timer_solaris.h | 4 ++-- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/opal/mca/timer/aix/timer_aix.h b/opal/mca/timer/aix/timer_aix.h index 278ee71c97..2e99ffb60d 100644 --- a/opal/mca/timer/aix/timer_aix.h +++ b/opal/mca/timer/aix/timer_aix.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -39,7 +39,7 @@ opal_timer_base_get_usec() retval = (t.tb_high * 1000000) + t.tb_low / 1000; return retval; -} +} static inline opal_timer_t opal_timer_base_get_cycles() diff --git a/opal/mca/timer/altix/timer_altix.h b/opal/mca/timer/altix/timer_altix.h index 1ed990999d..e588d74bb8 100644 --- a/opal/mca/timer/altix/timer_altix.h +++ b/opal/mca/timer/altix/timer_altix.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -36,7 +36,7 @@ static inline opal_timer_t opal_timer_base_get_usec(void) { return opal_timer_base_get_cycles() / opal_timer_altix_usec_conv; -} +} static inline opal_timer_t diff --git a/opal/mca/timer/darwin/timer_darwin.h b/opal/mca/timer/darwin/timer_darwin.h index 7d4e342c6c..961b3cfe09 100644 --- a/opal/mca/timer/darwin/timer_darwin.h +++ b/opal/mca/timer/darwin/timer_darwin.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -21,20 +21,24 @@ #include "opal_config.h" #include +#include typedef uint64_t opal_timer_t; /* frequency in mhz */ OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_freq; - +OPAL_DECLSPEC extern mach_timebase_info_data_t opal_timer_darwin_info; static inline opal_timer_t opal_timer_base_get_cycles(void) { + if( opal_timer_darwin_info.denom == 0 ) { + (void) mach_timebase_info(&opal_timer_darwin_info); + } /* this is basically a wrapper around the "right" assembly to get the tick counter off the PowerPC Time Base. 
I believe it's something similar on x86 */ - return mach_absolute_time(); + return mach_absolute_time() * opal_timer_darwin_info.numer / opal_timer_darwin_info.denom / 1000; } @@ -43,7 +47,7 @@ opal_timer_base_get_usec(void) { /* freq is in Hz, so this gives usec */ return mach_absolute_time() * 1000000 / opal_timer_darwin_freq; -} +} static inline opal_timer_t @@ -53,9 +57,9 @@ opal_timer_base_get_freq(void) } -#define OPAL_TIMER_CYCLE_NATIVE 1 +#define OPAL_TIMER_CYCLE_NATIVE 0 #define OPAL_TIMER_CYCLE_SUPPORTED 1 -#define OPAL_TIMER_USEC_NATIVE 0 +#define OPAL_TIMER_USEC_NATIVE 1 #define OPAL_TIMER_USEC_SUPPORTED 1 #endif diff --git a/opal/mca/timer/darwin/timer_darwin_component.c b/opal/mca/timer/darwin/timer_darwin_component.c index 6f05e2f088..aecb94ab95 100644 --- a/opal/mca/timer/darwin/timer_darwin_component.c +++ b/opal/mca/timer/darwin/timer_darwin_component.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -20,17 +20,15 @@ #include "opal_config.h" -#include - #include "opal/mca/timer/timer.h" #include "opal/mca/timer/darwin/timer_darwin.h" #include "opal/constants.h" opal_timer_t opal_timer_darwin_freq; +mach_timebase_info_data_t opal_timer_darwin_info; static int opal_timer_darwin_open(void); - const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = { /* First, the mca_component_t struct containing meta information about the component itself */ diff --git a/opal/mca/timer/linux/timer_linux.h b/opal/mca/timer/linux/timer_linux.h index 6bdffcb0f7..af3b84e057 100644 --- a/opal/mca/timer/linux/timer_linux.h +++ b/opal/mca/timer/linux/timer_linux.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -44,7 +44,7 @@ opal_timer_base_get_usec(void) #else return 0; #endif -} +} static inline opal_timer_t @@ -56,7 +56,7 @@ opal_timer_base_get_freq(void) #define OPAL_TIMER_CYCLE_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES #define OPAL_TIMER_CYCLE_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES -#define OPAL_TIMER_USEC_NATIVE 0 +#define OPAL_TIMER_USEC_NATIVE 1 #define OPAL_TIMER_USEC_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES #endif diff --git a/opal/mca/timer/solaris/timer_solaris.h b/opal/mca/timer/solaris/timer_solaris.h index a48c570448..40973c432b 100644 --- a/opal/mca/timer/solaris/timer_solaris.h +++ b/opal/mca/timer/solaris/timer_solaris.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -35,7 +35,7 @@ opal_timer_base_get_usec(void) { /* gethrtime returns nanoseconds */ return gethrtime() / 1000; -} +} static inline opal_timer_t opal_timer_base_get_freq(void) From 18a916fced1837b3aae0bf3bc9d1cdbcfa2fd035 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Sun, 23 Nov 2014 23:58:39 -0500 Subject: [PATCH 02/10] Cleanup the AMD64 timer function. --- opal/include/opal/sys/amd64/timer.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/opal/include/opal/sys/amd64/timer.h b/opal/include/opal/sys/amd64/timer.h index 67a1d32061..71b009886c 100644 --- a/opal/include/opal/sys/amd64/timer.h +++ b/opal/include/opal/sys/amd64/timer.h @@ -25,19 +25,6 @@ typedef uint64_t opal_timer_t; #if OPAL_GCC_INLINE_ASSEMBLY -#if 0 -static inline opal_timer_t -opal_sys_timer_get_cycles(void) -{ - opal_timer_t ret; - - __asm__ __volatile__("rdtsc" : "=A"(ret)); - - return ret; -} - -#else - static inline opal_timer_t opal_sys_timer_get_cycles(void) { @@ -46,8 +33,6 @@ opal_sys_timer_get_cycles(void) return ((opal_timer_t)a) | (((opal_timer_t)d) << 32); } -#endif - #define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 #else From 5f49a11b2901cfeab38025688e8ff881edb2d0a8 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 24 Nov 2014 00:44:50 -0500 Subject: [PATCH 03/10] Minor cleanups. 
--- opal/include/opal/sys/amd64/timer.h | 2 +- opal/mca/timer/darwin/timer_darwin_component.c | 5 ----- opal/mca/timer/linux/timer_linux.h | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/opal/include/opal/sys/amd64/timer.h b/opal/include/opal/sys/amd64/timer.h index 71b009886c..a0ea9c8520 100644 --- a/opal/include/opal/sys/amd64/timer.h +++ b/opal/include/opal/sys/amd64/timer.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, diff --git a/opal/mca/timer/darwin/timer_darwin_component.c b/opal/mca/timer/darwin/timer_darwin_component.c index aecb94ab95..15f52215c7 100644 --- a/opal/mca/timer/darwin/timer_darwin_component.c +++ b/opal/mca/timer/darwin/timer_darwin_component.c @@ -93,11 +93,6 @@ int opal_timer_darwin_open(void) nanoseconds, taking the reverse of that and multipling by 1000000000 will give you a frequency in cycles / second if you think of mach_absolute_time() always returning a cycle count. - - By the way, it's interesting to note that because these are - library functions and because of how rosetta works, a PPC - binary running under rosetta on an Intel Mac will behave - exactly like an Intel binary running on an Intel Mac. 
*/ opal_timer_darwin_freq = sTBI.denom * (1000000000 / sTBI.numer); diff --git a/opal/mca/timer/linux/timer_linux.h b/opal/mca/timer/linux/timer_linux.h index af3b84e057..d4ffc8043e 100644 --- a/opal/mca/timer/linux/timer_linux.h +++ b/opal/mca/timer/linux/timer_linux.h @@ -56,7 +56,7 @@ opal_timer_base_get_freq(void) #define OPAL_TIMER_CYCLE_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES #define OPAL_TIMER_CYCLE_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES -#define OPAL_TIMER_USEC_NATIVE 1 +#define OPAL_TIMER_USEC_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES #define OPAL_TIMER_USEC_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES #endif From d4edd097c0ed0a87513065d328171b137bb0cab6 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 24 Nov 2014 00:45:14 -0500 Subject: [PATCH 04/10] Allow for native timer (cycle level) integration for MPI_Wtime and MPI_Wtick. --- ompi/mpi/c/wtick.c | 4 +++- ompi/mpi/c/wtime.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ompi/mpi/c/wtick.c b/ompi/mpi/c/wtick.c index 2d0b01fcd8..8088888e5f 100644 --- a/ompi/mpi/c/wtick.c +++ b/ompi/mpi/c/wtick.c @@ -40,7 +40,9 @@ double MPI_Wtick(void) { OPAL_CR_NOOP_PROGRESS(); -#if OPAL_TIMER_USEC_NATIVE +#if OPAL_TIMER_CYCLE_NATIVE + return opal_timer_base_get_freq(); +#elif OPAL_TIMER_USEC_NATIVE return 0.000001; #else /* Otherwise, we already return usec precision. 
*/ diff --git a/ompi/mpi/c/wtime.c b/ompi/mpi/c/wtime.c index 6a39f51e23..c7309ddb0a 100644 --- a/ompi/mpi/c/wtime.c +++ b/ompi/mpi/c/wtime.c @@ -40,7 +40,9 @@ double MPI_Wtime(void) { double wtime; -#if OPAL_TIMER_USEC_NATIVE +#if OPAL_TIMER_CYCLE_NATIVE + wtime = ((double) opal_timer_base_get_cycles()) / opal_timer_base_get_freq(); +#elif OPAL_TIMER_USEC_NATIVE wtime = ((double) opal_timer_base_get_usec()) / 1000000.0; #else /* Fall back to gettimeofday() if we have nothing else */ From 766cfece362d61643ffcf161ff1d58bfdbc5bde2 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 24 Nov 2014 00:57:54 -0500 Subject: [PATCH 05/10] Remove useless header. --- opal/mca/timer/darwin/timer_darwin.h | 1 - 1 file changed, 1 deletion(-) diff --git a/opal/mca/timer/darwin/timer_darwin.h b/opal/mca/timer/darwin/timer_darwin.h index 961b3cfe09..92a3b888fd 100644 --- a/opal/mca/timer/darwin/timer_darwin.h +++ b/opal/mca/timer/darwin/timer_darwin.h @@ -21,7 +21,6 @@ #include "opal_config.h" #include -#include typedef uint64_t opal_timer_t; From b5574c1b4f1dd717b1eeca5b2bd984482332b34c Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 24 Nov 2014 14:15:25 -0500 Subject: [PATCH 06/10] Per Dave's suggestion, add a serializing instruction bundled together with RDTSC. It is still not perfect, but hopefully much better than before.
--- opal/include/opal/sys/amd64/timer.h | 24 +++++++++++++++++++++--- opal/include/opal/sys/ia32/timer.h | 7 +++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/opal/include/opal/sys/amd64/timer.h b/opal/include/opal/sys/amd64/timer.h index a0ea9c8520..39244f352d 100644 --- a/opal/include/opal/sys/amd64/timer.h +++ b/opal/include/opal/sys/amd64/timer.h @@ -25,11 +25,29 @@ typedef uint64_t opal_timer_t; #if OPAL_GCC_INLINE_ASSEMBLY -static inline opal_timer_t +/** + * http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html + */ +static inline opal_timer_t opal_sys_timer_get_cycles(void) { - unsigned a, d; - __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); + unsigned a, d; +#if 0 + __asm__ __volatile__ ("cpuid\n\t" + "rdtsc\n\t" + : "=a" (a), "=d" (d) + :: "%rax", "%rbx", "%rcx", "%rdx"); +#else + /* If we need higher accuracy we should implement the algorithm proposed + * on the Intel document referenced above. However, in the context of MPI + * this function will be used as the backend for MPI_Wtime and as such + * can afford a small inaccuracy. + */ + __asm__ __volatile__ ("rdtscp\n\t" + "cpuid" + : "=a" (a), "=d" (d) + :: "%rax", "%rbx", "%rcx", "%rdx"); +#endif return ((opal_timer_t)a) | (((opal_timer_t)d) << 32); } diff --git a/opal/include/opal/sys/ia32/timer.h b/opal/include/opal/sys/ia32/timer.h index 53e2375336..06f368e7d4 100644 --- a/opal/include/opal/sys/ia32/timer.h +++ b/opal/include/opal/sys/ia32/timer.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -30,7 +30,10 @@ opal_sys_timer_get_cycles(void) { opal_timer_t ret; - __asm__ __volatile__("rdtsc" : "=A"(ret)); + __asm__ __volatile__("cpuid\n" + "rdtsc\n" + : "=A"(ret) + :: "%eax", "%ebx", "%ecx", "%edx"); return ret; } From 1877dfd0df7f6c2f09f6e0bc8d546ac98ed8dbaf Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 24 Nov 2014 14:16:36 -0500 Subject: [PATCH 07/10] On Darwin make sure the field we expect to be 0 is indeed 0. --- opal/mca/timer/darwin/timer_darwin_component.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/timer/darwin/timer_darwin_component.c b/opal/mca/timer/darwin/timer_darwin_component.c index 15f52215c7..ff2a3eabd7 100644 --- a/opal/mca/timer/darwin/timer_darwin_component.c +++ b/opal/mca/timer/darwin/timer_darwin_component.c @@ -25,7 +25,7 @@ #include "opal/constants.h" opal_timer_t opal_timer_darwin_freq; -mach_timebase_info_data_t opal_timer_darwin_info; +mach_timebase_info_data_t opal_timer_darwin_info = {.denom = 0}; static int opal_timer_darwin_open(void); From b7fa0e312f86d1810324766ac7328fe1155075af Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 24 Nov 2014 16:35:51 -0500 Subject: [PATCH 08/10] Cleanup the clobber list. --- opal/include/opal/sys/amd64/timer.h | 6 +++--- opal/include/opal/sys/ia32/timer.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opal/include/opal/sys/amd64/timer.h b/opal/include/opal/sys/amd64/timer.h index 39244f352d..750ad887ab 100644 --- a/opal/include/opal/sys/amd64/timer.h +++ b/opal/include/opal/sys/amd64/timer.h @@ -36,7 +36,7 @@ opal_sys_timer_get_cycles(void) __asm__ __volatile__ ("cpuid\n\t" "rdtsc\n\t" : "=a" (a), "=d" (d) - :: "%rax", "%rbx", "%rcx", "%rdx"); + :: "rbx", "rcx"); #else /* If we need higher accuracy we should implement the algorithm proposed * on the Intel document referenced above. 
However, in the context of MPI @@ -44,9 +44,9 @@ opal_sys_timer_get_cycles(void) * can afford a small inaccuracy. */ __asm__ __volatile__ ("rdtscp\n\t" - "cpuid" + "cpuid\n\t" : "=a" (a), "=d" (d) - :: "%rax", "%rbx", "%rcx", "%rdx"); + :: "rbx", "rcx"); #endif return ((opal_timer_t)a) | (((opal_timer_t)d) << 32); } diff --git a/opal/include/opal/sys/ia32/timer.h b/opal/include/opal/sys/ia32/timer.h index 06f368e7d4..a040aa81c1 100644 --- a/opal/include/opal/sys/ia32/timer.h +++ b/opal/include/opal/sys/ia32/timer.h @@ -33,7 +33,7 @@ opal_sys_timer_get_cycles(void) __asm__ __volatile__("cpuid\n" "rdtsc\n" : "=A"(ret) - :: "%eax", "%ebx", "%ecx", "%edx"); + :: "ebx", "ecx", "edx"); return ret; } From 261684858f00671b3f55bacb1fb004015e220e5b Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 24 Nov 2014 17:15:49 -0500 Subject: [PATCH 09/10] Improved support for OSX timers. --- opal/mca/timer/darwin/timer_darwin.h | 24 ++++-- .../mca/timer/darwin/timer_darwin_component.c | 79 +++++++++---------- 2 files changed, 57 insertions(+), 46 deletions(-) diff --git a/opal/mca/timer/darwin/timer_darwin.h b/opal/mca/timer/darwin/timer_darwin.h index 92a3b888fd..4aa6dbb754 100644 --- a/opal/mca/timer/darwin/timer_darwin.h +++ b/opal/mca/timer/darwin/timer_darwin.h @@ -27,17 +27,29 @@ typedef uint64_t opal_timer_t; /* frequency in mhz */ OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_freq; OPAL_DECLSPEC extern mach_timebase_info_data_t opal_timer_darwin_info; +OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_bias; +/** + * Use the pragmatic solution proposed at + * http://stackoverflow.com/questions/23378063/how-can-i-use-mach-absolute-time-without-overflowing/23378064#23378064 + */ static inline opal_timer_t opal_timer_base_get_cycles(void) { + uint64_t now = mach_absolute_time(); + if( opal_timer_darwin_info.denom == 0 ) { - (void) mach_timebase_info(&opal_timer_darwin_info); + (void)mach_timebase_info(&opal_timer_darwin_info); + if( opal_timer_darwin_info.denom > 
1024 ) { + double frac = (double)opal_timer_darwin_info.numer/opal_timer_darwin_info.denom; + opal_timer_darwin_info.denom = 1024; + opal_timer_darwin_info.numer = opal_timer_darwin_info.denom * frac + 0.5; + } + opal_timer_darwin_bias = now; } - /* this is basically a wrapper around the "right" assembly to get - the tick counter off the PowerPC Time Base. I believe it's - something similar on x86 */ - return mach_absolute_time() * opal_timer_darwin_info.numer / opal_timer_darwin_info.denom / 1000; + /* this is basically a wrapper around the "right" assembly to convert + the tick counter off the PowerPC Time Base into nanos. */ + return (now - opal_timer_darwin_bias) * opal_timer_darwin_info.numer / opal_timer_darwin_info.denom; } @@ -45,7 +57,7 @@ static inline opal_timer_t opal_timer_base_get_usec(void) { /* freq is in Hz, so this gives usec */ - return mach_absolute_time() * 1000000 / opal_timer_darwin_freq; + return opal_timer_base_get_cycles() / 1000; } diff --git a/opal/mca/timer/darwin/timer_darwin_component.c b/opal/mca/timer/darwin/timer_darwin_component.c index ff2a3eabd7..7529636f31 100644 --- a/opal/mca/timer/darwin/timer_darwin_component.c +++ b/opal/mca/timer/darwin/timer_darwin_component.c @@ -26,6 +26,7 @@ opal_timer_t opal_timer_darwin_freq; mach_timebase_info_data_t opal_timer_darwin_info = {.denom = 0}; +opal_timer_t opal_timer_darwin_bias; static int opal_timer_darwin_open(void); @@ -51,50 +52,48 @@ const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = { }, }; +/* mach_timebase_info() returns a fraction that can be multiplied + by the difference between two calls to mach_absolute_time() to + get the number of nanoseconds that passed between the two + calls. + On PPC, mach_timebase_info returns numer = 1000000000 and denom + = 33333335 (or possibly 25000000, depending on the machine). 
+ mach_absolute_time() returns a cycle count from the global + clock, which runs at 25 - 33MHz, so dividing the cycle count by + the frequency gives you seconds between the interval, then + multiplying by 1000000000 gives you nanoseconds. Of course, + you should do the multiply first, then the divide to reduce + arithmetic errors due to integer math. But since we want the + least amount of math in the critical path as possible and + mach_absolute_time is already a cycle counter, we claim we have + native cycle count support and set the frequency to be the + frequency of the global clock, which is sTBI.denom * + (1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or + sTBI.denom. + + On Intel, mach_timebase_info returns numer = 1 and denom = 1, + meaning that mach_absolute_time() returns some global clock + time in nanoseconds. Because PPC returns a frequency and + returning a time in microseconds would still require math in + the critical path (a divide, at that), we pretend that the + nanosecond timer is instead a cycle counter for a 1GHz clock + and that we're returning a cycle count natively. so sTBI.denom + * (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or + 1000000000, meaning we have a 1GHz clock. + + More generally, since mach_timebase_info() gives the "keys" to + transition the return from mach_absolute_time() into + nanoseconds, taking the reverse of that and multiplying by + 1000000000 will give you a frequency in cycles / second if you + think of mach_absolute_time() always returning a cycle count. +*/ int opal_timer_darwin_open(void) { - mach_timebase_info_data_t sTBI; + /* Call the opal_timer_base_get_cycles once to start the engine */ + (void)opal_timer_base_get_cycles(); - mach_timebase_info(&sTBI); - - /* mach_timebase_info() returns a fraction that can be multiplied - by the difference between two calls to mach_absolute_time() to - get the number of nanoseconds that passed between the two - calls.
- - On PPC, mach_timebase_info returns numer = 1000000000 and denom - = 33333335 (or possibly 25000000, depending on the machine). - mach_absolute_time() returns a cycle count from the global - clock, which runs at 25 - 33MHz, so dividing the cycle count by - the frequency gives you seconds between the interval, then - multiplying by 1000000000 gives you nanoseconds. Of course, - you should do the multiply first, then the divide to reduce - arithmetic errors due to integer math. But since we want the - least amount of math in the critical path as possible and - mach_absolute_time is already a cycle counter, we claim we have - native cycle count support and set the frequencey to be the - frequencey of the global clock, which is sTBI.denom * - (1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or - sTBI.denom. - - On Intel, mach_timebase_info returns numer = 1 nd denom = 1, - meaning that mach_absolute_time() returns some global clock - time in nanoseconds. Because PPC returns a frequency and - returning a time in microseconds would still require math in - the critical path (a divide, at that), we pretend that the - nanosecond timer is instead a cycle counter for a 1GHz clock - and that we're returning a cycle count natively. so sTBI.denom - * (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or - 1000000000, meaning we have a 1GHz clock. - - More generally, since mach_timebase_info() gives the "keys" to - transition the return from mach_absolute_time() into - nanoseconds, taking the reverse of that and multipling by - 1000000000 will give you a frequency in cycles / second if you - think of mach_absolute_time() always returning a cycle count. 
- */ - opal_timer_darwin_freq = sTBI.denom * (1000000000 / sTBI.numer); + opal_timer_darwin_freq = opal_timer_darwin_info.denom * (1000000000 / opal_timer_darwin_info.numer); return OPAL_SUCCESS; } From fb44b03f67517c4f4db77989f32c2683ad7e8d97 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 24 Nov 2014 17:40:53 -0500 Subject: [PATCH 10/10] The use of cpuid destroys the content of %eax and %edx, so they must be saved before. --- opal/include/opal/sys/amd64/timer.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/opal/include/opal/sys/amd64/timer.h b/opal/include/opal/sys/amd64/timer.h index 750ad887ab..a04c367f61 100644 --- a/opal/include/opal/sys/amd64/timer.h +++ b/opal/include/opal/sys/amd64/timer.h @@ -44,9 +44,11 @@ opal_sys_timer_get_cycles(void) * can afford a small inaccuracy. */ __asm__ __volatile__ ("rdtscp\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" "cpuid\n\t" - : "=a" (a), "=d" (d) - :: "rbx", "rcx"); + : "=r" (a), "=r" (d) + :: "rax", "rbx", "rcx", "rdx"); #endif return ((opal_timer_t)a) | (((opal_timer_t)d) << 32); }