1
1

Merge pull request #285 from bosilca/master

Reenable high accuracy timers
Этот коммит содержится в:
bosilca 2014-11-25 17:09:34 -05:00
родитель f48b9012cb 43901fa15a
Коммит 8cae899a42
10 изменённых файлов: 108 добавлений и 89 удалений

Просмотреть файл

@ -40,7 +40,9 @@ double MPI_Wtick(void)
{
OPAL_CR_NOOP_PROGRESS();
#if OPAL_TIMER_USEC_NATIVE
#if OPAL_TIMER_CYCLE_NATIVE
return opal_timer_base_get_freq();
#elif OPAL_TIMER_USEC_NATIVE
return 0.000001;
#else
/* Otherwise, we already return usec precision. */

Просмотреть файл

@ -40,7 +40,9 @@ double MPI_Wtime(void)
{
double wtime;
#if OPAL_TIMER_USEC_NATIVE
#if OPAL_TIMER_CYCLE_NATIVE
wtime = ((double) opal_timer_base_get_cycles()) / opal_timer_base_get_freq();
#elif OPAL_TIMER_USEC_NATIVE
wtime = ((double) opal_timer_base_get_usec()) / 1000000.0;
#else
/* Fall back to gettimeofday() if we have nothing else */

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -25,29 +25,34 @@ typedef uint64_t opal_timer_t;
#if OPAL_GCC_INLINE_ASSEMBLY
#if 0
/*
 * Legacy time-stamp counter read: issues a bare "rdtsc" and lets the
 * compiler collect the result through a single "=A" output operand.
 *
 * NOTE(review): this variant sits in the "#if 0" arm and is not compiled;
 * presumably it was disabled because "=A" does not denote the EDX:EAX pair
 * on 64-bit targets -- confirm against the GCC constraint documentation.
 */
static inline opal_timer_t
opal_sys_timer_get_cycles(void)
{
    opal_timer_t tsc;

    __asm__ __volatile__("rdtsc" : "=A"(tsc));

    return tsc;
}
#else
/**
* http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
*/
static inline opal_timer_t
opal_sys_timer_get_cycles(void)
{
unsigned a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
#if 0
__asm__ __volatile__ ("cpuid\n\t"
"rdtsc\n\t"
: "=a" (a), "=d" (d)
:: "rbx", "rcx");
#else
/* If we need higher accuracy we should implement the algorithm proposed
* on the Intel document referenced above. However, in the context of MPI
* this function will be used as the backend for MPI_Wtime and as such
* can afford a small inaccuracy.
*/
__asm__ __volatile__ ("rdtscp\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t"
"cpuid\n\t"
: "=r" (a), "=r" (d)
:: "rax", "rbx", "rcx", "rdx");
#endif
return ((opal_timer_t)a) | (((opal_timer_t)d) << 32);
}
#endif
#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
#else

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -30,7 +30,10 @@ opal_sys_timer_get_cycles(void)
{
opal_timer_t ret;
__asm__ __volatile__("rdtsc" : "=A"(ret));
__asm__ __volatile__("cpuid\n"
"rdtsc\n"
: "=A"(ret)
:: "ebx", "ecx", "edx");
return ret;
}

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -26,15 +26,30 @@ typedef uint64_t opal_timer_t;
/* frequency in Hz (cycles per second) */
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_freq;
OPAL_DECLSPEC extern mach_timebase_info_data_t opal_timer_darwin_info;
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_bias;
/**
 * Return the time elapsed since the first call to this function, expressed
 * in nanoseconds.
 *
 * Use the pragmatic solution proposed at
 * http://stackoverflow.com/questions/23378063/how-can-i-use-mach-absolute-time-without-overflowing/23378064#23378064
 *
 * The timebase fraction and the bias are set up lazily on the first call;
 * a zero denominator in opal_timer_darwin_info marks the not-yet-initialised
 * state.
 *
 * @return (now - bias) * numer / denom, where numer/denom is the
 *         mach_timebase_info() fraction.
 */
static inline opal_timer_t
opal_timer_base_get_cycles(void)
{
    uint64_t now = mach_absolute_time();

    if( opal_timer_darwin_info.denom == 0 ) {
        (void)mach_timebase_info(&opal_timer_darwin_info);
        /* Re-express the fraction over a denominator of 1024, rounding the
         * numerator to nearest -- presumably to limit overflow in the
         * multiplication below (see the reference above). */
        if( opal_timer_darwin_info.denom > 1024 ) {
            double frac = (double)opal_timer_darwin_info.numer/opal_timer_darwin_info.denom;
            opal_timer_darwin_info.denom = 1024;
            opal_timer_darwin_info.numer = opal_timer_darwin_info.denom * frac + 0.5;
        }
        /* Remember the first reading so later results are relative to it. */
        opal_timer_darwin_bias = now;
    }
    /* Scale the tick difference by the timebase fraction to obtain
       nanoseconds. */
    return (now - opal_timer_darwin_bias) * opal_timer_darwin_info.numer / opal_timer_darwin_info.denom;
}
@ -42,7 +57,7 @@ static inline opal_timer_t
opal_timer_base_get_usec(void)
{
/* freq is in Hz, so this gives usec */
return mach_absolute_time() * 1000000 / opal_timer_darwin_freq;
return opal_timer_base_get_cycles() / 1000;
}
@ -53,9 +68,9 @@ opal_timer_base_get_freq(void)
}
#define OPAL_TIMER_CYCLE_NATIVE 1
#define OPAL_TIMER_CYCLE_NATIVE 0
#define OPAL_TIMER_CYCLE_SUPPORTED 1
#define OPAL_TIMER_USEC_NATIVE 0
#define OPAL_TIMER_USEC_NATIVE 1
#define OPAL_TIMER_USEC_SUPPORTED 1
#endif

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -20,17 +20,16 @@
#include "opal_config.h"
#include <mach/mach_time.h>
#include "opal/mca/timer/timer.h"
#include "opal/mca/timer/darwin/timer_darwin.h"
#include "opal/constants.h"
opal_timer_t opal_timer_darwin_freq;
mach_timebase_info_data_t opal_timer_darwin_info = {.denom = 0};
opal_timer_t opal_timer_darwin_bias;
static int opal_timer_darwin_open(void);
const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
@ -53,14 +52,7 @@ const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
},
};
int opal_timer_darwin_open(void)
{
mach_timebase_info_data_t sTBI;
mach_timebase_info(&sTBI);
/* mach_timebase_info() returns a fraction that can be multiplied
/* mach_timebase_info() returns a fraction that can be multiplied
by the difference between two calls to mach_absolute_time() to
get the number of nanoseconds that passed between the two
calls.
@ -95,13 +87,13 @@ int opal_timer_darwin_open(void)
nanoseconds, taking the reverse of that and multiplying by
1000000000 will give you a frequency in cycles / second if you
think of mach_absolute_time() always returning a cycle count.
*/
int opal_timer_darwin_open(void)
{
/* Call the opal_timer_base_get_cycles once to start the engine */
(void)opal_timer_base_get_cycles();
By the way, it's interesting to note that because these are
library functions and because of how rosetta works, a PPC
binary running under rosetta on an Intel Mac will behave
exactly like an Intel binary running on an Intel Mac.
*/
opal_timer_darwin_freq = sTBI.denom * (1000000000 / sTBI.numer);
opal_timer_darwin_freq = opal_timer_darwin_info.denom * (1000000000 / opal_timer_darwin_info.numer);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -56,7 +56,7 @@ opal_timer_base_get_freq(void)
#define OPAL_TIMER_CYCLE_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES
#define OPAL_TIMER_CYCLE_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES
#define OPAL_TIMER_USEC_NATIVE 0
#define OPAL_TIMER_USEC_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES
#define OPAL_TIMER_USEC_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES
#endif

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,