Коммит
8cae899a42
@ -40,7 +40,9 @@ double MPI_Wtick(void)
|
||||
{
|
||||
OPAL_CR_NOOP_PROGRESS();
|
||||
|
||||
#if OPAL_TIMER_USEC_NATIVE
|
||||
#if OPAL_TIMER_CYCLE_NATIVE
|
||||
return opal_timer_base_get_freq();
|
||||
#elif OPAL_TIMER_USEC_NATIVE
|
||||
return 0.000001;
|
||||
#else
|
||||
/* Otherwise, we already return usec precision. */
|
||||
|
@ -40,7 +40,9 @@ double MPI_Wtime(void)
|
||||
{
|
||||
double wtime;
|
||||
|
||||
#if OPAL_TIMER_USEC_NATIVE
|
||||
#if OPAL_TIMER_CYCLE_NATIVE
|
||||
wtime = ((double) opal_timer_base_get_cycles()) / opal_timer_base_get_freq();
|
||||
#elif OPAL_TIMER_USEC_NATIVE
|
||||
wtime = ((double) opal_timer_base_get_usec()) / 1000000.0;
|
||||
#else
|
||||
/* Fall back to gettimeofday() if we have nothing else */
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -25,29 +25,34 @@ typedef uint64_t opal_timer_t;
|
||||
|
||||
#if OPAL_GCC_INLINE_ASSEMBLY
|
||||
|
||||
#if 0
|
||||
/**
|
||||
* http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
|
||||
*/
|
||||
static inline opal_timer_t
|
||||
opal_sys_timer_get_cycles(void)
|
||||
{
|
||||
opal_timer_t ret;
|
||||
|
||||
__asm__ __volatile__("rdtsc" : "=A"(ret));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned a, d;
|
||||
#if 0
|
||||
__asm__ __volatile__ ("cpuid\n\t"
|
||||
"rdtsc\n\t"
|
||||
: "=a" (a), "=d" (d)
|
||||
:: "rbx", "rcx");
|
||||
#else
|
||||
|
||||
static inline opal_timer_t
|
||||
opal_sys_timer_get_cycles(void)
|
||||
{
|
||||
unsigned a, d;
|
||||
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
|
||||
/* If we need higher accuracy we should implement the algorithm proposed
|
||||
* on the Intel document referenced above. However, in the context of MPI
|
||||
* this function will be used as the backend for MPI_Wtime and as such
|
||||
* can afford a small inaccuracy.
|
||||
*/
|
||||
__asm__ __volatile__ ("rdtscp\n\t"
|
||||
"mov %%edx, %0\n\t"
|
||||
"mov %%eax, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
: "=r" (a), "=r" (d)
|
||||
:: "rax", "rbx", "rcx", "rdx");
|
||||
#endif
|
||||
return ((opal_timer_t)a) | (((opal_timer_t)d) << 32);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
|
||||
|
||||
#else
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -30,7 +30,10 @@ opal_sys_timer_get_cycles(void)
|
||||
{
|
||||
opal_timer_t ret;
|
||||
|
||||
__asm__ __volatile__("rdtsc" : "=A"(ret));
|
||||
__asm__ __volatile__("cpuid\n"
|
||||
"rdtsc\n"
|
||||
: "=A"(ret)
|
||||
:: "ebx", "ecx", "edx");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -39,7 +39,7 @@ opal_timer_base_get_usec()
|
||||
retval = (t.tb_high * 1000000) + t.tb_low / 1000;
|
||||
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
|
||||
static inline opal_timer_t
|
||||
opal_timer_base_get_cycles()
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -36,7 +36,7 @@ static inline opal_timer_t
|
||||
opal_timer_base_get_usec(void)
|
||||
{
|
||||
return opal_timer_base_get_cycles() / opal_timer_altix_usec_conv;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline opal_timer_t
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -26,15 +26,30 @@ typedef uint64_t opal_timer_t;
|
||||
|
||||
/* frequency in mhz */
|
||||
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_freq;
|
||||
OPAL_DECLSPEC extern mach_timebase_info_data_t opal_timer_darwin_info;
|
||||
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_bias;
|
||||
|
||||
|
||||
/**
|
||||
* Use the pragmatic solution proposed at
|
||||
* http://stackoverflow.com/questions/23378063/how-can-i-use-mach-absolute-time-without-overflowing/23378064#23378064
|
||||
*/
|
||||
static inline opal_timer_t
|
||||
opal_timer_base_get_cycles(void)
|
||||
{
|
||||
/* this is basically a wrapper around the "right" assembly to get
|
||||
the tick counter off the PowerPC Time Base. I believe it's
|
||||
something similar on x86 */
|
||||
return mach_absolute_time();
|
||||
uint64_t now = mach_absolute_time();
|
||||
|
||||
if( opal_timer_darwin_info.denom == 0 ) {
|
||||
(void)mach_timebase_info(&opal_timer_darwin_info);
|
||||
if( opal_timer_darwin_info.denom > 1024 ) {
|
||||
double frac = (double)opal_timer_darwin_info.numer/opal_timer_darwin_info.denom;
|
||||
opal_timer_darwin_info.denom = 1024;
|
||||
opal_timer_darwin_info.numer = opal_timer_darwin_info.denom * frac + 0.5;
|
||||
}
|
||||
opal_timer_darwin_bias = now;
|
||||
}
|
||||
/* this is basically a wrapper around the "right" assembly to convert
|
||||
the tick counter off the PowerPC Time Base into nanos. */
|
||||
return (now - opal_timer_darwin_bias) * opal_timer_darwin_info.numer / opal_timer_darwin_info.denom;
|
||||
}
|
||||
|
||||
|
||||
@ -42,8 +57,8 @@ static inline opal_timer_t
|
||||
opal_timer_base_get_usec(void)
|
||||
{
|
||||
/* freq is in Hz, so this gives usec */
|
||||
return mach_absolute_time() * 1000000 / opal_timer_darwin_freq;
|
||||
}
|
||||
return opal_timer_base_get_cycles() / 1000;
|
||||
}
|
||||
|
||||
|
||||
static inline opal_timer_t
|
||||
@ -53,9 +68,9 @@ opal_timer_base_get_freq(void)
|
||||
}
|
||||
|
||||
|
||||
#define OPAL_TIMER_CYCLE_NATIVE 1
|
||||
#define OPAL_TIMER_CYCLE_NATIVE 0
|
||||
#define OPAL_TIMER_CYCLE_SUPPORTED 1
|
||||
#define OPAL_TIMER_USEC_NATIVE 0
|
||||
#define OPAL_TIMER_USEC_NATIVE 1
|
||||
#define OPAL_TIMER_USEC_SUPPORTED 1
|
||||
|
||||
#endif
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -20,17 +20,16 @@
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#include <mach/mach_time.h>
|
||||
|
||||
#include "opal/mca/timer/timer.h"
|
||||
#include "opal/mca/timer/darwin/timer_darwin.h"
|
||||
#include "opal/constants.h"
|
||||
|
||||
opal_timer_t opal_timer_darwin_freq;
|
||||
mach_timebase_info_data_t opal_timer_darwin_info = {.denom = 0};
|
||||
opal_timer_t opal_timer_darwin_bias;
|
||||
|
||||
static int opal_timer_darwin_open(void);
|
||||
|
||||
|
||||
const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
@ -53,55 +52,48 @@ const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
|
||||
},
|
||||
};
|
||||
|
||||
/* mach_timebase_info() returns a fraction that can be multiplied
|
||||
by the difference between two calls to mach_absolute_time() to
|
||||
get the number of nanoseconds that passed between the two
|
||||
calls.
|
||||
|
||||
On PPC, mach_timebase_info returns numer = 1000000000 and denom
|
||||
= 33333335 (or possibly 25000000, depending on the machine).
|
||||
mach_absolute_time() returns a cycle count from the global
|
||||
clock, which runs at 25 - 33MHz, so dividing the cycle count by
|
||||
the frequency gives you seconds between the interval, then
|
||||
multiplying by 1000000000 gives you nanoseconds. Of course,
|
||||
you should do the multiply first, then the divide to reduce
|
||||
arithmetic errors due to integer math. But since we want the
|
||||
least amount of math in the critical path as possible and
|
||||
mach_absolute_time is already a cycle counter, we claim we have
|
||||
native cycle count support and set the frequency to be the
|
||||
frequency of the global clock, which is sTBI.denom *
|
||||
(1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or
|
||||
sTBI.denom.
|
||||
|
||||
On Intel, mach_timebase_info returns numer = 1 and denom = 1,
|
||||
meaning that mach_absolute_time() returns some global clock
|
||||
time in nanoseconds. Because PPC returns a frequency and
|
||||
returning a time in microseconds would still require math in
|
||||
the critical path (a divide, at that), we pretend that the
|
||||
nanosecond timer is instead a cycle counter for a 1GHz clock
|
||||
and that we're returning a cycle count natively. so sTBI.denom
|
||||
* (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or
|
||||
1000000000, meaning we have a 1GHz clock.
|
||||
|
||||
More generally, since mach_timebase_info() gives the "keys" to
|
||||
transition the return from mach_absolute_time() into
|
||||
nanoseconds, taking the reverse of that and multiplying by
|
||||
1000000000 will give you a frequency in cycles / second if you
|
||||
think of mach_absolute_time() always returning a cycle count.
|
||||
*/
|
||||
int opal_timer_darwin_open(void)
|
||||
{
|
||||
mach_timebase_info_data_t sTBI;
|
||||
/* Call the opal_timer_base_get_cycles once to start the engine */
|
||||
(void)opal_timer_base_get_cycles();
|
||||
|
||||
mach_timebase_info(&sTBI);
|
||||
|
||||
/* mach_timebase_info() returns a fraction that can be multiplied
|
||||
by the difference between two calls to mach_absolute_time() to
|
||||
get the number of nanoseconds that passed between the two
|
||||
calls.
|
||||
|
||||
On PPC, mach_timebase_info returns numer = 1000000000 and denom
|
||||
= 33333335 (or possibly 25000000, depending on the machine).
|
||||
mach_absolute_time() returns a cycle count from the global
|
||||
clock, which runs at 25 - 33MHz, so dividing the cycle count by
|
||||
the frequency gives you seconds between the interval, then
|
||||
multiplying by 1000000000 gives you nanoseconds. Of course,
|
||||
you should do the multiply first, then the divide to reduce
|
||||
arithmetic errors due to integer math. But since we want the
|
||||
least amount of math in the critical path as possible and
|
||||
mach_absolute_time is already a cycle counter, we claim we have
|
||||
native cycle count support and set the frequency to be the
|
||||
frequency of the global clock, which is sTBI.denom *
|
||||
(1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or
|
||||
sTBI.denom.
|
||||
|
||||
On Intel, mach_timebase_info returns numer = 1 and denom = 1,
|
||||
meaning that mach_absolute_time() returns some global clock
|
||||
time in nanoseconds. Because PPC returns a frequency and
|
||||
returning a time in microseconds would still require math in
|
||||
the critical path (a divide, at that), we pretend that the
|
||||
nanosecond timer is instead a cycle counter for a 1GHz clock
|
||||
and that we're returning a cycle count natively. so sTBI.denom
|
||||
* (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or
|
||||
1000000000, meaning we have a 1GHz clock.
|
||||
|
||||
More generally, since mach_timebase_info() gives the "keys" to
|
||||
transition the return from mach_absolute_time() into
|
||||
nanoseconds, taking the reverse of that and multiplying by
|
||||
1000000000 will give you a frequency in cycles / second if you
|
||||
think of mach_absolute_time() always returning a cycle count.
|
||||
|
||||
By the way, it's interesting to note that because these are
|
||||
library functions and because of how rosetta works, a PPC
|
||||
binary running under rosetta on an Intel Mac will behave
|
||||
exactly like an Intel binary running on an Intel Mac.
|
||||
*/
|
||||
opal_timer_darwin_freq = sTBI.denom * (1000000000 / sTBI.numer);
|
||||
opal_timer_darwin_freq = opal_timer_darwin_info.denom * (1000000000 / opal_timer_darwin_info.numer);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -44,7 +44,7 @@ opal_timer_base_get_usec(void)
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline opal_timer_t
|
||||
@ -56,7 +56,7 @@ opal_timer_base_get_freq(void)
|
||||
|
||||
#define OPAL_TIMER_CYCLE_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES
|
||||
#define OPAL_TIMER_CYCLE_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES
|
||||
#define OPAL_TIMER_USEC_NATIVE 0
|
||||
#define OPAL_TIMER_USEC_NATIVE OPAL_HAVE_SYS_TIMER_GET_CYCLES
|
||||
#define OPAL_TIMER_USEC_SUPPORTED OPAL_HAVE_SYS_TIMER_GET_CYCLES
|
||||
|
||||
#endif
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -35,7 +35,7 @@ opal_timer_base_get_usec(void)
|
||||
{
|
||||
/* gethrtime returns nanoseconds */
|
||||
return gethrtime() / 1000;
|
||||
}
|
||||
}
|
||||
|
||||
static inline opal_timer_t
|
||||
opal_timer_base_get_freq(void)
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user