Improved support for OSX timers.
Этот коммит содержится в:
родитель
b7fa0e312f
Коммит
261684858f
@ -27,17 +27,29 @@ typedef uint64_t opal_timer_t;
|
|||||||
/* frequency in Hz */
|
/* frequency in Hz */
|
||||||
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_freq;
|
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_freq;
|
||||||
OPAL_DECLSPEC extern mach_timebase_info_data_t opal_timer_darwin_info;
|
OPAL_DECLSPEC extern mach_timebase_info_data_t opal_timer_darwin_info;
|
||||||
|
OPAL_DECLSPEC extern opal_timer_t opal_timer_darwin_bias;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use the pragmatic solution proposed at
|
||||||
|
* http://stackoverflow.com/questions/23378063/how-can-i-use-mach-absolute-time-without-overflowing/23378064#23378064
|
||||||
|
*/
|
||||||
static inline opal_timer_t
|
static inline opal_timer_t
|
||||||
opal_timer_base_get_cycles(void)
|
opal_timer_base_get_cycles(void)
|
||||||
{
|
{
|
||||||
|
uint64_t now = mach_absolute_time();
|
||||||
|
|
||||||
if( opal_timer_darwin_info.denom == 0 ) {
|
if( opal_timer_darwin_info.denom == 0 ) {
|
||||||
(void) mach_timebase_info(&opal_timer_darwin_info);
|
(void)mach_timebase_info(&opal_timer_darwin_info);
|
||||||
|
if( opal_timer_darwin_info.denom > 1024 ) {
|
||||||
|
double frac = (double)opal_timer_darwin_info.numer/opal_timer_darwin_info.denom;
|
||||||
|
opal_timer_darwin_info.denom = 1024;
|
||||||
|
opal_timer_darwin_info.numer = opal_timer_darwin_info.denom * frac + 0.5;
|
||||||
|
}
|
||||||
|
opal_timer_darwin_bias = now;
|
||||||
}
|
}
|
||||||
/* this is basically a wrapper around the "right" assembly to get
|
/* this is basically a wrapper around the "right" assembly to convert
|
||||||
the tick counter off the PowerPC Time Base. I believe it's
|
the tick counter off the PowerPC Time Base into nanos. */
|
||||||
something similar on x86 */
|
return (now - opal_timer_darwin_bias) * opal_timer_darwin_info.numer / opal_timer_darwin_info.denom;
|
||||||
return mach_absolute_time() * opal_timer_darwin_info.numer / opal_timer_darwin_info.denom / 1000;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -45,7 +57,7 @@ static inline opal_timer_t
|
|||||||
opal_timer_base_get_usec(void)
|
opal_timer_base_get_usec(void)
|
||||||
{
|
{
|
||||||
/* freq is in Hz, so this gives usec */
|
/* freq is in Hz, so this gives usec */
|
||||||
return mach_absolute_time() * 1000000 / opal_timer_darwin_freq;
|
return opal_timer_base_get_cycles() / 1000;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
|
|
||||||
opal_timer_t opal_timer_darwin_freq;
|
opal_timer_t opal_timer_darwin_freq;
|
||||||
mach_timebase_info_data_t opal_timer_darwin_info = {.denom = 0};
|
mach_timebase_info_data_t opal_timer_darwin_info = {.denom = 0};
|
||||||
|
opal_timer_t opal_timer_darwin_bias;
|
||||||
|
|
||||||
static int opal_timer_darwin_open(void);
|
static int opal_timer_darwin_open(void);
|
||||||
|
|
||||||
@ -51,50 +52,48 @@ const opal_timer_base_component_2_0_0_t mca_timer_darwin_component = {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* mach_timebase_info() returns a fraction that can be multiplied
|
||||||
|
by the difference between two calls to mach_absolute_time() to
|
||||||
|
get the number of nanoseconds that passed between the two
|
||||||
|
calls.
|
||||||
|
|
||||||
|
On PPC, mach_timebase_info returns numer = 1000000000 and denom
|
||||||
|
= 33333335 (or possibly 25000000, depending on the machine).
|
||||||
|
mach_absolute_time() returns a cycle count from the global
|
||||||
|
clock, which runs at 25 - 33MHz, so dividing the cycle count by
|
||||||
|
the frequency gives you seconds between the interval, then
|
||||||
|
multiplying by 1000000000 gives you nanoseconds. Of course,
|
||||||
|
you should do the multiply first, then the divide to reduce
|
||||||
|
arithmetic errors due to integer math. But since we want the
|
||||||
|
least amount of math in the critical path as possible and
|
||||||
|
mach_absolute_time is already a cycle counter, we claim we have
|
||||||
|
native cycle count support and set the frequency to be the
|
||||||
|
frequency of the global clock, which is sTBI.denom *
|
||||||
|
(1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or
|
||||||
|
sTBI.denom.
|
||||||
|
|
||||||
|
On Intel, mach_timebase_info returns numer = 1 and denom = 1,
|
||||||
|
meaning that mach_absolute_time() returns some global clock
|
||||||
|
time in nanoseconds. Because PPC returns a frequency and
|
||||||
|
returning a time in microseconds would still require math in
|
||||||
|
the critical path (a divide, at that), we pretend that the
|
||||||
|
nanosecond timer is instead a cycle counter for a 1GHz clock
|
||||||
|
and that we're returning a cycle count natively. so sTBI.denom
|
||||||
|
* (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or
|
||||||
|
1000000000, meaning we have a 1GHz clock.
|
||||||
|
|
||||||
|
More generally, since mach_timebase_info() gives the "keys" to
|
||||||
|
transition the return from mach_absolute_time() into
|
||||||
|
nanoseconds, taking the reciprocal of that and multiplying by
|
||||||
|
1000000000 will give you a frequency in cycles / second if you
|
||||||
|
think of mach_absolute_time() always returning a cycle count.
|
||||||
|
*/
|
||||||
int opal_timer_darwin_open(void)
|
int opal_timer_darwin_open(void)
|
||||||
{
|
{
|
||||||
mach_timebase_info_data_t sTBI;
|
/* Call opal_timer_base_get_cycles() once to start the engine */
|
||||||
|
(void)opal_timer_base_get_cycles();
|
||||||
|
|
||||||
mach_timebase_info(&sTBI);
|
opal_timer_darwin_freq = opal_timer_darwin_info.denom * (1000000000 / opal_timer_darwin_info.numer);
|
||||||
|
|
||||||
/* mach_timebase_info() returns a fraction that can be multiplied
|
|
||||||
by the difference between two calls to mach_absolute_time() to
|
|
||||||
get the number of nanoseconds that passed between the two
|
|
||||||
calls.
|
|
||||||
|
|
||||||
On PPC, mach_timebase_info returns numer = 1000000000 and denom
|
|
||||||
= 33333335 (or possibly 25000000, depending on the machine).
|
|
||||||
mach_absolute_time() returns a cycle count from the global
|
|
||||||
clock, which runs at 25 - 33MHz, so dividing the cycle count by
|
|
||||||
the frequency gives you seconds between the interval, then
|
|
||||||
multiplying by 1000000000 gives you nanoseconds. Of course,
|
|
||||||
you should do the multiply first, then the divide to reduce
|
|
||||||
arithmetic errors due to integer math. But since we want the
|
|
||||||
least amount of math in the critical path as possible and
|
|
||||||
mach_absolute_time is already a cycle counter, we claim we have
|
|
||||||
native cycle count support and set the frequency to be the
|
|
||||||
frequency of the global clock, which is sTBI.denom *
|
|
||||||
(1000000000 / sTBI.numer), which is sTBI.denom * (1 / 1), or
|
|
||||||
sTBI.denom.
|
|
||||||
|
|
||||||
On Intel, mach_timebase_info returns numer = 1 and denom = 1,
|
|
||||||
meaning that mach_absolute_time() returns some global clock
|
|
||||||
time in nanoseconds. Because PPC returns a frequency and
|
|
||||||
returning a time in microseconds would still require math in
|
|
||||||
the critical path (a divide, at that), we pretend that the
|
|
||||||
nanosecond timer is instead a cycle counter for a 1GHz clock
|
|
||||||
and that we're returning a cycle count natively. so sTBI.denom
|
|
||||||
* (1000000000 / sTBI.numer) gives us 1 * (1000000000 / 1), or
|
|
||||||
1000000000, meaning we have a 1GHz clock.
|
|
||||||
|
|
||||||
More generally, since mach_timebase_info() gives the "keys" to
|
|
||||||
transition the return from mach_absolute_time() into
|
|
||||||
nanoseconds, taking the reciprocal of that and multiplying by
|
|
||||||
1000000000 will give you a frequency in cycles / second if you
|
|
||||||
think of mach_absolute_time() always returning a cycle count.
|
|
||||||
*/
|
|
||||||
opal_timer_darwin_freq = sTBI.denom * (1000000000 / sTBI.numer);
|
|
||||||
|
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user