// blob: 8632369ad2896e724e22707d2d6950085f17e18a [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Windows Timer Primer
//
// A good article: http://www.ddj.com/windows/184416651
// A good mozilla bug: http://bugzilla.mozilla.org/show_bug.cgi?id=363258
//
// The default windows timer, GetSystemTimeAsFileTime is not very precise.
// It is only good to ~15.5ms.
//
// QueryPerformanceCounter is the logical choice for a high-precision timer.
// However, it is known to be buggy on some hardware. Specifically, it can
// sometimes "jump". On laptops, QPC can also be very expensive to call.
// It's 3-4x slower than timeGetTime() on desktops, but can be 10x slower
// on laptops. A unittest exists which will show the relative cost of various
// timers on any system.
//
// The next logical choice is timeGetTime(). timeGetTime has a precision of
// 1ms, but only if you call APIs (timeBeginPeriod()) which affect all other
// applications on the system. By default, precision is only 15.5ms.
// Unfortunately, we don't want to call timeBeginPeriod because we don't
// want to affect other applications. Further, on mobile platforms, use of
// faster multimedia timers can hurt battery life. See the intel
// article about this here:
// http://softwarecommunity.intel.com/articles/eng/1086.htm
//
// To work around all this, we're going to generally use timeGetTime(). We
// will only increase the system-wide timer if we're not running on battery
// power.
#include "base/time/time.h"

#include <windows.h>

#include <mmsystem.h>
#include <stdint.h>

#include <atomic>
#include <limits>
#include <mutex>

#include "base/bit_cast.h"
#include "base/logging.h"
#include "base/threading/platform_thread.h"
#include "base/time/time_override.h"
namespace base {
namespace {
// Converts a FILETIME into a count of microseconds. Per MSDN, a FILETIME
// "Contains a 64-bit value representing the number of 100-nanosecond intervals
// since January 1, 1601 (UTC)."
int64_t FileTimeToMicroseconds(const FILETIME& ft) {
  // The structure is reinterpreted as one 64-bit integer through bit_cast,
  // which takes care of alignment. The reinterpretation is only correct on
  // little-endian machines.
  const int64_t hundred_ns_intervals = bit_cast<int64_t, FILETIME>(ft);
  // Ten 100-nanosecond intervals make up one microsecond.
  return hundred_ns_intervals / 10;
}
// Converts |us|, a count of microseconds, into |*ft|, which counts
// 100-nanosecond intervals.
//
// Negative values are not representable in a FILETIME and trip a DCHECK.
// Values whose 100-nanosecond count would not fit in 64 bits are saturated to
// the nearest representable count instead of overflowing, because signed
// integer overflow is undefined behavior.
void MicrosecondsToFileTime(int64_t us, FILETIME* ft) {
  DCHECK_GE(us, 0LL) << "Time is less than 0, negative values are not "
                        "representable in FILETIME";
  constexpr int64_t kMaxIntervals = std::numeric_limits<int64_t>::max();
  constexpr int64_t kMinIntervals = std::numeric_limits<int64_t>::min();
  // Multiply by 10 to convert microseconds to 100-nanosecond intervals, but
  // only when the product is known to fit.
  int64_t intervals = 0;
  if (us > kMaxIntervals / 10) {
    intervals = kMaxIntervals;
  } else if (us < kMinIntervals / 10) {
    intervals = kMinIntervals;
  } else {
    intervals = us * 10;
  }
  // bit_cast handles alignment problems. This only works on little-endian
  // machines.
  *ft = bit_cast<FILETIME, int64_t>(intervals);
}
int64_t CurrentWallclockMicroseconds() {
FILETIME ft;
::GetSystemTimeAsFileTime(&ft);
return FileTimeToMicroseconds(ft);
}
// Time between resampling the un-granular clock for this API.
constexpr TimeDelta kMaxTimeToAvoidDrift = TimeDelta::FromSeconds(60);
// Wall-clock reading, in microseconds (see CurrentWallclockMicroseconds()),
// stored by the most recent InitializeClock() call.
// TimeNowIgnoringOverride() treats a value of 0 as "not sampled yet".
int64_t g_initial_time = 0;
// Tick-clock reading stored by the same InitializeClock() call. Elapsed time
// since the last resync is measured against it.
TimeTicks g_initial_ticks;
void InitializeClock() {
g_initial_ticks = subtle::TimeTicksNowIgnoringOverride();
g_initial_time = CurrentWallclockMicroseconds();
}
// The two values that ActivateHighResolutionTimer uses to set the systemwide
// timer interrupt frequency on Windows. It controls how precise timers are
// but also has a big impact on battery life.
const int kMinTimerIntervalHighResMs = 1;
const int kMinTimerIntervalLowResMs = 4;
// Track if kMinTimerIntervalHighResMs or kMinTimerIntervalLowResMs is active.
// Changed only by Time::EnableHighResolutionTimer().
bool g_high_res_timer_enabled = false;
// Number of Time::ActivateHighResolutionTimer(true) calls that have not yet
// been balanced by a Time::ActivateHighResolutionTimer(false) call.
uint32_t g_high_res_timer_count = 0;
// Start time of the high resolution timer usage monitoring. This is needed
// to calculate the usage as percentage of the total elapsed time.
TimeTicks g_high_res_timer_usage_start;
// The cumulative time the high resolution timer has been in use since
// |g_high_res_timer_usage_start| moment.
TimeDelta g_high_res_timer_usage;
// Timestamp of the last activation change of the high resolution timer. This
// is used to calculate the cumulative usage.
TimeTicks g_high_res_timer_last_activation;
// Returns the mutex that the Time::*HighResolutionTimer* functions hold while
// reading or writing the high-resolution-timer globals (enabled flag,
// activation count and usage statistics). The mutex is allocated on first use
// and is never deleted.
std::mutex* GetHighResLock() {
  static std::mutex* const high_res_lock = new std::mutex();
  return high_res_lock;
}
// Returns the current value of the performance counter.
uint64_t QPCNowRaw() {
LARGE_INTEGER perf_counter_now = {};
// According to the MSDN documentation for QueryPerformanceCounter(), this
// will never fail on systems that run XP or later.
// https://msdn.microsoft.com/library/windows/desktop/ms644904.aspx
::QueryPerformanceCounter(&perf_counter_now);
return perf_counter_now.QuadPart;
}
bool SafeConvertToWord(int in, WORD* out) {
CheckedNumeric<WORD> result = in;
*out = result.ValueOrDefault(std::numeric_limits<WORD>::max());
return result.IsValid();
}
} // namespace
// Time -----------------------------------------------------------------------
namespace subtle {
// Returns the current wall-clock time.
//
// Time is implemented on top of the high-resolution tick clock so that
// timeouts smaller than 10-15ms are possible; using
// CurrentWallclockMicroseconds() directly would give the less-granular timer.
// A reference pair (|g_initial_time|, |g_initial_ticks|) is captured by
// InitializeClock(), and "now" is that wall-clock reference plus the ticks
// elapsed since it was taken. To avoid drift, the reference pair is re-sampled
// whenever more than |kMaxTimeToAvoidDrift| has elapsed.
Time TimeNowIgnoringOverride() {
  if (g_initial_time == 0)
    InitializeClock();

  // Time elapsed since the reference pair was captured.
  TimeDelta since_sync = TimeTicksNowIgnoringOverride() - g_initial_ticks;
  while (since_sync > kMaxTimeToAvoidDrift) {
    // The reference is too old: resync with the system clock and measure
    // again against the fresh reference.
    InitializeClock();
    since_sync = TimeTicksNowIgnoringOverride() - g_initial_ticks;
  }
  return Time() + since_sync + TimeDelta::FromMicroseconds(g_initial_time);
}
// Returns the current time taken straight from the system clock. As a side
// effect the reference pair used by TimeNowIgnoringOverride() is re-sampled.
Time TimeNowFromSystemTimeIgnoringOverride() {
  // Force a resync; this refreshes |g_initial_time| with the wall clock.
  InitializeClock();
  const TimeDelta wallclock_reading =
      TimeDelta::FromMicroseconds(g_initial_time);
  return Time() + wallclock_reading;
}
} // namespace subtle
// static
// Converts a FILETIME to a Time. An all-zero FILETIME maps to the null Time and
// a FILETIME with every bit set maps to Time::Max().
Time Time::FromFileTime(FILETIME ft) {
  constexpr DWORD kAllBitsSet = std::numeric_limits<DWORD>::max();

  const bool is_zero = bit_cast<int64_t, FILETIME>(ft) == 0;
  if (is_zero)
    return Time();

  const bool is_saturated =
      ft.dwHighDateTime == kAllBitsSet && ft.dwLowDateTime == kAllBitsSet;
  if (is_saturated)
    return Max();

  return Time(FileTimeToMicroseconds(ft));
}
// Converts this Time to a FILETIME. The null Time maps to an all-zero FILETIME
// and the maximum Time maps to a FILETIME with every bit set.
FILETIME Time::ToFileTime() const {
  if (is_null())
    return bit_cast<FILETIME, int64_t>(0);

  FILETIME file_time;
  if (is_max()) {
    file_time.dwHighDateTime = std::numeric_limits<DWORD>::max();
    file_time.dwLowDateTime = std::numeric_limits<DWORD>::max();
  } else {
    MicrosecondsToFileTime(us_, &file_time);
  }
  return file_time;
}
// static
void Time::EnableHighResolutionTimer(bool enable) {
std::lock_guard<std::mutex> lock(*GetHighResLock());
if (g_high_res_timer_enabled == enable)
return;
g_high_res_timer_enabled = enable;
if (!g_high_res_timer_count)
return;
// Since g_high_res_timer_count != 0, an ActivateHighResolutionTimer(true)
// was called which called timeBeginPeriod with g_high_res_timer_enabled
// with a value which is the opposite of |enable|. With that information we
// call timeEndPeriod with the same value used in timeBeginPeriod and
// therefore undo the period effect.
if (enable) {
timeEndPeriod(kMinTimerIntervalLowResMs);
timeBeginPeriod(kMinTimerIntervalHighResMs);
} else {
timeEndPeriod(kMinTimerIntervalHighResMs);
timeBeginPeriod(kMinTimerIntervalLowResMs);
}
}
// static
bool Time::ActivateHighResolutionTimer(bool activating) {
// We only do work on the transition from zero to one or one to zero so we
// can easily undo the effect (if necessary) when EnableHighResolutionTimer is
// called.
const uint32_t max = std::numeric_limits<uint32_t>::max();
std::lock_guard<std::mutex> lock(*GetHighResLock());
UINT period = g_high_res_timer_enabled ? kMinTimerIntervalHighResMs
: kMinTimerIntervalLowResMs;
if (activating) {
DCHECK_NE(g_high_res_timer_count, max);
++g_high_res_timer_count;
if (g_high_res_timer_count == 1) {
g_high_res_timer_last_activation = subtle::TimeTicksNowIgnoringOverride();
timeBeginPeriod(period);
}
} else {
DCHECK_NE(g_high_res_timer_count, 0u);
--g_high_res_timer_count;
if (g_high_res_timer_count == 0) {
g_high_res_timer_usage += subtle::TimeTicksNowIgnoringOverride() -
g_high_res_timer_last_activation;
timeEndPeriod(period);
}
}
return (period == kMinTimerIntervalHighResMs);
}
// static
// Returns true when the high resolution timer is both enabled and has at
// least one outstanding activation.
bool Time::IsHighResolutionTimerInUse() {
  std::lock_guard<std::mutex> guard(*GetHighResLock());
  if (!g_high_res_timer_enabled)
    return false;
  return g_high_res_timer_count > 0;
}
// static
void Time::ResetHighResolutionTimerUsage() {
std::lock_guard<std::mutex> lock(*GetHighResLock());
g_high_res_timer_usage = TimeDelta();
g_high_res_timer_usage_start = subtle::TimeTicksNowIgnoringOverride();
if (g_high_res_timer_count > 0)
g_high_res_timer_last_activation = g_high_res_timer_usage_start;
}
// static
double Time::GetHighResolutionTimerUsage() {
std::lock_guard<std::mutex> lock(*GetHighResLock());
TimeTicks now = subtle::TimeTicksNowIgnoringOverride();
TimeDelta elapsed_time = now - g_high_res_timer_usage_start;
if (elapsed_time.is_zero()) {
// This is unexpected but possible if TimeTicks resolution is low and
// GetHighResolutionTimerUsage() is called promptly after
// ResetHighResolutionTimerUsage().
return 0.0;
}
TimeDelta used_time = g_high_res_timer_usage;
if (g_high_res_timer_count > 0) {
// If currently activated add the remainder of time since the last
// activation.
used_time += now - g_high_res_timer_last_activation;
}
return used_time.InMillisecondsF() / elapsed_time.InMillisecondsF() * 100;
}
// TimeTicks ------------------------------------------------------------------
namespace {
// Discussion of tick counter options on Windows:
//
// (1) CPU cycle counter. (Retrieved via RDTSC)
// The CPU counter provides the highest resolution time stamp and is the least
// expensive to retrieve. However, on older CPUs, two issues can affect its
// reliability: First it is maintained per processor and not synchronized
// between processors. Also, the counters will change frequency due to thermal
// and power changes, and stop in some states.
//
// (2) QueryPerformanceCounter (QPC). The QPC counter provides a high-
// resolution (<1 microsecond) time stamp. On most hardware running today, it
// auto-detects and uses the constant-rate RDTSC counter to provide extremely
// efficient and reliable time stamps.
//
// On older CPUs where RDTSC is unreliable, it falls back to using more
// expensive (20X to 40X more costly) alternate clocks, such as HPET or the ACPI
// PM timer, and can involve system calls; and all this is up to the HAL (with
// some help from ACPI). According to
// http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx, in the
// worst case, it gets the counter from the rollover interrupt on the
// programmable interrupt timer. In best cases, the HAL may conclude that the
// RDTSC counter runs at a constant frequency, then it uses that instead. On
// multiprocessor machines, it will try to verify the values returned from
// RDTSC on each processor are consistent with each other, and apply a handful
// of workarounds for known buggy hardware. In other words, QPC is supposed to
// give consistent results on a multiprocessor computer, but for older CPUs it
// can be unreliable due to bugs in BIOS or HAL.
//
// (3) System time. The system time provides a low-resolution (from ~1 to ~15.6
// milliseconds) time stamp but is comparatively less expensive to retrieve and
// more reliable. Time::EnableHighResolutionTimer() and
// Time::ActivateHighResolutionTimer() can be called to alter the resolution of
// this timer; and also other Windows applications can alter it, affecting this
// one.
// Forward declaration: the function pointer below initially points at this
// function, which is defined further down.
TimeTicks InitialNowFunction();
// See "threading notes" in InitializeNowFunctionPointer() for details on how
// concurrent reads/writes to these globals have been made safe.
//
// Function that subtle::TimeTicksNowIgnoringOverride() calls to read the tick
// clock. Replaced by InitializeNowFunctionPointer().
TimeTicksNowFunction g_time_ticks_now_ignoring_override_function =
&InitialNowFunction;
// Performance-counter frequency in ticks per second, as stored by
// InitializeNowFunctionPointer(). Zero until then.
int64_t g_qpc_ticks_per_second = 0;
// Issues a thread fence with the given ordering. |order| is the unqualified
// name of a std::memory_order enumerator (e.g. memory_order_acquire); the
// macro qualifies it with std::.
//
// This used to expand to _ReadWriteBarrier(), which only constrains compiler
// reordering and is deprecated by Microsoft in favor of
// std::atomic_thread_fence. The standard fence additionally emits the hardware
// barrier that weakly-ordered processors require.
#define ATOMIC_THREAD_FENCE(order) std::atomic_thread_fence(std::order)
// Converts a raw performance-counter reading into a TimeDelta using the
// frequency stored in |g_qpc_ticks_per_second|.
TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) {
  // Ensure that the assignment to |g_qpc_ticks_per_second|, made in
  // InitializeNowFunctionPointer(), has happened by this point.
  ATOMIC_THREAD_FENCE(memory_order_acquire);
  DCHECK_GT(g_qpc_ticks_per_second, 0);

  int64_t microseconds;
  if (qpc_value < Time::kQPCOverflowThreshold) {
    // Below the overflow threshold a plain multiply-then-divide is used.
    microseconds =
        qpc_value * Time::kMicrosecondsPerSecond / g_qpc_ticks_per_second;
  } else {
    // Otherwise convert the whole seconds and the leftover ticks separately,
    // which avoids overflow and precision issues.
    const int64_t seconds_part = qpc_value / g_qpc_ticks_per_second;
    const int64_t remainder_ticks =
        qpc_value - (seconds_part * g_qpc_ticks_per_second);
    microseconds = (seconds_part * Time::kMicrosecondsPerSecond) +
                   ((remainder_ticks * Time::kMicrosecondsPerSecond) /
                    g_qpc_ticks_per_second);
  }
  return TimeDelta::FromMicroseconds(microseconds);
}
// Returns the current performance-counter reading expressed as a TimeTicks.
TimeTicks QPCNow() {
  const TimeDelta since_origin = QPCValueToTimeDelta(QPCNowRaw());
  return TimeTicks() + since_origin;
}
// Queries the performance-counter frequency, stores it in
// |g_qpc_ticks_per_second|, and then points the "now" function pointers at
// QPCNow(). If QueryPerformanceFrequency() reports failure, the frequency is
// stored as 0.
void InitializeNowFunctionPointer() {
LARGE_INTEGER ticks_per_sec = {};
if (!QueryPerformanceFrequency(&ticks_per_sec))
ticks_per_sec.QuadPart = 0;
TimeTicksNowFunction now_function = &QPCNow;
// Threading note 1: In an unlikely race condition, it's possible for two or
// more threads to enter InitializeNowFunctionPointer() in parallel. This is
// not a problem since all threads should end up writing out the same values
// to the global variables.
//
// Threading note 2: A release fence is placed here to ensure, from the
// perspective of other threads using the function pointers, that the
// assignment to |g_qpc_ticks_per_second| happens before the function pointers
// are changed.
g_qpc_ticks_per_second = ticks_per_sec.QuadPart;
ATOMIC_THREAD_FENCE(memory_order_release);
// Also set g_time_ticks_now_function to avoid the additional indirection via
// TimeTicksNowIgnoringOverride() for future calls to TimeTicks::Now(). But
// g_time_ticks_now_function may have already been overridden, so it is only
// replaced while it still points at TimeTicksNowIgnoringOverride().
if (internal::g_time_ticks_now_function ==
&subtle::TimeTicksNowIgnoringOverride) {
internal::g_time_ticks_now_function = now_function;
}
g_time_ticks_now_ignoring_override_function = now_function;
}
// Initial target of |g_time_ticks_now_ignoring_override_function|. Runs the
// one-time setup and then forwards to whichever function the setup installed.
TimeTicks InitialNowFunction() {
  InitializeNowFunctionPointer();
  const TimeTicksNowFunction installed_now =
      g_time_ticks_now_ignoring_override_function;
  return installed_now();
}
} // namespace
namespace subtle {
// Returns the current tick count by calling through
// |g_time_ticks_now_ignoring_override_function|.
TimeTicks TimeTicksNowIgnoringOverride() {
  const TimeTicksNowFunction now_function =
      g_time_ticks_now_ignoring_override_function;
  return now_function();
}
} // namespace subtle
// static
// Returns true when the tick clock is backed by QPCNow(). Runs the one-time
// now-function setup first if it has not happened yet.
bool TimeTicks::IsHighResolution() {
  const bool setup_pending =
      g_time_ticks_now_ignoring_override_function == &InitialNowFunction;
  if (setup_pending)
    InitializeNowFunctionPointer();

  const bool backed_by_qpc =
      g_time_ticks_now_ignoring_override_function == &QPCNow;
  return backed_by_qpc;
}
// static
// Returns true when TimeTicks values can be compared across processes.
bool TimeTicks::IsConsistentAcrossProcesses() {
  // Windows documentation [1] states that QPC is consistent post-Windows
  // Vista. Using QPC therefore implies consistency, which makes this the same
  // question as whether the clock is high resolution.
  //
  // [1]
  // https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx
  //
  // "In general, the performance counter results are consistent across all
  // processors in multi-core and multi-processor systems, even when measured on
  // different threads or processes. Here are some exceptions to this rule:
  // - Pre-Windows Vista operating systems that run on certain processors might
  // violate this consistency because of one of these reasons:
  // 1. The hardware processors have a non-invariant TSC and the BIOS
  // doesn't indicate this condition correctly.
  // 2. The TSC synchronization algorithm that was used wasn't suitable for
  // systems with large numbers of processors."
  const bool backed_by_qpc = IsHighResolution();
  return backed_by_qpc;
}
// static
// Reports which underlying clock backs TimeTicks.
TimeTicks::Clock TimeTicks::GetClock() {
  if (IsHighResolution())
    return Clock::WIN_QPC;
  return Clock::WIN_ROLLOVER_PROTECTED_TIME_GET_TIME;
}
// ThreadTicks ----------------------------------------------------------------
namespace subtle {
// Returns the ThreadTicks value for the calling thread.
ThreadTicks ThreadTicksNowIgnoringOverride() {
  const PlatformThreadHandle& calling_thread = PlatformThread::CurrentHandle();
  return ThreadTicks::GetForThread(calling_thread);
}
} // namespace subtle
// static
// Returns the CPU time consumed by the thread identified by |thread_handle|.
// Returns a default-constructed ThreadTicks while the TSC frequency is still
// unknown (TSCTicksPerSecond() returns 0).
ThreadTicks ThreadTicks::GetForThread(
    const PlatformThreadHandle& thread_handle) {
  DCHECK(IsSupported());

  // Number of TSC ticks consumed by the thread.
  ULONG64 cycles_consumed = 0;
  ::QueryThreadCycleTime(thread_handle.platform_handle(), &cycles_consumed);

  // Frequency of the TSC.
  const double cycles_per_second = TSCTicksPerSecond();
  if (cycles_per_second == 0)
    return ThreadTicks();

  // Scale the cycle count to seconds, then to microseconds.
  const double cpu_seconds = cycles_consumed / cycles_per_second;
  const double cpu_microseconds = cpu_seconds * Time::kMicrosecondsPerSecond;
  return ThreadTicks(static_cast<int64_t>(cpu_microseconds));
}
// static
// Blocks until TSCTicksPerSecond() yields a non-zero frequency, sleeping 10 ms
// between attempts.
void ThreadTicks::WaitUntilInitializedWin() {
  for (;;) {
    if (TSCTicksPerSecond() != 0)
      return;
    ::Sleep(10);
  }
}
// Returns the estimated TSC frequency in ticks per second, or 0 when less than
// kMinimumEvaluationPeriodSeconds has elapsed between the initial reading
// (taken on the first call) and this call. Once a frequency has been computed
// it is cached and returned by all later calls.
double ThreadTicks::TSCTicksPerSecond() {
DCHECK(IsSupported());
// The value returned by QueryPerformanceFrequency() cannot be used as the TSC
// frequency, because there is no guarantee that the TSC frequency is equal to
// the performance counter frequency.
// The TSC frequency is cached in a static variable because it takes some time
// to compute it.
static double tsc_ticks_per_second = 0;
if (tsc_ticks_per_second != 0)
return tsc_ticks_per_second;
// Increase the thread priority to reduce the chances of having a context
// switch during a reading of the TSC and the performance counter.
int previous_priority = ::GetThreadPriority(::GetCurrentThread());
::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
// The first time that this function is called, make an initial reading of the
// TSC and the performance counter.
static const uint64_t tsc_initial = __rdtsc();
static const uint64_t perf_counter_initial = QPCNowRaw();
// Make another reading of the TSC and the performance counter every time
// that this function is called.
uint64_t tsc_now = __rdtsc();
uint64_t perf_counter_now = QPCNowRaw();
// Reset the thread priority.
::SetThreadPriority(::GetCurrentThread(), previous_priority);
// Make sure that at least 50 ms elapsed between the 2 readings. The first
// time that this function is called, we don't expect this to be the case.
// Note: The longer the elapsed time between the 2 readings is, the more
// accurate the computed TSC frequency will be. The 50 ms value was
// chosen because local benchmarks show that it allows us to get a
// stddev of less than 1 tick/us between multiple runs.
// Note: According to the MSDN documentation for QueryPerformanceFrequency(),
// this will never fail on systems that run XP or later.
// https://msdn.microsoft.com/library/windows/desktop/ms644905.aspx
LARGE_INTEGER perf_counter_frequency = {};
::QueryPerformanceFrequency(&perf_counter_frequency);
DCHECK_GE(perf_counter_now, perf_counter_initial);
uint64_t perf_counter_ticks = perf_counter_now - perf_counter_initial;
// Elapsed wall time between the initial and the current performance-counter
// readings.
double elapsed_time_seconds =
perf_counter_ticks / static_cast<double>(perf_counter_frequency.QuadPart);
static constexpr double kMinimumEvaluationPeriodSeconds = 0.05;
// Too little time has elapsed for an estimate; nothing is cached, so a later
// call will try again.
if (elapsed_time_seconds < kMinimumEvaluationPeriodSeconds)
return 0;
// Compute the frequency of the TSC.
DCHECK_GE(tsc_now, tsc_initial);
uint64_t tsc_ticks = tsc_now - tsc_initial;
tsc_ticks_per_second = tsc_ticks / elapsed_time_seconds;
return tsc_ticks_per_second;
}
// static
// Builds a TimeTicks from a raw performance-counter reading.
TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) {
  const TimeDelta since_origin = QPCValueToTimeDelta(qpc_value);
  return TimeTicks() + since_origin;
}
// TimeDelta ------------------------------------------------------------------
// static
// Builds a TimeDelta from a raw performance-counter value.
TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) {
  const TimeDelta converted = QPCValueToTimeDelta(qpc_value);
  return converted;
}
// static
// Builds a TimeDelta from the 100-nanosecond interval count held in |ft|.
TimeDelta TimeDelta::FromFileTime(FILETIME ft) {
  const int64_t microseconds = FileTimeToMicroseconds(ft);
  return TimeDelta::FromMicroseconds(microseconds);
}
} // namespace base