blob: aa695d890e65c3bc035b9d67f3fb7ce1c141b4be [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/sampling_heap_profiler/sampling_heap_profiler.h"
#include <algorithm>
#include <cmath>
#include <utility>
#include "base/allocator/allocator_shim.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/atomicops.h"
#include "base/debug/stack_trace.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/rand_util.h"
#include "base/threading/thread_local_storage.h"
#include "build_config.h"
namespace base {
using base::allocator::AllocatorDispatch;
using base::subtle::Atomic32;
using base::subtle::AtomicWord;
namespace {
// Control how many top frames to skip when recording call stack.
// These frames correspond to the profiler own frames.
const uint32_t kSkipBaseAllocatorFrames = 2;
const size_t kDefaultSamplingIntervalBytes = 128 * 1024;
// Controls if sample intervals should not be randomized. Used for testing.
bool g_deterministic;
// A positive value if profiling is running, otherwise it's zero.
Atomic32 g_running;
// Number of lock-free safe (not causing rehashing) accesses to samples_ map
// currently being performed.
Atomic32 g_operations_in_flight;
// Controls if new incoming lock-free accesses are allowed.
// When set to true, threads should not enter lock-free paths.
Atomic32 g_fast_path_is_closed;
// Sampling interval parameter, the mean value for intervals between samples.
AtomicWord g_sampling_interval = kDefaultSamplingIntervalBytes;
// Last generated sample ordinal number.
uint32_t g_last_sample_ordinal = 0;
void (*g_hooks_install_callback)();
Atomic32 g_hooks_installed;
void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
void* address = self->next->alloc_function(self->next, size, context);
SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
return address;
}
void* AllocZeroInitializedFn(const AllocatorDispatch* self,
size_t n,
size_t size,
void* context) {
void* address =
self->next->alloc_zero_initialized_function(self->next, n, size, context);
SamplingHeapProfiler::RecordAlloc(address, n * size,
kSkipBaseAllocatorFrames);
return address;
}
void* AllocAlignedFn(const AllocatorDispatch* self,
size_t alignment,
size_t size,
void* context) {
void* address =
self->next->alloc_aligned_function(self->next, alignment, size, context);
SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
return address;
}
void* ReallocFn(const AllocatorDispatch* self,
void* address,
size_t size,
void* context) {
// Note: size == 0 actually performs free.
SamplingHeapProfiler::RecordFree(address);
address = self->next->realloc_function(self->next, address, size, context);
SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
return address;
}
void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
SamplingHeapProfiler::RecordFree(address);
self->next->free_function(self->next, address, context);
}
size_t GetSizeEstimateFn(const AllocatorDispatch* self,
void* address,
void* context) {
return self->next->get_size_estimate_function(self->next, address, context);
}
unsigned BatchMallocFn(const AllocatorDispatch* self,
size_t size,
void** results,
unsigned num_requested,
void* context) {
unsigned num_allocated = self->next->batch_malloc_function(
self->next, size, results, num_requested, context);
for (unsigned i = 0; i < num_allocated; ++i) {
SamplingHeapProfiler::RecordAlloc(results[i], size,
kSkipBaseAllocatorFrames);
}
return num_allocated;
}
void BatchFreeFn(const AllocatorDispatch* self,
void** to_be_freed,
unsigned num_to_be_freed,
void* context) {
for (unsigned i = 0; i < num_to_be_freed; ++i)
SamplingHeapProfiler::RecordFree(to_be_freed[i]);
self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
context);
}
void FreeDefiniteSizeFn(const AllocatorDispatch* self,
void* address,
size_t size,
void* context) {
SamplingHeapProfiler::RecordFree(address);
self->next->free_definite_size_function(self->next, address, size, context);
}
AllocatorDispatch g_allocator_dispatch = {&AllocFn,
&AllocZeroInitializedFn,
&AllocAlignedFn,
&ReallocFn,
&FreeFn,
&GetSizeEstimateFn,
&BatchMallocFn,
&BatchFreeFn,
&FreeDefiniteSizeFn,
nullptr};
#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
void PartitionAllocHook(void* address, size_t size, const char*) {
SamplingHeapProfiler::RecordAlloc(address, size);
}
void PartitionFreeHook(void* address) {
SamplingHeapProfiler::RecordFree(address);
}
#endif // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
ThreadLocalStorage::Slot& AccumulatedBytesTLS() {
static base::NoDestructor<base::ThreadLocalStorage::Slot>
accumulated_bytes_tls;
return *accumulated_bytes_tls;
}
} // namespace
SamplingHeapProfiler::Sample::Sample(size_t size,
size_t total,
uint32_t ordinal)
: size(size), total(total), ordinal(ordinal) {}
SamplingHeapProfiler::Sample::Sample(const Sample&) = default;
SamplingHeapProfiler::Sample::~Sample() = default;
SamplingHeapProfiler* SamplingHeapProfiler::instance_;
SamplingHeapProfiler::SamplingHeapProfiler() {
instance_ = this;
}
// static
void SamplingHeapProfiler::InitTLSSlot() {
// Preallocate the TLS slot early, so it can't cause reentracy issues
// when sampling is started.
ignore_result(AccumulatedBytesTLS().Get());
}
// static
void SamplingHeapProfiler::InstallAllocatorHooksOnce() {
static bool hook_installed = InstallAllocatorHooks();
ignore_result(hook_installed);
}
// static
bool SamplingHeapProfiler::InstallAllocatorHooks() {
ignore_result(g_allocator_dispatch);
DLOG(WARNING)
<< "base::allocator shims are not available for memory sampling.";
#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
base::PartitionAllocHooks::SetAllocationHook(&PartitionAllocHook);
base::PartitionAllocHooks::SetFreeHook(&PartitionFreeHook);
#endif // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
int32_t hooks_install_callback_has_been_set =
base::subtle::Acquire_CompareAndSwap(&g_hooks_installed, 0, 1);
if (hooks_install_callback_has_been_set)
g_hooks_install_callback();
return true;
}
// static
void SamplingHeapProfiler::SetHooksInstallCallback(
void (*hooks_install_callback)()) {
CHECK(!g_hooks_install_callback && hooks_install_callback);
g_hooks_install_callback = hooks_install_callback;
int32_t profiler_has_already_been_initialized =
base::subtle::Release_CompareAndSwap(&g_hooks_installed, 0, 1);
if (profiler_has_already_been_initialized)
g_hooks_install_callback();
}
uint32_t SamplingHeapProfiler::Start() {
InstallAllocatorHooksOnce();
base::subtle::Barrier_AtomicIncrement(&g_running, 1);
return g_last_sample_ordinal;
}
void SamplingHeapProfiler::Stop() {
AtomicWord count = base::subtle::Barrier_AtomicIncrement(&g_running, -1);
CHECK_GE(count, 0);
}
void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
// TODO(alph): Reset the sample being collected if running.
base::subtle::Release_Store(&g_sampling_interval,
static_cast<AtomicWord>(sampling_interval));
}
// static
size_t SamplingHeapProfiler::GetNextSampleInterval(size_t interval) {
if (UNLIKELY(g_deterministic))
return interval;
// We sample with a Poisson process, with constant average sampling
// interval. This follows the exponential probability distribution with
// parameter λ = 1/interval where |interval| is the average number of bytes
// between samples.
// Let u be a uniformly distributed random number between 0 and 1, then
// next_sample = -ln(u) / λ
double uniform = base::RandDouble();
double value = -log(uniform) * interval;
size_t min_value = sizeof(intptr_t);
// We limit the upper bound of a sample interval to make sure we don't have
// huge gaps in the sampling stream. Probability of the upper bound gets hit
// is exp(-20) ~ 2e-9, so it should not skew the distibution.
size_t max_value = interval * 20;
if (UNLIKELY(value < min_value))
return min_value;
if (UNLIKELY(value > max_value))
return max_value;
return static_cast<size_t>(value);
}
// static
void SamplingHeapProfiler::RecordAlloc(void* address,
size_t size,
uint32_t skip_frames) {
if (UNLIKELY(!base::subtle::NoBarrier_Load(&g_running)))
return;
if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
return;
// TODO(alph): On MacOS it may call the hook several times for a single
// allocation. Handle the case.
intptr_t accumulated_bytes =
reinterpret_cast<intptr_t>(AccumulatedBytesTLS().Get());
accumulated_bytes += size;
if (LIKELY(accumulated_bytes < 0)) {
AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));
return;
}
size_t mean_interval = base::subtle::NoBarrier_Load(&g_sampling_interval);
size_t samples = accumulated_bytes / mean_interval;
accumulated_bytes %= mean_interval;
do {
accumulated_bytes -= GetNextSampleInterval(mean_interval);
++samples;
} while (accumulated_bytes >= 0);
AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));
instance_->DoRecordAlloc(samples * mean_interval, size, address, skip_frames);
}
void SamplingHeapProfiler::RecordStackTrace(Sample* sample,
uint32_t skip_frames) {
#if !defined(OS_NACL)
// TODO(alph): Consider using debug::TraceStackFramePointers. It should be
// somewhat faster than base::debug::StackTrace.
base::debug::StackTrace trace;
size_t count;
void* const* addresses = const_cast<void* const*>(trace.Addresses(&count));
const uint32_t kSkipProfilerOwnFrames = 2;
skip_frames += kSkipProfilerOwnFrames;
sample->stack.insert(
sample->stack.end(), &addresses[skip_frames],
&addresses[std::max(count, static_cast<size_t>(skip_frames))]);
#endif
}
void SamplingHeapProfiler::DoRecordAlloc(size_t total_allocated,
size_t size,
void* address,
uint32_t skip_frames) {
if (entered_.Get())
return;
entered_.Set(true);
{
base::AutoLock lock(mutex_);
Sample sample(size, total_allocated, ++g_last_sample_ordinal);
RecordStackTrace(&sample, skip_frames);
if (MayRehashOnInsert()) {
// Close the fast path as inserting an element into samples_ may cause
// rehashing that invalidates iterators affecting all the concurrent
// readers.
base::subtle::Release_Store(&g_fast_path_is_closed, 1);
// Wait until all current readers leave.
while (base::subtle::Acquire_Load(&g_operations_in_flight)) {
while (base::subtle::NoBarrier_Load(&g_operations_in_flight)) {
}
}
samples_.emplace(address, std::move(sample));
// Open the fast path.
base::subtle::Release_Store(&g_fast_path_is_closed, 0);
} else {
samples_.emplace(address, std::move(sample));
}
for (auto* observer : observers_)
observer->SampleAdded(sample.ordinal, size, total_allocated);
}
entered_.Set(false);
}
// static
void SamplingHeapProfiler::RecordFree(void* address) {
bool maybe_sampled = true; // Pessimistically assume allocation was sampled.
base::subtle::Barrier_AtomicIncrement(&g_operations_in_flight, 1);
if (LIKELY(!base::subtle::NoBarrier_Load(&g_fast_path_is_closed))) {
maybe_sampled =
instance_->samples_.find(address) != instance_->samples_.end();
}
base::subtle::Barrier_AtomicIncrement(&g_operations_in_flight, -1);
if (maybe_sampled)
instance_->DoRecordFree(address);
}
void SamplingHeapProfiler::DoRecordFree(void* address) {
if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
return;
if (entered_.Get())
return;
entered_.Set(true);
{
base::AutoLock lock(mutex_);
auto it = samples_.find(address);
if (it != samples_.end()) {
for (auto* observer : observers_)
observer->SampleRemoved(it->second.ordinal);
samples_.erase(it);
}
}
entered_.Set(false);
}
bool SamplingHeapProfiler::MayRehashOnInsert() {
size_t max_items_before_rehash =
std::floor(samples_.bucket_count() * samples_.max_load_factor());
// Conservatively use 2 instead of 1 to workaround potential rounding errors.
return samples_.size() + 2 >= max_items_before_rehash;
}
// static
SamplingHeapProfiler* SamplingHeapProfiler::GetInstance() {
static base::NoDestructor<SamplingHeapProfiler> instance;
return instance.get();
}
// static
void SamplingHeapProfiler::SuppressRandomnessForTest(bool suppress) {
g_deterministic = suppress;
}
void SamplingHeapProfiler::AddSamplesObserver(SamplesObserver* observer) {
CHECK(!entered_.Get());
entered_.Set(true);
{
base::AutoLock lock(mutex_);
observers_.push_back(observer);
}
entered_.Set(false);
}
void SamplingHeapProfiler::RemoveSamplesObserver(SamplesObserver* observer) {
CHECK(!entered_.Get());
entered_.Set(true);
{
base::AutoLock lock(mutex_);
auto it = std::find(observers_.begin(), observers_.end(), observer);
CHECK(it != observers_.end());
observers_.erase(it);
}
entered_.Set(false);
}
std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
uint32_t profile_id) {
CHECK(!entered_.Get());
entered_.Set(true);
std::vector<Sample> samples;
{
base::AutoLock lock(mutex_);
for (auto& it : samples_) {
Sample& sample = it.second;
if (sample.ordinal > profile_id)
samples.push_back(sample);
}
}
entered_.Set(false);
return samples;
}
} // namespace base