// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/sampling_heap_profiler.h"

#include <algorithm>
#include <cmath>
#include <utility>

#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/atomicops.h"
#include "base/debug/stack_trace.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/rand_util.h"
#include "base/threading/thread_local_storage.h"
#include "build/build_config.h"

namespace base {

using base::allocator::AllocatorDispatch;
using base::subtle::Atomic32;
using base::subtle::AtomicWord;

namespace {

// Controls how many top frames to skip when recording the call stack.
// These frames correspond to the profiler's own frames.
const uint32_t kSkipBaseAllocatorFrames = 2;

const size_t kDefaultSamplingIntervalBytes = 128 * 1024;

// When true, sample intervals are not randomized. Used for testing.
bool g_deterministic;

// Incremented by Start() and decremented by Stop(); profiling is considered
// running while this value is positive.
Atomic32 g_running;

// Number of lock-free accesses to the samples_ map currently in flight. These
// accesses are safe only as long as no rehashing of the map takes place.
Atomic32 g_operations_in_flight;

// Controls whether new incoming lock-free accesses are allowed. When set to a
// non-zero value, threads must not enter the lock-free paths.
Atomic32 g_fast_path_is_closed;
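
// Writer-side protocol (see DoRecordAlloc): when inserting into samples_ may
// cause a rehash, the writer closes the fast path, spins until
// g_operations_in_flight drains to zero, performs the insertion, and then
// reopens the fast path.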

// Sampling interval parameter, the mean value for intervals between samples.
AtomicWord g_sampling_interval = kDefaultSamplingIntervalBytes;

// Last generated sample ordinal number.
uint32_t g_last_sample_ordinal = 0;

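// One-shot handshake between InstallAllocatorHooks() and
// SetHooksInstallCallback(): whichever of the two runs second observes
// g_hooks_installed already set and invokes the callback itself.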
void (*g_hooks_install_callback)();
Atomic32 g_hooks_installed;

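// Each allocator shim hook below reports the event to the profiler and then
// forwards the call to the next dispatch in the shim chain.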
void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
  void* address = self->next->alloc_function(self->next, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void* AllocZeroInitializedFn(const AllocatorDispatch* self,
                             size_t n,
                             size_t size,
                             void* context) {
  void* address =
      self->next->alloc_zero_initialized_function(self->next, n, size, context);
  SamplingHeapProfiler::RecordAlloc(address, n * size,
                                    kSkipBaseAllocatorFrames);
  return address;
}

void* AllocAlignedFn(const AllocatorDispatch* self,
                     size_t alignment,
                     size_t size,
                     void* context) {
  void* address =
      self->next->alloc_aligned_function(self->next, alignment, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void* ReallocFn(const AllocatorDispatch* self,
                void* address,
                size_t size,
                void* context) {
  // Note: size == 0 actually performs free.
  SamplingHeapProfiler::RecordFree(address);
  address = self->next->realloc_function(self->next, address, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
  SamplingHeapProfiler::RecordFree(address);
  self->next->free_function(self->next, address, context);
}

size_t GetSizeEstimateFn(const AllocatorDispatch* self,
                         void* address,
                         void* context) {
  return self->next->get_size_estimate_function(self->next, address, context);
}

unsigned BatchMallocFn(const AllocatorDispatch* self,
                       size_t size,
                       void** results,
                       unsigned num_requested,
                       void* context) {
  unsigned num_allocated = self->next->batch_malloc_function(
      self->next, size, results, num_requested, context);
  for (unsigned i = 0; i < num_allocated; ++i) {
    SamplingHeapProfiler::RecordAlloc(results[i], size,
                                      kSkipBaseAllocatorFrames);
  }
  return num_allocated;
}

void BatchFreeFn(const AllocatorDispatch* self,
                 void** to_be_freed,
                 unsigned num_to_be_freed,
                 void* context) {
  for (unsigned i = 0; i < num_to_be_freed; ++i)
    SamplingHeapProfiler::RecordFree(to_be_freed[i]);
  self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
                                  context);
}

void FreeDefiniteSizeFn(const AllocatorDispatch* self,
                        void* address,
                        size_t size,
                        void* context) {
  SamplingHeapProfiler::RecordFree(address);
  self->next->free_definite_size_function(self->next, address, size, context);
}

AllocatorDispatch g_allocator_dispatch = {&AllocFn,
                                          &AllocZeroInitializedFn,
                                          &AllocAlignedFn,
                                          &ReallocFn,
                                          &FreeFn,
                                          &GetSizeEstimateFn,
                                          &BatchMallocFn,
                                          &BatchFreeFn,
                                          &FreeDefiniteSizeFn,
                                          nullptr};

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

void PartitionAllocHook(void* address, size_t size, const char*) {
  SamplingHeapProfiler::RecordAlloc(address, size);
}

void PartitionFreeHook(void* address) {
  SamplingHeapProfiler::RecordFree(address);
}

#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

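// Per-thread byte countdown used by RecordAlloc(); wrapped in NoDestructor so
// the TLS slot object itself is never destroyed.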
ThreadLocalStorage::Slot& AccumulatedBytesTLS() {
  static base::NoDestructor<base::ThreadLocalStorage::Slot>
      accumulated_bytes_tls;
  return *accumulated_bytes_tls;
}

}  // namespace

SamplingHeapProfiler::Sample::Sample(size_t size,
                                     size_t total,
                                     uint32_t ordinal)
    : size(size), total(total), ordinal(ordinal) {}

SamplingHeapProfiler::Sample::Sample(const Sample&) = default;

SamplingHeapProfiler::Sample::~Sample() = default;

SamplingHeapProfiler* SamplingHeapProfiler::instance_;

SamplingHeapProfiler::SamplingHeapProfiler() {
  instance_ = this;
}

// static
void SamplingHeapProfiler::InitTLSSlot() {
  // Preallocate the TLS slot early, so it can't cause reentrancy issues
  // when sampling is started.
  ignore_result(AccumulatedBytesTLS().Get());
}

// static
void SamplingHeapProfiler::InstallAllocatorHooksOnce() {
  static bool hook_installed = InstallAllocatorHooks();
  ignore_result(hook_installed);
}

// static
bool SamplingHeapProfiler::InstallAllocatorHooks() {
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
  base::allocator::InsertAllocatorDispatch(&g_allocator_dispatch);
#else
  ignore_result(g_allocator_dispatch);
  DLOG(WARNING)
      << "base::allocator shims are not available for memory sampling.";
#endif  // BUILDFLAG(USE_ALLOCATOR_SHIM)

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
  base::PartitionAllocHooks::SetAllocationHook(&PartitionAllocHook);
  base::PartitionAllocHooks::SetFreeHook(&PartitionFreeHook);
#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

  int32_t hooks_install_callback_has_been_set =
      base::subtle::Acquire_CompareAndSwap(&g_hooks_installed, 0, 1);
  if (hooks_install_callback_has_been_set)
    g_hooks_install_callback();

  return true;
}

// static
void SamplingHeapProfiler::SetHooksInstallCallback(
    void (*hooks_install_callback)()) {
  CHECK(!g_hooks_install_callback && hooks_install_callback);
  g_hooks_install_callback = hooks_install_callback;

  int32_t profiler_has_already_been_initialized =
      base::subtle::Release_CompareAndSwap(&g_hooks_installed, 0, 1);
  if (profiler_has_already_been_initialized)
    g_hooks_install_callback();
}

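// Returns the ordinal of the most recently recorded sample; passing that
// value to GetSamples() later yields only the samples collected after this
// call to Start().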
uint32_t SamplingHeapProfiler::Start() {
  InstallAllocatorHooksOnce();
  base::subtle::Barrier_AtomicIncrement(&g_running, 1);
  return g_last_sample_ordinal;
}

void SamplingHeapProfiler::Stop() {
  AtomicWord count = base::subtle::Barrier_AtomicIncrement(&g_running, -1);
  CHECK_GE(count, 0);
}

void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
  // TODO(alph): Reset the sample being collected if running.
  base::subtle::Release_Store(&g_sampling_interval,
                              static_cast<AtomicWord>(sampling_interval));
}

// static
size_t SamplingHeapProfiler::GetNextSampleInterval(size_t interval) {
  if (UNLIKELY(g_deterministic))
    return interval;

  // We sample with a Poisson process, with constant average sampling
  // interval. This follows the exponential probability distribution with
  // parameter λ = 1/interval where |interval| is the average number of bytes
  // between samples.
  // Let u be a uniformly distributed random number between 0 and 1, then
  // next_sample = -ln(u) / λ
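  // For example, with the default 128 KiB mean interval, a uniform draw of
  // u = 0.5 gives -ln(0.5) * 131072 ≈ 0.693 * 131072 ≈ 90850 bytes until the
  // next sample.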
  double uniform = base::RandDouble();
  double value = -log(uniform) * interval;
  size_t min_value = sizeof(intptr_t);
  // We limit the upper bound of a sample interval to make sure we don't have
  // huge gaps in the sampling stream. The probability of hitting the upper
  // bound is exp(-20) ~ 2e-9, so it should not skew the distribution.
  size_t max_value = interval * 20;
  if (UNLIKELY(value < min_value))
    return min_value;
  if (UNLIKELY(value > max_value))
    return max_value;
  return static_cast<size_t>(value);
}

// static
void SamplingHeapProfiler::RecordAlloc(void* address,
                                       size_t size,
                                       uint32_t skip_frames) {
  if (UNLIKELY(!base::subtle::NoBarrier_Load(&g_running)))
    return;
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;

  // TODO(alph): On macOS the hook may be called several times for a single
  // allocation. Handle the case.

  intptr_t accumulated_bytes =
      reinterpret_cast<intptr_t>(AccumulatedBytesTLS().Get());
  accumulated_bytes += size;
  if (LIKELY(accumulated_bytes < 0)) {
    AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));
    return;
  }

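  // The TLS value is a byte countdown kept negative while below the sampling
  // threshold: each randomized interval drawn below is subtracted from it, so
  // reaching zero or above means enough bytes have been allocated to warrant
  // at least one sample. The division handles single allocations large enough
  // to span several mean intervals.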
  size_t mean_interval = base::subtle::NoBarrier_Load(&g_sampling_interval);
  size_t samples = accumulated_bytes / mean_interval;
  accumulated_bytes %= mean_interval;

  do {
    accumulated_bytes -= GetNextSampleInterval(mean_interval);
    ++samples;
  } while (accumulated_bytes >= 0);

  AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));

  instance_->DoRecordAlloc(samples * mean_interval, size, address, skip_frames);
}

void SamplingHeapProfiler::RecordStackTrace(Sample* sample,
                                            uint32_t skip_frames) {
#if !defined(OS_NACL)
  // TODO(alph): Consider using debug::TraceStackFramePointers. It should be
  // somewhat faster than base::debug::StackTrace.
  base::debug::StackTrace trace;
  size_t count;
  void* const* addresses = const_cast<void* const*>(trace.Addresses(&count));
  const uint32_t kSkipProfilerOwnFrames = 2;
  skip_frames += kSkipProfilerOwnFrames;
  sample->stack.insert(
      sample->stack.end(), &addresses[skip_frames],
      &addresses[std::max(count, static_cast<size_t>(skip_frames))]);
#endif
}

void SamplingHeapProfiler::DoRecordAlloc(size_t total_allocated,
                                         size_t size,
                                         void* address,
                                         uint32_t skip_frames) {
  if (entered_.Get())
    return;
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);

    Sample sample(size, total_allocated, ++g_last_sample_ordinal);
    RecordStackTrace(&sample, skip_frames);

    if (MayRehashOnInsert()) {
      // Close the fast path, as inserting an element into samples_ may cause
      // a rehash that invalidates iterators and thus affects all concurrent
      // readers.
      base::subtle::Release_Store(&g_fast_path_is_closed, 1);
      // Wait until all current readers leave.
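      // The inner loop spins on a plain load to keep the wait cheap; the
      // outer acquire load then re-checks the counter with the required
      // memory ordering once it appears to have reached zero.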
      while (base::subtle::Acquire_Load(&g_operations_in_flight)) {
        while (base::subtle::NoBarrier_Load(&g_operations_in_flight)) {
        }
      }
      samples_.emplace(address, std::move(sample));
      // Open the fast path.
      base::subtle::Release_Store(&g_fast_path_is_closed, 0);
    } else {
      samples_.emplace(address, std::move(sample));
    }

    for (auto* observer : observers_)
      observer->SampleAdded(sample.ordinal, size, total_allocated);
  }

  entered_.Set(false);
}

// static
void SamplingHeapProfiler::RecordFree(void* address) {
  bool maybe_sampled = true;  // Pessimistically assume allocation was sampled.
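  // Register this read via g_operations_in_flight so that a writer about to
  // rehash samples_ can wait for it to finish; while the fast path is open
  // the membership check runs without taking the lock.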
  base::subtle::Barrier_AtomicIncrement(&g_operations_in_flight, 1);
  if (LIKELY(!base::subtle::NoBarrier_Load(&g_fast_path_is_closed))) {
    maybe_sampled =
        instance_->samples_.find(address) != instance_->samples_.end();
  }
  base::subtle::Barrier_AtomicIncrement(&g_operations_in_flight, -1);
  if (maybe_sampled)
    instance_->DoRecordFree(address);
}

void SamplingHeapProfiler::DoRecordFree(void* address) {
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;
  if (entered_.Get())
    return;
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    auto it = samples_.find(address);
    if (it != samples_.end()) {
      for (auto* observer : observers_)
        observer->SampleRemoved(it->second.ordinal);
      samples_.erase(it);
    }
  }
  entered_.Set(false);
}

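// A standard unordered container rehashes, invalidating all of its iterators,
// once its size would exceed bucket_count() * max_load_factor().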
bool SamplingHeapProfiler::MayRehashOnInsert() {
  size_t max_items_before_rehash =
      std::floor(samples_.bucket_count() * samples_.max_load_factor());
  // Conservatively use 2 instead of 1 to work around potential rounding
  // errors.
  return samples_.size() + 2 >= max_items_before_rehash;
}

// static
SamplingHeapProfiler* SamplingHeapProfiler::GetInstance() {
  static base::NoDestructor<SamplingHeapProfiler> instance;
  return instance.get();
}

// static
void SamplingHeapProfiler::SuppressRandomnessForTest(bool suppress) {
  g_deterministic = suppress;
}

void SamplingHeapProfiler::AddSamplesObserver(SamplesObserver* observer) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    observers_.push_back(observer);
  }
  entered_.Set(false);
}

void SamplingHeapProfiler::RemoveSamplesObserver(SamplesObserver* observer) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    auto it = std::find(observers_.begin(), observers_.end(), observer);
    CHECK(it != observers_.end());
    observers_.erase(it);
  }
  entered_.Set(false);
}

std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
    uint32_t profile_id) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  std::vector<Sample> samples;
  {
    base::AutoLock lock(mutex_);
    for (auto& it : samples_) {
      Sample& sample = it.second;
      if (sample.ordinal > profile_id)
        samples.push_back(sample);
    }
  }
  entered_.Set(false);
  return samples;
}

}  // namespace base