Revert "Teach gn to handle systems with > 64 processors" This reverts commit d9bbb45cbf7b2e54328b45e4573a107849d2db25. Reason for revert: In the context of gn running on Chromium on modern 72+ core machines this CL causes a net slowdown. It causes gn to use "too many" threads, leading to excessive lock contention, perhaps exacerbated by using multiple NUMA nodes. The original change measured only a tiny speedup so the potential gains from reverting are much greater than the potential losses. The original revert is here: https://gn-review.git.corp.google.com/c/gn/+/17000 However when I resolved merge conflicts it got redirected from 17000 to 17020 and I'd like to land the version that has the merge conflicts fixed. Original change's description: > Teach gn to handle systems with > 64 processors > > Previously, when run on Windows systems with > 64 logical processors, > gn's calculation of the number of available processors (used to > determine how many worker threads to create) under-counted the number of > processors, thereby not making full use of the system's available > processing power. > > This change corrects gn's count of available CPUs so that it can use all > of the system's processors. In addition to creating the correct number > of workers, this change also distributes created workers between > processor groups, a necessary step when utilizing more than 64 > processors on Windows. > > On the 72-logical-processor P920, this change makes gn gen use all 36 > cores, rather than only 18. > > Bug: crbug.com/982982 > Change-Id: I40b40f4d34b634566991e4bebf686f48836445e8 > Reviewed-on: https://gn-review.googlesource.com/c/gn/+/5660 > Commit-Queue: Scott Graham <scottmg@chromium.org> > Reviewed-by: Scott Graham <scottmg@chromium.org> # Not skipping CQ checks because original CL landed > 1 day ago. Bug: crbug.com/982982 Change-Id: I1ede6a0ec1eaa70b06061f2534232d2c5f0e47cb Reviewed-on: https://gn-review.googlesource.com/c/gn/+/17020 Reviewed-by: Takuto Ikuta <tikuta@google.com> Commit-Queue: Takuto Ikuta <tikuta@google.com>
diff --git a/src/util/sys_info.cc b/src/util/sys_info.cc index 4cf71db..20bbdf7 100644 --- a/src/util/sys_info.cc +++ b/src/util/sys_info.cc
@@ -122,7 +122,14 @@ return static_cast<int>(res); #elif defined(OS_WIN) - return ::GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); + // On machines with more than 64 logical processors this will not return the + // full number of processors. Instead it will return the number of processors + // in one processor group - something smaller than 65. However this is okay + // because gn doesn't parallelize well beyond 10-20 threads - it starts to run + // slower. + SYSTEM_INFO system_info = {}; + ::GetNativeSystemInfo(&system_info); + return system_info.dwNumberOfProcessors; #else #error #endif
diff --git a/src/util/worker_pool.cc b/src/util/worker_pool.cc index f594699..c1105d6 100644 --- a/src/util/worker_pool.cc +++ b/src/util/worker_pool.cc
@@ -7,50 +7,10 @@ #include "base/command_line.h" #include "base/strings/string_number_conversions.h" #include "gn/switches.h" -#include "util/build_config.h" #include "util/sys_info.h" -#if defined(OS_WIN) -#include <windows.h> -#endif - namespace { -#if defined(OS_WIN) -class ProcessorGroupSetter { - public: - void SetProcessorGroup(std::thread* thread); - - private: - int group_ = 0; - GROUP_AFFINITY group_affinity_; - int num_available_cores_in_group_ = ::GetActiveProcessorCount(group_) / 2; - const int num_groups_ = ::GetActiveProcessorGroupCount(); -}; - -void ProcessorGroupSetter::SetProcessorGroup(std::thread* thread) { - if (num_groups_ <= 1) - return; - - const HANDLE thread_handle = HANDLE(thread->native_handle()); - ::GetThreadGroupAffinity(thread_handle, &group_affinity_); - group_affinity_.Group = group_; - const bool success = - ::SetThreadGroupAffinity(thread_handle, &group_affinity_, nullptr); - DCHECK(success); - - // Move to next group once one thread has been assigned per core in |group_|. - num_available_cores_in_group_--; - if (num_available_cores_in_group_ <= 0) { - group_++; - if (group_ >= num_groups_) { - group_ = 0; - } - num_available_cores_in_group_ = ::GetActiveProcessorCount(group_) / 2; - } -} -#endif - int GetThreadCount() { std::string thread_count = base::CommandLine::ForCurrentProcess()->GetSwitchValueString( @@ -86,22 +46,9 @@ WorkerPool::WorkerPool() : WorkerPool(GetThreadCount()) {} WorkerPool::WorkerPool(size_t thread_count) : should_stop_processing_(false) { -#if defined(OS_WIN) - ProcessorGroupSetter processor_group_setter; -#endif - threads_.reserve(thread_count); - for (size_t i = 0; i < thread_count; ++i) { + for (size_t i = 0; i < thread_count; ++i) threads_.emplace_back([this]() { Worker(); }); - -#if defined(OS_WIN) - // Set thread processor group. This is needed for systems with more than 64 - // logical processors, wherein available processors are divided into groups, - // and applications that need to use more than one group's processors must - // manually assign their threads to groups. - processor_group_setter.SetProcessorGroup(&threads_.back()); -#endif - } } WorkerPool::~WorkerPool() {