Revert "Teach gn to handle systems with > 64 processors"
This reverts commit d9bbb45cbf7b2e54328b45e4573a107849d2db25.
Reason for revert: In the context of gn running on Chromium on modern
72+ core machines this CL causes a net slowdown. It causes gn to use
"too many" threads, leading to excessive lock contention, perhaps
exacerbated by using multiple NUMA nodes.
The original change measured only a tiny speedup so the potential
gains from reverting are much greater than the potential losses.
The original revert is here:
https://gn-review.git.corp.google.com/c/gn/+/17000
However when I resolved merge conflicts it got redirected from 17000
to 17020 and I'd like to land the version that has the merge conflicts
fixed.
Original change's description:
> Teach gn to handle systems with > 64 processors
>
> Previously, when run on Windows systems with > 64 logical processors,
> gn's calculation of the number of available processors (used to
> determine how many worker threads to create) under-counted the number of
> processors, thereby not making full use of the system's available
> processing power.
>
> This change corrects gn's count of available CPUs so that it can use all
> of the system's processors. In addition to creating the correct number
> of workers, this change also distributes created workers between
> processor groups, a necessary step when utilizing more than 64
> processors on Windows.
>
> On the 72-logical-processor P920, this change makes gn gen use all 36
> cores, rather than only 18.
>
> Bug: crbug.com/982982
> Change-Id: I40b40f4d34b634566991e4bebf686f48836445e8
> Reviewed-on: https://gn-review.googlesource.com/c/gn/+/5660
> Commit-Queue: Scott Graham <scottmg@chromium.org>
> Reviewed-by: Scott Graham <scottmg@chromium.org>
# Not skipping CQ checks because original CL landed > 1 day ago.
Bug: crbug.com/982982
Change-Id: I1ede6a0ec1eaa70b06061f2534232d2c5f0e47cb
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/17020
Reviewed-by: Takuto Ikuta <tikuta@google.com>
Commit-Queue: Takuto Ikuta <tikuta@google.com>
diff --git a/src/util/sys_info.cc b/src/util/sys_info.cc
index 4cf71db..20bbdf7 100644
--- a/src/util/sys_info.cc
+++ b/src/util/sys_info.cc
@@ -122,7 +122,14 @@
return static_cast<int>(res);
#elif defined(OS_WIN)
- return ::GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
+ // On machines with more than 64 logical processors this will not return the
+ // full number of processors. Instead it will return the number of processors
+ // in one processor group - something smaller than 65. However this is okay
+ // because gn doesn't parallelize well beyond 10-20 threads - it starts to run
+ // slower.
+ SYSTEM_INFO system_info = {};
+ ::GetNativeSystemInfo(&system_info);
+ return system_info.dwNumberOfProcessors;
#else
#error
#endif
diff --git a/src/util/worker_pool.cc b/src/util/worker_pool.cc
index f594699..c1105d6 100644
--- a/src/util/worker_pool.cc
+++ b/src/util/worker_pool.cc
@@ -7,50 +7,10 @@
#include "base/command_line.h"
#include "base/strings/string_number_conversions.h"
#include "gn/switches.h"
-#include "util/build_config.h"
#include "util/sys_info.h"
-#if defined(OS_WIN)
-#include <windows.h>
-#endif
-
namespace {
-#if defined(OS_WIN)
-class ProcessorGroupSetter {
- public:
- void SetProcessorGroup(std::thread* thread);
-
- private:
- int group_ = 0;
- GROUP_AFFINITY group_affinity_;
- int num_available_cores_in_group_ = ::GetActiveProcessorCount(group_) / 2;
- const int num_groups_ = ::GetActiveProcessorGroupCount();
-};
-
-void ProcessorGroupSetter::SetProcessorGroup(std::thread* thread) {
- if (num_groups_ <= 1)
- return;
-
- const HANDLE thread_handle = HANDLE(thread->native_handle());
- ::GetThreadGroupAffinity(thread_handle, &group_affinity_);
- group_affinity_.Group = group_;
- const bool success =
- ::SetThreadGroupAffinity(thread_handle, &group_affinity_, nullptr);
- DCHECK(success);
-
- // Move to next group once one thread has been assigned per core in |group_|.
- num_available_cores_in_group_--;
- if (num_available_cores_in_group_ <= 0) {
- group_++;
- if (group_ >= num_groups_) {
- group_ = 0;
- }
- num_available_cores_in_group_ = ::GetActiveProcessorCount(group_) / 2;
- }
-}
-#endif
-
int GetThreadCount() {
std::string thread_count =
base::CommandLine::ForCurrentProcess()->GetSwitchValueString(
@@ -86,22 +46,9 @@
WorkerPool::WorkerPool() : WorkerPool(GetThreadCount()) {}
WorkerPool::WorkerPool(size_t thread_count) : should_stop_processing_(false) {
-#if defined(OS_WIN)
- ProcessorGroupSetter processor_group_setter;
-#endif
-
threads_.reserve(thread_count);
- for (size_t i = 0; i < thread_count; ++i) {
+ for (size_t i = 0; i < thread_count; ++i)
threads_.emplace_back([this]() { Worker(); });
-
-#if defined(OS_WIN)
- // Set thread processor group. This is needed for systems with more than 64
- // logical processors, wherein available processors are divided into groups,
- // and applications that need to use more than one group's processors must
- // manually assign their threads to groups.
- processor_group_setter.SetProcessorGroup(&threads_.back());
-#endif
- }
}
WorkerPool::~WorkerPool() {