Use high-performance cores on Apple Silicon for worker pool On a MacBook with an M4 Max, which has 12 high-performance + 4 efficiency cores, using only the high-performance cores is faster than using 8 threads for `gn gen`. To reduce contention on the lock guarding DependencyCache, this also shards the cache, giving each shard its own lock. `gn gen` for build config with 30k targets, ``` $ hyperfine --warmup 1 "~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default" "~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default" Benchmark 1: ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default Time (mean ± σ): 1.962 s ± 0.024 s [User: 9.090 s, System: 2.124 s] Range (min … max): 1.927 s … 1.997 s 10 runs Benchmark 2: ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default Time (mean ± σ): 1.818 s ± 0.018 s [User: 9.632 s, System: 3.990 s] Range (min … max): 1.794 s … 1.848 s 10 runs Summary ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default ran 1.08 ± 0.02 times faster than ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default ``` `gn gen --check` for build config with 30k targets, ``` $ hyperfine --warmup 1 "~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default --check" "~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default --check" Benchmark 1: ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default --check Time (mean ± σ): 6.569 s ± 0.063 s [User: 21.740 s, System: 15.753 s] Range (min … max): 6.500 s … 6.687 s 10 runs Benchmark 2: ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default --check Time (mean ± σ): 4.968 s ± 0.035 s [User: 21.436 s, System: 24.465 s] Range (min … max): 4.903 s … 5.033 s 10 runs Summary ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default --check ran 1.32 ± 0.02 times faster than ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default --check ``` `gn gen` for build config with 40k targets, ``` $ hyperfine --warmup 1 "~/ghq/gn.googlesource.com/gn/out/gn_main gen out/no_clang_modules" "~/ghq/gn.googlesource.com/gn/out/gn_shard gen 
out/no_clang_modules" Benchmark 1: ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/no_clang_modules Time (mean ± σ): 2.854 s ± 0.047 s [User: 13.619 s, System: 3.021 s] Range (min … max): 2.760 s … 2.905 s 10 runs Benchmark 2: ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/no_clang_modules Time (mean ± σ): 2.496 s ± 0.028 s [User: 14.474 s, System: 5.201 s] Range (min … max): 2.458 s … 2.545 s 10 runs Summary ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/no_clang_modules ran 1.14 ± 0.02 times faster than ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/no_clang_modules ``` `gn gen --check` for build config with 40k targets, ``` $ hyperfine --warmup 1 "~/ghq/gn.googlesource.com/gn/out/gn_main gen out/no_clang_modules --check" "~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/no_clang_modules --check" Benchmark 1: ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/no_clang_modules --check Time (mean ± σ): 21.681 s ± 0.178 s [User: 133.529 s, System: 16.690 s] Range (min … max): 21.454 s … 21.901 s 10 runs Benchmark 2: ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/no_clang_modules --check Time (mean ± σ): 15.762 s ± 0.212 s [User: 143.211 s, System: 26.597 s] Range (min … max): 15.537 s … 16.215 s 10 runs Summary ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/no_clang_modules --check ran 1.38 ± 0.02 times faster than ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/no_clang_modules --check ``` This also improves --check performance on linux, ``` $ hyperfine --warmup 1 "~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default --check" "~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default --check" Benchmark 1: ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default --check Time (mean ± σ): 8.296 s ± 0.136 s [User: 76.245 s, System: 55.341 s] Range (min … max): 8.106 s … 8.492 s 10 runs Benchmark 2: ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default --check Time (mean ± σ): 5.575 s ± 0.069 s [User: 76.813 s, System: 100.234 s] Range (min … 
max): 5.403 s … 5.653 s 10 runs Summary ~/ghq/gn.googlesource.com/gn/out/gn_shard gen out/Default --check ran 1.49 ± 0.03 times faster than ~/ghq/gn.googlesource.com/gn/out/gn_main gen out/Default --check ``` Bug: 484862257 Bug: 484863025 Change-Id: I7d54762dcdfaaddb81aff6fdeabb3f7efb3f9590 Reviewed-on: https://gn-review.googlesource.com/c/gn/+/21100 Reviewed-by: David Turner <digit@google.com> Reviewed-by: Sylvain Defresne <sdefresne@chromium.org> Commit-Queue: Takuto Ikuta <tikuta@google.com>

diff --git a/build/gen.py b/build/gen.py
index eb07c22..6ad60af 100755
--- a/build/gen.py
+++ b/build/gen.py

@@ -912,6 +912,7 @@
         'src/gn/xcode_object_unittest.cc',
         'src/gn/xml_element_writer_unittest.cc',
         'src/util/atomic_write_unittest.cc',
+        'src/util/sys_info_unittest.cc',
         'src/util/test/gn_test.cc',
       ], 'libs': []},
   }

diff --git a/src/gn/header_checker.cc b/src/gn/header_checker.cc
index d363591..70fd300 100644
--- a/src/gn/header_checker.cc
+++ b/src/gn/header_checker.cc

@@ -177,7 +177,7 @@
       check_generated_(check_generated),
       check_system_(check_system),
       targets_count_(targets.size()),
-      lock_(),
+      errors_lock_(),
       task_count_cv_() {
   for (auto* target : targets)
     AddTargetToFileMap(target, &file_map_);
@@ -247,7 +247,7 @@
 void HeaderChecker::DoWork(const Target* target, const SourceFile& file) {
   std::vector<Err> errors;
   if (!CheckFile(target, file, &errors)) {
-    std::lock_guard<std::shared_mutex> lock(lock_);
+    std::lock_guard<std::mutex> lock(errors_lock_);
     errors_.insert(errors_.end(), errors.begin(), errors.end());
   }
 
@@ -581,10 +581,15 @@
     return false;
   }
 
+  size_t hash_for = search_for->label().hash();
+  size_t hash_from = search_from->label().hash();
+  size_t shard_index = (hash_for ^ hash_from) % kNumShards;
+  auto& shard = dependency_cache_[shard_index];
+
   {
-    std::shared_lock<std::shared_mutex> lock(lock_);
-    auto it = dependency_cache_.find(std::make_pair(search_for, search_from));
-    if (it != dependency_cache_.end()) {
+    std::shared_lock<std::shared_mutex> lock(shard.lock);
+    auto it = shard.cache.find(std::make_pair(search_for, search_from));
+    if (it != shard.cache.end()) {
       if (it->second == DependencyState::kNotADependency) {
         *is_permitted = false;
         return false;
@@ -604,8 +609,8 @@
   // Find the shortest public dependency chain.
   if (IsDependencyOf(search_for, search_from, true, chain)) {
     *is_permitted = true;
-    std::unique_lock<std::shared_mutex> lock(lock_);
-    dependency_cache_[std::make_pair(search_for, search_from)] =
+    std::unique_lock<std::shared_mutex> lock(shard.lock);
+    shard.cache[std::make_pair(search_for, search_from)] =
         DependencyState::kPermittedDependency;
     return true;
   }
@@ -613,15 +618,15 @@
   // If not, try to find any dependency chain at all.
   if (IsDependencyOf(search_for, search_from, false, chain)) {
     *is_permitted = false;
-    std::unique_lock<std::shared_mutex> lock(lock_);
-    dependency_cache_[std::make_pair(search_for, search_from)] =
+    std::unique_lock<std::shared_mutex> lock(shard.lock);
+    shard.cache[std::make_pair(search_for, search_from)] =
         DependencyState::kNonPermittedDependency;
     return true;
   }
 
   *is_permitted = false;
-  std::unique_lock<std::shared_mutex> lock(lock_);
-  dependency_cache_[std::make_pair(search_for, search_from)] =
+  std::unique_lock<std::shared_mutex> lock(shard.lock);
+  shard.cache[std::make_pair(search_for, search_from)] =
       DependencyState::kNotADependency;
   return false;
 }

diff --git a/src/gn/header_checker.h b/src/gn/header_checker.h
index 3f313fb..8150bdf 100644
--- a/src/gn/header_checker.h
+++ b/src/gn/header_checker.h

@@ -5,6 +5,7 @@
 #ifndef TOOLS_GN_HEADER_CHECKER_H_
 #define TOOLS_GN_HEADER_CHECKER_H_
 
+#include <array>
 #include <condition_variable>
 #include <functional>
 #include <map>
@@ -203,15 +204,21 @@
   using DependencyCache =
       std::map<std::pair<const Target*, const Target*>, DependencyState>;
 
+  static constexpr size_t kNumShards = 64;  // dependency_cache_ shard count.
+  struct DependencyCacheShard {
+    mutable std::shared_mutex lock;  // Guards |cache|.
+    DependencyCache cache;
+  };
+
   // Locked variables ----------------------------------------------------------
   //
   // These are mutable during runtime and require locking.
 
-  mutable std::shared_mutex lock_;
+  mutable std::mutex errors_lock_;
 
   std::vector<Err> errors_;
 
-  mutable DependencyCache dependency_cache_;
+  mutable std::array<DependencyCacheShard, kNumShards> dependency_cache_;
 
   // Separate lock for task count synchronization since std::condition_variable
   // only works with std::unique_lock<std::mutex>.

diff --git a/src/util/sys_info.cc b/src/util/sys_info.cc
index cc75c45..386ba8b 100644
--- a/src/util/sys_info.cc
+++ b/src/util/sys_info.cc

@@ -12,6 +12,11 @@
 #include <unistd.h>
 #endif
 
+#if defined(OS_MACOSX)
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#endif
+
 #if defined(OS_WIN)
 #include <windows.h>
 #include "base/win/registry.h"
@@ -135,3 +140,21 @@
 #error
 #endif
 }
+
+int NumberOfPerformanceProcessors() {
+#if defined(OS_MACOSX) && defined(ARCH_CPU_ARM64)
+  // A failed or non-positive sysctl result falls back to NumberOfProcessors().
+  int nperflevels = 0;
+  size_t len = sizeof(nperflevels);
+  if (sysctlbyname("hw.nperflevels", &nperflevels, &len, nullptr, 0) == 0 &&
+      nperflevels > 0) {
+    int cores = 0;
+    len = sizeof(cores);
+    static constexpr char kKey[] = "hw.perflevel0.physicalcpu";
+    if (sysctlbyname(kKey, &cores, &len, nullptr, 0) == 0 && cores > 0) {
+      return cores;
+    }
+  }
+#endif
+  return NumberOfProcessors();
+}

diff --git a/src/util/sys_info.h b/src/util/sys_info.h
index 7a6924d..904e568 100644
--- a/src/util/sys_info.h
+++ b/src/util/sys_info.h

@@ -11,4 +11,9 @@
 std::string OperatingSystemArchitecture();
 int NumberOfProcessors();
 
+// Returns the number of high-performance processors on the system.
+// Currently only implemented on macOS running on Apple Silicon (arm64).
+// On other platforms, returns NumberOfProcessors().
+int NumberOfPerformanceProcessors();
+
 #endif  // UTIL_SYS_INFO_H_

diff --git a/src/util/sys_info_unittest.cc b/src/util/sys_info_unittest.cc
new file mode 100644
index 0000000..ab39cc8
--- /dev/null
+++ b/src/util/sys_info_unittest.cc

@@ -0,0 +1,28 @@
+// Copyright 2026 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "util/sys_info.h"
+
+#include "util/build_config.h"
+#include "util/test/test.h"
+
+TEST(SysInfoTest, NumberOfProcessors) {
+  // Any machine able to run this test has at least one processor.
+  EXPECT_GT(NumberOfProcessors(), 0);
+}
+
+TEST(SysInfoTest, NumberOfPerformanceProcessors) {
+  int num_perf_processors = NumberOfPerformanceProcessors();
+  // Every platform must report at least one performance processor.
+  EXPECT_GT(num_perf_processors, 0);
+
+#if defined(OS_MACOSX) && defined(ARCH_CPU_ARM64)
+  // Performance cores are a subset of all processors; the count may equal the
+  // total (e.g. on fallback to NumberOfProcessors()) but never exceed it.
+  EXPECT_LE(num_perf_processors, NumberOfProcessors());
+#else
+  // On other platforms, it returns NumberOfProcessors().
+  EXPECT_EQ(num_perf_processors, NumberOfProcessors());
+#endif
+}

diff --git a/src/util/worker_pool.cc b/src/util/worker_pool.cc
index 9efc1d2..ac903fa 100644
--- a/src/util/worker_pool.cc
+++ b/src/util/worker_pool.cc

@@ -24,6 +24,12 @@
     return result;
   }
 
+#if defined(OS_MACOSX) && defined(ARCH_CPU_ARM64)
+  // On Apple Silicon, we want to use only the high-performance cores.
+  // These cores are not hyperthreaded.
+  return NumberOfPerformanceProcessors();
+#endif
+
   // Almost all CPUs now are hyperthreaded.
   int num_cores = NumberOfProcessors() / 2;