Reduce the number of worker threads GN uses.
I noticed the main thread is being starved, which slows down execution. On a Z620, using the new default improves generation speed for Chrome by 20%.
This also adds trace events for the OnResolved step of target resolution which is a bottleneck in some cases.
Review URL: https://codereview.chromium.org/1464463003
Cr-Original-Commit-Position: refs/heads/master@{#360955}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: d0f8e7b23cd48a8e4685e82b7beffb0930554bba
diff --git a/tools/gn/scheduler.cc b/tools/gn/scheduler.cc
index 622019e..dfc877d 100644
--- a/tools/gn/scheduler.cc
+++ b/tools/gn/scheduler.cc
@@ -4,25 +4,59 @@
#include "tools/gn/scheduler.h"
+#include <algorithm>
+
#include "base/bind.h"
#include "base/command_line.h"
#include "base/strings/string_number_conversions.h"
+#include "build/build_config.h"
#include "tools/gn/standard_out.h"
#include "tools/gn/switches.h"
+#if defined(OS_WIN)
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+
Scheduler* g_scheduler = nullptr;
namespace {
+#if defined(OS_WIN)
+int GetCPUCount() {
+ SYSTEM_INFO sysinfo;
+ ::GetSystemInfo(&sysinfo);
+ return sysinfo.dwNumberOfProcessors;
+}
+#else
+int GetCPUCount() {
+ return static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN));
+}
+#endif
+
int GetThreadCount() {
std::string thread_count =
base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
switches::kThreads);
+ // See if an override was specified on the command line.
int result;
- if (thread_count.empty() || !base::StringToInt(thread_count, &result))
- return 32;
- return result;
+ if (!thread_count.empty() && base::StringToInt(thread_count, &result))
+ return result;
+
+ // Base the default number of worker threads on number of cores in the
+ // system. When building large projects, the speed can be limited by how fast
+ // the main thread can dispatch work and connect the dependency graph. If
+ // there are too many worker threads, the main thread can be starved and it
+ // will run slower overall.
+ //
+ // One less worker thread than the number of physical CPUs seems to be a
+ // good value, both theoretically and experimentally. But always use at
+ // least three workers to prevent us from being too sensitive to I/O latency
+ // on low-end systems.
+ int num_cores = GetCPUCount() / 2; // Almost all CPUs now are hyperthreaded.
+ return std::max(num_cores - 1, 3);
}
} // namespace
diff --git a/tools/gn/target.cc b/tools/gn/target.cc
index 3655982..4a93abc 100644
--- a/tools/gn/target.cc
+++ b/tools/gn/target.cc
@@ -12,6 +12,7 @@
#include "tools/gn/filesystem_utils.h"
#include "tools/gn/scheduler.h"
#include "tools/gn/substitution_writer.h"
+#include "tools/gn/trace.h"
namespace {
@@ -156,6 +157,9 @@
DCHECK(output_type_ != UNKNOWN);
DCHECK(toolchain_) << "Toolchain should have been set before resolving.";
+ ScopedTrace trace(TraceItem::TRACE_ON_RESOLVED, label());
+ trace.SetToolchain(settings()->toolchain_label());
+
// Copy our own dependent configs to the list of configs applying to us.
configs_.Append(all_dependent_configs_.begin(), all_dependent_configs_.end());
MergePublicConfigsFrom(this, &configs_);
diff --git a/tools/gn/trace.cc b/tools/gn/trace.cc
index c810793..4746310 100644
--- a/tools/gn/trace.cc
+++ b/tools/gn/trace.cc
@@ -202,6 +202,7 @@
case TraceItem::TRACE_FILE_LOAD:
case TraceItem::TRACE_FILE_WRITE:
case TraceItem::TRACE_DEFINE_TARGET:
+ case TraceItem::TRACE_ON_RESOLVED:
break; // Ignore these for the summary.
}
}
@@ -281,6 +282,9 @@
case TraceItem::TRACE_DEFINE_TARGET:
out << "\"define\"";
break;
+ case TraceItem::TRACE_ON_RESOLVED:
+ out << "\"onresolved\"";
+ break;
case TraceItem::TRACE_CHECK_HEADER:
out << "\"hdr\"";
break;
diff --git a/tools/gn/trace.h b/tools/gn/trace.h
index 384907b..e32f319 100644
--- a/tools/gn/trace.h
+++ b/tools/gn/trace.h
@@ -25,6 +25,7 @@
TRACE_FILE_WRITE,
TRACE_SCRIPT_EXECUTE,
TRACE_DEFINE_TARGET,
+ TRACE_ON_RESOLVED,
TRACE_CHECK_HEADER, // One file.
TRACE_CHECK_HEADERS, // All files.
};