Optimize HeaderChecker file I/O by parsing files only once
Instead of scheduling a separate task for each Target that includes a
given file, this change groups targets by the file they need to check.
We then pass the list of targets to `CheckFile`, which opens the file,
reads its contents, and parses the `#include` directives exactly once.
This significantly reduces:
- File I/O (open/read/close operations are reduced to 1 per file)
- CIncludeIterator allocations and parsing overhead
- Number of tasks scheduled in WorkerPool, reducing condition variable waits
and thread synchronization overhead.
Based on local sample profiling with `gn gen --check` on no clang
modules build, total self-time spent in system locks (WorkerPool waits)
and I/O decreased substantially.
`hyperfine` benchmarking shows ~1.23x speedup and a ~43% reduction in
`System` execution time (kernel mode threads / locks / IO overhead) on
macOS:
Benchmark 1: out/gn_main gen ...
Time (mean ± σ): 7.689 s ± 0.049 s [User: 27.350 s, System: 38.073 s]
Range (min … max): 7.581 s … 7.749 s 10 runs
Benchmark 2: out/gn_fast gen ...
Time (mean ± σ): 6.265 s ± 0.047 s [User: 27.103 s, System: 21.643 s]
Range (min … max): 6.149 s … 6.312 s 10 runs
Summary
out/gn_fast ... ran
1.23 ± 0.01 times faster than out/gn_main ...
Bug: 484862257
Change-Id: If5cb8b04674fce929d972e4759021578e79e87ed
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/21220
Reviewed-by: David Turner <digit@google.com>
Commit-Queue: Takuto Ikuta <tikuta@google.com>
diff --git a/src/gn/header_checker.cc b/src/gn/header_checker.cc
index c5c2bfe..aba451b 100644
--- a/src/gn/header_checker.cc
+++ b/src/gn/header_checker.cc
@@ -194,14 +194,18 @@
continue;
}
+ std::vector<const Target*> targets_to_check;
for (const auto& vect_i : file.second) {
if (vect_i.target->check_includes()) {
- task_count_.Increment();
- pool.PostTask([this, target = vect_i.target, file = file.first]() {
- DoWork(target, file);
- });
+ targets_to_check.push_back(vect_i.target);
}
}
+ if (targets_to_check.empty())
+ continue;
+
+ task_count_.Increment();
+ pool.PostTask([this, targets = std::move(targets_to_check),
+ file = file.first]() { DoWork(targets, file); });
}
task_count_.Decrement();
@@ -212,9 +216,10 @@
task_count_cv_.wait(auto_lock);
}
-void HeaderChecker::DoWork(const Target* target, const SourceFile& file) {
+void HeaderChecker::DoWork(const std::vector<const Target*>& targets,
+ const SourceFile& file) {
std::vector<Err> errors;
- if (!CheckFile(target, file, &errors)) {
+ if (!CheckFile(targets, file, &errors)) {
std::lock_guard<std::mutex> lock(errors_lock_);
errors_.insert(errors_.end(), errors.begin(), errors.end());
}
@@ -407,7 +412,7 @@
return true;
}
-bool HeaderChecker::CheckFile(const Target* from_target,
+bool HeaderChecker::CheckFile(const std::vector<const Target*>& targets,
const SourceFile& file,
std::vector<Err>* errors) const {
ScopedTrace trace(TraceItem::TRACE_CHECK_HEADER, file.value());
@@ -427,44 +432,54 @@
if (IsFileInOuputDir(file))
return true;
- errors->emplace_back(from_target->defined_from(), "Source file not found.",
- "The target:\n " +
- from_target->label().GetUserVisibleName(false) +
- "\nhas a source file:\n " + file.value() +
- "\nwhich was not found.");
+ for (const Target* from_target : targets) {
+ errors->emplace_back(
+ from_target->defined_from(), "Source file not found.",
+ "The target:\n " + from_target->label().GetUserVisibleName(false) +
+ "\nhas a source file:\n " + file.value() +
+ "\nwhich was not found.");
+ }
return false;
}
InputFile input_file(file);
input_file.SetContents(contents);
- std::vector<SourceDir> include_dirs;
- for (ConfigValuesIterator iter(from_target); !iter.done(); iter.Next()) {
- const std::vector<SourceDir>& target_include_dirs =
- iter.cur().include_dirs();
- include_dirs.insert(include_dirs.end(), target_include_dirs.begin(),
- target_include_dirs.end());
- }
-
- size_t error_count_before = errors->size();
+ std::vector<IncludeStringWithLocation> includes;
CIncludeIterator iter(&input_file);
-
IncludeStringWithLocation include;
-
- // Get the cache for the source target once for the whole file.
- ReachabilityCache& from_target_cache =
- GetReachabilityCacheForTarget(from_target);
-
while (iter.GetNextIncludeString(&include)) {
if (include.system_style_include && !check_system_)
continue;
+ includes.push_back(include);
+ }
- Err err;
- SourceFile included_file =
- SourceFileForInclude(include, include_dirs, input_file, &err);
- if (!included_file.is_null()) {
- CheckInclude(from_target_cache, input_file, included_file,
- include.location, errors);
+ if (includes.empty())
+ return true;
+
+ size_t error_count_before = errors->size();
+
+ for (const Target* from_target : targets) {
+ std::vector<SourceDir> include_dirs;
+ for (ConfigValuesIterator target_iter(from_target); !target_iter.done();
+ target_iter.Next()) {
+ const std::vector<SourceDir>& target_include_dirs =
+ target_iter.cur().include_dirs();
+ include_dirs.insert(include_dirs.end(), target_include_dirs.begin(),
+ target_include_dirs.end());
+ }
+
+ ReachabilityCache& from_target_cache =
+ GetReachabilityCacheForTarget(from_target);
+
+ for (const auto& inc : includes) {
+ Err err;
+ SourceFile included_file =
+ SourceFileForInclude(inc, include_dirs, input_file, &err);
+ if (!included_file.is_null()) {
+ CheckInclude(from_target_cache, input_file, included_file, inc.location,
+ errors);
+ }
}
}
diff --git a/src/gn/header_checker.h b/src/gn/header_checker.h
index f0e0408..083272f 100644
--- a/src/gn/header_checker.h
+++ b/src/gn/header_checker.h
@@ -244,7 +244,8 @@
// will be populate on failure.
void RunCheckOverFiles(const FileMap& flies, bool force_check);
- void DoWork(const Target* target, const SourceFile& file);
+ void DoWork(const std::vector<const Target*>& targets,
+ const SourceFile& file);
// Adds the sources and public files from the given target to the given map.
static void AddTargetToFileMap(const Target* target, FileMap* dest);
@@ -258,9 +259,9 @@
const InputFile& source_file,
Err* err) const;
- // from_target is the target the file was defined from. It will be used in
+ // targets is a list of targets using the source file. They will be used in
// error messages.
- bool CheckFile(const Target* from_target,
+ bool CheckFile(const std::vector<const Target*>& targets,
const SourceFile& file,
std::vector<Err>* errors) const;