Improve Builder performance.

The Builder class runs single-threaded to perform target
resolution, and is critical for performance. This CL improves
it in three distinct ways, which all improve 'gn gen' times
and reduce RAM usage:

- Implement BuilderRecordSet as PointerSet<BuilderRecord>
  which measures favoribly over competing implementations
  (i.e. std::set<>, std::unordered_set<> and
  base::flat_set<>).

- Remove the unresolved_deps_ member from the class since
  the exact set is not needed during target resolution,
  instead, replace it with a simple unresolved_counter_.

  The GetUnresolvedDeps() method is added to collect
  the unresolved dependencies by scanning all_deps_
  instead. This is only used out of the performance
  critical loop though.

- Provide a faster/smaller BuilderRecordMap type
  used by the Builder class to map Labels to BuilderRecord
  instances.

Overall results for Fuchsia is "gn gen" time is reduced by
3s (-19.7%) and RAM usage by 130 MiB (-4.95%).

Bug: NONE

------------------------------------------------------
Measurements for Fuchsia "gn gen":

BEFORE gn-target-set

Done. Made 173214 targets from 5389 files in 15042ms
Done. Made 173214 targets from 5389 files in 15143ms
Done. Made 173214 targets from 5389 files in 15436ms
Done. Made 173214 targets from 5389 files in 15642ms
Done. Made 173214 targets from 5389 files in 15649ms *
Done. Made 173214 targets from 5389 files in 15754ms
Done. Made 173214 targets from 5389 files in 15790ms
Done. Made 173214 targets from 5389 files in 16151ms
Done. Made 173214 targets from 5389 files in 16760ms

3252084
3255684 *
3264448

AFTER gn-builder-record

Done. Made 173214 targets from 5389 files in 11610ms
Done. Made 173214 targets from 5389 files in 11663ms
Done. Made 173214 targets from 5389 files in 12243ms
Done. Made 173214 targets from 5389 files in 12467ms
Done. Made 173214 targets from 5389 files in 12558ms *
Done. Made 173214 targets from 5389 files in 12578ms
Done. Made 173214 targets from 5389 files in 12803ms
Done. Made 173214 targets from 5389 files in 12900ms
Done. Made 173214 targets from 5389 files in 12906ms

3114220
3125024 *
3135548

Change-Id: Ifc8fbedf0b4e9c47ceea708931fd53600f0138a5
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/12584
Reviewed-by: Brett Wilson <brettw@chromium.org>
Commit-Queue: David Turner <digit@google.com>
diff --git a/build/gen.py b/build/gen.py
index cc6e835..772d090 100755
--- a/build/gen.py
+++ b/build/gen.py
@@ -684,6 +684,7 @@
         'src/gn/analyzer_unittest.cc',
         'src/gn/args_unittest.cc',
         'src/gn/builder_unittest.cc',
+        'src/gn/builder_record_map_unittest.cc',
         'src/gn/c_include_iterator_unittest.cc',
         'src/gn/command_format_unittest.cc',
         'src/gn/commands_unittest.cc',
diff --git a/src/gn/builder.cc b/src/gn/builder.cc
index d1ba662..7e36deb 100644
--- a/src/gn/builder.cc
+++ b/src/gn/builder.cc
@@ -34,7 +34,9 @@
 bool RecursiveFindCycle(const BuilderRecord* search_in,
                         std::vector<const BuilderRecord*>* path) {
   path->push_back(search_in);
-  for (auto* cur : search_in->unresolved_deps()) {
+  BuilderRecordSet unresolved_deps = search_in->GetUnresolvedDeps();
+  for (auto it = unresolved_deps.begin(); it.valid(); ++it) {
+    const BuilderRecord* cur = *it;
     std::vector<const BuilderRecord*>::iterator found =
         std::find(path->begin(), path->end(), cur);
     if (found != path->end()) {
@@ -133,7 +135,7 @@
   std::vector<const BuilderRecord*> result;
   result.reserve(records_.size());
   for (const auto& record : records_)
-    result.push_back(record.second.get());
+    result.push_back(&record);
   return result;
 }
 
@@ -141,9 +143,9 @@
   std::vector<const Item*> result;
   result.reserve(records_.size());
   for (const auto& record : records_) {
-    if (record.second->type() != BuilderRecord::ITEM_UNKNOWN &&
-        record.second->should_generate() && record.second->item()) {
-      result.push_back(record.second->item());
+    if (record.type() != BuilderRecord::ITEM_UNKNOWN &&
+        record.should_generate() && record.item()) {
+      result.push_back(record.item());
     }
   }
 
@@ -154,9 +156,9 @@
   std::vector<const Target*> result;
   result.reserve(records_.size());
   for (const auto& record : records_) {
-    if (record.second->type() == BuilderRecord::ITEM_TARGET &&
-        record.second->should_generate() && record.second->item())
-      result.push_back(record.second->item()->AsTarget());
+    if (record.type() == BuilderRecord::ITEM_TARGET &&
+        record.should_generate() && record.item())
+      result.push_back(record.item()->AsTarget());
   }
   return result;
 }
@@ -167,8 +169,7 @@
 }
 
 BuilderRecord* Builder::GetRecord(const Label& label) {
-  auto found = records_.find(label);
-  return (found != records_.end()) ? found->second.get() : nullptr;
+  return records_.find(label);
 }
 
 bool Builder::CheckForBadItems(Err* err) const {
@@ -183,18 +184,18 @@
   // that below.
   std::vector<const BuilderRecord*> bad_records;
   std::string depstring;
-  for (const auto& record_pair : records_) {
-    const BuilderRecord* src = record_pair.second.get();
-    if (!src->should_generate())
+  for (const auto& src : records_) {
+    if (!src.should_generate())
       continue;  // Skip ungenerated nodes.
 
-    if (!src->resolved()) {
-      bad_records.push_back(src);
+    if (!src.resolved()) {
+      bad_records.push_back(&src);
 
       // Check dependencies.
-      for (auto* dest : src->unresolved_deps()) {
+      BuilderRecordSet unresolved_deps = src.GetUnresolvedDeps();
+      for (const auto* dest : unresolved_deps) {
         if (!dest->item()) {
-          depstring += src->label().GetUserVisibleName(true) + "\n  needs " +
+          depstring += src.label().GetUserVisibleName(true) + "\n  needs " +
                        dest->label().GetUserVisibleName(true) + "\n";
         }
       }
@@ -265,8 +266,9 @@
   // anything they depend on is actually written, the "generate" flag isn't
   // relevant and means extra book keeping. Just force load any deps of this
   // config.
-  for (auto* cur : record->all_deps())
-    ScheduleItemLoadIfNecessary(cur);
+  for (auto it = record->all_deps().begin(); it.valid(); ++it) {
+    ScheduleItemLoadIfNecessary(*it);
+  }
 
   return true;
 }
@@ -303,18 +305,11 @@
                                                 const ParseNode* request_from,
                                                 BuilderRecord::ItemType type,
                                                 Err* err) {
-  BuilderRecord* record = GetRecord(label);
-  if (!record) {
-    // Not seen this record yet, create a new one.
-    auto new_record = std::make_unique<BuilderRecord>(type, label);
-    new_record->set_originally_referenced_from(request_from);
-    record = new_record.get();
-    records_[label] = std::move(new_record);
-    return record;
-  }
+  auto pair = records_.try_emplace(label, request_from, type);
+  BuilderRecord* record = pair.second;
 
-  // Check types.
-  if (record->type() != type) {
+  // Check types, if the record was not just created.
+  if (!pair.first && record->type() != type) {
     std::string msg =
         "The type of " + label.GetUserVisibleName(false) + "\nhere is a " +
         BuilderRecord::GetNameForType(type) + " but was previously seen as a " +
@@ -461,7 +456,8 @@
     return;  // Already set and we're not required to iterate dependencies.
   }
 
-  for (auto* cur : record->all_deps()) {
+  for (auto it = record->all_deps().begin(); it.valid(); ++it) {
+    BuilderRecord* cur = *it;
     if (!cur->should_generate()) {
       ScheduleItemLoadIfNecessary(cur);
       RecursiveSetShouldGenerate(cur, false);
@@ -508,12 +504,10 @@
     resolved_and_generated_callback_(record);
 
   // Recursively update everybody waiting on this item to be resolved.
-  for (BuilderRecord* waiting : record->waiting_on_resolution()) {
-    DCHECK(waiting->unresolved_deps().find(record) !=
-           waiting->unresolved_deps().end());
-    waiting->unresolved_deps().erase(record);
-
-    if (waiting->can_resolve()) {
+  const BuilderRecordSet waiting_deps = record->waiting_on_resolution();
+  for (auto it = waiting_deps.begin(); it.valid(); ++it) {
+    BuilderRecord* waiting = *it;
+    if (waiting->OnResolvedDep(record)) {
       if (!ResolveItem(waiting, err))
         return false;
     }
diff --git a/src/gn/builder.h b/src/gn/builder.h
index bd62c18..4b85824 100644
--- a/src/gn/builder.h
+++ b/src/gn/builder.h
@@ -6,10 +6,11 @@
 #define TOOLS_GN_BUILDER_H_
 
 #include <functional>
-#include <map>
 #include <memory>
+#include <unordered_map>
 
 #include "gn/builder_record.h"
+#include "gn/builder_record_map.h"
 #include "gn/label.h"
 #include "gn/label_ptr.h"
 #include "gn/unique_vector.h"
@@ -138,7 +139,7 @@
   // Non owning pointer.
   Loader* loader_;
 
-  std::map<Label, std::unique_ptr<BuilderRecord>> records_;
+  BuilderRecordMap records_;
 
   ResolvedGeneratedCallback resolved_and_generated_callback_;
 
diff --git a/src/gn/builder_record.cc b/src/gn/builder_record.cc
index 6325cdc..7849d91 100644
--- a/src/gn/builder_record.cc
+++ b/src/gn/builder_record.cc
@@ -6,8 +6,12 @@
 
 #include "gn/item.h"
 
-BuilderRecord::BuilderRecord(ItemType type, const Label& label)
-    : type_(type), label_(label) {}
+BuilderRecord::BuilderRecord(ItemType type,
+                             const Label& label,
+                             const ParseNode* originally_referenced_from)
+    : type_(type),
+      label_(label),
+      originally_referenced_from_(originally_referenced_from) {}
 
 // static
 const char* BuilderRecord::GetNameForType(ItemType type) {
@@ -64,10 +68,25 @@
   all_deps_.insert(record);
 }
 
+bool BuilderRecord::OnResolvedDep(const BuilderRecord* dep) {
+  DCHECK(all_deps_.contains(const_cast<BuilderRecord*>(dep)));
+  DCHECK(unresolved_count_ > 0);
+  return --unresolved_count_ == 0;
+}
+
+BuilderRecord::BuilderRecordSet BuilderRecord::GetUnresolvedDeps() const {
+  BuilderRecordSet result;
+  for (auto it = all_deps_.begin(); it.valid(); ++it) {
+    BuilderRecord* dep = *it;
+    if (dep->waiting_on_resolution_.contains(const_cast<BuilderRecord*>(this)))
+      result.add(dep);
+  }
+  return result;
+}
+
 void BuilderRecord::AddDep(BuilderRecord* record) {
-  all_deps_.insert(record);
-  if (!record->resolved()) {
-    unresolved_deps_.insert(record);
-    record->waiting_on_resolution_.insert(this);
+  if (all_deps_.add(record) && !record->resolved()) {
+    unresolved_count_ += 1;
+    record->waiting_on_resolution_.add(this);
   }
 }
diff --git a/src/gn/builder_record.h b/src/gn/builder_record.h
index 439893b..28f75fe 100644
--- a/src/gn/builder_record.h
+++ b/src/gn/builder_record.h
@@ -6,11 +6,11 @@
 #define TOOLS_GN_BUILDER_RECORD_H_
 
 #include <memory>
-#include <set>
 #include <utility>
 
 #include "gn/item.h"
 #include "gn/location.h"
+#include "gn/pointer_set.h"
 
 class ParseNode;
 
@@ -28,7 +28,7 @@
 // the current build (should_generate is false).
 class BuilderRecord {
  public:
-  using BuilderRecordSet = std::set<BuilderRecord*>;
+  using BuilderRecordSet = PointerSet<BuilderRecord>;
 
   enum ItemType {
     ITEM_UNKNOWN,
@@ -38,7 +38,9 @@
     ITEM_POOL
   };
 
-  BuilderRecord(ItemType type, const Label& label);
+  BuilderRecord(ItemType type,
+                const Label& label,
+                const ParseNode* originally_referenced_from);
 
   ItemType type() const { return type_; }
   const Label& label() const { return label_; }
@@ -62,9 +64,6 @@
   const ParseNode* originally_referenced_from() const {
     return originally_referenced_from_;
   }
-  void set_originally_referenced_from(const ParseNode* pn) {
-    originally_referenced_from_ = pn;
-  }
 
   bool should_generate() const { return should_generate_; }
   void set_should_generate(bool sg) { should_generate_ = sg; }
@@ -72,16 +71,21 @@
   bool resolved() const { return resolved_; }
   void set_resolved(bool r) { resolved_ = r; }
 
-  bool can_resolve() const { return item_ && unresolved_deps_.empty(); }
+  bool can_resolve() const { return item_ && unresolved_count_ == 0; }
 
   // All records this one is depending on. Note that this includes gen_deps for
   // targets, which can have cycles.
   BuilderRecordSet& all_deps() { return all_deps_; }
   const BuilderRecordSet& all_deps() const { return all_deps_; }
 
-  // Unresolved records this one is depending on. A subset of all... above.
-  BuilderRecordSet& unresolved_deps() { return unresolved_deps_; }
-  const BuilderRecordSet& unresolved_deps() const { return unresolved_deps_; }
+  // Return the set of unresolved records this one is depend on.
+  // This is a subset of all_deps() above.
+  BuilderRecordSet GetUnresolvedDeps() const;
+
+  // Call this method to notify the record that its dependency |dep| was
+  // just resolved. This returns true to indicate that the current record
+  // should now be resolved.
+  bool OnResolvedDep(const BuilderRecord* dep);
 
   // Records that are waiting on this one to be resolved. This is the other
   // end of the "unresolved deps" arrow.
@@ -95,14 +99,14 @@
 
  private:
   ItemType type_;
+  bool should_generate_ = false;
+  bool resolved_ = false;
   Label label_;
   std::unique_ptr<Item> item_;
   const ParseNode* originally_referenced_from_ = nullptr;
-  bool should_generate_ = false;
-  bool resolved_ = false;
 
+  size_t unresolved_count_ = 0;
   BuilderRecordSet all_deps_;
-  BuilderRecordSet unresolved_deps_;
   BuilderRecordSet waiting_on_resolution_;
 
   BuilderRecord(const BuilderRecord&) = delete;
diff --git a/src/gn/builder_record_map.h b/src/gn/builder_record_map.h
new file mode 100644
index 0000000..58ce712
--- /dev/null
+++ b/src/gn/builder_record_map.h
@@ -0,0 +1,80 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SRC_GN_BUILDER_RECORD_MAP_H_
+#define SRC_GN_BUILDER_RECORD_MAP_H_
+
+#include "gn/builder_record.h"
+#include "gn/hash_table_base.h"
+
+// This header implements a custom Label -> BuilderRecord map that is critical
+// for performance of the Builder class.
+//
+// Measurements show it performs better than
+// std::unordered_map<Label, BuilderRecord>, which itself performs much better
+// than base::flat_map<Label, BuilderRecord> or std::map<Label, BuilderRecord>
+//
+// NOTE: This only implements the features required by the Builder class.
+//
+
+struct BuilderRecordNode {
+  BuilderRecord* record;
+  bool is_null() const { return !record; }
+  static constexpr bool is_tombstone() { return false; }
+  bool is_valid() const { return !is_null() && !is_tombstone(); }
+  size_t hash_value() const { return record->label().hash(); }
+};
+
+class BuilderRecordMap : public HashTableBase<BuilderRecordNode> {
+ public:
+  using NodeType = BuilderRecordNode;
+
+  ~BuilderRecordMap() {
+    for (auto it = NodeBegin(); it.valid(); ++it) {
+      delete it->record;
+    }
+  }
+
+  // Find BuilderRecord matching |label| or return nullptr.
+  BuilderRecord* find(const Label& label) const {
+    return Lookup(label)->record;
+  }
+
+  // Try to find BuilderRecord matching |label|, and create one if
+  // none is found. result.first will be true to indicate that a new
+  // record was created.
+  std::pair<bool, BuilderRecord*> try_emplace(const Label& label,
+                                              const ParseNode* request_from,
+                                              BuilderRecord::ItemType type) {
+    NodeType* node = Lookup(label);
+    if (node->is_valid()) {
+      return {false, node->record};
+    }
+    BuilderRecord* record = new BuilderRecord(type, label, request_from);
+    node->record = record;
+    UpdateAfterInsert();
+    return {true, record};
+  }
+
+  // Iteration support
+  struct const_iterator : public NodeIterator {
+    const BuilderRecord& operator*() const { return *node_->record; }
+    BuilderRecord& operator*() { return *node_->record; }
+
+    const BuilderRecord* operator->() const { return node_->record; }
+    BuilderRecord* operator->() { return node_->record; }
+  };
+
+  const_iterator begin() const { return {NodeBegin()}; }
+  const_iterator end() const { return {NodeEnd()}; }
+
+ private:
+  NodeType* Lookup(const Label& label) const {
+    return NodeLookup(label.hash(), [&label](const NodeType* node) {
+      return node->record->label() == label;
+    });
+  }
+};
+
+#endif  // SRC_GN_BUILDER_RECORD_MAP_H_
diff --git a/src/gn/builder_record_map_unittest.cc b/src/gn/builder_record_map_unittest.cc
new file mode 100644
index 0000000..b26ed5c
--- /dev/null
+++ b/src/gn/builder_record_map_unittest.cc
@@ -0,0 +1,62 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+
+#include "gn/builder_record_map.h"
+#include "gn/builder_record.h"
+#include "gn/label.h"
+#include "gn/source_dir.h"
+#include "util/test/test.h"
+
+TEST(BuilderRecordMap, Construction) {
+  const Label kLabel1(SourceDir("//src"), "foo");
+  BuilderRecordMap map;
+  EXPECT_TRUE(map.empty());
+  EXPECT_EQ(0u, map.size());
+  EXPECT_FALSE(map.find(kLabel1));
+}
+
+TEST(BuilderRecordMap, TryEmplace) {
+  const Label kLabel1(SourceDir("//src"), "foo");
+  const Label kLabel2(SourceDir("//src"), "bar");
+  const Label kLabel3(SourceDir("//third_party/src"), "zoo");
+
+  BuilderRecordMap map;
+
+  auto ret = map.try_emplace(kLabel1, nullptr, BuilderRecord::ITEM_TARGET);
+  EXPECT_TRUE(ret.first);
+  EXPECT_TRUE(ret.second);
+  EXPECT_EQ(BuilderRecord::ITEM_TARGET, ret.second->type());
+  EXPECT_EQ(kLabel1, ret.second->label());
+  EXPECT_EQ(ret.second, map.find(kLabel1));
+  EXPECT_EQ(1u, map.size());
+
+  BuilderRecord* record = ret.second;
+  ret = map.try_emplace(kLabel1, nullptr, BuilderRecord::ITEM_CONFIG);
+  EXPECT_FALSE(ret.first);
+  EXPECT_EQ(record, ret.second);
+  EXPECT_EQ(1u, map.size());
+
+  ret = map.try_emplace(kLabel2, nullptr, BuilderRecord::ITEM_CONFIG);
+  EXPECT_TRUE(ret.first);
+  EXPECT_TRUE(ret.second);
+  EXPECT_EQ(2u, map.size());
+  EXPECT_EQ(BuilderRecord::ITEM_CONFIG, ret.second->type());
+  EXPECT_EQ(kLabel2, ret.second->label());
+  EXPECT_EQ(ret.second, map.find(kLabel2));
+
+  ret = map.try_emplace(kLabel3, nullptr, BuilderRecord::ITEM_TOOLCHAIN);
+  EXPECT_TRUE(ret.first);
+  EXPECT_TRUE(ret.second);
+  EXPECT_EQ(3u, map.size());
+  EXPECT_EQ(BuilderRecord::ITEM_TOOLCHAIN, ret.second->type());
+  EXPECT_EQ(kLabel3, ret.second->label());
+  EXPECT_EQ(ret.second, map.find(kLabel3));
+
+  ret = map.try_emplace(kLabel2, nullptr, BuilderRecord::ITEM_CONFIG);
+  EXPECT_FALSE(ret.first);
+
+  EXPECT_EQ(3u, map.size());
+  EXPECT_TRUE(map.find(kLabel1));
+  EXPECT_TRUE(map.find(kLabel2));
+  EXPECT_TRUE(map.find(kLabel3));
+}
diff --git a/src/gn/builder_unittest.cc b/src/gn/builder_unittest.cc
index 81b9f89..2a354d5 100644
--- a/src/gn/builder_unittest.cc
+++ b/src/gn/builder_unittest.cc
@@ -138,17 +138,14 @@
 
   // A should have two deps: B and the toolchain. Only B should be unresolved.
   EXPECT_EQ(2u, a_record->all_deps().size());
-  EXPECT_EQ(1u, a_record->unresolved_deps().size());
-  EXPECT_NE(a_record->all_deps().end(),
-            a_record->all_deps().find(toolchain_record));
-  EXPECT_NE(a_record->all_deps().end(), a_record->all_deps().find(b_record));
-  EXPECT_NE(a_record->unresolved_deps().end(),
-            a_record->unresolved_deps().find(b_record));
+  EXPECT_EQ(1u, a_record->GetUnresolvedDeps().size());
+  EXPECT_TRUE(a_record->all_deps().contains(toolchain_record));
+  EXPECT_TRUE(a_record->all_deps().contains(b_record));
+  EXPECT_TRUE(a_record->GetUnresolvedDeps().contains(b_record));
 
   // B should be marked as having A waiting on it.
   EXPECT_EQ(1u, b_record->waiting_on_resolution().size());
-  EXPECT_NE(b_record->waiting_on_resolution().end(),
-            b_record->waiting_on_resolution().find(a_record));
+  EXPECT_TRUE(b_record->waiting_on_resolution().contains(a_record));
 
   // Add the C target.
   Target* c = new Target(&settings_, c_label);
@@ -177,9 +174,9 @@
   EXPECT_TRUE(b_record->resolved());
   EXPECT_TRUE(c_record->resolved());
 
-  EXPECT_TRUE(a_record->unresolved_deps().empty());
-  EXPECT_TRUE(b_record->unresolved_deps().empty());
-  EXPECT_TRUE(c_record->unresolved_deps().empty());
+  EXPECT_TRUE(a_record->GetUnresolvedDeps().empty());
+  EXPECT_TRUE(b_record->GetUnresolvedDeps().empty());
+  EXPECT_TRUE(c_record->GetUnresolvedDeps().empty());
 
   EXPECT_TRUE(a_record->waiting_on_resolution().empty());
   EXPECT_TRUE(b_record->waiting_on_resolution().empty());
diff --git a/src/gn/loader.cc b/src/gn/loader.cc
index c615170..9d46103 100644
--- a/src/gn/loader.cc
+++ b/src/gn/loader.cc
@@ -38,7 +38,7 @@
       : file(f), toolchain_name(tc_name) {}
 
   bool operator<(const LoadID& other) const {
-    if (file.value() == other.file.value())
+    if (file == other.file)
       return toolchain_name < other.toolchain_name;
     return file < other.file;
   }