Fix bug in Grow logic in HashTableBase

NodeLookup() returns only when a null slot is found, but if tombstones
accumulate, it can infinite loop due to no null slots existing.

This changes the hash table to track the number of tombstones, and grow
when the number of null entries is < 25%.

This bug does not affect any existing uses of PointerSet, since it
happens only for sets that alternate between add() and erase() (which
afaict, no existing uses do).

Change-Id: I846ae0468faa8084d84d0c042bfaf5a06995e4f6
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/19160
Reviewed-by: David Turner <digit@google.com>
Commit-Queue: Andrew Grieve <agrieve@google.com>
diff --git a/src/gn/builder_record_map.h b/src/gn/builder_record_map.h
index 58ce712..77f0695 100644
--- a/src/gn/builder_record_map.h
+++ b/src/gn/builder_record_map.h
@@ -53,7 +53,7 @@
     }
     BuilderRecord* record = new BuilderRecord(type, label, request_from);
     node->record = record;
-    UpdateAfterInsert();
+    UpdateAfterInsert(false);
     return {true, record};
   }
 
diff --git a/src/gn/hash_table_base.h b/src/gn/hash_table_base.h
index 4e9e347..7e5b34e 100644
--- a/src/gn/hash_table_base.h
+++ b/src/gn/hash_table_base.h
@@ -206,7 +206,9 @@
   // so any owned pointer inside nodes should be handled by custom
   // constructors and operators in the derived class, if needed.
   HashTableBase(const HashTableBase& other)
-      : count_(other.count_), size_(other.size_) {
+      : count_(other.count_),
+        tombstone_count_(other.tombstone_count_),
+        size_(other.size_) {
     if (other.buckets_ != other.buckets0_) {
       // NOTE: using malloc() here to clarify that no object construction
       // should occur here.
@@ -224,7 +226,10 @@
   }
 
   HashTableBase(HashTableBase&& other) noexcept
-      : count_(other.count_), size_(other.size_), buckets_(other.buckets_) {
+      : count_(other.count_),
+        tombstone_count_(other.tombstone_count_),
+        size_(other.size_),
+        buckets_(other.buckets_) {
     if (buckets_ == other.buckets0_) {
       buckets0_[0] = other.buckets0_[0];
       buckets_ = buckets0_;
@@ -432,6 +437,7 @@
       free(buckets_);
 
     count_ = 0;
+    tombstone_count_ = 0;
     size_ = 1;
     buckets_ = buckets0_;
     buckets0_[0] = Node{};
@@ -474,9 +480,12 @@
   // Call this method after updating the content of the |node| pointer
   // returned by an unsuccessful NodeLookup(). Return true to indicate that
   // the table size changed, and that existing iterators were invalidated.
-  bool UpdateAfterInsert() {
+  bool UpdateAfterInsert(bool was_tombstone) {
     count_ += 1;
-    if (UNLIKELY(count_ * 4 >= size_ * 3)) {
+    if (was_tombstone) {
+      tombstone_count_ -= 1;
+    }
+    if (UNLIKELY((count_ + tombstone_count_) * 4 >= size_ * 3)) {
       GrowBuckets();
       return true;
     }
@@ -489,6 +498,7 @@
   // iterators where invalidated.
   bool UpdateAfterRemoval() {
     count_ -= 1;
+    tombstone_count_ += 1;
     // For now don't support shrinking since this is not useful for GN.
     return false;
   }
@@ -527,12 +537,14 @@
     buckets_ = new_buckets;
     buckets0_[0] = Node{};
     size_ = new_size;
+    tombstone_count_ = 0;
   }
 
   // NOTE: The reason for default-initializing |buckets_| to a storage slot
   // inside the object is to ensure the value is never null. This removes one
   // nullptr check from each NodeLookup() instantiation.
   size_t count_ = 0;
+  size_t tombstone_count_ = 0;
   size_t size_ = 1;
   Node* buckets_ = buckets0_;
   Node buckets0_[1] = {{}};
diff --git a/src/gn/hash_table_base_unittest.cc b/src/gn/hash_table_base_unittest.cc
index d6324cf..7acbfbe 100644
--- a/src/gn/hash_table_base_unittest.cc
+++ b/src/gn/hash_table_base_unittest.cc
@@ -145,8 +145,9 @@
     if (node->is_valid())
       return false;
 
+    bool was_tombstone = node->is_tombstone();
     node->int_ptr = new Int(x);
-    UpdateAfterInsert();
+    UpdateAfterInsert(was_tombstone);
     return true;
   }
 
@@ -354,3 +355,13 @@
   EXPECT_EQ(values[1], 5);
   EXPECT_EQ(values[2], 7);
 }
+
+TEST(HashTableBaseTest, Erase) {
+  TestHashTable table;
+  // Use sequention hashes to test that tombstone_count_ grows the hash table.
+  for (int i = 0; i < 16; ++i) {
+    table.insert(i);
+    table.erase(i);
+    EXPECT_EQ(table.size(), 0u);
+  }
+}
diff --git a/src/gn/header_checker.cc b/src/gn/header_checker.cc
index 705b892..05c74d6 100644
--- a/src/gn/header_checker.cc
+++ b/src/gn/header_checker.cc
@@ -151,6 +151,7 @@
 
 void HeaderChecker::RunCheckOverFiles(const FileMap& files, bool force_check) {
   WorkerPool pool;
+  task_count_.Increment();
 
   for (const auto& file : files) {
     // Only check C-like source files (RC files also have includes).
@@ -181,6 +182,8 @@
     }
   }
 
+  task_count_.Decrement();
+
   // Wait for all tasks posted by this method to complete.
   std::unique_lock<std::mutex> auto_lock(lock_);
   while (!task_count_.IsZero())
diff --git a/src/gn/pointer_set.h b/src/gn/pointer_set.h
index 2bec930..6a4ec62 100644
--- a/src/gn/pointer_set.h
+++ b/src/gn/pointer_set.h
@@ -102,8 +102,9 @@
     if (node->is_valid())
       return false;
 
+    bool was_tombstone = node->is_tombstone();
     node->ptr_ = ptr;
-    UpdateAfterInsert();
+    UpdateAfterInsert(was_tombstone);
     return true;
   }
 
diff --git a/src/gn/string_atom.cc b/src/gn/string_atom.cc
index 36f912f..71f9b87 100644
--- a/src/gn/string_atom.cc
+++ b/src/gn/string_atom.cc
@@ -101,7 +101,7 @@
   void Insert(Node* node, size_t hash, KeyType key) {
     node->hash = hash;
     node->key = key;
-    BaseType::UpdateAfterInsert();
+    BaseType::UpdateAfterInsert(false);
   }
 };
 
diff --git a/src/gn/unique_vector.h b/src/gn/unique_vector.h
index 473786d..3024613 100644
--- a/src/gn/unique_vector.h
+++ b/src/gn/unique_vector.h
@@ -68,7 +68,7 @@
   // UniqueVectorKey type.
   void Insert(Node* node, size_t hash, size_t index) {
     *node = Node::Make(hash, index);
-    BaseType::UpdateAfterInsert();
+    BaseType::UpdateAfterInsert(false);
   }
 
   void Clear() { NodeClear(); }
diff --git a/src/util/msg_loop.cc b/src/util/msg_loop.cc
index e614b66..117daf8 100644
--- a/src/util/msg_loop.cc
+++ b/src/util/msg_loop.cc
@@ -28,6 +28,8 @@
 }
 
 void MsgLoop::Run() {
+  should_quit_ = false;
+
   while (!should_quit_) {
     std::function<void()> task;
     {