Add PointerSet<T> template.

This CL adds a new template class that implements fast non-owning
pointer sets. This will be used by future CLs to implement faster
TargetSet implementations.

+ Update BaseHashTable documentation a little to clarify
  things and remove "iff" instances.

Change-Id: I934a93a3ca7abc638a7860bf6d80d8865a0c9a2f
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/12582
Reviewed-by: Brett Wilson <brettw@chromium.org>
Commit-Queue: David Turner <digit@google.com>
diff --git a/build/gen.py b/build/gen.py
index 265b1f5..cc6e835 100755
--- a/build/gen.py
+++ b/build/gen.py
@@ -740,6 +740,7 @@
         'src/gn/parser_unittest.cc',
         'src/gn/path_output_unittest.cc',
         'src/gn/pattern_unittest.cc',
+        'src/gn/pointer_set_unittest.cc',
         'src/gn/runtime_deps_unittest.cc',
         'src/gn/scope_per_file_provider_unittest.cc',
         'src/gn/scope_unittest.cc',
diff --git a/src/gn/hash_table_base.h b/src/gn/hash_table_base.h
index c26b514..a5d9e12 100644
--- a/src/gn/hash_table_base.h
+++ b/src/gn/hash_table_base.h
@@ -46,14 +46,14 @@
 //  - It *must* be a trivial type, that is zero-initialized.
 //
 //  - It provides an is_null() const method, which should return true
-//    iff the corresponding node matches a 'free' entry in the hash
+//    if the corresponding node matches a 'free' entry in the hash
 //    table, i.e. one that has not been assigned to an item, or
 //    to a deletion tombstone (see below).
 //
 //    Of course, a default (zeroed) value, should always return true.
 //
 //  - It provides an is_tombstone() const method, which should return
-//    return true iff the corresponding node indicates a previously
+//    return true if the corresponding node indicates a previously
 //    deleted item.
 //
 //    Note that if your hash table does not need deletion support,
@@ -127,7 +127,7 @@
 //        }
 //      }
 //
-//      // Returns true iff this set contains |key|.
+//      // Returns true if this set contains |key|.
 //      bool contains(const Foo& key) const {
 //        Node* node = BaseType::NodeLookup(
 //            MakeHash(key),
@@ -171,6 +171,7 @@
 //        delete node->foo;
 //        node->foo = Node::kTombstone;
 //        UpdateAfterDeletion().
+//        return true;
 //      }
 //
 //      static size_t MakeHash(const Foo& foo) {
@@ -241,7 +242,7 @@
     return *this;
   }
 
-  // Return true iff the table is empty.
+  // Return true if the table is empty.
   bool empty() const { return count_ == 0; }
 
   // Return the number of keys in the set.
@@ -259,8 +260,45 @@
   // own iterator/const_iterator/begin()/end() types and methods, possibly as
   // simple wrappers around the NodeIterator/NodeBegin/NodeEnd below.
   //
+  // Defining a custom iterator is as easy as deriving from NodeIterator
+  // and overloading operator*() and operator->(), as in:
+  //
+  //  struct FooNode {
+  //     size_t hash;
+  //     Foo*   foo_ptr;
+  //     ...
+  //  };
+  //
+  //  class FooTable : public HashTableBase<FooNode> {
+  //  public:
+  //     ...
+  //
+  //     // Iterators point to Foo instances, not table nodes.
+  //     struct ConstIterator : NodeIterator {
+  //       const Foo* operator->() { return node_->foo_ptr; }
+  //       const Foo& operator*)) { return *(node_->foo_ptr); }
+  //     };
+  //
+  //     ConstIterator begin() const { return { NodeBegin() }; }
+  //     ConstIterator end() const { return { NodeEnd() }; }
+  //
+  // The NodeIterator type has a valid() method that can be used to perform
+  // faster iteration though at the cost of using a slightly more annoying
+  // syntax:
+  //
+  //    for (auto it = my_table.begin(); it.valid(); ++it) {
+  //      const Foo& foo = *it;
+  //      ...
+  //    }
+  //
+  // Instead of:
+  //
+  //    for (const Foo& foo : my_table) {
+  //      ...
+  //    }
+  //
   // The ValidNodesRange() method also returns a object that has begin() and
-  // end() methods to be used in for-range loops as in:
+  // end() methods to be used in for-range loops over Node values as in:
   //
   //    for (Node& node : my_table.ValidNodesRange()) { ... }
   //
@@ -295,6 +333,9 @@
       return result;
     }
 
+    // Returns true if the iterator points to a valid node.
+    bool valid() const { return node_ != node_limit_; }
+
     Node* node_ = nullptr;
     Node* node_limit_ = nullptr;
   };
@@ -323,11 +364,9 @@
   //     ...
   //
   //     // Iterators point to Foo instances, not table nodes.
-  //     struct ConstIterator {
-  //       const Foo* operator->() { return iter_->foo_ptr; }
-  //       const Foo& operator->() { return *(iter_->foo_ptr); }
-  //       ...
-  //       NodeIterator iter_;
+  //     struct ConstIterator : NodeIterator {
+  //       const Foo* operator->() { return node_->foo_ptr; }
+  //       const Foo& operator*)) { return *(node_->foo_ptr); }
   //     };
   //
   // and compare two ways to implement its begin() method:
@@ -400,7 +439,7 @@
 
   // Lookup for a node in the hash table that matches the |node_equal|
   // predicate, which takes a const Node* pointer argument, and returns
-  // true iff this corresponds to a lookup match.
+  // true if this corresponds to a lookup match.
   //
   // IMPORTANT: |node_equal| may or may not check the node hash value,
   // the choice is left to the implementation.
diff --git a/src/gn/pointer_set.h b/src/gn/pointer_set.h
new file mode 100644
index 0000000..b690ace
--- /dev/null
+++ b/src/gn/pointer_set.h
@@ -0,0 +1,200 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SRC_GN_POINTER_SET_H_
+#define SRC_GN_POINTER_SET_H_
+
+#include <functional>
+#include "gn/hash_table_base.h"
+
+// PointerSet<T> is a fast implemention of a set of non-owning and non-null
+// typed pointer values (of type T*).
+//
+// Note that this intentionally does not support a find() method
+// for performance reasons, instead callers must use contains(), add()
+// or erase() directly to perform lookups or conditional insertion/removal.
+//
+// Only constant iterators are provided. Moreover, the slightly faster
+// iteration form can be used for performance as in:
+//
+//   for (auto it = my_set.begin(); it.valid(); ++it) {
+//     T* ptr = *it;
+//     ...
+//   }
+//
+// Instead of:
+//
+//   for (T* ptr : my_set) {
+//     ...
+//   }
+//
+// The PointerSetNode type implements the methods required by HashTableBase<>
+// to store and hash a pointer directly in the buckets array. The special
+// address 1 is used as the tombstone value to support removal.
+//
+// Null nodes are marked with an empty pointer, which means that nullptr
+// itself cannot be stored in the set.
+struct PointerSetNode {
+  const void* ptr_;
+  bool is_null() const { return !ptr_; }
+  bool is_tombstone() const { return ptr_ == MakeTombstone(); }
+  bool is_valid() const { return !is_null() && !is_tombstone(); }
+  size_t hash_value() const { return MakeHash(ptr_); }
+
+  // Return the tomebstone value. This could be a static constexpr value
+  // if C++ didn't complain about reinterpret_cast<> here.
+  static const void* MakeTombstone() {
+    return reinterpret_cast<const void*>(1u);
+  }
+
+  // Return the hash corresponding to a given pointer.
+  static size_t MakeHash(const void* ptr) {
+    return std::hash<const void*>()(ptr);
+  }
+};
+
+template <typename T>
+class PointerSet : public HashTableBase<PointerSetNode> {
+ public:
+  using NodeType = PointerSetNode;
+  using BaseType = HashTableBase<NodeType>;
+
+  PointerSet() = default;
+
+  // Allow copying pointer sets.
+  PointerSet(const PointerSet& other) { insert(other); }
+  PointerSet& operator=(const PointerSet& other) {
+    if (this != &other) {
+      this->~PointerSet();
+      new (this) PointerSet(other);
+    }
+    return *this;
+  }
+
+  // Redefine move operators since the copy constructor above
+  // masks the base ones by default.
+  PointerSet(PointerSet&& other) noexcept : BaseType(std::move(other)) {}
+  PointerSet& operator=(PointerSet&& other) noexcept {
+    if (this != &other) {
+      this->~PointerSet();
+      new (this) PointerSet(std::move(other));
+    }
+    return *this;
+  }
+
+  // Range constructor.
+  template <typename InputIter>
+  PointerSet(InputIter first, InputIter last) {
+    for (; first != last; ++first)
+      add(*first);
+  }
+
+  // Clear the set.
+  void clear() { NodeClear(); }
+
+  // Add one pointer to the set. Return true if the pointer was
+  // added, or false if its was already in the set.
+  bool add(T* ptr) {
+    NodeType* node = Lookup(ptr);
+    if (node->is_valid())
+      return false;
+
+    node->ptr_ = ptr;
+    UpdateAfterInsert();
+    return true;
+  }
+
+  // Return true if a given pointer is already in the set.
+  bool contains(T* ptr) const { return Lookup(ptr)->is_valid(); }
+
+  // Try to remove a pointer from the set. Return true in case of
+  // removal, or false if the pointer was not in the set.
+  bool erase(T* ptr) {
+    NodeType* node = Lookup(ptr);
+    if (!node->is_valid())
+      return false;
+    node->ptr_ = node->MakeTombstone();
+    UpdateAfterRemoval();
+    return true;
+  }
+
+  // Same as add() but does not return a boolean.
+  // This minimizes code changes when PointerSet<> replaces
+  // other standard set types in the code.
+  void insert(T* ptr) { add(ptr); }
+
+  // Range insertion.
+  template <typename InputIter>
+  void insert(InputIter first, InputIter last) {
+    for (; first != last; ++first)
+      add(*first);
+  }
+
+  // Insert of aitems of |other| into the current set. This
+  // is slightly more efficient than using range insertion
+  // with insert(other.begin(), other.end()).
+  void insert(const PointerSet& other) {
+    for (const_iterator iter = other.begin(); iter.valid(); ++iter)
+      add(*iter);
+  }
+
+  // Return a new set that is the intersection of the current one
+  // and |other|.
+  PointerSet intersection_with(const PointerSet& other) const {
+    PointerSet result;
+    for (const_iterator iter = other.begin(); iter.valid(); ++iter) {
+      if (contains(*iter))
+        result.add(*iter);
+    }
+    return result;
+  }
+
+  // Only provide const interators for pointer sets.
+  struct const_iterator : public NodeIterator {
+    T* const* operator->() const {
+      return &const_cast<T*>(static_cast<const T*>(node_->ptr_));
+    }
+    T* operator*() const {
+      return const_cast<T*>(static_cast<const T*>(node_->ptr_));
+    }
+
+    // The following allows:
+    // std::vector<T*> vector(set.begin(), set.end());
+    using iterator_category = std::forward_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = T*;
+    using pointer = T**;
+    using reference = T*&;
+  };
+
+  const_iterator begin() const { return {NodeBegin()}; }
+  const_iterator end() const { return {NodeEnd()}; }
+
+  // Only used for unit-tests so performance is not critical.
+  bool operator==(const PointerSet& other) const {
+    if (size() != other.size())
+      return false;
+
+    for (const_iterator iter = begin(); iter.valid(); ++iter)
+      if (!other.contains(*iter))
+        return false;
+
+    for (const_iterator iter = other.begin(); iter.valid(); ++iter)
+      if (!contains(*iter))
+        return false;
+
+    return true;
+  }
+
+ private:
+  // Lookup node matching a given pointer. If result->valid() is true
+  // then the pointer was found, otherwise, this is the location of
+  // the best place to insert it.
+  NodeType* Lookup(T* ptr) const {
+    size_t hash = NodeType::MakeHash(ptr);
+    return NodeLookup(hash, [&](NodeType* node) { return node->ptr_ == ptr; });
+  }
+};
+
+#endif  // SRC_GN_POINTER_SET_H_
diff --git a/src/gn/pointer_set_unittest.cc b/src/gn/pointer_set_unittest.cc
new file mode 100644
index 0000000..0bb4d6a
--- /dev/null
+++ b/src/gn/pointer_set_unittest.cc
@@ -0,0 +1,167 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "gn/pointer_set.h"
+#include "util/test/test.h"
+
+struct Foo {
+  int x;
+};
+
+static const Foo kFoo1[1] = {{1}};
+static const Foo kFoo2[1] = {{2}};
+static const Foo kFoo3[1] = {{3}};
+
+static const std::initializer_list<const Foo*> kFullList = {kFoo1, kFoo2,
+                                                            kFoo3};
+
+using TestPointerSet = PointerSet<const Foo>;
+
+TEST(PointerSet, DefaultConstruction) {
+  TestPointerSet set;
+  EXPECT_TRUE(set.empty());
+  EXPECT_EQ(0u, set.size());
+  EXPECT_FALSE(set.contains(kFoo1));
+}
+
+TEST(PointerSet, RangeConstruction) {
+  TestPointerSet set(kFullList.begin(), kFullList.end());
+  EXPECT_FALSE(set.empty());
+  EXPECT_EQ(3u, set.size());
+  EXPECT_TRUE(set.contains(kFoo1));
+  EXPECT_TRUE(set.contains(kFoo2));
+  EXPECT_TRUE(set.contains(kFoo3));
+}
+
+TEST(PointerSet, CopyConstruction) {
+  TestPointerSet set1(kFullList.begin(), kFullList.end());
+  TestPointerSet set2(set1);
+  set1.clear();
+  EXPECT_TRUE(set1.empty());
+  EXPECT_FALSE(set2.empty());
+  EXPECT_EQ(3u, set2.size());
+  EXPECT_TRUE(set2.contains(kFoo1));
+  EXPECT_TRUE(set2.contains(kFoo2));
+  EXPECT_TRUE(set2.contains(kFoo3));
+}
+
+TEST(PointerSet, MoveConstruction) {
+  TestPointerSet set1(kFullList.begin(), kFullList.end());
+  TestPointerSet set2(std::move(set1));
+  EXPECT_TRUE(set1.empty());
+  EXPECT_FALSE(set2.empty());
+  EXPECT_EQ(3u, set2.size());
+  EXPECT_TRUE(set2.contains(kFoo1));
+  EXPECT_TRUE(set2.contains(kFoo2));
+  EXPECT_TRUE(set2.contains(kFoo3));
+}
+
+TEST(PointerSet, Add) {
+  TestPointerSet set;
+  EXPECT_TRUE(set.add(kFoo1));
+  EXPECT_EQ(1u, set.size());
+  EXPECT_TRUE(set.contains(kFoo1));
+
+  EXPECT_FALSE(set.add(kFoo1));
+  EXPECT_EQ(1u, set.size());
+  EXPECT_TRUE(set.contains(kFoo1));
+
+  EXPECT_TRUE(set.add(kFoo2));
+  EXPECT_EQ(2u, set.size());
+  EXPECT_TRUE(set.contains(kFoo1));
+  EXPECT_TRUE(set.contains(kFoo2));
+
+  EXPECT_FALSE(set.add(kFoo1));
+  EXPECT_FALSE(set.add(kFoo2));
+
+  EXPECT_TRUE(set.add(kFoo3));
+  EXPECT_EQ(3u, set.size());
+  EXPECT_TRUE(set.contains(kFoo1));
+  EXPECT_TRUE(set.contains(kFoo2));
+  EXPECT_TRUE(set.contains(kFoo3));
+
+  EXPECT_FALSE(set.add(kFoo1));
+  EXPECT_FALSE(set.add(kFoo2));
+  EXPECT_FALSE(set.add(kFoo3));
+}
+
+TEST(PointerSet, Erase) {
+  TestPointerSet set(kFullList.begin(), kFullList.end());
+  EXPECT_EQ(3u, set.size());
+
+  EXPECT_TRUE(set.erase(kFoo1));
+  EXPECT_EQ(2u, set.size());
+  EXPECT_FALSE(set.contains(kFoo1));
+  EXPECT_FALSE(set.erase(kFoo1));
+  EXPECT_EQ(2u, set.size());
+
+  EXPECT_TRUE(set.erase(kFoo2));
+  EXPECT_EQ(1u, set.size());
+  EXPECT_FALSE(set.contains(kFoo2));
+  EXPECT_FALSE(set.erase(kFoo2));
+  EXPECT_EQ(1u, set.size());
+
+  EXPECT_TRUE(set.erase(kFoo3));
+  EXPECT_EQ(0u, set.size());
+  EXPECT_FALSE(set.contains(kFoo3));
+  EXPECT_FALSE(set.erase(kFoo3));
+  EXPECT_EQ(0u, set.size());
+}
+
+TEST(PointerSet, RangeInsert) {
+  TestPointerSet set;
+  set.insert(kFullList.begin(), kFullList.end());
+  EXPECT_EQ(3u, set.size());
+  EXPECT_TRUE(set.contains(kFoo1));
+  EXPECT_TRUE(set.contains(kFoo2));
+  EXPECT_TRUE(set.contains(kFoo3));
+
+  set.insert(kFullList.begin(), kFullList.end());
+  EXPECT_EQ(3u, set.size());
+}
+
+TEST(PointerSet, InsertOther) {
+  TestPointerSet set1(kFullList.begin(), kFullList.end());
+  TestPointerSet set2;
+  set2.add(kFoo1);
+  set1.insert(set2);
+  EXPECT_EQ(3u, set1.size());
+  EXPECT_EQ(1u, set2.size());
+
+  set1.clear();
+  set1.add(kFoo1);
+  set2.clear();
+  set2.add(kFoo3);
+  set1.insert(set2);
+  EXPECT_EQ(2u, set1.size());
+  EXPECT_EQ(1u, set2.size());
+  EXPECT_TRUE(set1.contains(kFoo1));
+  EXPECT_TRUE(set1.contains(kFoo3));
+}
+
+TEST(PointerSet, IntersectionWith) {
+  TestPointerSet set1;
+  TestPointerSet set2;
+
+  set1.add(kFoo1);
+  set2.add(kFoo3);
+
+  TestPointerSet set = set1.intersection_with(set2);
+  EXPECT_TRUE(set.empty());
+
+  set1.add(kFoo2);
+  set2.add(kFoo2);
+
+  set = set1.intersection_with(set2);
+  EXPECT_FALSE(set.empty());
+  EXPECT_EQ(1u, set.size());
+  EXPECT_TRUE(set.contains(kFoo2));
+
+  set1.insert(kFullList.begin(), kFullList.end());
+  set2 = set1;
+  set = set1.intersection_with(set2);
+  EXPECT_EQ(3u, set.size());
+  EXPECT_EQ(set1, set);
+  EXPECT_EQ(set2, set);
+}