Add PointerSet<T> template.
This CL adds a new template class that implements fast non-owning
pointer sets. This will be used by future CLs to implement faster
TargetSet implementations.
+ Update BaseHashTable documentation a little to clarify
things and remove "iff" instances.
Change-Id: I934a93a3ca7abc638a7860bf6d80d8865a0c9a2f
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/12582
Reviewed-by: Brett Wilson <brettw@chromium.org>
Commit-Queue: David Turner <digit@google.com>
diff --git a/build/gen.py b/build/gen.py
index 265b1f5..cc6e835 100755
--- a/build/gen.py
+++ b/build/gen.py
@@ -740,6 +740,7 @@
'src/gn/parser_unittest.cc',
'src/gn/path_output_unittest.cc',
'src/gn/pattern_unittest.cc',
+ 'src/gn/pointer_set_unittest.cc',
'src/gn/runtime_deps_unittest.cc',
'src/gn/scope_per_file_provider_unittest.cc',
'src/gn/scope_unittest.cc',
diff --git a/src/gn/hash_table_base.h b/src/gn/hash_table_base.h
index c26b514..a5d9e12 100644
--- a/src/gn/hash_table_base.h
+++ b/src/gn/hash_table_base.h
@@ -46,14 +46,14 @@
// - It *must* be a trivial type, that is zero-initialized.
//
// - It provides an is_null() const method, which should return true
-// iff the corresponding node matches a 'free' entry in the hash
+// if the corresponding node matches a 'free' entry in the hash
// table, i.e. one that has not been assigned to an item, or
// to a deletion tombstone (see below).
//
// Of course, a default (zeroed) value, should always return true.
//
// - It provides an is_tombstone() const method, which should return
-// return true iff the corresponding node indicates a previously
+// return true if the corresponding node indicates a previously
// deleted item.
//
// Note that if your hash table does not need deletion support,
@@ -127,7 +127,7 @@
// }
// }
//
-// // Returns true iff this set contains |key|.
+// // Returns true if this set contains |key|.
// bool contains(const Foo& key) const {
// Node* node = BaseType::NodeLookup(
// MakeHash(key),
@@ -171,6 +171,7 @@
// delete node->foo;
// node->foo = Node::kTombstone;
// UpdateAfterDeletion().
+// return true;
// }
//
// static size_t MakeHash(const Foo& foo) {
@@ -241,7 +242,7 @@
return *this;
}
- // Return true iff the table is empty.
+ // Return true if the table is empty.
bool empty() const { return count_ == 0; }
// Return the number of keys in the set.
@@ -259,8 +260,45 @@
// own iterator/const_iterator/begin()/end() types and methods, possibly as
// simple wrappers around the NodeIterator/NodeBegin/NodeEnd below.
//
+ // Defining a custom iterator is as easy as deriving from NodeIterator
+ // and overloading operator*() and operator->(), as in:
+ //
+ // struct FooNode {
+ // size_t hash;
+ // Foo* foo_ptr;
+ // ...
+ // };
+ //
+ // class FooTable : public HashTableBase<FooNode> {
+ // public:
+ // ...
+ //
+ // // Iterators point to Foo instances, not table nodes.
+ // struct ConstIterator : NodeIterator {
+ // const Foo* operator->() { return node_->foo_ptr; }
+ // const Foo& operator*)) { return *(node_->foo_ptr); }
+ // };
+ //
+ // ConstIterator begin() const { return { NodeBegin() }; }
+ // ConstIterator end() const { return { NodeEnd() }; }
+ //
+ // The NodeIterator type has a valid() method that can be used to perform
+ // faster iteration though at the cost of using a slightly more annoying
+ // syntax:
+ //
+ // for (auto it = my_table.begin(); it.valid(); ++it) {
+ // const Foo& foo = *it;
+ // ...
+ // }
+ //
+ // Instead of:
+ //
+ // for (const Foo& foo : my_table) {
+ // ...
+ // }
+ //
// The ValidNodesRange() method also returns a object that has begin() and
- // end() methods to be used in for-range loops as in:
+ // end() methods to be used in for-range loops over Node values as in:
//
// for (Node& node : my_table.ValidNodesRange()) { ... }
//
@@ -295,6 +333,9 @@
return result;
}
+ // Returns true if the iterator points to a valid node.
+ bool valid() const { return node_ != node_limit_; }
+
Node* node_ = nullptr;
Node* node_limit_ = nullptr;
};
@@ -323,11 +364,9 @@
// ...
//
// // Iterators point to Foo instances, not table nodes.
- // struct ConstIterator {
- // const Foo* operator->() { return iter_->foo_ptr; }
- // const Foo& operator->() { return *(iter_->foo_ptr); }
- // ...
- // NodeIterator iter_;
+ // struct ConstIterator : NodeIterator {
+ // const Foo* operator->() { return node_->foo_ptr; }
+ // const Foo& operator*)) { return *(node_->foo_ptr); }
// };
//
// and compare two ways to implement its begin() method:
@@ -400,7 +439,7 @@
// Lookup for a node in the hash table that matches the |node_equal|
// predicate, which takes a const Node* pointer argument, and returns
- // true iff this corresponds to a lookup match.
+ // true if this corresponds to a lookup match.
//
// IMPORTANT: |node_equal| may or may not check the node hash value,
// the choice is left to the implementation.
diff --git a/src/gn/pointer_set.h b/src/gn/pointer_set.h
new file mode 100644
index 0000000..b690ace
--- /dev/null
+++ b/src/gn/pointer_set.h
@@ -0,0 +1,200 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SRC_GN_POINTER_SET_H_
+#define SRC_GN_POINTER_SET_H_
+
+#include <functional>
+#include "gn/hash_table_base.h"
+
+// PointerSet<T> is a fast implemention of a set of non-owning and non-null
+// typed pointer values (of type T*).
+//
+// Note that this intentionally does not support a find() method
+// for performance reasons, instead callers must use contains(), add()
+// or erase() directly to perform lookups or conditional insertion/removal.
+//
+// Only constant iterators are provided. Moreover, the slightly faster
+// iteration form can be used for performance as in:
+//
+// for (auto it = my_set.begin(); it.valid(); ++it) {
+// T* ptr = *it;
+// ...
+// }
+//
+// Instead of:
+//
+// for (T* ptr : my_set) {
+// ...
+// }
+//
+// The PointerSetNode type implements the methods required by HashTableBase<>
+// to store and hash a pointer directly in the buckets array. The special
+// address 1 is used as the tombstone value to support removal.
+//
+// Null nodes are marked with an empty pointer, which means that nullptr
+// itself cannot be stored in the set.
+struct PointerSetNode {
+ const void* ptr_;
+ bool is_null() const { return !ptr_; }
+ bool is_tombstone() const { return ptr_ == MakeTombstone(); }
+ bool is_valid() const { return !is_null() && !is_tombstone(); }
+ size_t hash_value() const { return MakeHash(ptr_); }
+
+ // Return the tomebstone value. This could be a static constexpr value
+ // if C++ didn't complain about reinterpret_cast<> here.
+ static const void* MakeTombstone() {
+ return reinterpret_cast<const void*>(1u);
+ }
+
+ // Return the hash corresponding to a given pointer.
+ static size_t MakeHash(const void* ptr) {
+ return std::hash<const void*>()(ptr);
+ }
+};
+
+template <typename T>
+class PointerSet : public HashTableBase<PointerSetNode> {
+ public:
+ using NodeType = PointerSetNode;
+ using BaseType = HashTableBase<NodeType>;
+
+ PointerSet() = default;
+
+ // Allow copying pointer sets.
+ PointerSet(const PointerSet& other) { insert(other); }
+ PointerSet& operator=(const PointerSet& other) {
+ if (this != &other) {
+ this->~PointerSet();
+ new (this) PointerSet(other);
+ }
+ return *this;
+ }
+
+ // Redefine move operators since the copy constructor above
+ // masks the base ones by default.
+ PointerSet(PointerSet&& other) noexcept : BaseType(std::move(other)) {}
+ PointerSet& operator=(PointerSet&& other) noexcept {
+ if (this != &other) {
+ this->~PointerSet();
+ new (this) PointerSet(std::move(other));
+ }
+ return *this;
+ }
+
+ // Range constructor.
+ template <typename InputIter>
+ PointerSet(InputIter first, InputIter last) {
+ for (; first != last; ++first)
+ add(*first);
+ }
+
+ // Clear the set.
+ void clear() { NodeClear(); }
+
+ // Add one pointer to the set. Return true if the pointer was
+ // added, or false if its was already in the set.
+ bool add(T* ptr) {
+ NodeType* node = Lookup(ptr);
+ if (node->is_valid())
+ return false;
+
+ node->ptr_ = ptr;
+ UpdateAfterInsert();
+ return true;
+ }
+
+ // Return true if a given pointer is already in the set.
+ bool contains(T* ptr) const { return Lookup(ptr)->is_valid(); }
+
+ // Try to remove a pointer from the set. Return true in case of
+ // removal, or false if the pointer was not in the set.
+ bool erase(T* ptr) {
+ NodeType* node = Lookup(ptr);
+ if (!node->is_valid())
+ return false;
+ node->ptr_ = node->MakeTombstone();
+ UpdateAfterRemoval();
+ return true;
+ }
+
+ // Same as add() but does not return a boolean.
+ // This minimizes code changes when PointerSet<> replaces
+ // other standard set types in the code.
+ void insert(T* ptr) { add(ptr); }
+
+ // Range insertion.
+ template <typename InputIter>
+ void insert(InputIter first, InputIter last) {
+ for (; first != last; ++first)
+ add(*first);
+ }
+
+ // Insert of aitems of |other| into the current set. This
+ // is slightly more efficient than using range insertion
+ // with insert(other.begin(), other.end()).
+ void insert(const PointerSet& other) {
+ for (const_iterator iter = other.begin(); iter.valid(); ++iter)
+ add(*iter);
+ }
+
+ // Return a new set that is the intersection of the current one
+ // and |other|.
+ PointerSet intersection_with(const PointerSet& other) const {
+ PointerSet result;
+ for (const_iterator iter = other.begin(); iter.valid(); ++iter) {
+ if (contains(*iter))
+ result.add(*iter);
+ }
+ return result;
+ }
+
+ // Only provide const interators for pointer sets.
+ struct const_iterator : public NodeIterator {
+ T* const* operator->() const {
+ return &const_cast<T*>(static_cast<const T*>(node_->ptr_));
+ }
+ T* operator*() const {
+ return const_cast<T*>(static_cast<const T*>(node_->ptr_));
+ }
+
+ // The following allows:
+ // std::vector<T*> vector(set.begin(), set.end());
+ using iterator_category = std::forward_iterator_tag;
+ using difference_type = std::ptrdiff_t;
+ using value_type = T*;
+ using pointer = T**;
+ using reference = T*&;
+ };
+
+ const_iterator begin() const { return {NodeBegin()}; }
+ const_iterator end() const { return {NodeEnd()}; }
+
+ // Only used for unit-tests so performance is not critical.
+ bool operator==(const PointerSet& other) const {
+ if (size() != other.size())
+ return false;
+
+ for (const_iterator iter = begin(); iter.valid(); ++iter)
+ if (!other.contains(*iter))
+ return false;
+
+ for (const_iterator iter = other.begin(); iter.valid(); ++iter)
+ if (!contains(*iter))
+ return false;
+
+ return true;
+ }
+
+ private:
+ // Lookup node matching a given pointer. If result->valid() is true
+ // then the pointer was found, otherwise, this is the location of
+ // the best place to insert it.
+ NodeType* Lookup(T* ptr) const {
+ size_t hash = NodeType::MakeHash(ptr);
+ return NodeLookup(hash, [&](NodeType* node) { return node->ptr_ == ptr; });
+ }
+};
+
+#endif // SRC_GN_POINTER_SET_H_
diff --git a/src/gn/pointer_set_unittest.cc b/src/gn/pointer_set_unittest.cc
new file mode 100644
index 0000000..0bb4d6a
--- /dev/null
+++ b/src/gn/pointer_set_unittest.cc
@@ -0,0 +1,167 @@
+// Copyright 2021 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "gn/pointer_set.h"
+#include "util/test/test.h"
+
+struct Foo {
+ int x;
+};
+
+static const Foo kFoo1[1] = {{1}};
+static const Foo kFoo2[1] = {{2}};
+static const Foo kFoo3[1] = {{3}};
+
+static const std::initializer_list<const Foo*> kFullList = {kFoo1, kFoo2,
+ kFoo3};
+
+using TestPointerSet = PointerSet<const Foo>;
+
+TEST(PointerSet, DefaultConstruction) {
+ TestPointerSet set;
+ EXPECT_TRUE(set.empty());
+ EXPECT_EQ(0u, set.size());
+ EXPECT_FALSE(set.contains(kFoo1));
+}
+
+TEST(PointerSet, RangeConstruction) {
+ TestPointerSet set(kFullList.begin(), kFullList.end());
+ EXPECT_FALSE(set.empty());
+ EXPECT_EQ(3u, set.size());
+ EXPECT_TRUE(set.contains(kFoo1));
+ EXPECT_TRUE(set.contains(kFoo2));
+ EXPECT_TRUE(set.contains(kFoo3));
+}
+
+TEST(PointerSet, CopyConstruction) {
+ TestPointerSet set1(kFullList.begin(), kFullList.end());
+ TestPointerSet set2(set1);
+ set1.clear();
+ EXPECT_TRUE(set1.empty());
+ EXPECT_FALSE(set2.empty());
+ EXPECT_EQ(3u, set2.size());
+ EXPECT_TRUE(set2.contains(kFoo1));
+ EXPECT_TRUE(set2.contains(kFoo2));
+ EXPECT_TRUE(set2.contains(kFoo3));
+}
+
+TEST(PointerSet, MoveConstruction) {
+ TestPointerSet set1(kFullList.begin(), kFullList.end());
+ TestPointerSet set2(std::move(set1));
+ EXPECT_TRUE(set1.empty());
+ EXPECT_FALSE(set2.empty());
+ EXPECT_EQ(3u, set2.size());
+ EXPECT_TRUE(set2.contains(kFoo1));
+ EXPECT_TRUE(set2.contains(kFoo2));
+ EXPECT_TRUE(set2.contains(kFoo3));
+}
+
+TEST(PointerSet, Add) {
+ TestPointerSet set;
+ EXPECT_TRUE(set.add(kFoo1));
+ EXPECT_EQ(1u, set.size());
+ EXPECT_TRUE(set.contains(kFoo1));
+
+ EXPECT_FALSE(set.add(kFoo1));
+ EXPECT_EQ(1u, set.size());
+ EXPECT_TRUE(set.contains(kFoo1));
+
+ EXPECT_TRUE(set.add(kFoo2));
+ EXPECT_EQ(2u, set.size());
+ EXPECT_TRUE(set.contains(kFoo1));
+ EXPECT_TRUE(set.contains(kFoo2));
+
+ EXPECT_FALSE(set.add(kFoo1));
+ EXPECT_FALSE(set.add(kFoo2));
+
+ EXPECT_TRUE(set.add(kFoo3));
+ EXPECT_EQ(3u, set.size());
+ EXPECT_TRUE(set.contains(kFoo1));
+ EXPECT_TRUE(set.contains(kFoo2));
+ EXPECT_TRUE(set.contains(kFoo3));
+
+ EXPECT_FALSE(set.add(kFoo1));
+ EXPECT_FALSE(set.add(kFoo2));
+ EXPECT_FALSE(set.add(kFoo3));
+}
+
+TEST(PointerSet, Erase) {
+ TestPointerSet set(kFullList.begin(), kFullList.end());
+ EXPECT_EQ(3u, set.size());
+
+ EXPECT_TRUE(set.erase(kFoo1));
+ EXPECT_EQ(2u, set.size());
+ EXPECT_FALSE(set.contains(kFoo1));
+ EXPECT_FALSE(set.erase(kFoo1));
+ EXPECT_EQ(2u, set.size());
+
+ EXPECT_TRUE(set.erase(kFoo2));
+ EXPECT_EQ(1u, set.size());
+ EXPECT_FALSE(set.contains(kFoo2));
+ EXPECT_FALSE(set.erase(kFoo2));
+ EXPECT_EQ(1u, set.size());
+
+ EXPECT_TRUE(set.erase(kFoo3));
+ EXPECT_EQ(0u, set.size());
+ EXPECT_FALSE(set.contains(kFoo3));
+ EXPECT_FALSE(set.erase(kFoo3));
+ EXPECT_EQ(0u, set.size());
+}
+
+TEST(PointerSet, RangeInsert) {
+ TestPointerSet set;
+ set.insert(kFullList.begin(), kFullList.end());
+ EXPECT_EQ(3u, set.size());
+ EXPECT_TRUE(set.contains(kFoo1));
+ EXPECT_TRUE(set.contains(kFoo2));
+ EXPECT_TRUE(set.contains(kFoo3));
+
+ set.insert(kFullList.begin(), kFullList.end());
+ EXPECT_EQ(3u, set.size());
+}
+
+TEST(PointerSet, InsertOther) {
+ TestPointerSet set1(kFullList.begin(), kFullList.end());
+ TestPointerSet set2;
+ set2.add(kFoo1);
+ set1.insert(set2);
+ EXPECT_EQ(3u, set1.size());
+ EXPECT_EQ(1u, set2.size());
+
+ set1.clear();
+ set1.add(kFoo1);
+ set2.clear();
+ set2.add(kFoo3);
+ set1.insert(set2);
+ EXPECT_EQ(2u, set1.size());
+ EXPECT_EQ(1u, set2.size());
+ EXPECT_TRUE(set1.contains(kFoo1));
+ EXPECT_TRUE(set1.contains(kFoo3));
+}
+
+TEST(PointerSet, IntersectionWith) {
+ TestPointerSet set1;
+ TestPointerSet set2;
+
+ set1.add(kFoo1);
+ set2.add(kFoo3);
+
+ TestPointerSet set = set1.intersection_with(set2);
+ EXPECT_TRUE(set.empty());
+
+ set1.add(kFoo2);
+ set2.add(kFoo2);
+
+ set = set1.intersection_with(set2);
+ EXPECT_FALSE(set.empty());
+ EXPECT_EQ(1u, set.size());
+ EXPECT_TRUE(set.contains(kFoo2));
+
+ set1.insert(kFullList.begin(), kFullList.end());
+ set2 = set1;
+ set = set1.intersection_with(set2);
+ EXPECT_EQ(3u, set.size());
+ EXPECT_EQ(set1, set);
+ EXPECT_EQ(set2, set);
+}