Add PointerSet<T> template. This CL adds a new template class that implements fast non-owning pointer sets. This will be used by future CLs to implement faster TargetSet implementations. + Update BaseHashTable documentation a little to clarify things and remove "iff" instances. Change-Id: I934a93a3ca7abc638a7860bf6d80d8865a0c9a2f Reviewed-on: https://gn-review.googlesource.com/c/gn/+/12582 Reviewed-by: Brett Wilson <brettw@chromium.org> Commit-Queue: David Turner <digit@google.com>
diff --git a/build/gen.py b/build/gen.py index 265b1f5..cc6e835 100755 --- a/build/gen.py +++ b/build/gen.py
@@ -740,6 +740,7 @@ 'src/gn/parser_unittest.cc', 'src/gn/path_output_unittest.cc', 'src/gn/pattern_unittest.cc', + 'src/gn/pointer_set_unittest.cc', 'src/gn/runtime_deps_unittest.cc', 'src/gn/scope_per_file_provider_unittest.cc', 'src/gn/scope_unittest.cc',
diff --git a/src/gn/hash_table_base.h b/src/gn/hash_table_base.h index c26b514..a5d9e12 100644 --- a/src/gn/hash_table_base.h +++ b/src/gn/hash_table_base.h
@@ -46,14 +46,14 @@ // - It *must* be a trivial type, that is zero-initialized. // // - It provides an is_null() const method, which should return true -// iff the corresponding node matches a 'free' entry in the hash +// if the corresponding node matches a 'free' entry in the hash // table, i.e. one that has not been assigned to an item, or // to a deletion tombstone (see below). // // Of course, a default (zeroed) value, should always return true. // // - It provides an is_tombstone() const method, which should return -// return true iff the corresponding node indicates a previously +// return true if the corresponding node indicates a previously // deleted item. // // Note that if your hash table does not need deletion support, @@ -127,7 +127,7 @@ // } // } // -// // Returns true iff this set contains |key|. +// // Returns true if this set contains |key|. // bool contains(const Foo& key) const { // Node* node = BaseType::NodeLookup( // MakeHash(key), @@ -171,6 +171,7 @@ // delete node->foo; // node->foo = Node::kTombstone; // UpdateAfterDeletion(). +// return true; // } // // static size_t MakeHash(const Foo& foo) { @@ -241,7 +242,7 @@ return *this; } - // Return true iff the table is empty. + // Return true if the table is empty. bool empty() const { return count_ == 0; } // Return the number of keys in the set. @@ -259,8 +260,45 @@ // own iterator/const_iterator/begin()/end() types and methods, possibly as // simple wrappers around the NodeIterator/NodeBegin/NodeEnd below. // + // Defining a custom iterator is as easy as deriving from NodeIterator + // and overloading operator*() and operator->(), as in: + // + // struct FooNode { + // size_t hash; + // Foo* foo_ptr; + // ... + // }; + // + // class FooTable : public HashTableBase<FooNode> { + // public: + // ... + // + // // Iterators point to Foo instances, not table nodes. + // struct ConstIterator : NodeIterator { + // const Foo* operator->() { return node_->foo_ptr; } + // const Foo& operator*)) { return *(node_->foo_ptr); } + // }; + // + // ConstIterator begin() const { return { NodeBegin() }; } + // ConstIterator end() const { return { NodeEnd() }; } + // + // The NodeIterator type has a valid() method that can be used to perform + // faster iteration though at the cost of using a slightly more annoying + // syntax: + // + // for (auto it = my_table.begin(); it.valid(); ++it) { + // const Foo& foo = *it; + // ... + // } + // + // Instead of: + // + // for (const Foo& foo : my_table) { + // ... + // } + // // The ValidNodesRange() method also returns a object that has begin() and - // end() methods to be used in for-range loops as in: + // end() methods to be used in for-range loops over Node values as in: // // for (Node& node : my_table.ValidNodesRange()) { ... } // @@ -295,6 +333,9 @@ return result; } + // Returns true if the iterator points to a valid node. + bool valid() const { return node_ != node_limit_; } + Node* node_ = nullptr; Node* node_limit_ = nullptr; }; @@ -323,11 +364,9 @@ // ... // // // Iterators point to Foo instances, not table nodes. - // struct ConstIterator { - // const Foo* operator->() { return iter_->foo_ptr; } - // const Foo& operator->() { return *(iter_->foo_ptr); } - // ... - // NodeIterator iter_; + // struct ConstIterator : NodeIterator { + // const Foo* operator->() { return node_->foo_ptr; } + // const Foo& operator*)) { return *(node_->foo_ptr); } // }; // // and compare two ways to implement its begin() method: @@ -400,7 +439,7 @@ // Lookup for a node in the hash table that matches the |node_equal| // predicate, which takes a const Node* pointer argument, and returns - // true iff this corresponds to a lookup match. + // true if this corresponds to a lookup match. // // IMPORTANT: |node_equal| may or may not check the node hash value, // the choice is left to the implementation.
diff --git a/src/gn/pointer_set.h b/src/gn/pointer_set.h new file mode 100644 index 0000000..b690ace --- /dev/null +++ b/src/gn/pointer_set.h
@@ -0,0 +1,200 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SRC_GN_POINTER_SET_H_ +#define SRC_GN_POINTER_SET_H_ + +#include <functional> +#include "gn/hash_table_base.h" + +// PointerSet<T> is a fast implemention of a set of non-owning and non-null +// typed pointer values (of type T*). +// +// Note that this intentionally does not support a find() method +// for performance reasons, instead callers must use contains(), add() +// or erase() directly to perform lookups or conditional insertion/removal. +// +// Only constant iterators are provided. Moreover, the slightly faster +// iteration form can be used for performance as in: +// +// for (auto it = my_set.begin(); it.valid(); ++it) { +// T* ptr = *it; +// ... +// } +// +// Instead of: +// +// for (T* ptr : my_set) { +// ... +// } +// +// The PointerSetNode type implements the methods required by HashTableBase<> +// to store and hash a pointer directly in the buckets array. The special +// address 1 is used as the tombstone value to support removal. +// +// Null nodes are marked with an empty pointer, which means that nullptr +// itself cannot be stored in the set. +struct PointerSetNode { + const void* ptr_; + bool is_null() const { return !ptr_; } + bool is_tombstone() const { return ptr_ == MakeTombstone(); } + bool is_valid() const { return !is_null() && !is_tombstone(); } + size_t hash_value() const { return MakeHash(ptr_); } + + // Return the tomebstone value. This could be a static constexpr value + // if C++ didn't complain about reinterpret_cast<> here. + static const void* MakeTombstone() { + return reinterpret_cast<const void*>(1u); + } + + // Return the hash corresponding to a given pointer. + static size_t MakeHash(const void* ptr) { + return std::hash<const void*>()(ptr); + } +}; + +template <typename T> +class PointerSet : public HashTableBase<PointerSetNode> { + public: + using NodeType = PointerSetNode; + using BaseType = HashTableBase<NodeType>; + + PointerSet() = default; + + // Allow copying pointer sets. + PointerSet(const PointerSet& other) { insert(other); } + PointerSet& operator=(const PointerSet& other) { + if (this != &other) { + this->~PointerSet(); + new (this) PointerSet(other); + } + return *this; + } + + // Redefine move operators since the copy constructor above + // masks the base ones by default. + PointerSet(PointerSet&& other) noexcept : BaseType(std::move(other)) {} + PointerSet& operator=(PointerSet&& other) noexcept { + if (this != &other) { + this->~PointerSet(); + new (this) PointerSet(std::move(other)); + } + return *this; + } + + // Range constructor. + template <typename InputIter> + PointerSet(InputIter first, InputIter last) { + for (; first != last; ++first) + add(*first); + } + + // Clear the set. + void clear() { NodeClear(); } + + // Add one pointer to the set. Return true if the pointer was + // added, or false if its was already in the set. + bool add(T* ptr) { + NodeType* node = Lookup(ptr); + if (node->is_valid()) + return false; + + node->ptr_ = ptr; + UpdateAfterInsert(); + return true; + } + + // Return true if a given pointer is already in the set. + bool contains(T* ptr) const { return Lookup(ptr)->is_valid(); } + + // Try to remove a pointer from the set. Return true in case of + // removal, or false if the pointer was not in the set. + bool erase(T* ptr) { + NodeType* node = Lookup(ptr); + if (!node->is_valid()) + return false; + node->ptr_ = node->MakeTombstone(); + UpdateAfterRemoval(); + return true; + } + + // Same as add() but does not return a boolean. + // This minimizes code changes when PointerSet<> replaces + // other standard set types in the code. + void insert(T* ptr) { add(ptr); } + + // Range insertion. + template <typename InputIter> + void insert(InputIter first, InputIter last) { + for (; first != last; ++first) + add(*first); + } + + // Insert of aitems of |other| into the current set. This + // is slightly more efficient than using range insertion + // with insert(other.begin(), other.end()). + void insert(const PointerSet& other) { + for (const_iterator iter = other.begin(); iter.valid(); ++iter) + add(*iter); + } + + // Return a new set that is the intersection of the current one + // and |other|. + PointerSet intersection_with(const PointerSet& other) const { + PointerSet result; + for (const_iterator iter = other.begin(); iter.valid(); ++iter) { + if (contains(*iter)) + result.add(*iter); + } + return result; + } + + // Only provide const interators for pointer sets. + struct const_iterator : public NodeIterator { + T* const* operator->() const { + return &const_cast<T*>(static_cast<const T*>(node_->ptr_)); + } + T* operator*() const { + return const_cast<T*>(static_cast<const T*>(node_->ptr_)); + } + + // The following allows: + // std::vector<T*> vector(set.begin(), set.end()); + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = T*; + using pointer = T**; + using reference = T*&; + }; + + const_iterator begin() const { return {NodeBegin()}; } + const_iterator end() const { return {NodeEnd()}; } + + // Only used for unit-tests so performance is not critical. + bool operator==(const PointerSet& other) const { + if (size() != other.size()) + return false; + + for (const_iterator iter = begin(); iter.valid(); ++iter) + if (!other.contains(*iter)) + return false; + + for (const_iterator iter = other.begin(); iter.valid(); ++iter) + if (!contains(*iter)) + return false; + + return true; + } + + private: + // Lookup node matching a given pointer. If result->valid() is true + // then the pointer was found, otherwise, this is the location of + // the best place to insert it. + NodeType* Lookup(T* ptr) const { + size_t hash = NodeType::MakeHash(ptr); + return NodeLookup(hash, [&](NodeType* node) { return node->ptr_ == ptr; }); + } +}; + +#endif // SRC_GN_POINTER_SET_H_
diff --git a/src/gn/pointer_set_unittest.cc b/src/gn/pointer_set_unittest.cc new file mode 100644 index 0000000..0bb4d6a --- /dev/null +++ b/src/gn/pointer_set_unittest.cc
@@ -0,0 +1,167 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gn/pointer_set.h" +#include "util/test/test.h" + +struct Foo { + int x; +}; + +static const Foo kFoo1[1] = {{1}}; +static const Foo kFoo2[1] = {{2}}; +static const Foo kFoo3[1] = {{3}}; + +static const std::initializer_list<const Foo*> kFullList = {kFoo1, kFoo2, + kFoo3}; + +using TestPointerSet = PointerSet<const Foo>; + +TEST(PointerSet, DefaultConstruction) { + TestPointerSet set; + EXPECT_TRUE(set.empty()); + EXPECT_EQ(0u, set.size()); + EXPECT_FALSE(set.contains(kFoo1)); +} + +TEST(PointerSet, RangeConstruction) { + TestPointerSet set(kFullList.begin(), kFullList.end()); + EXPECT_FALSE(set.empty()); + EXPECT_EQ(3u, set.size()); + EXPECT_TRUE(set.contains(kFoo1)); + EXPECT_TRUE(set.contains(kFoo2)); + EXPECT_TRUE(set.contains(kFoo3)); +} + +TEST(PointerSet, CopyConstruction) { + TestPointerSet set1(kFullList.begin(), kFullList.end()); + TestPointerSet set2(set1); + set1.clear(); + EXPECT_TRUE(set1.empty()); + EXPECT_FALSE(set2.empty()); + EXPECT_EQ(3u, set2.size()); + EXPECT_TRUE(set2.contains(kFoo1)); + EXPECT_TRUE(set2.contains(kFoo2)); + EXPECT_TRUE(set2.contains(kFoo3)); +} + +TEST(PointerSet, MoveConstruction) { + TestPointerSet set1(kFullList.begin(), kFullList.end()); + TestPointerSet set2(std::move(set1)); + EXPECT_TRUE(set1.empty()); + EXPECT_FALSE(set2.empty()); + EXPECT_EQ(3u, set2.size()); + EXPECT_TRUE(set2.contains(kFoo1)); + EXPECT_TRUE(set2.contains(kFoo2)); + EXPECT_TRUE(set2.contains(kFoo3)); +} + +TEST(PointerSet, Add) { + TestPointerSet set; + EXPECT_TRUE(set.add(kFoo1)); + EXPECT_EQ(1u, set.size()); + EXPECT_TRUE(set.contains(kFoo1)); + + EXPECT_FALSE(set.add(kFoo1)); + EXPECT_EQ(1u, set.size()); + EXPECT_TRUE(set.contains(kFoo1)); + + EXPECT_TRUE(set.add(kFoo2)); + EXPECT_EQ(2u, set.size()); + EXPECT_TRUE(set.contains(kFoo1)); + EXPECT_TRUE(set.contains(kFoo2)); + + EXPECT_FALSE(set.add(kFoo1)); + EXPECT_FALSE(set.add(kFoo2)); + + EXPECT_TRUE(set.add(kFoo3)); + EXPECT_EQ(3u, set.size()); + EXPECT_TRUE(set.contains(kFoo1)); + EXPECT_TRUE(set.contains(kFoo2)); + EXPECT_TRUE(set.contains(kFoo3)); + + EXPECT_FALSE(set.add(kFoo1)); + EXPECT_FALSE(set.add(kFoo2)); + EXPECT_FALSE(set.add(kFoo3)); +} + +TEST(PointerSet, Erase) { + TestPointerSet set(kFullList.begin(), kFullList.end()); + EXPECT_EQ(3u, set.size()); + + EXPECT_TRUE(set.erase(kFoo1)); + EXPECT_EQ(2u, set.size()); + EXPECT_FALSE(set.contains(kFoo1)); + EXPECT_FALSE(set.erase(kFoo1)); + EXPECT_EQ(2u, set.size()); + + EXPECT_TRUE(set.erase(kFoo2)); + EXPECT_EQ(1u, set.size()); + EXPECT_FALSE(set.contains(kFoo2)); + EXPECT_FALSE(set.erase(kFoo2)); + EXPECT_EQ(1u, set.size()); + + EXPECT_TRUE(set.erase(kFoo3)); + EXPECT_EQ(0u, set.size()); + EXPECT_FALSE(set.contains(kFoo3)); + EXPECT_FALSE(set.erase(kFoo3)); + EXPECT_EQ(0u, set.size()); +} + +TEST(PointerSet, RangeInsert) { + TestPointerSet set; + set.insert(kFullList.begin(), kFullList.end()); + EXPECT_EQ(3u, set.size()); + EXPECT_TRUE(set.contains(kFoo1)); + EXPECT_TRUE(set.contains(kFoo2)); + EXPECT_TRUE(set.contains(kFoo3)); + + set.insert(kFullList.begin(), kFullList.end()); + EXPECT_EQ(3u, set.size()); +} + +TEST(PointerSet, InsertOther) { + TestPointerSet set1(kFullList.begin(), kFullList.end()); + TestPointerSet set2; + set2.add(kFoo1); + set1.insert(set2); + EXPECT_EQ(3u, set1.size()); + EXPECT_EQ(1u, set2.size()); + + set1.clear(); + set1.add(kFoo1); + set2.clear(); + set2.add(kFoo3); + set1.insert(set2); + EXPECT_EQ(2u, set1.size()); + EXPECT_EQ(1u, set2.size()); + EXPECT_TRUE(set1.contains(kFoo1)); + EXPECT_TRUE(set1.contains(kFoo3)); +} + +TEST(PointerSet, IntersectionWith) { + TestPointerSet set1; + TestPointerSet set2; + + set1.add(kFoo1); + set2.add(kFoo3); + + TestPointerSet set = set1.intersection_with(set2); + EXPECT_TRUE(set.empty()); + + set1.add(kFoo2); + set2.add(kFoo2); + + set = set1.intersection_with(set2); + EXPECT_FALSE(set.empty()); + EXPECT_EQ(1u, set.size()); + EXPECT_TRUE(set.contains(kFoo2)); + + set1.insert(kFullList.begin(), kFullList.end()); + set2 = set1; + set = set1.intersection_with(set2); + EXPECT_EQ(3u, set.size()); + EXPECT_EQ(set1, set); + EXPECT_EQ(set2, set); +}