| // Copyright (c) 2020 The Chromium Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style license that can be | 
 | // found in the LICENSE file. | 
 |  | 
 | #ifndef TOOLS_GN_HASH_TABLE_BASE_H_ | 
 | #define TOOLS_GN_HASH_TABLE_BASE_H_ | 
 |  | 
 | #include "base/compiler_specific.h" | 
 |  | 
 | #include <stdlib.h> | 
 | #include <type_traits> | 
 | #include <utility> | 
 |  | 
 | // IMPORTANT DISCLAIMER: | 
 | // | 
 | // THIS IS *NOT* A GENERAL PURPOSE HASH TABLE TEMPLATE. INSTEAD, IT CAN | 
 | // CAN BE USED AS THE BASIS FOR VERY HIGH SPEED AND COMPACT HASH TABLES | 
 | // THAT OBEY VERY STRICT CONDITIONS DESCRIBED BELOW. | 
 | // | 
 | // DO NOT USE THIS UNLESS YOU HAVE A GOOD REASON, I.E. THAT PROFILING | 
 | // SHOWS THERE *IS* AN OVERALL BENEFIT TO DO SO. FOR MOST CASES, | 
 | // std::set<>, std::unordered_set<> and base::flat_set<> ARE PERFECTLY | 
 | // FINE AND SHOULD BE PREFERRED. | 
 | // | 
 | // | 
 | // That being said, this implementation uses a completely typical | 
 | // open-addressing scheme with a buckets array size which is always | 
 | // a power of 2, instead of a prime number. Experience shows this is | 
 | // not detrimental to performance as long as you have a sufficiently | 
 | // good hash function (which is the case of all C++ standard libraries | 
 | // these days for strings and pointers). | 
 | // | 
 | // The reason it is so fast is due to its compactness and the very | 
 | // simple but tight code for a typical lookup / insert / deletion | 
 | // operation. | 
 | // | 
 | // The |buckets_| field holds a pointer to an array of NODE_TYPE | 
 | // instances, called nodes. Each node represents one of the following: | 
 | // a free entry in the table, an inserted item, or a tombstone marking | 
 | // the location of a previously deleted item. Tombstones are only | 
 | // used if the hash table instantiation requires deletion support | 
 | // (see the is_tombstone() description below). | 
 | // | 
 | // The template requires that Node be a type with the following traits: | 
 | // | 
 | //  - It *must* be a trivial type, that is zero-initialized. | 
 | // | 
 | //  - It provides an is_null() const method, which should return true | 
 | //    iff the corresponding node matches a 'free' entry in the hash | 
 | //    table, i.e. one that has not been assigned to an item, or | 
 | //    to a deletion tombstone (see below). | 
 | // | 
 | //    Of course, a default (zeroed) value, should always return true. | 
 | // | 
 | //  - It provides an is_tombstone() const method, which should return | 
 | //    return true iff the corresponding node indicates a previously | 
 | //    deleted item. | 
 | // | 
 | //    Note that if your hash table does not need deletion support, | 
 | //    it is highly recommended to make this a static constexpr method | 
 | //    that always return false. Doing so will optimize the lookup loop | 
 | //    automatically! | 
 | // | 
 | //  - It must provide an is_valid() method, that simply returns | 
 | //    (!is_null() && !is_tombstone()). This is a convenience for this | 
 | //    template, but also for the derived class that will extend it | 
 | //    (more on this below, in the usage description). | 
 | // | 
 | // Note that because Node instances are trivial, std::unique_ptr<> | 
 | // cannot be used in them. Item lifecycle must this be managed | 
 | // explicitly by a derived class of the template instantiation | 
 | // instead. | 
 | // | 
 | // Lookup, insertion and deletion are performed in ways that | 
 | // are *very* different from standard containers, and the reason | 
 | // is, unsuprisingly, performance. | 
 | // | 
 | // Use NodeLookup() to look for an existing item in the hash table. | 
 | // This takes the item's hash value, and a templated callable to | 
 | // compare a node against the search key. This scheme allows | 
 | // heterogeneous lookups, and keeps the node type details | 
 | // out of this header. Any recent C++ optimizer will generate | 
 | // very tight machine code for this call. | 
 | // | 
 | // The NodeLookup() function always returns a non-null and | 
 | // mutable |node| pointer. If |node->is_valid()| is true, | 
 | // then the item was found, and |node| points to it. | 
 | // | 
 | // Otherwise, |node| corresponds to a location that may be | 
 | // used for insertion. To do so, the caller should update the | 
 | // content of |node| appropriately (e.g. writing a pointer and/or | 
 | // hash value to it), then call UpdateAfterInsertion(), which | 
 | // may eventually grow the table and rehash nodes in it. | 
 | // | 
 | // To delete an item, call NodeLookup() first to verify that | 
 | // the item is present, then write a tombstone value to |node|, | 
 | // then call UpdateAfterDeletion(). | 
 | // | 
 | // Note that what the tombstone value is doesn't matter to this | 
 | // header, as long as |node->is_tombstone()| returns true for | 
 | // it. | 
 | // | 
 | // Here's an example of a concrete implementation that stores | 
 | // a hash value and an owning pointer in each node: | 
 | // | 
 | //     struct MyFooNode { | 
 | //       size_t hash; | 
 | //       Foo*   foo; | 
 | // | 
 | //       bool is_null() const { return !foo; } | 
 | //       bool is_tombstone() const { return foo == &kTombstone; } | 
 | //       bool is_valid() const { return !is_null() && !is_tombstone(); } | 
 | // | 
 | //       static const Foo kTombstone; | 
 | //     }; | 
 | // | 
 | //    class MyFooSet : public HashTableBase<MyFoodNode> { | 
 | //    public: | 
 | //      using BaseType = HashTableBase<MyFooNode>; | 
 | //      using Node = BaseType::Node; | 
 | // | 
 | //      ~MyFooSet() { | 
 | //        // Destroy all items in the set. | 
 | //        // Note that the iterator only parses over valid items. | 
 | //        for (Node* node : *this) { | 
 | //          delete node->foo; | 
 | //        } | 
 | //      } | 
 | // | 
 | //      // Returns true iff this set contains |key|. | 
 | //      bool contains(const Foo& key) const { | 
 | //        Node* node = BaseType::NodeLookup( | 
 | //            MakeHash(key), | 
 | //            [&](const Node* node) { return node->foo == key; }); | 
 | // | 
 | //        return node->is_valid(); | 
 | //      } | 
 | // | 
 | //      // Try to add |key| to the set. Return true if the insertion | 
 | //      // was succesful, or false if the item was already in the set. | 
 | //      bool add(const Foo& key) { | 
 | //        size_t hash = MakeHash(key); | 
 | //        Node* node = NodeLookup( | 
 | //            hash, | 
 | //            [&](const Node* node) { return node->foo == key; }); | 
 | // | 
 | //        if (node->is_valid()) { | 
 | //          // Already in the set. | 
 | //          return false; | 
 | //        } | 
 | // | 
 | //        // Add it. | 
 | //        node->hash = hash; | 
 | //        node->foo  = new Foo(key); | 
 | //        UpdateAfterInsert(); | 
 | //        return true; | 
 | //      } | 
 | // | 
 | //      // Try to remove |key| from the set. Return true if the | 
 | //      // item was already in it, false otherwise. | 
 | //      bool erase(const Foo& key) { | 
 | //        Node* node = BaseType::NodeLookup( | 
 | //            MakeHash(key), | 
 | //            [&](const Node* node) { return node->foo == key; }); | 
 | // | 
 | //        if (!node->is_valid()) { | 
 | //          // Not in the set. | 
 | //          return false; | 
 | //        } | 
 | // | 
 | //        delete node->foo; | 
 | //        node->foo = Node::kTombstone; | 
 | //        UpdateAfterDeletion(). | 
 | //      } | 
 | // | 
 | //      static size_t MakeHash(const Foo& foo) { | 
 | //        return std::hash<Foo>()(); | 
 | //      } | 
 | //    }; | 
 | // | 
 | // For more concrete examples, see the implementation of | 
 | // StringAtom or UniqueVector<> | 
 | // | 
 | template <typename NODE_TYPE> | 
 | class HashTableBase { | 
 |  public: | 
 |   using Node = NODE_TYPE; | 
 |  | 
 |   static_assert(std::is_trivial<NODE_TYPE>::value, | 
 |                 "KEY_TYPE should be a trivial type!"); | 
 |  | 
 |   static_assert(std::is_standard_layout<NODE_TYPE>::value, | 
 |                 "KEY_TYPE should be a standard layout type!"); | 
 |  | 
 |   // Default constructor. | 
 |   HashTableBase() = default; | 
 |  | 
 |   // Destructor. This only deals with the |buckets_| array itself. | 
 |   ~HashTableBase() { | 
 |     if (buckets_ != buckets0_) | 
 |       free(buckets_); | 
 |   } | 
 |  | 
 |   // Copy and move operations. These only operate on the |buckets_| array | 
 |   // so any owned pointer inside nodes should be handled by custom | 
 |   // constructors and operators in the derived class, if needed. | 
 |   HashTableBase(const HashTableBase& other) | 
 |       : count_(other.count_), size_(other.size_) { | 
 |     if (other.buckets_ != other.buckets0_) { | 
 |       // NOTE: using malloc() here to clarify that no object construction | 
 |       // should occur here. | 
 |       buckets_ = reinterpret_cast<Node*>(malloc(other.size_ * sizeof(Node))); | 
 |     } | 
 |     memcpy(buckets_, other.buckets_, other.size_ * sizeof(Node)); | 
 |   } | 
 |  | 
 |   HashTableBase& operator=(const HashTableBase& other) { | 
 |     if (this != &other) { | 
 |       this->~HashTableBase(); | 
 |       new (this) HashTableBase(other); | 
 |     } | 
 |     return *this; | 
 |   } | 
 |  | 
 |   HashTableBase(HashTableBase&& other) noexcept | 
 |       : count_(other.count_), size_(other.size_), buckets_(other.buckets_) { | 
 |     if (buckets_ == other.buckets0_) { | 
 |       buckets0_[0] = other.buckets0_[0]; | 
 |       buckets_ = buckets0_; | 
 |     } else { | 
 |       other.buckets_ = other.buckets0_; | 
 |     } | 
 |     other.NodeClear(); | 
 |   } | 
 |  | 
 |   HashTableBase& operator=(HashTableBase&& other) noexcept { | 
 |     if (this != &other) { | 
 |       this->~HashTableBase(); | 
 |       new (this) HashTableBase(std::move(other)); | 
 |     } | 
 |     return *this; | 
 |   } | 
 |  | 
 |   // Return true iff the table is empty. | 
 |   bool empty() const { return count_ == 0; } | 
 |  | 
 |   // Return the number of keys in the set. | 
 |   size_t size() const { return count_; } | 
 |  | 
 |  protected: | 
 |   // The following should only be called by derived classes that | 
 |   // extend this template class, and are not available to their | 
 |   // clients. This forces the derived class to implement lookup | 
 |   // insertion and deletion with sane APIs instead. | 
 |  | 
 |   // Iteration support note: | 
 |   // | 
 |   // Derived classes, if they wish to support iteration, should provide their | 
 |   // own iterator/const_iterator/begin()/end() types and methods, possibly as | 
 |   // simple wrappers around the NodeIterator/NodeBegin/NodeEnd below. | 
 |   // | 
 |   // The ValidNodesRange() method also returns a object that has begin() and | 
 |   // end() methods to be used in for-range loops as in: | 
 |   // | 
 |   //    for (Node& node : my_table.ValidNodesRange()) { ... } | 
 |   // | 
 |   struct NodeIterator { | 
 |     Node& operator*() { return *node_; } | 
 |     const Node& operator*() const { return *node_; } | 
 |  | 
 |     Node* operator->() { return node_; } | 
 |     const Node* operator->() const { return node_; } | 
 |  | 
 |     bool operator==(const NodeIterator& other) const { | 
 |       return node_ == other.node_; | 
 |     } | 
 |  | 
 |     bool operator!=(const NodeIterator& other) const { | 
 |       return node_ != other.node_; | 
 |     } | 
 |  | 
 |     // pre-increment | 
 |     NodeIterator& operator++() { | 
 |       node_++; | 
 |       while (node_ < node_limit_ && !node_->is_valid()) | 
 |         node_++; | 
 |  | 
 |       return *this; | 
 |     } | 
 |  | 
 |     // post-increment | 
 |     NodeIterator operator++(int) { | 
 |       NodeIterator result = *this; | 
 |       ++(*this); | 
 |       return result; | 
 |     } | 
 |  | 
 |     Node* node_ = nullptr; | 
 |     Node* node_limit_ = nullptr; | 
 |   }; | 
 |  | 
 |   // NOTE: This is intentionally not public to avoid exposing | 
 |   // them in derived classes by mistake. If a derived class | 
 |   // wants to support iteration, it provide its own begin() and end() | 
 |   // methods, possibly using NodeBegin() and NodeEnd() below to | 
 |   // implement them. | 
 |   NodeIterator begin() { return NodeBegin(); } | 
 |   NodeIterator end() { return NodeEnd(); } | 
 |  | 
 |   // Providing methods named NodeBegin() and NodeEnd(), instead of | 
 |   // just using begin() and end() is a convenience to derived classes | 
 |   // that need to provide their own begin() and end() method, e.g. | 
 |   // consider this class: | 
 |   // | 
 |   //  struct FooNode { | 
 |   //     size_t hash; | 
 |   //     Foo*   foo_ptr; | 
 |   //     ... | 
 |   //  }; | 
 |   // | 
 |   //  class FooTable : public HashTableBase<FooNode> { | 
 |   //  public: | 
 |   //     ... | 
 |   // | 
 |   //     // Iterators point to Foo instances, not table nodes. | 
 |   //     struct ConstIterator { | 
 |   //       const Foo* operator->() { return iter_->foo_ptr; } | 
 |   //       const Foo& operator->() { return *(iter_->foo_ptr); } | 
 |   //       ... | 
 |   //       NodeIterator iter_; | 
 |   //     }; | 
 |   // | 
 |   // and compare two ways to implement its begin() method: | 
 |   // | 
 |   //    Foo::ConstIterator Foo::begin() const { | 
 |   //      return { | 
 |   //        reinterpret_cast<const HashTableBase<FooNode> *>(this)->begin() | 
 |   //      }; | 
 |   //    }; | 
 |   // | 
 |   // with: | 
 |   // | 
 |   //    Foo::ConstIterator Foo::begin() const { | 
 |   //      return { NodeBegin(); } | 
 |   //    } | 
 |   // | 
 |   NodeIterator NodeBegin() const { | 
 |     Node* node = buckets_; | 
 |     Node* limit = node + size_; | 
 |     while (node < limit && !node->is_valid()) | 
 |       node++; | 
 |  | 
 |     return {node, limit}; | 
 |   } | 
 |  | 
 |   NodeIterator NodeEnd() const { | 
 |     Node* limit = buckets_ + size_; | 
 |     return {limit, limit}; | 
 |   } | 
 |  | 
 |   // ValidNodeRange() allows a derived-class to use range-based  loops | 
 |   // over valid nodes, even if they have defined their own begin() and | 
 |   // end() methods, e.g. following the same class design as in the | 
 |   // above comment: | 
 |   // | 
 |   //   FooTable::~FooTable() { | 
 |   //     for (const FooNode& node : ValidNodesRange()) { | 
 |   //       delete node->foo_ptr; | 
 |   //     } | 
 |   //   } | 
 |   // | 
 |   // which is a little bit clearer than the (valid) alternative: | 
 |   // | 
 |   //   FooTable::~FooTable() { | 
 |   //     for (Foo& foo : *this) { | 
 |   //       delete &foo;  // WHAT!? | 
 |   //     } | 
 |   //   } | 
 |   // | 
 |   struct NodeIteratorPair { | 
 |     NodeIterator begin() { return begin_; } | 
 |     NodeIterator end() { return end_; } | 
 |  | 
 |     NodeIterator begin_; | 
 |     NodeIterator end_; | 
 |   }; | 
 |  | 
 |   NodeIteratorPair ValidNodesRange() const { return {NodeBegin(), NodeEnd()}; } | 
 |  | 
 |   // Clear the nodes table completely. | 
 |   void NodeClear() { | 
 |     if (buckets_ != buckets0_) | 
 |       free(buckets_); | 
 |  | 
 |     count_ = 0; | 
 |     size_ = 1; | 
 |     buckets_ = buckets0_; | 
 |     buckets0_[0] = Node{}; | 
 |   } | 
 |  | 
 |   // Lookup for a node in the hash table that matches the |node_equal| | 
 |   // predicate, which takes a const Node* pointer argument, and returns | 
 |   // true iff this corresponds to a lookup match. | 
 |   // | 
 |   // IMPORTANT: |node_equal| may or may not check the node hash value, | 
 |   // the choice is left to the implementation. | 
 |   // | 
 |   // Returns a non-null *mutable* node pointer. |node->is_valid()| will | 
 |   // be true for matches, and false for misses. | 
 |   // | 
 |   // NOTE: In case of a miss, this will return the location of the first | 
 |   // tombstone encountered during probing, if any, or the first free entry | 
 |   // otherwise. This keeps the table consistent in case the node is overwritten | 
 |   // by the caller in a following insert operation. | 
 |   template <typename NODE_EQUAL> | 
 |   Node* NodeLookup(size_t hash, NODE_EQUAL node_equal) const { | 
 |     size_t mask = size_ - 1; | 
 |     size_t index = hash & mask; | 
 |     Node* tombstone = nullptr;  // First tombstone node found, if any. | 
 |     for (;;) { | 
 |       Node* node = buckets_ + index; | 
 |       if (node->is_null()) { | 
 |         return tombstone ? tombstone : node; | 
 |       } | 
 |       if (node->is_tombstone()) { | 
 |         if (!tombstone) | 
 |           tombstone = node; | 
 |       } else if (node_equal(node)) { | 
 |         return node; | 
 |       } | 
 |       index = (index + 1) & mask; | 
 |     } | 
 |   } | 
 |  | 
 |   // Call this method after updating the content of the |node| pointer | 
 |   // returned by an unsucessful NodeLookup(). Return true to indicate that | 
 |   // the table size changed, and that existing iterators were invalidated. | 
 |   bool UpdateAfterInsert() { | 
 |     count_ += 1; | 
 |     if (UNLIKELY(count_ * 4 >= size_ * 3)) { | 
 |       GrowBuckets(); | 
 |       return true; | 
 |     } | 
 |     return false; | 
 |   } | 
 |  | 
 |   // Call this method after updating the content of the |node| value | 
 |   // returned a by succesful NodeLookup, to the tombstone value, if any. | 
 |   // Return true to indicate a table size change, ie. that existing | 
 |   // iterators where invalidated. | 
 |   bool UpdateAfterRemoval() { | 
 |     count_ -= 1; | 
 |     // For now don't support shrinking since this is not useful for GN. | 
 |     return false; | 
 |   } | 
 |  | 
 |  private: | 
 | #if defined(__GNUC__) || defined(__clang__) | 
 |   [[gnu::noinline]] | 
 | #endif | 
 |   void | 
 |   GrowBuckets() { | 
 |     size_t size = size_; | 
 |     size_t new_size = (size == 1) ? 8 : size * 2; | 
 |     size_t new_mask = new_size - 1; | 
 |  | 
 |     // NOTE: Using calloc() since no object constructiopn can or should take | 
 |     // place here. | 
 |     Node* new_buckets = reinterpret_cast<Node*>(calloc(new_size, sizeof(Node))); | 
 |  | 
 |     for (size_t src_index = 0; src_index < size; ++src_index) { | 
 |       const Node* node = &buckets_[src_index]; | 
 |       if (!node->is_valid()) | 
 |         continue; | 
 |       size_t dst_index = node->hash_value() & new_mask; | 
 |       for (;;) { | 
 |         Node* node2 = new_buckets + dst_index; | 
 |         if (node2->is_null()) { | 
 |           *node2 = *node; | 
 |           break; | 
 |         } | 
 |         dst_index = (dst_index + 1) & new_mask; | 
 |       } | 
 |     } | 
 |  | 
 |     if (buckets_ != buckets0_) | 
 |       free(buckets_); | 
 |  | 
 |     buckets_ = new_buckets; | 
 |     buckets0_[0] = Node{}; | 
 |     size_ = new_size; | 
 |   } | 
 |  | 
 |   // NOTE: The reason for default-initializing |buckets_| to a storage slot | 
 |   // inside the object is to ensure the value is never null. This removes one | 
 |   // nullptr check from each NodeLookup() instantiation. | 
 |   size_t count_ = 0; | 
 |   size_t size_ = 1; | 
 |   Node* buckets_ = buckets0_; | 
 |   Node buckets0_[1] = {{}}; | 
 | }; | 
 |  | 
 | #endif  // TOOLS_GN_HASH_TABLE_BASE_H_ |