Improve UniqueVector class.

This improves the UniqueVector template class in several ways,
which will be used by future CLs:

- Allow custom hash and equality operators to be used for
  the items in the set.

- Make the UniqueVectorNode internal set type smaller by using
  32-bit indices and hash values, reducing the RAM footproint of
  each instance.

- Add Contains() method, and kIndexNone value, returned by IndexOf()
  when the value is not found, replacing the hard-coded (size_t)-1
  value.

- Add emplace_back() method for convenience.

- Add a release() method to move the final vector out of the
  instance.

- Add PushBackWithIndex(), EmplaceBackWithIndex() methods to
  perform push_back() / emplace_back() while returning a
  (insertion_flag, item_index) value.

Bug: None
Change-Id: I381cd05f5fc8fadf06d66379a1ead67c9f940267
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/12701
Reviewed-by: Brett Wilson <brettw@chromium.org>
Commit-Queue: David Turner <digit@google.com>
diff --git a/src/gn/ninja_c_binary_target_writer.cc b/src/gn/ninja_c_binary_target_writer.cc
index 376c943..5b518b3 100644
--- a/src/gn/ninja_c_binary_target_writer.cc
+++ b/src/gn/ninja_c_binary_target_writer.cc
@@ -213,7 +213,7 @@
     AddSourceSetFiles(target_, &computed_obj);
     DCHECK_EQ(obj_files.size(), computed_obj.size());
     for (const auto& obj : obj_files)
-      DCHECK_NE(static_cast<size_t>(-1), computed_obj.IndexOf(obj));
+      DCHECK(computed_obj.Contains(obj));
 #endif
   } else {
     WriteLinkerStuff(obj_files, other_files, input_deps);
@@ -754,11 +754,10 @@
   }
 
   // Libraries specified by paths.
-  const UniqueVector<LibFile>& libs = target_->all_libs();
-  for (size_t i = 0; i < libs.size(); i++) {
-    if (libs[i].is_source_file()) {
+  for (const auto& lib : target_->all_libs()) {
+    if (lib.is_source_file()) {
       implicit_deps.push_back(
-          OutputFile(settings_->build_settings(), libs[i].source_file()));
+          OutputFile(settings_->build_settings(), lib.source_file()));
     }
   }
 
diff --git a/src/gn/unique_vector.h b/src/gn/unique_vector.h
index 821f55a..da4edc2 100644
--- a/src/gn/unique_vector.h
+++ b/src/gn/unique_vector.h
@@ -6,6 +6,7 @@
 #define TOOLS_GN_UNIQUE_VECTOR_H_
 
 #include <stddef.h>
+#include <stdint.h>
 
 #include <algorithm>
 #include <vector>
@@ -17,10 +18,10 @@
 // to follow the HashTableBase requirements (i.e. zero-initializable null
 // value).
 struct UniqueVectorNode {
-  size_t hash;
-  size_t index_plus1;
+  uint32_t hash32;
+  uint32_t index_plus1;
 
-  size_t hash_value() const { return hash; }
+  size_t hash_value() const { return hash32; }
 
   bool is_valid() const { return !is_null(); }
 
@@ -32,9 +33,11 @@
   // Return vector index.
   size_t index() const { return index_plus1 - 1u; }
 
+  static uint32_t ToHash32(size_t hash) { return static_cast<uint32_t>(hash); }
+
   // Create new instance from hash value and vector index.
   static UniqueVectorNode Make(size_t hash, size_t index) {
-    return {hash, index + 1u};
+    return {ToHash32(hash), static_cast<uint32_t>(index + 1u)};
   }
 };
 
@@ -52,10 +55,11 @@
   // |vector| is containing vector for existing items.
   //
   // Returns a non-null mutable Node pointer.
-  template <typename T>
+  template <typename T, typename EqualTo = std::equal_to<T>>
   Node* Lookup(size_t hash, const T& item, const std::vector<T>& vector) const {
-    return BaseType::NodeLookup(hash, [&](const Node* node) {
-      return node->hash == hash && vector[node->index()] == item;
+    uint32_t hash32 = Node::ToHash32(hash);
+    return BaseType::NodeLookup(hash32, [&](const Node* node) {
+      return hash32 == node->hash32 && EqualTo()(vector[node->index()], item);
     });
   }
 
@@ -72,7 +76,9 @@
 // An ordered set optimized for GN's usage. Such sets are used to store lists
 // of configs and libraries, and are appended to but not randomly inserted
 // into.
-template <typename T>
+template <typename T,
+          typename Hash = std::hash<T>,
+          typename EqualTo = std::equal_to<T>>
 class UniqueVector {
  public:
   using Vector = std::vector<T>;
@@ -93,32 +99,85 @@
   const_iterator begin() const { return vector_.begin(); }
   const_iterator end() const { return vector_.end(); }
 
+  // Extract the vector out of the instance, clearing it at the same time.
+  Vector release() {
+    Vector result = std::move(vector_);
+    clear();
+    return result;
+  }
+
   // Returns true if the item was appended, false if it already existed (and
   // thus the vector was not modified).
   bool push_back(const T& t) {
-    size_t hash = std::hash<T>()(t);
-    auto* node = set_.Lookup(hash, t, vector_);
+    size_t hash;
+    auto* node = Lookup(t, &hash);
     if (node->is_valid()) {
       return false;  // Already have this one.
     }
-
     vector_.push_back(t);
     set_.Insert(node, hash, vector_.size() - 1);
     return true;
   }
 
+  // Same as above, but moves the item into the vector if possible.
   bool push_back(T&& t) {
-    size_t hash = std::hash<T>()(t);
-    auto* node = set_.Lookup(hash, t, vector_);
+    size_t hash = Hash()(t);
+    auto* node = Lookup(t, &hash);
     if (node->is_valid()) {
       return false;  // Already have this one.
     }
-
     vector_.push_back(std::move(t));
     set_.Insert(node, hash, vector_.size() - 1);
     return true;
   }
 
+  // Construct an item in-place if possible. Return true if it was
+  // appended, false otherwise.
+  template <typename... ARGS>
+  bool emplace_back(ARGS... args) {
+    return push_back(T{std::forward<ARGS>(args)...});
+  }
+
+  // Try to add an item to the vector. Return (true, index) in
+  // case of insertion, or (false, index) otherwise. In both cases
+  // |index| will be the item's index in the set and will not be
+  // kIndexNone. This can be used to implement a map using a
+  // UniqueVector<> for keys, and a parallel array for values.
+  std::pair<bool, size_t> PushBackWithIndex(const T& t) {
+    size_t hash;
+    auto* node = Lookup(t, &hash);
+    if (node->is_valid()) {
+      return {false, node->index()};
+    }
+    size_t result = vector_.size();
+    vector_.push_back(t);
+    set_.Insert(node, hash, result);
+    return {true, result};
+  }
+
+  // Same as above, but moves the item into the set on success.
+  std::pair<bool, size_t> PushBackWithIndex(T&& t) {
+    size_t hash;
+    auto* node = Lookup(t, &hash);
+    if (node->is_valid()) {
+      return {false, node->index()};
+    }
+    size_t result = vector_.size();
+    vector_.push_back(std::move(t));
+    set_.Insert(node, hash, result);
+    return {true, result};
+  }
+
+  // Construct an item in-place if possible. If a corresponding
+  // item is already in the vector, return (false, index), otherwise
+  // perform the insertion and return (true, index). In both cases
+  // |index| will be the item's index in the vector and will not be
+  // kIndexNone.
+  template <typename... ARGS>
+  std::pair<bool, size_t> EmplaceBackWithIndex(ARGS... args) {
+    return PushBackWithIndex(T{std::forward<ARGS>(args)...});
+  }
+
   // Appends a range of items from an iterator.
   template <typename iter>
   void Append(const iter& begin, const iter& end) {
@@ -126,19 +185,33 @@
       push_back(*i);
   }
 
+  // Append another vector into this one.
   void Append(const UniqueVector& other) {
     Append(other.begin(), other.end());
   }
 
-  // Returns the index of the item matching the given value in the list, or
-  // (size_t)(-1) if it's not found.
-  size_t IndexOf(const T& t) const {
-    size_t hash = std::hash<T>()(t);
-    auto* node = set_.Lookup(hash, t, vector_);
-    return node->index();
+  // Returns true if the item is already in the vector.
+  bool Contains(const T& t) const {
+    size_t hash;
+    return Lookup(t, &hash)->is_valid();
   }
 
+  // Returns the index of the item matching the given value in the list, or
+  // kIndexNone if it's not found.
+  size_t IndexOf(const T& t) const {
+    size_t hash;
+    return Lookup(t, &hash)->index();
+  }
+
+  static constexpr size_t kIndexNone = 0xffffffffu;
+
  private:
+  // Lookup hash set node for item |t|, also sets |*hash| to the hash value.
+  UniqueVectorNode* Lookup(const T& t, size_t* hash) const {
+    *hash = Hash()(t);
+    return set_.Lookup<T, EqualTo>(*hash, t, vector_);
+  }
+
   Vector vector_;
   UniqueVectorHashSet set_;
 };
diff --git a/src/gn/unique_vector_unittest.cc b/src/gn/unique_vector_unittest.cc
index 96303b4..186cefb 100644
--- a/src/gn/unique_vector_unittest.cc
+++ b/src/gn/unique_vector_unittest.cc
@@ -28,7 +28,8 @@
   EXPECT_EQ(0u, foo.IndexOf(1));
   EXPECT_EQ(1u, foo.IndexOf(2));
   EXPECT_EQ(2u, foo.IndexOf(0));
-  EXPECT_EQ(static_cast<size_t>(-1), foo.IndexOf(99));
+  EXPECT_FALSE(foo.Contains(98));
+  EXPECT_EQ(foo.kIndexNone, foo.IndexOf(99));
 }
 
 TEST(UniqueVector, PushBackMove) {
@@ -43,3 +44,85 @@
 
   EXPECT_EQ(0u, vect.IndexOf("a"));
 }
+
+TEST(UniqueVector, EmplaceBack) {
+  UniqueVector<std::string> vect;
+  EXPECT_TRUE(vect.emplace_back("a"));
+  EXPECT_FALSE(vect.emplace_back("a"));
+  EXPECT_EQ(1u, vect.size());
+  EXPECT_TRUE(vect.emplace_back("b"));
+
+  EXPECT_EQ(2u, vect.size());
+  EXPECT_TRUE(vect.Contains(std::string("a")));
+  EXPECT_TRUE(vect.Contains(std::string("b")));
+}
+
+static auto MakePair(bool first, size_t second) -> std::pair<bool, size_t> {
+  return {first, second};
+}
+
+TEST(UniqueVector, PushBackWithIndex) {
+  UniqueVector<int> foo;
+
+  EXPECT_EQ(MakePair(true, 0u), foo.PushBackWithIndex(1));
+  EXPECT_EQ(MakePair(false, 0u), foo.PushBackWithIndex(1));
+  EXPECT_EQ(MakePair(true, 1u), foo.PushBackWithIndex(2));
+  EXPECT_EQ(MakePair(true, 2u), foo.PushBackWithIndex(3));
+  EXPECT_EQ(MakePair(false, 0u), foo.PushBackWithIndex(1));
+  EXPECT_EQ(MakePair(false, 1u), foo.PushBackWithIndex(2));
+  EXPECT_EQ(MakePair(false, 2u), foo.PushBackWithIndex(3));
+
+  EXPECT_TRUE(foo.Contains(1));
+  EXPECT_TRUE(foo.Contains(2));
+  EXPECT_TRUE(foo.Contains(3));
+  EXPECT_EQ(0u, foo.IndexOf(1));
+  EXPECT_EQ(1u, foo.IndexOf(2));
+  EXPECT_EQ(2u, foo.IndexOf(3));
+  EXPECT_EQ(foo.kIndexNone, foo.IndexOf(98));
+}
+
+TEST(UniqueVector, PushBackMoveWithIndex) {
+  UniqueVector<std::string> vect;
+  std::string a("a");
+  EXPECT_EQ(MakePair(true, 0), vect.PushBackWithIndex(std::move(a)));
+  EXPECT_EQ("", a);
+
+  a = "a";
+  EXPECT_EQ(MakePair(false, 0), vect.PushBackWithIndex(std::move(a)));
+  EXPECT_EQ("a", a);
+
+  EXPECT_EQ(0u, vect.IndexOf("a"));
+}
+
+TEST(UniqueVector, EmplaceBackWithIndex) {
+  UniqueVector<std::string> vect;
+  EXPECT_EQ(MakePair(true, 0u), vect.EmplaceBackWithIndex("a"));
+  EXPECT_EQ(MakePair(false, 0u), vect.EmplaceBackWithIndex("a"));
+  EXPECT_EQ(1u, vect.size());
+
+  EXPECT_EQ(MakePair(true, 1u), vect.EmplaceBackWithIndex("b"));
+  EXPECT_EQ(2u, vect.size());
+
+  EXPECT_TRUE(vect.Contains(std::string("a")));
+  EXPECT_TRUE(vect.Contains(std::string("b")));
+}
+
+TEST(UniqueVector, Release) {
+  UniqueVector<std::string> vect;
+  EXPECT_TRUE(vect.emplace_back("a"));
+  EXPECT_TRUE(vect.emplace_back("b"));
+  EXPECT_TRUE(vect.emplace_back("c"));
+
+  std::vector<std::string> v = vect.release();
+  EXPECT_TRUE(vect.empty());
+  EXPECT_FALSE(v.empty());
+
+  EXPECT_FALSE(vect.Contains(std::string("a")));
+  EXPECT_FALSE(vect.Contains(std::string("b")));
+  EXPECT_FALSE(vect.Contains(std::string("a")));
+
+  EXPECT_EQ(3u, v.size());
+  EXPECT_EQ(std::string("a"), v[0]);
+  EXPECT_EQ(std::string("b"), v[1]);
+  EXPECT_EQ(std::string("c"), v[2]);
+}