Deduplicate item in 'deps', 'sources' and related lists

When sorting 'deps', 'sources' and related lists, also remove any
duplicated items (this can be inhibited by using a "# KEEPDUPS"
comment before the list).

Also fix the sorting of accessor nodes (they were ignoring the
member or subscript part when comparing them).

Fixed: 42440318
Change-Id: I99aae1c2dc2008c21423464e0744792ada96732c
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/19660
Reviewed-by: Takuto Ikuta <tikuta@google.com>
Commit-Queue: Sylvain Defresne <sdefresne@chromium.org>
diff --git a/src/gn/command_format.cc b/src/gn/command_format.cc
index a509eed..9805309 100644
--- a/src/gn/command_format.cc
+++ b/src/gn/command_format.cc
@@ -124,6 +124,10 @@
   return ident.ends_with("deps") || ident == "visibility";
 }
 
+bool IsSourcesList(std::string_view ident) {
+  return ident.ends_with("sources") || ident == "public";
+}
+
 class Printer {
  public:
   Printer();
@@ -182,6 +186,10 @@
   // 'visibility': same as 'deps'.
   void SortIfApplicable(const BinaryOpNode* binop);
 
+  // Remove duplicates. Applies to 'visibility', 'deps' or ends in 'deps',
+  // 'sources' or ends in 'sources', or 'public'.
+  void DeduplicateIfApplicable(const BinaryOpNode* binop);
+
   // Traverse a binary op node tree and apply a callback to each leaf node.
   void TraverseBinaryOpNode(const ParseNode* node,
                             std::function<void(const ParseNode*)> callback);
@@ -442,7 +450,7 @@
   }
 
   const std::string_view lhs = ident->value().value();
-  if (lhs.ends_with("sources") || lhs == "public") {
+  if (IsSourcesList(lhs)) {
     TraverseBinaryOpNode(binop->right(), [](const ParseNode* node) {
       const ListNode* list = node->AsList();
       if (list)
@@ -457,6 +465,31 @@
   }
 }
 
+void Printer::DeduplicateIfApplicable(const BinaryOpNode* binop) {
+  if (const Comments* comments = binop->comments()) {
+    const std::vector<Token>& before = comments->before();
+    if (!before.empty() && (before.front().value() == "# KEEPDUPS" ||
+                            before.back().value() == "# KEEPDUPS")) {
+      // Allow disabling of deduplication for specific actions that might
+      // want duplicate sources.
+      return;
+    }
+  }
+  const IdentifierNode* ident = binop->left()->AsIdentifier();
+  if (!ident || !IsAssignment(binop->op().value())) {
+    return;
+  }
+
+  const std::string_view lhs = ident->value().value();
+  if (IsSourcesList(lhs) || IsTargetsList(lhs)) {
+    TraverseBinaryOpNode(binop->right(), [](const ParseNode* node) {
+      const ListNode* list = node->AsList();
+      if (list)
+        const_cast<ListNode*>(list)->DeduplicateList();
+    });
+  }
+}
+
 void Printer::TraverseBinaryOpNode(
     const ParseNode* node,
     std::function<void(const ParseNode*)> callback) {
@@ -790,6 +823,7 @@
     // Shorten before sorting, since the shortening may affect the ordering.
     ShortenIfApplicable(binop);
     SortIfApplicable(binop);
+    DeduplicateIfApplicable(binop);
 
     Precedence prec = precedence_[binop->op().value()];
 
diff --git a/src/gn/command_format_unittest.cc b/src/gn/command_format_unittest.cc
index de7e004..645ec27 100644
--- a/src/gn/command_format_unittest.cc
+++ b/src/gn/command_format_unittest.cc
@@ -137,3 +137,4 @@
 FORMAT_TEST(082)
 FORMAT_TEST(083)
 FORMAT_TEST(084)
+FORMAT_TEST(085)
diff --git a/src/gn/format_test_data/063.gn b/src/gn/format_test_data/063.gn
index de5002e..2a5659f 100644
--- a/src/gn/format_test_data/063.gn
+++ b/src/gn/format_test_data/063.gn
@@ -1,6 +1,15 @@
 source_set("test") {
   a = "a"
   b = "b"
+  c = {
+    d = "d"
+    e = "e"
+  }
+  f = [
+    "f0",
+    "f1",
+    "f2",
+  ]
   deps = [
     "//a",
     "//a/a",
@@ -8,8 +17,12 @@
     "//a:a",
     "//a:b",
     "//b",
+    c.e,
+    c.d,
     ":a",
     ":b",
+    f[2],
+    f[0],
     "a",
     "a/a",
     "a/b",
@@ -18,6 +31,9 @@
     "b",
     a,
     b,
+    c.d,
+    f[0],
+    f[1],
   ]
 
   public_deps = []
@@ -32,6 +48,18 @@
       "a:a",
       a,
     ]
+
+    deps += [
+      ":d",
+      ":e",
+      # Some comment attached to an item. Should be moved with the item
+      # when it is sorted.
+      ":a",
+      ":b",
+      # Comment attached to a duplicated item. Should be concatenated to
+      # the comment from the other item.
+      ":a",
+    ]
   }
 
   # Sort lists with "deps" suffix as "deps".
diff --git a/src/gn/format_test_data/063.golden b/src/gn/format_test_data/063.golden
index 73cfc04..3c0d677 100644
--- a/src/gn/format_test_data/063.golden
+++ b/src/gn/format_test_data/063.golden
@@ -1,23 +1,35 @@
 source_set("test") {
   a = "a"
   b = "b"
+  c = {
+    d = "d"
+    e = "e"
+  }
+  f = [
+    "f0",
+    "f1",
+    "f2",
+  ]
   deps = [
     ":a",
     ":b",
     "a",
-    "a",
     "a:b",
     "a/a",
     "a/b",
     "b",
     "//a",
-    "//a",
     "//a:b",
     "//a/a",
     "//a/b",
     "//b",
     a,
     b,
+    c.d,
+    c.e,
+    f[0],
+    f[1],
+    f[2],
   ]
 
   public_deps = []
@@ -25,13 +37,22 @@
     public_deps += [
       ":a",
       "a",
-      "a",
       "a/a",
       "//a",
-      "//a",
       "//a/a",
       a,
     ]
+
+    deps += [
+      # Some comment attached to an item. Should be moved with the item
+      # when it is sorted.
+      # Comment attached to a duplicated item. Should be concatenated to
+      # the comment from the other item.
+      ":a",
+      ":b",
+      ":d",
+      ":e",
+    ]
   }
 
   # Sort lists with "deps" suffix as "deps".
diff --git a/src/gn/format_test_data/085.gn b/src/gn/format_test_data/085.gn
new file mode 100644
index 0000000..c8ada36
--- /dev/null
+++ b/src/gn/format_test_data/085.gn
@@ -0,0 +1,32 @@
+# Suppress deduplication based on comment.
+
+# KEEPDUPS
+sources = []
+
+# KEEPDUPS
+sources = [ "a" ]
+
+# KEEPDUPS
+sources += [ "a" ]
+
+# KEEPDUPS
+# This is KEEPDUPS because reason.
+sources = [
+  "z",
+  "a",
+  "z",
+  "z2",
+  "a",
+  "y.cc",
+]
+
+# This is KEEPDUPS because reason:
+# KEEPDUPS
+sources += [
+  "z",
+  "a",
+  "z",
+  "z2",
+  "a",
+  "y.cc",
+]
diff --git a/src/gn/format_test_data/085.golden b/src/gn/format_test_data/085.golden
new file mode 100644
index 0000000..7c5dde5
--- /dev/null
+++ b/src/gn/format_test_data/085.golden
@@ -0,0 +1,32 @@
+# Suppress deduplication based on comment.
+
+# KEEPDUPS
+sources = []
+
+# KEEPDUPS
+sources = [ "a" ]
+
+# KEEPDUPS
+sources += [ "a" ]
+
+# KEEPDUPS
+# This is KEEPDUPS because reason.
+sources = [
+  "a",
+  "a",
+  "y.cc",
+  "z",
+  "z",
+  "z2",
+]
+
+# This is KEEPDUPS because reason:
+# KEEPDUPS
+sources += [
+  "a",
+  "a",
+  "y.cc",
+  "z",
+  "z",
+  "z2",
+]
diff --git a/src/gn/parse_tree.cc b/src/gn/parse_tree.cc
index b9f04f1..e4fba38 100644
--- a/src/gn/parse_tree.cc
+++ b/src/gn/parse_tree.cc
@@ -7,6 +7,7 @@
 #include <stdint.h>
 
 #include <memory>
+#include <sstream>
 #include <string>
 #include <tuple>
 
@@ -83,15 +84,24 @@
                static_cast<int>(node->comments()->before().size() + 1)));
 }
 
-std::string_view GetStringRepresentation(const ParseNode* node) {
+std::string GetStringRepresentation(const ParseNode* node) {
   DCHECK(node->AsLiteral() || node->AsIdentifier() || node->AsAccessor());
   if (node->AsLiteral())
-    return node->AsLiteral()->value().value();
-  else if (node->AsIdentifier())
-    return node->AsIdentifier()->value().value();
-  else if (node->AsAccessor())
-    return node->AsAccessor()->base().value();
-  return std::string_view();
+    return std::string(node->AsLiteral()->value().value());
+  if (node->AsIdentifier())
+    return std::string(node->AsIdentifier()->value().value());
+  if (const auto* accessor_node = node->AsAccessor()) {
+    std::ostringstream buffer;
+    buffer << accessor_node->base().value();
+    if (const auto* subscript_node = accessor_node->subscript()) {
+      buffer << "[" << GetStringRepresentation(subscript_node) << "]";
+    }
+    if (const auto* member_node = accessor_node->member()) {
+      buffer << "." << member_node->value().value();
+    }
+    return buffer.str();
+  }
+  return std::string();
 }
 
 void AddLocationJSONNodes(base::Value* dict, LocationRange location) {
@@ -156,6 +166,18 @@
                                 value.FindKey(kJsonNodeValue)->GetString());
 }
 
+void SetLineNumber(const ParseNode* node, int line_number) {
+  DCHECK(node->AsLiteral() || node->AsIdentifier() || node->AsAccessor());
+  if (node->AsLiteral()) {
+    const_cast<LiteralNode*>(node->AsLiteral())->SetNewLocation(line_number);
+  } else if (node->AsIdentifier()) {
+    const_cast<IdentifierNode*>(node->AsIdentifier())
+        ->SetNewLocation(line_number);
+  } else if (node->AsAccessor()) {
+    const_cast<AccessorNode*>(node->AsAccessor())->SetNewLocation(line_number);
+  }
+}
+
 }  // namespace
 
 Comments::Comments() = default;
@@ -978,19 +1000,8 @@
     const ParseNode* prev = nullptr;
     for (size_t i = sr.begin; i != sr.end; ++i) {
       const ParseNode* node = contents_[i].get();
-      DCHECK(node->AsLiteral() || node->AsIdentifier() || node->AsAccessor());
-      int line_number =
-          prev ? prev->GetRange().end().line_number() + 1 : start_line;
-      if (node->AsLiteral()) {
-        const_cast<LiteralNode*>(node->AsLiteral())
-            ->SetNewLocation(line_number);
-      } else if (node->AsIdentifier()) {
-        const_cast<IdentifierNode*>(node->AsIdentifier())
-            ->SetNewLocation(line_number);
-      } else if (node->AsAccessor()) {
-        const_cast<AccessorNode*>(node->AsAccessor())
-            ->SetNewLocation(line_number);
-      }
+      SetLineNumber(
+          node, prev ? prev->GetRange().end().line_number() + 1 : start_line);
       prev = node;
     }
   }
@@ -1007,8 +1018,8 @@
 void ListNode::SortAsStringsList() {
   // Sorts alphabetically.
   SortList([](const ParseNode* a, const ParseNode* b) {
-    std::string_view astr = GetStringRepresentation(a);
-    std::string_view bstr = GetStringRepresentation(b);
+    std::string astr = GetStringRepresentation(a);
+    std::string bstr = GetStringRepresentation(b);
     return astr < bstr;
   });
 }
@@ -1017,13 +1028,59 @@
   // Sorts first relative targets, then absolute, each group is sorted
   // alphabetically.
   SortList([](const ParseNode* a, const ParseNode* b) {
-    std::string_view astr = GetStringRepresentation(a);
-    std::string_view bstr = GetStringRepresentation(b);
+    std::string astr = GetStringRepresentation(a);
+    std::string bstr = GetStringRepresentation(b);
     return std::make_pair(GetDepsCategory(astr), SplitAtFirst(astr, ':')) <
            std::make_pair(GetDepsCategory(bstr), SplitAtFirst(bstr, ':'));
   });
 }
 
+void ListNode::DeduplicateList() {
+  // Nothing to do if the list is empty.
+  if (contents_.empty()) {
+    return;
+  }
+
+  // First check if there is any unsupported nodes and bail out if this
+  // is the case.
+  for (auto& node : contents_) {
+    if (!node->AsLiteral() && !node->AsIdentifier() && !node->AsAccessor())
+      return;
+  }
+
+  const ParseNode* prev = nullptr;
+  auto write_iter = contents_.begin();
+  std::map<std::string, const ParseNode*> seen;
+
+  for (auto& node : contents_) {
+    std::string item_repr = GetStringRepresentation(node.get());
+    if (auto iter = seen.find(item_repr); iter != seen.end()) {
+      // If the node has any comment, move them to the item it is a
+      // duplicate of.
+      if (node->comments()) {
+        for (const auto& hc : node->comments()->before()) {
+          const_cast<ParseNode*>(iter->second)
+              ->comments_mutable()
+              ->append_before(hc);
+        }
+      }
+      continue;
+    }
+
+    seen.emplace(std::move(item_repr), node.get());
+    if (prev) {
+      SetLineNumber(node.get(), prev->GetRange().end().line_number() + 1);
+    }
+
+    prev = node.get();
+    *write_iter++ = std::move(node);
+  }
+
+  if (write_iter != contents_.end()) {
+    contents_.erase(write_iter, contents_.end());
+  }
+}
+
 // Breaks the ParseNodes of |contents| up by ranges that should be separately
 // sorted. In particular, we break at a block comment, or an item that has an
 // attached "before" comment and is separated by a blank line from the item
diff --git a/src/gn/parse_tree.h b/src/gn/parse_tree.h
index 6ad2aec..f3aeb58 100644
--- a/src/gn/parse_tree.h
+++ b/src/gn/parse_tree.h
@@ -471,6 +471,7 @@
   void ShortenTargets();
   void SortAsStringsList();
   void SortAsTargetsList();
+  void DeduplicateList();
 
   struct SortRange {
     size_t begin;