Improve the "gn path" command.

Previously, GN path would find all paths between the two targets, count them for statistical purposes, sort them, and print the shortest (or all of them if requested).

Unfortunately, there are many hundred millions of unique paths between two targets like //chrome and //base and it would take a long time and eventually run out of address space counting them all.

This patch changes the algorithm from depth-first to breadth-first search, and doesn't traverse or count a target if that target was already found to have a path to the destination. The result is the path from //chrome to //base finds 65 paths which makes the algorithm run instantly and the output with --all is much more useful.

Review-Url: https://codereview.chromium.org/2037303002
Cr-Original-Commit-Position: refs/heads/master@{#398491}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: 8161ecd407152b4a07652b62dad8b2b273a55638
diff --git a/tools/gn/command_path.cc b/tools/gn/command_path.cc
index 2368788..a17001d 100644
--- a/tools/gn/command_path.cc
+++ b/tools/gn/command_path.cc
@@ -17,166 +17,259 @@
 
 namespace {
 
-enum DepType {
-  DEP_NONE,
-  DEP_PUBLIC,
-  DEP_PRIVATE,
-  DEP_DATA
+enum class DepType {
+  NONE,
+  PUBLIC,
+  PRIVATE,
+  DATA
 };
 
-// As we do a depth-first search, this vector will store the current path
-// the current target for printing when a match is found.
+// The dependency paths are stored in a vector. Assuming the chain:
+//    A --[public]--> B --[private]--> C
+// The stack will look like:
+//    [0] = A, NONE (this has no dep type since nobody depends on it)
+//    [1] = B, PUBLIC
+//    [2] = C, PRIVATE
 using TargetDep = std::pair<const Target*, DepType>;
-using DepStack = std::vector<TargetDep>;
+using PathVector = std::vector<TargetDep>;
 
-// Note that this uses raw pointers. These need to be manually deleted (which
-// we won't normally bother with). This allows the vector to be resized
-// more quickly.
-using DepStackVector = std::vector<DepStack*>;
-
-using DepSet = base::hash_set<const Target*>;
+// How to search.
+enum class PrivateDeps { INCLUDE, EXCLUDE };
+enum class DataDeps { INCLUDE, EXCLUDE };
+enum class PrintWhat { ONE, ALL };
 
 struct Options {
   Options()
-      : all(false),
+      : print_what(PrintWhat::ONE),
         public_only(false),
         with_data(false) {
   }
 
-  bool all;
+  PrintWhat print_what;
   bool public_only;
   bool with_data;
 };
 
-struct State {
-  State() : found_count(0) {
-    // Reserve fairly large buffers for the found vectors.
-    const size_t kReserveSize = 32768;
-    found_public.reserve(kReserveSize);
-    found_other.reserve(kReserveSize);
+typedef std::list<PathVector> WorkQueue;
+
+struct Stats {
+  Stats() : public_paths(0), other_paths(0) {
   }
 
-  // Stores targets that do not have any paths to the destination. This is
-  // an optimization to avoid revisiting useless paths.
-  DepSet rejected;
+  int total_paths() const { return public_paths + other_paths; }
 
-  // Total number of paths found.
-  int found_count;
+  int public_paths;
+  int other_paths;
 
-  // The pointers in these vectors are owned by this object, but are
-  // deliberately leaked. There can be a lot of them which can take a long time
-  // to free, and GN will just exit after this is used anyway.
-  DepStackVector found_public;
-  DepStackVector found_other;
+  // Stores targets that have a path to the destination, and whether that
+  // path is public, private, or data.
+  std::map<const Target*, DepType> found_paths;
 };
 
-void PrintDepStack(const DepStack& stack) {
-  // Don't print toolchains unless they differ from the first target.
-  const Label& default_toolchain = stack[0].first->label().GetToolchainLabel();
+// If the implicit_last_dep is not "none", this type indicates the
+// classification of the elided last part of path.
+DepType ClassifyPath(const PathVector& path, DepType implicit_last_dep) {
+  DepType result;
+  if (implicit_last_dep != DepType::NONE)
+    result = implicit_last_dep;
+  else
+    result = DepType::PUBLIC;
 
-  for (const auto& pair : stack) {
-    OutputString(pair.first->label().GetUserVisibleName(default_toolchain));
-    switch (pair.second) {
-      case DEP_NONE:
-        break;
-      case DEP_PUBLIC:
-        OutputString(" --[public]-->", DECORATION_DIM);
-        break;
-      case DEP_PRIVATE:
-        OutputString(" --[private]-->", DECORATION_DIM);
-        break;
-      case DEP_DATA:
-        OutputString(" --[data]-->", DECORATION_DIM);
-        break;
+  // Skip the 0th one since that is always NONE.
+  for (size_t i = 1; i < path.size(); i++) {
+    // PRIVATE overrides PUBLIC, and DATA overrides everything (the idea is
+    // to find the worst link in the path).
+    if (path[i].second == DepType::PRIVATE) {
+      if (result == DepType::PUBLIC)
+        result = DepType::PRIVATE;
+    } else if (path[i].second == DepType::DATA) {
+      result = DepType::DATA;
+    }
+  }
+  return result;
+}
+
+const char* StringForDepType(DepType type) {
+  switch(type) {
+    case DepType::PUBLIC:
+      return "public";
+    case DepType::PRIVATE:
+      return "private";
+    case DepType::DATA:
+      return "data";
+      break;
+    case DepType::NONE:
+    default:
+      return "";
+  }
+}
+
+// Prints the given path. If the implicit_last_dep is not "none", the last
+// dependency will show an elided dependency with the given annotation.
+void PrintPath(const PathVector& path, DepType implicit_last_dep) {
+  if (path.empty())
+    return;
+
+  // Don't print toolchains unless they differ from the first target.
+  const Label& default_toolchain = path[0].first->label().GetToolchainLabel();
+
+  for (size_t i = 0; i < path.size(); i++) {
+    OutputString(path[i].first->label().GetUserVisibleName(default_toolchain));
+
+    // Output dependency type.
+    if (i == path.size() - 1) {
+      // Last one either gets the implicit last dep type or nothing.
+      if (implicit_last_dep != DepType::NONE) {
+        OutputString(std::string(" --> see ") +
+                     StringForDepType(implicit_last_dep) +
+                     " chain printed above...", DECORATION_DIM);
+      }
+    } else {
+      // Take type from the next entry.
+      OutputString(std::string(" --[") + StringForDepType(path[i + 1].second) +
+                   "]-->", DECORATION_DIM);
     }
     OutputString("\n");
   }
+
   OutputString("\n");
 }
 
-bool AreAllPublic(const DepStack& stack) {
-  // Don't check the type of the last one since that doesn't point to anything.
-  for (size_t i = 0; i < stack.size() - 1; i++) {
-    if (stack[i].second != DEP_PUBLIC)
-      return false;
+void InsertTargetsIntoFoundPaths(const PathVector& path,
+                                 DepType implicit_last_dep,
+                                 Stats* stats) {
+  DepType type = ClassifyPath(path, implicit_last_dep);
+
+  bool inserted = false;
+
+  // Don't try to insert the 0th item in the list which is the "from" target.
+  // The search will be run more than once (for the different path types) and
+  // if the "from" target was in the list, subsequent passes could never run
+  // the starting point is alredy in the list of targets considered).
+  //
+  // One might imagine an alternate implementation where all items are counted
+  // here but the "from" item is erased at the beginning of each search, but
+  // that will mess up the metrics (the private search pass will find the
+  // same public paths as the previous public pass, "inserted" will be true
+  // here since the item wasn't found, and the public path will be
+  // double-counted in the stats.
+  for (size_t i = 1; i < path.size(); i++) {
+    const auto& pair = path[i];
+
+    // Don't overwrite an existing one. The algorithm works by first doing
+    // public, then private, then data, so anything already there is guaranteed
+    // at least as good as our addition.
+    if (stats->found_paths.find(pair.first) == stats->found_paths.end()) {
+      stats->found_paths.insert(std::make_pair(pair.first, type));
+      inserted = true;
+    }
   }
-  return true;
-}
 
-// Increments *found_count to reflect how many results are found. If print_all
-// is not set, only the first result will be printed.
-//
-// As an optimization, targets that do not have any paths are added to
-// *reject so this function doesn't waste time revisiting them.
-void RecursiveFindPath(const Options& options,
-                       State* state,
-                       const Target* current,
-                       const Target* desired,
-                       DepStack* stack) {
-  if (state->rejected.find(current) != state->rejected.end())
-    return;
-  int initial_found_count = state->found_count;
-
-  if (current == desired) {
-    // Found a path.
-    state->found_count++;
-    stack->push_back(TargetDep(current, DEP_NONE));
-    if (AreAllPublic(*stack))
-      state->found_public.push_back(new DepStack(*stack));
+  if (inserted) {
+    // Only count this path in the stats if any part of it was actually new.
+    if (type == DepType::PUBLIC)
+      stats->public_paths++;
     else
-      state->found_other.push_back(new DepStack(*stack));
-    stack->pop_back();
-    return;
+      stats->other_paths++;
   }
+}
 
-  stack->push_back(TargetDep(current, DEP_PUBLIC));
-  for (const auto& pair : current->public_deps())
-    RecursiveFindPath(options, state, pair.ptr, desired, stack);
+void BreadthFirstSearch(const Target* from, const Target* to,
+                        PrivateDeps private_deps, DataDeps data_deps,
+                        PrintWhat print_what,
+                        Stats* stats) {
+  // Seed the initial stack with just the "from" target.
+  PathVector initial_stack;
+  initial_stack.emplace_back(from, DepType::NONE);
+  WorkQueue work_queue;
+  work_queue.push_back(initial_stack);
 
+  // Track checked targets to avoid checking the same once more than once.
+  std::set<const Target*> visited;
+
+  while (!work_queue.empty()) {
+    PathVector current_path = work_queue.front();
+    work_queue.pop_front();
+
+    const Target* current_target = current_path.back().first;
+
+    if (current_target == to) {
+      // Found a new path.
+      if (stats->total_paths() == 0 || print_what == PrintWhat::ALL)
+        PrintPath(current_path, DepType::NONE);
+
+      // Insert all nodes on the path into the found paths list. Since we're
+      // doing search breadth first, we know that the current path is the best
+      // path for all nodes on it.
+      InsertTargetsIntoFoundPaths(current_path, DepType::NONE, stats);
+    } else {
+      // Check for a path that connects to an already known-good one. Printing
+      // this here will mean the results aren't strictly in depth-first order
+      // since there could be many items on the found path this connects to.
+      // Doing this here will mean that the output is sorted by length of items
+      // printed (with the redundant parts of the path omitted) rather than
+      // complete path length.
+      const auto& found_current_target =
+          stats->found_paths.find(current_target);
+      if (found_current_target != stats->found_paths.end()) {
+        if (stats->total_paths() == 0 || print_what == PrintWhat::ALL)
+          PrintPath(current_path, found_current_target->second);
+
+        // Insert all nodes on the path into the found paths list since we know
+        // everything along this path also leads to the destination.
+        InsertTargetsIntoFoundPaths(current_path, found_current_target->second,
+                                    stats);
+        continue;
+      }
+    }
+
+    // If we've already checked this one, stop. This should be after the above
+    // check for a known-good check, because known-good ones will always have
+    // been previously visited.
+    if (visited.find(current_target) == visited.end())
+      visited.insert(current_target);
+    else
+      continue;
+
+    // Add public deps for this target to the queue.
+    for (const auto& pair : current_target->public_deps()) {
+      work_queue.push_back(current_path);
+      work_queue.back().push_back(TargetDep(pair.ptr, DepType::PUBLIC));
+    }
+
+    if (private_deps == PrivateDeps::INCLUDE) {
+      // Add private deps.
+      for (const auto& pair : current_target->private_deps()) {
+        work_queue.push_back(current_path);
+        work_queue.back().push_back(
+            TargetDep(pair.ptr, DepType::PRIVATE));
+      }
+    }
+
+    if (data_deps == DataDeps::INCLUDE) {
+      // Add data deps.
+      for (const auto& pair : current_target->data_deps()) {
+        work_queue.push_back(current_path);
+        work_queue.back().push_back(TargetDep(pair.ptr, DepType::DATA));
+      }
+    }
+  }
+}
+
+void DoSearch(const Target* from, const Target* to, const Options& options,
+              Stats* stats) {
+  BreadthFirstSearch(from, to, PrivateDeps::EXCLUDE, DataDeps::EXCLUDE,
+                     options.print_what, stats);
   if (!options.public_only) {
-    stack->back().second = DEP_PRIVATE;
-    for (const auto& pair : current->private_deps())
-      RecursiveFindPath(options, state, pair.ptr, desired, stack);
+    // Check private deps.
+    BreadthFirstSearch(from, to, PrivateDeps::INCLUDE,
+                       DataDeps::EXCLUDE, options.print_what, stats);
+    if (options.with_data) {
+      // Check data deps.
+      BreadthFirstSearch(from, to, PrivateDeps::INCLUDE,
+                         DataDeps::INCLUDE, options.print_what, stats);
+    }
   }
-
-  if (options.with_data) {
-    stack->back().second = DEP_DATA;
-    for (const auto& pair : current->data_deps())
-      RecursiveFindPath(options, state, pair.ptr, desired, stack);
-  }
-
-  stack->pop_back();
-
-  if (state->found_count == initial_found_count)
-    state->rejected.insert(current);  // Eliminated this target.
-}
-
-bool StackLengthLess(const DepStack* a, const DepStack* b) {
-  return a->size() < b->size();
-}
-
-// Prints one result vector. The vector will be modified.
-void PrintResultVector(const Options& options, DepStackVector* result) {
-  if (!options.all && !result->empty()) {
-    // Just print the smallest one.
-    PrintDepStack(**std::min_element(result->begin(), result->end(),
-                                     &StackLengthLess));
-    return;
-  }
-
-  // Print all in order of increasing length.
-  std::sort(result->begin(), result->end(), &StackLengthLess);
-  for (const auto& stack : *result)
-    PrintDepStack(*stack);
-}
-
-void PrintResults(const Options& options, State* state) {
-  PrintResultVector(options, &state->found_public);
-
-  // Consider non-public paths only if all paths are requested or there were
-  // no public paths.
-  if (state->found_public.empty() || options.all)
-    PrintResultVector(options, &state->found_other);
 }
 
 }  // namespace
@@ -189,8 +282,8 @@
     "\n"
     "  Finds paths of dependencies between two targets. Each unique path\n"
     "  will be printed in one group, and groups will be separate by newlines.\n"
-    "  The two targets can appear in either order: paths will be found going\n"
-    "  in either direction.\n"
+    "  The two targets can appear in either order (paths will be found going\n"
+    "  in either direction).\n"
     "\n"
     "  By default, a single path will be printed. If there is a path with\n"
     "  only public dependencies, the shortest public path will be printed.\n"
@@ -199,12 +292,19 @@
     "  will also be considered. If there are multiple shortest paths, an\n"
     "  arbitrary one will be selected.\n"
     "\n"
+    "Interesting paths\n"
+    "\n"
+    "  In a large project, there can be 100's of millions of unique paths\n"
+    "  between a very high level and a common low-level target. To make the\n"
+    "  output more useful (and terminate in a reasonable time), GN will not\n"
+    "  revisit sub-paths previously known to lead to the target.\n"
+    "\n"
     "Options\n"
     "\n"
     "  --all\n"
-    "     Prints all paths found rather than just the first one. Public paths\n"
-    "     will be printed first in order of increasing length, followed by\n"
-    "     non-public paths in order of increasing length.\n"
+    "     Prints all \"interesting\" paths found rather than just the first\n"
+    "     one. Public paths will be printed first in order of increasing\n"
+    "     length, followed by non-public paths in order of increasing length.\n"
     "\n"
     "  --public\n"
     "     Considers only public paths. Can't be used with --with-data.\n"
@@ -239,7 +339,8 @@
     return 1;
 
   Options options;
-  options.all = base::CommandLine::ForCurrentProcess()->HasSwitch("all");
+  options.print_what = base::CommandLine::ForCurrentProcess()->HasSwitch("all")
+      ? PrintWhat::ALL : PrintWhat::ONE;
   options.public_only =
       base::CommandLine::ForCurrentProcess()->HasSwitch("public");
   options.with_data =
@@ -251,21 +352,15 @@
     return 1;
   }
 
-  // If we don't find a path going "forwards", try the reverse direction. Deps
-  // can only go in one direction without having a cycle, which will have
-  // caused a run failure above.
-  State state;
-  DepStack stack;
-  RecursiveFindPath(options, &state, target1, target2, &stack);
-  if (state.found_count == 0) {
-    // Need to reset the rejected set for a new invocation since the reverse
-    // search will revisit the same targets looking for something else.
-    state.rejected.clear();
-    RecursiveFindPath(options, &state, target2, target1, &stack);
+  Stats stats;
+  DoSearch(target1, target2, options, &stats);
+  if (stats.total_paths() == 0) {
+    // If we don't find a path going "forwards", try the reverse direction.
+    // Deps can only go in one direction without having a cycle, which will
+    // have caused a run failure above.
+    DoSearch(target2, target1, options, &stats);
   }
 
-  PrintResults(options, &state);
-
   // This string is inserted in the results to annotate whether the result
   // is only public or includes data deps or not.
   const char* path_annotation = "";
@@ -274,42 +369,42 @@
   else if (!options.with_data)
     path_annotation = "non-data ";
 
-  if (state.found_count == 0) {
+  if (stats.total_paths() == 0) {
     // No results.
     OutputString(base::StringPrintf(
         "No %spaths found between these two targets.\n", path_annotation),
         DECORATION_YELLOW);
-  } else if (state.found_count == 1) {
+  } else if (stats.total_paths() == 1) {
     // Exactly one result.
     OutputString(base::StringPrintf("1 %spath found.", path_annotation),
                  DECORATION_YELLOW);
     if (!options.public_only) {
-      if (state.found_public.empty())
-        OutputString(" It is not public.");
-      else
+      if (stats.public_paths)
         OutputString(" It is public.");
+      else
+        OutputString(" It is not public.");
     }
     OutputString("\n");
   } else {
-    if (options.all) {
+    if (options.print_what == PrintWhat::ALL) {
       // Showing all paths when there are many.
-      OutputString(base::StringPrintf("%d unique %spaths found.",
-                                      state.found_count, path_annotation),
+      OutputString(base::StringPrintf("%d \"interesting\" %spaths found.",
+                                      stats.total_paths(), path_annotation),
                    DECORATION_YELLOW);
       if (!options.public_only) {
         OutputString(base::StringPrintf(" %d of them are public.",
-            static_cast<int>(state.found_public.size())));
+                                        stats.public_paths));
       }
       OutputString("\n");
     } else {
       // Showing one path when there are many.
       OutputString(
-          base::StringPrintf("Showing one of %d unique %spaths.",
-                             state.found_count, path_annotation),
+          base::StringPrintf("Showing one of %d \"interesting\" %spaths.",
+                             stats.total_paths(), path_annotation),
           DECORATION_YELLOW);
       if (!options.public_only) {
         OutputString(base::StringPrintf(" %d of them are public.\n",
-            static_cast<int>(state.found_public.size())));
+                                        stats.public_paths));
       }
       OutputString("Use --all to print all paths.\n");
     }