Remove type_ member from SourceFile type

In order to reduce GN's peak RAM usage by 5%, this CL
removes the SourceFile::type_ member entirely, relying
instead on the SourceFile::GetType() and SourceFile::IsFooType()
methods to get the same information.

Profiling shows that a very large number of SourceFile instances
are used at "gn gen" time, mostly due to the fact that SourceFileSet
merges are performed extensively during this operation. The type
value can be recomputed on the fly with a slightly optimized
GetSourceFileType() implementation, or checked directly with the
SourceFile::IsFooType() methods.

Measurements on the Fuchsia source tree, using a version of GN built
with a recent Clang toolchain, linked to rpmalloc, with ICF and LTO
enabled, show that:

  - Peak RAM usage goes from 4 GiB to 3.8 GiB (medians of 3 runs):

       /usr/bin/time -f%M /tmp/gn-xx gen out/default
         BEFORE: 4018648
         AFTER:  3794956
         DIFF:    223692

  - Overall "gn gen" time improves by half a second (medians of 9 runs):

       /tmp/gn-xx gen out/default
         BEFORE: Done. Made 173241 targets from 5370 files in 18229ms
         AFTER:  Done. Made 173241 targets from 5370 files in 17761ms
         DIFF:   468ms

For a Chromium build, there is no significant difference in "gn gen"
time, but 20 MiB of peak RAM, out of 770 MiB, are saved.

+ Change std::hash<SourceFile> implementation to use pointer hashing
  instead of string hashing. This doesn't have a significant performance
  impact, since this method is rarely used (in the
  InputFileManager::InputFileMap type only).

Change-Id: I79bd35d4add840ee90bc271564f5e127658db10a
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/12380
Commit-Queue: David Turner <digit@google.com>
Reviewed-by: Brett Wilson <brettw@chromium.org>
diff --git a/src/gn/binary_target_generator.cc b/src/gn/binary_target_generator.cc
index 485884e..4cac96f 100644
--- a/src/gn/binary_target_generator.cc
+++ b/src/gn/binary_target_generator.cc
@@ -93,7 +93,8 @@
   bool ret = TargetGenerator::FillSources();
   for (std::size_t i = 0; i < target_->sources().size(); ++i) {
     const auto& source = target_->sources()[i];
-    switch (source.type()) {
+    const SourceFile::Type source_type = source.GetType();
+    switch (source_type) {
       case SourceFile::SOURCE_CPP:
       case SourceFile::SOURCE_MODULEMAP:
       case SourceFile::SOURCE_H:
@@ -121,7 +122,7 @@
                     ". " + source.value() + " is not one of the valid types.");
     }
 
-    target_->source_types_used().Set(source.type());
+    target_->source_types_used().Set(source_type);
   }
   return ret;
 }
diff --git a/src/gn/compile_commands_writer.cc b/src/gn/compile_commands_writer.cc
index 74f6990..c0b94d1 100644
--- a/src/gn/compile_commands_writer.cc
+++ b/src/gn/compile_commands_writer.cc
@@ -252,7 +252,7 @@
     for (const auto& source : target->sources()) {
       // If this source is not a C/C++/ObjC/ObjC++ source (not header) file,
       // continue as it does not belong in the compilation database.
-      SourceFile::Type source_type = source.type();
+      const SourceFile::Type source_type = source.GetType();
       if (source_type != SourceFile::SOURCE_CPP &&
           source_type != SourceFile::SOURCE_C &&
           source_type != SourceFile::SOURCE_M &&
diff --git a/src/gn/header_checker.cc b/src/gn/header_checker.cc
index 212f581..32f4d90 100644
--- a/src/gn/header_checker.cc
+++ b/src/gn/header_checker.cc
@@ -154,7 +154,7 @@
 
   for (const auto& file : files) {
     // Only check C-like source files (RC files also have includes).
-    SourceFile::Type type = file.first.type();
+    const SourceFile::Type type = file.first.GetType();
     if (type != SourceFile::SOURCE_CPP && type != SourceFile::SOURCE_H &&
         type != SourceFile::SOURCE_C && type != SourceFile::SOURCE_M &&
         type != SourceFile::SOURCE_MM && type != SourceFile::SOURCE_RC)
diff --git a/src/gn/ninja_binary_target_writer.cc b/src/gn/ninja_binary_target_writer.cc
index b26171a..48f485a 100644
--- a/src/gn/ninja_binary_target_writer.cc
+++ b/src/gn/ninja_binary_target_writer.cc
@@ -228,7 +228,7 @@
     for (const OutputFile& output : outputs) {
       SourceFile output_as_source =
           output.AsSourceFile(source_set->settings()->build_settings());
-      if (output_as_source.type() == SourceFile::SOURCE_O) {
+      if (output_as_source.IsObjectType()) {
         obj_files->push_back(output);
       }
     }
diff --git a/src/gn/ninja_c_binary_target_writer.cc b/src/gn/ninja_c_binary_target_writer.cc
index cd76a16..d9c3ad2 100644
--- a/src/gn/ninja_c_binary_target_writer.cc
+++ b/src/gn/ninja_c_binary_target_writer.cc
@@ -76,9 +76,8 @@
 
 const SourceFile* GetModuleMapFromTargetSources(const Target* target) {
   for (const SourceFile& sf : target->sources()) {
-    if (sf.type() == SourceFile::SOURCE_MODULEMAP) {
+    if (sf.IsModuleMapType())
       return &sf;
-    }
   }
   return nullptr;
 }
@@ -552,13 +551,13 @@
   std::vector<OutputFile> tool_outputs;  // Prevent reallocation in loop.
   std::vector<OutputFile> deps;
   for (const auto& source : target_->sources()) {
-    DCHECK_NE(source.type(), SourceFile::SOURCE_SWIFT);
+    DCHECK_NE(source.GetType(), SourceFile::SOURCE_SWIFT);
 
     // Clear the vector but maintain the max capacity to prevent reallocations.
     deps.resize(0);
     const char* tool_name = Tool::kToolNone;
     if (!target_->GetOutputFilesForSource(source, &tool_name, &tool_outputs)) {
-      if (source.type() == SourceFile::SOURCE_DEF)
+      if (source.IsDefType())
         other_files->push_back(source);
       continue;  // No output for this source.
     }
@@ -606,7 +605,7 @@
 
     // It's theoretically possible for a compiler to produce more than one
     // output, but we'll only link to the first output.
-    if (source.type() != SourceFile::SOURCE_MODULEMAP) {
+    if (!source.IsModuleMapType()) {
       object_files->push_back(tool_outputs[0]);
     }
   }
@@ -644,7 +643,7 @@
       const SourceFile output_as_source =
           output.AsSourceFile(target_->settings()->build_settings());
 
-      if (output_as_source.type() == SourceFile::SOURCE_O) {
+      if (output_as_source.IsObjectType()) {
         object_files->push_back(output);
       }
     }
@@ -654,7 +653,7 @@
       // Avoid re-allocation during loop.
       std::vector<OutputFile> partial_outputs;
       for (const auto& source : target_->sources()) {
-        if (source.type() != SourceFile::SOURCE_SWIFT)
+        if (!source.IsSwiftType())
           continue;
 
         partial_outputs.resize(0);
@@ -665,7 +664,7 @@
           additional_outputs.push_back(output);
           SourceFile output_as_source =
               output.AsSourceFile(target_->settings()->build_settings());
-          if (output_as_source.type() == SourceFile::SOURCE_O) {
+          if (output_as_source.IsObjectType()) {
             object_files->push_back(output);
           }
         }
@@ -745,7 +744,7 @@
   const SourceFile* optional_def_file = nullptr;
   if (!other_files.empty()) {
     for (const SourceFile& src_file : other_files) {
-      if (src_file.type() == SourceFile::SOURCE_DEF) {
+      if (src_file.IsDefType()) {
         optional_def_file = &src_file;
         implicit_deps.push_back(
             OutputFile(settings_->build_settings(), src_file));
diff --git a/src/gn/source_file.cc b/src/gn/source_file.cc
index 52066d7..f9d816a 100644
--- a/src/gn/source_file.cc
+++ b/src/gn/source_file.cc
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <string.h>
+
 #include "gn/source_file.h"
 
 #include "base/logging.h"
@@ -21,40 +23,93 @@
   DCHECK(!EndsWithSlash(s)) << s;
 }
 
+bool EndsWithExtension(std::string_view str, std::string_view ext) {
+  return str.size() > ext.size() && str[str.size() - ext.size() - 1] == '.' &&
+         !::memcmp(str.data() + str.size() - ext.size(), ext.data(),
+                   ext.size());
+}
+
 SourceFile::Type GetSourceFileType(const std::string& file) {
-  std::string_view extension = FindExtension(&file);
-  if (extension == "cc" || extension == "cpp" || extension == "cxx" ||
-      extension == "c++")
-    return SourceFile::SOURCE_CPP;
-  if (extension == "h" || extension == "hpp" || extension == "hxx" ||
-      extension == "hh" || extension == "inc" || extension == "ipp" ||
-      extension == "inl")
-    return SourceFile::SOURCE_H;
-  if (extension == "c")
-    return SourceFile::SOURCE_C;
-  if (extension == "m")
-    return SourceFile::SOURCE_M;
-  if (extension == "mm")
-    return SourceFile::SOURCE_MM;
-  if (extension == "modulemap")
-    return SourceFile::SOURCE_MODULEMAP;
-  if (extension == "rc")
-    return SourceFile::SOURCE_RC;
-  if (extension == "S" || extension == "s" || extension == "asm")
-    return SourceFile::SOURCE_S;
-  if (extension == "o" || extension == "obj")
-    return SourceFile::SOURCE_O;
-  if (extension == "def")
-    return SourceFile::SOURCE_DEF;
-  if (extension == "rs")
-    return SourceFile::SOURCE_RS;
-  if (extension == "go")
-    return SourceFile::SOURCE_GO;
-  if (extension == "swift")
+  size_t size = file.size();
+  const char* str = file.data();
+
+  // First, single-char extensions.
+  if (size > 2 && str[size - 2] == '.') {
+    switch (str[size - 1]) {
+      case 'c':
+        return SourceFile::SOURCE_C;  // .c
+      case 'h':
+        return SourceFile::SOURCE_H;  // .h
+      case 'm':
+        return SourceFile::SOURCE_M;  // .m
+      case 'o':
+        return SourceFile::SOURCE_O;  // .o
+      case 'S':
+      case 's':
+        return SourceFile::SOURCE_S;  // .S and .s
+      default:
+        return SourceFile::SOURCE_UNKNOWN;
+    }
+  }
+
+  // Second, two-char extensions
+  if (size > 3 && str[size - 3] == '.') {
+#define TAG2(c1, c2) ((unsigned)(c1) | ((unsigned)(c2) << 8))
+    switch (TAG2(str[size - 2], str[size - 1])) {
+      case TAG2('c', 'c'):
+        return SourceFile::SOURCE_CPP;  // .cc
+      case TAG2('g', 'o'):
+        return SourceFile::SOURCE_GO;  // .go
+      case TAG2('h', 'h'):
+        return SourceFile::SOURCE_H;   // .hh
+      case TAG2('m', 'm'):
+        return SourceFile::SOURCE_MM;  // .mm
+      case TAG2('r', 'c'):
+        return SourceFile::SOURCE_RC;  // .rc
+      case TAG2('r', 's'):
+        return SourceFile::SOURCE_RS;  // .rs
+      default:
+        return SourceFile::SOURCE_UNKNOWN;
+    }
+#undef TAG2
+  }
+
+  if (size > 4 && str[size - 4] == '.') {
+#define TAG3(c1, c2, c3) \
+  ((unsigned)(c1) | ((unsigned)(c2) << 8) | ((unsigned)(c3) << 16))
+    switch (TAG3(str[size - 3], str[size - 2], str[size - 1])) {
+      case TAG3('c', 'p', 'p'):
+      case TAG3('c', 'x', 'x'):
+      case TAG3('c', '+', '+'):
+        return SourceFile::SOURCE_CPP;
+      case TAG3('h', 'p', 'p'):
+      case TAG3('h', 'x', 'x'):
+      case TAG3('i', 'n', 'c'):
+      case TAG3('i', 'p', 'p'):
+      case TAG3('i', 'n', 'l'):
+        return SourceFile::SOURCE_H;
+      case TAG3('a', 's', 'm'):
+        return SourceFile::SOURCE_S;
+      case TAG3('d', 'e', 'f'):
+        return SourceFile::SOURCE_DEF;
+      case TAG3('o', 'b', 'j'):
+        return SourceFile::SOURCE_O;
+      default:
+        return SourceFile::SOURCE_UNKNOWN;
+    }
+#undef TAG3
+  }
+
+  // Other cases
+  if (EndsWithExtension(file, "swift"))
     return SourceFile::SOURCE_SWIFT;
-  if (extension == "swiftmodule")
+
+  if (EndsWithExtension(file, "swiftmodule"))
     return SourceFile::SOURCE_SWIFTMODULE;
 
+  if (EndsWithExtension(file, "modulemap"))
+    return SourceFile::SOURCE_MODULEMAP;
+
   return SourceFile::SOURCE_UNKNOWN;
 }
 
@@ -73,8 +128,35 @@
 SourceFile::SourceFile(std::string&& value)
     : SourceFile(StringAtom(Normalized(std::move(value)))) {}
 
-SourceFile::SourceFile(StringAtom value) : value_(value) {
-  type_ = GetSourceFileType(value_.str());
+SourceFile::SourceFile(StringAtom value) : value_(value) {}
+
+SourceFile::Type SourceFile::GetType() const {
+  return GetSourceFileType(value_.str());
+}
+
+bool SourceFile::IsDefType() const {
+  std::string_view v = value_.str();
+  return EndsWithExtension(v, "def");
+}
+
+bool SourceFile::IsObjectType() const {
+  std::string_view v = value_.str();
+  return EndsWithExtension(v, "o") || EndsWithExtension(v, "obj");
+}
+
+bool SourceFile::IsModuleMapType() const {
+  std::string_view v = value_.str();
+  return EndsWithExtension(v, "modulemap");
+}
+
+bool SourceFile::IsSwiftType() const {
+  std::string_view v = value_.str();
+  return EndsWithExtension(v, "swift");
+}
+
+bool SourceFile::IsSwiftModuleType() const {
+  std::string_view v = value_.str();
+  return EndsWithExtension(v, "swiftmodule");
 }
 
 std::string SourceFile::GetName() const {
@@ -103,7 +185,6 @@
 
 void SourceFile::SetValue(const std::string& value) {
   value_ = StringAtom(value);
-  type_ = GetSourceFileType(value);
 }
 
 SourceFileTypeSet::SourceFileTypeSet() : empty_(true) {
diff --git a/src/gn/source_file.h b/src/gn/source_file.h
index 5ad8f62..b13a400 100644
--- a/src/gn/source_file.h
+++ b/src/gn/source_file.h
@@ -59,7 +59,14 @@
 
   bool is_null() const { return value_.empty(); }
   const std::string& value() const { return value_.str(); }
-  Type type() const { return type_; }
+  Type GetType() const;
+
+  // Optimized implementation of GetType() == SOURCE_XXX
+  bool IsDefType() const;          // SOURCE_DEF
+  bool IsModuleMapType() const;    // SOURCE_MODULEMAP
+  bool IsObjectType() const;       // SOURCE_O
+  bool IsSwiftType() const;        // SOURCE_SWIFT
+  bool IsSwiftModuleType() const;  // SOURCE_SWIFTMODULE
 
   // Returns everything after the last slash.
   std::string GetName() const;
@@ -121,7 +128,6 @@
   void SetValue(const std::string& value);
 
   StringAtom value_;
-  Type type_ = SOURCE_UNKNOWN;
 };
 
 namespace std {
@@ -129,8 +135,7 @@
 template <>
 struct hash<SourceFile> {
   std::size_t operator()(const SourceFile& v) const {
-    hash<std::string> h;
-    return h(v.value());
+    return SourceFile::PtrHash()(v);
   }
 };
 
diff --git a/src/gn/source_file_unittest.cc b/src/gn/source_file_unittest.cc
index 9d3a123..6c409b1 100644
--- a/src/gn/source_file_unittest.cc
+++ b/src/gn/source_file_unittest.cc
@@ -18,3 +18,45 @@
   EXPECT_TRUE(b_str.empty());  // Should have been swapped in.
   EXPECT_EQ("//bar.cc", b.value());
 }
+
+TEST(SourceFile, GetType) {
+  static const struct {
+    std::string_view path;
+    SourceFile::Type type;
+  } kData[] = {
+      {"", SourceFile::SOURCE_UNKNOWN},
+      {"a.c", SourceFile::SOURCE_C},
+      {"a.cc", SourceFile::SOURCE_CPP},
+      {"a.cpp", SourceFile::SOURCE_CPP},
+      {"a.cxx", SourceFile::SOURCE_CPP},
+      {"a.c++", SourceFile::SOURCE_CPP},
+      {"foo.h", SourceFile::SOURCE_H},
+      {"foo.hh", SourceFile::SOURCE_H},
+      {"foo.hpp", SourceFile::SOURCE_H},
+      {"foo.inc", SourceFile::SOURCE_H},
+      {"foo.inl", SourceFile::SOURCE_H},
+      {"foo.ipp", SourceFile::SOURCE_H},
+      {"foo.m", SourceFile::SOURCE_M},
+      {"foo.mm", SourceFile::SOURCE_MM},
+      {"foo.o", SourceFile::SOURCE_O},
+      {"foo.obj", SourceFile::SOURCE_O},
+      {"foo.S", SourceFile::SOURCE_S},
+      {"foo.s", SourceFile::SOURCE_S},
+      {"foo.asm", SourceFile::SOURCE_S},
+      {"foo.go", SourceFile::SOURCE_GO},
+      {"foo.rc", SourceFile::SOURCE_RC},
+      {"foo.rs", SourceFile::SOURCE_RS},
+      {"foo.def", SourceFile::SOURCE_DEF},
+      {"foo.swift", SourceFile::SOURCE_SWIFT},
+      {"foo.swiftmodule", SourceFile::SOURCE_SWIFTMODULE},
+      {"foo.modulemap", SourceFile::SOURCE_MODULEMAP},
+
+      // A few degenerate cases
+      {"foo.obj/a", SourceFile::SOURCE_UNKNOWN},
+      {"foo.cppp", SourceFile::SOURCE_UNKNOWN},
+      {"cpp", SourceFile::SOURCE_UNKNOWN},
+  };
+  for (const auto& data : kData) {
+    EXPECT_EQ(data.type, SourceFile(data.path).GetType());
+  }
+}
diff --git a/src/gn/swift_values.cc b/src/gn/swift_values.cc
index c15e319..a9677a6 100644
--- a/src/gn/swift_values.cc
+++ b/src/gn/swift_values.cc
@@ -56,7 +56,7 @@
 
   const SourceFile module_output_file_as_source =
       module_output_file.AsSourceFile(target->settings()->build_settings());
-  if (module_output_file_as_source.type() != SourceFile::SOURCE_SWIFTMODULE) {
+  if (!module_output_file_as_source.IsSwiftModuleType()) {
     *err = Err(tool->defined_from(), "Incorrect outputs for tool",
                "The first output of tool " + std::string(tool->name()) +
                    " must be a .swiftmodule file.");
diff --git a/src/gn/target.cc b/src/gn/target.cc
index 66ffc1f..7071357 100644
--- a/src/gn/target.cc
+++ b/src/gn/target.cc
@@ -589,7 +589,7 @@
     // All binary targets do a tool lookup.
     DCHECK(IsBinary());
 
-    SourceFile::Type file_type = source.type();
+    const SourceFile::Type file_type = source.GetType();
     if (file_type == SourceFile::SOURCE_UNKNOWN)
       return false;
     if (file_type == SourceFile::SOURCE_O) {