Add string_join() and string_split() builtins

Usage: string_join(separator, strings)
Example: string_join(" ", ["a", "b", "c"]) --> "a b c"

Usage: string_split(string[, separator])
Example: string_split("hello   world ") --> ["hello", "world"]
Example: string_split("a|b|c", "|") --> ["a", "b", "c"]

This patch is based on Petr Hosek's patch:
https://gn-review.googlesource.com/c/gn/+/3401

Change-Id: Iba30f78cb0b1c8aba8f895117e53795aacfc8f6a
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/6563
Commit-Queue: Petr Hosek <phosek@google.com>
Reviewed-by: Brett Wilson <brettw@chromium.org>
diff --git a/docs/reference.md b/docs/reference.md
index 38ebeb6..8253dae 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -55,7 +55,9 @@
     *   [set_defaults: Set default values for a target type.](#func_set_defaults)
     *   [set_sources_assignment_filter: Set a pattern to filter source files.](#func_set_sources_assignment_filter)
     *   [split_list: Splits a list into N different sub-lists.](#func_split_list)
+    *   [string_join: Concatenates a list of strings with a separator.](#func_string_join)
     *   [string_replace: Replaces substring in the given string.](#func_string_replace)
+    *   [string_split: Split string into a list of strings.](#func_string_split)
     *   [template: Define a template rule.](#func_template)
     *   [tool: Specify arguments to a toolchain tool.](#func_tool)
     *   [toolchain: Defines a toolchain.](#func_toolchain)
@@ -2960,6 +2962,22 @@
   Will print:
     [[1, 2], [3, 4], [5, 6]
 ```
+### <a name="func_string_join"></a>**string_join**: Concatenates a list of strings with a separator.
+
+```
+  result = string_join(separator, strings)
+
+  Concatenate a list of strings with intervening occurrences of separator.
+```
+
+#### **Examples**
+
+```
+    string_join("", ["a", "b", "c"])    --> "abc"
+    string_join("|", ["a", "b", "c"])   --> "a|b|c"
+    string_join(", ", ["a", "b", "c"])  --> "a, b, c"
+    string_join("s", ["", ""])          --> "s"
+```
 ### <a name="func_string_replace"></a>**string_replace**: Replaces substring in the given string.
 
 ```
@@ -2981,6 +2999,33 @@
   Will print:
     Hello, GN!
 ```
+### <a name="func_string_split"></a>**string_split**: Split string into a list of strings.
+
+```
+  result = string_split(str[, sep])
+
+  Split string into all substrings separated by separator and returns a list
+  of the substrings between those separators.
+
+  If the separator argument is omitted, the split is by any whitespace, and
+  any leading/trailing whitespace is ignored; similar to Python's str.split().
+```
+
+#### **Examples without a separator (split on whitespace)**:
+
+```
+  string_split("")          --> []
+  string_split("a")         --> ["a"]
+  string_split(" aa  bb")   --> ["aa", "bb"]
+```
+
+#### **Examples with a separator (split on separators)**:
+
+```
+  string_split("", "|")           --> [""]
+  string_split("  a b  ", " ")    --> ["", "", "a", "b", "", ""]
+  string_split("aa+-bb+-c", "+-") --> ["aa", "bb", "c"]
+```
 ### <a name="func_template"></a>**template**: Define a template rule.
 
 ```
diff --git a/src/gn/functions.cc b/src/gn/functions.cc
index 0690419..4675ee3 100644
--- a/src/gn/functions.cc
+++ b/src/gn/functions.cc
@@ -5,6 +5,7 @@
 #include "gn/functions.h"
 
 #include <stddef.h>
+#include <cctype>
 #include <iostream>
 #include <memory>
 #include <regex>
@@ -1129,6 +1130,67 @@
   return result;
 }
 
+// string_join -----------------------------------------------------------------
+
+const char kStringJoin[] = "string_join";
+const char kStringJoin_HelpShort[] =
+    "string_join: Concatenates a list of strings with a separator.";
+const char kStringJoin_Help[] =
+    R"(string_join: Concatenates a list of strings with a separator.
+
+  result = string_join(separator, strings)
+
+  Concatenate a list of strings with intervening occurrences of separator.
+
+Examples
+
+    string_join("", ["a", "b", "c"])    --> "abc"
+    string_join("|", ["a", "b", "c"])   --> "a|b|c"
+    string_join(", ", ["a", "b", "c"])  --> "a, b, c"
+    string_join("s", ["", ""])          --> "s"
+)";
+
+// Implements string_join(): joins the strings in list args[1] with separator
+// args[0] between elements. On bad usage sets |err| and returns Value().
+Value RunStringJoin(Scope* scope,
+                    const FunctionCallNode* function,
+                    const std::vector<Value>& args,
+                    Err* err) {
+  // Check usage: Number of arguments.
+  if (args.size() != 2) {
+    *err = Err(function, "Wrong number of arguments to string_join().",
+               "Expecting exactly two. usage: string_join(separator, strings)");
+    return Value();
+  }
+
+  // Check usage: separator is a string.
+  if (!args[0].VerifyTypeIs(Value::STRING, err)) {
+    *err = Err(function, "separator in string_join(separator, strings) is not "
+               "a string", "Expecting separator argument to be a string.");
+    return Value();
+  }
+  const std::string& separator = args[0].string_value();
+
+  // Check usage: strings is a list.
+  if (!args[1].VerifyTypeIs(Value::LIST, err)) {
+    *err = Err(function, "strings in string_join(separator, strings) "
+               "is not a list", "Expecting strings argument to be a list.");
+    return Value();
+  }
+  const std::vector<Value>& strings = args[1].list_value();
+
+  // Arguments look good; do the join.
+  std::string joined;
+  for (size_t i = 0; i < strings.size(); ++i) {
+    if (!strings[i].VerifyTypeIs(Value::STRING, err))
+      return Value();
+    if (i != 0)
+      joined += separator;
+    joined += strings[i].string_value();
+  }
+  return Value(function, std::move(joined));
+}
+
 // string_replace --------------------------------------------------------------
 
 const char kStringReplace[] = "string_replace";
@@ -1198,6 +1260,106 @@
   return Value(function, std::move(val));
 }
 
+// string_split ----------------------------------------------------------------
+
+const char kStringSplit[] = "string_split";
+const char kStringSplit_HelpShort[] =
+    "string_split: Split string into a list of strings.";
+const char kStringSplit_Help[] =
+    R"(string_split: Split string into a list of strings.
+
+  result = string_split(str[, sep])
+
+  Split string into all substrings separated by separator and returns a list
+  of the substrings between those separators.
+
+  If the separator argument is omitted, the split is by any whitespace, and
+  any leading/trailing whitespace is ignored; similar to Python's str.split().
+
+Examples without a separator (split on whitespace):
+
+  string_split("")          --> []
+  string_split("a")         --> ["a"]
+  string_split(" aa  bb")   --> ["aa", "bb"]
+
+Examples with a separator (split on separators):
+
+  string_split("", "|")           --> [""]
+  string_split("  a b  ", " ")    --> ["", "", "a", "b", "", ""]
+  string_split("aa+-bb+-c", "+-") --> ["aa", "bb", "c"]
+)";
+
+// Implements the string_split() builtin. args[0] is the string to split;
+// the optional args[1] is a non-empty separator. Without a separator the
+// string is split on runs of whitespace and leading/trailing whitespace is
+// ignored. Returns a list of strings; on bad usage sets |err| and returns an
+// empty Value. |scope| is not used.
+Value RunStringSplit(Scope* scope,
+                     const FunctionCallNode* function,
+                     const std::vector<Value>& args,
+                     Err* err) {
+  // Check usage: argument count.
+  if (args.size() != 1 && args.size() != 2) {
+    *err = Err(function, "Wrong number of arguments to string_split().",
+               "Usage: string_split(str[, sep])");
+    return Value();
+  }
+
+  // Check usage: str is a string.
+  if (!args[0].VerifyTypeIs(Value::STRING, err))
+    return Value();
+  const std::string& str = args[0].string_value();
+
+  // Check usage: separator is a non-empty string.
+  std::string separator;
+  if (args.size() == 2) {
+    if (!args[1].VerifyTypeIs(Value::STRING, err))
+      return Value();
+    separator = args[1].string_value();
+    if (separator.empty()) {
+      *err = Err(function, "Separator argument to string_split() "
+                 "cannot be empty string", "Usage: string_split(str[, sep])");
+      return Value();
+    }
+  }
+
+  // Pieces are appended straight into the result list.
+  Value result(function, Value::LIST);
+  std::vector<Value>& pieces = result.list_value();
+  if (!separator.empty()) {
+    // Case: Explicit separator argument.
+    // Note: string_split("", "x") --> [""] like Python.
+    size_t pos = 0;
+    size_t next_pos = 0;
+    while ((next_pos = str.find(separator, pos)) != std::string::npos) {
+      pieces.push_back(Value(function, str.substr(pos, next_pos - pos)));
+      pos = next_pos + separator.length();
+    }
+    pieces.push_back(Value(function, str.substr(pos)));
+  } else {
+    // Case: Split on any whitespace and strip ends.
+    // Note: string_split("") --> [] like Python.
+    // std::isspace() is undefined for negative arguments other than EOF, so
+    // the predicate takes an unsigned char to stay safe on non-ASCII bytes.
+    const auto is_space = [](unsigned char c) { return std::isspace(c) != 0; };
+    std::string::const_iterator pos = str.begin();
+    while (pos != str.end()) {
+      // Advance past whitespace; afterwards pos is at a non-whitespace char.
+      pos = std::find_if_not(pos, str.end(), is_space);
+      if (pos == str.end()) {
+        // Tail is all whitespace, so we're done.
+        break;
+      }
+      // The chunk runs up to the next whitespace char (or end of string).
+      const std::string::const_iterator chunk_end =
+          std::find_if(pos, str.end(), is_space);
+      pieces.push_back(Value(function, std::string(pos, chunk_end)));
+      pos = chunk_end;
+    }
+  }
+  return result;
+}
+
 // -----------------------------------------------------------------------------
 
 FunctionInfo::FunctionInfo()
@@ -1307,7 +1469,9 @@
     INSERT_FUNCTION(SetDefaultToolchain, false)
     INSERT_FUNCTION(SetSourcesAssignmentFilter, false)
     INSERT_FUNCTION(SplitList, false)
+    INSERT_FUNCTION(StringJoin, false)
     INSERT_FUNCTION(StringReplace, false)
+    INSERT_FUNCTION(StringSplit, false)
     INSERT_FUNCTION(Template, false)
     INSERT_FUNCTION(Tool, false)
     INSERT_FUNCTION(Toolchain, false)
diff --git a/src/gn/functions_unittest.cc b/src/gn/functions_unittest.cc
index 1de8fdc..5b2218e 100644
--- a/src/gn/functions_unittest.cc
+++ b/src/gn/functions_unittest.cc
@@ -185,6 +185,81 @@
       setup.print_output());
 }
 
+TEST(Functions, StringJoin) {
+  TestWithScope setup;
+
+  // Verify printed output for valid string_join() calls, one line per call.
+  {
+    TestParseInput input(R"gn(
+        # No elements in the list and empty separator.
+        print("<" + string_join("", []) + ">")
+
+        # No elements in the list.
+        print("<" + string_join(" ", []) + ">")
+
+        # One element in the list.
+        print(string_join("|", ["a"]))
+
+        # Multiple elements in the list.
+        print(string_join(" ", ["a", "b", "c"]))
+
+        # Multi-character separator.
+        print(string_join("-.", ["a", "b", "c"]))
+
+        # Empty separator.
+        print(string_join("", ["x", "y", "z"]))
+
+        # Empty string list elements.
+        print(string_join("x", ["", "", ""]))
+
+        # Empty string list elements and separator
+        print(string_join("", ["", "", ""]))
+        )gn");
+    ASSERT_FALSE(input.has_error());
+
+    Err err;
+    input.parsed()->Execute(setup.scope(), &err);
+    ASSERT_FALSE(err.has_error()) << err.message();
+
+    EXPECT_EQ(
+        "<>\n"
+        "<>\n"
+        "a\n"
+        "a b c\n"
+        "a-.b-.c\n"
+        "xyz\n"
+        "xx\n"
+        "\n",
+        setup.print_output()) << setup.print_output();
+  }
+
+  // Verify usage errors are detected: each snippet parses but fails to run.
+  std::vector<std::string> bad_usage_examples = {
+    // Number of arguments.
+    R"gn(string_join())gn",
+    R"gn(string_join(["oops"]))gn",
+    R"gn(string_join("kk", [], "oops"))gn",
+
+    // Argument types.
+    R"gn(string_join(1, []))gn",
+    R"gn(string_join("kk", "oops"))gn",
+    R"gn(string_join(["oops"], []))gn",
+
+    // Non-string elements in list of strings.
+    R"gn(string_join("kk", [1]))gn",
+    R"gn(string_join("kk", ["hello", 1]))gn",
+    R"gn(string_join("kk", ["hello", []]))gn",
+  };
+  for (const auto& bad_usage_example : bad_usage_examples) {
+    TestParseInput input(bad_usage_example);
+    ASSERT_FALSE(input.has_error());
+
+    Err err;
+    input.parsed()->Execute(setup.scope(), &err);
+    ASSERT_TRUE(err.has_error()) << bad_usage_example;
+  }
+}
+
 TEST(Functions, StringReplace) {
   TestWithScope setup;
 
@@ -218,6 +293,106 @@
       setup.print_output());
 }
 
+TEST(Functions, StringSplit) {
+  TestWithScope setup;
+
+  // Verify printed output for valid string_split() calls, one line per call.
+  {
+    TestParseInput input(R"gn(
+        # Split on all whitespace: empty string.
+        print(string_split(""))
+
+        # Split on all whitespace: leading, trailing, runs; one element.
+        print(string_split("hello"))
+        print(string_split("  hello"))
+        print(string_split("  hello   "))
+        print(string_split("hello   "))
+
+        # Split on all whitespace: leading, trailing, runs; multiple elements.
+        print(string_split("a b"))          # Pre-stripped
+        print(string_split("  a b"))        # Leading whitespace
+        print(string_split("  a b  "))      # Leading & trailing whitespace
+        print(string_split("a b  "))        # Trailing whitespace
+        print(string_split("a  b  "))       # Whitespace run between words
+        print(string_split(" a b cc ddd"))  # More & multi-character elements
+
+        # Split on string.
+        print(string_split("", "|"))           # Empty string
+        print(string_split("|", "|"))          # Only a separator
+        print(string_split("ab", "|"))         # String is missing separator
+        print(string_split("a|b", "|"))        # Two elements
+        print(string_split("|a|b", "|"))       # Leading separator
+        print(string_split("a|b|", "|"))       # Trailing separator
+        print(string_split("||x", "|"))        # Leading consecutive separators
+        print(string_split("x||", "|"))        # Trailing consecutive separators
+        print(string_split("a|bb|ccc", "|"))   # Multiple elements
+        print(string_split(".x.x.x.", ".x."))  # Self-overlapping separators 1
+        print(string_split("x.x.x.", ".x."))   # Self-overlapping separators 2
+        )gn");
+    ASSERT_FALSE(input.has_error());
+
+    Err err;
+    input.parsed()->Execute(setup.scope(), &err);
+    ASSERT_FALSE(err.has_error()) << err.message();
+
+    EXPECT_EQ(
+        // Split on all whitespace: empty string.
+        "[]\n"
+
+        // Split on all whitespace: leading, trailing, runs; one element.
+        "[\"hello\"]\n"
+        "[\"hello\"]\n"
+        "[\"hello\"]\n"
+        "[\"hello\"]\n"
+
+        // Split on all whitespace: leading, trailing, runs; multiple elements.
+        "[\"a\", \"b\"]\n"
+        "[\"a\", \"b\"]\n"
+        "[\"a\", \"b\"]\n"
+        "[\"a\", \"b\"]\n"
+        "[\"a\", \"b\"]\n"
+        "[\"a\", \"b\", \"cc\", \"ddd\"]\n"
+
+        // Split on string.
+        "[\"\"]\n"                   // Empty string (like Python)
+        "[\"\", \"\"]\n"             // Only a separator
+        "[\"ab\"]\n"                 // String is missing separator
+        "[\"a\", \"b\"]\n"           // Two elements
+        "[\"\", \"a\", \"b\"]\n"     // Leading
+        "[\"a\", \"b\", \"\"]\n"     // Trailing
+        "[\"\", \"\", \"x\"]\n"      // Leading consecutive separators
+        "[\"x\", \"\", \"\"]\n"      // Trailing consecutive separators
+        "[\"a\", \"bb\", \"ccc\"]\n" // Multiple elements
+        "[\"\", \"x\", \"\"]\n"      // Self-overlapping separators 1
+        "[\"x\", \"x.\"]\n"          // Self-overlapping separators 2
+        ,
+        setup.print_output()) << setup.print_output();
+  }
+
+  // Verify usage errors are detected: each snippet parses but fails to run.
+  std::vector<std::string> bad_usage_examples = {
+    // Number of arguments.
+    R"gn(string_split())gn",
+    R"gn(string_split("a", "b", "c"))gn",
+
+    // Argument types.
+    R"gn(string_split(1))gn",
+    R"gn(string_split(["oops"]))gn",
+    R"gn(string_split("kk", 1))gn",
+    R"gn(string_split("kk", ["oops"]))gn",
+
+    // Empty separator argument.
+    R"gn(string_split("kk", ""))gn",
+  };
+  for (const auto& bad_usage_example : bad_usage_examples) {
+    TestParseInput input(bad_usage_example);
+    ASSERT_FALSE(input.has_error());
+    Err err;
+    input.parsed()->Execute(setup.scope(), &err);
+    ASSERT_TRUE(err.has_error()) << bad_usage_example;
+  }
+}
+
 TEST(Functions, DeclareArgs) {
   TestWithScope setup;
   Err err;