Add string_join() and string_split() builtins
Usage: string_join(separator, strings)
Example: string_join(" ", ["a", "b", "c"]) --> "a b c"
Usage: string_split(string[, separator])
Example: string_split("hello world ") --> ["hello", "world"]
Example: string_split("a|b|c", "|") --> ["a", "b", "c"]
This patch is based on Petr Hosek's patch:
https://gn-review.googlesource.com/c/gn/+/3401
Change-Id: Iba30f78cb0b1c8aba8f895117e53795aacfc8f6a
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/6563
Commit-Queue: Petr Hosek <phosek@google.com>
Reviewed-by: Brett Wilson <brettw@chromium.org>
diff --git a/docs/reference.md b/docs/reference.md
index 38ebeb6..8253dae 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -55,7 +55,9 @@
* [set_defaults: Set default values for a target type.](#func_set_defaults)
* [set_sources_assignment_filter: Set a pattern to filter source files.](#func_set_sources_assignment_filter)
* [split_list: Splits a list into N different sub-lists.](#func_split_list)
+ * [string_join: Concatenates a list of strings with a separator.](#func_string_join)
* [string_replace: Replaces substring in the given string.](#func_string_replace)
+ * [string_split: Split string into a list of strings.](#func_string_split)
* [template: Define a template rule.](#func_template)
* [tool: Specify arguments to a toolchain tool.](#func_tool)
* [toolchain: Defines a toolchain.](#func_toolchain)
@@ -2960,6 +2962,22 @@
Will print:
[[1, 2], [3, 4], [5, 6]
```
+### <a name="func_string_join"></a>**string_join**: Concatenates a list of strings with a separator.
+
+```
+ result = string_join(separator, strings)
+
+ Concatenate a list of strings with intervening occurrences of separator.
+```
+
+#### **Examples**
+
+```
+ string_join("", ["a", "b", "c"]) --> "abc"
+ string_join("|", ["a", "b", "c"]) --> "a|b|c"
+ string_join(", ", ["a", "b", "c"]) --> "a, b, c"
+ string_join("s", ["", ""]) --> "s"
+```
### <a name="func_string_replace"></a>**string_replace**: Replaces substring in the given string.
```
@@ -2981,6 +2999,33 @@
Will print:
Hello, GN!
```
+### <a name="func_string_split"></a>**string_split**: Split string into a list of strings.
+
+```
+ result = string_split(str[, sep])
+
+ Split string into all substrings separated by separator and returns a list
+ of the substrings between those separators.
+
+ If the separator argument is omitted, the split is by any whitespace, and
+ any leading/trailing whitespace is ignored; similar to Python's str.split().
+```
+
+#### **Examples without a separator (split on whitespace)**:
+
+```
+ string_split("") --> []
+ string_split("a") --> ["a"]
+ string_split(" aa bb") --> ["aa", "bb"]
+```
+
+#### **Examples with a separator (split on separators)**:
+
+```
+ string_split("", "|") --> [""]
+ string_split(" a b ", " ") --> ["", "", "a", "b", "", ""]
+ string_split("aa+-bb+-c", "+-") --> ["aa", "bb", "c"]
+```
### <a name="func_template"></a>**template**: Define a template rule.
```
diff --git a/src/gn/functions.cc b/src/gn/functions.cc
index 0690419..4675ee3 100644
--- a/src/gn/functions.cc
+++ b/src/gn/functions.cc
@@ -5,6 +5,7 @@
#include "gn/functions.h"
#include <stddef.h>
+#include <cctype>
#include <iostream>
#include <memory>
#include <regex>
@@ -1129,6 +1130,67 @@
return result;
}
+// string_join -----------------------------------------------------------------
+// Function name, one-line summary, and full help text for string_join().
+const char kStringJoin[] = "string_join";
+const char kStringJoin_HelpShort[] =
+ "string_join: Concatenates a list of strings with a separator.";
+const char kStringJoin_Help[] =
+ R"(string_join: Concatenates a list of strings with a separator.
+
+ result = string_join(separator, strings)
+
+ Concatenate a list of strings with intervening occurrences of separator.
+
+Examples
+
+ string_join("", ["a", "b", "c"]) --> "abc"
+ string_join("|", ["a", "b", "c"]) --> "a|b|c"
+ string_join(", ", ["a", "b", "c"]) --> "a, b, c"
+ string_join("s", ["", ""]) --> "s"
+)";
+
+// Implements string_join(separator, strings): joins the elements of |strings|
+// with |separator| in between. Sets |*err| and returns Value() on misuse.
+Value RunStringJoin(Scope* scope,
+                    const FunctionCallNode* function,
+                    const std::vector<Value>& args,
+                    Err* err) {
+  // Check usage: number of arguments.
+  if (args.size() != 2) {
+    *err = Err(function, "Wrong number of arguments to string_join().",
+               "Expecting exactly two. usage: string_join(separator, strings)");
+    return Value();
+  }
+
+  // Check usage: separator is a string.
+  if (!args[0].VerifyTypeIs(Value::STRING, err)) {
+    *err = Err(function, "separator in string_join(separator, strings) is not "
+               "a string", "Expecting separator argument to be a string.");
+    return Value();
+  }
+
+  // Check usage: strings is a list.
+  if (!args[1].VerifyTypeIs(Value::LIST, err)) {
+    *err = Err(function, "strings in string_join(separator, strings) "
+               "is not a list", "Expecting strings argument to be a list.");
+    return Value();
+  }
+
+  // Arguments look good. Bind them by reference (no copies) and do the join.
+  const std::string& separator = args[0].string_value();
+  const std::vector<Value>& strings = args[1].list_value();
+  std::string joined;
+  for (size_t i = 0; i < strings.size(); ++i) {
+    if (!strings[i].VerifyTypeIs(Value::STRING, err))
+      return Value();
+    if (i != 0)
+      joined += separator;
+    joined += strings[i].string_value();
+  }
+  return Value(function, std::move(joined));
+}
+
// string_replace --------------------------------------------------------------
const char kStringReplace[] = "string_replace";
@@ -1198,6 +1260,106 @@
return Value(function, std::move(val));
}
+// string_split ----------------------------------------------------------------
+// Function name, one-line summary, and full help text for string_split().
+const char kStringSplit[] = "string_split";
+const char kStringSplit_HelpShort[] =
+ "string_split: Split string into a list of strings.";
+const char kStringSplit_Help[] =
+ R"(string_split: Split string into a list of strings.
+
+ result = string_split(str[, sep])
+
+ Split string into all substrings separated by separator and returns a list
+ of the substrings between those separators.
+
+ If the separator argument is omitted, the split is by any whitespace, and
+ any leading/trailing whitespace is ignored; similar to Python's str.split().
+
+Examples without a separator (split on whitespace):
+
+ string_split("") --> []
+ string_split("a") --> ["a"]
+ string_split(" aa bb") --> ["aa", "bb"]
+
+Examples with a separator (split on separators):
+
+ string_split("", "|") --> [""]
+ string_split(" a b ", " ") --> ["", "", "a", "b", "", ""]
+ string_split("aa+-bb+-c", "+-") --> ["aa", "bb", "c"]
+)";
+
+// Implements string_split(str[, sep]): splits |str| on each occurrence of
+// |sep|, or on runs of whitespace (ignoring leading/trailing whitespace) when
+// |sep| is omitted. On a usage error, sets |*err| and returns Value().
+Value RunStringSplit(Scope* scope,
+                     const FunctionCallNode* function,
+                     const std::vector<Value>& args,
+                     Err* err) {
+  // Check usage: argument count.
+  if (args.size() != 1 && args.size() != 2) {
+    *err = Err(function, "Wrong number of arguments to string_split().",
+               "Usage: string_split(str[, sep])");
+    return Value();
+  }
+
+  // Check usage: str is a string.
+  if (!args[0].VerifyTypeIs(Value::STRING, err))
+    return Value();
+  const std::string& str = args[0].string_value();
+
+  // Check usage: separator is a non-empty string.
+  std::string separator;
+  if (args.size() == 2) {
+    if (!args[1].VerifyTypeIs(Value::STRING, err))
+      return Value();
+    separator = args[1].string_value();
+    if (separator.empty()) {
+      *err = Err(function, "Separator argument to string_split() "
+                 "cannot be empty string", "Usage: string_split(str[, sep])");
+      return Value();
+    }
+  }
+
+  // Split the string into a std::vector.
+  std::vector<std::string> strings;
+  if (!separator.empty()) {
+    // Case: Explicit separator argument.
+    // Note: string_split("", "x") --> [""] like Python.
+    size_t pos = 0;
+    size_t next_pos = 0;
+    while ((next_pos = str.find(separator, pos)) != std::string::npos) {
+      strings.push_back(str.substr(pos, next_pos - pos));
+      pos = next_pos + separator.length();
+    }
+    strings.push_back(str.substr(pos));
+  } else {
+    // Case: Split on any whitespace and strip ends.
+    // Note: string_split("") --> [] like Python.
+    // Predicates take unsigned char: std::isspace() on a negative char is UB.
+    std::string::const_iterator pos = str.cbegin();
+    while (pos != str.cend()) {
+      // Advance past spaces. After this, pos is pointing to non-whitespace.
+      pos = std::find_if(pos, str.cend(),
+                         [](unsigned char c) { return !std::isspace(c); });
+      if (pos == str.cend())
+        break;  // Tail is all whitespace, so we're done.
+      // Advance past non-whitespace to get next chunk.
+      const std::string::const_iterator chunk_end = std::find_if(
+          pos, str.cend(), [](unsigned char c) { return std::isspace(c); });
+      strings.emplace_back(pos, chunk_end);
+      pos = chunk_end;
+    }
+  }
+
+  // Convert vector of std::strings to list of GN strings.
+  Value result(function, Value::LIST);
+  result.list_value().resize(strings.size());
+  for (size_t i = 0; i < strings.size(); ++i)
+    result.list_value()[i] = Value(function, std::move(strings[i]));
+  return result;
+}
+
// -----------------------------------------------------------------------------
FunctionInfo::FunctionInfo()
@@ -1307,7 +1469,9 @@
INSERT_FUNCTION(SetDefaultToolchain, false)
INSERT_FUNCTION(SetSourcesAssignmentFilter, false)
INSERT_FUNCTION(SplitList, false)
+ INSERT_FUNCTION(StringJoin, false)
INSERT_FUNCTION(StringReplace, false)
+ INSERT_FUNCTION(StringSplit, false)
INSERT_FUNCTION(Template, false)
INSERT_FUNCTION(Tool, false)
INSERT_FUNCTION(Toolchain, false)
diff --git a/src/gn/functions_unittest.cc b/src/gn/functions_unittest.cc
index 1de8fdc..5b2218e 100644
--- a/src/gn/functions_unittest.cc
+++ b/src/gn/functions_unittest.cc
@@ -185,6 +185,81 @@
setup.print_output());
}
+TEST(Functions, StringJoin) {
+ TestWithScope setup;
+ // Every snippet below executes in setup.scope(); output is read back from it.
+ // Verify outputs when string_join() is called correctly.
+ {
+ TestParseInput input(R"gn(
+ # No elements in the list and empty separator.
+ print("<" + string_join("", []) + ">")
+
+ # No elements in the list.
+ print("<" + string_join(" ", []) + ">")
+
+ # One element in the list.
+ print(string_join("|", ["a"]))
+
+ # Multiple elements in the list.
+ print(string_join(" ", ["a", "b", "c"]))
+
+ # Multi-character separator.
+ print(string_join("-.", ["a", "b", "c"]))
+
+ # Empty separator.
+ print(string_join("", ["x", "y", "z"]))
+
+ # Empty string list elements.
+ print(string_join("x", ["", "", ""]))
+
+ # Empty string list elements and separator
+ print(string_join("", ["", "", ""]))
+ )gn");
+ ASSERT_FALSE(input.has_error());
+
+ Err err;
+ input.parsed()->Execute(setup.scope(), &err);
+ ASSERT_FALSE(err.has_error()) << err.message();
+
+ EXPECT_EQ(
+ "<>\n"
+ "<>\n"
+ "a\n"
+ "a b c\n"
+ "a-.b-.c\n"
+ "xyz\n"
+ "xx\n"
+ "\n",
+ setup.print_output()) << setup.print_output();
+ }
+
+ // Verify usage errors are detected.
+ std::vector<std::string> bad_usage_examples = {
+ // Number of arguments.
+ R"gn(string_join())gn",
+ R"gn(string_join(["oops"]))gn",
+ R"gn(string_join("kk", [], "oops"))gn",
+
+ // Argument types.
+ R"gn(string_join(1, []))gn",
+ R"gn(string_join("kk", "oops"))gn",
+ R"gn(string_join(["oops"], []))gn",
+
+ // Non-string elements in list of strings.
+ R"gn(string_join("kk", [1]))gn",
+ R"gn(string_join("kk", ["hello", 1]))gn",
+ R"gn(string_join("kk", ["hello", []]))gn",
+ };
+ for (const auto& bad_usage_example : bad_usage_examples) {
+ TestParseInput input(bad_usage_example);
+ ASSERT_FALSE(input.has_error());
+ // Parsing succeeded, so the usage error must be reported by execution.
+ Err err;
+ input.parsed()->Execute(setup.scope(), &err);
+ ASSERT_TRUE(err.has_error()) << bad_usage_example;
+ }
+}
+
TEST(Functions, StringReplace) {
TestWithScope setup;
@@ -218,6 +293,106 @@
setup.print_output());
}
+TEST(Functions, StringSplit) {
+ TestWithScope setup;
+ // Every snippet below executes in setup.scope(); output is read back from it.
+ // Verify outputs when string_split() is called correctly.
+ {
+ TestParseInput input(R"gn(
+ # Split on all whitespace: empty string.
+ print(string_split(""))
+
+ # Split on all whitespace: leading, trailing, runs; one element.
+ print(string_split("hello"))
+ print(string_split(" hello"))
+ print(string_split(" hello "))
+ print(string_split("hello "))
+
+ # Split on all whitespace: leading, trailing, runs; multiple elements.
+ print(string_split("a b")) # Pre-stripped
+ print(string_split(" a b")) # Leading whitespace
+ print(string_split(" a b ")) # Leading & trailing whitespace
+ print(string_split("a b ")) # Trailing whitespace
+ print(string_split("a b ")) # Whitespace run between words
+ print(string_split(" a b cc ddd")) # More & multi-character elements
+
+ # Split on string.
+ print(string_split("", "|")) # Empty string
+ print(string_split("|", "|")) # Only a separator
+ print(string_split("ab", "|")) # String is missing separator
+ print(string_split("a|b", "|")) # Two elements
+ print(string_split("|a|b", "|")) # Leading separator
+ print(string_split("a|b|", "|")) # Trailing separator
+ print(string_split("||x", "|")) # Leading consecutive separators
+ print(string_split("x||", "|")) # Trailing consecutive separators
+ print(string_split("a|bb|ccc", "|")) # Multiple elements
+ print(string_split(".x.x.x.", ".x.")) # Self-overlapping separators 1
+ print(string_split("x.x.x.", ".x.")) # Self-overlapping separators 2
+ )gn");
+ ASSERT_FALSE(input.has_error());
+
+ Err err;
+ input.parsed()->Execute(setup.scope(), &err);
+ ASSERT_FALSE(err.has_error()) << err.message();
+
+ EXPECT_EQ(
+ // Split on all whitespace: empty string.
+ "[]\n"
+
+ // Split on all whitespace: leading, trailing, runs; one element.
+ "[\"hello\"]\n"
+ "[\"hello\"]\n"
+ "[\"hello\"]\n"
+ "[\"hello\"]\n"
+
+ // Split on all whitespace: leading, trailing, runs; multiple elements.
+ "[\"a\", \"b\"]\n"
+ "[\"a\", \"b\"]\n"
+ "[\"a\", \"b\"]\n"
+ "[\"a\", \"b\"]\n"
+ "[\"a\", \"b\"]\n"
+ "[\"a\", \"b\", \"cc\", \"ddd\"]\n"
+
+ // Split on string.
+ "[\"\"]\n" // Empty string (like Python)
+ "[\"\", \"\"]\n" // Only a separator
+ "[\"ab\"]\n" // String is missing separator
+ "[\"a\", \"b\"]\n" // Two elements
+ "[\"\", \"a\", \"b\"]\n" // Leading
+ "[\"a\", \"b\", \"\"]\n" // Trailing
+ "[\"\", \"\", \"x\"]\n" // Leading consecutive separators
+ "[\"x\", \"\", \"\"]\n" // Trailing consecutive separators
+ "[\"a\", \"bb\", \"ccc\"]\n" // Multiple elements
+ "[\"\", \"x\", \"\"]\n" // Self-overlapping separators 1
+ "[\"x\", \"x.\"]\n" // Self-overlapping separators 2
+ ,
+ setup.print_output()) << setup.print_output();
+ }
+
+ // Verify usage errors are detected.
+ std::vector<std::string> bad_usage_examples = {
+ // Number of arguments.
+ R"gn(string_split())gn",
+ R"gn(string_split("a", "b", "c"))gn",
+
+ // Argument types.
+ R"gn(string_split(1))gn",
+ R"gn(string_split(["oops"]))gn",
+ R"gn(string_split("kk", 1))gn",
+ R"gn(string_split("kk", ["oops"]))gn",
+
+ // Empty separator argument.
+ R"gn(string_split("kk", ""))gn",
+ };
+ for (const auto& bad_usage_example : bad_usage_examples) {
+ TestParseInput input(bad_usage_example);
+ ASSERT_FALSE(input.has_error());
+ Err err;
+ input.parsed()->Execute(setup.scope(), &err);
+ ASSERT_TRUE(err.has_error()) << bad_usage_example;
+ }
+}
+
TEST(Functions, DeclareArgs) {
TestWithScope setup;
Err err;