Optimize base::EscapeJSONString for ASCII inputs.
Most of the strings passed to this function are ASCII-only and
don't even need escaping. This CL provides fast-paths for these
cases.
This speeds up the generation of many JSON files written by GN.
For example, on a large Fuchsia build plan, this saves about 4
seconds while generating identical JSON outputs:
```
$ hyperfine --runs=5 '/tmp/gn1 gen --ide=json out/default --export-rust-project --export-compile-commands' '/tmp/gn3 gen --ide=json out/default --export-rust-project --export-compile-commands'
Benchmark 1: /tmp/gn1 gen --ide=json out/default --export-rust-project --export-compile-commands
Time (mean ± σ): 28.883 s ± 0.836 s [User: 117.852 s, System: 42.476 s]
Range (min … max): 27.694 s … 29.916 s 5 runs
Benchmark 2: /tmp/gn3 gen --ide=json out/default --export-rust-project --export-compile-commands
Time (mean ± σ): 24.829 s ± 0.590 s [User: 112.068 s, System: 42.400 s]
Range (min … max): 23.879 s … 25.296 s 5 runs
Summary
'/tmp/gn3 gen --ide=json out/default --export-rust-project --export-compile-commands' ran
1.16 ± 0.04 times faster than '/tmp/gn1 gen --ide=json out/default --export-rust-project --export-compile-commands'
```
Change-Id: I7f678f898a9e7f1d271ef8cb47ea77bb6d22800c
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/16040
Commit-Queue: David Turner <digit@google.com>
Reviewed-by: Takuto Ikuta <tikuta@google.com>
diff --git a/src/base/json/string_escape.cc b/src/base/json/string_escape.cc
index 51a818c..90cbbab 100644
--- a/src/base/json/string_escape.cc
+++ b/src/base/json/string_escape.cc
@@ -29,6 +29,34 @@
// Used below in EscapeSpecialCodePoint().
static_assert('<' == 0x3C, "less than sign must be 0x3c");
+template <typename S>
+bool IsAscii(const S& str) {  // True iff every element of |str| is in [0, 126].
+ for (auto ch : str) {
+ if (ch < 0 || ch > 126)  // |ch < 0| catches bytes >= 0x80 held in a signed char.
+ return false;
+ }
+ return true;
+}
+
+size_t ComputeAsciiEscapedSize(char ch) {  // Escape length for |ch|; 0 if unescaped.
+ switch (ch) {
+ case '\b':
+ case '\f':
+ case '\n':
+ case '\r':
+ case '\t':
+ case '\\':
+ case '"':
+ return 2;  // Presumably the two-character forms such as \n; TODO confirm.
+ case '<': // Special case, consistent with EscapeSpecialCodePoint below.
+ return 6;  // Presumably the length of a \uXXXX sequence; TODO confirm.
+ default:
+ if (ch < 32)  // Also true for a negative |ch| where plain char is signed.
+ return 6;
+ return 0;  // No escape applies; the caller copies |ch| through unchanged.
+ }
+}
+
// Try to escape the |code_point| if it is a known special character. If
// successful, returns true and appends the escape sequence to |dest|. This
// isn't required by the spec, but it's more readable by humans.
@@ -72,7 +100,10 @@
dest->append("\\u2029");
break;
default:
- return false;
+ if (code_point >= 32)
+ return false;
+ // Escape non-printing characters.
+ base::StringAppendF(dest, kU16EscapeFormat, code_point);
}
return true;
}
@@ -84,28 +115,40 @@
if (put_in_quotes)
dest->push_back('"');
- // Casting is necessary because ICU uses int32_t. Try and do so safely.
- CHECK_LE(str.length(),
- static_cast<size_t>(std::numeric_limits<int32_t>::max()));
- const int32_t length = static_cast<int32_t>(str.length());
+ // Most input strings are ASCII only and do not need UTF-8 parsing or
+ // even escaping at all.
+ if (IsAscii(str)) {
+ size_t escapes_size = 0;
+ for (auto ch : str)
+ escapes_size += ComputeAsciiEscapedSize(ch);
- for (int32_t i = 0; i < length; ++i) {
- uint32_t code_point;
- if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) ||
- code_point == static_cast<decltype(code_point)>(CBU_SENTINEL) ||
- !IsValidCharacter(code_point)) {
- code_point = kReplacementCodePoint;
- did_replacement = true;
+ if (escapes_size == 0) {
+ dest->append(str.begin(), str.end());
+ } else {
+ dest->reserve(dest->size() + str.size() + escapes_size);
+ for (auto ch : str) {
+ if (!EscapeSpecialCodePoint(ch, dest))
+ dest->push_back(ch);
+ }
}
+ } else {
+ // Casting is necessary because ICU uses int32_t. Try and do so safely.
+ CHECK_LE(str.length(),
+ static_cast<size_t>(std::numeric_limits<int32_t>::max()));
+ const int32_t length = static_cast<int32_t>(str.length());
- if (EscapeSpecialCodePoint(code_point, dest))
- continue;
+ for (int32_t i = 0; i < length; ++i) {
+ uint32_t code_point;
+ if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) ||
+ code_point == static_cast<decltype(code_point)>(CBU_SENTINEL) ||
+ !IsValidCharacter(code_point)) {
+ code_point = kReplacementCodePoint;
+ did_replacement = true;
+ }
- // Escape non-printing characters.
- if (code_point < 32)
- base::StringAppendF(dest, kU16EscapeFormat, code_point);
- else
- WriteUnicodeCharacter(code_point, dest);
+ if (!EscapeSpecialCodePoint(code_point, dest))
+ WriteUnicodeCharacter(code_point, dest);
+ }
}
if (put_in_quotes)