src/base/json/json_parser.cc - gn - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "base/json/json_parser.h"

 #include <cmath>
 #include <string_view>
 #include <utility>
 #include <vector>

 #include "base/logging.h"
 #include "base/numerics/safe_conversions.h"
 #include "base/strings/string_number_conversions.h"
 #include "base/strings/string_util.h"
 #include "base/strings/stringprintf.h"
 #include "base/strings/utf_string_conversion_utils.h"
 #include "base/strings/utf_string_conversions.h"
 #include "base/third_party/icu/icu_utf.h"
 #include "base/values.h"

 namespace base {
 namespace internal {

 namespace {

 // First code point outside 7-bit ASCII. StringBuilder can only keep
 // referring to the raw input while every appended code point is below this.
 constexpr int32_t kExtendedASCIIStart = 0x80;

 // Scoped nesting counter: increments |*depth| on construction and decrements
 // it again on destruction, so the count follows the parser's recursion.
 class StackMarker {
  public:
   StackMarker(int max_depth, int* depth)
       : max_depth_(max_depth), depth_(depth) {
     *depth_ += 1;
     DCHECK_LE(*depth_, max_depth_);
   }

   StackMarker(const StackMarker&) = delete;
   StackMarker& operator=(const StackMarker&) = delete;

   ~StackMarker() { *depth_ -= 1; }

   // True once the counter has reached the configured limit.
   bool IsTooDeep() const { return !(*depth_ < max_depth_); }

  private:
   const int max_depth_;
   int* const depth_;
 };

 // U+FFFD, substituted for input that cannot be represented.
 constexpr uint32_t kUnicodeReplacementPoint = 0xFFFD;

 }  // namespace

 // UTF-8 encoding of U+FFFD; StringBuilder::Append() writes these bytes when
 // asked to append kUnicodeReplacementPoint.
 const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";

 // Creates a parser configured with the |options| bit mask and the nesting
 // limit |max_depth|. Cursor and error state start out zeroed; Parse() resets
 // them again at the start of every run.
 JSONParser::JSONParser(int options, int max_depth)
     : options_(options),
       max_depth_(max_depth),
       index_(0),
       stack_depth_(0),
       line_number_(0),
       index_last_line_(0),
       error_code_(JSONReader::JSON_NO_ERROR),
       error_line_(0),
       error_column_(0) {
   // A limit above JSONReader::kStackMaxDepth is a caller bug.
   CHECK_LE(max_depth, JSONReader::kStackMaxDepth);
 }

 JSONParser::~JSONParser() = default;

 std::optional<Value> JSONParser::Parse(std::string_view input) {
   input_ = input;
   index_ = 0;
   line_number_ = 1;
   index_last_line_ = 0;

   error_code_ = JSONReader::JSON_NO_ERROR;
   error_line_ = 0;
   error_column_ = 0;

   // ICU and ReadUnicodeCharacter() use int32_t for lengths, so ensure
   // that the index_ will not overflow when parsing.
   if (!base::IsValueInRangeForNumericType<int32_t>(input.length())) {
     ReportError(JSONReader::JSON_TOO_LARGE, 0);
     return std::nullopt;
   }

   // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
   // advance the start position to avoid the ParseNextToken function mis-
   // treating a Unicode BOM as an invalid character and returning NULL.
   ConsumeIfMatch("\xEF\xBB\xBF");

   // Parse the first and any nested tokens.
   std::optional<Value> root(ParseNextToken());
   if (!root)
     return std::nullopt;

   // Make sure the input stream is at an end.
   if (GetNextToken() != T_END_OF_INPUT) {
     ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
     return std::nullopt;
   }

   return root;
 }

 // Error code recorded by the most recent ReportError() call.
 JSONReader::JsonParseError JSONParser::error_code() const { return error_code_; }

 // Formats the recorded error code together with its line and column.
 std::string JSONParser::GetErrorMessage() const {
   const std::string description = JSONReader::ErrorCodeToString(error_code_);
   return FormatErrorMessage(error_line_, error_column_, description);
 }

 // Line recorded by the most recent ReportError() call.
 int JSONParser::error_line() const { return error_line_; }

 // Column recorded by the most recent ReportError() call.
 int JSONParser::error_column() const { return error_column_; }

 // StringBuilder ///////////////////////////////////////////////////////////////

 // Creates an empty builder that is not backed by any input.
 JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}

 // Creates a builder whose contents begin at |pos| and are zero bytes long so
 // far; Append() extends the length as characters are accepted.
 JSONParser::StringBuilder::StringBuilder(const char* pos)
     : pos_(pos), length_(0) {}

 JSONParser::StringBuilder::~StringBuilder() = default;

 // Move assignment; ConsumeStringRaw() uses it to hand the finished builder
 // to its caller.
 JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
     StringBuilder&& other) = default;

 // Adds code point |point| to the string being built.
 void JSONParser::StringBuilder::Append(uint32_t point) {
   DCHECK(IsValidCharacter(point));

   // While no copy has been made and the character is plain ASCII, the
   // builder simply grows its window over the input.
   const bool can_extend_in_place = !string_ && point < kExtendedASCIIStart;
   if (can_extend_in_place) {
     DCHECK_EQ(static_cast<char>(point), pos_[length_]);
     ++length_;
     return;
   }

   // Otherwise switch to (or continue with) the owned std::string.
   Convert();
   if (UNLIKELY(point == kUnicodeReplacementPoint)) {
     string_->append(kUnicodeReplacementString);
     return;
   }
   WriteUnicodeCharacter(point, &*string_);
 }

 // Copies the bytes collected so far into an owned std::string, unless that
 // has already happened.
 void JSONParser::StringBuilder::Convert() {
   if (!string_)
     string_.emplace(pos_, length_);
 }

 // Returns the built string. If an owned copy exists it is moved out, so the
 // builder should not be used for its contents afterwards.
 std::string JSONParser::StringBuilder::DestructiveAsString() {
   return string_ ? std::move(*string_) : std::string(pos_, length_);
 }

 // JSONParser private //////////////////////////////////////////////////////////

 // Returns a view of the next |count| bytes without advancing, or
 // std::nullopt when fewer than |count| bytes remain.
 std::optional<std::string_view> JSONParser::PeekChars(int count) {
   const size_t end = static_cast<size_t>(index_) + count;
   if (end > input_.length())
     return std::nullopt;
   // The view is built by hand because std::string_view::substr() measured
   // significantly slower in base_perftests.
   return std::string_view(input_.data() + index_, count);
 }

 // Returns the byte under the cursor, or std::nullopt at end of input.
 std::optional<char> JSONParser::PeekChar() {
   if (const std::optional<std::string_view> next = PeekChars(1))
     return next->front();
   return std::nullopt;
 }

 // Like PeekChars(), but also advances the cursor past the returned bytes.
 // The cursor does not move when fewer than |count| bytes remain.
 std::optional<std::string_view> JSONParser::ConsumeChars(int count) {
   std::optional<std::string_view> taken = PeekChars(count);
   if (!taken)
     return std::nullopt;
   index_ += count;
   return taken;
 }

 // Returns the byte under the cursor and advances past it, or std::nullopt
 // (without moving) at end of input.
 std::optional<char> JSONParser::ConsumeChar() {
   if (const std::optional<std::string_view> taken = ConsumeChars(1))
     return taken->front();
   return std::nullopt;
 }

 // Pointer to the byte under the cursor (one past the end at end of input).
 const char* JSONParser::pos() {
   const size_t offset = static_cast<size_t>(index_);
   CHECK_LE(offset, input_.length());
   return input_.data() + offset;
 }

 // Skips whitespace and comments, then classifies the upcoming token by its
 // first byte. The byte itself is not consumed.
 JSONParser::Token JSONParser::GetNextToken() {
   EatWhitespaceAndComments();

   const std::optional<char> next = PeekChar();
   if (!next.has_value())
     return T_END_OF_INPUT;

   const char ch = *next;
   if (ch == '-' || IsAsciiDigit(ch))
     return T_NUMBER;

   switch (ch) {
     case '{':
       return T_OBJECT_BEGIN;
     case '}':
       return T_OBJECT_END;
     case '[':
       return T_ARRAY_BEGIN;
     case ']':
       return T_ARRAY_END;
     case '"':
       return T_STRING;
     case 't':
       return T_BOOL_TRUE;
     case 'f':
       return T_BOOL_FALSE;
     case 'n':
       return T_NULL;
     case ',':
       return T_LIST_SEPARATOR;
     case ':':
       return T_OBJECT_PAIR_SEPARATOR;
     default:
       break;
   }
   return T_INVALID_TOKEN;
 }

 // Advances past spaces, tabs, line breaks and comments, keeping the line
 // bookkeeping used for error positions up to date.
 void JSONParser::EatWhitespaceAndComments() {
   for (;;) {
     const std::optional<char> next = PeekChar();
     if (!next)
       return;

     const char ch = *next;
     if (ch == '\r' || ch == '\n') {
       index_last_line_ = index_;
       // "\r\n" is one line break: the '\n' half must not count again.
       const bool lf_after_cr =
           ch == '\n' && index_ > 0 && input_[index_ - 1] == '\r';
       if (!lf_after_cr)
         ++line_number_;
       ConsumeChar();
     } else if (ch == ' ' || ch == '\t') {
       ConsumeChar();
     } else if (ch == '/') {
       if (!EatComment())
         return;
     } else {
       return;
     }
   }
 }

 // Called with the cursor on '/'. Consumes two bytes and, if they open a
 // comment, the comment body. Returns true when a "//" comment ran up to a
 // line break (left unconsumed) or a "/* */" comment was closed; returns false
 // when fewer than two bytes remain, the two bytes are not a comment opener,
 // or the input ends first.
 bool JSONParser::EatComment() {
   const std::optional<std::string_view> opener = ConsumeChars(2);
   if (!opener)
     return false;

   if (*opener == "//") {
     // Line comment: stop in front of the terminating line break.
     for (std::optional<char> c = PeekChar(); c; c = PeekChar()) {
       if (*c == '\n' || *c == '\r')
         return true;
       ConsumeChar();
     }
     return false;
   }

   if (*opener == "/*") {
     // Block comment: scan for "*/", remembering the previous byte.
     char last = '\0';
     for (std::optional<char> c = PeekChar(); c; c = PeekChar()) {
       if (last == '*' && *c == '/') {
         // Step past the closing '/' so the caller resumes after the comment
         // (which may be the end of input).
         ConsumeChar();
         return true;
       }
       ConsumeChar();
       last = *c;
     }
     // Unterminated: the caller will see T_END_OF_INPUT.
   }

   return false;
 }

 std::optional<Value> JSONParser::ParseNextToken() {
   return ParseToken(GetNextToken());
 }

 // Dispatches to the consumer for the value that |token| starts. Tokens that
 // cannot start a value are reported as JSON_UNEXPECTED_TOKEN.
 std::optional<Value> JSONParser::ParseToken(Token token) {
   switch (token) {
     case T_OBJECT_BEGIN:
       return ConsumeDictionary();
     case T_ARRAY_BEGIN:
       return ConsumeList();
     case T_STRING:
       return ConsumeString();
     case T_NUMBER:
       return ConsumeNumber();
     case T_BOOL_TRUE:
     case T_BOOL_FALSE:
     case T_NULL:
       return ConsumeLiteral();
     default:
       break;
   }

   ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
   return std::nullopt;
 }

 std::optional<Value> JSONParser::ConsumeDictionary() {
   if (ConsumeChar() != '{') {
     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
     return std::nullopt;
   }

   StackMarker depth_check(max_depth_, &stack_depth_);
   if (depth_check.IsTooDeep()) {
     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
     return std::nullopt;
   }

   std::vector<Value::DictStorage::value_type> dict_storage;

   Token token = GetNextToken();
   while (token != T_OBJECT_END) {
     if (token != T_STRING) {
       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
       return std::nullopt;
     }

     // First consume the key.
     StringBuilder key;
     if (!ConsumeStringRaw(&key)) {
       return std::nullopt;
     }

     // Read the separator.
     token = GetNextToken();
     if (token != T_OBJECT_PAIR_SEPARATOR) {
       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
       return std::nullopt;
     }

     // The next token is the value. Ownership transfers to |dict|.
     ConsumeChar();
     std::optional<Value> value = ParseNextToken();
     if (!value) {
       // ReportError from deeper level.
       return std::nullopt;
     }

     dict_storage.emplace_back(key.DestructiveAsString(),
                               std::make_unique<Value>(std::move(*value)));

     token = GetNextToken();
     if (token == T_LIST_SEPARATOR) {
       ConsumeChar();
       token = GetNextToken();
       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
         return std::nullopt;
       }
     } else if (token != T_OBJECT_END) {
       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
       return std::nullopt;
     }
   }

   ConsumeChar();  // Closing '}'.

   return Value(Value::DictStorage(std::move(dict_storage), KEEP_LAST_OF_DUPES));
 }

 std::optional<Value> JSONParser::ConsumeList() {
   if (ConsumeChar() != '[') {
     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
     return std::nullopt;
   }

   StackMarker depth_check(max_depth_, &stack_depth_);
   if (depth_check.IsTooDeep()) {
     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
     return std::nullopt;
   }

   Value::ListStorage list_storage;

   Token token = GetNextToken();
   while (token != T_ARRAY_END) {
     std::optional<Value> item = ParseToken(token);
     if (!item) {
       // ReportError from deeper level.
       return std::nullopt;
     }

     list_storage.push_back(std::move(*item));

     token = GetNextToken();
     if (token == T_LIST_SEPARATOR) {
       ConsumeChar();
       token = GetNextToken();
       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
         return std::nullopt;
       }
     } else if (token != T_ARRAY_END) {
       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
       return std::nullopt;
     }
   }

   ConsumeChar();  // Closing ']'.

   return Value(std::move(list_storage));
 }

 std::optional<Value> JSONParser::ConsumeString() {
   StringBuilder string;
   if (!ConsumeStringRaw(&string))
     return std::nullopt;

   return Value(string.DestructiveAsString());
 }

 // Reads a quoted string starting at its opening '"' and moves the decoded
 // contents into |*out|. Returns false, with an error reported, on malformed
 // input or when the input ends before the closing quote.
 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
   if (ConsumeChar() != '"') {
     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
     return false;
   }

   // The builder refers to the input in place for as long as it can and only
   // copies into a std::string once the contents differ from the raw bytes.
   StringBuilder string(pos());

   while (PeekChar()) {
     uint32_t next_char = 0;
     const bool decoded =
         ReadUnicodeCharacter(input_.data(),
                              static_cast<int32_t>(input_.length()), &index_,
                              &next_char);
     if (!decoded || !IsValidCharacter(next_char)) {
       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
         ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
         return false;
       }
       ConsumeChar();
       string.Append(kUnicodeReplacementPoint);
       continue;
     }

     if (next_char == '"') {
       // Closing quote: hand the result over.
       ConsumeChar();
       *out = std::move(string);
       return true;
     }

     if (next_char != '\\') {
       // Ordinary character.
       ConsumeChar();
       string.Append(next_char);
       continue;
     }

     // Escape sequence. The decoded text no longer matches the input bytes,
     // so the builder must own a copy from here on.
     string.Convert();

     // Take the '\' together with the character that selects the escape.
     std::optional<std::string_view> escape_sequence = ConsumeChars(2);
     if (!escape_sequence) {
       ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
       return false;
     }

     const char escape_kind = (*escape_sequence)[1];
     switch (escape_kind) {
       case 'x': {
         // "\xNN" is not part of the JSON spec; it is accepted for
         // backwards compatibility with the old parser.
         escape_sequence = ConsumeChars(2);
         if (!escape_sequence) {
           ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
           return false;
         }

         int hex_digit = 0;
         if (!HexStringToInt(*escape_sequence, &hex_digit) ||
             !IsValidCharacter(hex_digit)) {
           ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
           return false;
         }

         string.Append(hex_digit);
         break;
       }
       case 'u': {
         // "\uXXXX", possibly followed by a second unit for a surrogate pair.
         uint32_t code_point;
         if (!DecodeUTF16(&code_point)) {
           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
           return false;
         }
         string.Append(code_point);
         break;
       }
       case '"':
       case '\\':
       case '/':
         // These escapes stand for themselves.
         string.Append(escape_kind);
         break;
       case 'b':
         string.Append('\b');
         break;
       case 'f':
         string.Append('\f');
         break;
       case 'n':
         string.Append('\n');
         break;
       case 'r':
         string.Append('\r');
         break;
       case 't':
         string.Append('\t');
         break;
       case 'v':  // Accepted although the RFC does not list it.
         string.Append('\v');
         break;
       default:
         // Every other escape sequence is illegal.
         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
         return false;
     }
   }

   // Ran out of input before the closing quote.
   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
   return false;
 }

 // Decodes the hex digits of a "\uXXXX" escape (entry is on the first X),
 // including the second "\uXXXX" of a surrogate pair. Writes the resulting
 // code point to |*out_code_point| and returns true on success.
 bool JSONParser::DecodeUTF16(uint32_t* out_code_point) {
   const std::optional<std::string_view> lead_hex = ConsumeChars(4);
   if (!lead_hex)
     return false;

   int lead = 0;
   if (!HexStringToInt(*lead_hex, &lead))
     return false;

   if (!CBU16_IS_SURROGATE(lead)) {
     // A single code unit is the whole code point.
     DCHECK(CBU16_IS_SINGLE(lead));
     if (IsValidCharacter(lead)) {
       *out_code_point = lead;
       return true;
     }
     if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
       return false;
     *out_code_point = kUnicodeReplacementPoint;
     return true;
   }

   // A pair must begin with the lead (high) half.
   if (!CBU16_IS_SURROGATE_LEAD(lead))
     return false;

   // The trail (low) half has to follow immediately as another "\uXXXX".
   if (!ConsumeIfMatch("\\u"))
     return false;

   const std::optional<std::string_view> trail_hex = ConsumeChars(4);
   if (!trail_hex)
     return false;

   int trail = 0;
   if (!HexStringToInt(*trail_hex, &trail))
     return false;
   if (!CBU16_IS_TRAIL(trail))
     return false;

   const uint32_t code_point = CBU16_GET_SUPPLEMENTARY(lead, trail);
   if (!IsValidCharacter(code_point))
     return false;

   *out_code_point = code_point;
   return true;
 }

 std::optional<Value> JSONParser::ConsumeNumber() {
   const char* num_start = pos();
   const int start_index = index_;
   int end_index = start_index;

   if (PeekChar() == '-')
     ConsumeChar();

   if (!ReadInt(false)) {
     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
     return std::nullopt;
   }
   end_index = index_;

   // The optional fraction part.
   if (PeekChar() == '.') {
     ConsumeChar();
     if (!ReadInt(true)) {
       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
       return std::nullopt;
     }
     end_index = index_;
   }

   // Optional exponent part.
   std::optional<char> c = PeekChar();
   if (c == 'e' || c == 'E') {
     ConsumeChar();
     if (PeekChar() == '-' || PeekChar() == '+') {
       ConsumeChar();
     }
     if (!ReadInt(true)) {
       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
       return std::nullopt;
     }
     end_index = index_;
   }

   // ReadInt is greedy because numbers have no easily detectable sentinel,
   // so save off where the parser should be on exit (see Consume invariant at
   // the top of the header), then make sure the next token is one which is
   // valid.
   int exit_index = index_;

   switch (GetNextToken()) {
     case T_OBJECT_END:
     case T_ARRAY_END:
     case T_LIST_SEPARATOR:
     case T_END_OF_INPUT:
       break;
     default:
       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
       return std::nullopt;
   }

   index_ = exit_index;

   std::string_view num_string(num_start, end_index - start_index);

   int num_int;
   if (StringToInt(num_string, &num_int))
     return Value(num_int);

   return std::nullopt;
 }

 // Consumes a run of ASCII digits. Returns false when no digit was found, or
 // when the run has more than one digit, starts with '0' and
 // |allow_leading_zeros| is false.
 bool JSONParser::ReadInt(bool allow_leading_zeros) {
   size_t digit_count = 0;
   char first_digit = 0;

   for (std::optional<char> c = PeekChar(); c && IsAsciiDigit(*c);
        c = PeekChar()) {
     if (digit_count == 0)
       first_digit = *c;
     ++digit_count;
     ConsumeChar();
   }

   if (digit_count == 0)
     return false;

   const bool has_leading_zero = digit_count > 1 && first_digit == '0';
   return allow_leading_zeros || !has_leading_zero;
 }

 // Parses one of the literals "true", "false" or "null"; anything else is
 // reported as JSON_SYNTAX_ERROR.
 std::optional<Value> JSONParser::ConsumeLiteral() {
   if (ConsumeIfMatch("true"))
     return Value(true);
   if (ConsumeIfMatch("false"))
     return Value(false);
   if (ConsumeIfMatch("null"))
     return Value(Value::Type::NONE);

   ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
   return std::nullopt;
 }

 // If the input at the cursor starts with |match|, consumes it and returns
 // true; otherwise leaves the cursor alone and returns false.
 bool JSONParser::ConsumeIfMatch(std::string_view match) {
   if (PeekChars(match.size()) != match)
     return false;
   ConsumeChars(match.size());
   return true;
 }

 // Records |code| together with the current line and a column computed from
 // the cursor's distance to the last line break, shifted by |column_adjust|.
 void JSONParser::ReportError(JSONReader::JsonParseError code,
                              int column_adjust) {
   const auto offset_in_line = index_ - index_last_line_;
   error_code_ = code;
   error_line_ = line_number_;
   error_column_ = offset_in_line + column_adjust;
 }

 // static
 // Prefixes |description| with the position, unless both |line| and |column|
 // are zero, in which case |description| is returned unchanged.
 std::string JSONParser::FormatErrorMessage(int line,
                                            int column,
                                            const std::string& description) {
   if (line == 0 && column == 0)
     return description;
   return StringPrintf("Line: %i, column: %i, %s", line, column,
                       description.c_str());
 }

 }  // namespace internal
 }  // namespace base
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "base/json/json_parser.h"

	#include <cmath>
	#include <string_view>
	#include <utility>
	#include <vector>

	#include "base/logging.h"
	#include "base/numerics/safe_conversions.h"
	#include "base/strings/string_number_conversions.h"
	#include "base/strings/string_util.h"
	#include "base/strings/stringprintf.h"
	#include "base/strings/utf_string_conversion_utils.h"
	#include "base/strings/utf_string_conversions.h"
	#include "base/third_party/icu/icu_utf.h"
	#include "base/values.h"

	namespace base {
	namespace internal {

	namespace {

	// First code point outside 7-bit ASCII. StringBuilder can only keep
	// referring to the raw input while every appended code point is below this.
	constexpr int32_t kExtendedASCIIStart = 0x80;

	// Scoped nesting counter: increments |*depth| on construction and decrements
	// it again on destruction, so the count follows the parser's recursion.
	class StackMarker {
	 public:
	  StackMarker(int max_depth, int* depth)
	      : max_depth_(max_depth), depth_(depth) {
	    *depth_ += 1;
	    DCHECK_LE(*depth_, max_depth_);
	  }

	  StackMarker(const StackMarker&) = delete;
	  StackMarker& operator=(const StackMarker&) = delete;

	  ~StackMarker() { *depth_ -= 1; }

	  // True once the counter has reached the configured limit.
	  bool IsTooDeep() const { return !(*depth_ < max_depth_); }

	 private:
	  const int max_depth_;
	  int* const depth_;
	};

	// U+FFFD, substituted for input that cannot be represented.
	constexpr uint32_t kUnicodeReplacementPoint = 0xFFFD;

	}  // namespace

	// UTF-8 encoding of U+FFFD; StringBuilder::Append() writes these bytes when
	// asked to append kUnicodeReplacementPoint.
	const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";

	// Creates a parser configured with the |options| bit mask and the nesting
	// limit |max_depth|. Cursor and error state start out zeroed; Parse() resets
	// them again at the start of every run.
	JSONParser::JSONParser(int options, int max_depth)
	: options_(options),
	max_depth_(max_depth),
	index_(0),
	stack_depth_(0),
	line_number_(0),
	index_last_line_(0),
	error_code_(JSONReader::JSON_NO_ERROR),
	error_line_(0),
	error_column_(0) {
	// A limit above JSONReader::kStackMaxDepth is a caller bug.
	CHECK_LE(max_depth, JSONReader::kStackMaxDepth);
	}

	JSONParser::~JSONParser() = default;

	std::optional<Value> JSONParser::Parse(std::string_view input) {
	input_ = input;
	index_ = 0;
	line_number_ = 1;
	index_last_line_ = 0;

	error_code_ = JSONReader::JSON_NO_ERROR;
	error_line_ = 0;
	error_column_ = 0;

	// ICU and ReadUnicodeCharacter() use int32_t for lengths, so ensure
	// that the index_ will not overflow when parsing.
	if (!base::IsValueInRangeForNumericType<int32_t>(input.length())) {
	ReportError(JSONReader::JSON_TOO_LARGE, 0);
	return std::nullopt;
	}

	// When the input JSON string starts with a UTF-8 Byte-Order-Mark,
	// advance the start position to avoid the ParseNextToken function mis-
	// treating a Unicode BOM as an invalid character and returning NULL.
	ConsumeIfMatch("\xEF\xBB\xBF");

	// Parse the first and any nested tokens.
	std::optional<Value> root(ParseNextToken());
	if (!root)
	return std::nullopt;

	// Make sure the input stream is at an end.
	if (GetNextToken() != T_END_OF_INPUT) {
	ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
	return std::nullopt;
	}

	return root;
	}

	// Error code recorded by the most recent ReportError() call.
	JSONReader::JsonParseError JSONParser::error_code() const { return error_code_; }

	// Formats the recorded error code together with its line and column.
	std::string JSONParser::GetErrorMessage() const {
	  const std::string description = JSONReader::ErrorCodeToString(error_code_);
	  return FormatErrorMessage(error_line_, error_column_, description);
	}

	// Line recorded by the most recent ReportError() call.
	int JSONParser::error_line() const { return error_line_; }

	// Column recorded by the most recent ReportError() call.
	int JSONParser::error_column() const { return error_column_; }

	// StringBuilder ///////////////////////////////////////////////////////////////

	// Creates an empty builder that is not backed by any input.
	JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}

	// Creates a builder whose contents begin at |pos| and are zero bytes long so
	// far; Append() extends the length as characters are accepted.
	JSONParser::StringBuilder::StringBuilder(const char* pos)
	: pos_(pos), length_(0) {}

	JSONParser::StringBuilder::~StringBuilder() = default;

	// Move assignment; ConsumeStringRaw() uses it to hand the finished builder
	// to its caller.
	JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
	StringBuilder&& other) = default;

	// Adds code point |point| to the string being built.
	void JSONParser::StringBuilder::Append(uint32_t point) {
	  DCHECK(IsValidCharacter(point));

	  // While no copy has been made and the character is plain ASCII, the
	  // builder simply grows its window over the input.
	  const bool can_extend_in_place = !string_ && point < kExtendedASCIIStart;
	  if (can_extend_in_place) {
	    DCHECK_EQ(static_cast<char>(point), pos_[length_]);
	    ++length_;
	    return;
	  }

	  // Otherwise switch to (or continue with) the owned std::string.
	  Convert();
	  if (UNLIKELY(point == kUnicodeReplacementPoint)) {
	    string_->append(kUnicodeReplacementString);
	    return;
	  }
	  WriteUnicodeCharacter(point, &*string_);
	}

	// Copies the bytes collected so far into an owned std::string, unless that
	// has already happened.
	void JSONParser::StringBuilder::Convert() {
	  if (!string_)
	    string_.emplace(pos_, length_);
	}

	// Returns the built string. If an owned copy exists it is moved out, so the
	// builder should not be used for its contents afterwards.
	std::string JSONParser::StringBuilder::DestructiveAsString() {
	  return string_ ? std::move(*string_) : std::string(pos_, length_);
	}

	// JSONParser private //////////////////////////////////////////////////////////

	// Returns a view of the next |count| bytes without advancing, or
	// std::nullopt when fewer than |count| bytes remain.
	std::optional<std::string_view> JSONParser::PeekChars(int count) {
	  const size_t end = static_cast<size_t>(index_) + count;
	  if (end > input_.length())
	    return std::nullopt;
	  // The view is built by hand because std::string_view::substr() measured
	  // significantly slower in base_perftests.
	  return std::string_view(input_.data() + index_, count);
	}

	// Returns the byte under the cursor, or std::nullopt at end of input.
	std::optional<char> JSONParser::PeekChar() {
	  if (const std::optional<std::string_view> next = PeekChars(1))
	    return next->front();
	  return std::nullopt;
	}

	// Like PeekChars(), but also advances the cursor past the returned bytes.
	// The cursor does not move when fewer than |count| bytes remain.
	std::optional<std::string_view> JSONParser::ConsumeChars(int count) {
	  std::optional<std::string_view> taken = PeekChars(count);
	  if (!taken)
	    return std::nullopt;
	  index_ += count;
	  return taken;
	}

	// Returns the byte under the cursor and advances past it, or std::nullopt
	// (without moving) at end of input.
	std::optional<char> JSONParser::ConsumeChar() {
	  if (const std::optional<std::string_view> taken = ConsumeChars(1))
	    return taken->front();
	  return std::nullopt;
	}

	// Pointer to the byte under the cursor (one past the end at end of input).
	const char* JSONParser::pos() {
	  const size_t offset = static_cast<size_t>(index_);
	  CHECK_LE(offset, input_.length());
	  return input_.data() + offset;
	}

	// Skips whitespace and comments, then classifies the upcoming token by its
	// first byte. The byte itself is not consumed.
	JSONParser::Token JSONParser::GetNextToken() {
	  EatWhitespaceAndComments();

	  const std::optional<char> next = PeekChar();
	  if (!next.has_value())
	    return T_END_OF_INPUT;

	  const char ch = *next;
	  if (ch == '-' || IsAsciiDigit(ch))
	    return T_NUMBER;

	  switch (ch) {
	    case '{':
	      return T_OBJECT_BEGIN;
	    case '}':
	      return T_OBJECT_END;
	    case '[':
	      return T_ARRAY_BEGIN;
	    case ']':
	      return T_ARRAY_END;
	    case '"':
	      return T_STRING;
	    case 't':
	      return T_BOOL_TRUE;
	    case 'f':
	      return T_BOOL_FALSE;
	    case 'n':
	      return T_NULL;
	    case ',':
	      return T_LIST_SEPARATOR;
	    case ':':
	      return T_OBJECT_PAIR_SEPARATOR;
	    default:
	      break;
	  }
	  return T_INVALID_TOKEN;
	}

	// Advances past spaces, tabs, line breaks and comments, keeping the line
	// bookkeeping used for error positions up to date.
	void JSONParser::EatWhitespaceAndComments() {
	  for (;;) {
	    const std::optional<char> next = PeekChar();
	    if (!next)
	      return;

	    const char ch = *next;
	    if (ch == '\r' || ch == '\n') {
	      index_last_line_ = index_;
	      // "\r\n" is one line break: the '\n' half must not count again.
	      const bool lf_after_cr =
	          ch == '\n' && index_ > 0 && input_[index_ - 1] == '\r';
	      if (!lf_after_cr)
	        ++line_number_;
	      ConsumeChar();
	    } else if (ch == ' ' || ch == '\t') {
	      ConsumeChar();
	    } else if (ch == '/') {
	      if (!EatComment())
	        return;
	    } else {
	      return;
	    }
	  }
	}

	// Called with the cursor on '/'. Consumes two bytes and, if they open a
	// comment, the comment body. Returns true when a "//" comment ran up to a
	// line break (left unconsumed) or a "/* */" comment was closed; returns false
	// when fewer than two bytes remain, the two bytes are not a comment opener,
	// or the input ends first.
	bool JSONParser::EatComment() {
	  const std::optional<std::string_view> opener = ConsumeChars(2);
	  if (!opener)
	    return false;

	  if (*opener == "//") {
	    // Line comment: stop in front of the terminating line break.
	    for (std::optional<char> c = PeekChar(); c; c = PeekChar()) {
	      if (*c == '\n' || *c == '\r')
	        return true;
	      ConsumeChar();
	    }
	    return false;
	  }

	  if (*opener == "/*") {
	    // Block comment: scan for "*/", remembering the previous byte.
	    char last = '\0';
	    for (std::optional<char> c = PeekChar(); c; c = PeekChar()) {
	      if (last == '*' && *c == '/') {
	        // Step past the closing '/' so the caller resumes after the comment
	        // (which may be the end of input).
	        ConsumeChar();
	        return true;
	      }
	      ConsumeChar();
	      last = *c;
	    }
	    // Unterminated: the caller will see T_END_OF_INPUT.
	  }

	  return false;
	}

	std::optional<Value> JSONParser::ParseNextToken() {
	return ParseToken(GetNextToken());
	}

	// Dispatches to the consumer for the value that |token| starts. Tokens that
	// cannot start a value are reported as JSON_UNEXPECTED_TOKEN.
	std::optional<Value> JSONParser::ParseToken(Token token) {
	  switch (token) {
	    case T_OBJECT_BEGIN:
	      return ConsumeDictionary();
	    case T_ARRAY_BEGIN:
	      return ConsumeList();
	    case T_STRING:
	      return ConsumeString();
	    case T_NUMBER:
	      return ConsumeNumber();
	    case T_BOOL_TRUE:
	    case T_BOOL_FALSE:
	    case T_NULL:
	      return ConsumeLiteral();
	    default:
	      break;
	  }

	  ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
	  return std::nullopt;
	}

	std::optional<Value> JSONParser::ConsumeDictionary() {
	if (ConsumeChar() != '{') {
	ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
	return std::nullopt;
	}

	StackMarker depth_check(max_depth_, &stack_depth_);
	if (depth_check.IsTooDeep()) {
	ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
	return std::nullopt;
	}

	std::vector<Value::DictStorage::value_type> dict_storage;

	Token token = GetNextToken();
	while (token != T_OBJECT_END) {
	if (token != T_STRING) {
	ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
	return std::nullopt;
	}

	// First consume the key.
	StringBuilder key;
	if (!ConsumeStringRaw(&key)) {
	return std::nullopt;
	}

	// Read the separator.
	token = GetNextToken();
	if (token != T_OBJECT_PAIR_SEPARATOR) {
	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
	return std::nullopt;
	}

	// The next token is the value. Ownership transfers to \|dict\|.
	ConsumeChar();
	std::optional<Value> value = ParseNextToken();
	if (!value) {
	// ReportError from deeper level.
	return std::nullopt;
	}

	dict_storage.emplace_back(key.DestructiveAsString(),
	std::make_unique<Value>(std::move(*value)));

	token = GetNextToken();
	if (token == T_LIST_SEPARATOR) {
	ConsumeChar();
	token = GetNextToken();
	if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
	ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
	return std::nullopt;
	}
	} else if (token != T_OBJECT_END) {
	ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
	return std::nullopt;
	}
	}

	ConsumeChar(); // Closing '}'.

	return Value(Value::DictStorage(std::move(dict_storage), KEEP_LAST_OF_DUPES));
	}

	std::optional<Value> JSONParser::ConsumeList() {
	if (ConsumeChar() != '[') {
	ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
	return std::nullopt;
	}

	StackMarker depth_check(max_depth_, &stack_depth_);
	if (depth_check.IsTooDeep()) {
	ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
	return std::nullopt;
	}

	Value::ListStorage list_storage;

	Token token = GetNextToken();
	while (token != T_ARRAY_END) {
	std::optional<Value> item = ParseToken(token);
	if (!item) {
	// ReportError from deeper level.
	return std::nullopt;
	}

	list_storage.push_back(std::move(*item));

	token = GetNextToken();
	if (token == T_LIST_SEPARATOR) {
	ConsumeChar();
	token = GetNextToken();
	if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
	ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
	return std::nullopt;
	}
	} else if (token != T_ARRAY_END) {
	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
	return std::nullopt;
	}
	}

	ConsumeChar(); // Closing ']'.

	return Value(std::move(list_storage));
	}

	std::optional<Value> JSONParser::ConsumeString() {
	StringBuilder string;
	if (!ConsumeStringRaw(&string))
	return std::nullopt;

	return Value(string.DestructiveAsString());
	}

	// Reads a quoted string starting at its opening '"' and moves the decoded
	// contents into |*out|. Returns false, with an error reported, on malformed
	// input or when the input ends before the closing quote.
	bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
	  if (ConsumeChar() != '"') {
	    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
	    return false;
	  }

	  // The builder refers to the input in place for as long as it can and only
	  // copies into a std::string once the contents differ from the raw bytes.
	  StringBuilder string(pos());

	  while (PeekChar()) {
	    uint32_t next_char = 0;
	    const bool decoded =
	        ReadUnicodeCharacter(input_.data(),
	                             static_cast<int32_t>(input_.length()), &index_,
	                             &next_char);
	    if (!decoded || !IsValidCharacter(next_char)) {
	      if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
	        ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
	        return false;
	      }
	      ConsumeChar();
	      string.Append(kUnicodeReplacementPoint);
	      continue;
	    }

	    if (next_char == '"') {
	      // Closing quote: hand the result over.
	      ConsumeChar();
	      *out = std::move(string);
	      return true;
	    }

	    if (next_char != '\\') {
	      // Ordinary character.
	      ConsumeChar();
	      string.Append(next_char);
	      continue;
	    }

	    // Escape sequence. The decoded text no longer matches the input bytes,
	    // so the builder must own a copy from here on.
	    string.Convert();

	    // Take the '\' together with the character that selects the escape.
	    std::optional<std::string_view> escape_sequence = ConsumeChars(2);
	    if (!escape_sequence) {
	      ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
	      return false;
	    }

	    const char escape_kind = (*escape_sequence)[1];
	    switch (escape_kind) {
	      case 'x': {
	        // "\xNN" is not part of the JSON spec; it is accepted for
	        // backwards compatibility with the old parser.
	        escape_sequence = ConsumeChars(2);
	        if (!escape_sequence) {
	          ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
	          return false;
	        }

	        int hex_digit = 0;
	        if (!HexStringToInt(*escape_sequence, &hex_digit) ||
	            !IsValidCharacter(hex_digit)) {
	          ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
	          return false;
	        }

	        string.Append(hex_digit);
	        break;
	      }
	      case 'u': {
	        // "\uXXXX", possibly followed by a second unit for a surrogate pair.
	        uint32_t code_point;
	        if (!DecodeUTF16(&code_point)) {
	          ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
	          return false;
	        }
	        string.Append(code_point);
	        break;
	      }
	      case '"':
	      case '\\':
	      case '/':
	        // These escapes stand for themselves.
	        string.Append(escape_kind);
	        break;
	      case 'b':
	        string.Append('\b');
	        break;
	      case 'f':
	        string.Append('\f');
	        break;
	      case 'n':
	        string.Append('\n');
	        break;
	      case 'r':
	        string.Append('\r');
	        break;
	      case 't':
	        string.Append('\t');
	        break;
	      case 'v':  // Accepted although the RFC does not list it.
	        string.Append('\v');
	        break;
	      default:
	        // Every other escape sequence is illegal.
	        ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
	        return false;
	    }
	  }

	  // Ran out of input before the closing quote.
	  ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
	  return false;
	}

	// Decodes the hex digits of a \uXXXX escape into a code point stored in
	// |out_code_point|. Entry is at the first X in \uXXXX. When the first code
	// unit is a lead surrogate, a second \uXXXX escape carrying the trail
	// surrogate must follow immediately and the pair is combined into one
	// supplementary code point. Returns false on any decoding failure; this
	// function does not call ReportError() itself.
	bool JSONParser::DecodeUTF16(uint32_t* out_code_point) {
	  std::optional<std::string_view> hex_digits = ConsumeChars(4);
	  if (!hex_digits)
	    return false;

	  // The first UTF-16 code unit, which may be a lead (high) surrogate.
	  int lead_unit = 0;
	  if (!HexStringToInt(*hex_digits, &lead_unit))
	    return false;

	  // Simple case: a lone code unit that is not a surrogate.
	  if (!CBU16_IS_SURROGATE(lead_unit)) {
	    DCHECK(CBU16_IS_SINGLE(lead_unit));
	    if (IsValidCharacter(lead_unit)) {
	      *out_code_point = lead_unit;
	      return true;
	    }

	    // Invalid character: substitute U+FFFD only if the caller opted in.
	    if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
	      return false;

	    *out_code_point = kUnicodeReplacementPoint;
	    return true;
	  }

	  // A surrogate in the first position must be the lead half of a pair;
	  // anything else is an encoding error.
	  if (!CBU16_IS_SURROGATE_LEAD(lead_unit))
	    return false;

	  // The trail surrogate must be supplied by an immediately following
	  // \uXXXX escape.
	  if (!ConsumeIfMatch("\\u"))
	    return false;

	  hex_digits = ConsumeChars(4);
	  if (!hex_digits)
	    return false;

	  int trail_unit = 0;
	  if (!HexStringToInt(*hex_digits, &trail_unit))
	    return false;

	  if (!CBU16_IS_TRAIL(trail_unit))
	    return false;

	  const uint32_t combined = CBU16_GET_SUPPLEMENTARY(lead_unit, trail_unit);
	  if (!IsValidCharacter(combined))
	    return false;

	  *out_code_point = combined;
	  return true;
	}

	// Consumes a JSON number: an optional '-', an integer part without leading
	// zeros, an optional '.'-fraction and an optional 'e'/'E' exponent. The
	// token that follows must be an object end, array end, list separator or end
	// of input.
	//
	// Returns the number as an integer Value. Returns std::nullopt after calling
	// ReportError() when the text is malformed, is followed by an unexpected
	// token, or is well-formed but cannot be converted by StringToInt() (for
	// example it has a fraction or exponent, or does not fit in an int).
	std::optional<Value> JSONParser::ConsumeNumber() {
	  const char* num_start = pos();
	  const int start_index = index_;

	  if (PeekChar() == '-')
	    ConsumeChar();

	  if (!ReadInt(false)) {
	    ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
	    return std::nullopt;
	  }

	  // The optional fraction part.
	  if (PeekChar() == '.') {
	    ConsumeChar();
	    if (!ReadInt(true)) {
	      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
	      return std::nullopt;
	    }
	  }

	  // Optional exponent part.
	  std::optional<char> c = PeekChar();
	  if (c == 'e' || c == 'E') {
	    ConsumeChar();
	    if (PeekChar() == '-' || PeekChar() == '+') {
	      ConsumeChar();
	    }
	    if (!ReadInt(true)) {
	      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
	      return std::nullopt;
	    }
	  }

	  // ReadInt is greedy because numbers have no easily detectable sentinel,
	  // so save off where the parser should be on exit (see Consume invariant at
	  // the top of the header), then make sure the next token is one which is
	  // valid. This index is also where the number's text ends.
	  const int exit_index = index_;

	  switch (GetNextToken()) {
	    case T_OBJECT_END:
	    case T_ARRAY_END:
	    case T_LIST_SEPARATOR:
	    case T_END_OF_INPUT:
	      break;
	    default:
	      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
	      return std::nullopt;
	  }

	  // GetNextToken() may have moved the parser; restore the saved position.
	  index_ = exit_index;

	  std::string_view num_string(num_start, exit_index - start_index);

	  int num_int;
	  if (StringToInt(num_string, &num_int))
	    return Value(num_int);

	  // The text is a well-formed number but not one this parser can represent.
	  // Record an error so the failure is not silent: previously this path
	  // returned std::nullopt with no error code or position set.
	  // NOTE(review): a dedicated "unrepresentable number" error code would be
	  // more precise if JSONReader provides one -- confirm.
	  ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
	  return std::nullopt;
	}

	// Consumes a run of consecutive ASCII digits. Returns false when no digit
	// was found, or when |allow_leading_zeros| is false and a run of more than
	// one digit starts with '0'. The digits stay consumed even when false is
	// returned.
	bool JSONParser::ReadInt(bool allow_leading_zeros) {
	  size_t digit_count = 0;
	  bool starts_with_zero = false;

	  for (std::optional<char> c = PeekChar(); c && IsAsciiDigit(*c);
	       c = PeekChar()) {
	    if (digit_count == 0)
	      starts_with_zero = (*c == '0');

	    ++digit_count;
	    ConsumeChar();
	  }

	  if (digit_count == 0)
	    return false;

	  // A single "0" is always fine; longer runs may only start with '0' when
	  // leading zeros are permitted.
	  return allow_leading_zeros || digit_count == 1 || !starts_with_zero;
	}

	// Consumes one of the bare-word literals "true", "false" or "null" and
	// returns the matching Value. If none of them is next in the input, reports
	// JSON_SYNTAX_ERROR and returns std::nullopt.
	std::optional<Value> JSONParser::ConsumeLiteral() {
	  if (ConsumeIfMatch("true"))
	    return Value(true);

	  if (ConsumeIfMatch("false"))
	    return Value(false);

	  if (ConsumeIfMatch("null"))
	    return Value(Value::Type::NONE);

	  ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
	  return std::nullopt;
	}

	// If the upcoming input is exactly |match|, consumes those characters and
	// returns true. Otherwise consumes nothing and returns false.
	bool JSONParser::ConsumeIfMatch(std::string_view match) {
	  if (PeekChars(match.size()) != match)
	    return false;

	  ConsumeChars(match.size());
	  return true;
	}

	// Records |code| as the parser's error, along with the current line number
	// and a column computed as the offset of index_ from index_last_line_ plus
	// |column_adjust|.
	void JSONParser::ReportError(JSONReader::JsonParseError code,
	                             int column_adjust) {
	  error_column_ = index_ - index_last_line_ + column_adjust;
	  error_line_ = line_number_;
	  error_code_ = code;
	}

	// static
	// Builds a human-readable error string. When either |line| or |column| is
	// non-zero the position is prefixed to |description|; otherwise
	// |description| is returned unchanged.
	std::string JSONParser::FormatErrorMessage(int line,
	                                           int column,
	                                           const std::string& description) {
	  const bool has_position = (line != 0) || (column != 0);
	  if (!has_position)
	    return description;

	  return StringPrintf("Line: %i, column: %i, %s", line, column,
	                      description.c_str());
	}

	} // namespace internal
	} // namespace base