Blame - base/json/json_parser.cc - gn

blob: df02829e88342ce16fd2c06d8bbc2a2a03093293 [file] [log] [blame]

Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	1	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "base/json/json_parser.h"
				6
				7	#include <cmath>
Brett Wilson	ad9e442	2019-09-07 13:33:06 -0700	[diff] [blame]	8	#include <string_view>
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	9	#include <utility>
				10	#include <vector>
				11
				12	#include "base/logging.h"
				13	#include "base/macros.h"
				14	#include "base/numerics/safe_conversions.h"
				15	#include "base/strings/string_number_conversions.h"
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	16	#include "base/strings/string_util.h"
				17	#include "base/strings/stringprintf.h"
				18	#include "base/strings/utf_string_conversion_utils.h"
				19	#include "base/strings/utf_string_conversions.h"
				20	#include "base/third_party/icu/icu_utf.h"
				21	#include "base/values.h"
				22
				23	namespace base {
				24	namespace internal {
				25
				26	namespace {
				27
				28	const int32_t kExtendedASCIIStart = 0x80;
				29
				30	// Simple class that checks for maximum recursion/"stack overflow."
				31	class StackMarker {
				32	public:
				33	StackMarker(int max_depth, int* depth)
				34	: max_depth_(max_depth), depth_(depth) {
				35	++(*depth_);
				36	DCHECK_LE(*depth_, max_depth_);
				37	}
Scott Graham	98cd3ca	2018-06-14 22:26:55 -0700	[diff] [blame]	38	~StackMarker() { --(*depth_); }
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	39
				40	bool IsTooDeep() const { return *depth_ >= max_depth_; }
				41
				42	private:
				43	const int max_depth_;
				44	int* const depth_;
				45
				46	DISALLOW_COPY_AND_ASSIGN(StackMarker);
				47	};
				48
				49	constexpr uint32_t kUnicodeReplacementPoint = 0xFFFD;
				50
				51	} // namespace
				52
				53	// This is U+FFFD.
				54	const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
				55
				56	JSONParser::JSONParser(int options, int max_depth)
				57	: options_(options),
				58	max_depth_(max_depth),
				59	index_(0),
				60	stack_depth_(0),
				61	line_number_(0),
				62	index_last_line_(0),
				63	error_code_(JSONReader::JSON_NO_ERROR),
				64	error_line_(0),
				65	error_column_(0) {
				66	CHECK_LE(max_depth, JSONReader::kStackMaxDepth);
				67	}
				68
				69	JSONParser::~JSONParser() = default;
				70
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	71	std::optional<Value> JSONParser::Parse(std::string_view input) {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	72	input_ = input;
				73	index_ = 0;
				74	line_number_ = 1;
				75	index_last_line_ = 0;
				76
				77	error_code_ = JSONReader::JSON_NO_ERROR;
				78	error_line_ = 0;
				79	error_column_ = 0;
				80
				81	// ICU and ReadUnicodeCharacter() use int32_t for lengths, so ensure
				82	// that the index_ will not overflow when parsing.
				83	if (!base::IsValueInRangeForNumericType<int32_t>(input.length())) {
				84	ReportError(JSONReader::JSON_TOO_LARGE, 0);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	85	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	86	}
				87
				88	// When the input JSON string starts with a UTF-8 Byte-Order-Mark,
				89	// advance the start position to avoid the ParseNextToken function mis-
				90	// treating a Unicode BOM as an invalid character and returning NULL.
				91	ConsumeIfMatch("\xEF\xBB\xBF");
				92
				93	// Parse the first and any nested tokens.
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	94	std::optional<Value> root(ParseNextToken());
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	95	if (!root)
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	96	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	97
				98	// Make sure the input stream is at an end.
				99	if (GetNextToken() != T_END_OF_INPUT) {
				100	ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	101	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	102	}
				103
				104	return root;
				105	}
				106
				107	JSONReader::JsonParseError JSONParser::error_code() const {
				108	return error_code_;
				109	}
				110
				111	std::string JSONParser::GetErrorMessage() const {
				112	return FormatErrorMessage(error_line_, error_column_,
Scott Graham	98cd3ca	2018-06-14 22:26:55 -0700	[diff] [blame]	113	JSONReader::ErrorCodeToString(error_code_));
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	114	}
				115
				116	int JSONParser::error_line() const {
				117	return error_line_;
				118	}
				119
				120	int JSONParser::error_column() const {
				121	return error_column_;
				122	}
				123
				124	// StringBuilder ///////////////////////////////////////////////////////////////
				125
				126	JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
				127
				128	JSONParser::StringBuilder::StringBuilder(const char* pos)
				129	: pos_(pos), length_(0) {}
				130
				131	JSONParser::StringBuilder::~StringBuilder() = default;
				132
				133	JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
				134	StringBuilder&& other) = default;
				135
				136	void JSONParser::StringBuilder::Append(uint32_t point) {
				137	DCHECK(IsValidCharacter(point));
				138
				139	if (point < kExtendedASCIIStart && !string_) {
				140	DCHECK_EQ(static_cast<char>(point), pos_[length_]);
				141	++length_;
				142	} else {
				143	Convert();
				144	if (UNLIKELY(point == kUnicodeReplacementPoint)) {
				145	string_->append(kUnicodeReplacementString);
				146	} else {
				147	WriteUnicodeCharacter(point, &*string_);
				148	}
				149	}
				150	}
				151
				152	void JSONParser::StringBuilder::Convert() {
				153	if (string_)
				154	return;
				155	string_.emplace(pos_, length_);
				156	}
				157
				158	std::string JSONParser::StringBuilder::DestructiveAsString() {
				159	if (string_)
				160	return std::move(*string_);
				161	return std::string(pos_, length_);
				162	}
				163
				164	// JSONParser private //////////////////////////////////////////////////////////
				165
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	166	std::optional<std::string_view> JSONParser::PeekChars(int count) {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	167	if (static_cast<size_t>(index_) + count > input_.length())
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	168	return std::nullopt;
Brett Wilson	ad9e442	2019-09-07 13:33:06 -0700	[diff] [blame]	169	// Using std::string_view::substr() is significantly slower (according to
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	170	// base_perftests) than constructing a substring manually.
Brett Wilson	ad9e442	2019-09-07 13:33:06 -0700	[diff] [blame]	171	return std::string_view(input_.data() + index_, count);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	172	}
				173
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	174	std::optional<char> JSONParser::PeekChar() {
				175	std::optional<std::string_view> chars = PeekChars(1);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	176	if (chars)
				177	return (*chars)[0];
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	178	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	179	}
				180
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	181	std::optional<std::string_view> JSONParser::ConsumeChars(int count) {
				182	std::optional<std::string_view> chars = PeekChars(count);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	183	if (chars)
				184	index_ += count;
				185	return chars;
				186	}
				187
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	188	std::optional<char> JSONParser::ConsumeChar() {
				189	std::optional<std::string_view> chars = ConsumeChars(1);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	190	if (chars)
				191	return (*chars)[0];
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	192	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	193	}
				194
				195	const char* JSONParser::pos() {
				196	CHECK_LE(static_cast<size_t>(index_), input_.length());
				197	return input_.data() + index_;
				198	}
				199
				200	JSONParser::Token JSONParser::GetNextToken() {
				201	EatWhitespaceAndComments();
				202
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	203	std::optional<char> c = PeekChar();
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	204	if (!c)
				205	return T_END_OF_INPUT;
				206
				207	switch (*c) {
				208	case '{':
				209	return T_OBJECT_BEGIN;
				210	case '}':
				211	return T_OBJECT_END;
				212	case '[':
				213	return T_ARRAY_BEGIN;
				214	case ']':
				215	return T_ARRAY_END;
				216	case '"':
				217	return T_STRING;
				218	case '0':
				219	case '1':
				220	case '2':
				221	case '3':
				222	case '4':
				223	case '5':
				224	case '6':
				225	case '7':
				226	case '8':
				227	case '9':
				228	case '-':
				229	return T_NUMBER;
				230	case 't':
				231	return T_BOOL_TRUE;
				232	case 'f':
				233	return T_BOOL_FALSE;
				234	case 'n':
				235	return T_NULL;
				236	case ',':
				237	return T_LIST_SEPARATOR;
				238	case ':':
				239	return T_OBJECT_PAIR_SEPARATOR;
				240	default:
				241	return T_INVALID_TOKEN;
				242	}
				243	}
				244
				245	void JSONParser::EatWhitespaceAndComments() {
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	246	while (std::optional<char> c = PeekChar()) {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	247	switch (*c) {
				248	case '\r':
				249	case '\n':
				250	index_last_line_ = index_;
				251	// Don't increment line_number_ twice for "\r\n".
				252	if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
				253	++line_number_;
				254	}
				255	FALLTHROUGH;
				256	case ' ':
				257	case '\t':
				258	ConsumeChar();
				259	break;
				260	case '/':
				261	if (!EatComment())
				262	return;
				263	break;
				264	default:
				265	return;
				266	}
				267	}
				268	}
				269
				270	bool JSONParser::EatComment() {
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	271	std::optional<std::string_view> comment_start = ConsumeChars(2);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	272	if (!comment_start)
				273	return false;
				274
				275	if (comment_start == "//") {
				276	// Single line comment, read to newline.
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	277	while (std::optional<char> c = PeekChar()) {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	278	if (c == '\n' \|\| c == '\r')
				279	return true;
				280	ConsumeChar();
				281	}
				282	} else if (comment_start == "/*") {
				283	char previous_char = '\0';
				284	// Block comment, read until end marker.
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	285	while (std::optional<char> c = PeekChar()) {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	286	if (previous_char == '*' && c == '/') {
				287	// EatWhitespaceAndComments will inspect pos(), which will still be on
				288	// the last / of the comment, so advance once more (which may also be
				289	// end of input).
				290	ConsumeChar();
				291	return true;
				292	}
				293	previous_char = *ConsumeChar();
				294	}
				295
				296	// If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
				297	}
				298
				299	return false;
				300	}
				301
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	302	std::optional<Value> JSONParser::ParseNextToken() {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	303	return ParseToken(GetNextToken());
				304	}
				305
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	306	std::optional<Value> JSONParser::ParseToken(Token token) {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	307	switch (token) {
				308	case T_OBJECT_BEGIN:
				309	return ConsumeDictionary();
				310	case T_ARRAY_BEGIN:
				311	return ConsumeList();
				312	case T_STRING:
				313	return ConsumeString();
				314	case T_NUMBER:
				315	return ConsumeNumber();
				316	case T_BOOL_TRUE:
				317	case T_BOOL_FALSE:
				318	case T_NULL:
				319	return ConsumeLiteral();
				320	default:
				321	ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	322	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	323	}
				324	}
				325
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	326	std::optional<Value> JSONParser::ConsumeDictionary() {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	327	if (ConsumeChar() != '{') {
				328	ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	329	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	330	}
				331
				332	StackMarker depth_check(max_depth_, &stack_depth_);
				333	if (depth_check.IsTooDeep()) {
				334	ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	335	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	336	}
				337
				338	std::vector<Value::DictStorage::value_type> dict_storage;
				339
				340	Token token = GetNextToken();
				341	while (token != T_OBJECT_END) {
				342	if (token != T_STRING) {
				343	ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	344	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	345	}
				346
				347	// First consume the key.
				348	StringBuilder key;
				349	if (!ConsumeStringRaw(&key)) {
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	350	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	351	}
				352
				353	// Read the separator.
				354	token = GetNextToken();
				355	if (token != T_OBJECT_PAIR_SEPARATOR) {
				356	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	357	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	358	}
				359
				360	// The next token is the value. Ownership transfers to \|dict\|.
				361	ConsumeChar();
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	362	std::optional<Value> value = ParseNextToken();
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	363	if (!value) {
				364	// ReportError from deeper level.
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	365	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	366	}
				367
				368	dict_storage.emplace_back(key.DestructiveAsString(),
				369	std::make_unique<Value>(std::move(*value)));
				370
				371	token = GetNextToken();
				372	if (token == T_LIST_SEPARATOR) {
				373	ConsumeChar();
				374	token = GetNextToken();
				375	if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
				376	ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	377	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	378	}
				379	} else if (token != T_OBJECT_END) {
				380	ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	381	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	382	}
				383	}
				384
				385	ConsumeChar(); // Closing '}'.
				386
				387	return Value(Value::DictStorage(std::move(dict_storage), KEEP_LAST_OF_DUPES));
				388	}
				389
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	390	std::optional<Value> JSONParser::ConsumeList() {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	391	if (ConsumeChar() != '[') {
				392	ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	393	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	394	}
				395
				396	StackMarker depth_check(max_depth_, &stack_depth_);
				397	if (depth_check.IsTooDeep()) {
				398	ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	399	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	400	}
				401
				402	Value::ListStorage list_storage;
				403
				404	Token token = GetNextToken();
				405	while (token != T_ARRAY_END) {
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	406	std::optional<Value> item = ParseToken(token);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	407	if (!item) {
				408	// ReportError from deeper level.
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	409	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	410	}
				411
				412	list_storage.push_back(std::move(*item));
				413
				414	token = GetNextToken();
				415	if (token == T_LIST_SEPARATOR) {
				416	ConsumeChar();
				417	token = GetNextToken();
				418	if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
				419	ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	420	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	421	}
				422	} else if (token != T_ARRAY_END) {
				423	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	424	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	425	}
				426	}
				427
				428	ConsumeChar(); // Closing ']'.
				429
				430	return Value(std::move(list_storage));
				431	}
				432
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	433	std::optional<Value> JSONParser::ConsumeString() {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	434	StringBuilder string;
				435	if (!ConsumeStringRaw(&string))
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	436	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	437
				438	return Value(string.DestructiveAsString());
				439	}
				440
				441	bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
				442	if (ConsumeChar() != '"') {
				443	ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
				444	return false;
				445	}
				446
Brett Wilson	ad9e442	2019-09-07 13:33:06 -0700	[diff] [blame]	447	// StringBuilder will internally build a std::string_view unless a UTF-16
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	448	// conversion occurs, at which point it will perform a copy into a
				449	// std::string.
				450	StringBuilder string(pos());
				451
				452	while (PeekChar()) {
				453	uint32_t next_char = 0;
				454	if (!ReadUnicodeCharacter(input_.data(),
Scott Graham	98cd3ca	2018-06-14 22:26:55 -0700	[diff] [blame]	455	static_cast<int32_t>(input_.length()), &index_,
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	456	&next_char) \|\|
				457	!IsValidCharacter(next_char)) {
				458	if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
				459	ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
				460	return false;
				461	}
				462	ConsumeChar();
				463	string.Append(kUnicodeReplacementPoint);
				464	continue;
				465	}
				466
				467	if (next_char == '"') {
				468	ConsumeChar();
				469	*out = std::move(string);
				470	return true;
				471	} else if (next_char != '\\') {
				472	// If this character is not an escape sequence...
				473	ConsumeChar();
				474	string.Append(next_char);
				475	} else {
				476	// And if it is an escape sequence, the input string will be adjusted
				477	// (either by combining the two characters of an encoded escape sequence,
Brett Wilson	ad9e442	2019-09-07 13:33:06 -0700	[diff] [blame]	478	// or with a UTF conversion), so using std::string_view isn't possible --
				479	// force a conversion.
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	480	string.Convert();
				481
				482	// Read past the escape '\' and ensure there's a character following.
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	483	std::optional<std::string_view> escape_sequence = ConsumeChars(2);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	484	if (!escape_sequence) {
				485	ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
				486	return false;
				487	}
				488
				489	switch ((*escape_sequence)[1]) {
				490	// Allowed esape sequences:
				491	case 'x': { // UTF-8 sequence.
				492	// UTF-8 \x escape sequences are not allowed in the spec, but they
				493	// are supported here for backwards-compatiblity with the old parser.
				494	escape_sequence = ConsumeChars(2);
				495	if (!escape_sequence) {
				496	ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
				497	return false;
				498	}
				499
				500	int hex_digit = 0;
				501	if (!HexStringToInt(*escape_sequence, &hex_digit) \|\|
				502	!IsValidCharacter(hex_digit)) {
				503	ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
				504	return false;
				505	}
				506
				507	string.Append(hex_digit);
				508	break;
				509	}
				510	case 'u': { // UTF-16 sequence.
				511	// UTF units are of the form \uXXXX.
				512	uint32_t code_point;
				513	if (!DecodeUTF16(&code_point)) {
				514	ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
				515	return false;
				516	}
				517	string.Append(code_point);
				518	break;
				519	}
				520	case '"':
				521	string.Append('"');
				522	break;
				523	case '\\':
				524	string.Append('\\');
				525	break;
				526	case '/':
				527	string.Append('/');
				528	break;
				529	case 'b':
				530	string.Append('\b');
				531	break;
				532	case 'f':
				533	string.Append('\f');
				534	break;
				535	case 'n':
				536	string.Append('\n');
				537	break;
				538	case 'r':
				539	string.Append('\r');
				540	break;
				541	case 't':
				542	string.Append('\t');
				543	break;
				544	case 'v': // Not listed as valid escape sequence in the RFC.
				545	string.Append('\v');
				546	break;
				547	// All other escape squences are illegal.
				548	default:
				549	ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
				550	return false;
				551	}
				552	}
				553	}
				554
				555	ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
				556	return false;
				557	}
				558
				559	// Entry is at the first X in \uXXXX.
				560	bool JSONParser::DecodeUTF16(uint32_t* out_code_point) {
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	561	std::optional<std::string_view> escape_sequence = ConsumeChars(4);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	562	if (!escape_sequence)
				563	return false;
				564
				565	// Consume the UTF-16 code unit, which may be a high surrogate.
				566	int code_unit16_high = 0;
				567	if (!HexStringToInt(*escape_sequence, &code_unit16_high))
				568	return false;
				569
				570	// If this is a high surrogate, consume the next code unit to get the
				571	// low surrogate.
				572	if (CBU16_IS_SURROGATE(code_unit16_high)) {
				573	// Make sure this is the high surrogate. If not, it's an encoding
				574	// error.
				575	if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
				576	return false;
				577
				578	// Make sure that the token has more characters to consume the
				579	// lower surrogate.
				580	if (!ConsumeIfMatch("\\u"))
				581	return false;
				582
				583	escape_sequence = ConsumeChars(4);
				584	if (!escape_sequence)
				585	return false;
				586
				587	int code_unit16_low = 0;
				588	if (!HexStringToInt(*escape_sequence, &code_unit16_low))
				589	return false;
				590
				591	if (!CBU16_IS_TRAIL(code_unit16_low))
				592	return false;
				593
				594	uint32_t code_point =
				595	CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
				596	if (!IsValidCharacter(code_point))
				597	return false;
				598
				599	*out_code_point = code_point;
				600	} else {
				601	// Not a surrogate.
				602	DCHECK(CBU16_IS_SINGLE(code_unit16_high));
				603	if (!IsValidCharacter(code_unit16_high)) {
				604	if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
				605	return false;
				606	}
				607	*out_code_point = kUnicodeReplacementPoint;
				608	return true;
				609	}
				610
				611	*out_code_point = code_unit16_high;
				612	}
				613
				614	return true;
				615	}
				616
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	617	std::optional<Value> JSONParser::ConsumeNumber() {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	618	const char* num_start = pos();
				619	const int start_index = index_;
				620	int end_index = start_index;
				621
				622	if (PeekChar() == '-')
				623	ConsumeChar();
				624
				625	if (!ReadInt(false)) {
				626	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	627	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	628	}
				629	end_index = index_;
				630
				631	// The optional fraction part.
				632	if (PeekChar() == '.') {
				633	ConsumeChar();
				634	if (!ReadInt(true)) {
				635	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	636	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	637	}
				638	end_index = index_;
				639	}
				640
				641	// Optional exponent part.
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	642	std::optional<char> c = PeekChar();
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	643	if (c == 'e' \|\| c == 'E') {
				644	ConsumeChar();
				645	if (PeekChar() == '-' \|\| PeekChar() == '+') {
				646	ConsumeChar();
				647	}
				648	if (!ReadInt(true)) {
				649	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	650	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	651	}
				652	end_index = index_;
				653	}
				654
				655	// ReadInt is greedy because numbers have no easily detectable sentinel,
				656	// so save off where the parser should be on exit (see Consume invariant at
				657	// the top of the header), then make sure the next token is one which is
				658	// valid.
				659	int exit_index = index_;
				660
				661	switch (GetNextToken()) {
				662	case T_OBJECT_END:
				663	case T_ARRAY_END:
				664	case T_LIST_SEPARATOR:
				665	case T_END_OF_INPUT:
				666	break;
				667	default:
				668	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	669	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	670	}
				671
				672	index_ = exit_index;
				673
Brett Wilson	ad9e442	2019-09-07 13:33:06 -0700	[diff] [blame]	674	std::string_view num_string(num_start, end_index - start_index);
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	675
				676	int num_int;
				677	if (StringToInt(num_string, &num_int))
				678	return Value(num_int);
				679
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	680	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	681	}
				682
				683	bool JSONParser::ReadInt(bool allow_leading_zeros) {
				684	size_t len = 0;
				685	char first = 0;
				686
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	687	while (std::optional<char> c = PeekChar()) {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	688	if (!IsAsciiDigit(c))
				689	break;
				690
				691	if (len == 0)
				692	first = *c;
				693
				694	++len;
				695	ConsumeChar();
				696	}
				697
				698	if (len == 0)
				699	return false;
				700
				701	if (!allow_leading_zeros && len > 1 && first == '0')
				702	return false;
				703
				704	return true;
				705	}
				706
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	707	std::optional<Value> JSONParser::ConsumeLiteral() {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	708	if (ConsumeIfMatch("true")) {
				709	return Value(true);
				710	} else if (ConsumeIfMatch("false")) {
				711	return Value(false);
				712	} else if (ConsumeIfMatch("null")) {
				713	return Value(Value::Type::NONE);
				714	} else {
				715	ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson	572ba24	2019-09-09 16:32:59 -0700	[diff] [blame]	716	return std::nullopt;
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	717	}
				718	}
				719
Brett Wilson	ad9e442	2019-09-07 13:33:06 -0700	[diff] [blame]	720	bool JSONParser::ConsumeIfMatch(std::string_view match) {
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	721	if (match == PeekChars(match.size())) {
				722	ConsumeChars(match.size());
				723	return true;
				724	}
				725	return false;
				726	}
				727
				728	void JSONParser::ReportError(JSONReader::JsonParseError code,
				729	int column_adjust) {
				730	error_code_ = code;
				731	error_line_ = line_number_;
				732	error_column_ = index_ - index_last_line_ + column_adjust;
				733	}
				734
				735	// static
Scott Graham	98cd3ca	2018-06-14 22:26:55 -0700	[diff] [blame]	736	std::string JSONParser::FormatErrorMessage(int line,
				737	int column,
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	738	const std::string& description) {
				739	if (line \|\| column) {
Scott Graham	98cd3ca	2018-06-14 22:26:55 -0700	[diff] [blame]	740	return StringPrintf("Line: %i, column: %i, %s", line, column,
				741	description.c_str());
Scott Graham	6696211	2018-06-08 12:42:08 -0700	[diff] [blame]	742	}
				743	return description;
				744	}
				745
				746	} // namespace internal
				747	} // namespace base