blob: df02829e88342ce16fd2c06d8bbc2a2a03093293 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/json/json_parser.h"
6
7#include <cmath>
Brett Wilsonad9e4422019-09-07 13:33:06 -07008#include <string_view>
Scott Graham66962112018-06-08 12:42:08 -07009#include <utility>
10#include <vector>
11
12#include "base/logging.h"
13#include "base/macros.h"
14#include "base/numerics/safe_conversions.h"
15#include "base/strings/string_number_conversions.h"
Scott Graham66962112018-06-08 12:42:08 -070016#include "base/strings/string_util.h"
17#include "base/strings/stringprintf.h"
18#include "base/strings/utf_string_conversion_utils.h"
19#include "base/strings/utf_string_conversions.h"
20#include "base/third_party/icu/icu_utf.h"
21#include "base/values.h"
22
23namespace base {
24namespace internal {
25
26namespace {
27
28const int32_t kExtendedASCIIStart = 0x80;
29
30// Simple class that checks for maximum recursion/"stack overflow."
31class StackMarker {
32 public:
33 StackMarker(int max_depth, int* depth)
34 : max_depth_(max_depth), depth_(depth) {
35 ++(*depth_);
36 DCHECK_LE(*depth_, max_depth_);
37 }
Scott Graham98cd3ca2018-06-14 22:26:55 -070038 ~StackMarker() { --(*depth_); }
Scott Graham66962112018-06-08 12:42:08 -070039
40 bool IsTooDeep() const { return *depth_ >= max_depth_; }
41
42 private:
43 const int max_depth_;
44 int* const depth_;
45
46 DISALLOW_COPY_AND_ASSIGN(StackMarker);
47};
48
49constexpr uint32_t kUnicodeReplacementPoint = 0xFFFD;
50
51} // namespace
52
53// This is U+FFFD.
54const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
55
56JSONParser::JSONParser(int options, int max_depth)
57 : options_(options),
58 max_depth_(max_depth),
59 index_(0),
60 stack_depth_(0),
61 line_number_(0),
62 index_last_line_(0),
63 error_code_(JSONReader::JSON_NO_ERROR),
64 error_line_(0),
65 error_column_(0) {
66 CHECK_LE(max_depth, JSONReader::kStackMaxDepth);
67}
68
69JSONParser::~JSONParser() = default;
70
Brett Wilson572ba242019-09-09 16:32:59 -070071std::optional<Value> JSONParser::Parse(std::string_view input) {
Scott Graham66962112018-06-08 12:42:08 -070072 input_ = input;
73 index_ = 0;
74 line_number_ = 1;
75 index_last_line_ = 0;
76
77 error_code_ = JSONReader::JSON_NO_ERROR;
78 error_line_ = 0;
79 error_column_ = 0;
80
81 // ICU and ReadUnicodeCharacter() use int32_t for lengths, so ensure
82 // that the index_ will not overflow when parsing.
83 if (!base::IsValueInRangeForNumericType<int32_t>(input.length())) {
84 ReportError(JSONReader::JSON_TOO_LARGE, 0);
Brett Wilson572ba242019-09-09 16:32:59 -070085 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -070086 }
87
88 // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
89 // advance the start position to avoid the ParseNextToken function mis-
90 // treating a Unicode BOM as an invalid character and returning NULL.
91 ConsumeIfMatch("\xEF\xBB\xBF");
92
93 // Parse the first and any nested tokens.
Brett Wilson572ba242019-09-09 16:32:59 -070094 std::optional<Value> root(ParseNextToken());
Scott Graham66962112018-06-08 12:42:08 -070095 if (!root)
Brett Wilson572ba242019-09-09 16:32:59 -070096 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -070097
98 // Make sure the input stream is at an end.
99 if (GetNextToken() != T_END_OF_INPUT) {
100 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700101 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700102 }
103
104 return root;
105}
106
107JSONReader::JsonParseError JSONParser::error_code() const {
108 return error_code_;
109}
110
111std::string JSONParser::GetErrorMessage() const {
112 return FormatErrorMessage(error_line_, error_column_,
Scott Graham98cd3ca2018-06-14 22:26:55 -0700113 JSONReader::ErrorCodeToString(error_code_));
Scott Graham66962112018-06-08 12:42:08 -0700114}
115
116int JSONParser::error_line() const {
117 return error_line_;
118}
119
120int JSONParser::error_column() const {
121 return error_column_;
122}
123
124// StringBuilder ///////////////////////////////////////////////////////////////
125
126JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
127
128JSONParser::StringBuilder::StringBuilder(const char* pos)
129 : pos_(pos), length_(0) {}
130
131JSONParser::StringBuilder::~StringBuilder() = default;
132
133JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
134 StringBuilder&& other) = default;
135
136void JSONParser::StringBuilder::Append(uint32_t point) {
137 DCHECK(IsValidCharacter(point));
138
139 if (point < kExtendedASCIIStart && !string_) {
140 DCHECK_EQ(static_cast<char>(point), pos_[length_]);
141 ++length_;
142 } else {
143 Convert();
144 if (UNLIKELY(point == kUnicodeReplacementPoint)) {
145 string_->append(kUnicodeReplacementString);
146 } else {
147 WriteUnicodeCharacter(point, &*string_);
148 }
149 }
150}
151
152void JSONParser::StringBuilder::Convert() {
153 if (string_)
154 return;
155 string_.emplace(pos_, length_);
156}
157
158std::string JSONParser::StringBuilder::DestructiveAsString() {
159 if (string_)
160 return std::move(*string_);
161 return std::string(pos_, length_);
162}
163
164// JSONParser private //////////////////////////////////////////////////////////
165
Brett Wilson572ba242019-09-09 16:32:59 -0700166std::optional<std::string_view> JSONParser::PeekChars(int count) {
Scott Graham66962112018-06-08 12:42:08 -0700167 if (static_cast<size_t>(index_) + count > input_.length())
Brett Wilson572ba242019-09-09 16:32:59 -0700168 return std::nullopt;
Brett Wilsonad9e4422019-09-07 13:33:06 -0700169 // Using std::string_view::substr() is significantly slower (according to
Scott Graham66962112018-06-08 12:42:08 -0700170 // base_perftests) than constructing a substring manually.
Brett Wilsonad9e4422019-09-07 13:33:06 -0700171 return std::string_view(input_.data() + index_, count);
Scott Graham66962112018-06-08 12:42:08 -0700172}
173
Brett Wilson572ba242019-09-09 16:32:59 -0700174std::optional<char> JSONParser::PeekChar() {
175 std::optional<std::string_view> chars = PeekChars(1);
Scott Graham66962112018-06-08 12:42:08 -0700176 if (chars)
177 return (*chars)[0];
Brett Wilson572ba242019-09-09 16:32:59 -0700178 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700179}
180
Brett Wilson572ba242019-09-09 16:32:59 -0700181std::optional<std::string_view> JSONParser::ConsumeChars(int count) {
182 std::optional<std::string_view> chars = PeekChars(count);
Scott Graham66962112018-06-08 12:42:08 -0700183 if (chars)
184 index_ += count;
185 return chars;
186}
187
Brett Wilson572ba242019-09-09 16:32:59 -0700188std::optional<char> JSONParser::ConsumeChar() {
189 std::optional<std::string_view> chars = ConsumeChars(1);
Scott Graham66962112018-06-08 12:42:08 -0700190 if (chars)
191 return (*chars)[0];
Brett Wilson572ba242019-09-09 16:32:59 -0700192 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700193}
194
195const char* JSONParser::pos() {
196 CHECK_LE(static_cast<size_t>(index_), input_.length());
197 return input_.data() + index_;
198}
199
200JSONParser::Token JSONParser::GetNextToken() {
201 EatWhitespaceAndComments();
202
Brett Wilson572ba242019-09-09 16:32:59 -0700203 std::optional<char> c = PeekChar();
Scott Graham66962112018-06-08 12:42:08 -0700204 if (!c)
205 return T_END_OF_INPUT;
206
207 switch (*c) {
208 case '{':
209 return T_OBJECT_BEGIN;
210 case '}':
211 return T_OBJECT_END;
212 case '[':
213 return T_ARRAY_BEGIN;
214 case ']':
215 return T_ARRAY_END;
216 case '"':
217 return T_STRING;
218 case '0':
219 case '1':
220 case '2':
221 case '3':
222 case '4':
223 case '5':
224 case '6':
225 case '7':
226 case '8':
227 case '9':
228 case '-':
229 return T_NUMBER;
230 case 't':
231 return T_BOOL_TRUE;
232 case 'f':
233 return T_BOOL_FALSE;
234 case 'n':
235 return T_NULL;
236 case ',':
237 return T_LIST_SEPARATOR;
238 case ':':
239 return T_OBJECT_PAIR_SEPARATOR;
240 default:
241 return T_INVALID_TOKEN;
242 }
243}
244
245void JSONParser::EatWhitespaceAndComments() {
Brett Wilson572ba242019-09-09 16:32:59 -0700246 while (std::optional<char> c = PeekChar()) {
Scott Graham66962112018-06-08 12:42:08 -0700247 switch (*c) {
248 case '\r':
249 case '\n':
250 index_last_line_ = index_;
251 // Don't increment line_number_ twice for "\r\n".
252 if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
253 ++line_number_;
254 }
255 FALLTHROUGH;
256 case ' ':
257 case '\t':
258 ConsumeChar();
259 break;
260 case '/':
261 if (!EatComment())
262 return;
263 break;
264 default:
265 return;
266 }
267 }
268}
269
270bool JSONParser::EatComment() {
Brett Wilson572ba242019-09-09 16:32:59 -0700271 std::optional<std::string_view> comment_start = ConsumeChars(2);
Scott Graham66962112018-06-08 12:42:08 -0700272 if (!comment_start)
273 return false;
274
275 if (comment_start == "//") {
276 // Single line comment, read to newline.
Brett Wilson572ba242019-09-09 16:32:59 -0700277 while (std::optional<char> c = PeekChar()) {
Scott Graham66962112018-06-08 12:42:08 -0700278 if (c == '\n' || c == '\r')
279 return true;
280 ConsumeChar();
281 }
282 } else if (comment_start == "/*") {
283 char previous_char = '\0';
284 // Block comment, read until end marker.
Brett Wilson572ba242019-09-09 16:32:59 -0700285 while (std::optional<char> c = PeekChar()) {
Scott Graham66962112018-06-08 12:42:08 -0700286 if (previous_char == '*' && c == '/') {
287 // EatWhitespaceAndComments will inspect pos(), which will still be on
288 // the last / of the comment, so advance once more (which may also be
289 // end of input).
290 ConsumeChar();
291 return true;
292 }
293 previous_char = *ConsumeChar();
294 }
295
296 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
297 }
298
299 return false;
300}
301
Brett Wilson572ba242019-09-09 16:32:59 -0700302std::optional<Value> JSONParser::ParseNextToken() {
Scott Graham66962112018-06-08 12:42:08 -0700303 return ParseToken(GetNextToken());
304}
305
Brett Wilson572ba242019-09-09 16:32:59 -0700306std::optional<Value> JSONParser::ParseToken(Token token) {
Scott Graham66962112018-06-08 12:42:08 -0700307 switch (token) {
308 case T_OBJECT_BEGIN:
309 return ConsumeDictionary();
310 case T_ARRAY_BEGIN:
311 return ConsumeList();
312 case T_STRING:
313 return ConsumeString();
314 case T_NUMBER:
315 return ConsumeNumber();
316 case T_BOOL_TRUE:
317 case T_BOOL_FALSE:
318 case T_NULL:
319 return ConsumeLiteral();
320 default:
321 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700322 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700323 }
324}
325
Brett Wilson572ba242019-09-09 16:32:59 -0700326std::optional<Value> JSONParser::ConsumeDictionary() {
Scott Graham66962112018-06-08 12:42:08 -0700327 if (ConsumeChar() != '{') {
328 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700329 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700330 }
331
332 StackMarker depth_check(max_depth_, &stack_depth_);
333 if (depth_check.IsTooDeep()) {
334 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
Brett Wilson572ba242019-09-09 16:32:59 -0700335 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700336 }
337
338 std::vector<Value::DictStorage::value_type> dict_storage;
339
340 Token token = GetNextToken();
341 while (token != T_OBJECT_END) {
342 if (token != T_STRING) {
343 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700344 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700345 }
346
347 // First consume the key.
348 StringBuilder key;
349 if (!ConsumeStringRaw(&key)) {
Brett Wilson572ba242019-09-09 16:32:59 -0700350 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700351 }
352
353 // Read the separator.
354 token = GetNextToken();
355 if (token != T_OBJECT_PAIR_SEPARATOR) {
356 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700357 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700358 }
359
360 // The next token is the value. Ownership transfers to |dict|.
361 ConsumeChar();
Brett Wilson572ba242019-09-09 16:32:59 -0700362 std::optional<Value> value = ParseNextToken();
Scott Graham66962112018-06-08 12:42:08 -0700363 if (!value) {
364 // ReportError from deeper level.
Brett Wilson572ba242019-09-09 16:32:59 -0700365 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700366 }
367
368 dict_storage.emplace_back(key.DestructiveAsString(),
369 std::make_unique<Value>(std::move(*value)));
370
371 token = GetNextToken();
372 if (token == T_LIST_SEPARATOR) {
373 ConsumeChar();
374 token = GetNextToken();
375 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
376 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700377 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700378 }
379 } else if (token != T_OBJECT_END) {
380 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
Brett Wilson572ba242019-09-09 16:32:59 -0700381 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700382 }
383 }
384
385 ConsumeChar(); // Closing '}'.
386
387 return Value(Value::DictStorage(std::move(dict_storage), KEEP_LAST_OF_DUPES));
388}
389
Brett Wilson572ba242019-09-09 16:32:59 -0700390std::optional<Value> JSONParser::ConsumeList() {
Scott Graham66962112018-06-08 12:42:08 -0700391 if (ConsumeChar() != '[') {
392 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700393 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700394 }
395
396 StackMarker depth_check(max_depth_, &stack_depth_);
397 if (depth_check.IsTooDeep()) {
398 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 0);
Brett Wilson572ba242019-09-09 16:32:59 -0700399 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700400 }
401
402 Value::ListStorage list_storage;
403
404 Token token = GetNextToken();
405 while (token != T_ARRAY_END) {
Brett Wilson572ba242019-09-09 16:32:59 -0700406 std::optional<Value> item = ParseToken(token);
Scott Graham66962112018-06-08 12:42:08 -0700407 if (!item) {
408 // ReportError from deeper level.
Brett Wilson572ba242019-09-09 16:32:59 -0700409 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700410 }
411
412 list_storage.push_back(std::move(*item));
413
414 token = GetNextToken();
415 if (token == T_LIST_SEPARATOR) {
416 ConsumeChar();
417 token = GetNextToken();
418 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
419 ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700420 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700421 }
422 } else if (token != T_ARRAY_END) {
423 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700424 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700425 }
426 }
427
428 ConsumeChar(); // Closing ']'.
429
430 return Value(std::move(list_storage));
431}
432
Brett Wilson572ba242019-09-09 16:32:59 -0700433std::optional<Value> JSONParser::ConsumeString() {
Scott Graham66962112018-06-08 12:42:08 -0700434 StringBuilder string;
435 if (!ConsumeStringRaw(&string))
Brett Wilson572ba242019-09-09 16:32:59 -0700436 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700437
438 return Value(string.DestructiveAsString());
439}
440
441bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
442 if (ConsumeChar() != '"') {
443 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
444 return false;
445 }
446
Brett Wilsonad9e4422019-09-07 13:33:06 -0700447 // StringBuilder will internally build a std::string_view unless a UTF-16
Scott Graham66962112018-06-08 12:42:08 -0700448 // conversion occurs, at which point it will perform a copy into a
449 // std::string.
450 StringBuilder string(pos());
451
452 while (PeekChar()) {
453 uint32_t next_char = 0;
454 if (!ReadUnicodeCharacter(input_.data(),
Scott Graham98cd3ca2018-06-14 22:26:55 -0700455 static_cast<int32_t>(input_.length()), &index_,
Scott Graham66962112018-06-08 12:42:08 -0700456 &next_char) ||
457 !IsValidCharacter(next_char)) {
458 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
459 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
460 return false;
461 }
462 ConsumeChar();
463 string.Append(kUnicodeReplacementPoint);
464 continue;
465 }
466
467 if (next_char == '"') {
468 ConsumeChar();
469 *out = std::move(string);
470 return true;
471 } else if (next_char != '\\') {
472 // If this character is not an escape sequence...
473 ConsumeChar();
474 string.Append(next_char);
475 } else {
476 // And if it is an escape sequence, the input string will be adjusted
477 // (either by combining the two characters of an encoded escape sequence,
Brett Wilsonad9e4422019-09-07 13:33:06 -0700478 // or with a UTF conversion), so using std::string_view isn't possible --
479 // force a conversion.
Scott Graham66962112018-06-08 12:42:08 -0700480 string.Convert();
481
482 // Read past the escape '\' and ensure there's a character following.
Brett Wilson572ba242019-09-09 16:32:59 -0700483 std::optional<std::string_view> escape_sequence = ConsumeChars(2);
Scott Graham66962112018-06-08 12:42:08 -0700484 if (!escape_sequence) {
485 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
486 return false;
487 }
488
489 switch ((*escape_sequence)[1]) {
490 // Allowed esape sequences:
491 case 'x': { // UTF-8 sequence.
492 // UTF-8 \x escape sequences are not allowed in the spec, but they
493 // are supported here for backwards-compatiblity with the old parser.
494 escape_sequence = ConsumeChars(2);
495 if (!escape_sequence) {
496 ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
497 return false;
498 }
499
500 int hex_digit = 0;
501 if (!HexStringToInt(*escape_sequence, &hex_digit) ||
502 !IsValidCharacter(hex_digit)) {
503 ReportError(JSONReader::JSON_INVALID_ESCAPE, -2);
504 return false;
505 }
506
507 string.Append(hex_digit);
508 break;
509 }
510 case 'u': { // UTF-16 sequence.
511 // UTF units are of the form \uXXXX.
512 uint32_t code_point;
513 if (!DecodeUTF16(&code_point)) {
514 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
515 return false;
516 }
517 string.Append(code_point);
518 break;
519 }
520 case '"':
521 string.Append('"');
522 break;
523 case '\\':
524 string.Append('\\');
525 break;
526 case '/':
527 string.Append('/');
528 break;
529 case 'b':
530 string.Append('\b');
531 break;
532 case 'f':
533 string.Append('\f');
534 break;
535 case 'n':
536 string.Append('\n');
537 break;
538 case 'r':
539 string.Append('\r');
540 break;
541 case 't':
542 string.Append('\t');
543 break;
544 case 'v': // Not listed as valid escape sequence in the RFC.
545 string.Append('\v');
546 break;
547 // All other escape squences are illegal.
548 default:
549 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
550 return false;
551 }
552 }
553 }
554
555 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
556 return false;
557}
558
559// Entry is at the first X in \uXXXX.
560bool JSONParser::DecodeUTF16(uint32_t* out_code_point) {
Brett Wilson572ba242019-09-09 16:32:59 -0700561 std::optional<std::string_view> escape_sequence = ConsumeChars(4);
Scott Graham66962112018-06-08 12:42:08 -0700562 if (!escape_sequence)
563 return false;
564
565 // Consume the UTF-16 code unit, which may be a high surrogate.
566 int code_unit16_high = 0;
567 if (!HexStringToInt(*escape_sequence, &code_unit16_high))
568 return false;
569
570 // If this is a high surrogate, consume the next code unit to get the
571 // low surrogate.
572 if (CBU16_IS_SURROGATE(code_unit16_high)) {
573 // Make sure this is the high surrogate. If not, it's an encoding
574 // error.
575 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
576 return false;
577
578 // Make sure that the token has more characters to consume the
579 // lower surrogate.
580 if (!ConsumeIfMatch("\\u"))
581 return false;
582
583 escape_sequence = ConsumeChars(4);
584 if (!escape_sequence)
585 return false;
586
587 int code_unit16_low = 0;
588 if (!HexStringToInt(*escape_sequence, &code_unit16_low))
589 return false;
590
591 if (!CBU16_IS_TRAIL(code_unit16_low))
592 return false;
593
594 uint32_t code_point =
595 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
596 if (!IsValidCharacter(code_point))
597 return false;
598
599 *out_code_point = code_point;
600 } else {
601 // Not a surrogate.
602 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
603 if (!IsValidCharacter(code_unit16_high)) {
604 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
605 return false;
606 }
607 *out_code_point = kUnicodeReplacementPoint;
608 return true;
609 }
610
611 *out_code_point = code_unit16_high;
612 }
613
614 return true;
615}
616
Brett Wilson572ba242019-09-09 16:32:59 -0700617std::optional<Value> JSONParser::ConsumeNumber() {
Scott Graham66962112018-06-08 12:42:08 -0700618 const char* num_start = pos();
619 const int start_index = index_;
620 int end_index = start_index;
621
622 if (PeekChar() == '-')
623 ConsumeChar();
624
625 if (!ReadInt(false)) {
626 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700627 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700628 }
629 end_index = index_;
630
631 // The optional fraction part.
632 if (PeekChar() == '.') {
633 ConsumeChar();
634 if (!ReadInt(true)) {
635 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700636 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700637 }
638 end_index = index_;
639 }
640
641 // Optional exponent part.
Brett Wilson572ba242019-09-09 16:32:59 -0700642 std::optional<char> c = PeekChar();
Scott Graham66962112018-06-08 12:42:08 -0700643 if (c == 'e' || c == 'E') {
644 ConsumeChar();
645 if (PeekChar() == '-' || PeekChar() == '+') {
646 ConsumeChar();
647 }
648 if (!ReadInt(true)) {
649 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700650 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700651 }
652 end_index = index_;
653 }
654
655 // ReadInt is greedy because numbers have no easily detectable sentinel,
656 // so save off where the parser should be on exit (see Consume invariant at
657 // the top of the header), then make sure the next token is one which is
658 // valid.
659 int exit_index = index_;
660
661 switch (GetNextToken()) {
662 case T_OBJECT_END:
663 case T_ARRAY_END:
664 case T_LIST_SEPARATOR:
665 case T_END_OF_INPUT:
666 break;
667 default:
668 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700669 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700670 }
671
672 index_ = exit_index;
673
Brett Wilsonad9e4422019-09-07 13:33:06 -0700674 std::string_view num_string(num_start, end_index - start_index);
Scott Graham66962112018-06-08 12:42:08 -0700675
676 int num_int;
677 if (StringToInt(num_string, &num_int))
678 return Value(num_int);
679
Brett Wilson572ba242019-09-09 16:32:59 -0700680 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700681}
682
683bool JSONParser::ReadInt(bool allow_leading_zeros) {
684 size_t len = 0;
685 char first = 0;
686
Brett Wilson572ba242019-09-09 16:32:59 -0700687 while (std::optional<char> c = PeekChar()) {
Scott Graham66962112018-06-08 12:42:08 -0700688 if (!IsAsciiDigit(c))
689 break;
690
691 if (len == 0)
692 first = *c;
693
694 ++len;
695 ConsumeChar();
696 }
697
698 if (len == 0)
699 return false;
700
701 if (!allow_leading_zeros && len > 1 && first == '0')
702 return false;
703
704 return true;
705}
706
Brett Wilson572ba242019-09-09 16:32:59 -0700707std::optional<Value> JSONParser::ConsumeLiteral() {
Scott Graham66962112018-06-08 12:42:08 -0700708 if (ConsumeIfMatch("true")) {
709 return Value(true);
710 } else if (ConsumeIfMatch("false")) {
711 return Value(false);
712 } else if (ConsumeIfMatch("null")) {
713 return Value(Value::Type::NONE);
714 } else {
715 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
Brett Wilson572ba242019-09-09 16:32:59 -0700716 return std::nullopt;
Scott Graham66962112018-06-08 12:42:08 -0700717 }
718}
719
Brett Wilsonad9e4422019-09-07 13:33:06 -0700720bool JSONParser::ConsumeIfMatch(std::string_view match) {
Scott Graham66962112018-06-08 12:42:08 -0700721 if (match == PeekChars(match.size())) {
722 ConsumeChars(match.size());
723 return true;
724 }
725 return false;
726}
727
728void JSONParser::ReportError(JSONReader::JsonParseError code,
729 int column_adjust) {
730 error_code_ = code;
731 error_line_ = line_number_;
732 error_column_ = index_ - index_last_line_ + column_adjust;
733}
734
735// static
Scott Graham98cd3ca2018-06-14 22:26:55 -0700736std::string JSONParser::FormatErrorMessage(int line,
737 int column,
Scott Graham66962112018-06-08 12:42:08 -0700738 const std::string& description) {
739 if (line || column) {
Scott Graham98cd3ca2018-06-14 22:26:55 -0700740 return StringPrintf("Line: %i, column: %i, %s", line, column,
741 description.c_str());
Scott Graham66962112018-06-08 12:42:08 -0700742 }
743 return description;
744}
745
746} // namespace internal
747} // namespace base