| // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "gn/parser.h" |
| |
| #include <memory> |
| #include <utility> |
| |
| #include "base/logging.h" |
| #include "gn/functions.h" |
| #include "gn/operators.h" |
| #include "gn/token.h" |
| |
| const char kGrammar_Help[] = |
| R"*(Language and grammar for GN build files |
| |
| Tokens |
| |
| GN build files are read as sequences of tokens. While splitting the file |
| into tokens, the next token is the longest sequence of characters that form a |
| valid token. |
| |
| White space and comments |
| |
| White space is comprised of spaces (U+0020), horizontal tabs (U+0009), |
| carriage returns (U+000D), and newlines (U+000A). |
| |
| Comments start at the character "#" and stop at the next newline. |
| |
| White space and comments are ignored except that they may separate tokens |
| that would otherwise combine into a single token. |
| |
| Identifiers |
| |
| Identifiers name variables and functions. |
| |
| identifier = letter { letter | digit } . |
| letter = "A" ... "Z" | "a" ... "z" | "_" . |
| digit = "0" ... "9" . |
| |
| Keywords |
| |
| The following keywords are reserved and may not be used as identifiers: |
| |
| else false if true |
| |
| Integer literals |
| |
| An integer literal represents a decimal integer value. |
| |
| integer = [ "-" ] digit { digit } . |
| |
| Leading zeros and negative zero are disallowed. |
| |
| String literals |
| |
| A string literal represents a string value consisting of the quoted |
| characters with possible escape sequences and variable expansions. |
| |
| string = `"` { char | escape | expansion } `"` . |
| escape = `\` ( "$" | `"` | char ) . |
| BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess ) "}" . |
| Hex = "0x" [0-9A-Fa-f][0-9A-Fa-f] |
| expansion = "$" ( identifier | BracketExpansion | Hex ) . |
| char = /* any character except "$", `"`, or newline */ . |
| |
| After a backslash, certain sequences represent special characters: |
| |
| \" U+0022 quotation mark |
| \$ U+0024 dollar sign |
| \\ U+005C backslash |
| |
| All other backslashes represent themselves. |
| |
| To insert an arbitrary byte value, use $0xFF. For example, to insert a |
| newline character: "Line one$0x0ALine two". |
| |
| An expansion will evaluate the variable following the '$' and insert a |
| stringified version of it into the result. For example, to concat two path |
| components with a slash separating them: |
| "$var_one/$var_two" |
| Use the "${var_one}" format to be explicitly deliniate the variable for |
| otherwise-ambiguous cases. |
| |
| Punctuation |
| |
| The following character sequences represent punctuation: |
| |
| + += == != ( ) |
| - -= < <= [ ] |
| ! = > >= { } |
| && || . , |
| |
| Grammar |
| |
| The input tokens form a syntax tree following a context-free grammar: |
| |
| File = StatementList . |
| |
| Statement = Assignment | Call | Condition . |
| LValue = identifier | ArrayAccess | ScopeAccess . |
| Assignment = LValue AssignOp Expr . |
| Call = identifier "(" [ ExprList ] ")" [ Block ] . |
| Condition = "if" "(" Expr ")" Block |
| [ "else" ( Condition | Block ) ] . |
| Block = "{" StatementList "}" . |
| StatementList = { Statement } . |
| |
| ArrayAccess = identifier "[" Expr "]" . |
| ScopeAccess = identifier "." identifier . |
| Expr = UnaryExpr | Expr BinaryOp Expr . |
| UnaryExpr = PrimaryExpr | UnaryOp UnaryExpr . |
| PrimaryExpr = identifier | integer | string | Call |
| | ArrayAccess | ScopeAccess | Block |
| | "(" Expr ")" |
| | "[" [ ExprList [ "," ] ] "]" . |
| ExprList = Expr { "," Expr } . |
| |
| AssignOp = "=" | "+=" | "-=" . |
| UnaryOp = "!" . |
| BinaryOp = "+" | "-" // highest priority |
| | "<" | "<=" | ">" | ">=" |
| | "==" | "!=" |
| | "&&" |
| | "||" . // lowest priority |
| |
| All binary operators are left-associative. |
| |
| Types |
| |
| The GN language is dynamically typed. The following types are used: |
| |
| - Boolean: Uses the keywords "true" and "false". There is no implicit |
| conversion between booleans and integers. |
| |
| - Integers: All numbers in GN are signed 64-bit integers. |
| |
| - Strings: Strings are 8-bit with no enforced encoding. When a string is |
| used to interact with other systems with particular encodings (like the |
| Windows and Mac filesystems) it is assumed to be UTF-8. See "String |
| literals" above for more. |
| |
| - Lists: Lists are arbitrary-length ordered lists of values. See "Lists" |
| below for more. |
| |
| - Scopes: Scopes are like dictionaries that use variable names for keys. See |
| "Scopes" below for more. |
| |
| Lists |
| |
| Lists are created with [] and using commas to separate items: |
| |
| mylist = [ 0, 1, 2, "some string" ] |
| |
| A comma after the last item is optional. Lists are dereferenced using 0-based |
| indexing: |
| |
| mylist[0] += 1 |
| var = mylist[2] |
| |
| Lists can be concatenated using the '+' and '+=' operators. Bare values can |
| not be concatenated with lists, to add a single item, it must be put into a |
| list of length one. |
| |
| Items can be removed from lists using the '-' and '-=' operators. This will |
| remove all occurrences of every item in the right-hand list from the |
| left-hand list. It is an error to remove an item not in the list. This is to |
| prevent common typos and to detect dead code that is removing things that no |
| longer apply. |
| |
| It is an error to use '=' to replace a nonempty list with another nonempty |
| list. This is to prevent accidentally overwriting data when in most cases |
| '+=' was intended. To overwrite a list on purpose, first assign it to the |
| empty list: |
| |
| mylist = [] |
| mylist = otherlist |
| |
| Scopes |
| |
| All execution happens in the context of a scope which holds the current state |
| (like variables). With the exception of loops and conditions, '{' introduces |
| a new scope. |
| |
| Most scopes have a parent reference to the old scope. Variable reads |
| recursively search all parent scopes until the variable is found or there are |
| no more scopes. Variable writes always go into the current scope. This means |
| that after the closing '}' (again excepting loops and conditions), all local |
| variables will be restored to the previous values. This also means that "foo |
| = foo" can do useful work by copying a variable into the current scope that |
| was defined in a containing scope. |
| |
| Scopes can be assigned to variables. Examples of such scopes are the |
| implicitly-created "invoker" when invoking a template (which refers to the |
| variables set by the invoking code), scopes created by functions like |
| exec_script, and scopes explicitly created like |
| |
| empty_scope = {} |
| myvalues = { |
| foo = 21 |
| bar = "something" |
| } |
| |
| In the case of explicitly created scopes and scopes created by functions like |
| exec_script, there is no reference to the parent scope. Such scopes are fully |
| self-contained and do not "inherit" values from their defining scope. |
| |
| Inside an explicit scope definition can be any GN code including conditionals |
| and function calls. After the close of the scope, it will contain all |
| variables explicitly set by the code contained inside it. After this, the |
| values can be read, modified, or added to: |
| |
| myvalues.foo += 2 |
| empty_scope.new_thing = [ 1, 2, 3 ] |
| |
| Scope equality is defined as single-level scopes identical within the current |
| scope. That is, all values in the first scope must be present and identical |
| within the second, and vice versa. Note that this means inherited scopes are |
| always unequal by definition. |
| )*"; |
| |
| // Precedence constants. |
| // |
| // Currently all operators are left-associative so this list is sequential. To |
| // implement a right-assocative operators in a Pratt parser we would leave gaps |
| // in between the constants, and right-associative operators get a precedence |
| // of "<left-associated-precedence> - 1". |
| enum Precedence { |
| PRECEDENCE_ASSIGNMENT = 1, // Lowest precedence. |
| PRECEDENCE_OR = 2, |
| PRECEDENCE_AND = 3, |
| PRECEDENCE_EQUALITY = 4, |
| PRECEDENCE_RELATION = 5, |
| PRECEDENCE_SUM = 6, |
| PRECEDENCE_PREFIX = 7, |
| PRECEDENCE_CALL = 8, |
| PRECEDENCE_DOT = 9, // Highest precedence. |
| }; |
| |
| // The top-level for blocks/ifs is recursive descent, the expression parser is |
| // a Pratt parser. The basic idea there is to have the precedences (and |
| // associativities) encoded relative to each other and only parse up until you |
| // hit something of that precedence. There's a dispatch table in expressions_ |
| // at the top of parser.cc that describes how each token dispatches if it's |
| // seen as either a prefix or infix operator, and if it's infix, what its |
| // precedence is. |
| // |
| // References: |
| // http://javascript.crockford.com/tdop/tdop.html |
| // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ |
| |
| // Indexed by Token::Type. |
| ParserHelper Parser::expressions_[] = { |
| {nullptr, nullptr, -1}, // INVALID |
| {&Parser::Literal, nullptr, -1}, // INTEGER |
| {&Parser::Literal, nullptr, -1}, // STRING |
| {&Parser::Literal, nullptr, -1}, // TRUE_TOKEN |
| {&Parser::Literal, nullptr, -1}, // FALSE_TOKEN |
| {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS |
| {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS |
| {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND |
| {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR |
| {&Parser::Not, nullptr, -1}, // BANG |
| {nullptr, &Parser::DotOperator, PRECEDENCE_DOT}, // DOT |
| {&Parser::Group, nullptr, -1}, // LEFT_PAREN |
| {nullptr, nullptr, -1}, // RIGHT_PAREN |
| {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET |
| {nullptr, nullptr, -1}, // RIGHT_BRACKET |
| {&Parser::Block, nullptr, -1}, // LEFT_BRACE |
| {nullptr, nullptr, -1}, // RIGHT_BRACE |
| {nullptr, nullptr, -1}, // IF |
| {nullptr, nullptr, -1}, // ELSE |
| {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER |
| {nullptr, nullptr, -1}, // COMMA |
| {nullptr, nullptr, -1}, // UNCLASSIFIED_COMMENT |
| {nullptr, nullptr, -1}, // LINE_COMMENT |
| {nullptr, nullptr, -1}, // SUFFIX_COMMENT |
| {&Parser::BlockComment, nullptr, -1}, // BLOCK_COMMENT |
| }; |
| |
| Parser::Parser(const std::vector<Token>& tokens, Err* err) |
| : invalid_token_(Location(), Token::INVALID, std::string_view()), |
| err_(err), |
| cur_(0) { |
| for (const auto& token : tokens) { |
| switch (token.type()) { |
| case Token::LINE_COMMENT: |
| line_comment_tokens_.push_back(token); |
| break; |
| case Token::SUFFIX_COMMENT: |
| suffix_comment_tokens_.push_back(token); |
| break; |
| default: |
| // Note that BLOCK_COMMENTs (top-level standalone comments) are passed |
| // through the real parser. |
| tokens_.push_back(token); |
| break; |
| } |
| } |
| } |
| |
| Parser::~Parser() = default; |
| |
| // static |
| std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens, |
| Err* err) { |
| Parser p(tokens, err); |
| return p.ParseFile(); |
| } |
| |
| // static |
| std::unique_ptr<ParseNode> Parser::ParseExpression( |
| const std::vector<Token>& tokens, |
| Err* err) { |
| Parser p(tokens, err); |
| std::unique_ptr<ParseNode> expr = p.ParseExpression(); |
| if (!p.at_end() && !err->has_error()) { |
| *err = Err(p.cur_token(), "Trailing garbage"); |
| return nullptr; |
| } |
| return expr; |
| } |
| |
| // static |
| std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens, |
| Err* err) { |
| for (const Token& token : tokens) { |
| switch (token.type()) { |
| case Token::INTEGER: |
| case Token::STRING: |
| case Token::TRUE_TOKEN: |
| case Token::FALSE_TOKEN: |
| case Token::LEFT_BRACKET: |
| case Token::RIGHT_BRACKET: |
| case Token::COMMA: |
| continue; |
| default: |
| *err = Err(token, "Invalid token in literal value"); |
| return nullptr; |
| } |
| } |
| |
| return ParseExpression(tokens, err); |
| } |
| |
| bool Parser::IsAssignment(const ParseNode* node) const { |
| return node && node->AsBinaryOp() && |
| (node->AsBinaryOp()->op().type() == Token::EQUAL || |
| node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS || |
| node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS); |
| } |
| |
| bool Parser::IsStatementBreak(Token::Type token_type) const { |
| switch (token_type) { |
| case Token::IDENTIFIER: |
| case Token::LEFT_BRACE: |
| case Token::RIGHT_BRACE: |
| case Token::IF: |
| case Token::ELSE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool Parser::LookAhead(Token::Type type) { |
| if (at_end()) |
| return false; |
| return cur_token().type() == type; |
| } |
| |
| bool Parser::Match(Token::Type type) { |
| if (!LookAhead(type)) |
| return false; |
| Consume(); |
| return true; |
| } |
| |
| const Token& Parser::Consume(Token::Type type, const char* error_message) { |
| Token::Type types[1] = {type}; |
| return Consume(types, 1, error_message); |
| } |
| |
| const Token& Parser::Consume(Token::Type* types, |
| size_t num_types, |
| const char* error_message) { |
| if (has_error()) { |
| // Don't overwrite current error, but make progress through tokens so that |
| // a loop that's expecting a particular token will still terminate. |
| if (!at_end()) |
| cur_++; |
| return invalid_token_; |
| } |
| if (at_end()) { |
| const char kEOFMsg[] = "I hit EOF instead."; |
| if (tokens_.empty()) |
| *err_ = Err(Location(), error_message, kEOFMsg); |
| else |
| *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg); |
| return invalid_token_; |
| } |
| |
| for (size_t i = 0; i < num_types; ++i) { |
| if (cur_token().type() == types[i]) |
| return Consume(); |
| } |
| *err_ = Err(cur_token(), error_message); |
| return invalid_token_; |
| } |
| |
| const Token& Parser::Consume() { |
| return tokens_[cur_++]; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::ParseExpression() { |
| return ParseExpression(0); |
| } |
| |
| std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) { |
| if (at_end()) |
| return std::unique_ptr<ParseNode>(); |
| |
| const Token& token = Consume(); |
| PrefixFunc prefix = expressions_[token.type()].prefix; |
| |
| if (prefix == nullptr) { |
| *err_ = Err(token, "Unexpected token '" + std::string(token.value()) + "'"); |
| return std::unique_ptr<ParseNode>(); |
| } |
| |
| std::unique_ptr<ParseNode> left = (this->*prefix)(token); |
| if (has_error()) |
| return left; |
| |
| while (!at_end() && !IsStatementBreak(cur_token().type()) && |
| precedence <= expressions_[cur_token().type()].precedence) { |
| const Token& next_token = Consume(); |
| InfixFunc infix = expressions_[next_token.type()].infix; |
| if (infix == nullptr) { |
| *err_ = Err(next_token, |
| "Unexpected token '" + std::string(next_token.value()) + "'"); |
| return std::unique_ptr<ParseNode>(); |
| } |
| left = (this->*infix)(std::move(left), next_token); |
| if (has_error()) |
| return std::unique_ptr<ParseNode>(); |
| } |
| |
| return left; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::Block(const Token& token) { |
| // This entrypoint into ParseBlock means it's part of an expression and we |
| // always want the result. |
| return ParseBlock(token, BlockNode::RETURNS_SCOPE); |
| } |
| |
| std::unique_ptr<ParseNode> Parser::Literal(const Token& token) { |
| return std::make_unique<LiteralNode>(token); |
| } |
| |
| std::unique_ptr<ParseNode> Parser::Name(const Token& token) { |
| return IdentifierOrCall(std::unique_ptr<ParseNode>(), token); |
| } |
| |
| std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) { |
| std::unique_ptr<BlockCommentNode> comment = |
| std::make_unique<BlockCommentNode>(); |
| comment->set_comment(token); |
| return comment; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::Group(const Token& token) { |
| std::unique_ptr<ParseNode> expr = ParseExpression(); |
| if (has_error()) |
| return std::unique_ptr<ParseNode>(); |
| Consume(Token::RIGHT_PAREN, "Expected ')'"); |
| return expr; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::Not(const Token& token) { |
| std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1); |
| if (has_error()) |
| return std::unique_ptr<ParseNode>(); |
| if (!expr) { |
| if (!has_error()) |
| *err_ = Err(token, "Expected right-hand side for '!'."); |
| return std::unique_ptr<ParseNode>(); |
| } |
| std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>(); |
| unary_op->set_op(token); |
| unary_op->set_operand(std::move(expr)); |
| return unary_op; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::List(const Token& node) { |
| std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true)); |
| if (!has_error() && !at_end()) |
| Consume(Token::RIGHT_BRACKET, "Expected ']'"); |
| return list; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::BinaryOperator( |
| std::unique_ptr<ParseNode> left, |
| const Token& token) { |
| std::unique_ptr<ParseNode> right = |
| ParseExpression(expressions_[token.type()].precedence + 1); |
| if (!right) { |
| if (!has_error()) { |
| *err_ = Err(token, "Expected right-hand side for '" + |
| std::string(token.value()) + "'"); |
| } |
| return std::unique_ptr<ParseNode>(); |
| } |
| std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>(); |
| binary_op->set_op(token); |
| binary_op->set_left(std::move(left)); |
| binary_op->set_right(std::move(right)); |
| return binary_op; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::IdentifierOrCall( |
| std::unique_ptr<ParseNode> left, |
| const Token& token) { |
| std::unique_ptr<ListNode> list = std::make_unique<ListNode>(); |
| list->set_begin_token(token); |
| list->set_end(std::make_unique<EndNode>(token)); |
| std::unique_ptr<BlockNode> block; |
| bool has_arg = false; |
| if (LookAhead(Token::LEFT_PAREN)) { |
| const Token& start_token = Consume(); |
| // Parsing a function call. |
| has_arg = true; |
| if (Match(Token::RIGHT_PAREN)) { |
| // Nothing, just an empty call. |
| } else { |
| list = ParseList(start_token, Token::RIGHT_PAREN, false); |
| if (has_error()) |
| return std::unique_ptr<ParseNode>(); |
| Consume(Token::RIGHT_PAREN, "Expected ')' after call"); |
| } |
| // Optionally with a scope. |
| if (LookAhead(Token::LEFT_BRACE)) { |
| block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT); |
| if (has_error()) |
| return std::unique_ptr<ParseNode>(); |
| } |
| } |
| |
| if (!left && !has_arg) { |
| // Not a function call, just a standalone identifier. |
| return std::make_unique<IdentifierNode>(token); |
| } |
| std::unique_ptr<FunctionCallNode> func_call = |
| std::make_unique<FunctionCallNode>(); |
| func_call->set_function(token); |
| func_call->set_args(std::move(list)); |
| if (block) |
| func_call->set_block(std::move(block)); |
| return func_call; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left, |
| const Token& token) { |
| if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) { |
| *err_ = Err(left.get(), |
| "The left-hand side of an assignment must be an identifier, " |
| "scope access, or array access."); |
| return std::unique_ptr<ParseNode>(); |
| } |
| std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT); |
| if (!value) { |
| if (!has_error()) |
| *err_ = Err(token, "Expected right-hand side for assignment."); |
| return std::unique_ptr<ParseNode>(); |
| } |
| std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>(); |
| assign->set_op(token); |
| assign->set_left(std::move(left)); |
| assign->set_right(std::move(value)); |
| return assign; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left, |
| const Token& token) { |
| // TODO: Maybe support more complex expressions like a[0][0]. This would |
| // require work on the evaluator too. |
| if (left->AsIdentifier() == nullptr) { |
| *err_ = Err( |
| left.get(), "May only subscript identifiers.", |
| "The thing on the left hand side of the [] must be an identifier\n" |
| "and not an expression. If you need this, you'll have to assign the\n" |
| "value to a temporary before subscripting. Sorry."); |
| return std::unique_ptr<ParseNode>(); |
| } |
| std::unique_ptr<ParseNode> value = ParseExpression(); |
| Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript."); |
| std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>(); |
| accessor->set_base(left->AsIdentifier()->value()); |
| accessor->set_subscript(std::move(value)); |
| return accessor; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left, |
| const Token& token) { |
| if (left->AsIdentifier() == nullptr) { |
| *err_ = Err( |
| left.get(), "May only use \".\" for identifiers.", |
| "The thing on the left hand side of the dot must be an identifier\n" |
| "and not an expression. If you need this, you'll have to assign the\n" |
| "value to a temporary first. Sorry."); |
| return std::unique_ptr<ParseNode>(); |
| } |
| |
| std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT); |
| if (!right || !right->AsIdentifier()) { |
| *err_ = Err( |
| token, "Expected identifier for right-hand-side of \".\"", |
| "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()"); |
| return std::unique_ptr<ParseNode>(); |
| } |
| |
| std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>(); |
| accessor->set_base(left->AsIdentifier()->value()); |
| accessor->set_member(std::unique_ptr<IdentifierNode>( |
| static_cast<IdentifierNode*>(right.release()))); |
| return accessor; |
| } |
| |
| // Does not Consume the start or end token. |
| std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token, |
| Token::Type stop_before, |
| bool allow_trailing_comma) { |
| std::unique_ptr<ListNode> list = std::make_unique<ListNode>(); |
| list->set_begin_token(start_token); |
| bool just_got_comma = false; |
| bool first_time = true; |
| while (!LookAhead(stop_before)) { |
| if (!first_time) { |
| if (!just_got_comma) { |
| // Require commas separate things in lists. |
| *err_ = Err(cur_token(), "Expected comma between items."); |
| return std::unique_ptr<ListNode>(); |
| } |
| } |
| first_time = false; |
| |
| // Why _OR? We're parsing things that are higher precedence than the , |
| // that separates the items of the list. , should appear lower than |
| // boolean expressions (the lowest of which is OR), but above assignments. |
| list->append_item(ParseExpression(PRECEDENCE_OR)); |
| if (has_error()) |
| return std::unique_ptr<ListNode>(); |
| if (at_end()) { |
| *err_ = |
| Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list."); |
| return std::unique_ptr<ListNode>(); |
| } |
| if (list->contents().back()->AsBlockComment()) { |
| // If there was a comment inside the list, we don't need a comma to the |
| // next item, so pretend we got one, if we're expecting one. |
| just_got_comma = allow_trailing_comma; |
| } else { |
| just_got_comma = Match(Token::COMMA); |
| } |
| } |
| if (just_got_comma && !allow_trailing_comma) { |
| *err_ = Err(cur_token(), "Trailing comma"); |
| return std::unique_ptr<ListNode>(); |
| } |
| list->set_end(std::make_unique<EndNode>(cur_token())); |
| return list; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::ParseFile() { |
| std::unique_ptr<BlockNode> file = |
| std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT); |
| for (;;) { |
| if (at_end()) |
| break; |
| std::unique_ptr<ParseNode> statement = ParseStatement(); |
| if (!statement) |
| break; |
| file->append_statement(std::move(statement)); |
| } |
| if (!at_end() && !has_error()) |
| *err_ = Err(cur_token(), "Unexpected here, should be newline."); |
| if (has_error()) |
| return std::unique_ptr<ParseNode>(); |
| |
| // TODO(scottmg): If this is measurably expensive, it could be done only |
| // when necessary (when reformatting, or during tests). Comments are |
| // separate from the parse tree at this point, so downstream code can remain |
| // ignorant of them. |
| AssignComments(file.get()); |
| |
| return file; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::ParseStatement() { |
| if (LookAhead(Token::IF)) { |
| return ParseCondition(); |
| } else if (LookAhead(Token::BLOCK_COMMENT)) { |
| return BlockComment(Consume()); |
| } else { |
| // TODO(scottmg): Is this too strict? Just drop all the testing if we want |
| // to allow "pointless" expressions and return ParseExpression() directly. |
| std::unique_ptr<ParseNode> stmt = ParseExpression(); |
| if (stmt) { |
| if (stmt->AsFunctionCall() || IsAssignment(stmt.get())) |
| return stmt; |
| } |
| if (!has_error()) { |
| const Token& token = cur_or_last_token(); |
| *err_ = Err(token, "Expecting assignment or function call."); |
| } |
| return std::unique_ptr<ParseNode>(); |
| } |
| } |
| |
| std::unique_ptr<BlockNode> Parser::ParseBlock( |
| const Token& begin_brace, |
| BlockNode::ResultMode result_mode) { |
| if (has_error()) |
| return std::unique_ptr<BlockNode>(); |
| std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode); |
| block->set_begin_token(begin_brace); |
| |
| for (;;) { |
| if (LookAhead(Token::RIGHT_BRACE)) { |
| block->set_end(std::make_unique<EndNode>(Consume())); |
| break; |
| } |
| |
| std::unique_ptr<ParseNode> statement = ParseStatement(); |
| if (!statement) |
| return std::unique_ptr<BlockNode>(); |
| block->append_statement(std::move(statement)); |
| } |
| return block; |
| } |
| |
| std::unique_ptr<ParseNode> Parser::ParseCondition() { |
| std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>(); |
| condition->set_if_token(Consume(Token::IF, "Expected 'if'")); |
| Consume(Token::LEFT_PAREN, "Expected '(' after 'if'."); |
| condition->set_condition(ParseExpression()); |
| if (IsAssignment(condition->condition())) |
| *err_ = Err(condition->condition(), "Assignment not allowed in 'if'."); |
| Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'."); |
| condition->set_if_true(ParseBlock( |
| Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."), |
| BlockNode::DISCARDS_RESULT)); |
| if (Match(Token::ELSE)) { |
| if (LookAhead(Token::LEFT_BRACE)) { |
| condition->set_if_false( |
| ParseBlock(Consume(), BlockNode::DISCARDS_RESULT)); |
| } else if (LookAhead(Token::IF)) { |
| condition->set_if_false(ParseStatement()); |
| } else { |
| *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'."); |
| return std::unique_ptr<ParseNode>(); |
| } |
| } |
| if (has_error()) |
| return std::unique_ptr<ParseNode>(); |
| return condition; |
| } |
| |
| void Parser::TraverseOrder(const ParseNode* root, |
| std::vector<const ParseNode*>* pre, |
| std::vector<const ParseNode*>* post) { |
| if (root) { |
| pre->push_back(root); |
| |
| if (const AccessorNode* accessor = root->AsAccessor()) { |
| TraverseOrder(accessor->subscript(), pre, post); |
| TraverseOrder(accessor->member(), pre, post); |
| } else if (const BinaryOpNode* binop = root->AsBinaryOp()) { |
| TraverseOrder(binop->left(), pre, post); |
| TraverseOrder(binop->right(), pre, post); |
| } else if (const BlockNode* block = root->AsBlock()) { |
| for (const auto& statement : block->statements()) |
| TraverseOrder(statement.get(), pre, post); |
| TraverseOrder(block->End(), pre, post); |
| } else if (const ConditionNode* condition = root->AsCondition()) { |
| TraverseOrder(condition->condition(), pre, post); |
| TraverseOrder(condition->if_true(), pre, post); |
| TraverseOrder(condition->if_false(), pre, post); |
| } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) { |
| TraverseOrder(func_call->args(), pre, post); |
| TraverseOrder(func_call->block(), pre, post); |
| } else if (root->AsIdentifier()) { |
| // Nothing. |
| } else if (const ListNode* list = root->AsList()) { |
| for (const auto& node : list->contents()) |
| TraverseOrder(node.get(), pre, post); |
| TraverseOrder(list->End(), pre, post); |
| } else if (root->AsLiteral()) { |
| // Nothing. |
| } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) { |
| TraverseOrder(unaryop->operand(), pre, post); |
| } else if (root->AsBlockComment()) { |
| // Nothing. |
| } else if (root->AsEnd()) { |
| // Nothing. |
| } else { |
| CHECK(false) << "Unhandled case in TraverseOrder."; |
| } |
| |
| post->push_back(root); |
| } |
| } |
| |
| void Parser::AssignComments(ParseNode* file) { |
| // Start by generating a pre- and post- order traversal of the tree so we |
| // can determine what's before and after comments. |
| std::vector<const ParseNode*> pre; |
| std::vector<const ParseNode*> post; |
| TraverseOrder(file, &pre, &post); |
| |
| // Assign line comments to syntax immediately following. |
| int cur_comment = 0; |
| for (auto* node : pre) { |
| if (node->GetRange().is_null()) { |
| CHECK_EQ(node, file) << "Only expected on top file node"; |
| continue; |
| } |
| const Location start = node->GetRange().begin(); |
| while (cur_comment < static_cast<int>(line_comment_tokens_.size())) { |
| if (start >= line_comment_tokens_[cur_comment].location()) { |
| const_cast<ParseNode*>(node)->comments_mutable()->append_before( |
| line_comment_tokens_[cur_comment]); |
| ++cur_comment; |
| } else { |
| break; |
| } |
| } |
| } |
| |
| // Remaining line comments go at end of file. |
| for (; cur_comment < static_cast<int>(line_comment_tokens_.size()); |
| ++cur_comment) |
| file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]); |
| |
| // Assign suffix to syntax immediately before. |
| cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1); |
| for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin(); |
| i != post.rend(); ++i) { |
| // Don't assign suffix comments to the function, list, or block, but instead |
| // to the last thing inside. |
| if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock()) |
| continue; |
| |
| Location start = (*i)->GetRange().begin(); |
| Location end = (*i)->GetRange().end(); |
| |
| // Don't assign suffix comments to something that starts on an earlier |
| // line, so that in: |
| // |
| // sources = [ "a", |
| // "b" ] # comment |
| // |
| // it's attached to "b", not sources = [ ... ]. |
| if (start.line_number() != end.line_number()) |
| continue; |
| |
| while (cur_comment >= 0) { |
| if (end <= suffix_comment_tokens_[cur_comment].location()) { |
| const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix( |
| suffix_comment_tokens_[cur_comment]); |
| --cur_comment; |
| } else { |
| break; |
| } |
| } |
| |
| // Suffix comments were assigned in reverse, so if there were multiple on |
| // the same node, they need to be reversed. |
| if ((*i)->comments() && !(*i)->comments()->suffix().empty()) |
| const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix(); |
| } |
| } |
| |
| std::string IndentFor(int value) { |
| return std::string(value, ' '); |
| } |
| |
| void RenderToText(const base::Value& node, |
| int indent_level, |
| std::ostringstream& os) { |
| const base::Value* child = node.FindKey(std::string("child")); |
| std::string node_type(node.FindKey("type")->GetString()); |
| if (node_type == "ACCESSOR") { |
| // AccessorNode is a bit special, in that it holds a Token, not a ParseNode |
| // for the base. |
| os << IndentFor(indent_level) << node_type << std::endl; |
| os << IndentFor(indent_level + 1) << node.FindKey("value")->GetString() |
| << std::endl; |
| } else { |
| os << IndentFor(indent_level) << node_type; |
| if (node.FindKey("value")) { |
| os << "(" << node.FindKey("value")->GetString() << ")"; |
| } |
| os << std::endl; |
| } |
| if (node.FindKey(kJsonBeforeComment)) { |
| for (auto& v : node.FindKey(kJsonBeforeComment)->GetList()) { |
| os << IndentFor(indent_level + 1) << "+BEFORE_COMMENT(\"" << v.GetString() |
| << "\")\n"; |
| } |
| } |
| if (node.FindKey(kJsonSuffixComment)) { |
| for (auto& v : node.FindKey(kJsonSuffixComment)->GetList()) { |
| os << IndentFor(indent_level + 1) << "+SUFFIX_COMMENT(\"" << v.GetString() |
| << "\")\n"; |
| } |
| } |
| if (node.FindKey(kJsonAfterComment)) { |
| for (auto& v : node.FindKey(kJsonAfterComment)->GetList()) { |
| os << IndentFor(indent_level + 1) << "+AFTER_COMMENT(\"" << v.GetString() |
| << "\")\n"; |
| } |
| } |
| if (child) { |
| for (const base::Value& n : child->GetList()) { |
| RenderToText(n, indent_level + 1, os); |
| } |
| } |
| const base::Value* end = node.FindKey("end"); |
| if (end && |
| (end->FindKey(kJsonBeforeComment) || end->FindKey(kJsonSuffixComment) || |
| end->FindKey(kJsonAfterComment))) { |
| RenderToText(*end, indent_level + 1, os); |
| } |
| } |