|  | #!/usr/bin/env python | 
|  | # | 
|  | # Copyright 2007 Neal Norwitz | 
|  | # Portions Copyright 2007 Google Inc. | 
|  | # | 
|  | # Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | # you may not use this file except in compliance with the License. | 
|  | # You may obtain a copy of the License at | 
|  | # | 
|  | #      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | # | 
|  | # Unless required by applicable law or agreed to in writing, software | 
|  | # distributed under the License is distributed on an "AS IS" BASIS, | 
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | # See the License for the specific language governing permissions and | 
|  | # limitations under the License. | 
|  |  | 
|  | """Generate an Abstract Syntax Tree (AST) for C++.""" | 
|  |  | 
|  | __author__ = 'nnorwitz@google.com (Neal Norwitz)' | 
|  |  | 
|  |  | 
|  | # TODO: | 
|  | #  * Tokens should never be exported, need to convert to Nodes | 
|  | #    (return types, parameters, etc.) | 
|  | #  * Handle static class data for templatized classes | 
|  | #  * Handle casts (both C++ and C-style) | 
|  | #  * Handle conditions and loops (if/else, switch, for, while/do) | 
|  | # | 
|  | # TODO much, much later: | 
|  | #  * Handle #define | 
|  | #  * exceptions | 
|  |  | 
|  |  | 
|  | try: | 
|  | # Python 3.x | 
|  | import builtins | 
|  | except ImportError: | 
|  | # Python 2.x | 
|  | import __builtin__ as builtins | 
|  |  | 
|  | import sys | 
|  | import traceback | 
|  |  | 
|  | from cpp import keywords | 
|  | from cpp import tokenize | 
|  | from cpp import utils | 
|  |  | 
|  |  | 
|  | if not hasattr(builtins, 'reversed'): | 
|  | # Support Python 2.3 and earlier. | 
|  | def reversed(seq): | 
|  | for i in range(len(seq)-1, -1, -1): | 
|  | yield seq[i] | 
|  |  | 
|  | if not hasattr(builtins, 'next'): | 
|  | # Support Python 2.5 and earlier. | 
|  | def next(obj): | 
|  | return obj.next() | 
|  |  | 
|  |  | 
|  | VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3) | 
|  |  | 
|  | FUNCTION_NONE = 0x00 | 
|  | FUNCTION_CONST = 0x01 | 
|  | FUNCTION_VIRTUAL = 0x02 | 
|  | FUNCTION_PURE_VIRTUAL = 0x04 | 
|  | FUNCTION_CTOR = 0x08 | 
|  | FUNCTION_DTOR = 0x10 | 
|  | FUNCTION_ATTRIBUTE = 0x20 | 
|  | FUNCTION_UNKNOWN_ANNOTATION = 0x40 | 
|  | FUNCTION_THROW = 0x80 | 
|  | FUNCTION_OVERRIDE = 0x100 | 
|  |  | 
|  | """ | 
|  | These are currently unused.  Should really handle these properly at some point. | 
|  |  | 
|  | TYPE_MODIFIER_INLINE   = 0x010000 | 
|  | TYPE_MODIFIER_EXTERN   = 0x020000 | 
|  | TYPE_MODIFIER_STATIC   = 0x040000 | 
|  | TYPE_MODIFIER_CONST    = 0x080000 | 
|  | TYPE_MODIFIER_REGISTER = 0x100000 | 
|  | TYPE_MODIFIER_VOLATILE = 0x200000 | 
|  | TYPE_MODIFIER_MUTABLE  = 0x400000 | 
|  |  | 
|  | TYPE_MODIFIER_MAP = { | 
|  | 'inline': TYPE_MODIFIER_INLINE, | 
|  | 'extern': TYPE_MODIFIER_EXTERN, | 
|  | 'static': TYPE_MODIFIER_STATIC, | 
|  | 'const': TYPE_MODIFIER_CONST, | 
|  | 'register': TYPE_MODIFIER_REGISTER, | 
|  | 'volatile': TYPE_MODIFIER_VOLATILE, | 
|  | 'mutable': TYPE_MODIFIER_MUTABLE, | 
|  | } | 
|  | """ | 
|  |  | 
|  | _INTERNAL_TOKEN = 'internal' | 
|  | _NAMESPACE_POP = 'ns-pop' | 
|  |  | 
|  |  | 
|  | # TODO(nnorwitz): use this as a singleton for templated_types, etc | 
|  | # where we don't want to create a new empty dict each time.  It is also const. | 
|  | class _NullDict(object): | 
|  | __contains__ = lambda self: False | 
|  | keys = values = items = iterkeys = itervalues = iteritems = lambda self: () | 
|  |  | 
|  |  | 
|  | # TODO(nnorwitz): move AST nodes into a separate module. | 
|  | class Node(object): | 
|  | """Base AST node.""" | 
|  |  | 
|  | def __init__(self, start, end): | 
|  | self.start = start | 
|  | self.end = end | 
|  |  | 
|  | def IsDeclaration(self): | 
|  | """Returns bool if this node is a declaration.""" | 
|  | return False | 
|  |  | 
|  | def IsDefinition(self): | 
|  | """Returns bool if this node is a definition.""" | 
|  | return False | 
|  |  | 
|  | def IsExportable(self): | 
|  | """Returns bool if this node exportable from a header file.""" | 
|  | return False | 
|  |  | 
|  | def Requires(self, node): | 
|  | """Does this AST node require the definition of the node passed in?""" | 
|  | return False | 
|  |  | 
|  | def XXX__str__(self): | 
|  | return self._StringHelper(self.__class__.__name__, '') | 
|  |  | 
|  | def _StringHelper(self, name, suffix): | 
|  | if not utils.DEBUG: | 
|  | return '%s(%s)' % (name, suffix) | 
|  | return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix) | 
|  |  | 
|  | def __repr__(self): | 
|  | return str(self) | 
|  |  | 
|  |  | 
|  | class Define(Node): | 
|  | def __init__(self, start, end, name, definition): | 
|  | Node.__init__(self, start, end) | 
|  | self.name = name | 
|  | self.definition = definition | 
|  |  | 
|  | def __str__(self): | 
|  | value = '%s %s' % (self.name, self.definition) | 
|  | return self._StringHelper(self.__class__.__name__, value) | 
|  |  | 
|  |  | 
|  | class Include(Node): | 
|  | def __init__(self, start, end, filename, system): | 
|  | Node.__init__(self, start, end) | 
|  | self.filename = filename | 
|  | self.system = system | 
|  |  | 
|  | def __str__(self): | 
|  | fmt = '"%s"' | 
|  | if self.system: | 
|  | fmt = '<%s>' | 
|  | return self._StringHelper(self.__class__.__name__, fmt % self.filename) | 
|  |  | 
|  |  | 
|  | class Goto(Node): | 
|  | def __init__(self, start, end, label): | 
|  | Node.__init__(self, start, end) | 
|  | self.label = label | 
|  |  | 
|  | def __str__(self): | 
|  | return self._StringHelper(self.__class__.__name__, str(self.label)) | 
|  |  | 
|  |  | 
|  | class Expr(Node): | 
|  | def __init__(self, start, end, expr): | 
|  | Node.__init__(self, start, end) | 
|  | self.expr = expr | 
|  |  | 
|  | def Requires(self, node): | 
|  | # TODO(nnorwitz): impl. | 
|  | return False | 
|  |  | 
|  | def __str__(self): | 
|  | return self._StringHelper(self.__class__.__name__, str(self.expr)) | 
|  |  | 
|  |  | 
|  | class Return(Expr): | 
|  | pass | 
|  |  | 
|  |  | 
|  | class Delete(Expr): | 
|  | pass | 
|  |  | 
|  |  | 
|  | class Friend(Expr): | 
|  | def __init__(self, start, end, expr, namespace): | 
|  | Expr.__init__(self, start, end, expr) | 
|  | self.namespace = namespace[:] | 
|  |  | 
|  |  | 
|  | class Using(Node): | 
|  | def __init__(self, start, end, names): | 
|  | Node.__init__(self, start, end) | 
|  | self.names = names | 
|  |  | 
|  | def __str__(self): | 
|  | return self._StringHelper(self.__class__.__name__, str(self.names)) | 
|  |  | 
|  |  | 
|  | class Parameter(Node): | 
|  | def __init__(self, start, end, name, parameter_type, default): | 
|  | Node.__init__(self, start, end) | 
|  | self.name = name | 
|  | self.type = parameter_type | 
|  | self.default = default | 
|  |  | 
|  | def Requires(self, node): | 
|  | # TODO(nnorwitz): handle namespaces, etc. | 
|  | return self.type.name == node.name | 
|  |  | 
|  | def __str__(self): | 
|  | name = str(self.type) | 
|  | suffix = '%s %s' % (name, self.name) | 
|  | if self.default: | 
|  | suffix += ' = ' + ''.join([d.name for d in self.default]) | 
|  | return self._StringHelper(self.__class__.__name__, suffix) | 
|  |  | 
|  |  | 
|  | class _GenericDeclaration(Node): | 
|  | def __init__(self, start, end, name, namespace): | 
|  | Node.__init__(self, start, end) | 
|  | self.name = name | 
|  | self.namespace = namespace[:] | 
|  |  | 
|  | def FullName(self): | 
|  | prefix = '' | 
|  | if self.namespace and self.namespace[-1]: | 
|  | prefix = '::'.join(self.namespace) + '::' | 
|  | return prefix + self.name | 
|  |  | 
|  | def _TypeStringHelper(self, suffix): | 
|  | if self.namespace: | 
|  | names = [n or '<anonymous>' for n in self.namespace] | 
|  | suffix += ' in ' + '::'.join(names) | 
|  | return self._StringHelper(self.__class__.__name__, suffix) | 
|  |  | 
|  |  | 
|  | # TODO(nnorwitz): merge with Parameter in some way? | 
|  | class VariableDeclaration(_GenericDeclaration): | 
|  | def __init__(self, start, end, name, var_type, initial_value, namespace): | 
|  | _GenericDeclaration.__init__(self, start, end, name, namespace) | 
|  | self.type = var_type | 
|  | self.initial_value = initial_value | 
|  |  | 
|  | def Requires(self, node): | 
|  | # TODO(nnorwitz): handle namespaces, etc. | 
|  | return self.type.name == node.name | 
|  |  | 
|  | def ToString(self): | 
|  | """Return a string that tries to reconstitute the variable decl.""" | 
|  | suffix = '%s %s' % (self.type, self.name) | 
|  | if self.initial_value: | 
|  | suffix += ' = ' + self.initial_value | 
|  | return suffix | 
|  |  | 
|  | def __str__(self): | 
|  | return self._StringHelper(self.__class__.__name__, self.ToString()) | 
|  |  | 
|  |  | 
|  | class Typedef(_GenericDeclaration): | 
|  | def __init__(self, start, end, name, alias, namespace): | 
|  | _GenericDeclaration.__init__(self, start, end, name, namespace) | 
|  | self.alias = alias | 
|  |  | 
|  | def IsDefinition(self): | 
|  | return True | 
|  |  | 
|  | def IsExportable(self): | 
|  | return True | 
|  |  | 
|  | def Requires(self, node): | 
|  | # TODO(nnorwitz): handle namespaces, etc. | 
|  | name = node.name | 
|  | for token in self.alias: | 
|  | if token is not None and name == token.name: | 
|  | return True | 
|  | return False | 
|  |  | 
|  | def __str__(self): | 
|  | suffix = '%s, %s' % (self.name, self.alias) | 
|  | return self._TypeStringHelper(suffix) | 
|  |  | 
|  |  | 
|  | class _NestedType(_GenericDeclaration): | 
|  | def __init__(self, start, end, name, fields, namespace): | 
|  | _GenericDeclaration.__init__(self, start, end, name, namespace) | 
|  | self.fields = fields | 
|  |  | 
|  | def IsDefinition(self): | 
|  | return True | 
|  |  | 
|  | def IsExportable(self): | 
|  | return True | 
|  |  | 
|  | def __str__(self): | 
|  | suffix = '%s, {%s}' % (self.name, self.fields) | 
|  | return self._TypeStringHelper(suffix) | 
|  |  | 
|  |  | 
|  | class Union(_NestedType): | 
|  | pass | 
|  |  | 
|  |  | 
|  | class Enum(_NestedType): | 
|  | pass | 
|  |  | 
|  |  | 
|  | class Class(_GenericDeclaration): | 
|  | def __init__(self, start, end, name, bases, templated_types, body, namespace): | 
|  | _GenericDeclaration.__init__(self, start, end, name, namespace) | 
|  | self.bases = bases | 
|  | self.body = body | 
|  | self.templated_types = templated_types | 
|  |  | 
|  | def IsDeclaration(self): | 
|  | return self.bases is None and self.body is None | 
|  |  | 
|  | def IsDefinition(self): | 
|  | return not self.IsDeclaration() | 
|  |  | 
|  | def IsExportable(self): | 
|  | return not self.IsDeclaration() | 
|  |  | 
|  | def Requires(self, node): | 
|  | # TODO(nnorwitz): handle namespaces, etc. | 
|  | if self.bases: | 
|  | for token_list in self.bases: | 
|  | # TODO(nnorwitz): bases are tokens, do name comparison. | 
|  | for token in token_list: | 
|  | if token.name == node.name: | 
|  | return True | 
|  | # TODO(nnorwitz): search in body too. | 
|  | return False | 
|  |  | 
|  | def __str__(self): | 
|  | name = self.name | 
|  | if self.templated_types: | 
|  | name += '<%s>' % self.templated_types | 
|  | suffix = '%s, %s, %s' % (name, self.bases, self.body) | 
|  | return self._TypeStringHelper(suffix) | 
|  |  | 
|  |  | 
|  | class Struct(Class): | 
|  | pass | 
|  |  | 
|  |  | 
|  | class Function(_GenericDeclaration): | 
|  | def __init__(self, start, end, name, return_type, parameters, | 
|  | modifiers, templated_types, body, namespace): | 
|  | _GenericDeclaration.__init__(self, start, end, name, namespace) | 
|  | converter = TypeConverter(namespace) | 
|  | self.return_type = converter.CreateReturnType(return_type) | 
|  | self.parameters = converter.ToParameters(parameters) | 
|  | self.modifiers = modifiers | 
|  | self.body = body | 
|  | self.templated_types = templated_types | 
|  |  | 
|  | def IsDeclaration(self): | 
|  | return self.body is None | 
|  |  | 
|  | def IsDefinition(self): | 
|  | return self.body is not None | 
|  |  | 
|  | def IsExportable(self): | 
|  | if self.return_type and 'static' in self.return_type.modifiers: | 
|  | return False | 
|  | return None not in self.namespace | 
|  |  | 
|  | def Requires(self, node): | 
|  | if self.parameters: | 
|  | # TODO(nnorwitz): parameters are tokens, do name comparison. | 
|  | for p in self.parameters: | 
|  | if p.name == node.name: | 
|  | return True | 
|  | # TODO(nnorwitz): search in body too. | 
|  | return False | 
|  |  | 
|  | def __str__(self): | 
|  | # TODO(nnorwitz): add templated_types. | 
|  | suffix = ('%s %s(%s), 0x%02x, %s' % | 
|  | (self.return_type, self.name, self.parameters, | 
|  | self.modifiers, self.body)) | 
|  | return self._TypeStringHelper(suffix) | 
|  |  | 
|  |  | 
|  | class Method(Function): | 
|  | def __init__(self, start, end, name, in_class, return_type, parameters, | 
|  | modifiers, templated_types, body, namespace): | 
|  | Function.__init__(self, start, end, name, return_type, parameters, | 
|  | modifiers, templated_types, body, namespace) | 
|  | # TODO(nnorwitz): in_class could also be a namespace which can | 
|  | # mess up finding functions properly. | 
|  | self.in_class = in_class | 
|  |  | 
|  |  | 
|  | class Type(_GenericDeclaration): | 
|  | """Type used for any variable (eg class, primitive, struct, etc).""" | 
|  |  | 
|  | def __init__(self, start, end, name, templated_types, modifiers, | 
|  | reference, pointer, array): | 
|  | """ | 
|  | Args: | 
|  | name: str name of main type | 
|  | templated_types: [Class (Type?)] template type info between <> | 
|  | modifiers: [str] type modifiers (keywords) eg, const, mutable, etc. | 
|  | reference, pointer, array: bools | 
|  | """ | 
|  | _GenericDeclaration.__init__(self, start, end, name, []) | 
|  | self.templated_types = templated_types | 
|  | if not name and modifiers: | 
|  | self.name = modifiers.pop() | 
|  | self.modifiers = modifiers | 
|  | self.reference = reference | 
|  | self.pointer = pointer | 
|  | self.array = array | 
|  |  | 
|  | def __str__(self): | 
|  | prefix = '' | 
|  | if self.modifiers: | 
|  | prefix = ' '.join(self.modifiers) + ' ' | 
|  | name = str(self.name) | 
|  | if self.templated_types: | 
|  | name += '<%s>' % self.templated_types | 
|  | suffix = prefix + name | 
|  | if self.reference: | 
|  | suffix += '&' | 
|  | if self.pointer: | 
|  | suffix += '*' | 
|  | if self.array: | 
|  | suffix += '[]' | 
|  | return self._TypeStringHelper(suffix) | 
|  |  | 
|  | # By definition, Is* are always False.  A Type can only exist in | 
|  | # some sort of variable declaration, parameter, or return value. | 
|  | def IsDeclaration(self): | 
|  | return False | 
|  |  | 
|  | def IsDefinition(self): | 
|  | return False | 
|  |  | 
|  | def IsExportable(self): | 
|  | return False | 
|  |  | 
|  |  | 
|  | class TypeConverter(object): | 
|  |  | 
|  | def __init__(self, namespace_stack): | 
|  | self.namespace_stack = namespace_stack | 
|  |  | 
|  | def _GetTemplateEnd(self, tokens, start): | 
|  | count = 1 | 
|  | end = start | 
|  | while 1: | 
|  | token = tokens[end] | 
|  | end += 1 | 
|  | if token.name == '<': | 
|  | count += 1 | 
|  | elif token.name == '>': | 
|  | count -= 1 | 
|  | if count == 0: | 
|  | break | 
|  | return tokens[start:end-1], end | 
|  |  | 
|  | def ToType(self, tokens): | 
|  | """Convert [Token,...] to [Class(...), ] useful for base classes. | 
|  | For example, code like class Foo : public Bar<x, y> { ... }; | 
|  | the "Bar<x, y>" portion gets converted to an AST. | 
|  |  | 
|  | Returns: | 
|  | [Class(...), ...] | 
|  | """ | 
|  | result = [] | 
|  | name_tokens = [] | 
|  | reference = pointer = array = False | 
|  |  | 
|  | def AddType(templated_types): | 
|  | # Partition tokens into name and modifier tokens. | 
|  | names = [] | 
|  | modifiers = [] | 
|  | for t in name_tokens: | 
|  | if keywords.IsKeyword(t.name): | 
|  | modifiers.append(t.name) | 
|  | else: | 
|  | names.append(t.name) | 
|  | name = ''.join(names) | 
|  | if name_tokens: | 
|  | result.append(Type(name_tokens[0].start, name_tokens[-1].end, | 
|  | name, templated_types, modifiers, | 
|  | reference, pointer, array)) | 
|  | del name_tokens[:] | 
|  |  | 
|  | i = 0 | 
|  | end = len(tokens) | 
|  | while i < end: | 
|  | token = tokens[i] | 
|  | if token.name == '<': | 
|  | new_tokens, new_end = self._GetTemplateEnd(tokens, i+1) | 
|  | AddType(self.ToType(new_tokens)) | 
|  | # If there is a comma after the template, we need to consume | 
|  | # that here otherwise it becomes part of the name. | 
|  | i = new_end | 
|  | reference = pointer = array = False | 
|  | elif token.name == ',': | 
|  | AddType([]) | 
|  | reference = pointer = array = False | 
|  | elif token.name == '*': | 
|  | pointer = True | 
|  | elif token.name == '&': | 
|  | reference = True | 
|  | elif token.name == '[': | 
|  | pointer = True | 
|  | elif token.name == ']': | 
|  | pass | 
|  | else: | 
|  | name_tokens.append(token) | 
|  | i += 1 | 
|  |  | 
|  | if name_tokens: | 
|  | # No '<' in the tokens, just a simple name and no template. | 
|  | AddType([]) | 
|  | return result | 
|  |  | 
|  | def DeclarationToParts(self, parts, needs_name_removed): | 
|  | name = None | 
|  | default = [] | 
|  | if needs_name_removed: | 
|  | # Handle default (initial) values properly. | 
|  | for i, t in enumerate(parts): | 
|  | if t.name == '=': | 
|  | default = parts[i+1:] | 
|  | name = parts[i-1].name | 
|  | if name == ']' and parts[i-2].name == '[': | 
|  | name = parts[i-3].name | 
|  | i -= 1 | 
|  | parts = parts[:i-1] | 
|  | break | 
|  | else: | 
|  | if parts[-1].token_type == tokenize.NAME: | 
|  | name = parts.pop().name | 
|  | else: | 
|  | # TODO(nnorwitz): this is a hack that happens for code like | 
|  | # Register(Foo<T>); where it thinks this is a function call | 
|  | # but it's actually a declaration. | 
|  | name = '???' | 
|  | modifiers = [] | 
|  | type_name = [] | 
|  | other_tokens = [] | 
|  | templated_types = [] | 
|  | i = 0 | 
|  | end = len(parts) | 
|  | while i < end: | 
|  | p = parts[i] | 
|  | if keywords.IsKeyword(p.name): | 
|  | modifiers.append(p.name) | 
|  | elif p.name == '<': | 
|  | templated_tokens, new_end = self._GetTemplateEnd(parts, i+1) | 
|  | templated_types = self.ToType(templated_tokens) | 
|  | i = new_end - 1 | 
|  | # Don't add a spurious :: to data members being initialized. | 
|  | next_index = i + 1 | 
|  | if next_index < end and parts[next_index].name == '::': | 
|  | i += 1 | 
|  | elif p.name in ('[', ']', '='): | 
|  | # These are handled elsewhere. | 
|  | other_tokens.append(p) | 
|  | elif p.name not in ('*', '&', '>'): | 
|  | # Ensure that names have a space between them. | 
|  | if (type_name and type_name[-1].token_type == tokenize.NAME and | 
|  | p.token_type == tokenize.NAME): | 
|  | type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0)) | 
|  | type_name.append(p) | 
|  | else: | 
|  | other_tokens.append(p) | 
|  | i += 1 | 
|  | type_name = ''.join([t.name for t in type_name]) | 
|  | return name, type_name, templated_types, modifiers, default, other_tokens | 
|  |  | 
|  | def ToParameters(self, tokens): | 
|  | if not tokens: | 
|  | return [] | 
|  |  | 
|  | result = [] | 
|  | name = type_name = '' | 
|  | type_modifiers = [] | 
|  | pointer = reference = array = False | 
|  | first_token = None | 
|  | default = [] | 
|  |  | 
|  | def AddParameter(end): | 
|  | if default: | 
|  | del default[0]  # Remove flag. | 
|  | parts = self.DeclarationToParts(type_modifiers, True) | 
|  | (name, type_name, templated_types, modifiers, | 
|  | unused_default, unused_other_tokens) = parts | 
|  | parameter_type = Type(first_token.start, first_token.end, | 
|  | type_name, templated_types, modifiers, | 
|  | reference, pointer, array) | 
|  | p = Parameter(first_token.start, end, name, | 
|  | parameter_type, default) | 
|  | result.append(p) | 
|  |  | 
|  | template_count = 0 | 
|  | for s in tokens: | 
|  | if not first_token: | 
|  | first_token = s | 
|  | if s.name == '<': | 
|  | template_count += 1 | 
|  | elif s.name == '>': | 
|  | template_count -= 1 | 
|  | if template_count > 0: | 
|  | type_modifiers.append(s) | 
|  | continue | 
|  |  | 
|  | if s.name == ',': | 
|  | AddParameter(s.start) | 
|  | name = type_name = '' | 
|  | type_modifiers = [] | 
|  | pointer = reference = array = False | 
|  | first_token = None | 
|  | default = [] | 
|  | elif s.name == '*': | 
|  | pointer = True | 
|  | elif s.name == '&': | 
|  | reference = True | 
|  | elif s.name == '[': | 
|  | array = True | 
|  | elif s.name == ']': | 
|  | pass  # Just don't add to type_modifiers. | 
|  | elif s.name == '=': | 
|  | # Got a default value.  Add any value (None) as a flag. | 
|  | default.append(None) | 
|  | elif default: | 
|  | default.append(s) | 
|  | else: | 
|  | type_modifiers.append(s) | 
|  | AddParameter(tokens[-1].end) | 
|  | return result | 
|  |  | 
|  | def CreateReturnType(self, return_type_seq): | 
|  | if not return_type_seq: | 
|  | return None | 
|  | start = return_type_seq[0].start | 
|  | end = return_type_seq[-1].end | 
|  | _, name, templated_types, modifiers, default, other_tokens = \ | 
|  | self.DeclarationToParts(return_type_seq, False) | 
|  | names = [n.name for n in other_tokens] | 
|  | reference = '&' in names | 
|  | pointer = '*' in names | 
|  | array = '[' in names | 
|  | return Type(start, end, name, templated_types, modifiers, | 
|  | reference, pointer, array) | 
|  |  | 
|  | def GetTemplateIndices(self, names): | 
|  | # names is a list of strings. | 
|  | start = names.index('<') | 
|  | end = len(names) - 1 | 
|  | while end > 0: | 
|  | if names[end] == '>': | 
|  | break | 
|  | end -= 1 | 
|  | return start, end+1 | 
|  |  | 
|  | class AstBuilder(object): | 
|  | def __init__(self, token_stream, filename, in_class='', visibility=None, | 
|  | namespace_stack=[]): | 
|  | self.tokens = token_stream | 
|  | self.filename = filename | 
|  | # TODO(nnorwitz): use a better data structure (deque) for the queue. | 
|  | # Switching directions of the "queue" improved perf by about 25%. | 
|  | # Using a deque should be even better since we access from both sides. | 
|  | self.token_queue = [] | 
|  | self.namespace_stack = namespace_stack[:] | 
|  | self.in_class = in_class | 
|  | if in_class is None: | 
|  | self.in_class_name_only = None | 
|  | else: | 
|  | self.in_class_name_only = in_class.split('::')[-1] | 
|  | self.visibility = visibility | 
|  | self.in_function = False | 
|  | self.current_token = None | 
|  | # Keep the state whether we are currently handling a typedef or not. | 
|  | self._handling_typedef = False | 
|  |  | 
|  | self.converter = TypeConverter(self.namespace_stack) | 
|  |  | 
|  | def HandleError(self, msg, token): | 
|  | printable_queue = list(reversed(self.token_queue[-20:])) | 
|  | sys.stderr.write('Got %s in %s @ %s %s\n' % | 
|  | (msg, self.filename, token, printable_queue)) | 
|  |  | 
|  | def Generate(self): | 
|  | while 1: | 
|  | token = self._GetNextToken() | 
|  | if not token: | 
|  | break | 
|  |  | 
|  | # Get the next token. | 
|  | self.current_token = token | 
|  |  | 
|  | # Dispatch on the next token type. | 
|  | if token.token_type == _INTERNAL_TOKEN: | 
|  | if token.name == _NAMESPACE_POP: | 
|  | self.namespace_stack.pop() | 
|  | continue | 
|  |  | 
|  | try: | 
|  | result = self._GenerateOne(token) | 
|  | if result is not None: | 
|  | yield result | 
|  | except: | 
|  | self.HandleError('exception', token) | 
|  | raise | 
|  |  | 
|  | def _CreateVariable(self, pos_token, name, type_name, type_modifiers, | 
|  | ref_pointer_name_seq, templated_types, value=None): | 
|  | reference = '&' in ref_pointer_name_seq | 
|  | pointer = '*' in ref_pointer_name_seq | 
|  | array = '[' in ref_pointer_name_seq | 
|  | var_type = Type(pos_token.start, pos_token.end, type_name, | 
|  | templated_types, type_modifiers, | 
|  | reference, pointer, array) | 
|  | return VariableDeclaration(pos_token.start, pos_token.end, | 
|  | name, var_type, value, self.namespace_stack) | 
|  |  | 
|  | def _GenerateOne(self, token): | 
|  | if token.token_type == tokenize.NAME: | 
|  | if (keywords.IsKeyword(token.name) and | 
|  | not keywords.IsBuiltinType(token.name)): | 
|  | method = getattr(self, 'handle_' + token.name) | 
|  | return method() | 
|  | elif token.name == self.in_class_name_only: | 
|  | # The token name is the same as the class, must be a ctor if | 
|  | # there is a paren.  Otherwise, it's the return type. | 
|  | # Peek ahead to get the next token to figure out which. | 
|  | next = self._GetNextToken() | 
|  | self._AddBackToken(next) | 
|  | if next.token_type == tokenize.SYNTAX and next.name == '(': | 
|  | return self._GetMethod([token], FUNCTION_CTOR, None, True) | 
|  | # Fall through--handle like any other method. | 
|  |  | 
|  | # Handle data or function declaration/definition. | 
|  | syntax = tokenize.SYNTAX | 
|  | temp_tokens, last_token = \ | 
|  | self._GetVarTokensUpTo(syntax, '(', ';', '{', '[') | 
|  | temp_tokens.insert(0, token) | 
|  | if last_token.name == '(': | 
|  | # If there is an assignment before the paren, | 
|  | # this is an expression, not a method. | 
|  | expr = bool([e for e in temp_tokens if e.name == '=']) | 
|  | if expr: | 
|  | new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';') | 
|  | temp_tokens.append(last_token) | 
|  | temp_tokens.extend(new_temp) | 
|  | last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0) | 
|  |  | 
|  | if last_token.name == '[': | 
|  | # Handle array, this isn't a method, unless it's an operator. | 
|  | # TODO(nnorwitz): keep the size somewhere. | 
|  | # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']') | 
|  | temp_tokens.append(last_token) | 
|  | if temp_tokens[-2].name == 'operator': | 
|  | temp_tokens.append(self._GetNextToken()) | 
|  | else: | 
|  | temp_tokens2, last_token = \ | 
|  | self._GetVarTokensUpTo(tokenize.SYNTAX, ';') | 
|  | temp_tokens.extend(temp_tokens2) | 
|  |  | 
|  | if last_token.name == ';': | 
|  | # Handle data, this isn't a method. | 
|  | parts = self.converter.DeclarationToParts(temp_tokens, True) | 
|  | (name, type_name, templated_types, modifiers, default, | 
|  | unused_other_tokens) = parts | 
|  |  | 
|  | t0 = temp_tokens[0] | 
|  | names = [t.name for t in temp_tokens] | 
|  | if templated_types: | 
|  | start, end = self.converter.GetTemplateIndices(names) | 
|  | names = names[:start] + names[end:] | 
|  | default = ''.join([t.name for t in default]) | 
|  | return self._CreateVariable(t0, name, type_name, modifiers, | 
|  | names, templated_types, default) | 
|  | if last_token.name == '{': | 
|  | self._AddBackTokens(temp_tokens[1:]) | 
|  | self._AddBackToken(last_token) | 
|  | method_name = temp_tokens[0].name | 
|  | method = getattr(self, 'handle_' + method_name, None) | 
|  | if not method: | 
|  | # Must be declaring a variable. | 
|  | # TODO(nnorwitz): handle the declaration. | 
|  | return None | 
|  | return method() | 
|  | return self._GetMethod(temp_tokens, 0, None, False) | 
|  | elif token.token_type == tokenize.SYNTAX: | 
|  | if token.name == '~' and self.in_class: | 
|  | # Must be a dtor (probably not in method body). | 
|  | token = self._GetNextToken() | 
|  | # self.in_class can contain A::Name, but the dtor will only | 
|  | # be Name.  Make sure to compare against the right value. | 
|  | if (token.token_type == tokenize.NAME and | 
|  | token.name == self.in_class_name_only): | 
|  | return self._GetMethod([token], FUNCTION_DTOR, None, True) | 
|  | # TODO(nnorwitz): handle a lot more syntax. | 
|  | elif token.token_type == tokenize.PREPROCESSOR: | 
|  | # TODO(nnorwitz): handle more preprocessor directives. | 
|  | # token starts with a #, so remove it and strip whitespace. | 
|  | name = token.name[1:].lstrip() | 
|  | if name.startswith('include'): | 
|  | # Remove "include". | 
|  | name = name[7:].strip() | 
|  | assert name | 
|  | # Handle #include \<newline> "header-on-second-line.h". | 
|  | if name.startswith('\\'): | 
|  | name = name[1:].strip() | 
|  | assert name[0] in '<"', token | 
|  | assert name[-1] in '>"', token | 
|  | system = name[0] == '<' | 
|  | filename = name[1:-1] | 
|  | return Include(token.start, token.end, filename, system) | 
|  | if name.startswith('define'): | 
|  | # Remove "define". | 
|  | name = name[6:].strip() | 
|  | assert name | 
|  | value = '' | 
|  | for i, c in enumerate(name): | 
|  | if c.isspace(): | 
|  | value = name[i:].lstrip() | 
|  | name = name[:i] | 
|  | break | 
|  | return Define(token.start, token.end, name, value) | 
|  | if name.startswith('if') and name[2:3].isspace(): | 
|  | condition = name[3:].strip() | 
|  | if condition.startswith('0') or condition.startswith('(0)'): | 
|  | self._SkipIf0Blocks() | 
|  | return None | 
|  |  | 
|  | def _GetTokensUpTo(self, expected_token_type, expected_token): | 
|  | return self._GetVarTokensUpTo(expected_token_type, expected_token)[0] | 
|  |  | 
|  | def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens): | 
|  | last_token = self._GetNextToken() | 
|  | tokens = [] | 
|  | while (last_token.token_type != expected_token_type or | 
|  | last_token.name not in expected_tokens): | 
|  | tokens.append(last_token) | 
|  | last_token = self._GetNextToken() | 
|  | return tokens, last_token | 
|  |  | 
|  | # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary. | 
|  | def _IgnoreUpTo(self, token_type, token): | 
|  | unused_tokens = self._GetTokensUpTo(token_type, token) | 
|  |  | 
|  | def _SkipIf0Blocks(self): | 
|  | count = 1 | 
|  | while 1: | 
|  | token = self._GetNextToken() | 
|  | if token.token_type != tokenize.PREPROCESSOR: | 
|  | continue | 
|  |  | 
|  | name = token.name[1:].lstrip() | 
|  | if name.startswith('endif'): | 
|  | count -= 1 | 
|  | if count == 0: | 
|  | break | 
|  | elif name.startswith('if'): | 
|  | count += 1 | 
|  |  | 
|  | def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None): | 
|  | if GetNextToken is None: | 
|  | GetNextToken = self._GetNextToken | 
|  | # Assumes the current token is open_paren and we will consume | 
|  | # and return up to the close_paren. | 
|  | count = 1 | 
|  | token = GetNextToken() | 
|  | while 1: | 
|  | if token.token_type == tokenize.SYNTAX: | 
|  | if token.name == open_paren: | 
|  | count += 1 | 
|  | elif token.name == close_paren: | 
|  | count -= 1 | 
|  | if count == 0: | 
|  | break | 
|  | yield token | 
|  | token = GetNextToken() | 
|  | yield token | 
|  |  | 
|  | def _GetParameters(self): | 
|  | return self._GetMatchingChar('(', ')') | 
|  |  | 
|  | def GetScope(self): | 
|  | return self._GetMatchingChar('{', '}') | 
|  |  | 
|  | def _GetNextToken(self): | 
|  | if self.token_queue: | 
|  | return self.token_queue.pop() | 
|  | return next(self.tokens) | 
|  |  | 
|  | def _AddBackToken(self, token): | 
|  | if token.whence == tokenize.WHENCE_STREAM: | 
|  | token.whence = tokenize.WHENCE_QUEUE | 
|  | self.token_queue.insert(0, token) | 
|  | else: | 
|  | assert token.whence == tokenize.WHENCE_QUEUE, token | 
|  | self.token_queue.append(token) | 
|  |  | 
|  | def _AddBackTokens(self, tokens): | 
|  | if tokens: | 
|  | if tokens[-1].whence == tokenize.WHENCE_STREAM: | 
|  | for token in tokens: | 
|  | token.whence = tokenize.WHENCE_QUEUE | 
|  | self.token_queue[:0] = reversed(tokens) | 
|  | else: | 
|  | assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens | 
|  | self.token_queue.extend(reversed(tokens)) | 
|  |  | 
|  | def GetName(self, seq=None): | 
|  | """Returns ([tokens], next_token_info).""" | 
|  | GetNextToken = self._GetNextToken | 
|  | if seq is not None: | 
|  | it = iter(seq) | 
|  | GetNextToken = lambda: next(it) | 
|  | next_token = GetNextToken() | 
|  | tokens = [] | 
|  | last_token_was_name = False | 
|  | while (next_token.token_type == tokenize.NAME or | 
|  | (next_token.token_type == tokenize.SYNTAX and | 
|  | next_token.name in ('::', '<'))): | 
|  | # Two NAMEs in a row means the identifier should terminate. | 
|  | # It's probably some sort of variable declaration. | 
|  | if last_token_was_name and next_token.token_type == tokenize.NAME: | 
|  | break | 
|  | last_token_was_name = next_token.token_type == tokenize.NAME | 
|  | tokens.append(next_token) | 
|  | # Handle templated names. | 
|  | if next_token.name == '<': | 
|  | tokens.extend(self._GetMatchingChar('<', '>', GetNextToken)) | 
|  | last_token_was_name = True | 
|  | next_token = GetNextToken() | 
|  | return tokens, next_token | 
|  |  | 
|  | def GetMethod(self, modifiers, templated_types): | 
|  | return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') | 
|  | assert len(return_type_and_name) >= 1 | 
|  | return self._GetMethod(return_type_and_name, modifiers, templated_types, | 
|  | False) | 
|  |  | 
|  | def _GetMethod(self, return_type_and_name, modifiers, templated_types, | 
|  | get_paren): | 
|  | template_portion = None | 
|  | if get_paren: | 
|  | token = self._GetNextToken() | 
|  | assert token.token_type == tokenize.SYNTAX, token | 
|  | if token.name == '<': | 
|  | # Handle templatized dtors. | 
|  | template_portion = [token] | 
|  | template_portion.extend(self._GetMatchingChar('<', '>')) | 
|  | token = self._GetNextToken() | 
|  | assert token.token_type == tokenize.SYNTAX, token | 
|  | assert token.name == '(', token | 
|  |  | 
|  | name = return_type_and_name.pop() | 
|  | # Handle templatized ctors. | 
|  | if name.name == '>': | 
|  | index = 1 | 
|  | while return_type_and_name[index].name != '<': | 
|  | index += 1 | 
|  | template_portion = return_type_and_name[index:] + [name] | 
|  | del return_type_and_name[index:] | 
|  | name = return_type_and_name.pop() | 
|  | elif name.name == ']': | 
|  | rt = return_type_and_name | 
|  | assert rt[-1].name == '[', return_type_and_name | 
|  | assert rt[-2].name == 'operator', return_type_and_name | 
|  | name_seq = return_type_and_name[-2:] | 
|  | del return_type_and_name[-2:] | 
|  | name = tokenize.Token(tokenize.NAME, 'operator[]', | 
|  | name_seq[0].start, name.end) | 
|  | # Get the open paren so _GetParameters() below works. | 
|  | unused_open_paren = self._GetNextToken() | 
|  |  | 
|  | # TODO(nnorwitz): store template_portion. | 
|  | return_type = return_type_and_name | 
|  | indices = name | 
|  | if return_type: | 
|  | indices = return_type[0] | 
|  |  | 
|  | # Force ctor for templatized ctors. | 
|  | if name.name == self.in_class and not modifiers: | 
|  | modifiers |= FUNCTION_CTOR | 
|  | parameters = list(self._GetParameters()) | 
|  | del parameters[-1]              # Remove trailing ')'. | 
|  |  | 
|  | # Handling operator() is especially weird. | 
|  | if name.name == 'operator' and not parameters: | 
|  | token = self._GetNextToken() | 
|  | assert token.name == '(', token | 
|  | parameters = list(self._GetParameters()) | 
|  | del parameters[-1]          # Remove trailing ')'. | 
|  |  | 
|  | token = self._GetNextToken() | 
|  | while token.token_type == tokenize.NAME: | 
|  | modifier_token = token | 
|  | token = self._GetNextToken() | 
|  | if modifier_token.name == 'const': | 
|  | modifiers |= FUNCTION_CONST | 
|  | elif modifier_token.name == '__attribute__': | 
|  | # TODO(nnorwitz): handle more __attribute__ details. | 
|  | modifiers |= FUNCTION_ATTRIBUTE | 
|  | assert token.name == '(', token | 
|  | # Consume everything between the (parens). | 
|  | unused_tokens = list(self._GetMatchingChar('(', ')')) | 
|  | token = self._GetNextToken() | 
|  | elif modifier_token.name == 'throw': | 
|  | modifiers |= FUNCTION_THROW | 
|  | assert token.name == '(', token | 
|  | # Consume everything between the (parens). | 
|  | unused_tokens = list(self._GetMatchingChar('(', ')')) | 
|  | token = self._GetNextToken() | 
|  | elif modifier_token.name == 'override': | 
|  | modifiers |= FUNCTION_OVERRIDE | 
|  | elif modifier_token.name == modifier_token.name.upper(): | 
|  | # HACK(nnorwitz):  assume that all upper-case names | 
|  | # are some macro we aren't expanding. | 
|  | modifiers |= FUNCTION_UNKNOWN_ANNOTATION | 
|  | else: | 
|  | self.HandleError('unexpected token', modifier_token) | 
|  |  | 
|  | assert token.token_type == tokenize.SYNTAX, token | 
|  | # Handle ctor initializers. | 
|  | if token.name == ':': | 
|  | # TODO(nnorwitz): anything else to handle for initializer list? | 
|  | while token.name != ';' and token.name != '{': | 
|  | token = self._GetNextToken() | 
|  |  | 
|  | # Handle pointer to functions that are really data but look | 
|  | # like method declarations. | 
|  | if token.name == '(': | 
|  | if parameters[0].name == '*': | 
|  | # name contains the return type. | 
|  | name = parameters.pop() | 
|  | # parameters contains the name of the data. | 
|  | modifiers = [p.name for p in parameters] | 
|  | # Already at the ( to open the parameter list. | 
|  | function_parameters = list(self._GetMatchingChar('(', ')')) | 
|  | del function_parameters[-1]  # Remove trailing ')'. | 
|  | # TODO(nnorwitz): store the function_parameters. | 
|  | token = self._GetNextToken() | 
|  | assert token.token_type == tokenize.SYNTAX, token | 
|  | assert token.name == ';', token | 
|  | return self._CreateVariable(indices, name.name, indices.name, | 
|  | modifiers, '', None) | 
|  | # At this point, we got something like: | 
|  | #  return_type (type::*name_)(params); | 
|  | # This is a data member called name_ that is a function pointer. | 
|  | # With this code: void (sq_type::*field_)(string&); | 
|  | # We get: name=void return_type=[] parameters=sq_type ... field_ | 
|  | # TODO(nnorwitz): is return_type always empty? | 
|  | # TODO(nnorwitz): this isn't even close to being correct. | 
|  | # Just put in something so we don't crash and can move on. | 
|  | real_name = parameters[-1] | 
|  | modifiers = [p.name for p in self._GetParameters()] | 
|  | del modifiers[-1]           # Remove trailing ')'. | 
|  | return self._CreateVariable(indices, real_name.name, indices.name, | 
|  | modifiers, '', None) | 
|  |  | 
|  | if token.name == '{': | 
|  | body = list(self.GetScope()) | 
|  | del body[-1]                # Remove trailing '}'. | 
|  | else: | 
|  | body = None | 
|  | if token.name == '=': | 
|  | token = self._GetNextToken() | 
|  |  | 
|  | if token.name == 'default' or token.name == 'delete': | 
|  | # Ignore explicitly defaulted and deleted special members | 
|  | # in C++11. | 
|  | token = self._GetNextToken() | 
|  | else: | 
|  | # Handle pure-virtual declarations. | 
|  | assert token.token_type == tokenize.CONSTANT, token | 
|  | assert token.name == '0', token | 
|  | modifiers |= FUNCTION_PURE_VIRTUAL | 
|  | token = self._GetNextToken() | 
|  |  | 
|  | if token.name == '[': | 
|  | # TODO(nnorwitz): store tokens and improve parsing. | 
|  | # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N]; | 
|  | tokens = list(self._GetMatchingChar('[', ']')) | 
|  | token = self._GetNextToken() | 
|  |  | 
|  | assert token.name == ';', (token, return_type_and_name, parameters) | 
|  |  | 
|  | # Looks like we got a method, not a function. | 
|  | if len(return_type) > 2 and return_type[-1].name == '::': | 
|  | return_type, in_class = \ | 
|  | self._GetReturnTypeAndClassName(return_type) | 
|  | return Method(indices.start, indices.end, name.name, in_class, | 
|  | return_type, parameters, modifiers, templated_types, | 
|  | body, self.namespace_stack) | 
|  | return Function(indices.start, indices.end, name.name, return_type, | 
|  | parameters, modifiers, templated_types, body, | 
|  | self.namespace_stack) | 
|  |  | 
|  | def _GetReturnTypeAndClassName(self, token_seq): | 
|  | # Splitting the return type from the class name in a method | 
|  | # can be tricky.  For example, Return::Type::Is::Hard::To::Find(). | 
|  | # Where is the return type and where is the class name? | 
|  | # The heuristic used is to pull the last name as the class name. | 
|  | # This includes all the templated type info. | 
|  | # TODO(nnorwitz): if there is only One name like in the | 
|  | # example above, punt and assume the last bit is the class name. | 
|  |  | 
|  | # Ignore a :: prefix, if exists so we can find the first real name. | 
|  | i = 0 | 
|  | if token_seq[0].name == '::': | 
|  | i = 1 | 
|  | # Ignore a :: suffix, if exists. | 
|  | end = len(token_seq) - 1 | 
|  | if token_seq[end-1].name == '::': | 
|  | end -= 1 | 
|  |  | 
|  | # Make a copy of the sequence so we can append a sentinel | 
|  | # value. This is required for GetName will has to have some | 
|  | # terminating condition beyond the last name. | 
|  | seq_copy = token_seq[i:end] | 
|  | seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0)) | 
|  | names = [] | 
|  | while i < end: | 
|  | # Iterate through the sequence parsing out each name. | 
|  | new_name, next = self.GetName(seq_copy[i:]) | 
|  | assert new_name, 'Got empty new_name, next=%s' % next | 
|  | # We got a pointer or ref.  Add it to the name. | 
|  | if next and next.token_type == tokenize.SYNTAX: | 
|  | new_name.append(next) | 
|  | names.append(new_name) | 
|  | i += len(new_name) | 
|  |  | 
|  | # Now that we have the names, it's time to undo what we did. | 
|  |  | 
|  | # Remove the sentinel value. | 
|  | names[-1].pop() | 
|  | # Flatten the token sequence for the return type. | 
|  | return_type = [e for seq in names[:-1] for e in seq] | 
|  | # The class name is the last name. | 
|  | class_name = names[-1] | 
|  | return return_type, class_name | 
|  |  | 
|  | def handle_bool(self): | 
|  | pass | 
|  |  | 
|  | def handle_char(self): | 
|  | pass | 
|  |  | 
|  | def handle_int(self): | 
|  | pass | 
|  |  | 
|  | def handle_long(self): | 
|  | pass | 
|  |  | 
|  | def handle_short(self): | 
|  | pass | 
|  |  | 
|  | def handle_double(self): | 
|  | pass | 
|  |  | 
|  | def handle_float(self): | 
|  | pass | 
|  |  | 
|  | def handle_void(self): | 
|  | pass | 
|  |  | 
|  | def handle_wchar_t(self): | 
|  | pass | 
|  |  | 
|  | def handle_unsigned(self): | 
|  | pass | 
|  |  | 
|  | def handle_signed(self): | 
|  | pass | 
|  |  | 
|  | def _GetNestedType(self, ctor): | 
|  | name = None | 
|  | name_tokens, token = self.GetName() | 
|  | if name_tokens: | 
|  | name = ''.join([t.name for t in name_tokens]) | 
|  |  | 
|  | # Handle forward declarations. | 
|  | if token.token_type == tokenize.SYNTAX and token.name == ';': | 
|  | return ctor(token.start, token.end, name, None, | 
|  | self.namespace_stack) | 
|  |  | 
|  | if token.token_type == tokenize.NAME and self._handling_typedef: | 
|  | self._AddBackToken(token) | 
|  | return ctor(token.start, token.end, name, None, | 
|  | self.namespace_stack) | 
|  |  | 
|  | # Must be the type declaration. | 
|  | fields = list(self._GetMatchingChar('{', '}')) | 
|  | del fields[-1]                  # Remove trailing '}'. | 
|  | if token.token_type == tokenize.SYNTAX and token.name == '{': | 
|  | next = self._GetNextToken() | 
|  | new_type = ctor(token.start, token.end, name, fields, | 
|  | self.namespace_stack) | 
|  | # A name means this is an anonymous type and the name | 
|  | # is the variable declaration. | 
|  | if next.token_type != tokenize.NAME: | 
|  | return new_type | 
|  | name = new_type | 
|  | token = next | 
|  |  | 
|  | # Must be variable declaration using the type prefixed with keyword. | 
|  | assert token.token_type == tokenize.NAME, token | 
|  | return self._CreateVariable(token, token.name, name, [], '', None) | 
|  |  | 
|  | def handle_struct(self): | 
|  | # Special case the handling typedef/aliasing of structs here. | 
|  | # It would be a pain to handle in the class code. | 
|  | name_tokens, var_token = self.GetName() | 
|  | if name_tokens: | 
|  | next_token = self._GetNextToken() | 
|  | is_syntax = (var_token.token_type == tokenize.SYNTAX and | 
|  | var_token.name[0] in '*&') | 
|  | is_variable = (var_token.token_type == tokenize.NAME and | 
|  | next_token.name == ';') | 
|  | variable = var_token | 
|  | if is_syntax and not is_variable: | 
|  | variable = next_token | 
|  | temp = self._GetNextToken() | 
|  | if temp.token_type == tokenize.SYNTAX and temp.name == '(': | 
|  | # Handle methods declared to return a struct. | 
|  | t0 = name_tokens[0] | 
|  | struct = tokenize.Token(tokenize.NAME, 'struct', | 
|  | t0.start-7, t0.start-2) | 
|  | type_and_name = [struct] | 
|  | type_and_name.extend(name_tokens) | 
|  | type_and_name.extend((var_token, next_token)) | 
|  | return self._GetMethod(type_and_name, 0, None, False) | 
|  | assert temp.name == ';', (temp, name_tokens, var_token) | 
|  | if is_syntax or (is_variable and not self._handling_typedef): | 
|  | modifiers = ['struct'] | 
|  | type_name = ''.join([t.name for t in name_tokens]) | 
|  | position = name_tokens[0] | 
|  | return self._CreateVariable(position, variable.name, type_name, | 
|  | modifiers, var_token.name, None) | 
|  | name_tokens.extend((var_token, next_token)) | 
|  | self._AddBackTokens(name_tokens) | 
|  | else: | 
|  | self._AddBackToken(var_token) | 
|  | return self._GetClass(Struct, VISIBILITY_PUBLIC, None) | 
|  |  | 
|  | def handle_union(self): | 
|  | return self._GetNestedType(Union) | 
|  |  | 
|  | def handle_enum(self): | 
|  | return self._GetNestedType(Enum) | 
|  |  | 
|  | def handle_auto(self): | 
|  | # TODO(nnorwitz): warn about using auto?  Probably not since it | 
|  | # will be reclaimed and useful for C++0x. | 
|  | pass | 
|  |  | 
|  | def handle_register(self): | 
|  | pass | 
|  |  | 
|  | def handle_const(self): | 
|  | pass | 
|  |  | 
|  | def handle_inline(self): | 
|  | pass | 
|  |  | 
|  | def handle_extern(self): | 
|  | pass | 
|  |  | 
|  | def handle_static(self): | 
|  | pass | 
|  |  | 
|  | def handle_virtual(self): | 
|  | # What follows must be a method. | 
|  | token = token2 = self._GetNextToken() | 
|  | if token.name == 'inline': | 
|  | # HACK(nnorwitz): handle inline dtors by ignoring 'inline'. | 
|  | token2 = self._GetNextToken() | 
|  | if token2.token_type == tokenize.SYNTAX and token2.name == '~': | 
|  | return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None) | 
|  | assert token.token_type == tokenize.NAME or token.name == '::', token | 
|  | return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')  # ) | 
|  | return_type_and_name.insert(0, token) | 
|  | if token2 is not token: | 
|  | return_type_and_name.insert(1, token2) | 
|  | return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL, | 
|  | None, False) | 
|  |  | 
|  | def handle_volatile(self): | 
|  | pass | 
|  |  | 
|  | def handle_mutable(self): | 
|  | pass | 
|  |  | 
|  | def handle_public(self): | 
|  | assert self.in_class | 
|  | self.visibility = VISIBILITY_PUBLIC | 
|  |  | 
|  | def handle_protected(self): | 
|  | assert self.in_class | 
|  | self.visibility = VISIBILITY_PROTECTED | 
|  |  | 
|  | def handle_private(self): | 
|  | assert self.in_class | 
|  | self.visibility = VISIBILITY_PRIVATE | 
|  |  | 
|  | def handle_friend(self): | 
|  | tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | 
|  | assert tokens | 
|  | t0 = tokens[0] | 
|  | return Friend(t0.start, t0.end, tokens, self.namespace_stack) | 
|  |  | 
|  | def handle_static_cast(self): | 
|  | pass | 
|  |  | 
|  | def handle_const_cast(self): | 
|  | pass | 
|  |  | 
|  | def handle_dynamic_cast(self): | 
|  | pass | 
|  |  | 
|  | def handle_reinterpret_cast(self): | 
|  | pass | 
|  |  | 
|  | def handle_new(self): | 
|  | pass | 
|  |  | 
|  | def handle_delete(self): | 
|  | tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | 
|  | assert tokens | 
|  | return Delete(tokens[0].start, tokens[0].end, tokens) | 
|  |  | 
|  | def handle_typedef(self): | 
|  | token = self._GetNextToken() | 
|  | if (token.token_type == tokenize.NAME and | 
|  | keywords.IsKeyword(token.name)): | 
|  | # Token must be struct/enum/union/class. | 
|  | method = getattr(self, 'handle_' + token.name) | 
|  | self._handling_typedef = True | 
|  | tokens = [method()] | 
|  | self._handling_typedef = False | 
|  | else: | 
|  | tokens = [token] | 
|  |  | 
|  | # Get the remainder of the typedef up to the semi-colon. | 
|  | tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';')) | 
|  |  | 
|  | # TODO(nnorwitz): clean all this up. | 
|  | assert tokens | 
|  | name = tokens.pop() | 
|  | indices = name | 
|  | if tokens: | 
|  | indices = tokens[0] | 
|  | if not indices: | 
|  | indices = token | 
|  | if name.name == ')': | 
|  | # HACK(nnorwitz): Handle pointers to functions "properly". | 
|  | if (len(tokens) >= 4 and | 
|  | tokens[1].name == '(' and tokens[2].name == '*'): | 
|  | tokens.append(name) | 
|  | name = tokens[3] | 
|  | elif name.name == ']': | 
|  | # HACK(nnorwitz): Handle arrays properly. | 
|  | if len(tokens) >= 2: | 
|  | tokens.append(name) | 
|  | name = tokens[1] | 
|  | new_type = tokens | 
|  | if tokens and isinstance(tokens[0], tokenize.Token): | 
|  | new_type = self.converter.ToType(tokens)[0] | 
|  | return Typedef(indices.start, indices.end, name.name, | 
|  | new_type, self.namespace_stack) | 
|  |  | 
|  | def handle_typeid(self): | 
|  | pass  # Not needed yet. | 
|  |  | 
|  | def handle_typename(self): | 
|  | pass  # Not needed yet. | 
|  |  | 
|  | def _GetTemplatedTypes(self): | 
|  | result = {} | 
|  | tokens = list(self._GetMatchingChar('<', '>')) | 
|  | len_tokens = len(tokens) - 1    # Ignore trailing '>'. | 
|  | i = 0 | 
|  | while i < len_tokens: | 
|  | key = tokens[i].name | 
|  | i += 1 | 
|  | if keywords.IsKeyword(key) or key == ',': | 
|  | continue | 
|  | type_name = default = None | 
|  | if i < len_tokens: | 
|  | i += 1 | 
|  | if tokens[i-1].name == '=': | 
|  | assert i < len_tokens, '%s %s' % (i, tokens) | 
|  | default, unused_next_token = self.GetName(tokens[i:]) | 
|  | i += len(default) | 
|  | else: | 
|  | if tokens[i-1].name != ',': | 
|  | # We got something like: Type variable. | 
|  | # Re-adjust the key (variable) and type_name (Type). | 
|  | key = tokens[i-1].name | 
|  | type_name = tokens[i-2] | 
|  |  | 
|  | result[key] = (type_name, default) | 
|  | return result | 
|  |  | 
|  | def handle_template(self): | 
|  | token = self._GetNextToken() | 
|  | assert token.token_type == tokenize.SYNTAX, token | 
|  | assert token.name == '<', token | 
|  | templated_types = self._GetTemplatedTypes() | 
|  | # TODO(nnorwitz): for now, just ignore the template params. | 
|  | token = self._GetNextToken() | 
|  | if token.token_type == tokenize.NAME: | 
|  | if token.name == 'class': | 
|  | return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types) | 
|  | elif token.name == 'struct': | 
|  | return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types) | 
|  | elif token.name == 'friend': | 
|  | return self.handle_friend() | 
|  | self._AddBackToken(token) | 
|  | tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';') | 
|  | tokens.append(last) | 
|  | self._AddBackTokens(tokens) | 
|  | if last.name == '(': | 
|  | return self.GetMethod(FUNCTION_NONE, templated_types) | 
|  | # Must be a variable definition. | 
|  | return None | 
|  |  | 
|  | def handle_true(self): | 
|  | pass  # Nothing to do. | 
|  |  | 
|  | def handle_false(self): | 
|  | pass  # Nothing to do. | 
|  |  | 
|  | def handle_asm(self): | 
|  | pass  # Not needed yet. | 
|  |  | 
|  | def handle_class(self): | 
|  | return self._GetClass(Class, VISIBILITY_PRIVATE, None) | 
|  |  | 
|  | def _GetBases(self): | 
|  | # Get base classes. | 
|  | bases = [] | 
|  | while 1: | 
|  | token = self._GetNextToken() | 
|  | assert token.token_type == tokenize.NAME, token | 
|  | # TODO(nnorwitz): store kind of inheritance...maybe. | 
|  | if token.name not in ('public', 'protected', 'private'): | 
|  | # If inheritance type is not specified, it is private. | 
|  | # Just put the token back so we can form a name. | 
|  | # TODO(nnorwitz): it would be good to warn about this. | 
|  | self._AddBackToken(token) | 
|  | else: | 
|  | # Check for virtual inheritance. | 
|  | token = self._GetNextToken() | 
|  | if token.name != 'virtual': | 
|  | self._AddBackToken(token) | 
|  | else: | 
|  | # TODO(nnorwitz): store that we got virtual for this base. | 
|  | pass | 
|  | base, next_token = self.GetName() | 
|  | bases_ast = self.converter.ToType(base) | 
|  | assert len(bases_ast) == 1, bases_ast | 
|  | bases.append(bases_ast[0]) | 
|  | assert next_token.token_type == tokenize.SYNTAX, next_token | 
|  | if next_token.name == '{': | 
|  | token = next_token | 
|  | break | 
|  | # Support multiple inheritance. | 
|  | assert next_token.name == ',', next_token | 
|  | return bases, token | 
|  |  | 
|  | def _GetClass(self, class_type, visibility, templated_types): | 
|  | class_name = None | 
|  | class_token = self._GetNextToken() | 
|  | if class_token.token_type != tokenize.NAME: | 
|  | assert class_token.token_type == tokenize.SYNTAX, class_token | 
|  | token = class_token | 
|  | else: | 
|  | # Skip any macro (e.g. storage class specifiers) after the | 
|  | # 'class' keyword. | 
|  | next_token = self._GetNextToken() | 
|  | if next_token.token_type == tokenize.NAME: | 
|  | self._AddBackToken(next_token) | 
|  | else: | 
|  | self._AddBackTokens([class_token, next_token]) | 
|  | name_tokens, token = self.GetName() | 
|  | class_name = ''.join([t.name for t in name_tokens]) | 
|  | bases = None | 
|  | if token.token_type == tokenize.SYNTAX: | 
|  | if token.name == ';': | 
|  | # Forward declaration. | 
|  | return class_type(class_token.start, class_token.end, | 
|  | class_name, None, templated_types, None, | 
|  | self.namespace_stack) | 
|  | if token.name in '*&': | 
|  | # Inline forward declaration.  Could be method or data. | 
|  | name_token = self._GetNextToken() | 
|  | next_token = self._GetNextToken() | 
|  | if next_token.name == ';': | 
|  | # Handle data | 
|  | modifiers = ['class'] | 
|  | return self._CreateVariable(class_token, name_token.name, | 
|  | class_name, | 
|  | modifiers, token.name, None) | 
|  | else: | 
|  | # Assume this is a method. | 
|  | tokens = (class_token, token, name_token, next_token) | 
|  | self._AddBackTokens(tokens) | 
|  | return self.GetMethod(FUNCTION_NONE, None) | 
|  | if token.name == ':': | 
|  | bases, token = self._GetBases() | 
|  |  | 
|  | body = None | 
|  | if token.token_type == tokenize.SYNTAX and token.name == '{': | 
|  | assert token.token_type == tokenize.SYNTAX, token | 
|  | assert token.name == '{', token | 
|  |  | 
|  | ast = AstBuilder(self.GetScope(), self.filename, class_name, | 
|  | visibility, self.namespace_stack) | 
|  | body = list(ast.Generate()) | 
|  |  | 
|  | if not self._handling_typedef: | 
|  | token = self._GetNextToken() | 
|  | if token.token_type != tokenize.NAME: | 
|  | assert token.token_type == tokenize.SYNTAX, token | 
|  | assert token.name == ';', token | 
|  | else: | 
|  | new_class = class_type(class_token.start, class_token.end, | 
|  | class_name, bases, None, | 
|  | body, self.namespace_stack) | 
|  |  | 
|  | modifiers = [] | 
|  | return self._CreateVariable(class_token, | 
|  | token.name, new_class, | 
|  | modifiers, token.name, None) | 
|  | else: | 
|  | if not self._handling_typedef: | 
|  | self.HandleError('non-typedef token', token) | 
|  | self._AddBackToken(token) | 
|  |  | 
|  | return class_type(class_token.start, class_token.end, class_name, | 
|  | bases, templated_types, body, self.namespace_stack) | 
|  |  | 
|  | def handle_namespace(self): | 
|  | token = self._GetNextToken() | 
|  | # Support anonymous namespaces. | 
|  | name = None | 
|  | if token.token_type == tokenize.NAME: | 
|  | name = token.name | 
|  | token = self._GetNextToken() | 
|  | self.namespace_stack.append(name) | 
|  | assert token.token_type == tokenize.SYNTAX, token | 
|  | # Create an internal token that denotes when the namespace is complete. | 
|  | internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP, | 
|  | None, None) | 
|  | internal_token.whence = token.whence | 
|  | if token.name == '=': | 
|  | # TODO(nnorwitz): handle aliasing namespaces. | 
|  | name, next_token = self.GetName() | 
|  | assert next_token.name == ';', next_token | 
|  | self._AddBackToken(internal_token) | 
|  | else: | 
|  | assert token.name == '{', token | 
|  | tokens = list(self.GetScope()) | 
|  | # Replace the trailing } with the internal namespace pop token. | 
|  | tokens[-1] = internal_token | 
|  | # Handle namespace with nothing in it. | 
|  | self._AddBackTokens(tokens) | 
|  | return None | 
|  |  | 
|  | def handle_using(self): | 
|  | tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | 
|  | assert tokens | 
|  | return Using(tokens[0].start, tokens[0].end, tokens) | 
|  |  | 
|  | def handle_explicit(self): | 
|  | assert self.in_class | 
|  | # Nothing much to do. | 
|  | # TODO(nnorwitz): maybe verify the method name == class name. | 
|  | # This must be a ctor. | 
|  | return self.GetMethod(FUNCTION_CTOR, None) | 
|  |  | 
|  | def handle_this(self): | 
|  | pass  # Nothing to do. | 
|  |  | 
|  | def handle_operator(self): | 
|  | # Pull off the next token(s?) and make that part of the method name. | 
|  | pass | 
|  |  | 
|  | def handle_sizeof(self): | 
|  | pass | 
|  |  | 
|  | def handle_case(self): | 
|  | pass | 
|  |  | 
|  | def handle_switch(self): | 
|  | pass | 
|  |  | 
|  | def handle_default(self): | 
|  | token = self._GetNextToken() | 
|  | assert token.token_type == tokenize.SYNTAX | 
|  | assert token.name == ':' | 
|  |  | 
|  | def handle_if(self): | 
|  | pass | 
|  |  | 
|  | def handle_else(self): | 
|  | pass | 
|  |  | 
|  | def handle_return(self): | 
|  | tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | 
|  | if not tokens: | 
|  | return Return(self.current_token.start, self.current_token.end, None) | 
|  | return Return(tokens[0].start, tokens[0].end, tokens) | 
|  |  | 
|  | def handle_goto(self): | 
|  | tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | 
|  | assert len(tokens) == 1, str(tokens) | 
|  | return Goto(tokens[0].start, tokens[0].end, tokens[0].name) | 
|  |  | 
|  | def handle_try(self): | 
|  | pass  # Not needed yet. | 
|  |  | 
|  | def handle_catch(self): | 
|  | pass  # Not needed yet. | 
|  |  | 
|  | def handle_throw(self): | 
|  | pass  # Not needed yet. | 
|  |  | 
|  | def handle_while(self): | 
|  | pass | 
|  |  | 
|  | def handle_do(self): | 
|  | pass | 
|  |  | 
|  | def handle_for(self): | 
|  | pass | 
|  |  | 
|  | def handle_break(self): | 
|  | self._IgnoreUpTo(tokenize.SYNTAX, ';') | 
|  |  | 
|  | def handle_continue(self): | 
|  | self._IgnoreUpTo(tokenize.SYNTAX, ';') | 
|  |  | 
|  |  | 
|  | def BuilderFromSource(source, filename): | 
|  | """Utility method that returns an AstBuilder from source code. | 
|  |  | 
|  | Args: | 
|  | source: 'C++ source code' | 
|  | filename: 'file1' | 
|  |  | 
|  | Returns: | 
|  | AstBuilder | 
|  | """ | 
|  | return AstBuilder(tokenize.GetTokens(source), filename) | 
|  |  | 
|  |  | 
|  | def PrintIndentifiers(filename, should_print): | 
|  | """Prints all identifiers for a C++ source file. | 
|  |  | 
|  | Args: | 
|  | filename: 'file1' | 
|  | should_print: predicate with signature: bool Function(token) | 
|  | """ | 
|  | source = utils.ReadFile(filename, False) | 
|  | if source is None: | 
|  | sys.stderr.write('Unable to find: %s\n' % filename) | 
|  | return | 
|  |  | 
|  | #print('Processing %s' % actual_filename) | 
|  | builder = BuilderFromSource(source, filename) | 
|  | try: | 
|  | for node in builder.Generate(): | 
|  | if should_print(node): | 
|  | print(node.name) | 
|  | except KeyboardInterrupt: | 
|  | return | 
|  | except: | 
|  | pass | 
|  |  | 
|  |  | 
|  | def PrintAllIndentifiers(filenames, should_print): | 
|  | """Prints all identifiers for each C++ source file in filenames. | 
|  |  | 
|  | Args: | 
|  | filenames: ['file1', 'file2', ...] | 
|  | should_print: predicate with signature: bool Function(token) | 
|  | """ | 
|  | for path in filenames: | 
|  | PrintIndentifiers(path, should_print) | 
|  |  | 
|  |  | 
|  | def main(argv): | 
|  | for filename in argv[1:]: | 
|  | source = utils.ReadFile(filename) | 
|  | if source is None: | 
|  | continue | 
|  |  | 
|  | print('Processing %s' % filename) | 
|  | builder = BuilderFromSource(source, filename) | 
|  | try: | 
|  | entire_ast = filter(None, builder.Generate()) | 
|  | except KeyboardInterrupt: | 
|  | return | 
|  | except: | 
|  | # Already printed a warning, print the traceback and continue. | 
|  | traceback.print_exc() | 
|  | else: | 
|  | if utils.DEBUG: | 
|  | for ast in entire_ast: | 
|  | print(ast) | 
|  |  | 
|  |  | 
|  | if __name__ == '__main__': | 
|  | main(sys.argv) |