1 files changed, 0 insertions, 463 deletions
diff --git a/tools/closure_linter/closure_linter/javascripttokenizer.py b/tools/closure_linter/closure_linter/javascripttokenizer.py
deleted file mode 100755
index 2ee5b81ee1..0000000000
--- a/tools/closure_linter/closure_linter/javascripttokenizer.py
+++ /dev/null
@@ -1,463 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS-IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Regular expression based JavaScript parsing classes."""
-
-__author__ = ('robbyw@google.com (Robert Walker)',
-              'ajp@google.com (Andy Perelson)')
-
-import copy
-import re
-
-from closure_linter import javascripttokens
-from closure_linter.common import matcher
-from closure_linter.common import tokenizer
-
-# Shorthand
-Type = javascripttokens.JavaScriptTokenType
-Matcher = matcher.Matcher
-
-
-class JavaScriptModes(object):
-  """Enumeration of the different matcher modes used for JavaScript."""
-  TEXT_MODE = 'text'
-  SINGLE_QUOTE_STRING_MODE = 'single_quote_string'
-  DOUBLE_QUOTE_STRING_MODE = 'double_quote_string'
-  BLOCK_COMMENT_MODE = 'block_comment'
-  DOC_COMMENT_MODE = 'doc_comment'
-  DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces'
-  LINE_COMMENT_MODE = 'line_comment'
-  PARAMETER_MODE = 'parameter'
-  FUNCTION_MODE = 'function'
-
-
-class JavaScriptTokenizer(tokenizer.Tokenizer):
-  """JavaScript tokenizer.
-
-  Convert JavaScript code in to an array of tokens.
-  """
-
-  # Useful patterns for JavaScript parsing.
-  IDENTIFIER_CHAR = r'A-Za-z0-9_$'
-
-  # Number patterns based on:
-  # http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html
-  MANTISSA = r"""
-             (\d+(?!\.)) |                # Matches '10'
-             (\d+\.(?!\d)) |              # Matches '10.'
-             (\d*\.\d+)                   # Matches '.5' or '10.5'
-             """
-  DECIMAL_LITERAL = r'(%s)([eE][-+]?\d+)?' % MANTISSA
-  HEX_LITERAL = r'0[xX][0-9a-fA-F]+'
-  NUMBER = re.compile(r"""
-                      ((%s)|(%s))
-                      """ % (HEX_LITERAL, DECIMAL_LITERAL), re.VERBOSE)
-
-  # Strings come in three parts - first we match the start of the string, then
-  # the contents, then the end.  The contents consist of any character except a
-  # backslash or end of string, or a backslash followed by any character, or a
-  # backslash followed by end of line to support correct parsing of multi-line
-  # strings.
-  SINGLE_QUOTE = re.compile(r"'")
-  SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+")
-  DOUBLE_QUOTE = re.compile(r'"')
-  DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+')
-
-  START_SINGLE_LINE_COMMENT = re.compile(r'//')
-  END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$')
-
-  START_DOC_COMMENT = re.compile(r'/\*\*')
-  START_BLOCK_COMMENT = re.compile(r'/\*')
-  END_BLOCK_COMMENT = re.compile(r'\*/')
-  BLOCK_COMMENT_TEXT = re.compile(r'([^*]|\*(?!/))+')
-
-  # Comment text is anything that we are not going to parse into another special
-  # token like (inline) flags or end comments. Complicated regex to match
-  # most normal characters, and '*', '{', '}', and '@' when we are sure that
-  # it is safe. Expression [^*{\s]@ must come first, or the other options will
-  # match everything before @, and we won't match @'s that aren't part of flags
-  # like in email addresses in the @author tag.
-  DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+')
-  DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+')
-  # Match anything that is allowed in a type definition, except for tokens
-  # needed to parse it (and the lookahead assertion for "*/").
-  DOC_COMMENT_TYPE_TEXT = re.compile(r'([^*|!?=<>(){}:,\s]|\*(?!/))+')
-
-  # Match the prefix ' * ' that starts every line of jsdoc. Want to include
-  # spaces after the '*', but nothing else that occurs after a '*', and don't
-  # want to match the '*' in '*/'.
-  DOC_PREFIX = re.compile(r'\s*\*(\s+|(?!/))')
-
-  START_BLOCK = re.compile('{')
-  END_BLOCK = re.compile('}')
-
-  REGEX_CHARACTER_CLASS = r"""
-                          \[               # Opening bracket
-                          ([^\]\\]|\\.)*   # Anything but a ] or \,
-                                           # or a backslash followed by anything
-                          \]               # Closing bracket
-                          """
-  # We ensure the regex is followed by one of the above tokens to avoid
-  # incorrectly parsing something like x / y / z as x REGEX(/ y /) z
-  POST_REGEX_LIST = [
-      ';', ',', r'\.', r'\)', r'\]', '$', r'\/\/', r'\/\*', ':', '}']
-
-  REGEX = re.compile(r"""
-                     /                      # opening slash
-                     (?!\*)                 # not the start of a comment
-                     (\\.|[^\[\/\\]|(%s))*  # a backslash followed by anything,
-                                            # or anything but a / or [ or \,
-                                            # or a character class
-                     /                      # closing slash
-                     [gimsx]*               # optional modifiers
-                     (?=\s*(%s))
-                     """ % (REGEX_CHARACTER_CLASS, '|'.join(POST_REGEX_LIST)),
-                     re.VERBOSE)
-
-  ANYTHING = re.compile(r'.*')
-  PARAMETERS = re.compile(r'[^\)]+')
-  CLOSING_PAREN_WITH_SPACE = re.compile(r'\)\s*')
-
-  FUNCTION_DECLARATION = re.compile(r'\bfunction\b')
-
-  OPENING_PAREN = re.compile(r'\(')
-  CLOSING_PAREN = re.compile(r'\)')
-
-  OPENING_BRACKET = re.compile(r'\[')
-  CLOSING_BRACKET = re.compile(r'\]')
-
-  # We omit these JS keywords from the list:
-  #   function - covered by FUNCTION_DECLARATION.
-  #   delete, in, instanceof, new, typeof - included as operators.
-  #   this - included in identifiers.
-  #   null, undefined - not included, should go in some "special constant" list.
-  KEYWORD_LIST = [
-      'break',
-      'case',
-      'catch',
-      'continue',
-      'default',
-      'do',
-      'else',
-      'finally',
-      'for',
-      'if',
-      'return',
-      'switch',
-      'throw',
-      'try',
-      'var',
-      'while',
-      'with',
-  ]
-
-  # List of regular expressions to match as operators.  Some notes: for our
-  # purposes, the comma behaves similarly enough to a normal operator that we
-  # include it here.  r'\bin\b' actually matches 'in' surrounded by boundary
-  # characters - this may not match some very esoteric uses of the in operator.
-  # Operators that are subsets of larger operators must come later in this list
-  # for proper matching, e.g., '>>' must come AFTER '>>>'.
-  OPERATOR_LIST = [
-      ',',
-      r'\+\+',
-      '===',
-      '!==',
-      '>>>=',
-      '>>>',
-      '==',
-      '>=',
-      '<=',
-      '!=',
-      '<<=',
-      '>>=',
-      '<<',
-      '>>',
-      '=>',
-      '>',
-      '<',
-      r'\+=',
-      r'\+',
-      '--',
-      r'\^=',
-      '-=',
-      '-',
-      '/=',
-      '/',
-      r'\*=',
-      r'\*',
-      '%=',
-      '%',
-      '&&',
-      r'\|\|',
-      '&=',
-      '&',
-      r'\|=',
-      r'\|',
-      '=',
-      '!',
-      ':',
-      r'\?',
-      r'\^',
-      r'\bdelete\b',
-      r'\bin\b',
-      r'\binstanceof\b',
-      r'\bnew\b',
-      r'\btypeof\b',
-      r'\bvoid\b',
-      r'\.',
-  ]
-  OPERATOR = re.compile('|'.join(OPERATOR_LIST))
-
-  WHITESPACE = re.compile(r'\s+')
-  SEMICOLON = re.compile(r';')
-  # Technically JavaScript identifiers can't contain '.', but we treat a set of
-  # nested identifiers as a single identifier, except for trailing dots.
-  NESTED_IDENTIFIER = r'[a-zA-Z_$]([%s]|\.[a-zA-Z_$])*' % IDENTIFIER_CHAR
-  IDENTIFIER = re.compile(NESTED_IDENTIFIER)
-
-  SIMPLE_LVALUE = re.compile(r"""
-                             (?P<identifier>%s)      # a valid identifier
-                             (?=\s*                  # optional whitespace
-                             \=                      # look ahead to equal sign
-                             (?!=))                  # not follwed by equal
-                             """ % NESTED_IDENTIFIER, re.VERBOSE)
-
-  # A doc flag is a @ sign followed by non-space characters that appears at the
-  # beginning of the line, after whitespace, or after a '{'.  The look-behind
-  # check is necessary to not match someone@google.com as a flag.
-  DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)')
-  # To properly parse parameter names and complex doctypes containing
-  # whitespace, we need to tokenize whitespace into a token after certain
-  # doctags. All statetracker.HAS_TYPE that are not listed here must not contain
-  # any whitespace in their types.
-  DOC_FLAG_LEX_SPACES = re.compile(
-      r'(^|(?<=\s))@(?P<name>%s)\b' %
-      '|'.join([
-          'const',
-          'enum',
-          'extends',
-          'final',
-          'implements',
-          'param',
-          'private',
-          'protected',
-          'public',
-          'return',
-          'type',
-          'typedef'
-      ]))
-
-  DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)')
-
-  DOC_TYPE_BLOCK_START = re.compile(r'[<(]')
-  DOC_TYPE_BLOCK_END = re.compile(r'[>)]')
-  DOC_TYPE_MODIFIERS = re.compile(r'[!?|,:=]')
-
-  # Star followed by non-slash, i.e a star that does not end a comment.
-  # This is used for TYPE_GROUP below.
-  SAFE_STAR = r'(\*(?!/))'
-
-  COMMON_DOC_MATCHERS = [
-      # Find the end of the comment.
-      Matcher(END_BLOCK_COMMENT, Type.END_DOC_COMMENT,
-              JavaScriptModes.TEXT_MODE),
-
-      # Tokenize documented flags like @private.
-      Matcher(DOC_INLINE_FLAG, Type.DOC_INLINE_FLAG),
-      Matcher(DOC_FLAG_LEX_SPACES, Type.DOC_FLAG,
-              JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE),
-
-      # Encountering a doc flag should leave lex spaces mode.
-      Matcher(DOC_FLAG, Type.DOC_FLAG, JavaScriptModes.DOC_COMMENT_MODE),
-
-      # Tokenize braces so we can find types.
-      Matcher(START_BLOCK, Type.DOC_START_BRACE),
-      Matcher(END_BLOCK, Type.DOC_END_BRACE),
-
-      # And some more to parse types.
-      Matcher(DOC_TYPE_BLOCK_START, Type.DOC_TYPE_START_BLOCK),
-      Matcher(DOC_TYPE_BLOCK_END, Type.DOC_TYPE_END_BLOCK),
-
-      Matcher(DOC_TYPE_MODIFIERS, Type.DOC_TYPE_MODIFIER),
-      Matcher(DOC_COMMENT_TYPE_TEXT, Type.COMMENT),
-
-      Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)]
-
-  # When text is not matched, it is given this default type based on mode.
-  # If unspecified in this map, the default default is Type.NORMAL.
-  JAVASCRIPT_DEFAULT_TYPES = {
-      JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT,
-      JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT
-  }
-
-  @classmethod
-  def BuildMatchers(cls):
-    """Builds the token matcher group.
-
-    The token matcher groups work as follows: it is a list of Matcher objects.
-    The matchers will be tried in this order, and the first to match will be
-    returned.  Hence the order is important because the matchers that come first
-    overrule the matchers that come later.
-
-    Returns:
-      The completed token matcher group.
-    """
-    # Match a keyword string followed by a non-identifier character in order to
-    # not match something like doSomething as do + Something.
-    keyword = re.compile('(%s)((?=[^%s])|$)' % (
-        '|'.join(cls.KEYWORD_LIST), cls.IDENTIFIER_CHAR))
-    return {
-
-        # Matchers for basic text mode.
-        JavaScriptModes.TEXT_MODE: [
-            # Check a big group - strings, starting comments, and regexes - all
-            # of which could be intertwined.  'string with /regex/',
-            # /regex with 'string'/, /* comment with /regex/ and string */ (and
-            # so on)
-            Matcher(cls.START_DOC_COMMENT, Type.START_DOC_COMMENT,
-                    JavaScriptModes.DOC_COMMENT_MODE),
-            Matcher(cls.START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT,
-                    JavaScriptModes.BLOCK_COMMENT_MODE),
-            Matcher(cls.END_OF_LINE_SINGLE_LINE_COMMENT,
-                    Type.START_SINGLE_LINE_COMMENT),
-            Matcher(cls.START_SINGLE_LINE_COMMENT,
-                    Type.START_SINGLE_LINE_COMMENT,
-                    JavaScriptModes.LINE_COMMENT_MODE),
-            Matcher(cls.SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START,
-                    JavaScriptModes.SINGLE_QUOTE_STRING_MODE),
-            Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START,
-                    JavaScriptModes.DOUBLE_QUOTE_STRING_MODE),
-            Matcher(cls.REGEX, Type.REGEX),
-
-            # Next we check for start blocks appearing outside any of the items
-            # above.
-            Matcher(cls.START_BLOCK, Type.START_BLOCK),
-            Matcher(cls.END_BLOCK, Type.END_BLOCK),
-
-            # Then we search for function declarations.
-            Matcher(cls.FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION,
-                    JavaScriptModes.FUNCTION_MODE),
-
-            # Next, we convert non-function related parens to tokens.
-            Matcher(cls.OPENING_PAREN, Type.START_PAREN),
-            Matcher(cls.CLOSING_PAREN, Type.END_PAREN),
-
-            # Next, we convert brackets to tokens.
-            Matcher(cls.OPENING_BRACKET, Type.START_BRACKET),
-            Matcher(cls.CLOSING_BRACKET, Type.END_BRACKET),
-
-            # Find numbers.  This has to happen before operators because
-            # scientific notation numbers can have + and - in them.
-            Matcher(cls.NUMBER, Type.NUMBER),
-
-            # Find operators and simple assignments
-            Matcher(cls.SIMPLE_LVALUE, Type.SIMPLE_LVALUE),
-            Matcher(cls.OPERATOR, Type.OPERATOR),
-
-            # Find key words and whitespace.
-            Matcher(keyword, Type.KEYWORD),
-            Matcher(cls.WHITESPACE, Type.WHITESPACE),
-
-            # Find identifiers.
-            Matcher(cls.IDENTIFIER, Type.IDENTIFIER),
-
-            # Finally, we convert semicolons to tokens.
-            Matcher(cls.SEMICOLON, Type.SEMICOLON)],
-
-        # Matchers for single quote strings.
-        JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [
-            Matcher(cls.SINGLE_QUOTE_TEXT, Type.STRING_TEXT),
-            Matcher(cls.SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END,
-                    JavaScriptModes.TEXT_MODE)],
-
-        # Matchers for double quote strings.
-        JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [
-            Matcher(cls.DOUBLE_QUOTE_TEXT, Type.STRING_TEXT),
-            Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END,
-                    JavaScriptModes.TEXT_MODE)],
-
-        # Matchers for block comments.
-        JavaScriptModes.BLOCK_COMMENT_MODE: [
-            # First we check for exiting a block comment.
-            Matcher(cls.END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT,
-                    JavaScriptModes.TEXT_MODE),
-
-            # Match non-comment-ending text..
-            Matcher(cls.BLOCK_COMMENT_TEXT, Type.COMMENT)],
-
-        # Matchers for doc comments.
-        JavaScriptModes.DOC_COMMENT_MODE: cls.COMMON_DOC_MATCHERS + [
-            Matcher(cls.DOC_COMMENT_TEXT, Type.COMMENT)],
-
-        JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: cls.COMMON_DOC_MATCHERS + [
-            Matcher(cls.WHITESPACE, Type.COMMENT),
-            Matcher(cls.DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)],
-
-        # Matchers for single line comments.
-        JavaScriptModes.LINE_COMMENT_MODE: [
-            # We greedy match until the end of the line in line comment mode.
-            Matcher(cls.ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)],
-
-        # Matchers for code after the function keyword.
-        JavaScriptModes.FUNCTION_MODE: [
-            # Must match open paren before anything else and move into parameter
-            # mode, otherwise everything inside the parameter list is parsed
-            # incorrectly.
-            Matcher(cls.OPENING_PAREN, Type.START_PARAMETERS,
-                    JavaScriptModes.PARAMETER_MODE),
-            Matcher(cls.WHITESPACE, Type.WHITESPACE),
-            Matcher(cls.IDENTIFIER, Type.FUNCTION_NAME)],
-
-        # Matchers for function parameters
-        JavaScriptModes.PARAMETER_MODE: [
-            # When in function parameter mode, a closing paren is treated
-            # specially. Everything else is treated as lines of parameters.
-            Matcher(cls.CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS,
-                    JavaScriptModes.TEXT_MODE),
-            Matcher(cls.PARAMETERS, Type.PARAMETERS,
-                    JavaScriptModes.PARAMETER_MODE)]}
-
-  def __init__(self, parse_js_doc=True):
-    """Create a tokenizer object.
-
-    Args:
-      parse_js_doc: Whether to do detailed parsing of javascript doc comments,
-          or simply treat them as normal comments.  Defaults to parsing JsDoc.
-    """
-    matchers = self.BuildMatchers()
-    if not parse_js_doc:
-      # Make a copy so the original doesn't get modified.
-      matchers = copy.deepcopy(matchers)
-      matchers[JavaScriptModes.DOC_COMMENT_MODE] = matchers[
-          JavaScriptModes.BLOCK_COMMENT_MODE]
-
-    tokenizer.Tokenizer.__init__(self, JavaScriptModes.TEXT_MODE, matchers,
-        self.JAVASCRIPT_DEFAULT_TYPES)
-
-  def _CreateToken(self, string, token_type, line, line_number, values=None):
-    """Creates a new JavaScriptToken object.
-
-    Args:
-      string: The string of input the token contains.
-      token_type: The type of token.
-      line: The text of the line this token is in.
-      line_number: The line number of the token.
-      values: A dict of named values within the token.  For instance, a
-        function declaration may have a value called 'name' which captures the
-        name of the function.
-    """
-    return javascripttokens.JavaScriptToken(string, token_type, line,
-                                            line_number, values, line_number)