diff options
Diffstat (limited to 'tools/closure_linter/closure_linter/indentation.py')
-rwxr-xr-x | tools/closure_linter/closure_linter/indentation.py | 617 |
1 files changed, 0 insertions, 617 deletions
diff --git a/tools/closure_linter/closure_linter/indentation.py b/tools/closure_linter/closure_linter/indentation.py deleted file mode 100755 index d48ad2b862..0000000000 --- a/tools/closure_linter/closure_linter/indentation.py +++ /dev/null @@ -1,617 +0,0 @@ -#!/usr/bin/env python -# Copyright 2010 The Closure Linter Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Methods for checking EcmaScript files for indentation issues.""" - -__author__ = ('robbyw@google.com (Robert Walker)') - -import gflags as flags - -from closure_linter import ecmametadatapass -from closure_linter import errors -from closure_linter import javascripttokens -from closure_linter import tokenutil -from closure_linter.common import error -from closure_linter.common import position - - -flags.DEFINE_boolean('debug_indentation', False, - 'Whether to print debugging information for indentation.') - - -# Shorthand -Context = ecmametadatapass.EcmaContext -Error = error.Error -Position = position.Position -Type = javascripttokens.JavaScriptTokenType - - -# The general approach: -# -# 1. Build a stack of tokens that can affect indentation. -# For each token, we determine if it is a block or continuation token. -# Some tokens need to be temporarily overwritten in case they are removed -# before the end of the line. -# Much of the work here is determining which tokens to keep on the stack -# at each point. Operators, for example, should be removed once their -# expression or line is gone, while parentheses must stay until the matching -# end parentheses is found. -# -# 2. Given that stack, determine the allowable indentations. -# Due to flexible indentation rules in JavaScript, there may be many -# allowable indentations for each stack. We follows the general -# "no false positives" approach of GJsLint and build the most permissive -# set possible. - - -class TokenInfo(object): - """Stores information about a token. - - Attributes: - token: The token - is_block: Whether the token represents a block indentation. - is_transient: Whether the token should be automatically removed without - finding a matching end token. - overridden_by: TokenInfo for a token that overrides the indentation that - this token would require. - is_permanent_override: Whether the override on this token should persist - even after the overriding token is removed from the stack. For example: - x([ - 1], - 2); - needs this to be set so the last line is not required to be a continuation - indent. - line_number: The effective line number of this token. Will either be the - actual line number or the one before it in the case of a mis-wrapped - operator. - """ - - def __init__(self, token, is_block=False): - """Initializes a TokenInfo object. - - Args: - token: The token - is_block: Whether the token represents a block indentation. - """ - self.token = token - self.overridden_by = None - self.is_permanent_override = False - self.is_block = is_block - self.is_transient = not is_block and token.type not in ( - Type.START_PAREN, Type.START_PARAMETERS) - self.line_number = token.line_number - - def __repr__(self): - result = '\n %s' % self.token - if self.overridden_by: - result = '%s OVERRIDDEN [by "%s"]' % ( - result, self.overridden_by.token.string) - result += ' {is_block: %s, is_transient: %s}' % ( - self.is_block, self.is_transient) - return result - - -class IndentationRules(object): - """EmcaScript indentation rules. - - Can be used to find common indentation errors in JavaScript, ActionScript and - other Ecma like scripting languages. - """ - - def __init__(self): - """Initializes the IndentationRules checker.""" - self._stack = [] - - # Map from line number to number of characters it is off in indentation. - self._start_index_offset = {} - - def Finalize(self): - if self._stack: - old_stack = self._stack - self._stack = [] - raise Exception('INTERNAL ERROR: indentation stack is not empty: %r' % - old_stack) - - def CheckToken(self, token, state): - """Checks a token for indentation errors. - - Args: - token: The current token under consideration - state: Additional information about the current tree state - - Returns: - An error array [error code, error string, error token] if the token is - improperly indented, or None if indentation is correct. - """ - - token_type = token.type - indentation_errors = [] - stack = self._stack - is_first = self._IsFirstNonWhitespaceTokenInLine(token) - - # Add tokens that could decrease indentation before checking. - if token_type == Type.END_PAREN: - self._PopTo(Type.START_PAREN) - - elif token_type == Type.END_PARAMETERS: - self._PopTo(Type.START_PARAMETERS) - - elif token_type == Type.END_BRACKET: - self._PopTo(Type.START_BRACKET) - - elif token_type == Type.END_BLOCK: - start_token = self._PopTo(Type.START_BLOCK) - # Check for required goog.scope comment. - if start_token: - goog_scope = tokenutil.GoogScopeOrNoneFromStartBlock(start_token.token) - if goog_scope is not None: - if not token.line.endswith('; // goog.scope\n'): - if (token.line.find('//') > -1 and - token.line.find('goog.scope') > - token.line.find('//')): - indentation_errors.append([ - errors.MALFORMED_END_OF_SCOPE_COMMENT, - ('Malformed end of goog.scope comment. Please use the ' - 'exact following syntax to close the scope:\n' - '}); // goog.scope'), - token, - Position(token.start_index, token.length)]) - else: - indentation_errors.append([ - errors.MISSING_END_OF_SCOPE_COMMENT, - ('Missing comment for end of goog.scope which opened at line ' - '%d. End the scope with:\n' - '}); // goog.scope' % - (start_token.line_number)), - token, - Position(token.start_index, token.length)]) - - elif token_type == Type.KEYWORD and token.string in ('case', 'default'): - self._Add(self._PopTo(Type.START_BLOCK)) - - elif token_type == Type.SEMICOLON: - self._PopTransient() - - if (is_first and - token_type not in (Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT)): - if flags.FLAGS.debug_indentation: - print 'Line #%d: stack %r' % (token.line_number, stack) - - # Ignore lines that start in JsDoc since we don't check them properly yet. - # TODO(robbyw): Support checking JsDoc indentation. - # Ignore lines that start as multi-line strings since indentation is N/A. - # Ignore lines that start with operators since we report that already. - # Ignore lines with tabs since we report that already. - expected = self._GetAllowableIndentations() - actual = self._GetActualIndentation(token) - - # Special case comments describing else, case, and default. Allow them - # to outdent to the parent block. - if token_type in Type.COMMENT_TYPES: - next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES) - if next_code and next_code.type == Type.END_BLOCK: - next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES) - if next_code and next_code.string in ('else', 'case', 'default'): - # TODO(robbyw): This almost certainly introduces false negatives. - expected |= self._AddToEach(expected, -2) - - if actual >= 0 and actual not in expected: - expected = sorted(expected) - indentation_errors.append([ - errors.WRONG_INDENTATION, - 'Wrong indentation: expected any of {%s} but got %d' % ( - ', '.join('%d' % x for x in expected if x < 80), actual), - token, - Position(actual, expected[0])]) - self._start_index_offset[token.line_number] = expected[0] - actual - - # Add tokens that could increase indentation. - if token_type == Type.START_BRACKET: - self._Add(TokenInfo( - token=token, - is_block=token.metadata.context.type == Context.ARRAY_LITERAL)) - - elif token_type == Type.START_BLOCK or token.metadata.is_implied_block: - self._Add(TokenInfo(token=token, is_block=True)) - - elif token_type in (Type.START_PAREN, Type.START_PARAMETERS): - self._Add(TokenInfo(token=token, is_block=False)) - - elif token_type == Type.KEYWORD and token.string == 'return': - self._Add(TokenInfo(token)) - - elif not token.IsLastInLine() and ( - token.IsAssignment() or token.IsOperator('?')): - self._Add(TokenInfo(token=token)) - - # Handle implied block closes. - if token.metadata.is_implied_block_close: - self._PopToImpliedBlock() - - # Add some tokens only if they appear at the end of the line. - is_last = self._IsLastCodeInLine(token) - if is_last: - next_code_token = tokenutil.GetNextCodeToken(token) - # Increase required indentation if this is an overlong wrapped statement - # ending in an operator. - if token_type == Type.OPERATOR: - if token.string == ':': - if stack and stack[-1].token.string == '?': - # When a ternary : is on a different line than its '?', it doesn't - # add indentation. - if token.line_number == stack[-1].token.line_number: - self._Add(TokenInfo(token)) - elif token.metadata.context.type == Context.CASE_BLOCK: - # Pop transient tokens from say, line continuations, e.g., - # case x. - # y: - # Want to pop the transient 4 space continuation indent. - self._PopTransient() - # Starting the body of the case statement, which is a type of - # block. - self._Add(TokenInfo(token=token, is_block=True)) - elif token.metadata.context.type == Context.LITERAL_ELEMENT: - # When in an object literal, acts as operator indicating line - # continuations. - self._Add(TokenInfo(token)) - else: - # ':' might also be a statement label, no effect on indentation in - # this case. - pass - - elif token.string != ',': - self._Add(TokenInfo(token)) - else: - # The token is a comma. - if token.metadata.context.type == Context.VAR: - self._Add(TokenInfo(token)) - elif token.metadata.context.type != Context.PARAMETERS: - self._PopTransient() - # Increase required indentation if this is the end of a statement that's - # continued with an operator on the next line (e.g. the '.'). - elif (next_code_token and next_code_token.type == Type.OPERATOR and - not next_code_token.metadata.IsUnaryOperator()): - self._Add(TokenInfo(token)) - elif token_type == Type.PARAMETERS and token.string.endswith(','): - # Parameter lists. - self._Add(TokenInfo(token)) - elif token.IsKeyword('var'): - self._Add(TokenInfo(token)) - elif token.metadata.is_implied_semicolon: - self._PopTransient() - elif token.IsAssignment(): - self._Add(TokenInfo(token)) - - return indentation_errors - - def _AddToEach(self, original, amount): - """Returns a new set with the given amount added to each element. - - Args: - original: The original set of numbers - amount: The amount to add to each element - - Returns: - A new set containing each element of the original set added to the amount. - """ - return set([x + amount for x in original]) - - _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS, - Type.START_BRACKET) - - _HARD_STOP_STRINGS = ('return', '?') - - def _IsHardStop(self, token): - """Determines if the given token can have a hard stop after it. - - Args: - token: token to examine - - Returns: - Whether the token can have a hard stop after it. - - Hard stops are indentations defined by the position of another token as in - indentation lined up with return, (, [, and ?. - """ - return (token.type in self._HARD_STOP_TYPES or - token.string in self._HARD_STOP_STRINGS or - token.IsAssignment()) - - def _GetAllowableIndentations(self): - """Computes the set of allowable indentations. - - Returns: - The set of allowable indentations, given the current stack. - """ - expected = set([0]) - hard_stops = set([]) - - # Whether the tokens are still in the same continuation, meaning additional - # indentation is optional. As an example: - # x = 5 + - # 6 + - # 7; - # The second '+' does not add any required indentation. - in_same_continuation = False - - for token_info in self._stack: - token = token_info.token - - # Handle normal additive indentation tokens. - if not token_info.overridden_by and token.string != 'return': - if token_info.is_block: - expected = self._AddToEach(expected, 2) - hard_stops = self._AddToEach(hard_stops, 2) - in_same_continuation = False - elif in_same_continuation: - expected |= self._AddToEach(expected, 4) - hard_stops |= self._AddToEach(hard_stops, 4) - else: - expected = self._AddToEach(expected, 4) - hard_stops |= self._AddToEach(hard_stops, 4) - in_same_continuation = True - - # Handle hard stops after (, [, return, =, and ? - if self._IsHardStop(token): - override_is_hard_stop = (token_info.overridden_by and - self._IsHardStop( - token_info.overridden_by.token)) - if token.type == Type.START_PAREN and token.previous: - # For someFunction(...) we allow to indent at the beginning of the - # identifier +4 - prev = token.previous - if (prev.type == Type.IDENTIFIER and - prev.line_number == token.line_number): - hard_stops.add(prev.start_index + 4) - if not override_is_hard_stop: - start_index = token.start_index - if token.line_number in self._start_index_offset: - start_index += self._start_index_offset[token.line_number] - if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and - not token_info.overridden_by): - hard_stops.add(start_index + 1) - - elif token.string == 'return' and not token_info.overridden_by: - hard_stops.add(start_index + 7) - - elif token.type == Type.START_BRACKET: - hard_stops.add(start_index + 1) - - elif token.IsAssignment(): - hard_stops.add(start_index + len(token.string) + 1) - - elif token.IsOperator('?') and not token_info.overridden_by: - hard_stops.add(start_index + 2) - - return (expected | hard_stops) or set([0]) - - def _GetActualIndentation(self, token): - """Gets the actual indentation of the line containing the given token. - - Args: - token: Any token on the line. - - Returns: - The actual indentation of the line containing the given token. Returns - -1 if this line should be ignored due to the presence of tabs. - """ - # Move to the first token in the line - token = tokenutil.GetFirstTokenInSameLine(token) - - # If it is whitespace, it is the indentation. - if token.type == Type.WHITESPACE: - if token.string.find('\t') >= 0: - return -1 - else: - return len(token.string) - elif token.type == Type.PARAMETERS: - return len(token.string) - len(token.string.lstrip()) - else: - return 0 - - def _IsFirstNonWhitespaceTokenInLine(self, token): - """Determines if the given token is the first non-space token on its line. - - Args: - token: The token. - - Returns: - True if the token is the first non-whitespace token on its line. - """ - if token.type in (Type.WHITESPACE, Type.BLANK_LINE): - return False - if token.IsFirstInLine(): - return True - return (token.previous and token.previous.IsFirstInLine() and - token.previous.type == Type.WHITESPACE) - - def _IsLastCodeInLine(self, token): - """Determines if the given token is the last code token on its line. - - Args: - token: The token. - - Returns: - True if the token is the last code token on its line. - """ - if token.type in Type.NON_CODE_TYPES: - return False - start_token = token - while True: - token = token.next - if not token or token.line_number != start_token.line_number: - return True - if token.type not in Type.NON_CODE_TYPES: - return False - - def _AllFunctionPropertyAssignTokens(self, start_token, end_token): - """Checks if tokens are (likely) a valid function property assignment. - - Args: - start_token: Start of the token range. - end_token: End of the token range. - - Returns: - True if all tokens between start_token and end_token are legal tokens - within a function declaration and assignment into a property. - """ - for token in tokenutil.GetTokenRange(start_token, end_token): - fn_decl_tokens = (Type.FUNCTION_DECLARATION, - Type.PARAMETERS, - Type.START_PARAMETERS, - Type.END_PARAMETERS, - Type.END_PAREN) - if (token.type not in fn_decl_tokens and - token.IsCode() and - not tokenutil.IsIdentifierOrDot(token) and - not token.IsAssignment() and - not (token.type == Type.OPERATOR and token.string == ',')): - return False - return True - - def _Add(self, token_info): - """Adds the given token info to the stack. - - Args: - token_info: The token information to add. - """ - if self._stack and self._stack[-1].token == token_info.token: - # Don't add the same token twice. - return - - if token_info.is_block or token_info.token.type == Type.START_PAREN: - scope_token = tokenutil.GoogScopeOrNoneFromStartBlock(token_info.token) - token_info.overridden_by = TokenInfo(scope_token) if scope_token else None - - if (token_info.token.type == Type.START_BLOCK and - token_info.token.metadata.context.type == Context.BLOCK): - # Handle function() {} assignments: their block contents get special - # treatment and are allowed to just indent by two whitespace. - # For example - # long.long.name = function( - # a) { - # In this case the { and the = are on different lines. But the - # override should still apply for all previous stack tokens that are - # part of an assignment of a block. - - has_assignment = any(x for x in self._stack if x.token.IsAssignment()) - if has_assignment: - last_token = token_info.token.previous - for stack_info in reversed(self._stack): - if (last_token and - not self._AllFunctionPropertyAssignTokens(stack_info.token, - last_token)): - break - stack_info.overridden_by = token_info - stack_info.is_permanent_override = True - last_token = stack_info.token - - index = len(self._stack) - 1 - while index >= 0: - stack_info = self._stack[index] - stack_token = stack_info.token - - if stack_info.line_number == token_info.line_number: - # In general, tokens only override each other when they are on - # the same line. - stack_info.overridden_by = token_info - if (token_info.token.type == Type.START_BLOCK and - (stack_token.IsAssignment() or - stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))): - # Multi-line blocks have lasting overrides, as in: - # callFn({ - # a: 10 - # }, - # 30); - # b/11450054. If a string is not closed properly then close_block - # could be null. - close_block = token_info.token.metadata.context.end_token - stack_info.is_permanent_override = close_block and ( - close_block.line_number != token_info.token.line_number) - else: - break - index -= 1 - - self._stack.append(token_info) - - def _Pop(self): - """Pops the top token from the stack. - - Returns: - The popped token info. - """ - token_info = self._stack.pop() - if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET): - # Remove any temporary overrides. - self._RemoveOverrides(token_info) - else: - # For braces and brackets, which can be object and array literals, remove - # overrides when the literal is closed on the same line. - token_check = token_info.token - same_type = token_check.type - goal_type = None - if token_info.token.type == Type.START_BRACKET: - goal_type = Type.END_BRACKET - else: - goal_type = Type.END_BLOCK - line_number = token_info.token.line_number - count = 0 - while token_check and token_check.line_number == line_number: - if token_check.type == goal_type: - count -= 1 - if not count: - self._RemoveOverrides(token_info) - break - if token_check.type == same_type: - count += 1 - token_check = token_check.next - return token_info - - def _PopToImpliedBlock(self): - """Pops the stack until an implied block token is found.""" - while not self._Pop().token.metadata.is_implied_block: - pass - - def _PopTo(self, stop_type): - """Pops the stack until a token of the given type is popped. - - Args: - stop_type: The type of token to pop to. - - Returns: - The token info of the given type that was popped. - """ - last = None - while True: - last = self._Pop() - if last.token.type == stop_type: - break - return last - - def _RemoveOverrides(self, token_info): - """Marks any token that was overridden by this token as active again. - - Args: - token_info: The token that is being removed from the stack. - """ - for stack_token in self._stack: - if (stack_token.overridden_by == token_info and - not stack_token.is_permanent_override): - stack_token.overridden_by = None - - def _PopTransient(self): - """Pops all transient tokens - i.e. not blocks, literals, or parens.""" - while self._stack and self._stack[-1].is_transient: - self._Pop() |