diff options
Diffstat (limited to 'deps/v8/src/parsing/scanner-inl.h')
-rw-r--r-- | deps/v8/src/parsing/scanner-inl.h | 772 |
1 files changed, 373 insertions, 399 deletions
diff --git a/deps/v8/src/parsing/scanner-inl.h b/deps/v8/src/parsing/scanner-inl.h index 9647957062..1e2cf9e447 100644 --- a/deps/v8/src/parsing/scanner-inl.h +++ b/deps/v8/src/parsing/scanner-inl.h @@ -6,159 +6,19 @@ #define V8_PARSING_SCANNER_INL_H_ #include "src/char-predicates-inl.h" +#include "src/parsing/keywords-gen.h" #include "src/parsing/scanner.h" -#include "src/unicode-cache-inl.h" namespace v8 { namespace internal { -// Make sure tokens are stored as a single byte. -STATIC_ASSERT(sizeof(Token::Value) == 1); - -// Table of one-character tokens, by character (0x00..0x7F only). -// clang-format off -static const Token::Value one_char_tokens[] = { - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::LPAREN, // 0x28 - Token::RPAREN, // 0x29 - Token::ILLEGAL, - Token::ILLEGAL, - Token::COMMA, // 0x2C - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::COLON, // 0x3A - Token::SEMICOLON, // 0x3B - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::CONDITIONAL, // 0x3F - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::LBRACK, // 0x5B - Token::ILLEGAL, - Token::RBRACK, // 0x5D - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::LBRACE, // 0x7B - Token::ILLEGAL, - Token::RBRACE, // 0x7D - Token::BIT_NOT, // 0x7E - Token::ILLEGAL -}; -// clang-format on - // ---------------------------------------------------------------------------- // Keyword Matcher #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ KEYWORD_GROUP('a') \ - KEYWORD("arguments", Token::ARGUMENTS) \ - KEYWORD("as", Token::AS) \ KEYWORD("async", Token::ASYNC) \ KEYWORD("await", Token::AWAIT) \ - KEYWORD("anonymous", Token::ANONYMOUS) \ KEYWORD_GROUP('b') \ KEYWORD("break", Token::BREAK) \ KEYWORD_GROUP('c') \ @@ -166,7 +26,6 @@ static const Token::Value one_char_tokens[] = { KEYWORD("catch", Token::CATCH) \ KEYWORD("class", Token::CLASS) \ KEYWORD("const", Token::CONST) \ - KEYWORD("constructor", Token::CONSTRUCTOR) \ KEYWORD("continue", Token::CONTINUE) \ KEYWORD_GROUP('d') \ KEYWORD("debugger", Token::DEBUGGER) \ @@ -176,17 +35,13 @@ static const Token::Value one_char_tokens[] = { KEYWORD_GROUP('e') \ KEYWORD("else", Token::ELSE) \ KEYWORD("enum", Token::ENUM) \ - KEYWORD("eval", Token::EVAL) \ KEYWORD("export", Token::EXPORT) \ KEYWORD("extends", Token::EXTENDS) \ KEYWORD_GROUP('f') \ KEYWORD("false", Token::FALSE_LITERAL) \ KEYWORD("finally", Token::FINALLY) \ KEYWORD("for", Token::FOR) \ - KEYWORD("from", Token::FROM) \ KEYWORD("function", Token::FUNCTION) \ - KEYWORD_GROUP('g') \ - KEYWORD("get", Token::GET) \ KEYWORD_GROUP('i') \ KEYWORD("if", Token::IF) \ KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ @@ -196,36 +51,26 @@ static const Token::Value one_char_tokens[] = { KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ KEYWORD_GROUP('l') \ KEYWORD("let", Token::LET) \ - KEYWORD_GROUP('m') \ - KEYWORD("meta", Token::META) \ KEYWORD_GROUP('n') \ - KEYWORD("name", Token::NAME) \ KEYWORD("new", Token::NEW) \ KEYWORD("null", Token::NULL_LITERAL) \ - KEYWORD_GROUP('o') \ - KEYWORD("of", Token::OF) \ KEYWORD_GROUP('p') \ KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("prototype", Token::PROTOTYPE) \ KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ KEYWORD_GROUP('r') \ KEYWORD("return", Token::RETURN) \ KEYWORD_GROUP('s') \ - KEYWORD("set", Token::SET) \ KEYWORD("static", Token::STATIC) \ KEYWORD("super", Token::SUPER) \ KEYWORD("switch", Token::SWITCH) \ KEYWORD_GROUP('t') \ - KEYWORD("target", Token::TARGET) \ KEYWORD("this", Token::THIS) \ KEYWORD("throw", Token::THROW) \ KEYWORD("true", Token::TRUE_LITERAL) \ KEYWORD("try", Token::TRY) \ KEYWORD("typeof", Token::TYPEOF) \ - KEYWORD_GROUP('u') \ - KEYWORD("undefined", Token::UNDEFINED) \ KEYWORD_GROUP('v') \ KEYWORD("var", Token::VAR) \ KEYWORD("void", Token::VOID) \ @@ -233,124 +78,235 @@ static const Token::Value one_char_tokens[] = { KEYWORD("while", Token::WHILE) \ KEYWORD("with", Token::WITH) \ KEYWORD_GROUP('y') \ - KEYWORD("yield", Token::YIELD) \ - KEYWORD_GROUP('_') \ - KEYWORD("__proto__", Token::PROTO_UNDERSCORED) \ - KEYWORD_GROUP('#') \ - KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR) + KEYWORD("yield", Token::YIELD) + +constexpr bool IsKeywordStart(char c) { +#define KEYWORD_GROUP_CHECK(ch) c == ch || +#define KEYWORD_CHECK(keyword, token) + return KEYWORDS(KEYWORD_GROUP_CHECK, KEYWORD_CHECK) /* || */ false; +#undef KEYWORD_GROUP_CHECK +#undef KEYWORD_CHECK +} V8_INLINE Token::Value KeywordOrIdentifierToken(const uint8_t* input, int input_length) { DCHECK_GE(input_length, 1); - const int kMinLength = 2; - const int kMaxLength = 12; - if (input_length < kMinLength || input_length > kMaxLength) { - return Token::IDENTIFIER; - } - switch (input[0]) { - default: -#define KEYWORD_GROUP_CASE(ch) \ - break; \ - case ch: -#define KEYWORD(keyword, token) \ - { \ - /* 'keyword' is a char array, so sizeof(keyword) is */ \ - /* strlen(keyword) plus 1 for the NUL char. */ \ - const int keyword_length = sizeof(keyword) - 1; \ - STATIC_ASSERT(keyword_length >= kMinLength); \ - STATIC_ASSERT(keyword_length <= kMaxLength); \ - DCHECK_EQ(input[0], keyword[0]); \ - DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \ - 0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \ - if (input_length == keyword_length && input[1] == keyword[1] && \ - (keyword_length <= 2 || input[2] == keyword[2]) && \ - (keyword_length <= 3 || input[3] == keyword[3]) && \ - (keyword_length <= 4 || input[4] == keyword[4]) && \ - (keyword_length <= 5 || input[5] == keyword[5]) && \ - (keyword_length <= 6 || input[6] == keyword[6]) && \ - (keyword_length <= 7 || input[7] == keyword[7]) && \ - (keyword_length <= 8 || input[8] == keyword[8]) && \ - (keyword_length <= 9 || input[9] == keyword[9]) && \ - (keyword_length <= 10 || input[10] == keyword[10])) { \ - return token; \ - } \ - } + return PerfectKeywordHash::GetToken(reinterpret_cast<const char*>(input), + input_length); +} + +// Recursive constexpr template magic to check if a character is in a given +// string. +template <int N> +constexpr bool IsInString(const char (&s)[N], char c, size_t i = 0) { + return i >= N ? false : s[i] == c ? true : IsInString(s, c, i + 1); +} + +inline constexpr bool CanBeKeywordCharacter(char c) { + return IsInString( +#define KEYWORD_GROUP_CASE(ch) // Nothing +#define KEYWORD(keyword, token) keyword + // Use C string literal concatenation ("a" "b" becomes "ab") to build one + // giant string containing all the keywords. KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) - } - return Token::IDENTIFIER; -#undef KEYWORDS #undef KEYWORD #undef KEYWORD_GROUP_CASE + , + c); +} + +// Make sure tokens are stored as a single byte. +STATIC_ASSERT(sizeof(Token::Value) == 1); + +// Get the shortest token that this character starts, the token may change +// depending on subsequent characters. +constexpr Token::Value GetOneCharToken(char c) { + // clang-format off + return + c == '(' ? Token::LPAREN : + c == ')' ? Token::RPAREN : + c == '{' ? Token::LBRACE : + c == '}' ? Token::RBRACE : + c == '[' ? Token::LBRACK : + c == ']' ? Token::RBRACK : + c == '?' ? Token::CONDITIONAL : + c == ':' ? Token::COLON : + c == ';' ? Token::SEMICOLON : + c == ',' ? Token::COMMA : + c == '.' ? Token::PERIOD : + c == '|' ? Token::BIT_OR : + c == '&' ? Token::BIT_AND : + c == '^' ? Token::BIT_XOR : + c == '~' ? Token::BIT_NOT : + c == '!' ? Token::NOT : + c == '<' ? Token::LT : + c == '>' ? Token::GT : + c == '%' ? Token::MOD : + c == '=' ? Token::ASSIGN : + c == '+' ? Token::ADD : + c == '-' ? Token::SUB : + c == '*' ? Token::MUL : + c == '/' ? Token::DIV : + c == '#' ? Token::PRIVATE_NAME : + c == '"' ? Token::STRING : + c == '\'' ? Token::STRING : + c == '`' ? Token::TEMPLATE_SPAN : + c == '\\' ? Token::IDENTIFIER : + // Whitespace or line terminator + c == ' ' ? Token::WHITESPACE : + c == '\t' ? Token::WHITESPACE : + c == '\v' ? Token::WHITESPACE : + c == '\f' ? Token::WHITESPACE : + c == '\r' ? Token::WHITESPACE : + c == '\n' ? Token::WHITESPACE : + // IsDecimalDigit must be tested before IsAsciiIdentifier + IsDecimalDigit(c) ? Token::NUMBER : + IsAsciiIdentifier(c) ? Token::IDENTIFIER : + Token::ILLEGAL; + // clang-format on } +// Table of one-character tokens, by character (0x00..0x7F only). +static const constexpr Token::Value one_char_tokens[128] = { +#define CALL_GET_SCAN_FLAGS(N) GetOneCharToken(N), + INT_0_TO_127_LIST(CALL_GET_SCAN_FLAGS) +#undef CALL_GET_SCAN_FLAGS +}; + +#undef KEYWORDS + V8_INLINE Token::Value Scanner::ScanIdentifierOrKeyword() { - LiteralScope literal(this); - return ScanIdentifierOrKeywordInner(&literal); + next().literal_chars.Start(); + return ScanIdentifierOrKeywordInner(); +} + +// Character flags for the fast path of scanning a keyword or identifier token. +enum class ScanFlags : uint8_t { + kTerminatesLiteral = 1 << 0, + // "Cannot" rather than "can" so that this flag can be ORed together across + // multiple characters. + kCannotBeKeyword = 1 << 1, + kCannotBeKeywordStart = 1 << 2, + kStringTerminator = 1 << 3, + kNeedsSlowPath = 1 << 4, +}; +constexpr uint8_t GetScanFlags(char c) { + return + // Keywords are all lowercase and only contain letters. + // Note that non-identifier characters do not set this flag, so + // that it plays well with kTerminatesLiteral. + (IsAsciiIdentifier(c) && !CanBeKeywordCharacter(c) + ? static_cast<uint8_t>(ScanFlags::kCannotBeKeyword) + : 0) | + (IsKeywordStart(c) + ? 0 + : static_cast<uint8_t>(ScanFlags::kCannotBeKeywordStart)) | + // Anything that isn't an identifier character will terminate the + // literal, or at least terminates the literal fast path processing + // (like an escape). + (!IsAsciiIdentifier(c) + ? static_cast<uint8_t>(ScanFlags::kTerminatesLiteral) + : 0) | + // Possible string termination characters. + ((c == '\'' || c == '"' || c == '\n' || c == '\r' || c == '\\') + ? static_cast<uint8_t>(ScanFlags::kStringTerminator) + : 0) | + // Escapes are processed on the slow path. + (c == '\\' ? static_cast<uint8_t>(ScanFlags::kNeedsSlowPath) : 0); +} +inline bool TerminatesLiteral(uint8_t scan_flags) { + return (scan_flags & static_cast<uint8_t>(ScanFlags::kTerminatesLiteral)); +} +inline bool CanBeKeyword(uint8_t scan_flags) { + return !(scan_flags & static_cast<uint8_t>(ScanFlags::kCannotBeKeyword)); } +inline bool NeedsSlowPath(uint8_t scan_flags) { + return (scan_flags & static_cast<uint8_t>(ScanFlags::kNeedsSlowPath)); +} +inline bool MayTerminateString(uint8_t scan_flags) { + return (scan_flags & static_cast<uint8_t>(ScanFlags::kStringTerminator)); +} +// Table of precomputed scan flags for the 128 ASCII characters, for branchless +// flag calculation during the scan. +static constexpr const uint8_t character_scan_flags[128] = { +#define CALL_GET_SCAN_FLAGS(N) GetScanFlags(N), + INT_0_TO_127_LIST(CALL_GET_SCAN_FLAGS) +#undef CALL_GET_SCAN_FLAGS +}; -V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner( - LiteralScope* literal) { - DCHECK(unicode_cache_->IsIdentifierStart(c0_)); +inline bool CharCanBeKeyword(uc32 c) { + return static_cast<uint32_t>(c) < arraysize(character_scan_flags) && + CanBeKeyword(character_scan_flags[c]); +} + +V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() { + DCHECK(IsIdentifierStart(c0_)); bool escaped = false; - if (IsInRange(c0_, 'a', 'z') || c0_ == '_') { - do { + bool can_be_keyword = true; + + STATIC_ASSERT(arraysize(character_scan_flags) == kMaxAscii + 1); + if (V8_LIKELY(static_cast<uint32_t>(c0_) <= kMaxAscii)) { + if (V8_LIKELY(c0_ != '\\')) { + uint8_t scan_flags = character_scan_flags[c0_]; + DCHECK(!TerminatesLiteral(scan_flags)); + STATIC_ASSERT(static_cast<uint8_t>(ScanFlags::kCannotBeKeywordStart) == + static_cast<uint8_t>(ScanFlags::kCannotBeKeyword) << 1); + scan_flags >>= 1; + // Make sure the shifting above doesn't set NeedsSlowPath. Otherwise we'll + // fall into the slow path after scanning the identifier. + DCHECK(!NeedsSlowPath(scan_flags)); AddLiteralChar(static_cast<char>(c0_)); - Advance(); - } while (IsInRange(c0_, 'a', 'z') || c0_ == '_'); - - if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') { - // Identifier starting with lowercase or _. - do { - AddLiteralChar(static_cast<char>(c0_)); - Advance(); - } while (IsAsciiIdentifier(c0_)); - - if (c0_ <= kMaxAscii && c0_ != '\\') { - literal->Complete(); - return Token::IDENTIFIER; + AdvanceUntil([this, &scan_flags](uc32 c0) { + if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) { + // A non-ascii character means we need to drop through to the slow + // path. + // TODO(leszeks): This would be most efficient as a goto to the slow + // path, check codegen and maybe use a bool instead. + scan_flags |= static_cast<uint8_t>(ScanFlags::kNeedsSlowPath); + return true; + } + uint8_t char_flags = character_scan_flags[c0]; + scan_flags |= char_flags; + if (TerminatesLiteral(char_flags)) { + return true; + } else { + AddLiteralChar(static_cast<char>(c0)); + return false; + } + }); + + if (V8_LIKELY(!NeedsSlowPath(scan_flags))) { + if (!CanBeKeyword(scan_flags)) return Token::IDENTIFIER; + // Could be a keyword or identifier. + Vector<const uint8_t> chars = next().literal_chars.one_byte_literal(); + return KeywordOrIdentifierToken(chars.start(), chars.length()); } - } else if (c0_ <= kMaxAscii && c0_ != '\\') { - // Only a-z+ or _: could be a keyword or identifier. - Vector<const uint8_t> chars = next().literal_chars.one_byte_literal(); - Token::Value token = - KeywordOrIdentifierToken(chars.start(), chars.length()); - if (token == Token::IDENTIFIER || - token == Token::FUTURE_STRICT_RESERVED_WORD || - Token::IsContextualKeyword(token)) - literal->Complete(); - return token; - } - } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') { - do { - AddLiteralChar(static_cast<char>(c0_)); - Advance(); - } while (IsAsciiIdentifier(c0_)); - if (c0_ <= kMaxAscii && c0_ != '\\') { - literal->Complete(); - return Token::IDENTIFIER; - } - } else if (c0_ == '\\') { - escaped = true; - uc32 c = ScanIdentifierUnicodeEscape(); - DCHECK(!unicode_cache_->IsIdentifierStart(-1)); - if (c == '\\' || !unicode_cache_->IsIdentifierStart(c)) { - return Token::ILLEGAL; + can_be_keyword = CanBeKeyword(scan_flags); + } else { + // Special case for escapes at the start of an identifier. + escaped = true; + uc32 c = ScanIdentifierUnicodeEscape(); + DCHECK(!IsIdentifierStart(-1)); + if (c == '\\' || !IsIdentifierStart(c)) { + return Token::ILLEGAL; + } + AddLiteralChar(c); + can_be_keyword = CharCanBeKeyword(c); } - AddLiteralChar(c); } - return ScanIdentifierOrKeywordInnerSlow(literal, escaped); + return ScanIdentifierOrKeywordInnerSlow(escaped, can_be_keyword); } V8_INLINE Token::Value Scanner::SkipWhiteSpace() { int start_position = source_pos(); // We won't skip behind the end of input. - DCHECK(!unicode_cache_->IsWhiteSpaceOrLineTerminator(kEndOfInput)); + DCHECK(!IsWhiteSpaceOrLineTerminator(kEndOfInput)); // Advance as long as character is a WhiteSpace or LineTerminator. - while (unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_)) { + while (IsWhiteSpaceOrLineTerminator(c0_)) { if (!next().after_line_terminator && unibrow::IsLineTerminator(c0_)) { next().after_line_terminator = true; } @@ -371,178 +327,194 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() { do { next().location.beg_pos = source_pos(); - if (static_cast<unsigned>(c0_) <= 0x7F) { - Token::Value token = one_char_tokens[c0_]; - if (token != Token::ILLEGAL) { - Advance(); - return token; - } - } - - switch (c0_) { - case '"': - case '\'': - return ScanString(); - - case '<': - // < <= << <<= <!-- - Advance(); - if (c0_ == '=') return Select(Token::LTE); - if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL); - if (c0_ == '!') { - token = ScanHtmlComment(); - continue; - } - return Token::LT; - - case '>': - // > >= >> >>= >>> >>>= - Advance(); - if (c0_ == '=') return Select(Token::GTE); - if (c0_ == '>') { - // >> >>= >>> >>>= + if (V8_LIKELY(static_cast<unsigned>(c0_) <= kMaxAscii)) { + token = one_char_tokens[c0_]; + + switch (token) { + case Token::LPAREN: + case Token::RPAREN: + case Token::LBRACE: + case Token::RBRACE: + case Token::LBRACK: + case Token::RBRACK: + case Token::CONDITIONAL: + case Token::COLON: + case Token::SEMICOLON: + case Token::COMMA: + case Token::BIT_NOT: + case Token::ILLEGAL: + // One character tokens. + return Select(token); + + case Token::STRING: + return ScanString(); + + case Token::LT: + // < <= << <<= <!-- Advance(); - if (c0_ == '=') return Select(Token::ASSIGN_SAR); - if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR); - return Token::SAR; - } - return Token::GT; - - case '=': - // = == === => - Advance(); - if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ); - if (c0_ == '>') return Select(Token::ARROW); - return Token::ASSIGN; - - case '!': - // ! != !== - Advance(); - if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE); - return Token::NOT; - - case '+': - // + ++ += - Advance(); - if (c0_ == '+') return Select(Token::INC); - if (c0_ == '=') return Select(Token::ASSIGN_ADD); - return Token::ADD; - - case '-': - // - -- --> -= - Advance(); - if (c0_ == '-') { - Advance(); - if (c0_ == '>' && next().after_line_terminator) { - // For compatibility with SpiderMonkey, we skip lines that - // start with an HTML comment end '-->'. - token = SkipSingleHTMLComment(); + if (c0_ == '=') return Select(Token::LTE); + if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL); + if (c0_ == '!') { + token = ScanHtmlComment(); continue; } - return Token::DEC; - } - if (c0_ == '=') return Select(Token::ASSIGN_SUB); - return Token::SUB; - - case '*': - // * *= - Advance(); - if (c0_ == '*') return Select('=', Token::ASSIGN_EXP, Token::EXP); - if (c0_ == '=') return Select(Token::ASSIGN_MUL); - return Token::MUL; - - case '%': - // % %= - return Select('=', Token::ASSIGN_MOD, Token::MOD); - - case '/': - // / // /* /= - Advance(); - if (c0_ == '/') { - uc32 c = Peek(); - if (c == '#' || c == '@') { + return Token::LT; + + case Token::GT: + // > >= >> >>= >>> >>>= + Advance(); + if (c0_ == '=') return Select(Token::GTE); + if (c0_ == '>') { + // >> >>= >>> >>>= Advance(); + if (c0_ == '=') return Select(Token::ASSIGN_SAR); + if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR); + return Token::SAR; + } + return Token::GT; + + case Token::ASSIGN: + // = == === => + Advance(); + if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ); + if (c0_ == '>') return Select(Token::ARROW); + return Token::ASSIGN; + + case Token::NOT: + // ! != !== + Advance(); + if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE); + return Token::NOT; + + case Token::ADD: + // + ++ += + Advance(); + if (c0_ == '+') return Select(Token::INC); + if (c0_ == '=') return Select(Token::ASSIGN_ADD); + return Token::ADD; + + case Token::SUB: + // - -- --> -= + Advance(); + if (c0_ == '-') { Advance(); - token = SkipSourceURLComment(); + if (c0_ == '>' && next().after_line_terminator) { + // For compatibility with SpiderMonkey, we skip lines that + // start with an HTML comment end '-->'. + token = SkipSingleHTMLComment(); + continue; + } + return Token::DEC; + } + if (c0_ == '=') return Select(Token::ASSIGN_SUB); + return Token::SUB; + + case Token::MUL: + // * *= + Advance(); + if (c0_ == '*') return Select('=', Token::ASSIGN_EXP, Token::EXP); + if (c0_ == '=') return Select(Token::ASSIGN_MUL); + return Token::MUL; + + case Token::MOD: + // % %= + return Select('=', Token::ASSIGN_MOD, Token::MOD); + + case Token::DIV: + // / // /* /= + Advance(); + if (c0_ == '/') { + uc32 c = Peek(); + if (c == '#' || c == '@') { + Advance(); + Advance(); + token = SkipSourceURLComment(); + continue; + } + token = SkipSingleLineComment(); continue; } - token = SkipSingleLineComment(); - continue; - } - if (c0_ == '*') { - token = SkipMultiLineComment(); - continue; - } - if (c0_ == '=') return Select(Token::ASSIGN_DIV); - return Token::DIV; - - case '&': - // & && &= - Advance(); - if (c0_ == '&') return Select(Token::AND); - if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND); - return Token::BIT_AND; - - case '|': - // | || |= - Advance(); - if (c0_ == '|') return Select(Token::OR); - if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR); - return Token::BIT_OR; - - case '^': - // ^ ^= - return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); - - case '.': - // . Number - Advance(); - if (IsDecimalDigit(c0_)) return ScanNumber(true); - if (c0_ == '.') { - if (Peek() == '.') { - Advance(); - Advance(); - return Token::ELLIPSIS; + if (c0_ == '*') { + token = SkipMultiLineComment(); + continue; } - } - return Token::PERIOD; + if (c0_ == '=') return Select(Token::ASSIGN_DIV); + return Token::DIV; + + case Token::BIT_AND: + // & && &= + Advance(); + if (c0_ == '&') return Select(Token::AND); + if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND); + return Token::BIT_AND; - case '`': - Advance(); - return ScanTemplateSpan(); + case Token::BIT_OR: + // | || |= + Advance(); + if (c0_ == '|') return Select(Token::OR); + if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR); + return Token::BIT_OR; + + case Token::BIT_XOR: + // ^ ^= + return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); - case '#': - return ScanPrivateName(); + case Token::PERIOD: + // . Number + Advance(); + if (IsDecimalDigit(c0_)) return ScanNumber(true); + if (c0_ == '.') { + if (Peek() == '.') { + Advance(); + Advance(); + return Token::ELLIPSIS; + } + } + return Token::PERIOD; - default: - if (unicode_cache_->IsIdentifierStart(c0_) || - (CombineSurrogatePair() && - unicode_cache_->IsIdentifierStart(c0_))) { - Token::Value token = ScanIdentifierOrKeyword(); - if (!Token::IsContextualKeyword(token)) return token; + case Token::TEMPLATE_SPAN: + Advance(); + return ScanTemplateSpan(); - next().contextual_token = token; - return Token::IDENTIFIER; - } - if (IsDecimalDigit(c0_)) return ScanNumber(false); - if (c0_ == kEndOfInput) return Token::EOS; - token = SkipWhiteSpace(); - continue; + case Token::PRIVATE_NAME: + return ScanPrivateName(); + + case Token::WHITESPACE: + token = SkipWhiteSpace(); + continue; + + case Token::NUMBER: + return ScanNumber(false); + + case Token::IDENTIFIER: + return ScanIdentifierOrKeyword(); + + default: + UNREACHABLE(); + } } + + if (IsIdentifierStart(c0_) || + (CombineSurrogatePair() && IsIdentifierStart(c0_))) { + return ScanIdentifierOrKeyword(); + } + if (c0_ == kEndOfInput) { + return source_->has_parser_error() ? Token::ILLEGAL : Token::EOS; + } + token = SkipWhiteSpace(); + // Continue scanning for tokens as long as we're just skipping whitespace. } while (token == Token::WHITESPACE); return token; } -void Scanner::Scan() { - next().literal_chars.Drop(); - next().raw_literal_chars.Drop(); - next().contextual_token = Token::UNINITIALIZED; - next().invalid_template_escape_message = MessageTemplate::kNone; +void Scanner::Scan(TokenDesc* next_desc) { + DCHECK_EQ(next_desc, &next()); - next().token = ScanSingleToken(); - next().location.end_pos = source_pos(); + next_desc->token = ScanSingleToken(); + DCHECK_IMPLIES(has_parser_error(), next_desc->token == Token::ILLEGAL); + next_desc->location.end_pos = source_pos(); #ifdef DEBUG SanityCheckTokenDesc(current()); @@ -551,6 +523,8 @@ void Scanner::Scan() { #endif } +void Scanner::Scan() { Scan(next_); } + } // namespace internal } // namespace v8 |