aboutsummaryrefslogtreecommitdiff
path: root/deps/v8/src/parsing/scanner-inl.h
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/parsing/scanner-inl.h')
-rw-r--r--deps/v8/src/parsing/scanner-inl.h530
1 files changed, 522 insertions, 8 deletions
diff --git a/deps/v8/src/parsing/scanner-inl.h b/deps/v8/src/parsing/scanner-inl.h
index 809ef655a7..9647957062 100644
--- a/deps/v8/src/parsing/scanner-inl.h
+++ b/deps/v8/src/parsing/scanner-inl.h
@@ -5,25 +5,354 @@
#ifndef V8_PARSING_SCANNER_INL_H_
#define V8_PARSING_SCANNER_INL_H_
+#include "src/char-predicates-inl.h"
#include "src/parsing/scanner.h"
#include "src/unicode-cache-inl.h"
namespace v8 {
namespace internal {
+// Make sure tokens are stored as a single byte.
+STATIC_ASSERT(sizeof(Token::Value) == 1);
+
+// Table of one-character tokens, by character (0x00..0x7F only).
+// clang-format off
+static const Token::Value one_char_tokens[] = {
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::LPAREN, // 0x28
+ Token::RPAREN, // 0x29
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::COMMA, // 0x2C
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::COLON, // 0x3A
+ Token::SEMICOLON, // 0x3B
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::CONDITIONAL, // 0x3F
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::LBRACK, // 0x5B
+ Token::ILLEGAL,
+ Token::RBRACK, // 0x5D
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::ILLEGAL,
+ Token::LBRACE, // 0x7B
+ Token::ILLEGAL,
+ Token::RBRACE, // 0x7D
+ Token::BIT_NOT, // 0x7E
+ Token::ILLEGAL
+};
+// clang-format on
+
+// ----------------------------------------------------------------------------
+// Keyword Matcher
+
+#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
+ KEYWORD_GROUP('a') \
+ KEYWORD("arguments", Token::ARGUMENTS) \
+ KEYWORD("as", Token::AS) \
+ KEYWORD("async", Token::ASYNC) \
+ KEYWORD("await", Token::AWAIT) \
+ KEYWORD("anonymous", Token::ANONYMOUS) \
+ KEYWORD_GROUP('b') \
+ KEYWORD("break", Token::BREAK) \
+ KEYWORD_GROUP('c') \
+ KEYWORD("case", Token::CASE) \
+ KEYWORD("catch", Token::CATCH) \
+ KEYWORD("class", Token::CLASS) \
+ KEYWORD("const", Token::CONST) \
+ KEYWORD("constructor", Token::CONSTRUCTOR) \
+ KEYWORD("continue", Token::CONTINUE) \
+ KEYWORD_GROUP('d') \
+ KEYWORD("debugger", Token::DEBUGGER) \
+ KEYWORD("default", Token::DEFAULT) \
+ KEYWORD("delete", Token::DELETE) \
+ KEYWORD("do", Token::DO) \
+ KEYWORD_GROUP('e') \
+ KEYWORD("else", Token::ELSE) \
+ KEYWORD("enum", Token::ENUM) \
+ KEYWORD("eval", Token::EVAL) \
+ KEYWORD("export", Token::EXPORT) \
+ KEYWORD("extends", Token::EXTENDS) \
+ KEYWORD_GROUP('f') \
+ KEYWORD("false", Token::FALSE_LITERAL) \
+ KEYWORD("finally", Token::FINALLY) \
+ KEYWORD("for", Token::FOR) \
+ KEYWORD("from", Token::FROM) \
+ KEYWORD("function", Token::FUNCTION) \
+ KEYWORD_GROUP('g') \
+ KEYWORD("get", Token::GET) \
+ KEYWORD_GROUP('i') \
+ KEYWORD("if", Token::IF) \
+ KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
+ KEYWORD("import", Token::IMPORT) \
+ KEYWORD("in", Token::IN) \
+ KEYWORD("instanceof", Token::INSTANCEOF) \
+ KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
+ KEYWORD_GROUP('l') \
+ KEYWORD("let", Token::LET) \
+ KEYWORD_GROUP('m') \
+ KEYWORD("meta", Token::META) \
+ KEYWORD_GROUP('n') \
+ KEYWORD("name", Token::NAME) \
+ KEYWORD("new", Token::NEW) \
+ KEYWORD("null", Token::NULL_LITERAL) \
+ KEYWORD_GROUP('o') \
+ KEYWORD("of", Token::OF) \
+ KEYWORD_GROUP('p') \
+ KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
+ KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
+ KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
+ KEYWORD("prototype", Token::PROTOTYPE) \
+ KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
+ KEYWORD_GROUP('r') \
+ KEYWORD("return", Token::RETURN) \
+ KEYWORD_GROUP('s') \
+ KEYWORD("set", Token::SET) \
+ KEYWORD("static", Token::STATIC) \
+ KEYWORD("super", Token::SUPER) \
+ KEYWORD("switch", Token::SWITCH) \
+ KEYWORD_GROUP('t') \
+ KEYWORD("target", Token::TARGET) \
+ KEYWORD("this", Token::THIS) \
+ KEYWORD("throw", Token::THROW) \
+ KEYWORD("true", Token::TRUE_LITERAL) \
+ KEYWORD("try", Token::TRY) \
+ KEYWORD("typeof", Token::TYPEOF) \
+ KEYWORD_GROUP('u') \
+ KEYWORD("undefined", Token::UNDEFINED) \
+ KEYWORD_GROUP('v') \
+ KEYWORD("var", Token::VAR) \
+ KEYWORD("void", Token::VOID) \
+ KEYWORD_GROUP('w') \
+ KEYWORD("while", Token::WHILE) \
+ KEYWORD("with", Token::WITH) \
+ KEYWORD_GROUP('y') \
+ KEYWORD("yield", Token::YIELD) \
+ KEYWORD_GROUP('_') \
+ KEYWORD("__proto__", Token::PROTO_UNDERSCORED) \
+ KEYWORD_GROUP('#') \
+ KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR)
+
+V8_INLINE Token::Value KeywordOrIdentifierToken(const uint8_t* input,
+ int input_length) {
+ DCHECK_GE(input_length, 1);
+ const int kMinLength = 2;
+ const int kMaxLength = 12;
+ if (input_length < kMinLength || input_length > kMaxLength) {
+ return Token::IDENTIFIER;
+ }
+ switch (input[0]) {
+ default:
+#define KEYWORD_GROUP_CASE(ch) \
+ break; \
+ case ch:
+#define KEYWORD(keyword, token) \
+ { \
+ /* 'keyword' is a char array, so sizeof(keyword) is */ \
+ /* strlen(keyword) plus 1 for the NUL char. */ \
+ const int keyword_length = sizeof(keyword) - 1; \
+ STATIC_ASSERT(keyword_length >= kMinLength); \
+ STATIC_ASSERT(keyword_length <= kMaxLength); \
+ DCHECK_EQ(input[0], keyword[0]); \
+ DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \
+ 0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
+ if (input_length == keyword_length && input[1] == keyword[1] && \
+ (keyword_length <= 2 || input[2] == keyword[2]) && \
+ (keyword_length <= 3 || input[3] == keyword[3]) && \
+ (keyword_length <= 4 || input[4] == keyword[4]) && \
+ (keyword_length <= 5 || input[5] == keyword[5]) && \
+ (keyword_length <= 6 || input[6] == keyword[6]) && \
+ (keyword_length <= 7 || input[7] == keyword[7]) && \
+ (keyword_length <= 8 || input[8] == keyword[8]) && \
+ (keyword_length <= 9 || input[9] == keyword[9]) && \
+ (keyword_length <= 10 || input[10] == keyword[10])) { \
+ return token; \
+ } \
+ }
+ KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
+ }
+ return Token::IDENTIFIER;
+#undef KEYWORDS
+#undef KEYWORD
+#undef KEYWORD_GROUP_CASE
+}
+
+V8_INLINE Token::Value Scanner::ScanIdentifierOrKeyword() {
+ LiteralScope literal(this);
+ return ScanIdentifierOrKeywordInner(&literal);
+}
+
+V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner(
+ LiteralScope* literal) {
+ DCHECK(unicode_cache_->IsIdentifierStart(c0_));
+ bool escaped = false;
+ if (IsInRange(c0_, 'a', 'z') || c0_ == '_') {
+ do {
+ AddLiteralChar(static_cast<char>(c0_));
+ Advance();
+ } while (IsInRange(c0_, 'a', 'z') || c0_ == '_');
+
+ if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
+ // Identifier starting with lowercase or _.
+ do {
+ AddLiteralChar(static_cast<char>(c0_));
+ Advance();
+ } while (IsAsciiIdentifier(c0_));
+
+ if (c0_ <= kMaxAscii && c0_ != '\\') {
+ literal->Complete();
+ return Token::IDENTIFIER;
+ }
+ } else if (c0_ <= kMaxAscii && c0_ != '\\') {
+ // Only a-z+ or _: could be a keyword or identifier.
+ Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
+ Token::Value token =
+ KeywordOrIdentifierToken(chars.start(), chars.length());
+ if (token == Token::IDENTIFIER ||
+ token == Token::FUTURE_STRICT_RESERVED_WORD ||
+ Token::IsContextualKeyword(token))
+ literal->Complete();
+ return token;
+ }
+ } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
+ do {
+ AddLiteralChar(static_cast<char>(c0_));
+ Advance();
+ } while (IsAsciiIdentifier(c0_));
+
+ if (c0_ <= kMaxAscii && c0_ != '\\') {
+ literal->Complete();
+ return Token::IDENTIFIER;
+ }
+ } else if (c0_ == '\\') {
+ escaped = true;
+ uc32 c = ScanIdentifierUnicodeEscape();
+ DCHECK(!unicode_cache_->IsIdentifierStart(-1));
+ if (c == '\\' || !unicode_cache_->IsIdentifierStart(c)) {
+ return Token::ILLEGAL;
+ }
+ AddLiteralChar(c);
+ }
+
+ return ScanIdentifierOrKeywordInnerSlow(literal, escaped);
+}
+
V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
int start_position = source_pos();
- while (true) {
- // We won't skip behind the end of input.
- DCHECK(!unicode_cache_->IsWhiteSpace(kEndOfInput));
+ // We won't skip behind the end of input.
+ DCHECK(!unicode_cache_->IsWhiteSpaceOrLineTerminator(kEndOfInput));
- // Advance as long as character is a WhiteSpace or LineTerminator.
- // Remember if the latter is the case.
- if (unibrow::IsLineTerminator(c0_)) {
+ // Advance as long as character is a WhiteSpace or LineTerminator.
+ while (unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_)) {
+ if (!next().after_line_terminator && unibrow::IsLineTerminator(c0_)) {
next().after_line_terminator = true;
- } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
- break;
}
Advance();
}
@@ -37,6 +366,191 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
return Token::WHITESPACE;
}
+V8_INLINE Token::Value Scanner::ScanSingleToken() {
+ Token::Value token;
+ do {
+ next().location.beg_pos = source_pos();
+
+ if (static_cast<unsigned>(c0_) <= 0x7F) {
+ Token::Value token = one_char_tokens[c0_];
+ if (token != Token::ILLEGAL) {
+ Advance();
+ return token;
+ }
+ }
+
+ switch (c0_) {
+ case '"':
+ case '\'':
+ return ScanString();
+
+ case '<':
+ // < <= << <<= <!--
+ Advance();
+ if (c0_ == '=') return Select(Token::LTE);
+ if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL);
+ if (c0_ == '!') {
+ token = ScanHtmlComment();
+ continue;
+ }
+ return Token::LT;
+
+ case '>':
+ // > >= >> >>= >>> >>>=
+ Advance();
+ if (c0_ == '=') return Select(Token::GTE);
+ if (c0_ == '>') {
+ // >> >>= >>> >>>=
+ Advance();
+ if (c0_ == '=') return Select(Token::ASSIGN_SAR);
+ if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR);
+ return Token::SAR;
+ }
+ return Token::GT;
+
+ case '=':
+ // = == === =>
+ Advance();
+ if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ);
+ if (c0_ == '>') return Select(Token::ARROW);
+ return Token::ASSIGN;
+
+ case '!':
+ // ! != !==
+ Advance();
+ if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE);
+ return Token::NOT;
+
+ case '+':
+ // + ++ +=
+ Advance();
+ if (c0_ == '+') return Select(Token::INC);
+ if (c0_ == '=') return Select(Token::ASSIGN_ADD);
+ return Token::ADD;
+
+ case '-':
+ // - -- --> -=
+ Advance();
+ if (c0_ == '-') {
+ Advance();
+ if (c0_ == '>' && next().after_line_terminator) {
+ // For compatibility with SpiderMonkey, we skip lines that
+ // start with an HTML comment end '-->'.
+ token = SkipSingleHTMLComment();
+ continue;
+ }
+ return Token::DEC;
+ }
+ if (c0_ == '=') return Select(Token::ASSIGN_SUB);
+ return Token::SUB;
+
+ case '*':
+ // * *=
+ Advance();
+ if (c0_ == '*') return Select('=', Token::ASSIGN_EXP, Token::EXP);
+ if (c0_ == '=') return Select(Token::ASSIGN_MUL);
+ return Token::MUL;
+
+ case '%':
+ // % %=
+ return Select('=', Token::ASSIGN_MOD, Token::MOD);
+
+ case '/':
+ // / // /* /=
+ Advance();
+ if (c0_ == '/') {
+ uc32 c = Peek();
+ if (c == '#' || c == '@') {
+ Advance();
+ Advance();
+ token = SkipSourceURLComment();
+ continue;
+ }
+ token = SkipSingleLineComment();
+ continue;
+ }
+ if (c0_ == '*') {
+ token = SkipMultiLineComment();
+ continue;
+ }
+ if (c0_ == '=') return Select(Token::ASSIGN_DIV);
+ return Token::DIV;
+
+ case '&':
+ // & && &=
+ Advance();
+ if (c0_ == '&') return Select(Token::AND);
+ if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND);
+ return Token::BIT_AND;
+
+ case '|':
+ // | || |=
+ Advance();
+ if (c0_ == '|') return Select(Token::OR);
+ if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR);
+ return Token::BIT_OR;
+
+ case '^':
+ // ^ ^=
+ return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
+
+ case '.':
+ // . Number
+ Advance();
+ if (IsDecimalDigit(c0_)) return ScanNumber(true);
+ if (c0_ == '.') {
+ if (Peek() == '.') {
+ Advance();
+ Advance();
+ return Token::ELLIPSIS;
+ }
+ }
+ return Token::PERIOD;
+
+ case '`':
+ Advance();
+ return ScanTemplateSpan();
+
+ case '#':
+ return ScanPrivateName();
+
+ default:
+ if (unicode_cache_->IsIdentifierStart(c0_) ||
+ (CombineSurrogatePair() &&
+ unicode_cache_->IsIdentifierStart(c0_))) {
+ Token::Value token = ScanIdentifierOrKeyword();
+ if (!Token::IsContextualKeyword(token)) return token;
+
+ next().contextual_token = token;
+ return Token::IDENTIFIER;
+ }
+ if (IsDecimalDigit(c0_)) return ScanNumber(false);
+ if (c0_ == kEndOfInput) return Token::EOS;
+ token = SkipWhiteSpace();
+ continue;
+ }
+ // Continue scanning for tokens as long as we're just skipping whitespace.
+ } while (token == Token::WHITESPACE);
+
+ return token;
+}
+
+void Scanner::Scan() {
+ next().literal_chars.Drop();
+ next().raw_literal_chars.Drop();
+ next().contextual_token = Token::UNINITIALIZED;
+ next().invalid_template_escape_message = MessageTemplate::kNone;
+
+ next().token = ScanSingleToken();
+ next().location.end_pos = source_pos();
+
+#ifdef DEBUG
+ SanityCheckTokenDesc(current());
+ SanityCheckTokenDesc(next());
+ SanityCheckTokenDesc(next_next());
+#endif
+}
+
} // namespace internal
} // namespace v8