// Copyright 2011 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Features shared by parsing and pre-parsing scanners. #include "src/parsing/scanner.h" #include #include #include "src/ast/ast-value-factory.h" #include "src/numbers/conversions-inl.h" #include "src/objects/bigint.h" #include "src/parsing/scanner-inl.h" #include "src/zone/zone.h" namespace v8 { namespace internal { class Scanner::ErrorState { public: ErrorState(MessageTemplate* message_stack, Scanner::Location* location_stack) : message_stack_(message_stack), old_message_(*message_stack), location_stack_(location_stack), old_location_(*location_stack) { *message_stack_ = MessageTemplate::kNone; *location_stack_ = Location::invalid(); } ~ErrorState() { *message_stack_ = old_message_; *location_stack_ = old_location_; } void MoveErrorTo(TokenDesc* dest) { if (*message_stack_ == MessageTemplate::kNone) { return; } if (dest->invalid_template_escape_message == MessageTemplate::kNone) { dest->invalid_template_escape_message = *message_stack_; dest->invalid_template_escape_location = *location_stack_; } *message_stack_ = MessageTemplate::kNone; *location_stack_ = Location::invalid(); } private: MessageTemplate* const message_stack_; MessageTemplate const old_message_; Scanner::Location* const location_stack_; Scanner::Location const old_location_; }; // ---------------------------------------------------------------------------- // Scanner::BookmarkScope const size_t Scanner::BookmarkScope::kNoBookmark = std::numeric_limits::max() - 1; const size_t Scanner::BookmarkScope::kBookmarkWasApplied = std::numeric_limits::max(); void Scanner::BookmarkScope::Set(size_t position) { DCHECK_EQ(bookmark_, kNoBookmark); bookmark_ = position; } void Scanner::BookmarkScope::Apply() { DCHECK(HasBeenSet()); // Caller hasn't called SetBookmark. if (had_parser_error_) { scanner_->set_parser_error(); } else { scanner_->reset_parser_error_flag(); scanner_->SeekNext(bookmark_); } bookmark_ = kBookmarkWasApplied; } bool Scanner::BookmarkScope::HasBeenSet() const { return bookmark_ != kNoBookmark && bookmark_ != kBookmarkWasApplied; } bool Scanner::BookmarkScope::HasBeenApplied() const { return bookmark_ == kBookmarkWasApplied; } // ---------------------------------------------------------------------------- // Scanner Scanner::Scanner(Utf16CharacterStream* source, bool is_module) : source_(source), found_html_comment_(false), allow_harmony_optional_chaining_(false), allow_harmony_nullish_(false), is_module_(is_module), octal_pos_(Location::invalid()), octal_message_(MessageTemplate::kNone) { DCHECK_NOT_NULL(source); } void Scanner::Initialize() { // Need to capture identifiers in order to recognize "get" and "set" // in object literals. Init(); next().after_line_terminator = true; Scan(); } template uc32 Scanner::ScanHexNumber(int expected_length) { DCHECK_LE(expected_length, 4); // prevent overflow int begin = source_pos() - 2; uc32 x = 0; for (int i = 0; i < expected_length; i++) { int d = HexValue(c0_); if (d < 0) { ReportScannerError(Location(begin, begin + expected_length + 2), unicode ? MessageTemplate::kInvalidUnicodeEscapeSequence : MessageTemplate::kInvalidHexEscapeSequence); return -1; } x = x * 16 + d; Advance(); } return x; } template uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) { uc32 x = 0; int d = HexValue(c0_); if (d < 0) return -1; while (d >= 0) { x = x * 16 + d; if (x > max_value) { ReportScannerError(Location(beg_pos, source_pos() + 1), MessageTemplate::kUndefinedUnicodeCodePoint); return -1; } Advance(); d = HexValue(c0_); } return x; } Token::Value Scanner::Next() { // Rotate through tokens. TokenDesc* previous = current_; current_ = next_; // Either we already have the next token lined up, in which case next_next_ // simply becomes next_. In that case we use current_ as new next_next_ and // clear its token to indicate that it wasn't scanned yet. Otherwise we use // current_ as next_ and scan into it, leaving next_next_ uninitialized. if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) { next_ = previous; // User 'previous' instead of 'next_' because for some reason the compiler // thinks 'next_' could be modified before the entry into Scan. previous->after_line_terminator = false; Scan(previous); } else { next_ = next_next_; next_next_ = previous; previous->token = Token::UNINITIALIZED; DCHECK_NE(Token::UNINITIALIZED, current().token); } return current().token; } Token::Value Scanner::PeekAhead() { DCHECK(next().token != Token::DIV); DCHECK(next().token != Token::ASSIGN_DIV); if (next_next().token != Token::UNINITIALIZED) { return next_next().token; } TokenDesc* temp = next_; next_ = next_next_; next().after_line_terminator = false; Scan(); next_next_ = next_; next_ = temp; return next_next().token; } Token::Value Scanner::SkipSingleHTMLComment() { if (is_module_) { ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule); return Token::ILLEGAL; } return SkipSingleLineComment(); } Token::Value Scanner::SkipSingleLineComment() { // The line terminator at the end of the line is not considered // to be part of the single-line comment; it is recognized // separately by the lexical grammar and becomes part of the // stream of input elements for the syntactic grammar (see // ECMA-262, section 7.4). AdvanceUntil([](uc32 c0_) { return unibrow::IsLineTerminator(c0_); }); return Token::WHITESPACE; } Token::Value Scanner::SkipSourceURLComment() { TryToParseSourceURLComment(); if (unibrow::IsLineTerminator(c0_) || c0_ == kEndOfInput) { return Token::WHITESPACE; } return SkipSingleLineComment(); } void Scanner::TryToParseSourceURLComment() { // Magic comments are of the form: //[#@]\s=\s*\s*.* and this // function will just return if it cannot parse a magic comment. DCHECK(!IsWhiteSpaceOrLineTerminator(kEndOfInput)); if (!IsWhiteSpace(c0_)) return; Advance(); LiteralBuffer name; name.Start(); while (c0_ != kEndOfInput && !IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') { name.AddChar(c0_); Advance(); } if (!name.is_one_byte()) return; Vector name_literal = name.one_byte_literal(); LiteralBuffer* value; if (name_literal == StaticCharVector("sourceURL")) { value = &source_url_; } else if (name_literal == StaticCharVector("sourceMappingURL")) { value = &source_mapping_url_; } else { return; } if (c0_ != '=') return; value->Start(); Advance(); while (IsWhiteSpace(c0_)) { Advance(); } while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) { // Disallowed characters. if (c0_ == '"' || c0_ == '\'') { value->Start(); return; } if (IsWhiteSpace(c0_)) { break; } value->AddChar(c0_); Advance(); } // Allow whitespace at the end. while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) { if (!IsWhiteSpace(c0_)) { value->Start(); break; } Advance(); } } Token::Value Scanner::SkipMultiLineComment() { DCHECK_EQ(c0_, '*'); // Until we see the first newline, check for * and newline characters. if (!next().after_line_terminator) { do { AdvanceUntil([](uc32 c0) { if (V8_UNLIKELY(static_cast(c0) > kMaxAscii)) { return unibrow::IsLineTerminator(c0); } uint8_t char_flags = character_scan_flags[c0]; return MultilineCommentCharacterNeedsSlowPath(char_flags); }); while (c0_ == '*') { Advance(); if (c0_ == '/') { Advance(); return Token::WHITESPACE; } } if (unibrow::IsLineTerminator(c0_)) { next().after_line_terminator = true; break; } } while (c0_ != kEndOfInput); } // After we've seen newline, simply try to find '*/'. while (c0_ != kEndOfInput) { AdvanceUntil([](uc32 c0) { return c0 == '*'; }); while (c0_ == '*') { Advance(); if (c0_ == '/') { Advance(); return Token::WHITESPACE; } } } return Token::ILLEGAL; } void Scanner::SkipHashBang() { if (c0_ == '#' && Peek() == '!' && source_pos() == 0) { SkipSingleLineComment(); Scan(); } } Token::Value Scanner::ScanHtmlComment() { // Check for