summaryrefslogtreecommitdiff
path: root/deps/v8/src/parsing/scanner.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/parsing/scanner.cc')
-rw-r--r--deps/v8/src/parsing/scanner.cc227
1 files changed, 92 insertions, 135 deletions
diff --git a/deps/v8/src/parsing/scanner.cc b/deps/v8/src/parsing/scanner.cc
index 525b1bc681..43fc589e88 100644
--- a/deps/v8/src/parsing/scanner.cc
+++ b/deps/v8/src/parsing/scanner.cc
@@ -13,16 +13,15 @@
#include "src/ast/ast-value-factory.h"
#include "src/conversions-inl.h"
#include "src/objects/bigint.h"
-#include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol
#include "src/parsing/scanner-inl.h"
+#include "src/zone/zone.h"
namespace v8 {
namespace internal {
class Scanner::ErrorState {
public:
- ErrorState(MessageTemplate::Template* message_stack,
- Scanner::Location* location_stack)
+ ErrorState(MessageTemplate* message_stack, Scanner::Location* location_stack)
: message_stack_(message_stack),
old_message_(*message_stack),
location_stack_(location_stack),
@@ -49,8 +48,8 @@ class Scanner::ErrorState {
}
private:
- MessageTemplate::Template* const message_stack_;
- MessageTemplate::Template const old_message_;
+ MessageTemplate* const message_stack_;
+ MessageTemplate const old_message_;
Scanner::Location* const location_stack_;
Scanner::Location const old_location_;
};
@@ -59,7 +58,6 @@ class Scanner::ErrorState {
// Scanner::LiteralBuffer
Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
- DCHECK(is_used_);
if (is_one_byte()) {
return isolate->factory()->InternalizeOneByteString(one_byte_literal());
}
@@ -67,20 +65,21 @@ Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
}
int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
- int capacity = Max(min_capacity, backing_store_.length());
- int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
- return new_capacity;
+ return min_capacity < (kMaxGrowth / (kGrowthFactor - 1))
+ ? min_capacity * kGrowthFactor
+ : min_capacity + kMaxGrowth;
}
void Scanner::LiteralBuffer::ExpandBuffer() {
- Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
+ int min_capacity = Max(kInitialCapacity, backing_store_.length());
+ Vector<byte> new_store = Vector<byte>::New(NewCapacity(min_capacity));
MemCopy(new_store.start(), backing_store_.start(), position_);
backing_store_.Dispose();
backing_store_ = new_store;
}
void Scanner::LiteralBuffer::ConvertToTwoByte() {
- DCHECK(is_one_byte_);
+ DCHECK(is_one_byte());
Vector<byte> new_store;
int new_content_size = position_ * kUC16Size;
if (new_content_size >= backing_store_.length()) {
@@ -104,7 +103,7 @@ void Scanner::LiteralBuffer::ConvertToTwoByte() {
}
void Scanner::LiteralBuffer::AddTwoByteChar(uc32 code_unit) {
- DCHECK(!is_one_byte_);
+ DCHECK(!is_one_byte());
if (position_ >= backing_store_.length()) ExpandBuffer();
if (code_unit <=
static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
@@ -124,36 +123,23 @@ void Scanner::LiteralBuffer::AddTwoByteChar(uc32 code_unit) {
// ----------------------------------------------------------------------------
// Scanner::BookmarkScope
-const size_t Scanner::BookmarkScope::kBookmarkAtFirstPos =
- std::numeric_limits<size_t>::max() - 2;
const size_t Scanner::BookmarkScope::kNoBookmark =
std::numeric_limits<size_t>::max() - 1;
const size_t Scanner::BookmarkScope::kBookmarkWasApplied =
std::numeric_limits<size_t>::max();
-void Scanner::BookmarkScope::Set() {
+void Scanner::BookmarkScope::Set(size_t position) {
DCHECK_EQ(bookmark_, kNoBookmark);
-
- // The first token is a bit special, since current_ will still be
- // uninitialized. In this case, store kBookmarkAtFirstPos and special-case it
- // when
- // applying the bookmark.
- DCHECK_IMPLIES(scanner_->current().token == Token::UNINITIALIZED,
- scanner_->current().location.beg_pos ==
- scanner_->next().location.beg_pos);
- bookmark_ = (scanner_->current().token == Token::UNINITIALIZED)
- ? kBookmarkAtFirstPos
- : scanner_->location().beg_pos;
+ bookmark_ = position;
}
void Scanner::BookmarkScope::Apply() {
DCHECK(HasBeenSet()); // Caller hasn't called SetBookmark.
- if (bookmark_ == kBookmarkAtFirstPos) {
- scanner_->SeekNext(0);
+ if (had_parser_error_) {
+ scanner_->set_parser_error();
} else {
+ scanner_->reset_parser_error_flag();
scanner_->SeekNext(bookmark_);
- scanner_->Next();
- DCHECK_EQ(scanner_->location().beg_pos, static_cast<int>(bookmark_));
}
bookmark_ = kBookmarkWasApplied;
}
@@ -169,10 +155,8 @@ bool Scanner::BookmarkScope::HasBeenApplied() const {
// ----------------------------------------------------------------------------
// Scanner
-Scanner::Scanner(UnicodeCache* unicode_cache, Utf16CharacterStream* source,
- bool is_module)
- : unicode_cache_(unicode_cache),
- source_(source),
+Scanner::Scanner(Utf16CharacterStream* source, bool is_module)
+ : source_(source),
found_html_comment_(false),
allow_harmony_numeric_separator_(false),
is_module_(is_module),
@@ -241,13 +225,14 @@ Token::Value Scanner::Next() {
// current_ as next_ and scan into it, leaving next_next_ uninitialized.
if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) {
next_ = previous;
- next().after_line_terminator = false;
- Scan();
+ // Use 'previous' instead of 'next_' because for some reason the compiler
+ // thinks 'next_' could be modified before the entry into Scan.
+ previous->after_line_terminator = false;
+ Scan(previous);
} else {
next_ = next_next_;
next_next_ = previous;
previous->token = Token::UNINITIALIZED;
- previous->contextual_token = Token::UNINITIALIZED;
DCHECK_NE(Token::UNINITIALIZED, current().token);
}
return current().token;
@@ -300,42 +285,41 @@ Token::Value Scanner::SkipSourceURLComment() {
void Scanner::TryToParseSourceURLComment() {
// Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
// function will just return if it cannot parse a magic comment.
- DCHECK(!unicode_cache_->IsWhiteSpaceOrLineTerminator(kEndOfInput));
- if (!unicode_cache_->IsWhiteSpace(c0_)) return;
+ DCHECK(!IsWhiteSpaceOrLineTerminator(kEndOfInput));
+ if (!IsWhiteSpace(c0_)) return;
Advance();
LiteralBuffer name;
name.Start();
- while (c0_ != kEndOfInput &&
- !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
+ while (c0_ != kEndOfInput && !IsWhiteSpaceOrLineTerminator(c0_) &&
+ c0_ != '=') {
name.AddChar(c0_);
Advance();
}
if (!name.is_one_byte()) return;
Vector<const uint8_t> name_literal = name.one_byte_literal();
LiteralBuffer* value;
- if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {
+ if (name_literal == StaticCharVector("sourceURL")) {
value = &source_url_;
- } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {
+ } else if (name_literal == StaticCharVector("sourceMappingURL")) {
value = &source_mapping_url_;
} else {
return;
}
if (c0_ != '=')
return;
- value->Drop();
value->Start();
Advance();
- while (unicode_cache_->IsWhiteSpace(c0_)) {
+ while (IsWhiteSpace(c0_)) {
Advance();
}
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
// Disallowed characters.
if (c0_ == '"' || c0_ == '\'') {
- value->Drop();
+ value->Start();
return;
}
- if (unicode_cache_->IsWhiteSpace(c0_)) {
+ if (IsWhiteSpace(c0_)) {
break;
}
value->AddChar(c0_);
@@ -343,8 +327,8 @@ void Scanner::TryToParseSourceURLComment() {
}
// Allow whitespace at the end.
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
- if (!unicode_cache_->IsWhiteSpace(c0_)) {
- value->Drop();
+ if (!IsWhiteSpace(c0_)) {
+ value->Start();
break;
}
Advance();
@@ -377,6 +361,13 @@ Token::Value Scanner::SkipMultiLineComment() {
return Token::ILLEGAL;
}
+void Scanner::SkipHashBang() {
+ if (c0_ == '#' && Peek() == '!' && source_pos() == 0) {
+ SkipSingleLineComment();
+ Scan();
+ }
+}
+
Token::Value Scanner::ScanHtmlComment() {
// Check for <!-- comments.
DCHECK_EQ(c0_, '!');
@@ -393,51 +384,20 @@ Token::Value Scanner::ScanHtmlComment() {
#ifdef DEBUG
void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
- // Most tokens should not have literal_chars or even raw_literal chars.
- // The rules are:
- // - UNINITIALIZED: we don't care.
- // - TEMPLATE_*: need both literal + raw literal chars.
- // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.
- // - all others: should have neither.
- // Furthermore, only TEMPLATE_* tokens can have a
- // invalid_template_escape_message.
+ // Only TEMPLATE_* tokens can have an invalid_template_escape_message.
+ // ILLEGAL and UNINITIALIZED can have garbage for the field.
switch (token.token) {
case Token::UNINITIALIZED:
+ case Token::ILLEGAL:
// token.literal_chars & other members might be garbage. That's ok.
- break;
case Token::TEMPLATE_SPAN:
case Token::TEMPLATE_TAIL:
- DCHECK(token.raw_literal_chars.is_used());
- DCHECK(token.literal_chars.is_used());
- break;
- case Token::ESCAPED_KEYWORD:
- case Token::ESCAPED_STRICT_RESERVED_WORD:
- case Token::FUTURE_STRICT_RESERVED_WORD:
- case Token::IDENTIFIER:
- case Token::NUMBER:
- case Token::BIGINT:
- case Token::REGEXP_LITERAL:
- case Token::SMI:
- case Token::STRING:
- case Token::PRIVATE_NAME:
- DCHECK(token.literal_chars.is_used());
- DCHECK(!token.raw_literal_chars.is_used());
- DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
break;
default:
- DCHECK(!token.literal_chars.is_used());
- DCHECK(!token.raw_literal_chars.is_used());
DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
break;
}
-
- DCHECK_IMPLIES(token.token != Token::IDENTIFIER,
- token.contextual_token == Token::UNINITIALIZED);
- DCHECK_IMPLIES(token.contextual_token != Token::UNINITIALIZED,
- token.token == Token::IDENTIFIER &&
- Token::IsContextualKeyword(token.contextual_token));
- DCHECK(!Token::IsContextualKeyword(token.token));
}
#endif // DEBUG
@@ -541,24 +501,45 @@ Token::Value Scanner::ScanString() {
uc32 quote = c0_;
Advance(); // consume quote
- LiteralScope literal(this);
+ next().literal_chars.Start();
while (true) {
+ if (V8_UNLIKELY(c0_ == kEndOfInput)) return Token::ILLEGAL;
+ if ((V8_UNLIKELY(static_cast<uint32_t>(c0_) >= kMaxAscii) &&
+ !unibrow::IsStringLiteralLineTerminator(c0_)) ||
+ !MayTerminateString(character_scan_flags[c0_])) {
+ AddLiteralChar(c0_);
+ AdvanceUntil([this](uc32 c0) {
+ if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) {
+ if (V8_UNLIKELY(unibrow::IsStringLiteralLineTerminator(c0))) {
+ return true;
+ }
+ AddLiteralChar(c0);
+ return false;
+ }
+ uint8_t char_flags = character_scan_flags[c0];
+ if (MayTerminateString(char_flags)) return true;
+ AddLiteralChar(c0);
+ return false;
+ });
+ }
if (c0_ == quote) {
- literal.Complete();
Advance();
return Token::STRING;
}
- if (c0_ == kEndOfInput || unibrow::IsStringLiteralLineTerminator(c0_)) {
- return Token::ILLEGAL;
- }
if (c0_ == '\\') {
Advance();
// TODO(verwaest): Check whether we can remove the additional check.
- if (c0_ == kEndOfInput || !ScanEscape<false>()) {
+ if (V8_UNLIKELY(c0_ == kEndOfInput || !ScanEscape<false>())) {
return Token::ILLEGAL;
}
continue;
}
+ if (V8_UNLIKELY(c0_ == kEndOfInput ||
+ unibrow::IsStringLiteralLineTerminator(c0_))) {
+ return Token::ILLEGAL;
+ }
+ DCHECK_NE(quote, c0_);
+ DCHECK((c0_ == '\'' || c0_ == '"'));
AddLiteralCharAdvance();
}
}
@@ -570,17 +551,17 @@ Token::Value Scanner::ScanPrivateName() {
return Token::ILLEGAL;
}
- LiteralScope literal(this);
+ next().literal_chars.Start();
DCHECK_EQ(c0_, '#');
- DCHECK(!unicode_cache_->IsIdentifierStart(kEndOfInput));
- if (!unicode_cache_->IsIdentifierStart(Peek())) {
+ DCHECK(!IsIdentifierStart(kEndOfInput));
+ if (!IsIdentifierStart(Peek())) {
ReportScannerError(source_pos(),
MessageTemplate::kInvalidOrUnexpectedToken);
return Token::ILLEGAL;
}
AddLiteralCharAdvance();
- Token::Value token = ScanIdentifierOrKeywordInner(&literal);
+ Token::Value token = ScanIdentifierOrKeywordInner();
return token == Token::ILLEGAL ? Token::ILLEGAL : Token::PRIVATE_NAME;
}
@@ -605,7 +586,7 @@ Token::Value Scanner::ScanTemplateSpan() {
ErrorState octal_error_state(&octal_message_, &octal_pos_);
Token::Value result = Token::TEMPLATE_SPAN;
- LiteralScope literal(this);
+ next().literal_chars.Start();
next().raw_literal_chars.Start();
const bool capture_raw = true;
while (true) {
@@ -658,10 +639,8 @@ Token::Value Scanner::ScanTemplateSpan() {
AddLiteralChar(c);
}
}
- literal.Complete();
next().location.end_pos = source_pos();
next().token = result;
- next().contextual_token = Token::UNINITIALIZED;
return result;
}
@@ -669,7 +648,6 @@ Token::Value Scanner::ScanTemplateSpan() {
Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
Handle<String> tmp;
if (source_url_.length() > 0) {
- DCHECK(source_url_.is_used());
tmp = source_url_.Internalize(isolate);
}
return tmp;
@@ -678,7 +656,6 @@ Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
Handle<String> tmp;
if (source_mapping_url_.length() > 0) {
- DCHECK(source_mapping_url_.is_used());
tmp = source_mapping_url_.Internalize(isolate);
}
return tmp;
@@ -847,7 +824,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
NumberKind kind = DECIMAL;
- LiteralScope literal(this);
+ next().literal_chars.Start();
bool at_start = !seen_period;
int start_pos = source_pos(); // For reporting octal positions.
if (seen_period) {
@@ -905,10 +882,8 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
}
if (next().literal_chars.one_byte_literal().length() <= 10 &&
- value <= Smi::kMaxValue && c0_ != '.' &&
- !unicode_cache_->IsIdentifierStart(c0_)) {
+ value <= Smi::kMaxValue && c0_ != '.' && !IsIdentifierStart(c0_)) {
next().smi_value_ = static_cast<uint32_t>(value);
- literal.Complete();
if (kind == DECIMAL_WITH_LEADING_ZERO) {
octal_pos_ = Location(start_pos, source_pos());
@@ -963,12 +938,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
// not be an identifier start or a decimal digit; see ECMA-262
// section 7.8.3, page 17 (note that we read only one decimal digit
// if the value is 0).
- if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) {
+ if (IsDecimalDigit(c0_) || IsIdentifierStart(c0_)) {
return Token::ILLEGAL;
}
- literal.Complete();
-
if (kind == DECIMAL_WITH_LEADING_ZERO) {
octal_pos_ = Location(start_pos, source_pos());
octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
@@ -1004,54 +977,49 @@ uc32 Scanner::ScanUnicodeEscape() {
return ScanHexNumber<capture_raw, unicode>(4);
}
-Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(LiteralScope* literal,
- bool escaped) {
+Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(bool escaped,
+ bool can_be_keyword) {
while (true) {
if (c0_ == '\\') {
escaped = true;
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier part characters.
// TODO(verwaest): Make this true.
- // DCHECK(!unicode_cache_->IsIdentifierPart('\\'));
- DCHECK(!unicode_cache_->IsIdentifierPart(-1));
- if (c == '\\' || !unicode_cache_->IsIdentifierPart(c)) {
+ // DCHECK(!IsIdentifierPart('\\'));
+ DCHECK(!IsIdentifierPart(-1));
+ if (c == '\\' || !IsIdentifierPart(c)) {
return Token::ILLEGAL;
}
+ can_be_keyword = can_be_keyword && CharCanBeKeyword(c);
AddLiteralChar(c);
- } else if (unicode_cache_->IsIdentifierPart(c0_) ||
- (CombineSurrogatePair() &&
- unicode_cache_->IsIdentifierPart(c0_))) {
+ } else if (IsIdentifierPart(c0_) ||
+ (CombineSurrogatePair() && IsIdentifierPart(c0_))) {
+ can_be_keyword = can_be_keyword && CharCanBeKeyword(c0_);
AddLiteralCharAdvance();
} else {
break;
}
}
- if (next().literal_chars.is_one_byte()) {
+ if (can_be_keyword && next().literal_chars.is_one_byte()) {
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
/* TODO(adamk): YIELD should be handled specially. */
if (token == Token::FUTURE_STRICT_RESERVED_WORD) {
- literal->Complete();
if (escaped) return Token::ESCAPED_STRICT_RESERVED_WORD;
return token;
}
- if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
- literal->Complete();
- return token;
- }
+ if (token == Token::IDENTIFIER) return token;
if (!escaped) return token;
- literal->Complete();
if (token == Token::LET || token == Token::STATIC) {
return Token::ESCAPED_STRICT_RESERVED_WORD;
}
return Token::ESCAPED_KEYWORD;
}
- literal->Complete();
return Token::IDENTIFIER;
}
@@ -1065,7 +1033,7 @@ bool Scanner::ScanRegExpPattern() {
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp
// constructor.
- LiteralScope literal(this);
+ next().literal_chars.Start();
if (next().token == Token::ASSIGN_DIV) {
AddLiteralChar('=');
}
@@ -1098,9 +1066,7 @@ bool Scanner::ScanRegExpPattern() {
}
Advance(); // consume '/'
- literal.Complete();
next().token = Token::REGEXP_LITERAL;
- next().contextual_token = Token::UNINITIALIZED;
return true;
}
@@ -1110,7 +1076,7 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
// Scan regular expression flags.
int flags = 0;
- while (unicode_cache_->IsIdentifierPart(c0_)) {
+ while (IsIdentifierPart(c0_)) {
RegExp::Flags flag = RegExp::kNone;
switch (c0_) {
case 'g':
@@ -1173,7 +1139,6 @@ const AstRawString* Scanner::CurrentRawSymbol(
double Scanner::DoubleValue() {
DCHECK(is_literal_one_byte());
return StringToDouble(
- unicode_cache_,
literal_one_byte_string(),
ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
}
@@ -1188,14 +1153,6 @@ const char* Scanner::CurrentLiteralAsCString(Zone* zone) const {
return buffer;
}
-bool Scanner::IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
- AstValueFactory* ast_value_factory) const {
- DCHECK_NOT_NULL(duplicate_finder);
- DCHECK_NOT_NULL(ast_value_factory);
- const AstRawString* string = CurrentSymbol(ast_value_factory);
- return !duplicate_finder->known_symbols_.insert(string).second;
-}
-
void Scanner::SeekNext(size_t position) {
// Use with care: This cleanly resets most, but not all scanner state.
// TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
@@ -1206,7 +1163,7 @@ void Scanner::SeekNext(size_t position) {
// current_ will remain unchanged, so overwrite it fully.)
for (TokenDesc& token : token_storage_) {
token.token = Token::UNINITIALIZED;
- token.contextual_token = Token::UNINITIALIZED;
+ token.invalid_template_escape_message = MessageTemplate::kNone;
}
// 2, reset the source to the desired position,
source_->Seek(position);