diff options
Diffstat (limited to 'deps/v8/src/parsing/scanner-character-streams.cc')
-rw-r--r-- | deps/v8/src/parsing/scanner-character-streams.cc | 57 |
1 files changed, 31 insertions, 26 deletions
diff --git a/deps/v8/src/parsing/scanner-character-streams.cc b/deps/v8/src/parsing/scanner-character-streams.cc index c5175c4de7..20aa5c9f8e 100644 --- a/deps/v8/src/parsing/scanner-character-streams.cc +++ b/deps/v8/src/parsing/scanner-character-streams.cc @@ -16,7 +16,7 @@ namespace v8 { namespace internal { namespace { -const unibrow::uchar kUtf8Bom = 0xfeff; +const unibrow::uchar kUtf8Bom = 0xFEFF; } // namespace // ---------------------------------------------------------------------------- @@ -203,7 +203,7 @@ class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream { Utf8ExternalStreamingStream( ScriptCompiler::ExternalSourceStream* source_stream, RuntimeCallStats* stats) - : current_({0, {0, 0, unibrow::Utf8::Utf8IncrementalBuffer(0)}}), + : current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}), source_stream_(source_stream), stats_(stats) {} ~Utf8ExternalStreamingStream() override { @@ -223,7 +223,8 @@ class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream { struct StreamPosition { size_t bytes; size_t chars; - unibrow::Utf8::Utf8IncrementalBuffer incomplete_char; + uint32_t incomplete_char; + unibrow::Utf8::State state; }; // Position contains a StreamPosition and the index of the chunk the position @@ -268,25 +269,25 @@ bool Utf8ExternalStreamingStream::SkipToPosition(size_t position) { const Chunk& chunk = chunks_[current_.chunk_no]; DCHECK(current_.pos.bytes >= chunk.start.bytes); - unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = - chunk.start.incomplete_char; + unibrow::Utf8::State state = chunk.start.state; + uint32_t incomplete_char = chunk.start.incomplete_char; size_t it = current_.pos.bytes - chunk.start.bytes; size_t chars = chunk.start.chars; while (it < chunk.length && chars < position) { - unibrow::uchar t = - unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); + unibrow::uchar t = unibrow::Utf8::ValueOfIncremental( + chunk.data[it], &it, &state, &incomplete_char); if (t == kUtf8Bom && current_.pos.chars == 0) { // BOM detected at beginning of the stream. Don't copy it. } else if (t != unibrow::Utf8::kIncomplete) { chars++; if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++; } - it++; } current_.pos.bytes += it; current_.pos.chars = chars; current_.pos.incomplete_char = incomplete_char; + current_.pos.state = state; current_.chunk_no += (it == chunk.length); return current_.pos.chars == position; @@ -304,31 +305,33 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() { uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_); DCHECK_EQ(cursor, buffer_end_); + unibrow::Utf8::State state = current_.pos.state; + uint32_t incomplete_char = current_.pos.incomplete_char; + // If the current chunk is the last (empty) chunk we'll have to process // any left-over, partial characters. if (chunk.length == 0) { - unibrow::uchar t = - unibrow::Utf8::ValueOfIncrementalFinish(¤t_.pos.incomplete_char); + unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state); if (t != unibrow::Utf8::kBufferEmpty) { - DCHECK_LT(t, unibrow::Utf16::kMaxNonSurrogateCharCode); + DCHECK_EQ(t, unibrow::Utf8::kBadChar); *cursor = static_cast<uc16>(t); buffer_end_++; current_.pos.chars++; + current_.pos.incomplete_char = 0; + current_.pos.state = state; } return; } - unibrow::Utf8::Utf8IncrementalBuffer incomplete_char = - current_.pos.incomplete_char; - size_t it; - for (it = current_.pos.bytes - chunk.start.bytes; - it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) { - unibrow::uchar t = - unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char); - if (t == unibrow::Utf8::kIncomplete) continue; + size_t it = current_.pos.bytes - chunk.start.bytes; + while (it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize) { + unibrow::uchar t = unibrow::Utf8::ValueOfIncremental( + chunk.data[it], &it, &state, &incomplete_char); if (V8_LIKELY(t < kUtf8Bom)) { *(cursor++) = static_cast<uc16>(t); // The by most frequent case. - } else if (t == kUtf8Bom && current_.pos.bytes + it == 2) { + } else if (t == unibrow::Utf8::kIncomplete) { + continue; + } else if (t == kUtf8Bom && current_.pos.bytes + it == 3) { // BOM detected at beginning of the stream. Don't copy it. } else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) { *(cursor++) = static_cast<uc16>(t); @@ -341,13 +344,15 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() { current_.pos.bytes = chunk.start.bytes + it; current_.pos.chars += (cursor - buffer_end_); current_.pos.incomplete_char = incomplete_char; + current_.pos.state = state; current_.chunk_no += (it == chunk.length); buffer_end_ = cursor; } bool Utf8ExternalStreamingStream::FetchChunk() { - RuntimeCallTimerScope scope(stats_, &RuntimeCallStats::GetMoreDataCallback); + RuntimeCallTimerScope scope(stats_, + RuntimeCallCounterId::kGetMoreDataCallback); DCHECK_EQ(current_.chunk_no, chunks_.size()); DCHECK(chunks_.empty() || chunks_.back().length != 0); @@ -395,16 +400,15 @@ void Utf8ExternalStreamingStream::SearchPosition(size_t position) { // checking whether the # bytes in a chunk are equal to the # chars, and if // so avoid the expensive SkipToPosition.) bool ascii_only_chunk = - chunks_[chunk_no].start.incomplete_char == - unibrow::Utf8::Utf8IncrementalBuffer(0) && + chunks_[chunk_no].start.incomplete_char == 0 && (chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) == (chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars); if (ascii_only_chunk) { size_t skip = position - chunks_[chunk_no].start.chars; current_ = {chunk_no, {chunks_[chunk_no].start.bytes + skip, - chunks_[chunk_no].start.chars + skip, - unibrow::Utf8::Utf8IncrementalBuffer(0)}}; + chunks_[chunk_no].start.chars + skip, 0, + unibrow::Utf8::State::kAccept}}; } else { current_ = {chunk_no, chunks_[chunk_no].start}; SkipToPosition(position); @@ -491,7 +495,8 @@ size_t FindChunk(Chunks& chunks, ScriptCompiler::ExternalSourceStream* source, // Get more data if needed. We usually won't enter the loop body. bool out_of_data = !chunks.empty() && chunks.back().byte_length == 0; { - RuntimeCallTimerScope scope(stats, &RuntimeCallStats::GetMoreDataCallback); + RuntimeCallTimerScope scope(stats, + RuntimeCallCounterId::kGetMoreDataCallback); while (!out_of_data && end_pos <= position + 1) { const uint8_t* chunk = nullptr; size_t len = source->GetMoreData(&chunk); |