summaryrefslogtreecommitdiff
path: root/deps/v8/src/parsing/scanner-character-streams.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/parsing/scanner-character-streams.cc')
-rw-r--r--deps/v8/src/parsing/scanner-character-streams.cc57
1 files changed, 31 insertions, 26 deletions
diff --git a/deps/v8/src/parsing/scanner-character-streams.cc b/deps/v8/src/parsing/scanner-character-streams.cc
index c5175c4de7..20aa5c9f8e 100644
--- a/deps/v8/src/parsing/scanner-character-streams.cc
+++ b/deps/v8/src/parsing/scanner-character-streams.cc
@@ -16,7 +16,7 @@ namespace v8 {
namespace internal {
namespace {
-const unibrow::uchar kUtf8Bom = 0xfeff;
+const unibrow::uchar kUtf8Bom = 0xFEFF;
} // namespace
// ----------------------------------------------------------------------------
@@ -203,7 +203,7 @@ class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
Utf8ExternalStreamingStream(
ScriptCompiler::ExternalSourceStream* source_stream,
RuntimeCallStats* stats)
- : current_({0, {0, 0, unibrow::Utf8::Utf8IncrementalBuffer(0)}}),
+ : current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}),
source_stream_(source_stream),
stats_(stats) {}
~Utf8ExternalStreamingStream() override {
@@ -223,7 +223,8 @@ class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
struct StreamPosition {
size_t bytes;
size_t chars;
- unibrow::Utf8::Utf8IncrementalBuffer incomplete_char;
+ uint32_t incomplete_char;
+ unibrow::Utf8::State state;
};
// Position contains a StreamPosition and the index of the chunk the position
@@ -268,25 +269,25 @@ bool Utf8ExternalStreamingStream::SkipToPosition(size_t position) {
const Chunk& chunk = chunks_[current_.chunk_no];
DCHECK(current_.pos.bytes >= chunk.start.bytes);
- unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
- chunk.start.incomplete_char;
+ unibrow::Utf8::State state = chunk.start.state;
+ uint32_t incomplete_char = chunk.start.incomplete_char;
size_t it = current_.pos.bytes - chunk.start.bytes;
size_t chars = chunk.start.chars;
while (it < chunk.length && chars < position) {
- unibrow::uchar t =
- unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
+ unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
+ chunk.data[it], &it, &state, &incomplete_char);
if (t == kUtf8Bom && current_.pos.chars == 0) {
// BOM detected at beginning of the stream. Don't copy it.
} else if (t != unibrow::Utf8::kIncomplete) {
chars++;
if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
}
- it++;
}
current_.pos.bytes += it;
current_.pos.chars = chars;
current_.pos.incomplete_char = incomplete_char;
+ current_.pos.state = state;
current_.chunk_no += (it == chunk.length);
return current_.pos.chars == position;
@@ -304,31 +305,33 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
DCHECK_EQ(cursor, buffer_end_);
+ unibrow::Utf8::State state = current_.pos.state;
+ uint32_t incomplete_char = current_.pos.incomplete_char;
+
// If the current chunk is the last (empty) chunk we'll have to process
// any left-over, partial characters.
if (chunk.length == 0) {
- unibrow::uchar t =
- unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char);
+ unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
if (t != unibrow::Utf8::kBufferEmpty) {
- DCHECK_LT(t, unibrow::Utf16::kMaxNonSurrogateCharCode);
+ DCHECK_EQ(t, unibrow::Utf8::kBadChar);
*cursor = static_cast<uc16>(t);
buffer_end_++;
current_.pos.chars++;
+ current_.pos.incomplete_char = 0;
+ current_.pos.state = state;
}
return;
}
- unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
- current_.pos.incomplete_char;
- size_t it;
- for (it = current_.pos.bytes - chunk.start.bytes;
- it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) {
- unibrow::uchar t =
- unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
- if (t == unibrow::Utf8::kIncomplete) continue;
+ size_t it = current_.pos.bytes - chunk.start.bytes;
+ while (it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize) {
+ unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
+ chunk.data[it], &it, &state, &incomplete_char);
if (V8_LIKELY(t < kUtf8Bom)) {
*(cursor++) = static_cast<uc16>(t); // The by most frequent case.
- } else if (t == kUtf8Bom && current_.pos.bytes + it == 2) {
+ } else if (t == unibrow::Utf8::kIncomplete) {
+ continue;
+ } else if (t == kUtf8Bom && current_.pos.bytes + it == 3) {
// BOM detected at beginning of the stream. Don't copy it.
} else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
*(cursor++) = static_cast<uc16>(t);
@@ -341,13 +344,15 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
current_.pos.bytes = chunk.start.bytes + it;
current_.pos.chars += (cursor - buffer_end_);
current_.pos.incomplete_char = incomplete_char;
+ current_.pos.state = state;
current_.chunk_no += (it == chunk.length);
buffer_end_ = cursor;
}
bool Utf8ExternalStreamingStream::FetchChunk() {
- RuntimeCallTimerScope scope(stats_, &RuntimeCallStats::GetMoreDataCallback);
+ RuntimeCallTimerScope scope(stats_,
+ RuntimeCallCounterId::kGetMoreDataCallback);
DCHECK_EQ(current_.chunk_no, chunks_.size());
DCHECK(chunks_.empty() || chunks_.back().length != 0);
@@ -395,16 +400,15 @@ void Utf8ExternalStreamingStream::SearchPosition(size_t position) {
// checking whether the # bytes in a chunk are equal to the # chars, and if
// so avoid the expensive SkipToPosition.)
bool ascii_only_chunk =
- chunks_[chunk_no].start.incomplete_char ==
- unibrow::Utf8::Utf8IncrementalBuffer(0) &&
+ chunks_[chunk_no].start.incomplete_char == 0 &&
(chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) ==
(chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
if (ascii_only_chunk) {
size_t skip = position - chunks_[chunk_no].start.chars;
current_ = {chunk_no,
{chunks_[chunk_no].start.bytes + skip,
- chunks_[chunk_no].start.chars + skip,
- unibrow::Utf8::Utf8IncrementalBuffer(0)}};
+ chunks_[chunk_no].start.chars + skip, 0,
+ unibrow::Utf8::State::kAccept}};
} else {
current_ = {chunk_no, chunks_[chunk_no].start};
SkipToPosition(position);
@@ -491,7 +495,8 @@ size_t FindChunk(Chunks& chunks, ScriptCompiler::ExternalSourceStream* source,
// Get more data if needed. We usually won't enter the loop body.
bool out_of_data = !chunks.empty() && chunks.back().byte_length == 0;
{
- RuntimeCallTimerScope scope(stats, &RuntimeCallStats::GetMoreDataCallback);
+ RuntimeCallTimerScope scope(stats,
+ RuntimeCallCounterId::kGetMoreDataCallback);
while (!out_of_data && end_pos <= position + 1) {
const uint8_t* chunk = nullptr;
size_t len = source->GetMoreData(&chunk);