diff options
Diffstat (limited to 'deps/v8/src/parsing/scanner-character-streams.cc')
-rw-r--r-- | deps/v8/src/parsing/scanner-character-streams.cc | 253 |
1 files changed, 88 insertions, 165 deletions
diff --git a/deps/v8/src/parsing/scanner-character-streams.cc b/deps/v8/src/parsing/scanner-character-streams.cc index 91ed54f7be..7cdef87c9c 100644 --- a/deps/v8/src/parsing/scanner-character-streams.cc +++ b/deps/v8/src/parsing/scanner-character-streams.cc @@ -8,7 +8,7 @@ #include "src/globals.h" #include "src/handles.h" #include "src/list-inl.h" // TODO(mstarzinger): Temporary cycle breaker! -#include "src/objects.h" +#include "src/objects-inl.h" #include "src/unicode-inl.h" namespace v8 { @@ -16,6 +16,33 @@ namespace internal { namespace { +size_t CopyUtf8CharsToUtf16Chars(uint16_t* dest, size_t length, const byte* src, + size_t* src_pos, size_t src_length) { + static const unibrow::uchar kMaxUtf16Character = + unibrow::Utf16::kMaxNonSurrogateCharCode; + size_t i = 0; + // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer + // one character early (in the normal case), because we need to have at least + // two free spaces in the buffer to be sure that the next character will fit. + while (i < length - 1) { + if (*src_pos == src_length) break; + unibrow::uchar c = src[*src_pos]; + if (c <= unibrow::Utf8::kMaxOneByteChar) { + *src_pos = *src_pos + 1; + } else { + c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos, + src_pos); + } + if (c > kMaxUtf16Character) { + dest[i++] = unibrow::Utf16::LeadSurrogate(c); + dest[i++] = unibrow::Utf16::TrailSurrogate(c); + } else { + dest[i++] = static_cast<uc16>(c); + } + } + return i; +} + size_t CopyCharsHelper(uint16_t* dest, size_t length, const uint8_t* src, size_t* src_pos, size_t src_length, ScriptCompiler::StreamedSource::Encoding encoding) { @@ -24,8 +51,7 @@ size_t CopyCharsHelper(uint16_t* dest, size_t length, const uint8_t* src, if (length == 0) return 0; if (encoding == ScriptCompiler::StreamedSource::UTF8) { - return v8::internal::Utf8ToUtf16CharacterStream::CopyChars( - dest, length, src, src_pos, src_length); + return CopyUtf8CharsToUtf16Chars(dest, length, src, src_pos, src_length); } size_t to_fill = length; @@ -175,163 +201,7 @@ size_t GenericStringUtf16CharacterStream::FillBuffer(size_t from_pos) { // ---------------------------------------------------------------------------- -// Utf8ToUtf16CharacterStream -Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data, - size_t length) - : BufferedUtf16CharacterStream(), - raw_data_(data), - raw_data_length_(length), - raw_data_pos_(0), - raw_character_position_(0) { - ReadBlock(); -} - - -Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { } - - -size_t Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, size_t length, - const byte* src, size_t* src_pos, - size_t src_length) { - static const unibrow::uchar kMaxUtf16Character = - unibrow::Utf16::kMaxNonSurrogateCharCode; - size_t i = 0; - // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer - // one character early (in the normal case), because we need to have at least - // two free spaces in the buffer to be sure that the next character will fit. - while (i < length - 1) { - if (*src_pos == src_length) break; - unibrow::uchar c = src[*src_pos]; - if (c <= unibrow::Utf8::kMaxOneByteChar) { - *src_pos = *src_pos + 1; - } else { - c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos, - src_pos); - } - if (c > kMaxUtf16Character) { - dest[i++] = unibrow::Utf16::LeadSurrogate(c); - dest[i++] = unibrow::Utf16::TrailSurrogate(c); - } else { - dest[i++] = static_cast<uc16>(c); - } - } - return i; -} - - -size_t Utf8ToUtf16CharacterStream::BufferSeekForward(size_t delta) { - size_t old_pos = pos_; - size_t target_pos = pos_ + delta; - SetRawPosition(target_pos); - pos_ = raw_character_position_; - ReadBlock(); - return pos_ - old_pos; -} - - -size_t Utf8ToUtf16CharacterStream::FillBuffer(size_t char_position) { - SetRawPosition(char_position); - if (raw_character_position_ != char_position) { - // char_position was not a valid position in the stream (hit the end - // while spooling to it). - return 0u; - } - size_t i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_, - raw_data_length_); - raw_character_position_ = char_position + i; - return i; -} - - -static const byte kUtf8MultiByteMask = 0xC0; -static const byte kUtf8MultiByteCharFollower = 0x80; - - -#ifdef DEBUG -static const byte kUtf8MultiByteCharStart = 0xC0; -static bool IsUtf8MultiCharacterStart(byte first_byte) { - return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart; -} -#endif - - -static bool IsUtf8MultiCharacterFollower(byte later_byte) { - return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; -} - - -// Move the cursor back to point at the preceding UTF-8 character start -// in the buffer. -static inline void Utf8CharacterBack(const byte* buffer, size_t* cursor) { - byte character = buffer[--*cursor]; - if (character > unibrow::Utf8::kMaxOneByteChar) { - DCHECK(IsUtf8MultiCharacterFollower(character)); - // Last byte of a multi-byte character encoding. Step backwards until - // pointing to the first byte of the encoding, recognized by having the - // top two bits set. - while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } - DCHECK(IsUtf8MultiCharacterStart(buffer[*cursor])); - } -} - - -// Move the cursor forward to point at the next following UTF-8 character start -// in the buffer. -static inline void Utf8CharacterForward(const byte* buffer, size_t* cursor) { - byte character = buffer[(*cursor)++]; - if (character > unibrow::Utf8::kMaxOneByteChar) { - // First character of a multi-byte character encoding. - // The number of most-significant one-bits determines the length of the - // encoding: - // 110..... - (0xCx, 0xDx) one additional byte (minimum). - // 1110.... - (0xEx) two additional bytes. - // 11110... - (0xFx) three additional bytes (maximum). - DCHECK(IsUtf8MultiCharacterStart(character)); - // Additional bytes is: - // 1 if value in range 0xC0 .. 0xDF. - // 2 if value in range 0xE0 .. 0xEF. - // 3 if value in range 0xF0 .. 0xF7. - // Encode that in a single value. - size_t additional_bytes = - ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; - *cursor += additional_bytes; - DCHECK(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); - } -} - - -// This can't set a raw position between two surrogate pairs, since there -// is no position in the UTF8 stream that corresponds to that. This assumes -// that the surrogate pair is correctly coded as a 4 byte UTF-8 sequence. If -// it is illegally coded as two 3 byte sequences then there is no problem here. -void Utf8ToUtf16CharacterStream::SetRawPosition(size_t target_position) { - if (raw_character_position_ > target_position) { - // Spool backwards in utf8 buffer. - do { - size_t old_pos = raw_data_pos_; - Utf8CharacterBack(raw_data_, &raw_data_pos_); - raw_character_position_--; - DCHECK(old_pos - raw_data_pos_ <= 4); - // Step back over both code units for surrogate pairs. - if (old_pos - raw_data_pos_ == 4) raw_character_position_--; - } while (raw_character_position_ > target_position); - // No surrogate pair splitting. - DCHECK(raw_character_position_ == target_position); - return; - } - // Spool forwards in the utf8 buffer. - while (raw_character_position_ < target_position) { - if (raw_data_pos_ == raw_data_length_) return; - size_t old_pos = raw_data_pos_; - Utf8CharacterForward(raw_data_, &raw_data_pos_); - raw_character_position_++; - DCHECK(raw_data_pos_ - old_pos <= 4); - if (raw_data_pos_ - old_pos == 4) raw_character_position_++; - } - // No surrogate pair splitting. - DCHECK(raw_character_position_ == target_position); -} - +// ExternalStreamingStream size_t ExternalStreamingStream::FillBuffer(size_t position) { // Ignore "position" which is the position in the decoded data. Instead, @@ -559,15 +429,11 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters( ExternalTwoByteStringUtf16CharacterStream:: ~ExternalTwoByteStringUtf16CharacterStream() { } - ExternalTwoByteStringUtf16CharacterStream:: ExternalTwoByteStringUtf16CharacterStream( Handle<ExternalTwoByteString> data, int start_position, int end_position) - : Utf16CharacterStream(), - source_(data), - raw_data_(data->GetTwoByteData(start_position)), - bookmark_(kNoBookmark) { + : raw_data_(data->GetTwoByteData(start_position)), bookmark_(kNoBookmark) { buffer_cursor_ = raw_data_, buffer_end_ = raw_data_ + (end_position - start_position); pos_ = start_position; @@ -585,5 +451,62 @@ void ExternalTwoByteStringUtf16CharacterStream::ResetToBookmark() { pos_ = bookmark_; buffer_cursor_ = raw_data_ + bookmark_; } + +// ---------------------------------------------------------------------------- +// ExternalOneByteStringUtf16CharacterStream + +ExternalOneByteStringUtf16CharacterStream:: + ~ExternalOneByteStringUtf16CharacterStream() {} + +ExternalOneByteStringUtf16CharacterStream:: + ExternalOneByteStringUtf16CharacterStream( + Handle<ExternalOneByteString> data, int start_position, + int end_position) + : raw_data_(data->GetChars()), + length_(end_position), + bookmark_(kNoBookmark) { + DCHECK(end_position >= start_position); + pos_ = start_position; +} + +ExternalOneByteStringUtf16CharacterStream:: + ExternalOneByteStringUtf16CharacterStream(const char* data, size_t length) + : raw_data_(reinterpret_cast<const uint8_t*>(data)), + length_(length), + bookmark_(kNoBookmark) {} + +ExternalOneByteStringUtf16CharacterStream:: + ExternalOneByteStringUtf16CharacterStream(const char* data) + : ExternalOneByteStringUtf16CharacterStream(data, strlen(data)) {} + +bool ExternalOneByteStringUtf16CharacterStream::SetBookmark() { + bookmark_ = pos_; + return true; +} + +void ExternalOneByteStringUtf16CharacterStream::ResetToBookmark() { + DCHECK(bookmark_ != kNoBookmark); + pos_ = bookmark_; + buffer_cursor_ = buffer_; + buffer_end_ = buffer_ + FillBuffer(pos_); +} + +size_t ExternalOneByteStringUtf16CharacterStream::BufferSeekForward( + size_t delta) { + size_t old_pos = pos_; + pos_ = Min(pos_ + delta, length_); + ReadBlock(); + return pos_ - old_pos; +} + +size_t ExternalOneByteStringUtf16CharacterStream::FillBuffer(size_t from_pos) { + if (from_pos >= length_) return 0; + size_t length = Min(kBufferSize, length_ - from_pos); + for (size_t i = 0; i < length; ++i) { + buffer_[i] = static_cast<uc16>(raw_data_[from_pos + i]); + } + return length; +} + } // namespace internal } // namespace v8 |