// Copyright 2011 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "src/parsing/scanner-character-streams.h" #include #include #include "include/v8.h" #include "src/common/globals.h" #include "src/handles/handles.h" #include "src/logging/counters.h" #include "src/objects/objects-inl.h" #include "src/parsing/scanner.h" #include "src/strings/unicode-inl.h" namespace v8 { namespace internal { class ScopedExternalStringLock { public: explicit ScopedExternalStringLock(ExternalString string) { DCHECK(!string.is_null()); if (string.IsExternalOneByteString()) { resource_ = ExternalOneByteString::cast(string).resource(); } else { DCHECK(string.IsExternalTwoByteString()); resource_ = ExternalTwoByteString::cast(string).resource(); } DCHECK(resource_); resource_->Lock(); } // Copying a lock increases the locking depth. ScopedExternalStringLock(const ScopedExternalStringLock& other) V8_NOEXCEPT : resource_(other.resource_) { resource_->Lock(); } ~ScopedExternalStringLock() { resource_->Unlock(); } private: // Not nullptr. const v8::String::ExternalStringResourceBase* resource_; }; namespace { const unibrow::uchar kUtf8Bom = 0xFEFF; } // namespace template struct Range { const Char* start; const Char* end; size_t length() { return static_cast(end - start); } bool unaligned_start() const { return reinterpret_cast(start) % sizeof(Char) == 1; } }; // A Char stream backed by an on-heap SeqOneByteString or SeqTwoByteString. template class OnHeapStream { public: using String = typename CharTraits::String; OnHeapStream(Handle string, size_t start_offset, size_t end) : string_(string), start_offset_(start_offset), length_(end) {} OnHeapStream(const OnHeapStream&) V8_NOEXCEPT : start_offset_(0), length_(0) { UNREACHABLE(); } // The no_gc argument is only here because of the templated way this class // is used along with other implementations that require V8 heap access. Range GetDataAt(size_t pos, RuntimeCallStats* stats, DisallowHeapAllocation* no_gc) { return {&string_->GetChars(*no_gc)[start_offset_ + Min(length_, pos)], &string_->GetChars(*no_gc)[start_offset_ + length_]}; } static const bool kCanBeCloned = false; static const bool kCanAccessHeap = true; private: Handle string_; const size_t start_offset_; const size_t length_; }; // A Char stream backed by an off-heap ExternalOneByteString or // ExternalTwoByteString. template class ExternalStringStream { using ExternalString = typename CharTraits::ExternalString; public: ExternalStringStream(ExternalString string, size_t start_offset, size_t length) : lock_(string), data_(string.GetChars() + start_offset), length_(length) {} ExternalStringStream(const ExternalStringStream& other) V8_NOEXCEPT : lock_(other.lock_), data_(other.data_), length_(other.length_) {} // The no_gc argument is only here because of the templated way this class // is used along with other implementations that require V8 heap access. Range GetDataAt(size_t pos, RuntimeCallStats* stats, DisallowHeapAllocation* no_gc = nullptr) { return {&data_[Min(length_, pos)], &data_[length_]}; } static const bool kCanBeCloned = true; static const bool kCanAccessHeap = false; private: ScopedExternalStringLock lock_; const Char* const data_; const size_t length_; }; // A Char stream backed by a C array. Testing only. template class TestingStream { public: TestingStream(const Char* data, size_t length) : data_(data), length_(length) {} // The no_gc argument is only here because of the templated way this class // is used along with other implementations that require V8 heap access. Range GetDataAt(size_t pos, RuntimeCallStats* stats, DisallowHeapAllocation* no_gc = nullptr) { return {&data_[Min(length_, pos)], &data_[length_]}; } static const bool kCanBeCloned = true; static const bool kCanAccessHeap = false; private: const Char* const data_; const size_t length_; }; // A Char stream backed by multiple source-stream provided off-heap chunks. template class ChunkedStream { public: explicit ChunkedStream(ScriptCompiler::ExternalSourceStream* source) : source_(source) {} ChunkedStream(const ChunkedStream&) V8_NOEXCEPT { // TODO(rmcilroy): Implement cloning for chunked streams. UNREACHABLE(); } // The no_gc argument is only here because of the templated way this class // is used along with other implementations that require V8 heap access. Range GetDataAt(size_t pos, RuntimeCallStats* stats, DisallowHeapAllocation* no_gc = nullptr) { Chunk chunk = FindChunk(pos, stats); size_t buffer_end = chunk.length; size_t buffer_pos = Min(buffer_end, pos - chunk.position); return {&chunk.data[buffer_pos], &chunk.data[buffer_end]}; } ~ChunkedStream() { for (Chunk& chunk : chunks_) delete[] chunk.data; } static const bool kCanBeCloned = false; static const bool kCanAccessHeap = false; private: struct Chunk { Chunk(const Char* const data, size_t position, size_t length) : data(data), position(position), length(length) {} const Char* const data; // The logical position of data. const size_t position; const size_t length; size_t end_position() const { return position + length; } }; Chunk FindChunk(size_t position, RuntimeCallStats* stats) { while (V8_UNLIKELY(chunks_.empty())) FetchChunk(size_t{0}, stats); // Walk forwards while the position is in front of the current chunk. while (position >= chunks_.back().end_position() && chunks_.back().length > 0) { FetchChunk(chunks_.back().end_position(), stats); } // Walk backwards. for (auto reverse_it = chunks_.rbegin(); reverse_it != chunks_.rend(); ++reverse_it) { if (reverse_it->position <= position) return *reverse_it; } UNREACHABLE(); } virtual void ProcessChunk(const uint8_t* data, size_t position, size_t length) { // Incoming data has to be aligned to Char size. DCHECK_EQ(0, length % sizeof(Char)); chunks_.emplace_back(reinterpret_cast(data), position, length / sizeof(Char)); } void FetchChunk(size_t position, RuntimeCallStats* stats) { const uint8_t* data = nullptr; size_t length; { RuntimeCallTimerScope scope(stats, RuntimeCallCounterId::kGetMoreDataCallback); length = source_->GetMoreData(&data); } ProcessChunk(data, position, length); } ScriptCompiler::ExternalSourceStream* source_; protected: std::vector chunks_; }; // Provides a buffered utf-16 view on the bytes from the underlying ByteStream. // Chars are buffered if either the underlying stream isn't utf-16 or the // underlying utf-16 stream might move (is on-heap). template