diff options
Diffstat (limited to 'deps/v8/test/cctest/parsing/test-scanner-streams.cc')
-rw-r--r-- | deps/v8/test/cctest/parsing/test-scanner-streams.cc | 139 |
1 files changed, 137 insertions, 2 deletions
diff --git a/deps/v8/test/cctest/parsing/test-scanner-streams.cc b/deps/v8/test/cctest/parsing/test-scanner-streams.cc index 6f9b58daf3..a9dc4482ef 100644 --- a/deps/v8/test/cctest/parsing/test-scanner-streams.cc +++ b/deps/v8/test/cctest/parsing/test-scanner-streams.cc @@ -185,6 +185,110 @@ TEST(Utf8SplitBOM) { } } +TEST(Utf8SplitMultiBOM) { + // Construct chunks with a split BOM followed by another split BOM. + const char* chunks = "\xef\xbb\0\xbf\xef\xbb\0\xbf\0\0"; + ChunkSource chunk_source(chunks); + std::unique_ptr<i::Utf16CharacterStream> stream( + v8::internal::ScannerStream::For( + &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); + + // Read the data, ensuring we get exactly one of the two BOMs back. + CHECK_EQ(0xFEFF, stream->Advance()); + CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance()); +} + +TEST(Utf8AdvanceUntil) { + // Test utf-8 advancing until a certain char. + + const char line_term = '\n'; + const size_t kLen = arraysize(unicode_utf8); + char data[kLen + 1]; + strncpy(data, unicode_utf8, kLen); + data[kLen - 1] = line_term; + data[kLen] = '\0'; + + { + const char* chunks[] = {data, "\0"}; + ChunkSource chunk_source(chunks); + std::unique_ptr<v8::internal::Utf16CharacterStream> stream( + v8::internal::ScannerStream::For( + &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); + + int32_t res = stream->AdvanceUntil( + [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); }); + CHECK_EQ(line_term, res); + } +} + +TEST(AdvanceMatchAdvanceUntil) { + // Test if single advance and advanceUntil behave the same + + char data[] = {'a', 'b', '\n', 'c', '\0'}; + + { + const char* chunks[] = {data, "\0"}; + ChunkSource chunk_source_a(chunks); + + std::unique_ptr<v8::internal::Utf16CharacterStream> stream_advance( + v8::internal::ScannerStream::For( + &chunk_source_a, v8::ScriptCompiler::StreamedSource::UTF8, + nullptr)); + + ChunkSource chunk_source_au(chunks); + std::unique_ptr<v8::internal::Utf16CharacterStream> stream_advance_until( + v8::internal::ScannerStream::For( + &chunk_source_au, v8::ScriptCompiler::StreamedSource::UTF8, + nullptr)); + + int32_t au_c0_ = stream_advance_until->AdvanceUntil( + [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); }); + + int32_t a_c0_ = '0'; + while (!unibrow::IsLineTerminator(a_c0_)) { + a_c0_ = stream_advance->Advance(); + } + + // Check both advances methods have the same output + CHECK_EQ(a_c0_, au_c0_); + + // Check if both set the cursor to the correct position by advancing both + // streams by one character. + a_c0_ = stream_advance->Advance(); + au_c0_ = stream_advance_until->Advance(); + CHECK_EQ(a_c0_, au_c0_); + } +} + +TEST(Utf8AdvanceUntilOverChunkBoundaries) { + // Test utf-8 advancing until a certain char, crossing chunk boundaries. + + // Split the test string at each byte and pass it to the stream. This way, + // we'll have a split at each possible boundary. + size_t len = strlen(unicode_utf8); + char buffer[arraysize(unicode_utf8) + 4]; + for (size_t i = 1; i < len; i++) { + // Copy source string into buffer, splitting it at i. + // Then add three chunks, 0..i-1, i..strlen-1, empty. + strncpy(buffer, unicode_utf8, i); + strncpy(buffer + i + 1, unicode_utf8 + i, len - i); + buffer[i] = '\0'; + buffer[len + 1] = '\n'; + buffer[len + 2] = '\0'; + buffer[len + 3] = '\0'; + const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2}; + + ChunkSource chunk_source(chunks); + std::unique_ptr<v8::internal::Utf16CharacterStream> stream( + v8::internal::ScannerStream::For( + &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); + + int32_t res = stream->AdvanceUntil( + [](int32_t c0_) { return unibrow::IsLineTerminator(c0_); }); + CHECK_EQ(buffer[len + 1], res); + } +} + TEST(Utf8ChunkBoundaries) { // Test utf-8 parsing at chunk boundaries. @@ -323,7 +427,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length, // This avoids the GC from trying to free a stack allocated resource. if (uc16_string->IsExternalString()) i::Handle<i::ExternalTwoByteString>::cast(uc16_string) - ->set_resource(nullptr); + ->SetResource(isolate, nullptr); } // 1-byte external string @@ -343,7 +447,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length, // This avoids the GC from trying to free a stack allocated resource. if (ext_one_byte_string->IsExternalString()) i::Handle<i::ExternalOneByteString>::cast(ext_one_byte_string) - ->set_resource(nullptr); + ->SetResource(isolate, nullptr); } // 1-byte generic i::String @@ -560,3 +664,34 @@ TEST(TestOverlongAndInvalidSequences) { CHECK_EQ(unicode_expected.size(), arraysize(cases)); TestChunkStreamAgainstReference(cases, unicode_expected); } + +TEST(RelocatingCharacterStream) { + ManualGCScope manual_gc_scope; + CcTest::InitializeVM(); + i::Isolate* i_isolate = CcTest::i_isolate(); + v8::HandleScope scope(CcTest::isolate()); + + const char* string = "abcd"; + int length = static_cast<int>(strlen(string)); + std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]); + for (int i = 0; i < length; i++) { + uc16_buffer[i] = string[i]; + } + i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(), length); + i::Handle<i::String> two_byte_string = + i_isolate->factory() + ->NewStringFromTwoByte(two_byte_vector, i::NOT_TENURED) + .ToHandleChecked(); + std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream( + i::ScannerStream::For(i_isolate, two_byte_string, 0, length)); + CHECK_EQ('a', two_byte_string_stream->Advance()); + CHECK_EQ('b', two_byte_string_stream->Advance()); + CHECK_EQ(size_t{2}, two_byte_string_stream->pos()); + i::String* raw = *two_byte_string; + i_isolate->heap()->CollectGarbage(i::NEW_SPACE, + i::GarbageCollectionReason::kUnknown); + // GC moved the string. + CHECK_NE(raw, *two_byte_string); + CHECK_EQ('c', two_byte_string_stream->Advance()); + CHECK_EQ('d', two_byte_string_stream->Advance()); +} |