From 2dcc3665abf57c3607cebffdeeca062f5894885d Mon Sep 17 00:00:00 2001 From: Michaël Zasso Date: Thu, 1 Aug 2019 08:38:30 +0200 Subject: deps: update V8 to 7.6.303.28 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-URL: https://github.com/nodejs/node/pull/28016 Reviewed-By: Colin Ihrig Reviewed-By: Refael Ackermann (רפאל פלחי) Reviewed-By: Rich Trott Reviewed-By: Michael Dawson Reviewed-By: Jiawen Geng --- deps/v8/src/strings/OWNERS | 5 + deps/v8/src/strings/char-predicates-inl.h | 125 ++ deps/v8/src/strings/char-predicates.cc | 44 + deps/v8/src/strings/char-predicates.h | 85 + deps/v8/src/strings/string-builder-inl.h | 310 +++ deps/v8/src/strings/string-builder.cc | 294 +++ deps/v8/src/strings/string-case.cc | 136 ++ deps/v8/src/strings/string-case.h | 17 + deps/v8/src/strings/string-hasher-inl.h | 81 + deps/v8/src/strings/string-hasher.h | 58 + deps/v8/src/strings/string-search.h | 548 +++++ deps/v8/src/strings/string-stream.cc | 476 +++++ deps/v8/src/strings/string-stream.h | 214 ++ deps/v8/src/strings/unicode-decoder.cc | 81 + deps/v8/src/strings/unicode-decoder.h | 74 + deps/v8/src/strings/unicode-inl.h | 194 ++ deps/v8/src/strings/unicode.cc | 3192 +++++++++++++++++++++++++++++ deps/v8/src/strings/unicode.h | 257 +++ deps/v8/src/strings/uri.cc | 510 +++++ deps/v8/src/strings/uri.h | 55 + 20 files changed, 6756 insertions(+) create mode 100644 deps/v8/src/strings/OWNERS create mode 100644 deps/v8/src/strings/char-predicates-inl.h create mode 100644 deps/v8/src/strings/char-predicates.cc create mode 100644 deps/v8/src/strings/char-predicates.h create mode 100644 deps/v8/src/strings/string-builder-inl.h create mode 100644 deps/v8/src/strings/string-builder.cc create mode 100644 deps/v8/src/strings/string-case.cc create mode 100644 deps/v8/src/strings/string-case.h create mode 100644 deps/v8/src/strings/string-hasher-inl.h create mode 100644 deps/v8/src/strings/string-hasher.h create mode 100644 
deps/v8/src/strings/string-search.h create mode 100644 deps/v8/src/strings/string-stream.cc create mode 100644 deps/v8/src/strings/string-stream.h create mode 100644 deps/v8/src/strings/unicode-decoder.cc create mode 100644 deps/v8/src/strings/unicode-decoder.h create mode 100644 deps/v8/src/strings/unicode-inl.h create mode 100644 deps/v8/src/strings/unicode.cc create mode 100644 deps/v8/src/strings/unicode.h create mode 100644 deps/v8/src/strings/uri.cc create mode 100644 deps/v8/src/strings/uri.h (limited to 'deps/v8/src/strings') diff --git a/deps/v8/src/strings/OWNERS b/deps/v8/src/strings/OWNERS new file mode 100644 index 0000000000..037c916f24 --- /dev/null +++ b/deps/v8/src/strings/OWNERS @@ -0,0 +1,5 @@ +bmeurer@chromium.org +jgruber@chromium.org +jkummerow@chromium.org +verwaest@chromium.org +yangguo@chromium.org diff --git a/deps/v8/src/strings/char-predicates-inl.h b/deps/v8/src/strings/char-predicates-inl.h new file mode 100644 index 0000000000..cdd8ddb4ea --- /dev/null +++ b/deps/v8/src/strings/char-predicates-inl.h @@ -0,0 +1,125 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_ +#define V8_STRINGS_CHAR_PREDICATES_INL_H_ + +#include "src/strings/char-predicates.h" + +namespace v8 { +namespace internal { + +// If c is in 'A'-'Z' or 'a'-'z', return its lower-case. +// Else, return something outside of 'A'-'Z' and 'a'-'z'. +// Note: it ignores LOCALE. 
+inline constexpr int AsciiAlphaToLower(uc32 c) { return c | 0x20; } + +inline constexpr bool IsCarriageReturn(uc32 c) { return c == 0x000D; } + +inline constexpr bool IsLineFeed(uc32 c) { return c == 0x000A; } + +inline constexpr bool IsAsciiIdentifier(uc32 c) { + return IsAlphaNumeric(c) || c == '$' || c == '_'; +} + +inline constexpr bool IsAlphaNumeric(uc32 c) { + return IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c); +} + +inline constexpr bool IsDecimalDigit(uc32 c) { + // ECMA-262, 3rd, 7.8.3 (p 16) + return IsInRange(c, '0', '9'); +} + +inline constexpr bool IsHexDigit(uc32 c) { + // ECMA-262, 3rd, 7.6 (p 15) + return IsDecimalDigit(c) || IsInRange(AsciiAlphaToLower(c), 'a', 'f'); +} + +inline constexpr bool IsOctalDigit(uc32 c) { + // ECMA-262, 6th, 7.8.3 + return IsInRange(c, '0', '7'); +} + +inline constexpr bool IsNonOctalDecimalDigit(uc32 c) { + return IsInRange(c, '8', '9'); +} + +inline constexpr bool IsBinaryDigit(uc32 c) { + // ECMA-262, 6th, 7.8.3 + return c == '0' || c == '1'; +} + +inline constexpr bool IsRegExpWord(uc16 c) { + return IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c) || + (c == '_'); +} + +inline constexpr bool IsRegExpNewline(uc16 c) { + // CR LF LS PS + return c != 0x000A && c != 0x000D && c != 0x2028 && c != 0x2029; +} + +// Constexpr cache table for character flags. +enum AsciiCharFlags { + kIsIdentifierStart = 1 << 0, + kIsIdentifierPart = 1 << 1, + kIsWhiteSpace = 1 << 2, + kIsWhiteSpaceOrLineTerminator = 1 << 3 +}; +constexpr uint8_t BuildAsciiCharFlags(uc32 c) { + // clang-format off + return + (IsAsciiIdentifier(c) || c == '\\') ? ( + kIsIdentifierPart | (!IsDecimalDigit(c) ? kIsIdentifierStart : 0)) : 0 | + (c == ' ' || c == '\t' || c == '\v' || c == '\f') ? + kIsWhiteSpace | kIsWhiteSpaceOrLineTerminator : 0 | + (c == '\r' || c == '\n') ? 
kIsWhiteSpaceOrLineTerminator : 0; + // clang-format on +} +const constexpr uint8_t kAsciiCharFlags[128] = { +#define BUILD_CHAR_FLAGS(N) BuildAsciiCharFlags(N), + INT_0_TO_127_LIST(BUILD_CHAR_FLAGS) +#undef BUILD_CHAR_FLAGS +}; + +bool IsIdentifierStart(uc32 c) { + if (!IsInRange(c, 0, 127)) return IsIdentifierStartSlow(c); + DCHECK_EQ(IsIdentifierStartSlow(c), + static_cast(kAsciiCharFlags[c] & kIsIdentifierStart)); + return kAsciiCharFlags[c] & kIsIdentifierStart; +} + +bool IsIdentifierPart(uc32 c) { + if (!IsInRange(c, 0, 127)) return IsIdentifierPartSlow(c); + DCHECK_EQ(IsIdentifierPartSlow(c), + static_cast(kAsciiCharFlags[c] & kIsIdentifierPart)); + return kAsciiCharFlags[c] & kIsIdentifierPart; +} + +bool IsWhiteSpace(uc32 c) { + if (!IsInRange(c, 0, 127)) return IsWhiteSpaceSlow(c); + DCHECK_EQ(IsWhiteSpaceSlow(c), + static_cast(kAsciiCharFlags[c] & kIsWhiteSpace)); + return kAsciiCharFlags[c] & kIsWhiteSpace; +} + +bool IsWhiteSpaceOrLineTerminator(uc32 c) { + if (!IsInRange(c, 0, 127)) return IsWhiteSpaceOrLineTerminatorSlow(c); + DCHECK_EQ( + IsWhiteSpaceOrLineTerminatorSlow(c), + static_cast(kAsciiCharFlags[c] & kIsWhiteSpaceOrLineTerminator)); + return kAsciiCharFlags[c] & kIsWhiteSpaceOrLineTerminator; +} + +bool IsLineTerminatorSequence(uc32 c, uc32 next) { + if (!unibrow::IsLineTerminator(c)) return false; + if (c == 0x000d && next == 0x000a) return false; // CR with following LF. + return true; +} + +} // namespace internal +} // namespace v8 + +#endif // V8_STRINGS_CHAR_PREDICATES_INL_H_ diff --git a/deps/v8/src/strings/char-predicates.cc b/deps/v8/src/strings/char-predicates.cc new file mode 100644 index 0000000000..0133a03517 --- /dev/null +++ b/deps/v8/src/strings/char-predicates.cc @@ -0,0 +1,44 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef V8_INTL_SUPPORT +#error Internationalization is expected to be enabled. +#endif // V8_INTL_SUPPORT + +#include "src/strings/char-predicates.h" + +#include "unicode/uchar.h" +#include "unicode/urename.h" + +namespace v8 { +namespace internal { + +// ES#sec-names-and-keywords Names and Keywords +// UnicodeIDStart, '$', '_' and '\' +bool IsIdentifierStartSlow(uc32 c) { + // cannot use u_isIDStart because it does not work for + // Other_ID_Start characters. + return u_hasBinaryProperty(c, UCHAR_ID_START) || + (c < 0x60 && (c == '$' || c == '\\' || c == '_')); +} + +// ES#sec-names-and-keywords Names and Keywords +// UnicodeIDContinue, '$', '_', '\', ZWJ, and ZWNJ +bool IsIdentifierPartSlow(uc32 c) { + // Can't use u_isIDPart because it does not work for + // Other_ID_Continue characters. + return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) || + (c < 0x60 && (c == '$' || c == '\\' || c == '_')) || c == 0x200C || + c == 0x200D; +} + +// ES#sec-white-space White Space +// gC=Zs, U+0009, U+000B, U+000C, U+FEFF +bool IsWhiteSpaceSlow(uc32 c) { + return (u_charType(c) == U_SPACE_SEPARATOR) || + (c < 0x0D && (c == 0x09 || c == 0x0B || c == 0x0C)) || c == 0xFEFF; +} + +} // namespace internal +} // namespace v8 diff --git a/deps/v8/src/strings/char-predicates.h b/deps/v8/src/strings/char-predicates.h new file mode 100644 index 0000000000..43b4d091d1 --- /dev/null +++ b/deps/v8/src/strings/char-predicates.h @@ -0,0 +1,85 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_STRINGS_CHAR_PREDICATES_H_ +#define V8_STRINGS_CHAR_PREDICATES_H_ + +#include "src/common/globals.h" +#include "src/strings/unicode.h" + +namespace v8 { +namespace internal { + +// Unicode character predicates as defined by ECMA-262, 3rd, +// used for lexical analysis. 
+ +inline constexpr int AsciiAlphaToLower(uc32 c); +inline constexpr bool IsCarriageReturn(uc32 c); +inline constexpr bool IsLineFeed(uc32 c); +inline constexpr bool IsAsciiIdentifier(uc32 c); +inline constexpr bool IsAlphaNumeric(uc32 c); +inline constexpr bool IsDecimalDigit(uc32 c); +inline constexpr bool IsHexDigit(uc32 c); +inline constexpr bool IsOctalDigit(uc32 c); +inline constexpr bool IsBinaryDigit(uc32 c); +inline constexpr bool IsRegExpWord(uc32 c); +inline constexpr bool IsRegExpNewline(uc32 c); + +// ES#sec-names-and-keywords +// This includes '_', '$' and '\', and ID_Start according to +// http://www.unicode.org/reports/tr31/, which consists of categories +// 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties +// 'Pattern_Syntax' or 'Pattern_White_Space'. +inline bool IsIdentifierStart(uc32 c); +#ifdef V8_INTL_SUPPORT +V8_EXPORT_PRIVATE bool IsIdentifierStartSlow(uc32 c); +#else +inline bool IsIdentifierStartSlow(uc32 c) { + // Non-BMP characters are not supported without I18N. + return (c <= 0xFFFF) ? unibrow::ID_Start::Is(c) : false; +} +#endif + +// ES#sec-names-and-keywords +// This includes \u200c and \u200d, and ID_Continue according to +// http://www.unicode.org/reports/tr31/, which consists of ID_Start, +// the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties +// 'Pattern_Syntax' or 'Pattern_White_Space'. +inline bool IsIdentifierPart(uc32 c); +#ifdef V8_INTL_SUPPORT +V8_EXPORT_PRIVATE bool IsIdentifierPartSlow(uc32 c); +#else +inline bool IsIdentifierPartSlow(uc32 c) { + // Non-BMP charaacters are not supported without I18N. + if (c <= 0xFFFF) { + return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); + } + return false; +} +#endif + +// ES6 draft section 11.2 +// This includes all code points of Unicode category 'Zs'. +// Further included are \u0009, \u000b, \u000c, and \ufeff. 
+inline bool IsWhiteSpace(uc32 c); +#ifdef V8_INTL_SUPPORT +V8_EXPORT_PRIVATE bool IsWhiteSpaceSlow(uc32 c); +#else +inline bool IsWhiteSpaceSlow(uc32 c) { return unibrow::WhiteSpace::Is(c); } +#endif + +// WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 +// This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp) +// as well as \u0009 - \u000d and \ufeff. +inline bool IsWhiteSpaceOrLineTerminator(uc32 c); +inline bool IsWhiteSpaceOrLineTerminatorSlow(uc32 c) { + return IsWhiteSpaceSlow(c) || unibrow::IsLineTerminator(c); +} + +inline bool IsLineTerminatorSequence(uc32 c, uc32 next); + +} // namespace internal +} // namespace v8 + +#endif // V8_STRINGS_CHAR_PREDICATES_H_ diff --git a/deps/v8/src/strings/string-builder-inl.h b/deps/v8/src/strings/string-builder-inl.h new file mode 100644 index 0000000000..88d69b37b5 --- /dev/null +++ b/deps/v8/src/strings/string-builder-inl.h @@ -0,0 +1,310 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_STRINGS_STRING_BUILDER_INL_H_ +#define V8_STRINGS_STRING_BUILDER_INL_H_ + +#include "src/common/assert-scope.h" +#include "src/execution/isolate.h" +#include "src/handles/handles-inl.h" +#include "src/heap/factory.h" +#include "src/objects/fixed-array.h" +#include "src/objects/objects.h" +#include "src/objects/string-inl.h" +#include "src/utils/utils.h" + +namespace v8 { +namespace internal { + +const int kStringBuilderConcatHelperLengthBits = 11; +const int kStringBuilderConcatHelperPositionBits = 19; + +using StringBuilderSubstringLength = + BitField; +using StringBuilderSubstringPosition = + BitField; + +template +void StringBuilderConcatHelper(String special, sinkchar* sink, + FixedArray fixed_array, int array_length); + +// Returns the result length of the concatenation. +// On illegal argument, -1 is returned. 
+int StringBuilderConcatLength(int special_length, FixedArray fixed_array, + int array_length, bool* one_byte); + +class FixedArrayBuilder { + public: + explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity); + explicit FixedArrayBuilder(Handle backing_store); + + bool HasCapacity(int elements); + void EnsureCapacity(Isolate* isolate, int elements); + + void Add(Object value); + void Add(Smi value); + + Handle array() { return array_; } + + int length() { return length_; } + + int capacity(); + + Handle ToJSArray(Handle target_array); + + private: + Handle array_; + int length_; + bool has_non_smi_elements_; +}; + +class ReplacementStringBuilder { + public: + ReplacementStringBuilder(Heap* heap, Handle subject, + int estimated_part_count); + + // Caution: Callers must ensure the builder has enough capacity. + static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from, + int to) { + DCHECK_GE(from, 0); + int length = to - from; + DCHECK_GT(length, 0); + if (StringBuilderSubstringLength::is_valid(length) && + StringBuilderSubstringPosition::is_valid(from)) { + int encoded_slice = StringBuilderSubstringLength::encode(length) | + StringBuilderSubstringPosition::encode(from); + builder->Add(Smi::FromInt(encoded_slice)); + } else { + // Otherwise encode as two smis. + builder->Add(Smi::FromInt(-length)); + builder->Add(Smi::FromInt(from)); + } + } + + void AddSubjectSlice(int from, int to) { + EnsureCapacity(2); // Subject slices are encoded with up to two smis. 
+ AddSubjectSlice(&array_builder_, from, to); + IncrementCharacterCount(to - from); + } + + void AddString(Handle string); + + MaybeHandle ToString(); + + void IncrementCharacterCount(int by) { + if (character_count_ > String::kMaxLength - by) { + STATIC_ASSERT(String::kMaxLength < kMaxInt); + character_count_ = kMaxInt; + } else { + character_count_ += by; + } + } + + private: + void AddElement(Handle element); + void EnsureCapacity(int elements); + + Heap* heap_; + FixedArrayBuilder array_builder_; + Handle subject_; + int character_count_; + bool is_one_byte_; +}; + +class IncrementalStringBuilder { + public: + explicit IncrementalStringBuilder(Isolate* isolate); + + V8_INLINE String::Encoding CurrentEncoding() { return encoding_; } + + template + V8_INLINE void Append(SrcChar c); + + V8_INLINE void AppendCharacter(uint8_t c) { + if (encoding_ == String::ONE_BYTE_ENCODING) { + Append(c); + } else { + Append(c); + } + } + + V8_INLINE void AppendCString(const char* s) { + const uint8_t* u = reinterpret_cast(s); + if (encoding_ == String::ONE_BYTE_ENCODING) { + while (*u != '\0') Append(*(u++)); + } else { + while (*u != '\0') Append(*(u++)); + } + } + + V8_INLINE void AppendCString(const uc16* s) { + if (encoding_ == String::ONE_BYTE_ENCODING) { + while (*s != '\0') Append(*(s++)); + } else { + while (*s != '\0') Append(*(s++)); + } + } + + V8_INLINE bool CurrentPartCanFit(int length) { + return part_length_ - current_index_ > length; + } + + // We make a rough estimate to find out if the current string can be + // serialized without allocating a new string part. The worst case length of + // an escaped character is 6. Shifting the remaining string length right by 3 + // is a more pessimistic estimate, but faster to calculate. 
+ V8_INLINE int EscapedLengthIfCurrentPartFits(int length) { + if (length > kMaxPartLength) return 0; + STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength); + // This shift will not overflow because length is already less than the + // maximum part length. + int worst_case_length = length << 3; + return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0; + } + + void AppendString(Handle string); + + MaybeHandle Finish(); + + V8_INLINE bool HasOverflowed() const { return overflowed_; } + + int Length() const; + + // Change encoding to two-byte. + void ChangeEncoding() { + DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); + ShrinkCurrentPart(); + encoding_ = String::TWO_BYTE_ENCODING; + Extend(); + } + + template + class NoExtend { + public: + NoExtend(Handle string, int offset, + const DisallowHeapAllocation& no_gc) { + DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString()); + if (sizeof(DestChar) == 1) { + start_ = reinterpret_cast( + Handle::cast(string)->GetChars(no_gc) + offset); + } else { + start_ = reinterpret_cast( + Handle::cast(string)->GetChars(no_gc) + offset); + } + cursor_ = start_; + } + + V8_INLINE void Append(DestChar c) { *(cursor_++) = c; } + V8_INLINE void AppendCString(const char* s) { + const uint8_t* u = reinterpret_cast(s); + while (*u != '\0') Append(*(u++)); + } + + int written() { return static_cast(cursor_ - start_); } + + private: + DestChar* start_; + DestChar* cursor_; + DISALLOW_HEAP_ALLOCATION(no_gc_) + }; + + template + class NoExtendString : public NoExtend { + public: + NoExtendString(Handle string, int required_length) + : NoExtend(string, 0), string_(string) { + DCHECK(string->length() >= required_length); + } + + Handle Finalize() { + Handle string = Handle::cast(string_); + int length = NoExtend::written(); + Handle result = SeqString::Truncate(string, length); + string_ = Handle(); + return result; + } + + private: + Handle string_; + }; + + template + class NoExtendBuilder : public NoExtend { + 
public: + NoExtendBuilder(IncrementalStringBuilder* builder, int required_length, + const DisallowHeapAllocation& no_gc) + : NoExtend(builder->current_part(), builder->current_index_, + no_gc), + builder_(builder) { + DCHECK(builder->CurrentPartCanFit(required_length)); + } + + ~NoExtendBuilder() { + builder_->current_index_ += NoExtend::written(); + } + + private: + IncrementalStringBuilder* builder_; + }; + + private: + Factory* factory() { return isolate_->factory(); } + + V8_INLINE Handle accumulator() { return accumulator_; } + + V8_INLINE void set_accumulator(Handle string) { + *accumulator_.location() = string->ptr(); + } + + V8_INLINE Handle current_part() { return current_part_; } + + V8_INLINE void set_current_part(Handle string) { + *current_part_.location() = string->ptr(); + } + + // Add the current part to the accumulator. + void Accumulate(Handle new_part); + + // Finish the current part and allocate a new part. + void Extend(); + + // Shrink current part to the right size. 
+ void ShrinkCurrentPart() { + DCHECK(current_index_ < part_length_); + set_current_part(SeqString::Truncate( + Handle::cast(current_part()), current_index_)); + } + + static const int kInitialPartLength = 32; + static const int kMaxPartLength = 16 * 1024; + static const int kPartLengthGrowthFactor = 2; + + Isolate* isolate_; + String::Encoding encoding_; + bool overflowed_; + int part_length_; + int current_index_; + Handle accumulator_; + Handle current_part_; +}; + +template +void IncrementalStringBuilder::Append(SrcChar c) { + DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1); + if (sizeof(DestChar) == 1) { + DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); + SeqOneByteString::cast(*current_part_) + .SeqOneByteStringSet(current_index_++, c); + } else { + DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_); + SeqTwoByteString::cast(*current_part_) + .SeqTwoByteStringSet(current_index_++, c); + } + if (current_index_ == part_length_) Extend(); +} +} // namespace internal +} // namespace v8 + +#endif // V8_STRINGS_STRING_BUILDER_INL_H_ diff --git a/deps/v8/src/strings/string-builder.cc b/deps/v8/src/strings/string-builder.cc new file mode 100644 index 0000000000..f647aed190 --- /dev/null +++ b/deps/v8/src/strings/string-builder.cc @@ -0,0 +1,294 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/strings/string-builder-inl.h" + +#include "src/execution/isolate-inl.h" +#include "src/objects/fixed-array-inl.h" +#include "src/objects/js-array-inl.h" + +namespace v8 { +namespace internal { + +template +void StringBuilderConcatHelper(String special, sinkchar* sink, + FixedArray fixed_array, int array_length) { + DisallowHeapAllocation no_gc; + int position = 0; + for (int i = 0; i < array_length; i++) { + Object element = fixed_array.get(i); + if (element.IsSmi()) { + // Smi encoding of position and length. 
+ int encoded_slice = Smi::ToInt(element); + int pos; + int len; + if (encoded_slice > 0) { + // Position and length encoded in one smi. + pos = StringBuilderSubstringPosition::decode(encoded_slice); + len = StringBuilderSubstringLength::decode(encoded_slice); + } else { + // Position and length encoded in two smis. + Object obj = fixed_array.get(++i); + DCHECK(obj.IsSmi()); + pos = Smi::ToInt(obj); + len = -encoded_slice; + } + String::WriteToFlat(special, sink + position, pos, pos + len); + position += len; + } else { + String string = String::cast(element); + int element_length = string.length(); + String::WriteToFlat(string, sink + position, 0, element_length); + position += element_length; + } + } +} + +template void StringBuilderConcatHelper(String special, uint8_t* sink, + FixedArray fixed_array, + int array_length); + +template void StringBuilderConcatHelper(String special, uc16* sink, + FixedArray fixed_array, + int array_length); + +int StringBuilderConcatLength(int special_length, FixedArray fixed_array, + int array_length, bool* one_byte) { + DisallowHeapAllocation no_gc; + int position = 0; + for (int i = 0; i < array_length; i++) { + int increment = 0; + Object elt = fixed_array.get(i); + if (elt.IsSmi()) { + // Smi encoding of position and length. + int smi_value = Smi::ToInt(elt); + int pos; + int len; + if (smi_value > 0) { + // Position and length encoded in one smi. + pos = StringBuilderSubstringPosition::decode(smi_value); + len = StringBuilderSubstringLength::decode(smi_value); + } else { + // Position and length encoded in two smis. + len = -smi_value; + // Get the position and check that it is a positive smi. 
+ i++; + if (i >= array_length) return -1; + Object next_smi = fixed_array.get(i); + if (!next_smi.IsSmi()) return -1; + pos = Smi::ToInt(next_smi); + if (pos < 0) return -1; + } + DCHECK_GE(pos, 0); + DCHECK_GE(len, 0); + if (pos > special_length || len > special_length - pos) return -1; + increment = len; + } else if (elt.IsString()) { + String element = String::cast(elt); + int element_length = element.length(); + increment = element_length; + if (*one_byte && !element.IsOneByteRepresentation()) { + *one_byte = false; + } + } else { + return -1; + } + if (increment > String::kMaxLength - position) { + return kMaxInt; // Provoke throw on allocation. + } + position += increment; + } + return position; +} + +FixedArrayBuilder::FixedArrayBuilder(Isolate* isolate, int initial_capacity) + : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)), + length_(0), + has_non_smi_elements_(false) { + // Require a non-zero initial size. Ensures that doubling the size to + // extend the array will work. + DCHECK_GT(initial_capacity, 0); +} + +FixedArrayBuilder::FixedArrayBuilder(Handle backing_store) + : array_(backing_store), length_(0), has_non_smi_elements_(false) { + // Require a non-zero initial size. Ensures that doubling the size to + // extend the array will work. 
+ DCHECK_GT(backing_store->length(), 0); +} + +bool FixedArrayBuilder::HasCapacity(int elements) { + int length = array_->length(); + int required_length = length_ + elements; + return (length >= required_length); +} + +void FixedArrayBuilder::EnsureCapacity(Isolate* isolate, int elements) { + int length = array_->length(); + int required_length = length_ + elements; + if (length < required_length) { + int new_length = length; + do { + new_length *= 2; + } while (new_length < required_length); + Handle extended_array = + isolate->factory()->NewFixedArrayWithHoles(new_length); + array_->CopyTo(0, *extended_array, 0, length_); + array_ = extended_array; + } +} + +void FixedArrayBuilder::Add(Object value) { + DCHECK(!value.IsSmi()); + array_->set(length_, value); + length_++; + has_non_smi_elements_ = true; +} + +void FixedArrayBuilder::Add(Smi value) { + DCHECK(value.IsSmi()); + array_->set(length_, value); + length_++; +} + +int FixedArrayBuilder::capacity() { return array_->length(); } + +Handle FixedArrayBuilder::ToJSArray(Handle target_array) { + JSArray::SetContent(target_array, array_); + target_array->set_length(Smi::FromInt(length_)); + return target_array; +} + +ReplacementStringBuilder::ReplacementStringBuilder(Heap* heap, + Handle subject, + int estimated_part_count) + : heap_(heap), + array_builder_(Isolate::FromHeap(heap), estimated_part_count), + subject_(subject), + character_count_(0), + is_one_byte_(subject->IsOneByteRepresentation()) { + // Require a non-zero initial size. Ensures that doubling the size to + // extend the array will work. 
+ DCHECK_GT(estimated_part_count, 0); +} + +void ReplacementStringBuilder::EnsureCapacity(int elements) { + array_builder_.EnsureCapacity(Isolate::FromHeap(heap_), elements); +} + +void ReplacementStringBuilder::AddString(Handle string) { + int length = string->length(); + DCHECK_GT(length, 0); + AddElement(string); + if (!string->IsOneByteRepresentation()) { + is_one_byte_ = false; + } + IncrementCharacterCount(length); +} + +MaybeHandle ReplacementStringBuilder::ToString() { + Isolate* isolate = Isolate::FromHeap(heap_); + if (array_builder_.length() == 0) { + return isolate->factory()->empty_string(); + } + + Handle joined_string; + if (is_one_byte_) { + Handle seq; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, seq, isolate->factory()->NewRawOneByteString(character_count_), + String); + + DisallowHeapAllocation no_gc; + uint8_t* char_buffer = seq->GetChars(no_gc); + StringBuilderConcatHelper(*subject_, char_buffer, *array_builder_.array(), + array_builder_.length()); + joined_string = Handle::cast(seq); + } else { + // Two-byte. + Handle seq; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, seq, isolate->factory()->NewRawTwoByteString(character_count_), + String); + + DisallowHeapAllocation no_gc; + uc16* char_buffer = seq->GetChars(no_gc); + StringBuilderConcatHelper(*subject_, char_buffer, *array_builder_.array(), + array_builder_.length()); + joined_string = Handle::cast(seq); + } + return joined_string; +} + +void ReplacementStringBuilder::AddElement(Handle element) { + DCHECK(element->IsSmi() || element->IsString()); + EnsureCapacity(1); + DisallowHeapAllocation no_gc; + array_builder_.Add(*element); +} + +IncrementalStringBuilder::IncrementalStringBuilder(Isolate* isolate) + : isolate_(isolate), + encoding_(String::ONE_BYTE_ENCODING), + overflowed_(false), + part_length_(kInitialPartLength), + current_index_(0) { + // Create an accumulator handle starting with the empty string. 
+ accumulator_ = + Handle::New(ReadOnlyRoots(isolate).empty_string(), isolate); + current_part_ = + factory()->NewRawOneByteString(part_length_).ToHandleChecked(); +} + +int IncrementalStringBuilder::Length() const { + return accumulator_->length() + current_index_; +} + +void IncrementalStringBuilder::Accumulate(Handle new_part) { + Handle new_accumulator; + if (accumulator()->length() + new_part->length() > String::kMaxLength) { + // Set the flag and carry on. Delay throwing the exception till the end. + new_accumulator = factory()->empty_string(); + overflowed_ = true; + } else { + new_accumulator = + factory()->NewConsString(accumulator(), new_part).ToHandleChecked(); + } + set_accumulator(new_accumulator); +} + +void IncrementalStringBuilder::Extend() { + DCHECK_EQ(current_index_, current_part()->length()); + Accumulate(current_part()); + if (part_length_ <= kMaxPartLength / kPartLengthGrowthFactor) { + part_length_ *= kPartLengthGrowthFactor; + } + Handle new_part; + if (encoding_ == String::ONE_BYTE_ENCODING) { + new_part = factory()->NewRawOneByteString(part_length_).ToHandleChecked(); + } else { + new_part = factory()->NewRawTwoByteString(part_length_).ToHandleChecked(); + } + // Reuse the same handle to avoid being invalidated when exiting handle scope. + set_current_part(new_part); + current_index_ = 0; +} + +MaybeHandle IncrementalStringBuilder::Finish() { + ShrinkCurrentPart(); + Accumulate(current_part()); + if (overflowed_) { + THROW_NEW_ERROR(isolate_, NewInvalidStringLengthError(), String); + } + return accumulator(); +} + +void IncrementalStringBuilder::AppendString(Handle string) { + ShrinkCurrentPart(); + part_length_ = kInitialPartLength; // Allocate conservatively. + Extend(); // Attach current part and allocate new part. 
+ Accumulate(string); +} +} // namespace internal +} // namespace v8 diff --git a/deps/v8/src/strings/string-case.cc b/deps/v8/src/strings/string-case.cc new file mode 100644 index 0000000000..88370a81e3 --- /dev/null +++ b/deps/v8/src/strings/string-case.cc @@ -0,0 +1,136 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/strings/string-case.h" + +#include "src/base/logging.h" +#include "src/common/assert-scope.h" +#include "src/common/globals.h" +#include "src/utils/utils.h" + +namespace v8 { +namespace internal { + +// FastAsciiConvert tries to do character processing on a word_t basis if +// source and destination strings are properly aligned. Natural alignment of +// string data depends on kTaggedSize so we define word_t via Tagged_t. +using word_t = std::make_unsigned::type; + +const word_t kWordTAllBitsSet = std::numeric_limits::max(); +const word_t kOneInEveryByte = kWordTAllBitsSet / 0xFF; +const word_t kAsciiMask = kOneInEveryByte << 7; + +#ifdef DEBUG +bool CheckFastAsciiConvert(char* dst, const char* src, int length, bool changed, + bool is_to_lower) { + bool expected_changed = false; + for (int i = 0; i < length; i++) { + if (dst[i] == src[i]) continue; + expected_changed = true; + if (is_to_lower) { + DCHECK('A' <= src[i] && src[i] <= 'Z'); + DCHECK(dst[i] == src[i] + ('a' - 'A')); + } else { + DCHECK('a' <= src[i] && src[i] <= 'z'); + DCHECK(dst[i] == src[i] - ('a' - 'A')); + } + } + return (expected_changed == changed); +} +#endif + +// Given a word and two range boundaries returns a word with high bit +// set in every byte iff the corresponding input byte was strictly in +// the range (m, n). All the other bits in the result are cleared. +// This function is only useful when it can be inlined and the +// boundaries are statically known. 
+// Requires: all bytes in the input word and the boundaries must be +// ASCII (less than 0x7F). +static inline word_t AsciiRangeMask(word_t w, char m, char n) { + // Use strict inequalities since in edge cases the function could be + // further simplified. + DCHECK(0 < m && m < n); + // Has high bit set in every w byte less than n. + word_t tmp1 = kOneInEveryByte * (0x7F + n) - w; + // Has high bit set in every w byte greater than m. + word_t tmp2 = w + kOneInEveryByte * (0x7F - m); + return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); +} + +template +int FastAsciiConvert(char* dst, const char* src, int length, + bool* changed_out) { +#ifdef DEBUG + char* saved_dst = dst; +#endif + const char* saved_src = src; + DisallowHeapAllocation no_gc; + // We rely on the distance between upper and lower case letters + // being a known power of 2. + DCHECK_EQ('a' - 'A', 1 << 5); + // Boundaries for the range of input characters than require conversion. + static const char lo = is_lower ? 'A' - 1 : 'a' - 1; + static const char hi = is_lower ? 'Z' + 1 : 'z' + 1; + bool changed = false; + const char* const limit = src + length; + + // dst is newly allocated and always aligned. + DCHECK(IsAligned(reinterpret_cast
(dst), sizeof(word_t))); + // Only attempt processing one word at a time if src is also aligned. + if (IsAligned(reinterpret_cast
(src), sizeof(word_t))) { + // Process the prefix of the input that requires no conversion one aligned + // (machine) word at a time. + while (src <= limit - sizeof(word_t)) { + const word_t w = *reinterpret_cast(src); + if ((w & kAsciiMask) != 0) return static_cast(src - saved_src); + if (AsciiRangeMask(w, lo, hi) != 0) { + changed = true; + break; + } + *reinterpret_cast(dst) = w; + src += sizeof(word_t); + dst += sizeof(word_t); + } + // Process the remainder of the input performing conversion when + // required one word at a time. + while (src <= limit - sizeof(word_t)) { + const word_t w = *reinterpret_cast(src); + if ((w & kAsciiMask) != 0) return static_cast(src - saved_src); + word_t m = AsciiRangeMask(w, lo, hi); + // The mask has high (7th) bit set in every byte that needs + // conversion and we know that the distance between cases is + // 1 << 5. + *reinterpret_cast(dst) = w ^ (m >> 2); + src += sizeof(word_t); + dst += sizeof(word_t); + } + } + // Process the last few bytes of the input (or the whole input if + // unaligned access is not supported). + while (src < limit) { + char c = *src; + if ((c & kAsciiMask) != 0) return static_cast(src - saved_src); + if (lo < c && c < hi) { + c ^= (1 << 5); + changed = true; + } + *dst = c; + ++src; + ++dst; + } + + DCHECK( + CheckFastAsciiConvert(saved_dst, saved_src, length, changed, is_lower)); + + *changed_out = changed; + return length; +} + +template int FastAsciiConvert(char* dst, const char* src, int length, + bool* changed_out); +template int FastAsciiConvert(char* dst, const char* src, int length, + bool* changed_out); + +} // namespace internal +} // namespace v8 diff --git a/deps/v8/src/strings/string-case.h b/deps/v8/src/strings/string-case.h new file mode 100644 index 0000000000..f45732fb54 --- /dev/null +++ b/deps/v8/src/strings/string-case.h @@ -0,0 +1,17 @@ +// Copyright 2016 the V8 project authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_STRINGS_STRING_CASE_H_ +#define V8_STRINGS_STRING_CASE_H_ + +namespace v8 { +namespace internal { + +template +int FastAsciiConvert(char* dst, const char* src, int length, bool* changed_out); + +} // namespace internal +} // namespace v8 + +#endif // V8_STRINGS_STRING_CASE_H_ diff --git a/deps/v8/src/strings/string-hasher-inl.h b/deps/v8/src/strings/string-hasher-inl.h new file mode 100644 index 0000000000..b547d0a78d --- /dev/null +++ b/deps/v8/src/strings/string-hasher-inl.h @@ -0,0 +1,81 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_STRINGS_STRING_HASHER_INL_H_ +#define V8_STRINGS_STRING_HASHER_INL_H_ + +#include "src/strings/string-hasher.h" + +#include "src/objects/objects.h" +#include "src/objects/string-inl.h" +#include "src/strings/char-predicates-inl.h" +#include "src/utils/utils-inl.h" + +namespace v8 { +namespace internal { + +uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint16_t c) { + running_hash += c; + running_hash += (running_hash << 10); + running_hash ^= (running_hash >> 6); + return running_hash; +} + +uint32_t StringHasher::GetHashCore(uint32_t running_hash) { + running_hash += (running_hash << 3); + running_hash ^= (running_hash >> 11); + running_hash += (running_hash << 15); + int32_t hash = static_cast(running_hash & String::kHashBitMask); + int32_t mask = (hash - 1) >> 31; + return running_hash | (kZeroHash & mask); +} + +uint32_t StringHasher::GetTrivialHash(int length) { + DCHECK_GT(length, String::kMaxHashCalcLength); + // String hash of a large string is simply the length. 
+ return (length << String::kHashShift) | String::kIsNotArrayIndexMask; +} + +template +uint32_t StringHasher::HashSequentialString(const schar* chars, int length, + uint64_t seed) { + // Check whether the string is a valid array index. In that case, compute the + // array index hash. It'll fall through to compute a regular string hash from + // the start if it turns out that the string isn't a valid array index. + if (IsInRange(length, 1, String::kMaxArrayIndexSize)) { + if (IsDecimalDigit(chars[0]) && (length == 1 || chars[0] != '0')) { + uint32_t index = chars[0] - '0'; + int i = 1; + do { + if (i == length) { + return MakeArrayIndexHash(index, length); + } + } while (TryAddIndexChar(&index, chars[i++])); + } + } else if (length > String::kMaxHashCalcLength) { + return GetTrivialHash(length); + } + + // Non-array-index hash. + DCHECK_LE(0, length); + DCHECK_IMPLIES(0 < length, chars != nullptr); + uint32_t running_hash = static_cast(seed); + const schar* end = &chars[length]; + while (chars != end) { + running_hash = AddCharacterCore(running_hash, *chars++); + } + + return (GetHashCore(running_hash) << String::kHashShift) | + String::kIsNotArrayIndexMask; +} + +std::size_t SeededStringHasher::operator()(const char* name) const { + return StringHasher::HashSequentialString( + name, static_cast(strlen(name)), hashseed_); +} + +} // namespace internal +} // namespace v8 + +#endif // V8_STRINGS_STRING_HASHER_INL_H_ diff --git a/deps/v8/src/strings/string-hasher.h b/deps/v8/src/strings/string-hasher.h new file mode 100644 index 0000000000..b3917b75cd --- /dev/null +++ b/deps/v8/src/strings/string-hasher.h @@ -0,0 +1,58 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef V8_STRINGS_STRING_HASHER_H_ +#define V8_STRINGS_STRING_HASHER_H_ + +#include "src/common/globals.h" + +namespace v8 { +namespace internal { + +template +class Vector; + +class V8_EXPORT_PRIVATE StringHasher final { + public: + StringHasher() = delete; + template + static inline uint32_t HashSequentialString(const schar* chars, int length, + uint64_t seed); + + // Calculated hash value for a string consisting of 1 to + // String::kMaxArrayIndexSize digits with no leading zeros (except "0"). + // value is represented decimal value. + static uint32_t MakeArrayIndexHash(uint32_t value, int length); + + // No string is allowed to have a hash of zero. That value is reserved + // for internal properties. If the hash calculation yields zero then we + // use 27 instead. + static const int kZeroHash = 27; + + // Reusable parts of the hashing algorithm. + V8_INLINE static uint32_t AddCharacterCore(uint32_t running_hash, uint16_t c); + V8_INLINE static uint32_t GetHashCore(uint32_t running_hash); + + static inline uint32_t GetTrivialHash(int length); +}; + +// Useful for std containers that require something ()'able. +struct SeededStringHasher { + explicit SeededStringHasher(uint64_t hashseed) : hashseed_(hashseed) {} + inline std::size_t operator()(const char* name) const; + + uint64_t hashseed_; +}; + +// Useful for std containers that require something ()'able. +struct StringEquals { + bool operator()(const char* name1, const char* name2) const { + return strcmp(name1, name2) == 0; + } +}; + +} // namespace internal +} // namespace v8 + +#endif // V8_STRINGS_STRING_HASHER_H_ diff --git a/deps/v8/src/strings/string-search.h b/deps/v8/src/strings/string-search.h new file mode 100644 index 0000000000..1d5800ebcf --- /dev/null +++ b/deps/v8/src/strings/string-search.h @@ -0,0 +1,548 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef V8_STRINGS_STRING_SEARCH_H_ +#define V8_STRINGS_STRING_SEARCH_H_ + +#include "src/execution/isolate.h" +#include "src/utils/vector.h" + +namespace v8 { +namespace internal { + +//--------------------------------------------------------------------- +// String Search object. +//--------------------------------------------------------------------- + +// Class holding constants and methods that apply to all string search variants, +// independently of subject and pattern char size. +class StringSearchBase { + protected: + // Cap on the maximal shift in the Boyer-Moore implementation. By setting a + // limit, we can fix the size of tables. For a needle longer than this limit, + // search will not be optimal, since we only build tables for a suffix + // of the string, but it is a safe approximation. + static const int kBMMaxShift = Isolate::kBMMaxShift; + + // Reduce alphabet to this size. + // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size + // proportional to the input alphabet. We reduce the alphabet size by + // equating input characters modulo a smaller alphabet size. This gives + // a potentially less efficient searching, but is a safe approximation. + // For needles using only characters in the same Unicode 256-code point page, + // there is no search speed degradation. + static const int kLatin1AlphabetSize = 256; + static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; + + // Bad-char shift table stored in the state. It's length is the alphabet size. + // For patterns below this length, the skip length of Boyer-Moore is too short + // to compensate for the algorithmic overhead compared to simple brute force. 
+ static const int kBMMinPatternLength = 7; + + static inline bool IsOneByteString(Vector string) { + return true; + } + + static inline bool IsOneByteString(Vector string) { + return String::IsOneByte(string.begin(), string.length()); + } + + friend class Isolate; +}; + +template +class StringSearch : private StringSearchBase { + public: + StringSearch(Isolate* isolate, Vector pattern) + : isolate_(isolate), + pattern_(pattern), + start_(Max(0, pattern.length() - kBMMaxShift)) { + if (sizeof(PatternChar) > sizeof(SubjectChar)) { + if (!IsOneByteString(pattern_)) { + strategy_ = &FailSearch; + return; + } + } + int pattern_length = pattern_.length(); + if (pattern_length < kBMMinPatternLength) { + if (pattern_length == 1) { + strategy_ = &SingleCharSearch; + return; + } + strategy_ = &LinearSearch; + return; + } + strategy_ = &InitialSearch; + } + + int Search(Vector subject, int index) { + return strategy_(this, subject, index); + } + + static inline int AlphabetSize() { + if (sizeof(PatternChar) == 1) { + // Latin1 needle. + return kLatin1AlphabetSize; + } else { + DCHECK_EQ(sizeof(PatternChar), 2); + // UC16 needle. 
+ return kUC16AlphabetSize; + } + } + + private: + using SearchFunction = int (*)(StringSearch*, + Vector, int); + + static int FailSearch(StringSearch*, + Vector, int) { + return -1; + } + + static int SingleCharSearch(StringSearch* search, + Vector subject, + int start_index); + + static int LinearSearch(StringSearch* search, + Vector subject, int start_index); + + static int InitialSearch(StringSearch* search, + Vector subject, int start_index); + + static int BoyerMooreHorspoolSearch( + StringSearch* search, + Vector subject, int start_index); + + static int BoyerMooreSearch(StringSearch* search, + Vector subject, + int start_index); + + void PopulateBoyerMooreHorspoolTable(); + + void PopulateBoyerMooreTable(); + + static inline bool exceedsOneByte(uint8_t c) { return false; } + + static inline bool exceedsOneByte(uint16_t c) { + return c > String::kMaxOneByteCharCodeU; + } + + static inline int CharOccurrence(int* bad_char_occurrence, + SubjectChar char_code) { + if (sizeof(SubjectChar) == 1) { + return bad_char_occurrence[static_cast(char_code)]; + } + if (sizeof(PatternChar) == 1) { + if (exceedsOneByte(char_code)) { + return -1; + } + return bad_char_occurrence[static_cast(char_code)]; + } + // Both pattern and subject are UC16. Reduce character to equivalence class. + int equiv_class = char_code % kUC16AlphabetSize; + return bad_char_occurrence[equiv_class]; + } + + // The following tables are shared by all searches. + // TODO(lrn): Introduce a way for a pattern to keep its tables + // between searches (e.g., for an Atom RegExp). + + // Store for the BoyerMoore(Horspool) bad char shift table. + // Return a table covering the last kBMMaxShift+1 positions of + // pattern. + int* bad_char_table() { return isolate_->bad_char_shift_table(); } + + // Store for the BoyerMoore good suffix shift table. + int* good_suffix_shift_table() { + // Return biased pointer that maps the range [start_..pattern_.length() + // to the kGoodSuffixShiftTable array. 
+ return isolate_->good_suffix_shift_table() - start_; + } + + // Table used temporarily while building the BoyerMoore good suffix + // shift table. + int* suffix_table() { + // Return biased pointer that maps the range [start_..pattern_.length() + // to the kSuffixTable array. + return isolate_->suffix_table() - start_; + } + + Isolate* isolate_; + // The pattern to search for. + Vector pattern_; + // Pointer to implementation of the search. + SearchFunction strategy_; + // Cache value of Max(0, pattern_length() - kBMMaxShift) + int start_; +}; + +template +inline T AlignDown(T value, U alignment) { + return reinterpret_cast( + (reinterpret_cast(value) & ~(alignment - 1))); +} + +inline uint8_t GetHighestValueByte(uc16 character) { + return Max(static_cast(character & 0xFF), + static_cast(character >> 8)); +} + +inline uint8_t GetHighestValueByte(uint8_t character) { return character; } + +template +inline int FindFirstCharacter(Vector pattern, + Vector subject, int index) { + const PatternChar pattern_first_char = pattern[0]; + const int max_n = (subject.length() - pattern.length() + 1); + + const uint8_t search_byte = GetHighestValueByte(pattern_first_char); + const SubjectChar search_char = static_cast(pattern_first_char); + int pos = index; + do { + DCHECK_GE(max_n - pos, 0); + const SubjectChar* char_pos = reinterpret_cast( + memchr(subject.begin() + pos, search_byte, + (max_n - pos) * sizeof(SubjectChar))); + if (char_pos == nullptr) return -1; + char_pos = AlignDown(char_pos, sizeof(SubjectChar)); + pos = static_cast(char_pos - subject.begin()); + if (subject[pos] == search_char) return pos; + } while (++pos < max_n); + + return -1; +} + +//--------------------------------------------------------------------- +// Single Character Pattern Search Strategy +//--------------------------------------------------------------------- + +template +int StringSearch::SingleCharSearch( + StringSearch* search, + Vector subject, int index) { + DCHECK_EQ(1, 
search->pattern_.length()); + PatternChar pattern_first_char = search->pattern_[0]; + if (sizeof(PatternChar) > sizeof(SubjectChar)) { + if (exceedsOneByte(pattern_first_char)) { + return -1; + } + } + return FindFirstCharacter(search->pattern_, subject, index); +} + +//--------------------------------------------------------------------- +// Linear Search Strategy +//--------------------------------------------------------------------- + +template +inline bool CharCompare(const PatternChar* pattern, const SubjectChar* subject, + int length) { + DCHECK_GT(length, 0); + int pos = 0; + do { + if (pattern[pos] != subject[pos]) { + return false; + } + pos++; + } while (pos < length); + return true; +} + +// Simple linear search for short patterns. Never bails out. +template +int StringSearch::LinearSearch( + StringSearch* search, + Vector subject, int index) { + Vector pattern = search->pattern_; + DCHECK_GT(pattern.length(), 1); + int pattern_length = pattern.length(); + int i = index; + int n = subject.length() - pattern_length; + while (i <= n) { + i = FindFirstCharacter(pattern, subject, i); + if (i == -1) return -1; + DCHECK_LE(i, n); + i++; + // Loop extracted to separate function to allow using return to do + // a deeper break. + if (CharCompare(pattern.begin() + 1, subject.begin() + i, + pattern_length - 1)) { + return i - 1; + } + } + return -1; +} + +//--------------------------------------------------------------------- +// Boyer-Moore string search +//--------------------------------------------------------------------- + +template +int StringSearch::BoyerMooreSearch( + StringSearch* search, + Vector subject, int start_index) { + Vector pattern = search->pattern_; + int subject_length = subject.length(); + int pattern_length = pattern.length(); + // Only preprocess at most kBMMaxShift last characters of pattern. 
+ int start = search->start_; + + int* bad_char_occurence = search->bad_char_table(); + int* good_suffix_shift = search->good_suffix_shift_table(); + + PatternChar last_char = pattern[pattern_length - 1]; + int index = start_index; + // Continue search from i. + while (index <= subject_length - pattern_length) { + int j = pattern_length - 1; + int c; + while (last_char != (c = subject[index + j])) { + int shift = j - CharOccurrence(bad_char_occurence, c); + index += shift; + if (index > subject_length - pattern_length) { + return -1; + } + } + while (j >= 0 && pattern[j] == (c = subject[index + j])) j--; + if (j < 0) { + return index; + } else if (j < start) { + // we have matched more than our tables allow us to be smart about. + // Fall back on BMH shift. + index += pattern_length - 1 - + CharOccurrence(bad_char_occurence, + static_cast(last_char)); + } else { + int gs_shift = good_suffix_shift[j + 1]; + int bc_occ = CharOccurrence(bad_char_occurence, c); + int shift = j - bc_occ; + if (gs_shift > shift) { + shift = gs_shift; + } + index += shift; + } + } + + return -1; +} + +template +void StringSearch::PopulateBoyerMooreTable() { + int pattern_length = pattern_.length(); + const PatternChar* pattern = pattern_.begin(); + // Only look at the last kBMMaxShift characters of pattern (from start_ + // to pattern_length). + int start = start_; + int length = pattern_length - start; + + // Biased tables so that we can use pattern indices as table indices, + // even if we only cover the part of the pattern from offset start. + int* shift_table = good_suffix_shift_table(); + int* suffix_table = this->suffix_table(); + + // Initialize table. + for (int i = start; i < pattern_length; i++) { + shift_table[i] = length; + } + shift_table[pattern_length] = 1; + suffix_table[pattern_length] = pattern_length + 1; + + if (pattern_length <= start) { + return; + } + + // Find suffixes. 
+ PatternChar last_char = pattern[pattern_length - 1]; + int suffix = pattern_length + 1; + { + int i = pattern_length; + while (i > start) { + PatternChar c = pattern[i - 1]; + while (suffix <= pattern_length && c != pattern[suffix - 1]) { + if (shift_table[suffix] == length) { + shift_table[suffix] = suffix - i; + } + suffix = suffix_table[suffix]; + } + suffix_table[--i] = --suffix; + if (suffix == pattern_length) { + // No suffix to extend, so we check against last_char only. + while ((i > start) && (pattern[i - 1] != last_char)) { + if (shift_table[pattern_length] == length) { + shift_table[pattern_length] = pattern_length - i; + } + suffix_table[--i] = pattern_length; + } + if (i > start) { + suffix_table[--i] = --suffix; + } + } + } + } + // Build shift table using suffixes. + if (suffix < pattern_length) { + for (int i = start; i <= pattern_length; i++) { + if (shift_table[i] == length) { + shift_table[i] = suffix - start; + } + if (i == suffix) { + suffix = suffix_table[suffix]; + } + } + } +} + +//--------------------------------------------------------------------- +// Boyer-Moore-Horspool string search. +//--------------------------------------------------------------------- + +template +int StringSearch::BoyerMooreHorspoolSearch( + StringSearch* search, + Vector subject, int start_index) { + Vector pattern = search->pattern_; + int subject_length = subject.length(); + int pattern_length = pattern.length(); + int* char_occurrences = search->bad_char_table(); + int badness = -pattern_length; + + // How bad we are doing without a good-suffix table. + PatternChar last_char = pattern[pattern_length - 1]; + int last_char_shift = + pattern_length - 1 - + CharOccurrence(char_occurrences, static_cast(last_char)); + // Perform search + int index = start_index; // No matches found prior to this index. 
+ while (index <= subject_length - pattern_length) { + int j = pattern_length - 1; + int subject_char; + while (last_char != (subject_char = subject[index + j])) { + int bc_occ = CharOccurrence(char_occurrences, subject_char); + int shift = j - bc_occ; + index += shift; + badness += 1 - shift; // at most zero, so badness cannot increase. + if (index > subject_length - pattern_length) { + return -1; + } + } + j--; + while (j >= 0 && pattern[j] == (subject[index + j])) j--; + if (j < 0) { + return index; + } else { + index += last_char_shift; + // Badness increases by the number of characters we have + // checked, and decreases by the number of characters we + // can skip by shifting. It's a measure of how we are doing + // compared to reading each character exactly once. + badness += (pattern_length - j) - last_char_shift; + if (badness > 0) { + search->PopulateBoyerMooreTable(); + search->strategy_ = &BoyerMooreSearch; + return BoyerMooreSearch(search, subject, index); + } + } + } + return -1; +} + +template +void StringSearch::PopulateBoyerMooreHorspoolTable() { + int pattern_length = pattern_.length(); + + int* bad_char_occurrence = bad_char_table(); + + // Only preprocess at most kBMMaxShift last characters of pattern. + int start = start_; + // Run forwards to populate bad_char_table, so that *last* instance + // of character equivalence class is the one registered. + // Notice: Doesn't include the last character. + int table_size = AlphabetSize(); + if (start == 0) { // All patterns less than kBMMaxShift in length. + memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence)); + } else { + for (int i = 0; i < table_size; i++) { + bad_char_occurrence[i] = start - 1; + } + } + for (int i = start; i < pattern_length - 1; i++) { + PatternChar c = pattern_[i]; + int bucket = (sizeof(PatternChar) == 1) ? 
c : c % AlphabetSize(); + bad_char_occurrence[bucket] = i; + } +} + +//--------------------------------------------------------------------- +// Linear string search with bailout to BMH. +//--------------------------------------------------------------------- + +// Simple linear search for short patterns, which bails out if the string +// isn't found very early in the subject. Upgrades to BoyerMooreHorspool. +template +int StringSearch::InitialSearch( + StringSearch* search, + Vector subject, int index) { + Vector pattern = search->pattern_; + int pattern_length = pattern.length(); + // Badness is a count of how much work we have done. When we have + // done enough work we decide it's probably worth switching to a better + // algorithm. + int badness = -10 - (pattern_length << 2); + + // We know our pattern is at least 2 characters, we cache the first so + // the common case of the first character not matching is faster. + for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { + badness++; + if (badness <= 0) { + i = FindFirstCharacter(pattern, subject, i); + if (i == -1) return -1; + DCHECK_LE(i, n); + int j = 1; + do { + if (pattern[j] != subject[i + j]) { + break; + } + j++; + } while (j < pattern_length); + if (j == pattern_length) { + return i; + } + badness += j; + } else { + search->PopulateBoyerMooreHorspoolTable(); + search->strategy_ = &BoyerMooreHorspoolSearch; + return BoyerMooreHorspoolSearch(search, subject, i); + } + } + return -1; +} + +// Perform a a single stand-alone search. +// If searching multiple times for the same pattern, a search +// object should be constructed once and the Search function then called +// for each search. 
+template +int SearchString(Isolate* isolate, Vector subject, + Vector pattern, int start_index) { + StringSearch search(isolate, pattern); + return search.Search(subject, start_index); +} + +// A wrapper function around SearchString that wraps raw pointers to the subject +// and pattern as vectors before calling SearchString. Used from the +// StringIndexOf builtin. +template +intptr_t SearchStringRaw(Isolate* isolate, const SubjectChar* subject_ptr, + int subject_length, const PatternChar* pattern_ptr, + int pattern_length, int start_index) { + DisallowHeapAllocation no_gc; + Vector subject(subject_ptr, subject_length); + Vector pattern(pattern_ptr, pattern_length); + return SearchString(isolate, subject, pattern, start_index); +} + +} // namespace internal +} // namespace v8 + +#endif // V8_STRINGS_STRING_SEARCH_H_ diff --git a/deps/v8/src/strings/string-stream.cc b/deps/v8/src/strings/string-stream.cc new file mode 100644 index 0000000000..db1891949e --- /dev/null +++ b/deps/v8/src/strings/string-stream.cc @@ -0,0 +1,476 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "src/strings/string-stream.h" + +#include + +#include "src/handles/handles-inl.h" +#include "src/logging/log.h" +#include "src/objects/js-array-inl.h" +#include "src/objects/objects-inl.h" +#include "src/objects/prototype.h" +#include "src/utils/vector.h" + +namespace v8 { +namespace internal { + +static const int kMentionedObjectCacheMaxSize = 256; + +char* HeapStringAllocator::allocate(unsigned bytes) { + space_ = NewArray(bytes); + return space_; +} + +char* FixedStringAllocator::allocate(unsigned bytes) { + CHECK_LE(bytes, length_); + return buffer_; +} + +char* FixedStringAllocator::grow(unsigned* old) { + *old = length_; + return buffer_; +} + +bool StringStream::Put(char c) { + if (full()) return false; + DCHECK(length_ < capacity_); + // Since the trailing '\0' is not accounted for in length_ fullness is + // indicated by a difference of 1 between length_ and capacity_. Thus when + // reaching a difference of 2 we need to grow the buffer. + if (length_ == capacity_ - 2) { + unsigned new_capacity = capacity_; + char* new_buffer = allocator_->grow(&new_capacity); + if (new_capacity > capacity_) { + capacity_ = new_capacity; + buffer_ = new_buffer; + } else { + // Reached the end of the available buffer. + DCHECK_GE(capacity_, 5); + length_ = capacity_ - 1; // Indicate fullness of the stream. + buffer_[length_ - 4] = '.'; + buffer_[length_ - 3] = '.'; + buffer_[length_ - 2] = '.'; + buffer_[length_ - 1] = '\n'; + buffer_[length_] = '\0'; + return false; + } + } + buffer_[length_] = c; + buffer_[length_ + 1] = '\0'; + length_++; + return true; +} + +// A control character is one that configures a format element. For +// instance, in %.5s, .5 are control characters. 
+static bool IsControlChar(char c) { + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '.': + case '-': + return true; + default: + return false; + } +} + +void StringStream::Add(Vector format, Vector elms) { + // If we already ran out of space then return immediately. + if (full()) return; + int offset = 0; + int elm = 0; + while (offset < format.length()) { + if (format[offset] != '%' || elm == elms.length()) { + Put(format[offset]); + offset++; + continue; + } + // Read this formatting directive into a temporary buffer + EmbeddedVector temp; + int format_length = 0; + // Skip over the whole control character sequence until the + // format element type + temp[format_length++] = format[offset++]; + while (offset < format.length() && IsControlChar(format[offset])) + temp[format_length++] = format[offset++]; + if (offset >= format.length()) return; + char type = format[offset]; + temp[format_length++] = type; + temp[format_length] = '\0'; + offset++; + FmtElm current = elms[elm++]; + switch (type) { + case 's': { + DCHECK_EQ(FmtElm::C_STR, current.type_); + const char* value = current.data_.u_c_str_; + Add(value); + break; + } + case 'w': { + DCHECK_EQ(FmtElm::LC_STR, current.type_); + Vector value = *current.data_.u_lc_str_; + for (int i = 0; i < value.length(); i++) + Put(static_cast(value[i])); + break; + } + case 'o': { + DCHECK_EQ(FmtElm::OBJ, current.type_); + Object obj(current.data_.u_obj_); + PrintObject(obj); + break; + } + case 'k': { + DCHECK_EQ(FmtElm::INT, current.type_); + int value = current.data_.u_int_; + if (0x20 <= value && value <= 0x7F) { + Put(value); + } else if (value <= 0xFF) { + Add("\\x%02x", value); + } else { + Add("\\u%04x", value); + } + break; + } + case 'i': + case 'd': + case 'u': + case 'x': + case 'c': + case 'X': { + int value = current.data_.u_int_; + EmbeddedVector formatted; + int length = SNPrintF(formatted, temp.begin(), value); + 
Add(Vector(formatted.begin(), length)); + break; + } + case 'f': + case 'g': + case 'G': + case 'e': + case 'E': { + double value = current.data_.u_double_; + int inf = std::isinf(value); + if (inf == -1) { + Add("-inf"); + } else if (inf == 1) { + Add("inf"); + } else if (std::isnan(value)) { + Add("nan"); + } else { + EmbeddedVector formatted; + SNPrintF(formatted, temp.begin(), value); + Add(formatted.begin()); + } + break; + } + case 'p': { + void* value = current.data_.u_pointer_; + EmbeddedVector formatted; + SNPrintF(formatted, temp.begin(), value); + Add(formatted.begin()); + break; + } + default: + UNREACHABLE(); + } + } + + // Verify that the buffer is 0-terminated + DCHECK_EQ(buffer_[length_], '\0'); +} + +void StringStream::PrintObject(Object o) { + o.ShortPrint(this); + if (o.IsString()) { + if (String::cast(o).length() <= String::kMaxShortPrintLength) { + return; + } + } else if (o.IsNumber() || o.IsOddball()) { + return; + } + if (o.IsHeapObject() && object_print_mode_ == kPrintObjectVerbose) { + // TODO(delphick): Consider whether we can get the isolate without using + // TLS. 
+    Isolate* isolate = Isolate::Current();
+    DebugObjectCache* debug_object_cache =
+        isolate->string_stream_debug_object_cache();
+    for (size_t i = 0; i < debug_object_cache->size(); i++) {
+      if (*(*debug_object_cache)[i] == o) {
+        Add("#%d#", static_cast<int>(i));
+        return;
+      }
+    }
+    if (debug_object_cache->size() < kMentionedObjectCacheMaxSize) {
+      Add("#%d#", static_cast<int>(debug_object_cache->size()));
+      debug_object_cache->push_back(handle(HeapObject::cast(o), isolate));
+    } else {
+      Add("@%p", o);
+    }
+  }
+}
+
+// Returns a newly allocated, NUL-terminated copy of the accumulated text.
+// The copy is made with NewArray<char> and owned by the returned pointer.
+std::unique_ptr<char[]> StringStream::ToCString() const {
+  char* str = NewArray<char>(length_ + 1);
+  MemCopy(str, buffer_, length_);
+  str[length_] = '\0';
+  return std::unique_ptr<char[]>(str);
+}
+
+// Reports the current buffer as a "StackDump" string event via LOG.
+void StringStream::Log(Isolate* isolate) {
+  LOG(isolate, StringEvent("StackDump", buffer_));
+}
+
+// Writes the accumulated text to |out|.
+void StringStream::OutputToFile(FILE* out) {
+  // Dump the output to |out|, but make sure to break it up into
+  // manageable chunks to avoid losing parts of the output in the OS
+  // printing code. This is a problem on Windows in particular; see
+  // the VPrint() function implementations in platform-win32.cc.
+  unsigned position = 0;
+  for (unsigned next; (next = position + 2048) < length_; position = next) {
+    // Temporarily terminate the buffer at |next| so that PrintF emits exactly
+    // one chunk, then put the overwritten character back.
+    char save = buffer_[next];
+    buffer_[next] = '\0';
+    internal::PrintF(out, "%s", &buffer_[position]);
+    buffer_[next] = save;
+  }
+  // Remaining tail (shorter than one chunk).
+  internal::PrintF(out, "%s", &buffer_[position]);
+}
+
+// Builds a heap string from the buffer via Factory::NewStringFromUtf8; the
+// result is unwrapped with ToHandleChecked().
+Handle<String> StringStream::ToString(Isolate* isolate) {
+  return isolate->factory()
+      ->NewStringFromUtf8(Vector<const char>(buffer_, length_))
+      .ToHandleChecked();
+}
+
+// Resets the per-isolate security token to a default-constructed Object and
+// empties the mentioned-object cache, creating the cache on first use.
+void StringStream::ClearMentionedObjectCache(Isolate* isolate) {
+  isolate->set_string_stream_current_security_token(Object());
+  if (isolate->string_stream_debug_object_cache() == nullptr) {
+    isolate->set_string_stream_debug_object_cache(new DebugObjectCache());
+  }
+  isolate->string_stream_debug_object_cache()->clear();
+}
+
+#ifdef DEBUG
+// True when this stream prints concisely or the isolate's cache is empty.
+bool StringStream::IsMentionedObjectCacheClear(Isolate* isolate) {
+  return object_print_mode_ == kPrintObjectConcise ||
+         isolate->string_stream_debug_object_cache()->size() == 0;
+}
+#endif
+
+// Appends the whole of |str|; see the three-argument overload.
+bool StringStream::Put(String str) { return Put(str, 0, str.length()); }
+
+// Appends the characters of |str| in [start, end). Characters outside the
+// range [32, 127) are replaced by '?'. Returns false as soon as a single
+// character could not be appended (the output was truncated).
+bool StringStream::Put(String str, int start, int end) {
+  StringCharacterStream stream(str, start);
+  for (int i = start; i < end && stream.HasMore(); i++) {
+    uint16_t c = stream.GetNext();
+    if (c >= 127 || c < 32) {
+      c = '?';
+    }
+    if (!Put(static_cast<char>(c))) {
+      return false;  // Output was truncated.
+    }
+  }
+  return true;
+}
+
+// Prints |name|: non-empty strings verbatim, the empty string as
+// "/* anonymous */", and anything else through the "%o" object format.
+void StringStream::PrintName(Object name) {
+  if (name.IsString()) {
+    String str = String::cast(name);
+    if (str.length() > 0) {
+      Put(str);
+    } else {
+      Add("/* anonymous */");
+    }
+  } else {
+    Add("%o", name);
+  }
+}
+
+// Prints one "key: value" line for every own descriptor of |js_object|'s map
+// that is stored in a field and whose key is a string or a number.
+void StringStream::PrintUsingMap(JSObject js_object) {
+  Map map = js_object.map();
+  int real_size = map.NumberOfOwnDescriptors();
+  DescriptorArray descs = map.instance_descriptors();
+  for (int i = 0; i < real_size; i++) {
+    PropertyDetails details = descs.GetDetails(i);
+    if (details.location() == kField) {
+      DCHECK_EQ(kData, details.kind());
+      Object key = descs.GetKey(i);
+      if (key.IsString() || key.IsNumber()) {
+        // Pad the key column; non-string keys are assumed to be 3 wide.
+        int len = 3;
+        if (key.IsString()) {
+          len = String::cast(key).length();
+        }
+        for (; len < 18; len++) Put(' ');
+        if (key.IsString()) {
+          Put(String::cast(key));
+        } else {
+          // Print into this stream; the argument-less ShortPrint() would
+          // write to stdout and the key would be missing from the output.
+          key.ShortPrint(this);
+        }
+        Add(": ");
+        FieldIndex index = FieldIndex::ForDescriptor(map, i);
+        if (js_object.IsUnboxedDoubleField(index)) {
+          double value = js_object.RawFastDoublePropertyAt(index);
+          Add(" %.16g\n", FmtElm(value));
+        } else {
+          Object value = js_object.RawFastPropertyAt(index);
+          Add("%o\n", value);
+        }
+      }
+    }
+  }
+}
+
+// Prints at most the first 10 of the first |limit| elements of |array|, one
+// per line, skipping holes. A trailing " ..." line marks elided elements.
+void StringStream::PrintFixedArray(FixedArray array, unsigned int limit) {
+  ReadOnlyRoots roots = array.GetReadOnlyRoots();
+  for (unsigned int i = 0; i < 10 && i < limit; i++) {
+    Object element = array.get(i);
+    if (element.IsTheHole(roots)) continue;
+    for (int len = 1; len < 18; len++) {
+      Put(' ');
+    }
+    Add("%d: %o\n", i, element);
+  }
+  // Only indices 0..9 are printed, so something was elided only if there are
+  // more than 10 elements.
+  if (limit > 10) {
+    Add(" ...\n");
+  }
+}
+
+// Prints at most the first 10 bytes of |byte_array| as index, decimal and hex
+// value, followed by a character rendering where one is available.
+void StringStream::PrintByteArray(ByteArray byte_array) {
+  unsigned int limit = byte_array.length();
+  for (unsigned int i = 0; i < 10 && i < limit; i++) {
+    byte b = byte_array.get(i);
+    Add(" %d: %3d 0x%02x", i, b, b);
+    if (b >= ' ' && b <= '~') {
+      Add(" '%c'", b);
+    } else if (b == '\n') {
+      // Show the escape sequence rather than emitting a raw line feed.
+      Add(" '\\n'");
+    } else if (b == '\r') {
+      // Likewise for carriage return.
+      Add(" '\\r'");
+    } else if (b >= 1 && b <= 26) {
+      // Remaining control characters 1..26 are shown as ^A..^Z.
+      Add(" ^%c", b + 'A' - 1);
+    }
+    Add("\n");
+  }
+  // Only indices 0..9 are printed, so something was elided only if there are
+  // more than 10 bytes.
+  if (limit > 10) {
+    Add(" ...\n");
+  }
+}
+
+// Prints a "Key" section describing every object in the isolate's
+// mentioned-object cache. Does nothing in concise print mode.
+void StringStream::PrintMentionedObjectCache(Isolate* isolate) {
+  if (object_print_mode_ == kPrintObjectConcise) return;
+  DebugObjectCache* debug_object_cache =
+      isolate->string_stream_debug_object_cache();
+  Add("==== Key ============================================\n\n");
+  for (size_t i = 0; i < debug_object_cache->size(); i++) {
+    HeapObject printee = *(*debug_object_cache)[i];
+    Add(" #%d# %p: ", static_cast<int>(i),
+        reinterpret_cast<void*>(printee.ptr()));
+    printee.ShortPrint(this);
+    Add("\n");
+    if (printee.IsJSObject()) {
+      if (printee.IsJSValue()) {
+        Add(" value(): %o\n", JSValue::cast(printee).value());
+      }
+      PrintUsingMap(JSObject::cast(printee));
+      if (printee.IsJSArray()) {
+        JSArray array = JSArray::cast(printee);
+        if (array.HasObjectElements()) {
+          // Print no more than min(backing store length, array length).
+          unsigned int limit = FixedArray::cast(array.elements()).length();
+          unsigned int length =
+              static_cast<unsigned int>(JSArray::cast(array).length().Number());
+          if (length < limit) limit = length;
+          PrintFixedArray(FixedArray::cast(array.elements()), limit);
+        }
+      }
+    } else if (printee.IsByteArray()) {
+      PrintByteArray(ByteArray::cast(printee));
+    } else if (printee.IsFixedArray()) {
+      unsigned int limit = FixedArray::cast(printee).length();
+      PrintFixedArray(FixedArray::cast(printee), limit);
+    }
+  }
+}
+
+// Prints a "Security context" line when the security token of |fun|'s native
+// context differs from the one last recorded on the isolate, and records it.
+void StringStream::PrintSecurityTokenIfChanged(JSFunction fun) {
+  Object token = fun.native_context().security_token();
+  Isolate* isolate = fun.GetIsolate();
+  if (token != isolate->string_stream_current_security_token()) {
+    Add("Security context: %o\n", token);
+    isolate->set_string_stream_current_security_token(token);
+  }
+}
+
+// Prints |fun| as PrintPrototype does and stores fun.code() in |*code|.
+void StringStream::PrintFunction(JSFunction fun, Object receiver, Code* code) {
+  PrintPrototype(fun, receiver);
+  *code = fun.code();
+}
+
+// Prints the name under which |fun| is found on |receiver|'s prototype chain
+// (falling back to the function's own name), followed by "(aka <own name>)"
+// when the two differ.
+void StringStream::PrintPrototype(JSFunction fun, Object receiver) {
+  Object name = fun.shared().Name();
+  bool print_name = false;
+  Isolate* isolate = fun.GetIsolate();
+  if (receiver.IsNullOrUndefined(isolate) || receiver.IsTheHole(isolate) ||
+      receiver.IsJSProxy()) {
+    print_name = true;
+  } else if (!isolate->context().is_null()) {
+    if (!receiver.IsJSObject()) {
+      receiver = receiver.GetPrototypeChainRootMap(isolate).prototype();
+    }
+
+    for (PrototypeIterator iter(isolate, JSObject::cast(receiver),
+                                kStartAtReceiver);
+         !iter.IsAtEnd(); iter.Advance()) {
+      if (iter.GetCurrent().IsJSProxy()) break;
+      Object key = iter.GetCurrent().SlowReverseLookup(fun);
+      if (!key.IsUndefined(isolate)) {
+        if (!name.IsString() || !key.IsString() ||
+            !String::cast(name).Equals(String::cast(key))) {
+          print_name = true;
+        }
+        if (name.IsString() && String::cast(name).length() == 0) {
+          print_name = false;
+        }
+        name = key;
+        break;
+      }
+    }
+  }
+  PrintName(name);
+  // Also known as - if the name in the function doesn't match the name under
+  // which it was looked up.
+  if (print_name) {
+    Add("(aka ");
+    PrintName(fun.shared().Name());
+    Put(')');
+  }
+}
+
+// Doubles the buffer. On success |*bytes| is updated and the old contents are
+// copied into the new buffer; if doubling overflows (or does not increase the
+// size) or allocation fails, the old buffer is returned and |*bytes| is left
+// unchanged.
+char* HeapStringAllocator::grow(unsigned* bytes) {
+  unsigned new_bytes = *bytes * 2;
+  // Check for overflow.
+  if (new_bytes <= *bytes) {
+    return space_;
+  }
+  char* new_space = NewArray<char>(new_bytes);
+  if (new_space == nullptr) {
+    return space_;
+  }
+  MemCopy(new_space, space_, *bytes);
+  *bytes = new_bytes;
+  DeleteArray(space_);
+  space_ = new_space;
+  return new_space;
+}
+
+}  // namespace internal
+}  // namespace v8
diff --git a/deps/v8/src/strings/string-stream.h b/deps/v8/src/strings/string-stream.h
new file mode 100644
index 0000000000..d7b616c6ff
--- /dev/null
+++ b/deps/v8/src/strings/string-stream.h
@@ -0,0 +1,214 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_STRINGS_STRING_STREAM_H_
+#define V8_STRINGS_STRING_STREAM_H_
+
+#include "src/base/small-vector.h"
+#include "src/handles/handles.h"
+#include "src/objects/heap-object.h"
+#include "src/utils/allocation.h"
+#include "src/utils/vector.h"
+
+namespace v8 {
+namespace internal {
+
+// Forward declarations.
+class ByteArray;
+
+// Interface through which a StringStream obtains and enlarges its buffer.
+class StringAllocator {
+ public:
+  virtual ~StringAllocator() = default;
+  // Allocate a number of bytes.
+  virtual char* allocate(unsigned bytes) = 0;
+  // Allocate a larger number of bytes and copy the old buffer to the new one.
+  // bytes is an input and output parameter passing the old size of the buffer
+  // and returning the new size. If allocation fails then we return the old
+  // buffer and do not increase the size.
+  virtual char* grow(unsigned* bytes) = 0;
+};
+
+// Normal allocator uses new[] and delete[].
+class HeapStringAllocator final : public StringAllocator {
+ public:
+  ~HeapStringAllocator() override { DeleteArray(space_); }
+  char* allocate(unsigned bytes) override;
+  char* grow(unsigned* bytes) override;
+
+ private:
+  // Starts out null so that destroying an allocator that never allocated does
+  // not hand an indeterminate pointer to DeleteArray.
+  char* space_ = nullptr;
+};
+
+// Allocator working on a caller-supplied buffer of |length| bytes.
+class FixedStringAllocator final : public StringAllocator {
+ public:
+  FixedStringAllocator(char* buffer, unsigned length)
+      : buffer_(buffer), length_(length) {}
+  ~FixedStringAllocator() override = default;
+
+  char* allocate(unsigned bytes) override;
+  char* grow(unsigned* bytes) override;
+
+ private:
+  char* buffer_;
+  unsigned length_;
+  DISALLOW_COPY_AND_ASSIGN(FixedStringAllocator);
+};
+
+// Allocator that stores the text in a caller-owned base::SmallVector.
+template <std::size_t kInlineSize>
+class SmallStringOptimizedAllocator final : public StringAllocator {
+ public:
+  using SmallVector = base::SmallVector<char, kInlineSize>;
+
+  explicit SmallStringOptimizedAllocator(SmallVector* vector) V8_NOEXCEPT
+      : vector_(vector) {}
+
+  char* allocate(unsigned bytes) override {
+    vector_->resize_no_init(bytes);
+    return vector_->data();
+  }
+
+  // Doubles the vector; leaves it (and |*bytes|) untouched if doubling does
+  // not yield a larger size.
+  char* grow(unsigned* bytes) override {
+    unsigned new_bytes = *bytes * 2;
+    // Check for overflow.
+    if (new_bytes <= *bytes) {
+      return vector_->data();
+    }
+    vector_->resize_no_init(new_bytes);
+    *bytes = new_bytes;
+    return vector_->data();
+  }
+
+ private:
+  SmallVector* vector_;
+};
+
+// Accumulates formatted text in a buffer obtained from a StringAllocator.
+class StringStream final {
+  // One argument of a formatted Add() call; a union of the supported
+  // argument types. The type tag is only stored in DEBUG builds.
+  class FmtElm final {
+   public:
+    FmtElm(int value) : FmtElm(INT) {  // NOLINT
+      data_.u_int_ = value;
+    }
+    explicit FmtElm(double value) : FmtElm(DOUBLE) {  // NOLINT
+      data_.u_double_ = value;
+    }
+    FmtElm(const char* value) : FmtElm(C_STR) {  // NOLINT
+      data_.u_c_str_ = value;
+    }
+    // Stores the address of |value|; the vector must outlive this element.
+    FmtElm(const Vector<const uc16>& value) : FmtElm(LC_STR) {  // NOLINT
+      data_.u_lc_str_ = &value;
+    }
+    FmtElm(Object value) : FmtElm(OBJ) {  // NOLINT
+      data_.u_obj_ = value.ptr();
+    }
+    FmtElm(Handle<Object> value) : FmtElm(HANDLE) {  // NOLINT
+      data_.u_handle_ = value.location();
+    }
+    FmtElm(void* value) : FmtElm(POINTER) {  // NOLINT
+      data_.u_pointer_ = value;
+    }
+
+   private:
+    friend class StringStream;
+    enum Type { INT, DOUBLE, C_STR, LC_STR, OBJ, HANDLE, POINTER };
+
+#ifdef DEBUG
+    Type type_;
+    explicit FmtElm(Type type) : type_(type) {}
+#else
+    explicit FmtElm(Type) {}
+#endif
+
+    union {
+      int u_int_;
+      double u_double_;
+      const char* u_c_str_;
+      const Vector<const uc16>* u_lc_str_;
+      Address u_obj_;
+      Address* u_handle_;
+      void* u_pointer_;
+    } data_;
+  };
+
+ public:
+  enum ObjectPrintMode { kPrintObjectConcise, kPrintObjectVerbose };
+  // Allocates the initial buffer of kInitialCapacity bytes from |allocator|
+  // and starts with an empty, NUL-terminated string.
+  explicit StringStream(StringAllocator* allocator,
+                        ObjectPrintMode object_print_mode = kPrintObjectVerbose)
+      : allocator_(allocator),
+        object_print_mode_(object_print_mode),
+        capacity_(kInitialCapacity),
+        length_(0),
+        buffer_(allocator_->allocate(kInitialCapacity)) {
+    buffer_[0] = 0;
+  }
+
+  bool Put(char c);
+  bool Put(String str);
+  bool Put(String str, int start, int end);
+  void Add(const char* format) { Add(CStrVector(format)); }
+  void Add(Vector<const char> format) { Add(format, Vector<FmtElm>()); }
+
+  // Formatted output: every argument is converted to a FmtElm.
+  template <typename... Args>
+  void Add(const char* format, Args... args) {
+    Add(CStrVector(format), args...);
+  }
+
+  template <typename... Args>
+  void Add(Vector<const char> format, Args... args) {
+    FmtElm elems[]{args...};
+    Add(format, ArrayVector(elems));
+  }
+
+  // Getting the message out.
+  void OutputToFile(FILE* out);
+  void OutputToStdOut() { OutputToFile(stdout); }
+  void Log(Isolate* isolate);
+  Handle<String> ToString(Isolate* isolate);
+  std::unique_ptr<char[]> ToCString() const;
+  int length() const { return length_; }
+
+  // Object printing support.
+  void PrintName(Object o);
+  void PrintFixedArray(FixedArray array, unsigned int limit);
+  void PrintByteArray(ByteArray ba);
+  void PrintUsingMap(JSObject js_object);
+  void PrintPrototype(JSFunction fun, Object receiver);
+  void PrintSecurityTokenIfChanged(JSFunction function);
+  // NOTE: Returns the code in the output parameter.
+  void PrintFunction(JSFunction function, Object receiver, Code* code);
+
+  // Reset the stream.
+  void Reset() {
+    length_ = 0;
+    buffer_[0] = 0;
+  }
+
+  // Mentioned object cache support.
+  void PrintMentionedObjectCache(Isolate* isolate);
+  V8_EXPORT_PRIVATE static void ClearMentionedObjectCache(Isolate* isolate);
+#ifdef DEBUG
+  bool IsMentionedObjectCacheClear(Isolate* isolate);
+#endif
+
+  static const int kInitialCapacity = 16;
+
+ private:
+  void Add(Vector<const char> format, Vector<FmtElm> elms);
+  void PrintObject(Object obj);
+
+  StringAllocator* allocator_;
+  ObjectPrintMode object_print_mode_;
+  unsigned capacity_;
+  unsigned length_;  // does not include terminating 0-character
+  char* buffer_;
+
+  // The buffer is full when only the slot for the terminating 0 is left.
+  bool full() const { return (capacity_ - length_) == 1; }
+  int space() const { return capacity_ - length_; }
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(StringStream);
+};
+
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_STRINGS_STRING_STREAM_H_
diff --git a/deps/v8/src/strings/unicode-decoder.cc b/deps/v8/src/strings/unicode-decoder.cc
new file mode 100644
index 0000000000..8ee66ec251
--- /dev/null
+++ b/deps/v8/src/strings/unicode-decoder.cc
@@ -0,0 +1,81 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/strings/unicode-decoder.h"
+
+#include "src/strings/unicode-inl.h"
+#include "src/utils/memcopy.h"
+
+namespace v8 {
+namespace internal {
+
+// Scans |chars| once to determine the narrowest encoding that can hold the
+// decoded text and the number of UTF-16 code units it decodes to. Nothing is
+// written; use Decode() to produce the characters.
+Utf8Decoder::Utf8Decoder(const Vector<const uint8_t>& chars)
+    : encoding_(Encoding::kAscii),
+      non_ascii_start_(NonAsciiStart(chars.begin(), chars.length())),
+      utf16_length_(non_ascii_start_) {
+  // Pure ASCII input: one code unit per byte, nothing more to do.
+  if (non_ascii_start_ == chars.length()) return;
+
+  const uint8_t* cursor = chars.begin() + non_ascii_start_;
+  const uint8_t* end = chars.begin() + chars.length();
+
+  bool is_one_byte = true;
+  uint32_t incomplete_char = 0;
+  unibrow::Utf8::State state = unibrow::Utf8::State::kAccept;
+
+  while (cursor < end) {
+    unibrow::uchar t =
+        unibrow::Utf8::ValueOfIncremental(&cursor, &state, &incomplete_char);
+    if (t != unibrow::Utf8::kIncomplete) {
+      is_one_byte = is_one_byte && t <= unibrow::Latin1::kMaxChar;
+      utf16_length_++;
+      // Code points above the non-surrogate range need a surrogate pair.
+      if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) utf16_length_++;
+    }
+  }
+
+  // A sequence left unfinished at the end of input yields one more character,
+  // which forces the two-byte encoding.
+  unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
+  if (t != unibrow::Utf8::kBufferEmpty) {
+    is_one_byte = false;
+    utf16_length_++;
+  }
+
+  encoding_ = is_one_byte ? Encoding::kLatin1 : Encoding::kUtf16;
+}
+
+// Decodes |data| into |out|. The leading non_ascii_start_ bytes are copied
+// as-is; the remainder is decoded incrementally. With a one-byte Char every
+// decoded value is stored as a single unit; otherwise values above the
+// non-surrogate range are written as a lead/trail surrogate pair.
+template <typename Char>
+void Utf8Decoder::Decode(Char* out, const Vector<const uint8_t>& data) {
+  CopyChars(out, data.begin(), non_ascii_start_);
+
+  out += non_ascii_start_;
+
+  uint32_t incomplete_char = 0;
+  unibrow::Utf8::State state = unibrow::Utf8::State::kAccept;
+
+  const uint8_t* cursor = data.begin() + non_ascii_start_;
+  const uint8_t* end = data.begin() + data.length();
+
+  while (cursor < end) {
+    unibrow::uchar t =
+        unibrow::Utf8::ValueOfIncremental(&cursor, &state, &incomplete_char);
+    if (t != unibrow::Utf8::kIncomplete) {
+      if (sizeof(Char) == 1 || t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
+        *(out++) = static_cast<Char>(t);
+      } else {
+        *(out++) = unibrow::Utf16::LeadSurrogate(t);
+        *(out++) = unibrow::Utf16::TrailSurrogate(t);
+      }
+    }
+  }
+
+  // Flush the character produced by a sequence left unfinished at the end.
+  unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
+  if (t != unibrow::Utf8::kBufferEmpty) *out = static_cast<Char>(t);
+}
+
+template void Utf8Decoder::Decode<uint8_t>(uint8_t* out,
+                                           const Vector<const uint8_t>& data);
+
+template void Utf8Decoder::Decode<uint16_t>(uint16_t* out,
+                                            const Vector<const uint8_t>& data);
+
+}  // namespace internal
+}  // namespace v8
diff --git a/deps/v8/src/strings/unicode-decoder.h b/deps/v8/src/strings/unicode-decoder.h
new file mode 100644
index 0000000000..e35d176770
--- /dev/null
+++ b/deps/v8/src/strings/unicode-decoder.h
@@ -0,0 +1,74 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_STRINGS_UNICODE_DECODER_H_
+#define V8_STRINGS_UNICODE_DECODER_H_
+
+#include "src/strings/unicode.h"
+#include "src/utils/vector.h"
+
+namespace v8 {
+namespace internal {
+
+// The return value may point to the first aligned word containing the first
+// non-one-byte character, rather than directly to the non-one-byte character.
+// If the return value is >= the passed length, the entire string was
+// one-byte.
+inline int NonAsciiStart(const uint8_t* chars, int length) {
+  const uint8_t* start = chars;
+  const uint8_t* limit = chars + length;
+
+  // The word-at-a-time scan only pays off (and is only safe) when the input
+  // holds at least one full word.
+  if (static_cast<size_t>(length) >= kIntptrSize) {
+    // Check unaligned bytes.
+    while (!IsAligned(reinterpret_cast<intptr_t>(chars), kIntptrSize)) {
+      if (*chars > unibrow::Utf8::kMaxOneByteChar) {
+        return static_cast<int>(chars - start);
+      }
+      ++chars;
+    }
+    // Check aligned words.
+    DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
+    // 0x80 replicated into every byte of a word: any set bit means that some
+    // byte of the word is above 0x7F.
+    const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
+    while (chars + sizeof(uintptr_t) <= limit) {
+      if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
+        return static_cast<int>(chars - start);
+      }
+      chars += sizeof(uintptr_t);
+    }
+  }
+  // Check remaining unaligned bytes.
+  while (chars < limit) {
+    if (*chars > unibrow::Utf8::kMaxOneByteChar) {
+      return static_cast<int>(chars - start);
+    }
+    ++chars;
+  }
+
+  return static_cast<int>(chars - start);
+}
+
+// Two-step UTF-8 decoder: the constructor measures the input (encoding class
+// and UTF-16 length), Decode() then writes the characters.
+class V8_EXPORT_PRIVATE Utf8Decoder final {
+ public:
+  // Ordered from narrowest to widest; is_one_byte() relies on this order.
+  enum class Encoding : uint8_t { kAscii, kLatin1, kUtf16 };
+
+  explicit Utf8Decoder(const Vector<const uint8_t>& chars);
+
+  bool is_ascii() const { return encoding_ == Encoding::kAscii; }
+  bool is_one_byte() const { return encoding_ <= Encoding::kLatin1; }
+  int utf16_length() const { return utf16_length_; }
+  int non_ascii_start() const { return non_ascii_start_; }
+
+  template <typename Char>
+  V8_EXPORT_PRIVATE void Decode(Char* out, const Vector<const uint8_t>& data);
+
+ private:
+  Encoding encoding_;
+  int non_ascii_start_;
+  int utf16_length_;
+};
+
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_STRINGS_UNICODE_DECODER_H_
diff --git a/deps/v8/src/strings/unicode-inl.h b/deps/v8/src/strings/unicode-inl.h
new file mode 100644
index 0000000000..6f730b26be
--- /dev/null
+++ b/deps/v8/src/strings/unicode-inl.h
@@ -0,0 +1,194 @@
+// Copyright 2007-2010 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_STRINGS_UNICODE_INL_H_
+#define V8_STRINGS_UNICODE_INL_H_
+
+#include "src/base/logging.h"
+#include "src/strings/unicode.h"
+#include "src/utils/utils.h"
+
+namespace unibrow {
+
+#ifndef V8_INTL_SUPPORT
+// Returns the predicate value for |code_point|, served from the cache slot
+// selected by |code_point & kMask| when that slot holds this code point.
+template <class T, int s>
+bool Predicate<T, s>::get(uchar code_point) {
+  CacheEntry entry = entries_[code_point & kMask];
+  if (entry.code_point() == code_point) return entry.value();
+  return CalculateValue(code_point);
+}
+
+// Computes T::Is(code_point) and stores the result in the cache.
+template <class T, int s>
+bool Predicate<T, s>::CalculateValue(uchar code_point) {
+  bool result = T::Is(code_point);
+  entries_[code_point & kMask] = CacheEntry(code_point, result);
+  return result;
+}
+
+// Maps |c| (followed by |n|) into |result| and returns the number of
+// characters written. A cache hit with offset 0 means "no mapping"; any other
+// cached offset is added to |c|.
+template <class T, int s>
+int Mapping<T, s>::get(uchar c, uchar n, uchar* result) {
+  CacheEntry entry = entries_[c & kMask];
+  if (entry.code_point_ == c) {
+    if (entry.offset_ == 0) {
+      return 0;
+    } else {
+      result[0] = c + entry.offset_;
+      return 1;
+    }
+  } else {
+    return CalculateValue(c, n, result);
+  }
+}
+
+// Runs T::Convert and, when the conversion allows caching, records either the
+// single-character offset or "no mapping" (offset 0) for |c|.
+template <class T, int s>
+int Mapping<T, s>::CalculateValue(uchar c, uchar n, uchar* result) {
+  bool allow_caching = true;
+  int length = T::Convert(c, n, result, &allow_caching);
+  if (allow_caching) {
+    if (length == 1) {
+      entries_[c & kMask] = CacheEntry(c, result[0] - c);
+      return 1;
+    } else {
+      entries_[c & kMask] = CacheEntry(c, 0);
+      return 0;
+    }
+  } else {
+    return length;
+  }
+}
+#endif  // !V8_INTL_SUPPORT
+
+// Decodes UTF-8 bytes incrementally, allowing the decoding of bytes as they
+// stream in. This **must** be followed by a call to ValueOfIncrementalFinish
+// when the stream is complete, to ensure incomplete sequences are handled.
+// Consumes one byte from |*cursor| per call (or none, when a rejected byte
+// has to be reprocessed) and returns the decoded value, kBadChar, or
+// kIncomplete while a multi-byte sequence is still in progress.
+uchar Utf8::ValueOfIncremental(const byte** cursor, State* state,
+                               Utf8IncrementalBuffer* buffer) {
+  DCHECK_NOT_NULL(buffer);
+  State old_state = *state;
+  byte next = **cursor;
+  *cursor += 1;
+
+  // Fast path: a one-byte character outside of any pending sequence.
+  if (V8_LIKELY(next <= kMaxOneByteChar && old_state == State::kAccept)) {
+    DCHECK_EQ(0u, *buffer);
+    return static_cast<uchar>(next);
+  }
+
+  // So we're at the lead byte of a 2/3/4 sequence, or we're at a continuation
+  // char in that sequence.
+  Utf8DfaDecoder::Decode(next, state, buffer);
+
+  switch (*state) {
+    case State::kAccept: {
+      uchar t = *buffer;
+      *buffer = 0;
+      return t;
+    }
+
+    case State::kReject:
+      *state = State::kAccept;
+      *buffer = 0;
+
+      // If we hit a bad byte, we need to determine if we were trying to start
+      // a sequence or continue one. If we were trying to start a sequence,
+      // that means it's just an invalid lead byte and we need to continue to
+      // the next (which we already did above). If we were already in a
+      // sequence, we need to reprocess this same byte after resetting to the
+      // initial state.
+      if (old_state != State::kAccept) {
+        // We were trying to continue a sequence, so let's reprocess this byte
+        // next time.
+        *cursor -= 1;
+      }
+      return kBadChar;
+
+    default:
+      return kIncomplete;
+  }
+}
+
+// Encodes the one-byte character |c| as UTF-8 into |str| and returns the
+// number of bytes written (1 or 2).
+unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
+  static const int kMask = ~(1 << 6);
+  if (c <= kMaxOneByteChar) {
+    str[0] = c;
+    return 1;
+  }
+  str[0] = 0xC0 | (c >> 6);
+  str[1] = 0x80 | (c & kMask);
+  return 2;
+}
+
+// Encode encodes the UTF-16 code units c and previous into the given str
+// buffer, and combines surrogate code units into single code points. If
+// replace_invalid is set to true, orphan surrogate code units will be replaced
+// with kBadChar.
+unsigned Utf8::Encode(char* str, uchar c, int previous, bool replace_invalid) {
+  static const int kMask = ~(1 << 6);
+  if (c <= kMaxOneByteChar) {
+    str[0] = c;
+    return 1;
+  } else if (c <= kMaxTwoByteChar) {
+    str[0] = 0xC0 | (c >> 6);
+    str[1] = 0x80 | (c & kMask);
+    return 2;
+  } else if (c <= kMaxThreeByteChar) {
+    DCHECK(!Utf16::IsLeadSurrogate(Utf16::kNoPreviousCharacter));
+    if (Utf16::IsSurrogatePair(previous, c)) {
+      // The lead surrogate was already written as an unmatched surrogate just
+      // before |str|: overwrite it with the combined code point and report
+      // only the net number of additional bytes.
+      const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
+      return Encode(str - kUnmatchedSize,
+                    Utf16::CombineSurrogatePair(previous, c),
+                    Utf16::kNoPreviousCharacter, replace_invalid) -
+             kUnmatchedSize;
+    } else if (replace_invalid &&
+               (Utf16::IsLeadSurrogate(c) || Utf16::IsTrailSurrogate(c))) {
+      c = kBadChar;
+    }
+    str[0] = 0xE0 | (c >> 12);
+    str[1] = 0x80 | ((c >> 6) & kMask);
+    str[2] = 0x80 | (c & kMask);
+    return 3;
+  } else {
+    str[0] = 0xF0 | (c >> 18);
+    str[1] = 0x80 | ((c >> 12) & kMask);
+    str[2] = 0x80 | ((c >> 6) & kMask);
+    str[3] = 0x80 | (c & kMask);
+    return 4;
+  }
+}
+
+// Decodes the character at the start of |bytes|, advancing |*cursor| by the
+// number of bytes consumed. Returns kBadChar (leaving |*cursor| untouched) for
+// empty input.
+uchar Utf8::ValueOf(const byte* bytes, size_t length, size_t* cursor) {
+  // |length| is unsigned, so "empty" is the only degenerate case.
+  if (length == 0) return kBadChar;
+  byte first = bytes[0];
+  // Characters between 0000 and 007F are encoded as a single character
+  if (V8_LIKELY(first <= kMaxOneByteChar)) {
+    *cursor += 1;
+    return first;
+  }
+  return CalculateValue(bytes, length, cursor);
+}
+
+// Returns the number of UTF-8 bytes that Encode() accounts for |c| when it
+// follows the UTF-16 code unit |previous|.
+unsigned Utf8::Length(uchar c, int previous) {
+  if (c <= kMaxOneByteChar) {
+    return 1;
+  } else if (c <= kMaxTwoByteChar) {
+    return 2;
+  } else if (c <= kMaxThreeByteChar) {
+    DCHECK(!Utf16::IsLeadSurrogate(Utf16::kNoPreviousCharacter));
+    if (Utf16::IsSurrogatePair(previous, c)) {
+      return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates;
+    }
+    return 3;
+  } else {
+    return 4;
+  }
+}
+
+// True for code points up to 0x10FFFF that are neither surrogates
+// (0xD800-0xDFFF), nor in 0xFDD0-0xFDEF, nor end in 0xFFFE/0xFFFF, nor equal
+// to kBadChar.
+bool Utf8::IsValidCharacter(uchar c) {
+  return c < 0xD800u || (c >= 0xE000u && c < 0xFDD0u) ||
+         (c > 0xFDEFu && c <= 0x10FFFFu && (c & 0xFFFEu) != 0xFFFEu &&
+          c != kBadChar);
+}
+
+}  // namespace unibrow
+
+#endif  // V8_STRINGS_UNICODE_INL_H_
diff --git a/deps/v8/src/strings/unicode.cc b/deps/v8/src/strings/unicode.cc
new file mode 100644
index 0000000000..21faccd0b4
--- /dev/null
+++ b/deps/v8/src/strings/unicode.cc
@@ -0,0 +1,3192 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// This file was generated at 2014-10-08 15:25:47.940335
+
+#include "src/strings/unicode.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include "src/strings/unicode-inl.h"
+
+#ifdef V8_INTL_SUPPORT
+#include "unicode/uchar.h"
+#endif
+
+namespace unibrow {
+
+#ifndef V8_INTL_SUPPORT
+// Flag bit marking a table entry as the start of a range.
+static const int kStartBit = (1 << 30);
+// Number of code points covered by one table chunk.
+static const int kChunkBits = (1 << 13);
+#endif  // !V8_INTL_SUPPORT
+
+static const uchar kSentinel = static_cast<uchar>(-1);
+
+/**
+ * \file
+ * Implementations of functions for working with Unicode.
+ */
+
+using int16_t = signed short;     // NOLINT
+using uint16_t = unsigned short;  // NOLINT
+using int32_t = int;              // NOLINT
+
+#ifndef V8_INTL_SUPPORT
+// All access to the character table should go through this function.
+// D is the distance (in table slots) between consecutive entries.
+template <int D>
+static inline uchar TableGet(const int32_t* table, int index) {
+  return table[D * index];
+}
+
+// Strips the flag bits (kStartBit and above) from a table entry.
+static inline uchar GetEntry(int32_t entry) { return entry & (kStartBit - 1); }
+
+// True if the entry carries the range-start flag.
+static inline bool IsStart(int32_t entry) { return (entry & kStartBit) != 0; }
+
+/**
+ * Look up a character in the Unicode table using a mix of binary and
+ * interpolation search.  For a uniformly distributed array
+ * interpolation search beats binary search by a wide margin.  However,
+ * in this case interpolation search degenerates because of some very
+ * high values in the lower end of the table so this function uses a
+ * combination.  The average number of steps to look up the information
+ * about a character is around 10, slightly higher if there is no
+ * information available about the character.
+ */
+static bool LookupPredicate(const int32_t* table, uint16_t size, uchar chr) {
+  static const int kEntryDist = 1;
+  // Tables are per chunk, so only the offset within the chunk is searched.
+  uint16_t value = chr & (kChunkBits - 1);
+  unsigned int low = 0;
+  unsigned int high = size - 1;
+  while (high != low) {
+    unsigned int mid = low + ((high - low) >> 1);
+    uchar current_value = GetEntry(TableGet<kEntryDist>(table, mid));
+    // If we've found an entry less than or equal to this one, and the
+    // next one is not also less than this one, we've arrived.
+    if ((current_value <= value) &&
+        (mid + 1 == size ||
+         GetEntry(TableGet<kEntryDist>(table, mid + 1)) > value)) {
+      low = mid;
+      break;
+    } else if (current_value < value) {
+      low = mid + 1;
+    } else if (current_value > value) {
+      // If we've just checked the bottom-most value and it's not
+      // the one we're looking for, we're done.
+      if (mid == 0) break;
+      high = mid - 1;
+    }
+  }
+  int32_t field = TableGet<kEntryDist>(table, low);
+  uchar entry = GetEntry(field);
+  bool is_start = IsStart(field);
+  // Either an exact hit, or |value| lies inside the range opened by |entry|.
+  return (entry == value) || (entry < value && is_start);
+}
+#endif  // !V8_INTL_SUPPORT
+
+// A mapping to up to kW characters; unused slots hold kEndOfEncoding.
+template <int kW>
+struct MultiCharacterSpecialCase {
+  static const uchar kEndOfEncoding = kSentinel;
+  uchar chars[kW];
+};
+
+#ifndef V8_INTL_SUPPORT
+// Look up the mapping for the given character in the specified table,
+// which is of the specified length and uses the specified special case
+// mapping for multi-char mappings.  The next parameter is the character
+// following the one to map.  The result will be written in to the result
+// buffer and the number of characters written will be returned.  Finally,
+// if the allow_caching_ptr is non-null then false will be stored in
+// it if the result contains multiple characters or depends on the
+// context.
+// If ranges are linear, a match between a start and end point is
+// offset by the distance between the match and the start. Otherwise
+// the result is the same as for the start point on the entire range.
+template +static int LookupMapping(const int32_t* table, uint16_t size, + const MultiCharacterSpecialCase* multi_chars, + uchar chr, uchar next, uchar* result, + bool* allow_caching_ptr) { + static const int kEntryDist = 2; + uint16_t key = chr & (kChunkBits - 1); + uint16_t chunk_start = chr - key; + unsigned int low = 0; + unsigned int high = size - 1; + while (high != low) { + unsigned int mid = low + ((high - low) >> 1); + uchar current_value = GetEntry(TableGet(table, mid)); + // If we've found an entry less than or equal to this one, and the next one + // is not also less than this one, we've arrived. + if ((current_value <= key) && + (mid + 1 == size || + GetEntry(TableGet(table, mid + 1)) > key)) { + low = mid; + break; + } else if (current_value < key) { + low = mid + 1; + } else if (current_value > key) { + // If we've just checked the bottom-most value and it's not + // the one we're looking for, we're done. + if (mid == 0) break; + high = mid - 1; + } + } + int32_t field = TableGet(table, low); + uchar entry = GetEntry(field); + bool is_start = IsStart(field); + bool found = (entry == key) || (entry < key && is_start); + if (found) { + int32_t value = table[2 * low + 1]; + if (value == 0) { + // 0 means not present + return 0; + } else if ((value & 3) == 0) { + // Low bits 0 means a constant offset from the given character. 
+ if (ranges_are_linear) { + result[0] = chr + (value >> 2); + } else { + result[0] = entry + chunk_start + (value >> 2); + } + return 1; + } else if ((value & 3) == 1) { + // Low bits 1 means a special case mapping + if (allow_caching_ptr) *allow_caching_ptr = false; + const MultiCharacterSpecialCase& mapping = multi_chars[value >> 2]; + int length = 0; + for (length = 0; length < kW; length++) { + uchar mapped = mapping.chars[length]; + if (mapped == MultiCharacterSpecialCase::kEndOfEncoding) break; + if (ranges_are_linear) { + result[length] = mapped + (key - entry); + } else { + result[length] = mapped; + } + } + return length; + } else { + // Low bits 2 means a really really special case + if (allow_caching_ptr) *allow_caching_ptr = false; + // The cases of this switch are defined in unicode.py in the + // really_special_cases mapping. + switch (value >> 2) { + case 1: + // Really special case 1: upper case sigma. This letter + // converts to two different lower case sigmas depending on + // whether or not it occurs at the end of a word. + if (next != 0 && Letter::Is(next)) { + result[0] = 0x03C3; + } else { + result[0] = 0x03C2; + } + return 1; + default: + return 0; + } + return -1; + } + } else { + return 0; + } +} +#endif // !V8_INTL_SUPPORT + +// This method decodes an UTF-8 value according to RFC 3629 and +// https://encoding.spec.whatwg.org/#utf-8-decoder . +uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { + DCHECK_GT(max_length, 0); + DCHECK_GT(str[0], kMaxOneByteChar); + + State state = State::kAccept; + Utf8IncrementalBuffer buffer = 0; + uchar t; + + const byte* start = str; + const byte* end = str + max_length; + + do { + t = ValueOfIncremental(&str, &state, &buffer); + } while (str < end && t == kIncomplete); + + *cursor += str - start; + return (state == State::kAccept) ? 
t : kBadChar; +} + +// Finishes the incremental decoding, ensuring that if an unfinished sequence +// is left that it is replaced by a replacement char. +uchar Utf8::ValueOfIncrementalFinish(State* state) { + if (*state == State::kAccept) { + return kBufferEmpty; + } else { + DCHECK_GT(*state, State::kAccept); + *state = State::kAccept; + return kBadChar; + } +} + +bool Utf8::ValidateEncoding(const byte* bytes, size_t length) { + State state = State::kAccept; + Utf8IncrementalBuffer throw_away = 0; + for (size_t i = 0; i < length && state != State::kReject; i++) { + Utf8DfaDecoder::Decode(bytes[i], &state, &throw_away); + } + return state == State::kAccept; +} + +// Uppercase: point.category == 'Lu' +// TODO(jshin): Check if it's ok to exclude Other_Uppercase characters. +#ifdef V8_INTL_SUPPORT +bool Uppercase::Is(uchar c) { return static_cast(u_isupper(c)); } +#else +static const uint16_t kUppercaseTable0Size = 455; +static const int32_t kUppercaseTable0[455] = { + 1073741889, 90, 1073742016, 214, + 1073742040, 222, 256, 258, // NOLINT + 260, 262, 264, 266, + 268, 270, 272, 274, // NOLINT + 276, 278, 280, 282, + 284, 286, 288, 290, // NOLINT + 292, 294, 296, 298, + 300, 302, 304, 306, // NOLINT + 308, 310, 313, 315, + 317, 319, 321, 323, // NOLINT + 325, 327, 330, 332, + 334, 336, 338, 340, // NOLINT + 342, 344, 346, 348, + 350, 352, 354, 356, // NOLINT + 358, 360, 362, 364, + 366, 368, 370, 372, // NOLINT + 374, 1073742200, 377, 379, + 381, 1073742209, 386, 388, // NOLINT + 1073742214, 391, 1073742217, 395, + 1073742222, 401, 1073742227, 404, // NOLINT + 1073742230, 408, 1073742236, 413, + 1073742239, 416, 418, 420, // NOLINT + 1073742246, 423, 425, 428, + 1073742254, 431, 1073742257, 435, // NOLINT + 437, 1073742263, 440, 444, + 452, 455, 458, 461, // NOLINT + 463, 465, 467, 469, + 471, 473, 475, 478, // NOLINT + 480, 482, 484, 486, + 488, 490, 492, 494, // NOLINT + 497, 500, 1073742326, 504, + 506, 508, 510, 512, // NOLINT + 514, 516, 518, 520, + 522, 524, 526, 
528, // NOLINT + 530, 532, 534, 536, + 538, 540, 542, 544, // NOLINT + 546, 548, 550, 552, + 554, 556, 558, 560, // NOLINT + 562, 1073742394, 571, 1073742397, + 574, 577, 1073742403, 582, // NOLINT + 584, 586, 588, 590, + 880, 882, 886, 895, // NOLINT + 902, 1073742728, 906, 908, + 1073742734, 911, 1073742737, 929, // NOLINT + 1073742755, 939, 975, 1073742802, + 980, 984, 986, 988, // NOLINT + 990, 992, 994, 996, + 998, 1000, 1002, 1004, // NOLINT + 1006, 1012, 1015, 1073742841, + 1018, 1073742845, 1071, 1120, // NOLINT + 1122, 1124, 1126, 1128, + 1130, 1132, 1134, 1136, // NOLINT + 1138, 1140, 1142, 1144, + 1146, 1148, 1150, 1152, // NOLINT + 1162, 1164, 1166, 1168, + 1170, 1172, 1174, 1176, // NOLINT + 1178, 1180, 1182, 1184, + 1186, 1188, 1190, 1192, // NOLINT + 1194, 1196, 1198, 1200, + 1202, 1204, 1206, 1208, // NOLINT + 1210, 1212, 1214, 1073743040, + 1217, 1219, 1221, 1223, // NOLINT + 1225, 1227, 1229, 1232, + 1234, 1236, 1238, 1240, // NOLINT + 1242, 1244, 1246, 1248, + 1250, 1252, 1254, 1256, // NOLINT + 1258, 1260, 1262, 1264, + 1266, 1268, 1270, 1272, // NOLINT + 1274, 1276, 1278, 1280, + 1282, 1284, 1286, 1288, // NOLINT + 1290, 1292, 1294, 1296, + 1298, 1300, 1302, 1304, // NOLINT + 1306, 1308, 1310, 1312, + 1314, 1316, 1318, 1320, // NOLINT + 1322, 1324, 1326, 1073743153, + 1366, 1073746080, 4293, 4295, // NOLINT + 4301, 7680, 7682, 7684, + 7686, 7688, 7690, 7692, // NOLINT + 7694, 7696, 7698, 7700, + 7702, 7704, 7706, 7708, // NOLINT + 7710, 7712, 7714, 7716, + 7718, 7720, 7722, 7724, // NOLINT + 7726, 7728, 7730, 7732, + 7734, 7736, 7738, 7740, // NOLINT + 7742, 7744, 7746, 7748, + 7750, 7752, 7754, 7756, // NOLINT + 7758, 7760, 7762, 7764, + 7766, 7768, 7770, 7772, // NOLINT + 7774, 7776, 7778, 7780, + 7782, 7784, 7786, 7788, // NOLINT + 7790, 7792, 7794, 7796, + 7798, 7800, 7802, 7804, // NOLINT + 7806, 7808, 7810, 7812, + 7814, 7816, 7818, 7820, // NOLINT + 7822, 7824, 7826, 7828, + 7838, 7840, 7842, 7844, // NOLINT + 7846, 7848, 7850, 7852, + 
7854, 7856, 7858, 7860, // NOLINT + 7862, 7864, 7866, 7868, + 7870, 7872, 7874, 7876, // NOLINT + 7878, 7880, 7882, 7884, + 7886, 7888, 7890, 7892, // NOLINT + 7894, 7896, 7898, 7900, + 7902, 7904, 7906, 7908, // NOLINT + 7910, 7912, 7914, 7916, + 7918, 7920, 7922, 7924, // NOLINT + 7926, 7928, 7930, 7932, + 7934, 1073749768, 7951, 1073749784, // NOLINT + 7965, 1073749800, 7983, 1073749816, + 7999, 1073749832, 8013, 8025, // NOLINT + 8027, 8029, 8031, 1073749864, + 8047, 1073749944, 8123, 1073749960, // NOLINT + 8139, 1073749976, 8155, 1073749992, + 8172, 1073750008, 8187}; // NOLINT +static const uint16_t kUppercaseTable1Size = 86; +static const int32_t kUppercaseTable1[86] = { + 258, 263, 1073742091, 269, 1073742096, 274, + 277, 1073742105, // NOLINT + 285, 292, 294, 296, 1073742122, 301, + 1073742128, 307, // NOLINT + 1073742142, 319, 325, 387, 1073744896, 3118, + 3168, 1073744994, // NOLINT + 3172, 3175, 3177, 3179, 1073745005, 3184, + 3186, 3189, // NOLINT + 1073745022, 3200, 3202, 3204, 3206, 3208, + 3210, 3212, // NOLINT + 3214, 3216, 3218, 3220, 3222, 3224, + 3226, 3228, // NOLINT + 3230, 3232, 3234, 3236, 3238, 3240, + 3242, 3244, // NOLINT + 3246, 3248, 3250, 3252, 3254, 3256, + 3258, 3260, // NOLINT + 3262, 3264, 3266, 3268, 3270, 3272, + 3274, 3276, // NOLINT + 3278, 3280, 3282, 3284, 3286, 3288, + 3290, 3292, // NOLINT + 3294, 3296, 3298, 3307, 3309, 3314}; // NOLINT +static const uint16_t kUppercaseTable5Size = 101; +static const int32_t kUppercaseTable5[101] = { + 1600, 1602, 1604, 1606, 1608, 1610, 1612, 1614, // NOLINT + 1616, 1618, 1620, 1622, 1624, 1626, 1628, 1630, // NOLINT + 1632, 1634, 1636, 1638, 1640, 1642, 1644, 1664, // NOLINT + 1666, 1668, 1670, 1672, 1674, 1676, 1678, 1680, // NOLINT + 1682, 1684, 1686, 1688, 1690, 1826, 1828, 1830, // NOLINT + 1832, 1834, 1836, 1838, 1842, 1844, 1846, 1848, // NOLINT + 1850, 1852, 1854, 1856, 1858, 1860, 1862, 1864, // NOLINT + 1866, 1868, 1870, 1872, 1874, 1876, 1878, 1880, // NOLINT + 1882, 1884, 
1886, 1888, 1890, 1892, 1894, 1896, // NOLINT + 1898, 1900, 1902, 1913, 1915, 1073743741, 1918, 1920, // NOLINT + 1922, 1924, 1926, 1931, 1933, 1936, 1938, 1942, // NOLINT + 1944, 1946, 1948, 1950, 1952, 1954, 1956, 1958, // NOLINT + 1960, 1073743786, 1965, 1073743792, 1969}; // NOLINT +static const uint16_t kUppercaseTable7Size = 2; +static const int32_t kUppercaseTable7[2] = {1073749793, 7994}; // NOLINT +bool Uppercase::Is(uchar c) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupPredicate(kUppercaseTable0, kUppercaseTable0Size, c); + case 1: + return LookupPredicate(kUppercaseTable1, kUppercaseTable1Size, c); + case 5: + return LookupPredicate(kUppercaseTable5, kUppercaseTable5Size, c); + case 7: + return LookupPredicate(kUppercaseTable7, kUppercaseTable7Size, c); + default: + return false; + } +} +#endif // V8_INTL_SUPPORT + +// Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'] +#ifdef V8_INTL_SUPPORT +bool Letter::Is(uchar c) { return static_cast(u_isalpha(c)); } +#else +static const uint16_t kLetterTable0Size = 431; +static const int32_t kLetterTable0[431] = { + 1073741889, 90, 1073741921, 122, + 170, 181, 186, 1073742016, // NOLINT + 214, 1073742040, 246, 1073742072, + 705, 1073742534, 721, 1073742560, // NOLINT + 740, 748, 750, 1073742704, + 884, 1073742710, 887, 1073742714, // NOLINT + 893, 895, 902, 1073742728, + 906, 908, 1073742734, 929, // NOLINT + 1073742755, 1013, 1073742839, 1153, + 1073742986, 1327, 1073743153, 1366, // NOLINT + 1369, 1073743201, 1415, 1073743312, + 1514, 1073743344, 1522, 1073743392, // NOLINT + 1610, 1073743470, 1647, 1073743473, + 1747, 1749, 1073743589, 1766, // NOLINT + 1073743598, 1775, 1073743610, 1788, + 1791, 1808, 1073743634, 1839, // NOLINT + 1073743693, 1957, 1969, 1073743818, + 2026, 1073743860, 2037, 2042, // NOLINT + 1073743872, 2069, 2074, 2084, + 2088, 1073743936, 2136, 1073744032, // NOLINT + 2226, 1073744132, 2361, 2365, + 2384, 1073744216, 2401, 1073744241, // NOLINT 
+ 2432, 1073744261, 2444, 1073744271, + 2448, 1073744275, 2472, 1073744298, // NOLINT + 2480, 2482, 1073744310, 2489, + 2493, 2510, 1073744348, 2525, // NOLINT + 1073744351, 2529, 1073744368, 2545, + 1073744389, 2570, 1073744399, 2576, // NOLINT + 1073744403, 2600, 1073744426, 2608, + 1073744434, 2611, 1073744437, 2614, // NOLINT + 1073744440, 2617, 1073744473, 2652, + 2654, 1073744498, 2676, 1073744517, // NOLINT + 2701, 1073744527, 2705, 1073744531, + 2728, 1073744554, 2736, 1073744562, // NOLINT + 2739, 1073744565, 2745, 2749, + 2768, 1073744608, 2785, 1073744645, // NOLINT + 2828, 1073744655, 2832, 1073744659, + 2856, 1073744682, 2864, 1073744690, // NOLINT + 2867, 1073744693, 2873, 2877, + 1073744732, 2909, 1073744735, 2913, // NOLINT + 2929, 2947, 1073744773, 2954, + 1073744782, 2960, 1073744786, 2965, // NOLINT + 1073744793, 2970, 2972, 1073744798, + 2975, 1073744803, 2980, 1073744808, // NOLINT + 2986, 1073744814, 3001, 3024, + 1073744901, 3084, 1073744910, 3088, // NOLINT + 1073744914, 3112, 1073744938, 3129, + 3133, 1073744984, 3161, 1073744992, // NOLINT + 3169, 1073745029, 3212, 1073745038, + 3216, 1073745042, 3240, 1073745066, // NOLINT + 3251, 1073745077, 3257, 3261, + 3294, 1073745120, 3297, 1073745137, // NOLINT + 3314, 1073745157, 3340, 1073745166, + 3344, 1073745170, 3386, 3389, // NOLINT + 3406, 1073745248, 3425, 1073745274, + 3455, 1073745285, 3478, 1073745306, // NOLINT + 3505, 1073745331, 3515, 3517, + 1073745344, 3526, 1073745409, 3632, // NOLINT + 1073745458, 3635, 1073745472, 3654, + 1073745537, 3714, 3716, 1073745543, // NOLINT + 3720, 3722, 3725, 1073745556, + 3735, 1073745561, 3743, 1073745569, // NOLINT + 3747, 3749, 3751, 1073745578, + 3755, 1073745581, 3760, 1073745586, // NOLINT + 3763, 3773, 1073745600, 3780, + 3782, 1073745628, 3807, 3840, // NOLINT + 1073745728, 3911, 1073745737, 3948, + 1073745800, 3980, 1073745920, 4138, // NOLINT + 4159, 1073746000, 4181, 1073746010, + 4189, 4193, 1073746021, 4198, // NOLINT + 1073746030, 4208, 
1073746037, 4225, + 4238, 1073746080, 4293, 4295, // NOLINT + 4301, 1073746128, 4346, 1073746172, + 4680, 1073746506, 4685, 1073746512, // NOLINT + 4694, 4696, 1073746522, 4701, + 1073746528, 4744, 1073746570, 4749, // NOLINT + 1073746576, 4784, 1073746610, 4789, + 1073746616, 4798, 4800, 1073746626, // NOLINT + 4805, 1073746632, 4822, 1073746648, + 4880, 1073746706, 4885, 1073746712, // NOLINT + 4954, 1073746816, 5007, 1073746848, + 5108, 1073746945, 5740, 1073747567, // NOLINT + 5759, 1073747585, 5786, 1073747616, + 5866, 1073747694, 5880, 1073747712, // NOLINT + 5900, 1073747726, 5905, 1073747744, + 5937, 1073747776, 5969, 1073747808, // NOLINT + 5996, 1073747822, 6000, 1073747840, + 6067, 6103, 6108, 1073748000, // NOLINT + 6263, 1073748096, 6312, 6314, + 1073748144, 6389, 1073748224, 6430, // NOLINT + 1073748304, 6509, 1073748336, 6516, + 1073748352, 6571, 1073748417, 6599, // NOLINT + 1073748480, 6678, 1073748512, 6740, + 6823, 1073748741, 6963, 1073748805, // NOLINT + 6987, 1073748867, 7072, 1073748910, + 7087, 1073748922, 7141, 1073748992, // NOLINT + 7203, 1073749069, 7247, 1073749082, + 7293, 1073749225, 7404, 1073749230, // NOLINT + 7409, 1073749237, 7414, 1073749248, + 7615, 1073749504, 7957, 1073749784, // NOLINT + 7965, 1073749792, 8005, 1073749832, + 8013, 1073749840, 8023, 8025, // NOLINT + 8027, 8029, 1073749855, 8061, + 1073749888, 8116, 1073749942, 8124, // NOLINT + 8126, 1073749954, 8132, 1073749958, + 8140, 1073749968, 8147, 1073749974, // NOLINT + 8155, 1073749984, 8172, 1073750002, + 8180, 1073750006, 8188}; // NOLINT +static const uint16_t kLetterTable1Size = 87; +static const int32_t kLetterTable1[87] = { + 113, 127, 1073741968, 156, + 258, 263, 1073742090, 275, // NOLINT + 277, 1073742105, 285, 292, + 294, 296, 1073742122, 301, // NOLINT + 1073742127, 313, 1073742140, 319, + 1073742149, 329, 334, 1073742176, // NOLINT + 392, 1073744896, 3118, 1073744944, + 3166, 1073744992, 3300, 1073745131, // NOLINT + 3310, 1073745138, 3315, 1073745152, 
+ 3365, 3367, 3373, 1073745200, // NOLINT + 3431, 3439, 1073745280, 3478, + 1073745312, 3494, 1073745320, 3502, // NOLINT + 1073745328, 3510, 1073745336, 3518, + 1073745344, 3526, 1073745352, 3534, // NOLINT + 1073745360, 3542, 1073745368, 3550, + 3631, 1073745925, 4103, 1073745953, // NOLINT + 4137, 1073745969, 4149, 1073745976, + 4156, 1073745985, 4246, 1073746077, // NOLINT + 4255, 1073746081, 4346, 1073746172, + 4351, 1073746181, 4397, 1073746225, // NOLINT + 4494, 1073746336, 4538, 1073746416, + 4607, 1073746944, 8191}; // NOLINT +static const uint16_t kLetterTable2Size = 4; +static const int32_t kLetterTable2[4] = {1073741824, 3509, 1073745408, + 8191}; // NOLINT +static const uint16_t kLetterTable3Size = 2; +static const int32_t kLetterTable3[2] = {1073741824, 8191}; // NOLINT +static const uint16_t kLetterTable4Size = 2; +static const int32_t kLetterTable4[2] = {1073741824, 8140}; // NOLINT +static const uint16_t kLetterTable5Size = 100; +static const int32_t kLetterTable5[100] = { + 1073741824, 1164, 1073743056, 1277, + 1073743104, 1548, 1073743376, 1567, // NOLINT + 1073743402, 1579, 1073743424, 1646, + 1073743487, 1693, 1073743520, 1775, // NOLINT + 1073743639, 1823, 1073743650, 1928, + 1073743755, 1934, 1073743760, 1965, // NOLINT + 1073743792, 1969, 1073743863, 2049, + 1073743875, 2053, 1073743879, 2058, // NOLINT + 1073743884, 2082, 1073743936, 2163, + 1073744002, 2227, 1073744114, 2295, // NOLINT + 2299, 1073744138, 2341, 1073744176, + 2374, 1073744224, 2428, 1073744260, // NOLINT + 2482, 2511, 1073744352, 2532, + 1073744358, 2543, 1073744378, 2558, // NOLINT + 1073744384, 2600, 1073744448, 2626, + 1073744452, 2635, 1073744480, 2678, // NOLINT + 2682, 1073744510, 2735, 2737, + 1073744565, 2742, 1073744569, 2749, // NOLINT + 2752, 2754, 1073744603, 2781, + 1073744608, 2794, 1073744626, 2804, // NOLINT + 1073744641, 2822, 1073744649, 2830, + 1073744657, 2838, 1073744672, 2854, // NOLINT + 1073744680, 2862, 1073744688, 2906, + 1073744732, 2911, 
1073744740, 2917, // NOLINT + 1073744832, 3042, 1073744896, 8191}; // NOLINT +static const uint16_t kLetterTable6Size = 6; +static const int32_t kLetterTable6[6] = {1073741824, 6051, 1073747888, 6086, + 1073747915, 6139}; // NOLINT +static const uint16_t kLetterTable7Size = 48; +static const int32_t kLetterTable7[48] = { + 1073748224, 6765, 1073748592, 6873, + 1073748736, 6918, 1073748755, 6935, // NOLINT + 6941, 1073748767, 6952, 1073748778, + 6966, 1073748792, 6972, 6974, // NOLINT + 1073748800, 6977, 1073748803, 6980, + 1073748806, 7089, 1073748947, 7485, // NOLINT + 1073749328, 7567, 1073749394, 7623, + 1073749488, 7675, 1073749616, 7796, // NOLINT + 1073749622, 7932, 1073749793, 7994, + 1073749825, 8026, 1073749862, 8126, // NOLINT + 1073749954, 8135, 1073749962, 8143, + 1073749970, 8151, 1073749978, 8156}; // NOLINT +bool Letter::Is(uchar c) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupPredicate(kLetterTable0, kLetterTable0Size, c); + case 1: + return LookupPredicate(kLetterTable1, kLetterTable1Size, c); + case 2: + return LookupPredicate(kLetterTable2, kLetterTable2Size, c); + case 3: + return LookupPredicate(kLetterTable3, kLetterTable3Size, c); + case 4: + return LookupPredicate(kLetterTable4, kLetterTable4Size, c); + case 5: + return LookupPredicate(kLetterTable5, kLetterTable5Size, c); + case 6: + return LookupPredicate(kLetterTable6, kLetterTable6Size, c); + case 7: + return LookupPredicate(kLetterTable7, kLetterTable7Size, c); + default: + return false; + } +} +#endif + +#ifndef V8_INTL_SUPPORT +// ID_Start: ((point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', +// 'Nl'] or 'Other_ID_Start' in point.properties) and ('Pattern_Syntax' not in +// point.properties) and ('Pattern_White_Space' not in point.properties)) or +// ('JS_ID_Start' in point.properties) + +static const uint16_t kID_StartTable0Size = 434; +static const int32_t kID_StartTable0[434] = { + 36, 1073741889, 90, 92, + 95, 1073741921, 122, 170, // NOLINT + 
181, 186, 1073742016, 214, + 1073742040, 246, 1073742072, 705, // NOLINT + 1073742534, 721, 1073742560, 740, + 748, 750, 1073742704, 884, // NOLINT + 1073742710, 887, 1073742714, 893, + 895, 902, 1073742728, 906, // NOLINT + 908, 1073742734, 929, 1073742755, + 1013, 1073742839, 1153, 1073742986, // NOLINT + 1327, 1073743153, 1366, 1369, + 1073743201, 1415, 1073743312, 1514, // NOLINT + 1073743344, 1522, 1073743392, 1610, + 1073743470, 1647, 1073743473, 1747, // NOLINT + 1749, 1073743589, 1766, 1073743598, + 1775, 1073743610, 1788, 1791, // NOLINT + 1808, 1073743634, 1839, 1073743693, + 1957, 1969, 1073743818, 2026, // NOLINT + 1073743860, 2037, 2042, 1073743872, + 2069, 2074, 2084, 2088, // NOLINT + 1073743936, 2136, 1073744032, 2226, + 1073744132, 2361, 2365, 2384, // NOLINT + 1073744216, 2401, 1073744241, 2432, + 1073744261, 2444, 1073744271, 2448, // NOLINT + 1073744275, 2472, 1073744298, 2480, + 2482, 1073744310, 2489, 2493, // NOLINT + 2510, 1073744348, 2525, 1073744351, + 2529, 1073744368, 2545, 1073744389, // NOLINT + 2570, 1073744399, 2576, 1073744403, + 2600, 1073744426, 2608, 1073744434, // NOLINT + 2611, 1073744437, 2614, 1073744440, + 2617, 1073744473, 2652, 2654, // NOLINT + 1073744498, 2676, 1073744517, 2701, + 1073744527, 2705, 1073744531, 2728, // NOLINT + 1073744554, 2736, 1073744562, 2739, + 1073744565, 2745, 2749, 2768, // NOLINT + 1073744608, 2785, 1073744645, 2828, + 1073744655, 2832, 1073744659, 2856, // NOLINT + 1073744682, 2864, 1073744690, 2867, + 1073744693, 2873, 2877, 1073744732, // NOLINT + 2909, 1073744735, 2913, 2929, + 2947, 1073744773, 2954, 1073744782, // NOLINT + 2960, 1073744786, 2965, 1073744793, + 2970, 2972, 1073744798, 2975, // NOLINT + 1073744803, 2980, 1073744808, 2986, + 1073744814, 3001, 3024, 1073744901, // NOLINT + 3084, 1073744910, 3088, 1073744914, + 3112, 1073744938, 3129, 3133, // NOLINT + 1073744984, 3161, 1073744992, 3169, + 1073745029, 3212, 1073745038, 3216, // NOLINT + 1073745042, 3240, 1073745066, 3251, + 
1073745077, 3257, 3261, 3294, // NOLINT + 1073745120, 3297, 1073745137, 3314, + 1073745157, 3340, 1073745166, 3344, // NOLINT + 1073745170, 3386, 3389, 3406, + 1073745248, 3425, 1073745274, 3455, // NOLINT + 1073745285, 3478, 1073745306, 3505, + 1073745331, 3515, 3517, 1073745344, // NOLINT + 3526, 1073745409, 3632, 1073745458, + 3635, 1073745472, 3654, 1073745537, // NOLINT + 3714, 3716, 1073745543, 3720, + 3722, 3725, 1073745556, 3735, // NOLINT + 1073745561, 3743, 1073745569, 3747, + 3749, 3751, 1073745578, 3755, // NOLINT + 1073745581, 3760, 1073745586, 3763, + 3773, 1073745600, 3780, 3782, // NOLINT + 1073745628, 3807, 3840, 1073745728, + 3911, 1073745737, 3948, 1073745800, // NOLINT + 3980, 1073745920, 4138, 4159, + 1073746000, 4181, 1073746010, 4189, // NOLINT + 4193, 1073746021, 4198, 1073746030, + 4208, 1073746037, 4225, 4238, // NOLINT + 1073746080, 4293, 4295, 4301, + 1073746128, 4346, 1073746172, 4680, // NOLINT + 1073746506, 4685, 1073746512, 4694, + 4696, 1073746522, 4701, 1073746528, // NOLINT + 4744, 1073746570, 4749, 1073746576, + 4784, 1073746610, 4789, 1073746616, // NOLINT + 4798, 4800, 1073746626, 4805, + 1073746632, 4822, 1073746648, 4880, // NOLINT + 1073746706, 4885, 1073746712, 4954, + 1073746816, 5007, 1073746848, 5108, // NOLINT + 1073746945, 5740, 1073747567, 5759, + 1073747585, 5786, 1073747616, 5866, // NOLINT + 1073747694, 5880, 1073747712, 5900, + 1073747726, 5905, 1073747744, 5937, // NOLINT + 1073747776, 5969, 1073747808, 5996, + 1073747822, 6000, 1073747840, 6067, // NOLINT + 6103, 6108, 1073748000, 6263, + 1073748096, 6312, 6314, 1073748144, // NOLINT + 6389, 1073748224, 6430, 1073748304, + 6509, 1073748336, 6516, 1073748352, // NOLINT + 6571, 1073748417, 6599, 1073748480, + 6678, 1073748512, 6740, 6823, // NOLINT + 1073748741, 6963, 1073748805, 6987, + 1073748867, 7072, 1073748910, 7087, // NOLINT + 1073748922, 7141, 1073748992, 7203, + 1073749069, 7247, 1073749082, 7293, // NOLINT + 1073749225, 7404, 1073749230, 7409, + 
1073749237, 7414, 1073749248, 7615, // NOLINT + 1073749504, 7957, 1073749784, 7965, + 1073749792, 8005, 1073749832, 8013, // NOLINT + 1073749840, 8023, 8025, 8027, + 8029, 1073749855, 8061, 1073749888, // NOLINT + 8116, 1073749942, 8124, 8126, + 1073749954, 8132, 1073749958, 8140, // NOLINT + 1073749968, 8147, 1073749974, 8155, + 1073749984, 8172, 1073750002, 8180, // NOLINT + 1073750006, 8188}; // NOLINT +static const uint16_t kID_StartTable1Size = 84; +static const int32_t kID_StartTable1[84] = { + 113, 127, 1073741968, 156, + 258, 263, 1073742090, 275, // NOLINT + 277, 1073742104, 285, 292, + 294, 296, 1073742122, 313, // NOLINT + 1073742140, 319, 1073742149, 329, + 334, 1073742176, 392, 1073744896, // NOLINT + 3118, 1073744944, 3166, 1073744992, + 3300, 1073745131, 3310, 1073745138, // NOLINT + 3315, 1073745152, 3365, 3367, + 3373, 1073745200, 3431, 3439, // NOLINT + 1073745280, 3478, 1073745312, 3494, + 1073745320, 3502, 1073745328, 3510, // NOLINT + 1073745336, 3518, 1073745344, 3526, + 1073745352, 3534, 1073745360, 3542, // NOLINT + 1073745368, 3550, 1073745925, 4103, + 1073745953, 4137, 1073745969, 4149, // NOLINT + 1073745976, 4156, 1073745985, 4246, + 1073746075, 4255, 1073746081, 4346, // NOLINT + 1073746172, 4351, 1073746181, 4397, + 1073746225, 4494, 1073746336, 4538, // NOLINT + 1073746416, 4607, 1073746944, 8191}; // NOLINT +static const uint16_t kID_StartTable2Size = 4; +static const int32_t kID_StartTable2[4] = {1073741824, 3509, 1073745408, + 8191}; // NOLINT +static const uint16_t kID_StartTable3Size = 2; +static const int32_t kID_StartTable3[2] = {1073741824, 8191}; // NOLINT +static const uint16_t kID_StartTable4Size = 2; +static const int32_t kID_StartTable4[2] = {1073741824, 8140}; // NOLINT +static const uint16_t kID_StartTable5Size = 100; +static const int32_t kID_StartTable5[100] = { + 1073741824, 1164, 1073743056, 1277, + 1073743104, 1548, 1073743376, 1567, // NOLINT + 1073743402, 1579, 1073743424, 1646, + 1073743487, 1693, 1073743520, 
1775, // NOLINT + 1073743639, 1823, 1073743650, 1928, + 1073743755, 1934, 1073743760, 1965, // NOLINT + 1073743792, 1969, 1073743863, 2049, + 1073743875, 2053, 1073743879, 2058, // NOLINT + 1073743884, 2082, 1073743936, 2163, + 1073744002, 2227, 1073744114, 2295, // NOLINT + 2299, 1073744138, 2341, 1073744176, + 2374, 1073744224, 2428, 1073744260, // NOLINT + 2482, 2511, 1073744352, 2532, + 1073744358, 2543, 1073744378, 2558, // NOLINT + 1073744384, 2600, 1073744448, 2626, + 1073744452, 2635, 1073744480, 2678, // NOLINT + 2682, 1073744510, 2735, 2737, + 1073744565, 2742, 1073744569, 2749, // NOLINT + 2752, 2754, 1073744603, 2781, + 1073744608, 2794, 1073744626, 2804, // NOLINT + 1073744641, 2822, 1073744649, 2830, + 1073744657, 2838, 1073744672, 2854, // NOLINT + 1073744680, 2862, 1073744688, 2906, + 1073744732, 2911, 1073744740, 2917, // NOLINT + 1073744832, 3042, 1073744896, 8191}; // NOLINT +static const uint16_t kID_StartTable6Size = 6; +static const int32_t kID_StartTable6[6] = {1073741824, 6051, 1073747888, 6086, + 1073747915, 6139}; // NOLINT +static const uint16_t kID_StartTable7Size = 48; +static const int32_t kID_StartTable7[48] = { + 1073748224, 6765, 1073748592, 6873, + 1073748736, 6918, 1073748755, 6935, // NOLINT + 6941, 1073748767, 6952, 1073748778, + 6966, 1073748792, 6972, 6974, // NOLINT + 1073748800, 6977, 1073748803, 6980, + 1073748806, 7089, 1073748947, 7485, // NOLINT + 1073749328, 7567, 1073749394, 7623, + 1073749488, 7675, 1073749616, 7796, // NOLINT + 1073749622, 7932, 1073749793, 7994, + 1073749825, 8026, 1073749862, 8126, // NOLINT + 1073749954, 8135, 1073749962, 8143, + 1073749970, 8151, 1073749978, 8156}; // NOLINT +bool ID_Start::Is(uchar c) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupPredicate(kID_StartTable0, kID_StartTable0Size, c); + case 1: + return LookupPredicate(kID_StartTable1, kID_StartTable1Size, c); + case 2: + return LookupPredicate(kID_StartTable2, kID_StartTable2Size, c); + case 3: + 
return LookupPredicate(kID_StartTable3, kID_StartTable3Size, c); + case 4: + return LookupPredicate(kID_StartTable4, kID_StartTable4Size, c); + case 5: + return LookupPredicate(kID_StartTable5, kID_StartTable5Size, c); + case 6: + return LookupPredicate(kID_StartTable6, kID_StartTable6Size, c); + case 7: + return LookupPredicate(kID_StartTable7, kID_StartTable7Size, c); + default: + return false; + } +} + +// ID_Continue: point.category in ['Nd', 'Mn', 'Mc', 'Pc'] or +// 'Other_ID_Continue' in point.properties or 'JS_ID_Continue' in +// point.properties + +static const uint16_t kID_ContinueTable0Size = 315; +static const int32_t kID_ContinueTable0[315] = { + 1073741872, 57, 95, 183, + 1073742592, 879, 903, 1073742979, // NOLINT + 1159, 1073743249, 1469, 1471, + 1073743297, 1474, 1073743300, 1477, // NOLINT + 1479, 1073743376, 1562, 1073743435, + 1641, 1648, 1073743574, 1756, // NOLINT + 1073743583, 1764, 1073743591, 1768, + 1073743594, 1773, 1073743600, 1785, // NOLINT + 1809, 1073743664, 1866, 1073743782, + 1968, 1073743808, 1993, 1073743851, // NOLINT + 2035, 1073743894, 2073, 1073743899, + 2083, 1073743909, 2087, 1073743913, // NOLINT + 2093, 1073743961, 2139, 1073744100, + 2307, 1073744186, 2364, 1073744190, // NOLINT + 2383, 1073744209, 2391, 1073744226, + 2403, 1073744230, 2415, 1073744257, // NOLINT + 2435, 2492, 1073744318, 2500, + 1073744327, 2504, 1073744331, 2509, // NOLINT + 2519, 1073744354, 2531, 1073744358, + 2543, 1073744385, 2563, 2620, // NOLINT + 1073744446, 2626, 1073744455, 2632, + 1073744459, 2637, 2641, 1073744486, // NOLINT + 2673, 2677, 1073744513, 2691, + 2748, 1073744574, 2757, 1073744583, // NOLINT + 2761, 1073744587, 2765, 1073744610, + 2787, 1073744614, 2799, 1073744641, // NOLINT + 2819, 2876, 1073744702, 2884, + 1073744711, 2888, 1073744715, 2893, // NOLINT + 1073744726, 2903, 1073744738, 2915, + 1073744742, 2927, 2946, 1073744830, // NOLINT + 3010, 1073744838, 3016, 1073744842, + 3021, 3031, 1073744870, 3055, // NOLINT + 1073744896, 
3075, 1073744958, 3140, + 1073744966, 3144, 1073744970, 3149, // NOLINT + 1073744981, 3158, 1073744994, 3171, + 1073744998, 3183, 1073745025, 3203, // NOLINT + 3260, 1073745086, 3268, 1073745094, + 3272, 1073745098, 3277, 1073745109, // NOLINT + 3286, 1073745122, 3299, 1073745126, + 3311, 1073745153, 3331, 1073745214, // NOLINT + 3396, 1073745222, 3400, 1073745226, + 3405, 3415, 1073745250, 3427, // NOLINT + 1073745254, 3439, 1073745282, 3459, + 3530, 1073745359, 3540, 3542, // NOLINT + 1073745368, 3551, 1073745382, 3567, + 1073745394, 3571, 3633, 1073745460, // NOLINT + 3642, 1073745479, 3662, 1073745488, + 3673, 3761, 1073745588, 3769, // NOLINT + 1073745595, 3772, 1073745608, 3789, + 1073745616, 3801, 1073745688, 3865, // NOLINT + 1073745696, 3881, 3893, 3895, + 3897, 1073745726, 3903, 1073745777, // NOLINT + 3972, 1073745798, 3975, 1073745805, + 3991, 1073745817, 4028, 4038, // NOLINT + 1073745963, 4158, 1073745984, 4169, + 1073746006, 4185, 1073746014, 4192, // NOLINT + 1073746018, 4196, 1073746023, 4205, + 1073746033, 4212, 1073746050, 4237, // NOLINT + 1073746063, 4253, 1073746781, 4959, + 1073746793, 4977, 1073747730, 5908, // NOLINT + 1073747762, 5940, 1073747794, 5971, + 1073747826, 6003, 1073747892, 6099, // NOLINT + 6109, 1073747936, 6121, 1073747979, + 6157, 1073747984, 6169, 6313, // NOLINT + 1073748256, 6443, 1073748272, 6459, + 1073748294, 6479, 1073748400, 6592, // NOLINT + 1073748424, 6601, 1073748432, 6618, + 1073748503, 6683, 1073748565, 6750, // NOLINT + 1073748576, 6780, 1073748607, 6793, + 1073748624, 6809, 1073748656, 6845, // NOLINT + 1073748736, 6916, 1073748788, 6980, + 1073748816, 7001, 1073748843, 7027, // NOLINT + 1073748864, 7042, 1073748897, 7085, + 1073748912, 7097, 1073748966, 7155, // NOLINT + 1073749028, 7223, 1073749056, 7241, + 1073749072, 7257, 1073749200, 7378, // NOLINT + 1073749204, 7400, 7405, 1073749234, + 7412, 1073749240, 7417, 1073749440, // NOLINT + 7669, 1073749500, 7679}; // NOLINT +static const uint16_t 
kID_ContinueTable1Size = 19; +static const int32_t kID_ContinueTable1[19] = { + 1073741836, 13, 1073741887, 64, + 84, 1073742032, 220, 225, // NOLINT + 1073742053, 240, 1073745135, 3313, + 3455, 1073745376, 3583, 1073745962, // NOLINT + 4143, 1073746073, 4250}; // NOLINT +static const uint16_t kID_ContinueTable5Size = 63; +static const int32_t kID_ContinueTable5[63] = { + 1073743392, 1577, 1647, 1073743476, + 1661, 1695, 1073743600, 1777, // NOLINT + 2050, 2054, 2059, 1073743907, + 2087, 1073744000, 2177, 1073744052, // NOLINT + 2244, 1073744080, 2265, 1073744096, + 2289, 1073744128, 2313, 1073744166, // NOLINT + 2349, 1073744199, 2387, 1073744256, + 2435, 1073744307, 2496, 1073744336, // NOLINT + 2521, 2533, 1073744368, 2553, + 1073744425, 2614, 2627, 1073744460, // NOLINT + 2637, 1073744464, 2649, 1073744507, + 2685, 2736, 1073744562, 2740, // NOLINT + 1073744567, 2744, 1073744574, 2751, + 2753, 1073744619, 2799, 1073744629, // NOLINT + 2806, 1073744867, 3050, 1073744876, + 3053, 1073744880, 3065}; // NOLINT +static const uint16_t kID_ContinueTable7Size = 12; +static const int32_t kID_ContinueTable7[12] = { + 6942, 1073749504, 7695, 1073749536, + 7725, 1073749555, 7732, 1073749581, // NOLINT + 7759, 1073749776, 7961, 7999}; // NOLINT +bool ID_Continue::Is(uchar c) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupPredicate(kID_ContinueTable0, kID_ContinueTable0Size, c); + case 1: + return LookupPredicate(kID_ContinueTable1, kID_ContinueTable1Size, c); + case 5: + return LookupPredicate(kID_ContinueTable5, kID_ContinueTable5Size, c); + case 7: + return LookupPredicate(kID_ContinueTable7, kID_ContinueTable7Size, c); + default: + return false; + } +} + +// WhiteSpace: (point.category == 'Zs') or ('JS_White_Space' in +// point.properties) + +static const uint16_t kWhiteSpaceTable0Size = 6; +static const int32_t kWhiteSpaceTable0[6] = {9, 1073741835, 12, + 32, 160, 5760}; // NOLINT +static const uint16_t kWhiteSpaceTable1Size = 5; 
+static const int32_t kWhiteSpaceTable1[5] = {1073741824, 10, 47, 95, + 4096}; // NOLINT +static const uint16_t kWhiteSpaceTable7Size = 1; +static const int32_t kWhiteSpaceTable7[1] = {7935}; // NOLINT +bool WhiteSpace::Is(uchar c) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupPredicate(kWhiteSpaceTable0, kWhiteSpaceTable0Size, c); + case 1: + return LookupPredicate(kWhiteSpaceTable1, kWhiteSpaceTable1Size, c); + case 7: + return LookupPredicate(kWhiteSpaceTable7, kWhiteSpaceTable7Size, c); + default: + return false; + } +} +#endif // !V8_INTL_SUPPORT + +#ifndef V8_INTL_SUPPORT +static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = + { // NOLINT + {{105, 775}}, + {{kSentinel}}}; // NOLINT +static const uint16_t kToLowercaseTable0Size = 488; // NOLINT +static const int32_t kToLowercaseTable0[976] = { + 1073741889, 128, 90, 128, 1073742016, 128, + 214, 128, 1073742040, 128, 222, 128, + 256, 4, 258, 4, // NOLINT + 260, 4, 262, 4, 264, 4, + 266, 4, 268, 4, 270, 4, + 272, 4, 274, 4, // NOLINT + 276, 4, 278, 4, 280, 4, + 282, 4, 284, 4, 286, 4, + 288, 4, 290, 4, // NOLINT + 292, 4, 294, 4, 296, 4, + 298, 4, 300, 4, 302, 4, + 304, 1, 306, 4, // NOLINT + 308, 4, 310, 4, 313, 4, + 315, 4, 317, 4, 319, 4, + 321, 4, 323, 4, // NOLINT + 325, 4, 327, 4, 330, 4, + 332, 4, 334, 4, 336, 4, + 338, 4, 340, 4, // NOLINT + 342, 4, 344, 4, 346, 4, + 348, 4, 350, 4, 352, 4, + 354, 4, 356, 4, // NOLINT + 358, 4, 360, 4, 362, 4, + 364, 4, 366, 4, 368, 4, + 370, 4, 372, 4, // NOLINT + 374, 4, 376, -484, 377, 4, + 379, 4, 381, 4, 385, 840, + 386, 4, 388, 4, // NOLINT + 390, 824, 391, 4, 1073742217, 820, + 394, 820, 395, 4, 398, 316, + 399, 808, 400, 812, // NOLINT + 401, 4, 403, 820, 404, 828, + 406, 844, 407, 836, 408, 4, + 412, 844, 413, 852, // NOLINT + 415, 856, 416, 4, 418, 4, + 420, 4, 422, 872, 423, 4, + 425, 872, 428, 4, // NOLINT + 430, 872, 431, 4, 1073742257, 868, + 434, 868, 435, 4, 437, 4, + 439, 876, 440, 4, // NOLINT + 
444, 4, 452, 8, 453, 4, + 455, 8, 456, 4, 458, 8, + 459, 4, 461, 4, // NOLINT + 463, 4, 465, 4, 467, 4, + 469, 4, 471, 4, 473, 4, + 475, 4, 478, 4, // NOLINT + 480, 4, 482, 4, 484, 4, + 486, 4, 488, 4, 490, 4, + 492, 4, 494, 4, // NOLINT + 497, 8, 498, 4, 500, 4, + 502, -388, 503, -224, 504, 4, + 506, 4, 508, 4, // NOLINT + 510, 4, 512, 4, 514, 4, + 516, 4, 518, 4, 520, 4, + 522, 4, 524, 4, // NOLINT + 526, 4, 528, 4, 530, 4, + 532, 4, 534, 4, 536, 4, + 538, 4, 540, 4, // NOLINT + 542, 4, 544, -520, 546, 4, + 548, 4, 550, 4, 552, 4, + 554, 4, 556, 4, // NOLINT + 558, 4, 560, 4, 562, 4, + 570, 43180, 571, 4, 573, -652, + 574, 43168, 577, 4, // NOLINT + 579, -780, 580, 276, 581, 284, + 582, 4, 584, 4, 586, 4, + 588, 4, 590, 4, // NOLINT + 880, 4, 882, 4, 886, 4, + 895, 464, 902, 152, 1073742728, 148, + 906, 148, 908, 256, // NOLINT + 1073742734, 252, 911, 252, 1073742737, 128, + 929, 128, 931, 6, 1073742756, 128, + 939, 128, 975, 32, // NOLINT + 984, 4, 986, 4, 988, 4, + 990, 4, 992, 4, 994, 4, + 996, 4, 998, 4, // NOLINT + 1000, 4, 1002, 4, 1004, 4, + 1006, 4, 1012, -240, 1015, 4, + 1017, -28, 1018, 4, // NOLINT + 1073742845, -520, 1023, -520, 1073742848, 320, + 1039, 320, 1073742864, 128, 1071, 128, + 1120, 4, 1122, 4, // NOLINT + 1124, 4, 1126, 4, 1128, 4, + 1130, 4, 1132, 4, 1134, 4, + 1136, 4, 1138, 4, // NOLINT + 1140, 4, 1142, 4, 1144, 4, + 1146, 4, 1148, 4, 1150, 4, + 1152, 4, 1162, 4, // NOLINT + 1164, 4, 1166, 4, 1168, 4, + 1170, 4, 1172, 4, 1174, 4, + 1176, 4, 1178, 4, // NOLINT + 1180, 4, 1182, 4, 1184, 4, + 1186, 4, 1188, 4, 1190, 4, + 1192, 4, 1194, 4, // NOLINT + 1196, 4, 1198, 4, 1200, 4, + 1202, 4, 1204, 4, 1206, 4, + 1208, 4, 1210, 4, // NOLINT + 1212, 4, 1214, 4, 1216, 60, + 1217, 4, 1219, 4, 1221, 4, + 1223, 4, 1225, 4, // NOLINT + 1227, 4, 1229, 4, 1232, 4, + 1234, 4, 1236, 4, 1238, 4, + 1240, 4, 1242, 4, // NOLINT + 1244, 4, 1246, 4, 1248, 4, + 1250, 4, 1252, 4, 1254, 4, + 1256, 4, 1258, 4, // NOLINT + 1260, 4, 1262, 4, 1264, 4, + 1266, 4, 1268, 
4, 1270, 4, + 1272, 4, 1274, 4, // NOLINT + 1276, 4, 1278, 4, 1280, 4, + 1282, 4, 1284, 4, 1286, 4, + 1288, 4, 1290, 4, // NOLINT + 1292, 4, 1294, 4, 1296, 4, + 1298, 4, 1300, 4, 1302, 4, + 1304, 4, 1306, 4, // NOLINT + 1308, 4, 1310, 4, 1312, 4, + 1314, 4, 1316, 4, 1318, 4, + 1320, 4, 1322, 4, // NOLINT + 1324, 4, 1326, 4, 1073743153, 192, + 1366, 192, 1073746080, 29056, 4293, 29056, + 4295, 29056, 4301, 29056, // NOLINT + 7680, 4, 7682, 4, 7684, 4, + 7686, 4, 7688, 4, 7690, 4, + 7692, 4, 7694, 4, // NOLINT + 7696, 4, 7698, 4, 7700, 4, + 7702, 4, 7704, 4, 7706, 4, + 7708, 4, 7710, 4, // NOLINT + 7712, 4, 7714, 4, 7716, 4, + 7718, 4, 7720, 4, 7722, 4, + 7724, 4, 7726, 4, // NOLINT + 7728, 4, 7730, 4, 7732, 4, + 7734, 4, 7736, 4, 7738, 4, + 7740, 4, 7742, 4, // NOLINT + 7744, 4, 7746, 4, 7748, 4, + 7750, 4, 7752, 4, 7754, 4, + 7756, 4, 7758, 4, // NOLINT + 7760, 4, 7762, 4, 7764, 4, + 7766, 4, 7768, 4, 7770, 4, + 7772, 4, 7774, 4, // NOLINT + 7776, 4, 7778, 4, 7780, 4, + 7782, 4, 7784, 4, 7786, 4, + 7788, 4, 7790, 4, // NOLINT + 7792, 4, 7794, 4, 7796, 4, + 7798, 4, 7800, 4, 7802, 4, + 7804, 4, 7806, 4, // NOLINT + 7808, 4, 7810, 4, 7812, 4, + 7814, 4, 7816, 4, 7818, 4, + 7820, 4, 7822, 4, // NOLINT + 7824, 4, 7826, 4, 7828, 4, + 7838, -30460, 7840, 4, 7842, 4, + 7844, 4, 7846, 4, // NOLINT + 7848, 4, 7850, 4, 7852, 4, + 7854, 4, 7856, 4, 7858, 4, + 7860, 4, 7862, 4, // NOLINT + 7864, 4, 7866, 4, 7868, 4, + 7870, 4, 7872, 4, 7874, 4, + 7876, 4, 7878, 4, // NOLINT + 7880, 4, 7882, 4, 7884, 4, + 7886, 4, 7888, 4, 7890, 4, + 7892, 4, 7894, 4, // NOLINT + 7896, 4, 7898, 4, 7900, 4, + 7902, 4, 7904, 4, 7906, 4, + 7908, 4, 7910, 4, // NOLINT + 7912, 4, 7914, 4, 7916, 4, + 7918, 4, 7920, 4, 7922, 4, + 7924, 4, 7926, 4, // NOLINT + 7928, 4, 7930, 4, 7932, 4, + 7934, 4, 1073749768, -32, 7951, -32, + 1073749784, -32, 7965, -32, // NOLINT + 1073749800, -32, 7983, -32, 1073749816, -32, + 7999, -32, 1073749832, -32, 8013, -32, + 8025, -32, 8027, -32, // NOLINT + 8029, -32, 8031, 
-32, 1073749864, -32, + 8047, -32, 1073749896, -32, 8079, -32, + 1073749912, -32, 8095, -32, // NOLINT + 1073749928, -32, 8111, -32, 1073749944, -32, + 8121, -32, 1073749946, -296, 8123, -296, + 8124, -36, 1073749960, -344, // NOLINT + 8139, -344, 8140, -36, 1073749976, -32, + 8153, -32, 1073749978, -400, 8155, -400, + 1073749992, -32, 8169, -32, // NOLINT + 1073749994, -448, 8171, -448, 8172, -28, + 1073750008, -512, 8185, -512, 1073750010, -504, + 8187, -504, 8188, -36}; // NOLINT +static const uint16_t kToLowercaseMultiStrings0Size = 2; // NOLINT +static const MultiCharacterSpecialCase<1> kToLowercaseMultiStrings1[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kToLowercaseTable1Size = 79; // NOLINT +static const int32_t kToLowercaseTable1[158] = { + 294, -30068, 298, -33532, 299, -33048, 306, + 112, 1073742176, 64, 367, 64, 387, 4, + 1073743030, 104, // NOLINT + 1231, 104, 1073744896, 192, 3118, 192, 3168, + 4, 3170, -42972, 3171, -15256, 3172, -42908, + 3175, 4, // NOLINT + 3177, 4, 3179, 4, 3181, -43120, 3182, + -42996, 3183, -43132, 3184, -43128, 3186, 4, + 3189, 4, // NOLINT + 1073745022, -43260, 3199, -43260, 3200, 4, 3202, + 4, 3204, 4, 3206, 4, 3208, 4, + 3210, 4, // NOLINT + 3212, 4, 3214, 4, 3216, 4, 3218, + 4, 3220, 4, 3222, 4, 3224, 4, + 3226, 4, // NOLINT + 3228, 4, 3230, 4, 3232, 4, 3234, + 4, 3236, 4, 3238, 4, 3240, 4, + 3242, 4, // NOLINT + 3244, 4, 3246, 4, 3248, 4, 3250, + 4, 3252, 4, 3254, 4, 3256, 4, + 3258, 4, // NOLINT + 3260, 4, 3262, 4, 3264, 4, 3266, + 4, 3268, 4, 3270, 4, 3272, 4, + 3274, 4, // NOLINT + 3276, 4, 3278, 4, 3280, 4, 3282, + 4, 3284, 4, 3286, 4, 3288, 4, + 3290, 4, // NOLINT + 3292, 4, 3294, 4, 3296, 4, 3298, + 4, 3307, 4, 3309, 4, 3314, 4}; // NOLINT +static const uint16_t kToLowercaseMultiStrings1Size = 1; // NOLINT +static const MultiCharacterSpecialCase<1> kToLowercaseMultiStrings5[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kToLowercaseTable5Size = 103; // NOLINT +static 
const int32_t kToLowercaseTable5[206] = { + 1600, 4, 1602, 4, 1604, 4, 1606, 4, + 1608, 4, 1610, 4, 1612, 4, 1614, 4, // NOLINT + 1616, 4, 1618, 4, 1620, 4, 1622, 4, + 1624, 4, 1626, 4, 1628, 4, 1630, 4, // NOLINT + 1632, 4, 1634, 4, 1636, 4, 1638, 4, + 1640, 4, 1642, 4, 1644, 4, 1664, 4, // NOLINT + 1666, 4, 1668, 4, 1670, 4, 1672, 4, + 1674, 4, 1676, 4, 1678, 4, 1680, 4, // NOLINT + 1682, 4, 1684, 4, 1686, 4, 1688, 4, + 1690, 4, 1826, 4, 1828, 4, 1830, 4, // NOLINT + 1832, 4, 1834, 4, 1836, 4, 1838, 4, + 1842, 4, 1844, 4, 1846, 4, 1848, 4, // NOLINT + 1850, 4, 1852, 4, 1854, 4, 1856, 4, + 1858, 4, 1860, 4, 1862, 4, 1864, 4, // NOLINT + 1866, 4, 1868, 4, 1870, 4, 1872, 4, + 1874, 4, 1876, 4, 1878, 4, 1880, 4, // NOLINT + 1882, 4, 1884, 4, 1886, 4, 1888, 4, + 1890, 4, 1892, 4, 1894, 4, 1896, 4, // NOLINT + 1898, 4, 1900, 4, 1902, 4, 1913, 4, + 1915, 4, 1917, -141328, 1918, 4, 1920, 4, // NOLINT + 1922, 4, 1924, 4, 1926, 4, 1931, 4, + 1933, -169120, 1936, 4, 1938, 4, 1942, 4, // NOLINT + 1944, 4, 1946, 4, 1948, 4, 1950, 4, + 1952, 4, 1954, 4, 1956, 4, 1958, 4, // NOLINT + 1960, 4, 1962, -169232, 1963, -169276, 1964, -169260, + 1965, -169220, 1968, -169032, 1969, -169128}; // NOLINT +static const uint16_t kToLowercaseMultiStrings5Size = 1; // NOLINT +static const MultiCharacterSpecialCase<1> kToLowercaseMultiStrings7[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kToLowercaseTable7Size = 2; // NOLINT +static const int32_t kToLowercaseTable7[4] = {1073749793, 128, 7994, + 128}; // NOLINT +static const uint16_t kToLowercaseMultiStrings7Size = 1; // NOLINT +int ToLowercase::Convert(uchar c, uchar n, uchar* result, + bool* allow_caching_ptr) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupMapping(kToLowercaseTable0, kToLowercaseTable0Size, + kToLowercaseMultiStrings0, c, n, result, + allow_caching_ptr); + case 1: + return LookupMapping(kToLowercaseTable1, kToLowercaseTable1Size, + kToLowercaseMultiStrings1, c, n, 
result, + allow_caching_ptr); + case 5: + return LookupMapping(kToLowercaseTable5, kToLowercaseTable5Size, + kToLowercaseMultiStrings5, c, n, result, + allow_caching_ptr); + case 7: + return LookupMapping(kToLowercaseTable7, kToLowercaseTable7Size, + kToLowercaseMultiStrings7, c, n, result, + allow_caching_ptr); + default: + return 0; + } +} + +static const MultiCharacterSpecialCase<3> kToUppercaseMultiStrings0[62] = + { // NOLINT + {{83, 83, kSentinel}}, {{700, 78, kSentinel}}, + {{74, 780, kSentinel}}, {{921, 776, 769}}, // NOLINT + {{933, 776, 769}}, {{1333, 1362, kSentinel}}, + {{72, 817, kSentinel}}, {{84, 776, kSentinel}}, // NOLINT + {{87, 778, kSentinel}}, {{89, 778, kSentinel}}, + {{65, 702, kSentinel}}, {{933, 787, kSentinel}}, // NOLINT + {{933, 787, 768}}, {{933, 787, 769}}, + {{933, 787, 834}}, {{7944, 921, kSentinel}}, // NOLINT + {{7945, 921, kSentinel}}, {{7946, 921, kSentinel}}, + {{7947, 921, kSentinel}}, {{7948, 921, kSentinel}}, // NOLINT + {{7949, 921, kSentinel}}, {{7950, 921, kSentinel}}, + {{7951, 921, kSentinel}}, {{7976, 921, kSentinel}}, // NOLINT + {{7977, 921, kSentinel}}, {{7978, 921, kSentinel}}, + {{7979, 921, kSentinel}}, {{7980, 921, kSentinel}}, // NOLINT + {{7981, 921, kSentinel}}, {{7982, 921, kSentinel}}, + {{7983, 921, kSentinel}}, {{8040, 921, kSentinel}}, // NOLINT + {{8041, 921, kSentinel}}, {{8042, 921, kSentinel}}, + {{8043, 921, kSentinel}}, {{8044, 921, kSentinel}}, // NOLINT + {{8045, 921, kSentinel}}, {{8046, 921, kSentinel}}, + {{8047, 921, kSentinel}}, {{8122, 921, kSentinel}}, // NOLINT + {{913, 921, kSentinel}}, {{902, 921, kSentinel}}, + {{913, 834, kSentinel}}, {{913, 834, 921}}, // NOLINT + {{8138, 921, kSentinel}}, {{919, 921, kSentinel}}, + {{905, 921, kSentinel}}, {{919, 834, kSentinel}}, // NOLINT + {{919, 834, 921}}, {{921, 776, 768}}, + {{921, 834, kSentinel}}, {{921, 776, 834}}, // NOLINT + {{933, 776, 768}}, {{929, 787, kSentinel}}, + {{933, 834, kSentinel}}, {{933, 776, 834}}, // NOLINT + {{8186, 921, 
kSentinel}}, {{937, 921, kSentinel}}, + {{911, 921, kSentinel}}, {{937, 834, kSentinel}}, // NOLINT + {{937, 834, 921}}, {{kSentinel}}}; // NOLINT +static const uint16_t kToUppercaseTable0Size = 590; // NOLINT +static const int32_t kToUppercaseTable0[1180] = { + 1073741921, -128, 122, -128, 181, 2972, + 223, 1, 1073742048, -128, 246, -128, + 1073742072, -128, 254, -128, // NOLINT + 255, 484, 257, -4, 259, -4, + 261, -4, 263, -4, 265, -4, + 267, -4, 269, -4, // NOLINT + 271, -4, 273, -4, 275, -4, + 277, -4, 279, -4, 281, -4, + 283, -4, 285, -4, // NOLINT + 287, -4, 289, -4, 291, -4, + 293, -4, 295, -4, 297, -4, + 299, -4, 301, -4, // NOLINT + 303, -4, 305, -928, 307, -4, + 309, -4, 311, -4, 314, -4, + 316, -4, 318, -4, // NOLINT + 320, -4, 322, -4, 324, -4, + 326, -4, 328, -4, 329, 5, + 331, -4, 333, -4, // NOLINT + 335, -4, 337, -4, 339, -4, + 341, -4, 343, -4, 345, -4, + 347, -4, 349, -4, // NOLINT + 351, -4, 353, -4, 355, -4, + 357, -4, 359, -4, 361, -4, + 363, -4, 365, -4, // NOLINT + 367, -4, 369, -4, 371, -4, + 373, -4, 375, -4, 378, -4, + 380, -4, 382, -4, // NOLINT + 383, -1200, 384, 780, 387, -4, + 389, -4, 392, -4, 396, -4, + 402, -4, 405, 388, // NOLINT + 409, -4, 410, 652, 414, 520, + 417, -4, 419, -4, 421, -4, + 424, -4, 429, -4, // NOLINT + 432, -4, 436, -4, 438, -4, + 441, -4, 445, -4, 447, 224, + 453, -4, 454, -8, // NOLINT + 456, -4, 457, -8, 459, -4, + 460, -8, 462, -4, 464, -4, + 466, -4, 468, -4, // NOLINT + 470, -4, 472, -4, 474, -4, + 476, -4, 477, -316, 479, -4, + 481, -4, 483, -4, // NOLINT + 485, -4, 487, -4, 489, -4, + 491, -4, 493, -4, 495, -4, + 496, 9, 498, -4, // NOLINT + 499, -8, 501, -4, 505, -4, + 507, -4, 509, -4, 511, -4, + 513, -4, 515, -4, // NOLINT + 517, -4, 519, -4, 521, -4, + 523, -4, 525, -4, 527, -4, + 529, -4, 531, -4, // NOLINT + 533, -4, 535, -4, 537, -4, + 539, -4, 541, -4, 543, -4, + 547, -4, 549, -4, // NOLINT + 551, -4, 553, -4, 555, -4, + 557, -4, 559, -4, 561, -4, + 563, -4, 572, -4, // NOLINT + 1073742399, 43260, 
576, 43260, 578, -4, + 583, -4, 585, -4, 587, -4, + 589, -4, 591, -4, // NOLINT + 592, 43132, 593, 43120, 594, 43128, + 595, -840, 596, -824, 1073742422, -820, + 599, -820, 601, -808, // NOLINT + 603, -812, 604, 169276, 608, -820, + 609, 169260, 611, -828, 613, 169120, + 614, 169232, 616, -836, // NOLINT + 617, -844, 619, 42972, 620, 169220, + 623, -844, 625, 42996, 626, -852, + 629, -856, 637, 42908, // NOLINT + 640, -872, 643, -872, 647, 169128, + 648, -872, 649, -276, 1073742474, -868, + 651, -868, 652, -284, // NOLINT + 658, -876, 670, 169032, 837, 336, + 881, -4, 883, -4, 887, -4, + 1073742715, 520, 893, 520, // NOLINT + 912, 13, 940, -152, 1073742765, -148, + 943, -148, 944, 17, 1073742769, -128, + 961, -128, 962, -124, // NOLINT + 1073742787, -128, 971, -128, 972, -256, + 1073742797, -252, 974, -252, 976, -248, + 977, -228, 981, -188, // NOLINT + 982, -216, 983, -32, 985, -4, + 987, -4, 989, -4, 991, -4, + 993, -4, 995, -4, // NOLINT + 997, -4, 999, -4, 1001, -4, + 1003, -4, 1005, -4, 1007, -4, + 1008, -344, 1009, -320, // NOLINT + 1010, 28, 1011, -464, 1013, -384, + 1016, -4, 1019, -4, 1073742896, -128, + 1103, -128, 1073742928, -320, // NOLINT + 1119, -320, 1121, -4, 1123, -4, + 1125, -4, 1127, -4, 1129, -4, + 1131, -4, 1133, -4, // NOLINT + 1135, -4, 1137, -4, 1139, -4, + 1141, -4, 1143, -4, 1145, -4, + 1147, -4, 1149, -4, // NOLINT + 1151, -4, 1153, -4, 1163, -4, + 1165, -4, 1167, -4, 1169, -4, + 1171, -4, 1173, -4, // NOLINT + 1175, -4, 1177, -4, 1179, -4, + 1181, -4, 1183, -4, 1185, -4, + 1187, -4, 1189, -4, // NOLINT + 1191, -4, 1193, -4, 1195, -4, + 1197, -4, 1199, -4, 1201, -4, + 1203, -4, 1205, -4, // NOLINT + 1207, -4, 1209, -4, 1211, -4, + 1213, -4, 1215, -4, 1218, -4, + 1220, -4, 1222, -4, // NOLINT + 1224, -4, 1226, -4, 1228, -4, + 1230, -4, 1231, -60, 1233, -4, + 1235, -4, 1237, -4, // NOLINT + 1239, -4, 1241, -4, 1243, -4, + 1245, -4, 1247, -4, 1249, -4, + 1251, -4, 1253, -4, // NOLINT + 1255, -4, 1257, -4, 1259, -4, + 1261, -4, 1263, -4, 
1265, -4, + 1267, -4, 1269, -4, // NOLINT + 1271, -4, 1273, -4, 1275, -4, + 1277, -4, 1279, -4, 1281, -4, + 1283, -4, 1285, -4, // NOLINT + 1287, -4, 1289, -4, 1291, -4, + 1293, -4, 1295, -4, 1297, -4, + 1299, -4, 1301, -4, // NOLINT + 1303, -4, 1305, -4, 1307, -4, + 1309, -4, 1311, -4, 1313, -4, + 1315, -4, 1317, -4, // NOLINT + 1319, -4, 1321, -4, 1323, -4, + 1325, -4, 1327, -4, 1073743201, -192, + 1414, -192, 1415, 21, // NOLINT + 7545, 141328, 7549, 15256, 7681, -4, + 7683, -4, 7685, -4, 7687, -4, + 7689, -4, 7691, -4, // NOLINT + 7693, -4, 7695, -4, 7697, -4, + 7699, -4, 7701, -4, 7703, -4, + 7705, -4, 7707, -4, // NOLINT + 7709, -4, 7711, -4, 7713, -4, + 7715, -4, 7717, -4, 7719, -4, + 7721, -4, 7723, -4, // NOLINT + 7725, -4, 7727, -4, 7729, -4, + 7731, -4, 7733, -4, 7735, -4, + 7737, -4, 7739, -4, // NOLINT + 7741, -4, 7743, -4, 7745, -4, + 7747, -4, 7749, -4, 7751, -4, + 7753, -4, 7755, -4, // NOLINT + 7757, -4, 7759, -4, 7761, -4, + 7763, -4, 7765, -4, 7767, -4, + 7769, -4, 7771, -4, // NOLINT + 7773, -4, 7775, -4, 7777, -4, + 7779, -4, 7781, -4, 7783, -4, + 7785, -4, 7787, -4, // NOLINT + 7789, -4, 7791, -4, 7793, -4, + 7795, -4, 7797, -4, 7799, -4, + 7801, -4, 7803, -4, // NOLINT + 7805, -4, 7807, -4, 7809, -4, + 7811, -4, 7813, -4, 7815, -4, + 7817, -4, 7819, -4, // NOLINT + 7821, -4, 7823, -4, 7825, -4, + 7827, -4, 7829, -4, 7830, 25, + 7831, 29, 7832, 33, // NOLINT + 7833, 37, 7834, 41, 7835, -236, + 7841, -4, 7843, -4, 7845, -4, + 7847, -4, 7849, -4, // NOLINT + 7851, -4, 7853, -4, 7855, -4, + 7857, -4, 7859, -4, 7861, -4, + 7863, -4, 7865, -4, // NOLINT + 7867, -4, 7869, -4, 7871, -4, + 7873, -4, 7875, -4, 7877, -4, + 7879, -4, 7881, -4, // NOLINT + 7883, -4, 7885, -4, 7887, -4, + 7889, -4, 7891, -4, 7893, -4, + 7895, -4, 7897, -4, // NOLINT + 7899, -4, 7901, -4, 7903, -4, + 7905, -4, 7907, -4, 7909, -4, + 7911, -4, 7913, -4, // NOLINT + 7915, -4, 7917, -4, 7919, -4, + 7921, -4, 7923, -4, 7925, -4, + 7927, -4, 7929, -4, // NOLINT + 7931, -4, 7933, 
-4, 7935, -4, + 1073749760, 32, 7943, 32, 1073749776, 32, + 7957, 32, 1073749792, 32, // NOLINT + 7975, 32, 1073749808, 32, 7991, 32, + 1073749824, 32, 8005, 32, 8016, 45, + 8017, 32, 8018, 49, // NOLINT + 8019, 32, 8020, 53, 8021, 32, + 8022, 57, 8023, 32, 1073749856, 32, + 8039, 32, 1073749872, 296, // NOLINT + 8049, 296, 1073749874, 344, 8053, 344, + 1073749878, 400, 8055, 400, 1073749880, 512, + 8057, 512, 1073749882, 448, // NOLINT + 8059, 448, 1073749884, 504, 8061, 504, + 8064, 61, 8065, 65, 8066, 69, + 8067, 73, 8068, 77, // NOLINT + 8069, 81, 8070, 85, 8071, 89, + 8072, 61, 8073, 65, 8074, 69, + 8075, 73, 8076, 77, // NOLINT + 8077, 81, 8078, 85, 8079, 89, + 8080, 93, 8081, 97, 8082, 101, + 8083, 105, 8084, 109, // NOLINT + 8085, 113, 8086, 117, 8087, 121, + 8088, 93, 8089, 97, 8090, 101, + 8091, 105, 8092, 109, // NOLINT + 8093, 113, 8094, 117, 8095, 121, + 8096, 125, 8097, 129, 8098, 133, + 8099, 137, 8100, 141, // NOLINT + 8101, 145, 8102, 149, 8103, 153, + 8104, 125, 8105, 129, 8106, 133, + 8107, 137, 8108, 141, // NOLINT + 8109, 145, 8110, 149, 8111, 153, + 1073749936, 32, 8113, 32, 8114, 157, + 8115, 161, 8116, 165, // NOLINT + 8118, 169, 8119, 173, 8124, 161, + 8126, -28820, 8130, 177, 8131, 181, + 8132, 185, 8134, 189, // NOLINT + 8135, 193, 8140, 181, 1073749968, 32, + 8145, 32, 8146, 197, 8147, 13, + 8150, 201, 8151, 205, // NOLINT + 1073749984, 32, 8161, 32, 8162, 209, + 8163, 17, 8164, 213, 8165, 28, + 8166, 217, 8167, 221, // NOLINT + 8178, 225, 8179, 229, 8180, 233, + 8182, 237, 8183, 241, 8188, 229}; // NOLINT +static const uint16_t kToUppercaseMultiStrings0Size = 62; // NOLINT +static const MultiCharacterSpecialCase<1> kToUppercaseMultiStrings1[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kToUppercaseTable1Size = 73; // NOLINT +static const int32_t kToUppercaseTable1[146] = { + 334, -112, 1073742192, -64, 383, -64, + 388, -4, 1073743056, -104, 1257, -104, + 1073744944, -192, 3166, -192, // NOLINT + 3169, -4, 3173, 
-43180, 3174, -43168, + 3176, -4, 3178, -4, 3180, -4, + 3187, -4, 3190, -4, // NOLINT + 3201, -4, 3203, -4, 3205, -4, + 3207, -4, 3209, -4, 3211, -4, + 3213, -4, 3215, -4, // NOLINT + 3217, -4, 3219, -4, 3221, -4, + 3223, -4, 3225, -4, 3227, -4, + 3229, -4, 3231, -4, // NOLINT + 3233, -4, 3235, -4, 3237, -4, + 3239, -4, 3241, -4, 3243, -4, + 3245, -4, 3247, -4, // NOLINT + 3249, -4, 3251, -4, 3253, -4, + 3255, -4, 3257, -4, 3259, -4, + 3261, -4, 3263, -4, // NOLINT + 3265, -4, 3267, -4, 3269, -4, + 3271, -4, 3273, -4, 3275, -4, + 3277, -4, 3279, -4, // NOLINT + 3281, -4, 3283, -4, 3285, -4, + 3287, -4, 3289, -4, 3291, -4, + 3293, -4, 3295, -4, // NOLINT + 3297, -4, 3299, -4, 3308, -4, + 3310, -4, 3315, -4, 1073745152, -29056, + 3365, -29056, 3367, -29056, // NOLINT + 3373, -29056}; // NOLINT +static const uint16_t kToUppercaseMultiStrings1Size = 1; // NOLINT +static const MultiCharacterSpecialCase<1> kToUppercaseMultiStrings5[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kToUppercaseTable5Size = 95; // NOLINT +static const int32_t + kToUppercaseTable5[190] = {1601, -4, 1603, -4, 1605, -4, 1607, -4, 1609, -4, + 1611, -4, 1613, -4, 1615, -4, // NOLINT + 1617, -4, 1619, -4, 1621, -4, 1623, -4, 1625, -4, + 1627, -4, 1629, -4, 1631, -4, // NOLINT + 1633, -4, 1635, -4, 1637, -4, 1639, -4, 1641, -4, + 1643, -4, 1645, -4, 1665, -4, // NOLINT + 1667, -4, 1669, -4, 1671, -4, 1673, -4, 1675, -4, + 1677, -4, 1679, -4, 1681, -4, // NOLINT + 1683, -4, 1685, -4, 1687, -4, 1689, -4, 1691, -4, + 1827, -4, 1829, -4, 1831, -4, // NOLINT + 1833, -4, 1835, -4, 1837, -4, 1839, -4, 1843, -4, + 1845, -4, 1847, -4, 1849, -4, // NOLINT + 1851, -4, 1853, -4, 1855, -4, 1857, -4, 1859, -4, + 1861, -4, 1863, -4, 1865, -4, // NOLINT + 1867, -4, 1869, -4, 1871, -4, 1873, -4, 1875, -4, + 1877, -4, 1879, -4, 1881, -4, // NOLINT + 1883, -4, 1885, -4, 1887, -4, 1889, -4, 1891, -4, + 1893, -4, 1895, -4, 1897, -4, // NOLINT + 1899, -4, 1901, -4, 1903, -4, 1914, -4, 1916, -4, + 
1919, -4, 1921, -4, 1923, -4, // NOLINT + 1925, -4, 1927, -4, 1932, -4, 1937, -4, 1939, -4, + 1943, -4, 1945, -4, 1947, -4, // NOLINT + 1949, -4, 1951, -4, 1953, -4, 1955, -4, 1957, -4, + 1959, -4, 1961, -4}; // NOLINT +static const uint16_t kToUppercaseMultiStrings5Size = 1; // NOLINT +static const MultiCharacterSpecialCase<3> kToUppercaseMultiStrings7[12] = + { // NOLINT + {{70, 70, kSentinel}}, + {{70, 73, kSentinel}}, + {{70, 76, kSentinel}}, + {{70, 70, 73}}, // NOLINT + {{70, 70, 76}}, + {{83, 84, kSentinel}}, + {{1348, 1350, kSentinel}}, + {{1348, 1333, kSentinel}}, // NOLINT + {{1348, 1339, kSentinel}}, + {{1358, 1350, kSentinel}}, + {{1348, 1341, kSentinel}}, + {{kSentinel}}}; // NOLINT +static const uint16_t kToUppercaseTable7Size = 14; // NOLINT +static const int32_t kToUppercaseTable7[28] = + {6912, 1, 6913, 5, 6914, 9, 6915, 13, + 6916, 17, 6917, 21, 6918, 21, 6931, 25, // NOLINT + 6932, 29, 6933, 33, 6934, 37, 6935, 41, + 1073749825, -128, 8026, -128}; // NOLINT +static const uint16_t kToUppercaseMultiStrings7Size = 12; // NOLINT +int ToUppercase::Convert(uchar c, uchar n, uchar* result, + bool* allow_caching_ptr) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupMapping(kToUppercaseTable0, kToUppercaseTable0Size, + kToUppercaseMultiStrings0, c, n, result, + allow_caching_ptr); + case 1: + return LookupMapping(kToUppercaseTable1, kToUppercaseTable1Size, + kToUppercaseMultiStrings1, c, n, result, + allow_caching_ptr); + case 5: + return LookupMapping(kToUppercaseTable5, kToUppercaseTable5Size, + kToUppercaseMultiStrings5, c, n, result, + allow_caching_ptr); + case 7: + return LookupMapping(kToUppercaseTable7, kToUppercaseTable7Size, + kToUppercaseMultiStrings7, c, n, result, + allow_caching_ptr); + default: + return 0; + } +} + +static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings0[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kEcma262CanonicalizeTable0Size = 498; // NOLINT 
+static const int32_t kEcma262CanonicalizeTable0[996] = { + 1073741921, -128, 122, -128, 181, 2972, + 1073742048, -128, 246, -128, 1073742072, -128, + 254, -128, 255, 484, // NOLINT + 257, -4, 259, -4, 261, -4, + 263, -4, 265, -4, 267, -4, + 269, -4, 271, -4, // NOLINT + 273, -4, 275, -4, 277, -4, + 279, -4, 281, -4, 283, -4, + 285, -4, 287, -4, // NOLINT + 289, -4, 291, -4, 293, -4, + 295, -4, 297, -4, 299, -4, + 301, -4, 303, -4, // NOLINT + 307, -4, 309, -4, 311, -4, + 314, -4, 316, -4, 318, -4, + 320, -4, 322, -4, // NOLINT + 324, -4, 326, -4, 328, -4, + 331, -4, 333, -4, 335, -4, + 337, -4, 339, -4, // NOLINT + 341, -4, 343, -4, 345, -4, + 347, -4, 349, -4, 351, -4, + 353, -4, 355, -4, // NOLINT + 357, -4, 359, -4, 361, -4, + 363, -4, 365, -4, 367, -4, + 369, -4, 371, -4, // NOLINT + 373, -4, 375, -4, 378, -4, + 380, -4, 382, -4, 384, 780, + 387, -4, 389, -4, // NOLINT + 392, -4, 396, -4, 402, -4, + 405, 388, 409, -4, 410, 652, + 414, 520, 417, -4, // NOLINT + 419, -4, 421, -4, 424, -4, + 429, -4, 432, -4, 436, -4, + 438, -4, 441, -4, // NOLINT + 445, -4, 447, 224, 453, -4, + 454, -8, 456, -4, 457, -8, + 459, -4, 460, -8, // NOLINT + 462, -4, 464, -4, 466, -4, + 468, -4, 470, -4, 472, -4, + 474, -4, 476, -4, // NOLINT + 477, -316, 479, -4, 481, -4, + 483, -4, 485, -4, 487, -4, + 489, -4, 491, -4, // NOLINT + 493, -4, 495, -4, 498, -4, + 499, -8, 501, -4, 505, -4, + 507, -4, 509, -4, // NOLINT + 511, -4, 513, -4, 515, -4, + 517, -4, 519, -4, 521, -4, + 523, -4, 525, -4, // NOLINT + 527, -4, 529, -4, 531, -4, + 533, -4, 535, -4, 537, -4, + 539, -4, 541, -4, // NOLINT + 543, -4, 547, -4, 549, -4, + 551, -4, 553, -4, 555, -4, + 557, -4, 559, -4, // NOLINT + 561, -4, 563, -4, 572, -4, + 1073742399, 43260, 576, 43260, 578, -4, + 583, -4, 585, -4, // NOLINT + 587, -4, 589, -4, 591, -4, + 592, 43132, 593, 43120, 594, 43128, + 595, -840, 596, -824, // NOLINT + 1073742422, -820, 599, -820, 601, -808, + 603, -812, 604, 169276, 608, -820, + 609, 169260, 611, -828, // 
NOLINT + 613, 169120, 614, 169232, 616, -836, + 617, -844, 619, 42972, 620, 169220, + 623, -844, 625, 42996, // NOLINT + 626, -852, 629, -856, 637, 42908, + 640, -872, 643, -872, 647, 169128, + 648, -872, 649, -276, // NOLINT + 1073742474, -868, 651, -868, 652, -284, + 658, -876, 670, 169032, 837, 336, + 881, -4, 883, -4, // NOLINT + 887, -4, 1073742715, 520, 893, 520, + 940, -152, 1073742765, -148, 943, -148, + 1073742769, -128, 961, -128, // NOLINT + 962, -124, 1073742787, -128, 971, -128, + 972, -256, 1073742797, -252, 974, -252, + 976, -248, 977, -228, // NOLINT + 981, -188, 982, -216, 983, -32, + 985, -4, 987, -4, 989, -4, + 991, -4, 993, -4, // NOLINT + 995, -4, 997, -4, 999, -4, + 1001, -4, 1003, -4, 1005, -4, + 1007, -4, 1008, -344, // NOLINT + 1009, -320, 1010, 28, 1011, -464, + 1013, -384, 1016, -4, 1019, -4, + 1073742896, -128, 1103, -128, // NOLINT + 1073742928, -320, 1119, -320, 1121, -4, + 1123, -4, 1125, -4, 1127, -4, + 1129, -4, 1131, -4, // NOLINT + 1133, -4, 1135, -4, 1137, -4, + 1139, -4, 1141, -4, 1143, -4, + 1145, -4, 1147, -4, // NOLINT + 1149, -4, 1151, -4, 1153, -4, + 1163, -4, 1165, -4, 1167, -4, + 1169, -4, 1171, -4, // NOLINT + 1173, -4, 1175, -4, 1177, -4, + 1179, -4, 1181, -4, 1183, -4, + 1185, -4, 1187, -4, // NOLINT + 1189, -4, 1191, -4, 1193, -4, + 1195, -4, 1197, -4, 1199, -4, + 1201, -4, 1203, -4, // NOLINT + 1205, -4, 1207, -4, 1209, -4, + 1211, -4, 1213, -4, 1215, -4, + 1218, -4, 1220, -4, // NOLINT + 1222, -4, 1224, -4, 1226, -4, + 1228, -4, 1230, -4, 1231, -60, + 1233, -4, 1235, -4, // NOLINT + 1237, -4, 1239, -4, 1241, -4, + 1243, -4, 1245, -4, 1247, -4, + 1249, -4, 1251, -4, // NOLINT + 1253, -4, 1255, -4, 1257, -4, + 1259, -4, 1261, -4, 1263, -4, + 1265, -4, 1267, -4, // NOLINT + 1269, -4, 1271, -4, 1273, -4, + 1275, -4, 1277, -4, 1279, -4, + 1281, -4, 1283, -4, // NOLINT + 1285, -4, 1287, -4, 1289, -4, + 1291, -4, 1293, -4, 1295, -4, + 1297, -4, 1299, -4, // NOLINT + 1301, -4, 1303, -4, 1305, -4, + 1307, -4, 1309, -4, 1311, 
-4, + 1313, -4, 1315, -4, // NOLINT + 1317, -4, 1319, -4, 1321, -4, + 1323, -4, 1325, -4, 1327, -4, + 1073743201, -192, 1414, -192, // NOLINT + 7545, 141328, 7549, 15256, 7681, -4, + 7683, -4, 7685, -4, 7687, -4, + 7689, -4, 7691, -4, // NOLINT + 7693, -4, 7695, -4, 7697, -4, + 7699, -4, 7701, -4, 7703, -4, + 7705, -4, 7707, -4, // NOLINT + 7709, -4, 7711, -4, 7713, -4, + 7715, -4, 7717, -4, 7719, -4, + 7721, -4, 7723, -4, // NOLINT + 7725, -4, 7727, -4, 7729, -4, + 7731, -4, 7733, -4, 7735, -4, + 7737, -4, 7739, -4, // NOLINT + 7741, -4, 7743, -4, 7745, -4, + 7747, -4, 7749, -4, 7751, -4, + 7753, -4, 7755, -4, // NOLINT + 7757, -4, 7759, -4, 7761, -4, + 7763, -4, 7765, -4, 7767, -4, + 7769, -4, 7771, -4, // NOLINT + 7773, -4, 7775, -4, 7777, -4, + 7779, -4, 7781, -4, 7783, -4, + 7785, -4, 7787, -4, // NOLINT + 7789, -4, 7791, -4, 7793, -4, + 7795, -4, 7797, -4, 7799, -4, + 7801, -4, 7803, -4, // NOLINT + 7805, -4, 7807, -4, 7809, -4, + 7811, -4, 7813, -4, 7815, -4, + 7817, -4, 7819, -4, // NOLINT + 7821, -4, 7823, -4, 7825, -4, + 7827, -4, 7829, -4, 7835, -236, + 7841, -4, 7843, -4, // NOLINT + 7845, -4, 7847, -4, 7849, -4, + 7851, -4, 7853, -4, 7855, -4, + 7857, -4, 7859, -4, // NOLINT + 7861, -4, 7863, -4, 7865, -4, + 7867, -4, 7869, -4, 7871, -4, + 7873, -4, 7875, -4, // NOLINT + 7877, -4, 7879, -4, 7881, -4, + 7883, -4, 7885, -4, 7887, -4, + 7889, -4, 7891, -4, // NOLINT + 7893, -4, 7895, -4, 7897, -4, + 7899, -4, 7901, -4, 7903, -4, + 7905, -4, 7907, -4, // NOLINT + 7909, -4, 7911, -4, 7913, -4, + 7915, -4, 7917, -4, 7919, -4, + 7921, -4, 7923, -4, // NOLINT + 7925, -4, 7927, -4, 7929, -4, + 7931, -4, 7933, -4, 7935, -4, + 1073749760, 32, 7943, 32, // NOLINT + 1073749776, 32, 7957, 32, 1073749792, 32, + 7975, 32, 1073749808, 32, 7991, 32, + 1073749824, 32, 8005, 32, // NOLINT + 8017, 32, 8019, 32, 8021, 32, + 8023, 32, 1073749856, 32, 8039, 32, + 1073749872, 296, 8049, 296, // NOLINT + 1073749874, 344, 8053, 344, 1073749878, 400, + 8055, 400, 1073749880, 512, 
8057, 512, + 1073749882, 448, 8059, 448, // NOLINT + 1073749884, 504, 8061, 504, 1073749936, 32, + 8113, 32, 8126, -28820, 1073749968, 32, + 8145, 32, 1073749984, 32, // NOLINT + 8161, 32, 8165, 28}; // NOLINT +static const uint16_t kEcma262CanonicalizeMultiStrings0Size = 1; // NOLINT +static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings1[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kEcma262CanonicalizeTable1Size = 73; // NOLINT +static const int32_t kEcma262CanonicalizeTable1[146] = { + 334, -112, 1073742192, -64, 383, -64, + 388, -4, 1073743056, -104, 1257, -104, + 1073744944, -192, 3166, -192, // NOLINT + 3169, -4, 3173, -43180, 3174, -43168, + 3176, -4, 3178, -4, 3180, -4, + 3187, -4, 3190, -4, // NOLINT + 3201, -4, 3203, -4, 3205, -4, + 3207, -4, 3209, -4, 3211, -4, + 3213, -4, 3215, -4, // NOLINT + 3217, -4, 3219, -4, 3221, -4, + 3223, -4, 3225, -4, 3227, -4, + 3229, -4, 3231, -4, // NOLINT + 3233, -4, 3235, -4, 3237, -4, + 3239, -4, 3241, -4, 3243, -4, + 3245, -4, 3247, -4, // NOLINT + 3249, -4, 3251, -4, 3253, -4, + 3255, -4, 3257, -4, 3259, -4, + 3261, -4, 3263, -4, // NOLINT + 3265, -4, 3267, -4, 3269, -4, + 3271, -4, 3273, -4, 3275, -4, + 3277, -4, 3279, -4, // NOLINT + 3281, -4, 3283, -4, 3285, -4, + 3287, -4, 3289, -4, 3291, -4, + 3293, -4, 3295, -4, // NOLINT + 3297, -4, 3299, -4, 3308, -4, + 3310, -4, 3315, -4, 1073745152, -29056, + 3365, -29056, 3367, -29056, // NOLINT + 3373, -29056}; // NOLINT +static const uint16_t kEcma262CanonicalizeMultiStrings1Size = 1; // NOLINT +static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings5[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kEcma262CanonicalizeTable5Size = 95; // NOLINT +static const int32_t kEcma262CanonicalizeTable5[190] = { + 1601, -4, 1603, -4, 1605, -4, 1607, -4, + 1609, -4, 1611, -4, 1613, -4, 1615, -4, // NOLINT + 1617, -4, 1619, -4, 1621, -4, 1623, -4, + 1625, -4, 1627, -4, 1629, -4, 1631, -4, // NOLINT + 
1633, -4, 1635, -4, 1637, -4, 1639, -4, + 1641, -4, 1643, -4, 1645, -4, 1665, -4, // NOLINT + 1667, -4, 1669, -4, 1671, -4, 1673, -4, + 1675, -4, 1677, -4, 1679, -4, 1681, -4, // NOLINT + 1683, -4, 1685, -4, 1687, -4, 1689, -4, + 1691, -4, 1827, -4, 1829, -4, 1831, -4, // NOLINT + 1833, -4, 1835, -4, 1837, -4, 1839, -4, + 1843, -4, 1845, -4, 1847, -4, 1849, -4, // NOLINT + 1851, -4, 1853, -4, 1855, -4, 1857, -4, + 1859, -4, 1861, -4, 1863, -4, 1865, -4, // NOLINT + 1867, -4, 1869, -4, 1871, -4, 1873, -4, + 1875, -4, 1877, -4, 1879, -4, 1881, -4, // NOLINT + 1883, -4, 1885, -4, 1887, -4, 1889, -4, + 1891, -4, 1893, -4, 1895, -4, 1897, -4, // NOLINT + 1899, -4, 1901, -4, 1903, -4, 1914, -4, + 1916, -4, 1919, -4, 1921, -4, 1923, -4, // NOLINT + 1925, -4, 1927, -4, 1932, -4, 1937, -4, + 1939, -4, 1943, -4, 1945, -4, 1947, -4, // NOLINT + 1949, -4, 1951, -4, 1953, -4, 1955, -4, + 1957, -4, 1959, -4, 1961, -4}; // NOLINT +static const uint16_t kEcma262CanonicalizeMultiStrings5Size = 1; // NOLINT +static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings7[1] = + { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kEcma262CanonicalizeTable7Size = 2; // NOLINT +static const int32_t kEcma262CanonicalizeTable7[4] = {1073749825, -128, 8026, + -128}; // NOLINT +static const uint16_t kEcma262CanonicalizeMultiStrings7Size = 1; // NOLINT +int Ecma262Canonicalize::Convert(uchar c, uchar n, uchar* result, + bool* allow_caching_ptr) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupMapping( + kEcma262CanonicalizeTable0, kEcma262CanonicalizeTable0Size, + kEcma262CanonicalizeMultiStrings0, c, n, result, allow_caching_ptr); + case 1: + return LookupMapping( + kEcma262CanonicalizeTable1, kEcma262CanonicalizeTable1Size, + kEcma262CanonicalizeMultiStrings1, c, n, result, allow_caching_ptr); + case 5: + return LookupMapping( + kEcma262CanonicalizeTable5, kEcma262CanonicalizeTable5Size, + kEcma262CanonicalizeMultiStrings5, c, n, 
result, allow_caching_ptr); + case 7: + return LookupMapping( + kEcma262CanonicalizeTable7, kEcma262CanonicalizeTable7Size, + kEcma262CanonicalizeMultiStrings7, c, n, result, allow_caching_ptr); + default: + return 0; + } +} + +static const MultiCharacterSpecialCase<4> + kEcma262UnCanonicalizeMultiStrings0[507] = { // NOLINT + {{65, 97, kSentinel}}, + {{90, 122, kSentinel}}, + {{181, 924, 956, kSentinel}}, + {{192, 224, kSentinel}}, // NOLINT + {{214, 246, kSentinel}}, + {{216, 248, kSentinel}}, + {{222, 254, kSentinel}}, + {{255, 376, kSentinel}}, // NOLINT + {{256, 257, kSentinel}}, + {{258, 259, kSentinel}}, + {{260, 261, kSentinel}}, + {{262, 263, kSentinel}}, // NOLINT + {{264, 265, kSentinel}}, + {{266, 267, kSentinel}}, + {{268, 269, kSentinel}}, + {{270, 271, kSentinel}}, // NOLINT + {{272, 273, kSentinel}}, + {{274, 275, kSentinel}}, + {{276, 277, kSentinel}}, + {{278, 279, kSentinel}}, // NOLINT + {{280, 281, kSentinel}}, + {{282, 283, kSentinel}}, + {{284, 285, kSentinel}}, + {{286, 287, kSentinel}}, // NOLINT + {{288, 289, kSentinel}}, + {{290, 291, kSentinel}}, + {{292, 293, kSentinel}}, + {{294, 295, kSentinel}}, // NOLINT + {{296, 297, kSentinel}}, + {{298, 299, kSentinel}}, + {{300, 301, kSentinel}}, + {{302, 303, kSentinel}}, // NOLINT + {{306, 307, kSentinel}}, + {{308, 309, kSentinel}}, + {{310, 311, kSentinel}}, + {{313, 314, kSentinel}}, // NOLINT + {{315, 316, kSentinel}}, + {{317, 318, kSentinel}}, + {{319, 320, kSentinel}}, + {{321, 322, kSentinel}}, // NOLINT + {{323, 324, kSentinel}}, + {{325, 326, kSentinel}}, + {{327, 328, kSentinel}}, + {{330, 331, kSentinel}}, // NOLINT + {{332, 333, kSentinel}}, + {{334, 335, kSentinel}}, + {{336, 337, kSentinel}}, + {{338, 339, kSentinel}}, // NOLINT + {{340, 341, kSentinel}}, + {{342, 343, kSentinel}}, + {{344, 345, kSentinel}}, + {{346, 347, kSentinel}}, // NOLINT + {{348, 349, kSentinel}}, + {{350, 351, kSentinel}}, + {{352, 353, kSentinel}}, + {{354, 355, kSentinel}}, // NOLINT + {{356, 357, 
kSentinel}}, + {{358, 359, kSentinel}}, + {{360, 361, kSentinel}}, + {{362, 363, kSentinel}}, // NOLINT + {{364, 365, kSentinel}}, + {{366, 367, kSentinel}}, + {{368, 369, kSentinel}}, + {{370, 371, kSentinel}}, // NOLINT + {{372, 373, kSentinel}}, + {{374, 375, kSentinel}}, + {{377, 378, kSentinel}}, + {{379, 380, kSentinel}}, // NOLINT + {{381, 382, kSentinel}}, + {{384, 579, kSentinel}}, + {{385, 595, kSentinel}}, + {{386, 387, kSentinel}}, // NOLINT + {{388, 389, kSentinel}}, + {{390, 596, kSentinel}}, + {{391, 392, kSentinel}}, + {{393, 598, kSentinel}}, // NOLINT + {{394, 599, kSentinel}}, + {{395, 396, kSentinel}}, + {{398, 477, kSentinel}}, + {{399, 601, kSentinel}}, // NOLINT + {{400, 603, kSentinel}}, + {{401, 402, kSentinel}}, + {{403, 608, kSentinel}}, + {{404, 611, kSentinel}}, // NOLINT + {{405, 502, kSentinel}}, + {{406, 617, kSentinel}}, + {{407, 616, kSentinel}}, + {{408, 409, kSentinel}}, // NOLINT + {{410, 573, kSentinel}}, + {{412, 623, kSentinel}}, + {{413, 626, kSentinel}}, + {{414, 544, kSentinel}}, // NOLINT + {{415, 629, kSentinel}}, + {{416, 417, kSentinel}}, + {{418, 419, kSentinel}}, + {{420, 421, kSentinel}}, // NOLINT + {{422, 640, kSentinel}}, + {{423, 424, kSentinel}}, + {{425, 643, kSentinel}}, + {{428, 429, kSentinel}}, // NOLINT + {{430, 648, kSentinel}}, + {{431, 432, kSentinel}}, + {{433, 650, kSentinel}}, + {{434, 651, kSentinel}}, // NOLINT + {{435, 436, kSentinel}}, + {{437, 438, kSentinel}}, + {{439, 658, kSentinel}}, + {{440, 441, kSentinel}}, // NOLINT + {{444, 445, kSentinel}}, + {{447, 503, kSentinel}}, + {{452, 453, 454, kSentinel}}, + {{455, 456, 457, kSentinel}}, // NOLINT + {{458, 459, 460, kSentinel}}, + {{461, 462, kSentinel}}, + {{463, 464, kSentinel}}, + {{465, 466, kSentinel}}, // NOLINT + {{467, 468, kSentinel}}, + {{469, 470, kSentinel}}, + {{471, 472, kSentinel}}, + {{473, 474, kSentinel}}, // NOLINT + {{475, 476, kSentinel}}, + {{478, 479, kSentinel}}, + {{480, 481, kSentinel}}, + {{482, 483, kSentinel}}, // 
NOLINT + {{484, 485, kSentinel}}, + {{486, 487, kSentinel}}, + {{488, 489, kSentinel}}, + {{490, 491, kSentinel}}, // NOLINT + {{492, 493, kSentinel}}, + {{494, 495, kSentinel}}, + {{497, 498, 499, kSentinel}}, + {{500, 501, kSentinel}}, // NOLINT + {{504, 505, kSentinel}}, + {{506, 507, kSentinel}}, + {{508, 509, kSentinel}}, + {{510, 511, kSentinel}}, // NOLINT + {{512, 513, kSentinel}}, + {{514, 515, kSentinel}}, + {{516, 517, kSentinel}}, + {{518, 519, kSentinel}}, // NOLINT + {{520, 521, kSentinel}}, + {{522, 523, kSentinel}}, + {{524, 525, kSentinel}}, + {{526, 527, kSentinel}}, // NOLINT + {{528, 529, kSentinel}}, + {{530, 531, kSentinel}}, + {{532, 533, kSentinel}}, + {{534, 535, kSentinel}}, // NOLINT + {{536, 537, kSentinel}}, + {{538, 539, kSentinel}}, + {{540, 541, kSentinel}}, + {{542, 543, kSentinel}}, // NOLINT + {{546, 547, kSentinel}}, + {{548, 549, kSentinel}}, + {{550, 551, kSentinel}}, + {{552, 553, kSentinel}}, // NOLINT + {{554, 555, kSentinel}}, + {{556, 557, kSentinel}}, + {{558, 559, kSentinel}}, + {{560, 561, kSentinel}}, // NOLINT + {{562, 563, kSentinel}}, + {{570, 11365, kSentinel}}, + {{571, 572, kSentinel}}, + {{574, 11366, kSentinel}}, // NOLINT + {{575, 11390, kSentinel}}, + {{576, 11391, kSentinel}}, + {{577, 578, kSentinel}}, + {{580, 649, kSentinel}}, // NOLINT + {{581, 652, kSentinel}}, + {{582, 583, kSentinel}}, + {{584, 585, kSentinel}}, + {{586, 587, kSentinel}}, // NOLINT + {{588, 589, kSentinel}}, + {{590, 591, kSentinel}}, + {{592, 11375, kSentinel}}, + {{593, 11373, kSentinel}}, // NOLINT + {{594, 11376, kSentinel}}, + {{604, 42923, kSentinel}}, + {{609, 42924, kSentinel}}, + {{613, 42893, kSentinel}}, // NOLINT + {{614, 42922, kSentinel}}, + {{619, 11362, kSentinel}}, + {{620, 42925, kSentinel}}, + {{625, 11374, kSentinel}}, // NOLINT + {{637, 11364, kSentinel}}, + {{647, 42929, kSentinel}}, + {{670, 42928, kSentinel}}, + {{837, 921, 953, 8126}}, // NOLINT + {{880, 881, kSentinel}}, + {{882, 883, kSentinel}}, + {{886, 
887, kSentinel}}, + {{891, 1021, kSentinel}}, // NOLINT + {{893, 1023, kSentinel}}, + {{895, 1011, kSentinel}}, + {{902, 940, kSentinel}}, + {{904, 941, kSentinel}}, // NOLINT + {{906, 943, kSentinel}}, + {{908, 972, kSentinel}}, + {{910, 973, kSentinel}}, + {{911, 974, kSentinel}}, // NOLINT + {{913, 945, kSentinel}}, + {{914, 946, 976, kSentinel}}, + {{915, 947, kSentinel}}, + {{916, 948, kSentinel}}, // NOLINT + {{917, 949, 1013, kSentinel}}, + {{918, 950, kSentinel}}, + {{919, 951, kSentinel}}, + {{920, 952, 977, kSentinel}}, // NOLINT + {{922, 954, 1008, kSentinel}}, + {{923, 955, kSentinel}}, + {{925, 957, kSentinel}}, + {{927, 959, kSentinel}}, // NOLINT + {{928, 960, 982, kSentinel}}, + {{929, 961, 1009, kSentinel}}, + {{931, 962, 963, kSentinel}}, + {{932, 964, kSentinel}}, // NOLINT + {{933, 965, kSentinel}}, + {{934, 966, 981, kSentinel}}, + {{935, 967, kSentinel}}, + {{939, 971, kSentinel}}, // NOLINT + {{975, 983, kSentinel}}, + {{984, 985, kSentinel}}, + {{986, 987, kSentinel}}, + {{988, 989, kSentinel}}, // NOLINT + {{990, 991, kSentinel}}, + {{992, 993, kSentinel}}, + {{994, 995, kSentinel}}, + {{996, 997, kSentinel}}, // NOLINT + {{998, 999, kSentinel}}, + {{1000, 1001, kSentinel}}, + {{1002, 1003, kSentinel}}, + {{1004, 1005, kSentinel}}, // NOLINT + {{1006, 1007, kSentinel}}, + {{1010, 1017, kSentinel}}, + {{1015, 1016, kSentinel}}, + {{1018, 1019, kSentinel}}, // NOLINT + {{1024, 1104, kSentinel}}, + {{1039, 1119, kSentinel}}, + {{1040, 1072, kSentinel}}, + {{1071, 1103, kSentinel}}, // NOLINT + {{1120, 1121, kSentinel}}, + {{1122, 1123, kSentinel}}, + {{1124, 1125, kSentinel}}, + {{1126, 1127, kSentinel}}, // NOLINT + {{1128, 1129, kSentinel}}, + {{1130, 1131, kSentinel}}, + {{1132, 1133, kSentinel}}, + {{1134, 1135, kSentinel}}, // NOLINT + {{1136, 1137, kSentinel}}, + {{1138, 1139, kSentinel}}, + {{1140, 1141, kSentinel}}, + {{1142, 1143, kSentinel}}, // NOLINT + {{1144, 1145, kSentinel}}, + {{1146, 1147, kSentinel}}, + {{1148, 1149, 
kSentinel}}, + {{1150, 1151, kSentinel}}, // NOLINT + {{1152, 1153, kSentinel}}, + {{1162, 1163, kSentinel}}, + {{1164, 1165, kSentinel}}, + {{1166, 1167, kSentinel}}, // NOLINT + {{1168, 1169, kSentinel}}, + {{1170, 1171, kSentinel}}, + {{1172, 1173, kSentinel}}, + {{1174, 1175, kSentinel}}, // NOLINT + {{1176, 1177, kSentinel}}, + {{1178, 1179, kSentinel}}, + {{1180, 1181, kSentinel}}, + {{1182, 1183, kSentinel}}, // NOLINT + {{1184, 1185, kSentinel}}, + {{1186, 1187, kSentinel}}, + {{1188, 1189, kSentinel}}, + {{1190, 1191, kSentinel}}, // NOLINT + {{1192, 1193, kSentinel}}, + {{1194, 1195, kSentinel}}, + {{1196, 1197, kSentinel}}, + {{1198, 1199, kSentinel}}, // NOLINT + {{1200, 1201, kSentinel}}, + {{1202, 1203, kSentinel}}, + {{1204, 1205, kSentinel}}, + {{1206, 1207, kSentinel}}, // NOLINT + {{1208, 1209, kSentinel}}, + {{1210, 1211, kSentinel}}, + {{1212, 1213, kSentinel}}, + {{1214, 1215, kSentinel}}, // NOLINT + {{1216, 1231, kSentinel}}, + {{1217, 1218, kSentinel}}, + {{1219, 1220, kSentinel}}, + {{1221, 1222, kSentinel}}, // NOLINT + {{1223, 1224, kSentinel}}, + {{1225, 1226, kSentinel}}, + {{1227, 1228, kSentinel}}, + {{1229, 1230, kSentinel}}, // NOLINT + {{1232, 1233, kSentinel}}, + {{1234, 1235, kSentinel}}, + {{1236, 1237, kSentinel}}, + {{1238, 1239, kSentinel}}, // NOLINT + {{1240, 1241, kSentinel}}, + {{1242, 1243, kSentinel}}, + {{1244, 1245, kSentinel}}, + {{1246, 1247, kSentinel}}, // NOLINT + {{1248, 1249, kSentinel}}, + {{1250, 1251, kSentinel}}, + {{1252, 1253, kSentinel}}, + {{1254, 1255, kSentinel}}, // NOLINT + {{1256, 1257, kSentinel}}, + {{1258, 1259, kSentinel}}, + {{1260, 1261, kSentinel}}, + {{1262, 1263, kSentinel}}, // NOLINT + {{1264, 1265, kSentinel}}, + {{1266, 1267, kSentinel}}, + {{1268, 1269, kSentinel}}, + {{1270, 1271, kSentinel}}, // NOLINT + {{1272, 1273, kSentinel}}, + {{1274, 1275, kSentinel}}, + {{1276, 1277, kSentinel}}, + {{1278, 1279, kSentinel}}, // NOLINT + {{1280, 1281, kSentinel}}, + {{1282, 1283, kSentinel}}, 
+ {{1284, 1285, kSentinel}}, + {{1286, 1287, kSentinel}}, // NOLINT + {{1288, 1289, kSentinel}}, + {{1290, 1291, kSentinel}}, + {{1292, 1293, kSentinel}}, + {{1294, 1295, kSentinel}}, // NOLINT + {{1296, 1297, kSentinel}}, + {{1298, 1299, kSentinel}}, + {{1300, 1301, kSentinel}}, + {{1302, 1303, kSentinel}}, // NOLINT + {{1304, 1305, kSentinel}}, + {{1306, 1307, kSentinel}}, + {{1308, 1309, kSentinel}}, + {{1310, 1311, kSentinel}}, // NOLINT + {{1312, 1313, kSentinel}}, + {{1314, 1315, kSentinel}}, + {{1316, 1317, kSentinel}}, + {{1318, 1319, kSentinel}}, // NOLINT + {{1320, 1321, kSentinel}}, + {{1322, 1323, kSentinel}}, + {{1324, 1325, kSentinel}}, + {{1326, 1327, kSentinel}}, // NOLINT + {{1329, 1377, kSentinel}}, + {{1366, 1414, kSentinel}}, + {{4256, 11520, kSentinel}}, + {{4293, 11557, kSentinel}}, // NOLINT + {{4295, 11559, kSentinel}}, + {{4301, 11565, kSentinel}}, + {{7545, 42877, kSentinel}}, + {{7549, 11363, kSentinel}}, // NOLINT + {{7680, 7681, kSentinel}}, + {{7682, 7683, kSentinel}}, + {{7684, 7685, kSentinel}}, + {{7686, 7687, kSentinel}}, // NOLINT + {{7688, 7689, kSentinel}}, + {{7690, 7691, kSentinel}}, + {{7692, 7693, kSentinel}}, + {{7694, 7695, kSentinel}}, // NOLINT + {{7696, 7697, kSentinel}}, + {{7698, 7699, kSentinel}}, + {{7700, 7701, kSentinel}}, + {{7702, 7703, kSentinel}}, // NOLINT + {{7704, 7705, kSentinel}}, + {{7706, 7707, kSentinel}}, + {{7708, 7709, kSentinel}}, + {{7710, 7711, kSentinel}}, // NOLINT + {{7712, 7713, kSentinel}}, + {{7714, 7715, kSentinel}}, + {{7716, 7717, kSentinel}}, + {{7718, 7719, kSentinel}}, // NOLINT + {{7720, 7721, kSentinel}}, + {{7722, 7723, kSentinel}}, + {{7724, 7725, kSentinel}}, + {{7726, 7727, kSentinel}}, // NOLINT + {{7728, 7729, kSentinel}}, + {{7730, 7731, kSentinel}}, + {{7732, 7733, kSentinel}}, + {{7734, 7735, kSentinel}}, // NOLINT + {{7736, 7737, kSentinel}}, + {{7738, 7739, kSentinel}}, + {{7740, 7741, kSentinel}}, + {{7742, 7743, kSentinel}}, // NOLINT + {{7744, 7745, kSentinel}}, + 
{{7746, 7747, kSentinel}}, + {{7748, 7749, kSentinel}}, + {{7750, 7751, kSentinel}}, // NOLINT + {{7752, 7753, kSentinel}}, + {{7754, 7755, kSentinel}}, + {{7756, 7757, kSentinel}}, + {{7758, 7759, kSentinel}}, // NOLINT + {{7760, 7761, kSentinel}}, + {{7762, 7763, kSentinel}}, + {{7764, 7765, kSentinel}}, + {{7766, 7767, kSentinel}}, // NOLINT + {{7768, 7769, kSentinel}}, + {{7770, 7771, kSentinel}}, + {{7772, 7773, kSentinel}}, + {{7774, 7775, kSentinel}}, // NOLINT + {{7776, 7777, 7835, kSentinel}}, + {{7778, 7779, kSentinel}}, + {{7780, 7781, kSentinel}}, + {{7782, 7783, kSentinel}}, // NOLINT + {{7784, 7785, kSentinel}}, + {{7786, 7787, kSentinel}}, + {{7788, 7789, kSentinel}}, + {{7790, 7791, kSentinel}}, // NOLINT + {{7792, 7793, kSentinel}}, + {{7794, 7795, kSentinel}}, + {{7796, 7797, kSentinel}}, + {{7798, 7799, kSentinel}}, // NOLINT + {{7800, 7801, kSentinel}}, + {{7802, 7803, kSentinel}}, + {{7804, 7805, kSentinel}}, + {{7806, 7807, kSentinel}}, // NOLINT + {{7808, 7809, kSentinel}}, + {{7810, 7811, kSentinel}}, + {{7812, 7813, kSentinel}}, + {{7814, 7815, kSentinel}}, // NOLINT + {{7816, 7817, kSentinel}}, + {{7818, 7819, kSentinel}}, + {{7820, 7821, kSentinel}}, + {{7822, 7823, kSentinel}}, // NOLINT + {{7824, 7825, kSentinel}}, + {{7826, 7827, kSentinel}}, + {{7828, 7829, kSentinel}}, + {{7840, 7841, kSentinel}}, // NOLINT + {{7842, 7843, kSentinel}}, + {{7844, 7845, kSentinel}}, + {{7846, 7847, kSentinel}}, + {{7848, 7849, kSentinel}}, // NOLINT + {{7850, 7851, kSentinel}}, + {{7852, 7853, kSentinel}}, + {{7854, 7855, kSentinel}}, + {{7856, 7857, kSentinel}}, // NOLINT + {{7858, 7859, kSentinel}}, + {{7860, 7861, kSentinel}}, + {{7862, 7863, kSentinel}}, + {{7864, 7865, kSentinel}}, // NOLINT + {{7866, 7867, kSentinel}}, + {{7868, 7869, kSentinel}}, + {{7870, 7871, kSentinel}}, + {{7872, 7873, kSentinel}}, // NOLINT + {{7874, 7875, kSentinel}}, + {{7876, 7877, kSentinel}}, + {{7878, 7879, kSentinel}}, + {{7880, 7881, kSentinel}}, // NOLINT + 
{{7882, 7883, kSentinel}}, + {{7884, 7885, kSentinel}}, + {{7886, 7887, kSentinel}}, + {{7888, 7889, kSentinel}}, // NOLINT + {{7890, 7891, kSentinel}}, + {{7892, 7893, kSentinel}}, + {{7894, 7895, kSentinel}}, + {{7896, 7897, kSentinel}}, // NOLINT + {{7898, 7899, kSentinel}}, + {{7900, 7901, kSentinel}}, + {{7902, 7903, kSentinel}}, + {{7904, 7905, kSentinel}}, // NOLINT + {{7906, 7907, kSentinel}}, + {{7908, 7909, kSentinel}}, + {{7910, 7911, kSentinel}}, + {{7912, 7913, kSentinel}}, // NOLINT + {{7914, 7915, kSentinel}}, + {{7916, 7917, kSentinel}}, + {{7918, 7919, kSentinel}}, + {{7920, 7921, kSentinel}}, // NOLINT + {{7922, 7923, kSentinel}}, + {{7924, 7925, kSentinel}}, + {{7926, 7927, kSentinel}}, + {{7928, 7929, kSentinel}}, // NOLINT + {{7930, 7931, kSentinel}}, + {{7932, 7933, kSentinel}}, + {{7934, 7935, kSentinel}}, + {{7936, 7944, kSentinel}}, // NOLINT + {{7943, 7951, kSentinel}}, + {{7952, 7960, kSentinel}}, + {{7957, 7965, kSentinel}}, + {{7968, 7976, kSentinel}}, // NOLINT + {{7975, 7983, kSentinel}}, + {{7984, 7992, kSentinel}}, + {{7991, 7999, kSentinel}}, + {{8000, 8008, kSentinel}}, // NOLINT + {{8005, 8013, kSentinel}}, + {{8017, 8025, kSentinel}}, + {{8019, 8027, kSentinel}}, + {{8021, 8029, kSentinel}}, // NOLINT + {{8023, 8031, kSentinel}}, + {{8032, 8040, kSentinel}}, + {{8039, 8047, kSentinel}}, + {{8048, 8122, kSentinel}}, // NOLINT + {{8049, 8123, kSentinel}}, + {{8050, 8136, kSentinel}}, + {{8053, 8139, kSentinel}}, + {{8054, 8154, kSentinel}}, // NOLINT + {{8055, 8155, kSentinel}}, + {{8056, 8184, kSentinel}}, + {{8057, 8185, kSentinel}}, + {{8058, 8170, kSentinel}}, // NOLINT + {{8059, 8171, kSentinel}}, + {{8060, 8186, kSentinel}}, + {{8061, 8187, kSentinel}}, + {{8112, 8120, kSentinel}}, // NOLINT + {{8113, 8121, kSentinel}}, + {{8144, 8152, kSentinel}}, + {{8145, 8153, kSentinel}}, + {{8160, 8168, kSentinel}}, // NOLINT + {{8161, 8169, kSentinel}}, + {{8165, 8172, kSentinel}}, + {{kSentinel}}}; // NOLINT +static const uint16_t 
kEcma262UnCanonicalizeTable0Size = 1005; // NOLINT +static const int32_t kEcma262UnCanonicalizeTable0[2010] = { + 1073741889, 1, 90, 5, 1073741921, 1, + 122, 5, 181, 9, 1073742016, 13, + 214, 17, 1073742040, 21, // NOLINT + 222, 25, 1073742048, 13, 246, 17, + 1073742072, 21, 254, 25, 255, 29, + 256, 33, 257, 33, // NOLINT + 258, 37, 259, 37, 260, 41, + 261, 41, 262, 45, 263, 45, + 264, 49, 265, 49, // NOLINT + 266, 53, 267, 53, 268, 57, + 269, 57, 270, 61, 271, 61, + 272, 65, 273, 65, // NOLINT + 274, 69, 275, 69, 276, 73, + 277, 73, 278, 77, 279, 77, + 280, 81, 281, 81, // NOLINT + 282, 85, 283, 85, 284, 89, + 285, 89, 286, 93, 287, 93, + 288, 97, 289, 97, // NOLINT + 290, 101, 291, 101, 292, 105, + 293, 105, 294, 109, 295, 109, + 296, 113, 297, 113, // NOLINT + 298, 117, 299, 117, 300, 121, + 301, 121, 302, 125, 303, 125, + 306, 129, 307, 129, // NOLINT + 308, 133, 309, 133, 310, 137, + 311, 137, 313, 141, 314, 141, + 315, 145, 316, 145, // NOLINT + 317, 149, 318, 149, 319, 153, + 320, 153, 321, 157, 322, 157, + 323, 161, 324, 161, // NOLINT + 325, 165, 326, 165, 327, 169, + 328, 169, 330, 173, 331, 173, + 332, 177, 333, 177, // NOLINT + 334, 181, 335, 181, 336, 185, + 337, 185, 338, 189, 339, 189, + 340, 193, 341, 193, // NOLINT + 342, 197, 343, 197, 344, 201, + 345, 201, 346, 205, 347, 205, + 348, 209, 349, 209, // NOLINT + 350, 213, 351, 213, 352, 217, + 353, 217, 354, 221, 355, 221, + 356, 225, 357, 225, // NOLINT + 358, 229, 359, 229, 360, 233, + 361, 233, 362, 237, 363, 237, + 364, 241, 365, 241, // NOLINT + 366, 245, 367, 245, 368, 249, + 369, 249, 370, 253, 371, 253, + 372, 257, 373, 257, // NOLINT + 374, 261, 375, 261, 376, 29, + 377, 265, 378, 265, 379, 269, + 380, 269, 381, 273, // NOLINT + 382, 273, 384, 277, 385, 281, + 386, 285, 387, 285, 388, 289, + 389, 289, 390, 293, // NOLINT + 391, 297, 392, 297, 1073742217, 301, + 394, 305, 395, 309, 396, 309, + 398, 313, 399, 317, // NOLINT + 400, 321, 401, 325, 402, 325, + 403, 329, 404, 333, 405, 337, + 
406, 341, 407, 345, // NOLINT + 408, 349, 409, 349, 410, 353, + 412, 357, 413, 361, 414, 365, + 415, 369, 416, 373, // NOLINT + 417, 373, 418, 377, 419, 377, + 420, 381, 421, 381, 422, 385, + 423, 389, 424, 389, // NOLINT + 425, 393, 428, 397, 429, 397, + 430, 401, 431, 405, 432, 405, + 1073742257, 409, 434, 413, // NOLINT + 435, 417, 436, 417, 437, 421, + 438, 421, 439, 425, 440, 429, + 441, 429, 444, 433, // NOLINT + 445, 433, 447, 437, 452, 441, + 453, 441, 454, 441, 455, 445, + 456, 445, 457, 445, // NOLINT + 458, 449, 459, 449, 460, 449, + 461, 453, 462, 453, 463, 457, + 464, 457, 465, 461, // NOLINT + 466, 461, 467, 465, 468, 465, + 469, 469, 470, 469, 471, 473, + 472, 473, 473, 477, // NOLINT + 474, 477, 475, 481, 476, 481, + 477, 313, 478, 485, 479, 485, + 480, 489, 481, 489, // NOLINT + 482, 493, 483, 493, 484, 497, + 485, 497, 486, 501, 487, 501, + 488, 505, 489, 505, // NOLINT + 490, 509, 491, 509, 492, 513, + 493, 513, 494, 517, 495, 517, + 497, 521, 498, 521, // NOLINT + 499, 521, 500, 525, 501, 525, + 502, 337, 503, 437, 504, 529, + 505, 529, 506, 533, // NOLINT + 507, 533, 508, 537, 509, 537, + 510, 541, 511, 541, 512, 545, + 513, 545, 514, 549, // NOLINT + 515, 549, 516, 553, 517, 553, + 518, 557, 519, 557, 520, 561, + 521, 561, 522, 565, // NOLINT + 523, 565, 524, 569, 525, 569, + 526, 573, 527, 573, 528, 577, + 529, 577, 530, 581, // NOLINT + 531, 581, 532, 585, 533, 585, + 534, 589, 535, 589, 536, 593, + 537, 593, 538, 597, // NOLINT + 539, 597, 540, 601, 541, 601, + 542, 605, 543, 605, 544, 365, + 546, 609, 547, 609, // NOLINT + 548, 613, 549, 613, 550, 617, + 551, 617, 552, 621, 553, 621, + 554, 625, 555, 625, // NOLINT + 556, 629, 557, 629, 558, 633, + 559, 633, 560, 637, 561, 637, + 562, 641, 563, 641, // NOLINT + 570, 645, 571, 649, 572, 649, + 573, 353, 574, 653, 1073742399, 657, + 576, 661, 577, 665, // NOLINT + 578, 665, 579, 277, 580, 669, + 581, 673, 582, 677, 583, 677, + 584, 681, 585, 681, // NOLINT + 586, 685, 587, 685, 588, 689, + 
589, 689, 590, 693, 591, 693, + 592, 697, 593, 701, // NOLINT + 594, 705, 595, 281, 596, 293, + 1073742422, 301, 599, 305, 601, 317, + 603, 321, 604, 709, // NOLINT + 608, 329, 609, 713, 611, 333, + 613, 717, 614, 721, 616, 345, + 617, 341, 619, 725, // NOLINT + 620, 729, 623, 357, 625, 733, + 626, 361, 629, 369, 637, 737, + 640, 385, 643, 393, // NOLINT + 647, 741, 648, 401, 649, 669, + 1073742474, 409, 651, 413, 652, 673, + 658, 425, 670, 745, // NOLINT + 837, 749, 880, 753, 881, 753, + 882, 757, 883, 757, 886, 761, + 887, 761, 1073742715, 765, // NOLINT + 893, 769, 895, 773, 902, 777, + 1073742728, 781, 906, 785, 908, 789, + 1073742734, 793, 911, 797, // NOLINT + 913, 801, 914, 805, 1073742739, 809, + 916, 813, 917, 817, 1073742742, 821, + 919, 825, 920, 829, // NOLINT + 921, 749, 922, 833, 923, 837, + 924, 9, 1073742749, 841, 927, 845, + 928, 849, 929, 853, // NOLINT + 931, 857, 1073742756, 861, 933, 865, + 934, 869, 1073742759, 873, 939, 877, + 940, 777, 1073742765, 781, // NOLINT + 943, 785, 945, 801, 946, 805, + 1073742771, 809, 948, 813, 949, 817, + 1073742774, 821, 951, 825, // NOLINT + 952, 829, 953, 749, 954, 833, + 955, 837, 956, 9, 1073742781, 841, + 959, 845, 960, 849, // NOLINT + 961, 853, 962, 857, 963, 857, + 1073742788, 861, 965, 865, 966, 869, + 1073742791, 873, 971, 877, // NOLINT + 972, 789, 1073742797, 793, 974, 797, + 975, 881, 976, 805, 977, 829, + 981, 869, 982, 849, // NOLINT + 983, 881, 984, 885, 985, 885, + 986, 889, 987, 889, 988, 893, + 989, 893, 990, 897, // NOLINT + 991, 897, 992, 901, 993, 901, + 994, 905, 995, 905, 996, 909, + 997, 909, 998, 913, // NOLINT + 999, 913, 1000, 917, 1001, 917, + 1002, 921, 1003, 921, 1004, 925, + 1005, 925, 1006, 929, // NOLINT + 1007, 929, 1008, 833, 1009, 853, + 1010, 933, 1011, 773, 1013, 817, + 1015, 937, 1016, 937, // NOLINT + 1017, 933, 1018, 941, 1019, 941, + 1073742845, 765, 1023, 769, 1073742848, 945, + 1039, 949, 1073742864, 953, // NOLINT + 1071, 957, 1073742896, 953, 1103, 957, + 
1073742928, 945, 1119, 949, 1120, 961, + 1121, 961, 1122, 965, // NOLINT + 1123, 965, 1124, 969, 1125, 969, + 1126, 973, 1127, 973, 1128, 977, + 1129, 977, 1130, 981, // NOLINT + 1131, 981, 1132, 985, 1133, 985, + 1134, 989, 1135, 989, 1136, 993, + 1137, 993, 1138, 997, // NOLINT + 1139, 997, 1140, 1001, 1141, 1001, + 1142, 1005, 1143, 1005, 1144, 1009, + 1145, 1009, 1146, 1013, // NOLINT + 1147, 1013, 1148, 1017, 1149, 1017, + 1150, 1021, 1151, 1021, 1152, 1025, + 1153, 1025, 1162, 1029, // NOLINT + 1163, 1029, 1164, 1033, 1165, 1033, + 1166, 1037, 1167, 1037, 1168, 1041, + 1169, 1041, 1170, 1045, // NOLINT + 1171, 1045, 1172, 1049, 1173, 1049, + 1174, 1053, 1175, 1053, 1176, 1057, + 1177, 1057, 1178, 1061, // NOLINT + 1179, 1061, 1180, 1065, 1181, 1065, + 1182, 1069, 1183, 1069, 1184, 1073, + 1185, 1073, 1186, 1077, // NOLINT + 1187, 1077, 1188, 1081, 1189, 1081, + 1190, 1085, 1191, 1085, 1192, 1089, + 1193, 1089, 1194, 1093, // NOLINT + 1195, 1093, 1196, 1097, 1197, 1097, + 1198, 1101, 1199, 1101, 1200, 1105, + 1201, 1105, 1202, 1109, // NOLINT + 1203, 1109, 1204, 1113, 1205, 1113, + 1206, 1117, 1207, 1117, 1208, 1121, + 1209, 1121, 1210, 1125, // NOLINT + 1211, 1125, 1212, 1129, 1213, 1129, + 1214, 1133, 1215, 1133, 1216, 1137, + 1217, 1141, 1218, 1141, // NOLINT + 1219, 1145, 1220, 1145, 1221, 1149, + 1222, 1149, 1223, 1153, 1224, 1153, + 1225, 1157, 1226, 1157, // NOLINT + 1227, 1161, 1228, 1161, 1229, 1165, + 1230, 1165, 1231, 1137, 1232, 1169, + 1233, 1169, 1234, 1173, // NOLINT + 1235, 1173, 1236, 1177, 1237, 1177, + 1238, 1181, 1239, 1181, 1240, 1185, + 1241, 1185, 1242, 1189, // NOLINT + 1243, 1189, 1244, 1193, 1245, 1193, + 1246, 1197, 1247, 1197, 1248, 1201, + 1249, 1201, 1250, 1205, // NOLINT + 1251, 1205, 1252, 1209, 1253, 1209, + 1254, 1213, 1255, 1213, 1256, 1217, + 1257, 1217, 1258, 1221, // NOLINT + 1259, 1221, 1260, 1225, 1261, 1225, + 1262, 1229, 1263, 1229, 1264, 1233, + 1265, 1233, 1266, 1237, // NOLINT + 1267, 1237, 1268, 1241, 1269, 1241, + 
1270, 1245, 1271, 1245, 1272, 1249, + 1273, 1249, 1274, 1253, // NOLINT + 1275, 1253, 1276, 1257, 1277, 1257, + 1278, 1261, 1279, 1261, 1280, 1265, + 1281, 1265, 1282, 1269, // NOLINT + 1283, 1269, 1284, 1273, 1285, 1273, + 1286, 1277, 1287, 1277, 1288, 1281, + 1289, 1281, 1290, 1285, // NOLINT + 1291, 1285, 1292, 1289, 1293, 1289, + 1294, 1293, 1295, 1293, 1296, 1297, + 1297, 1297, 1298, 1301, // NOLINT + 1299, 1301, 1300, 1305, 1301, 1305, + 1302, 1309, 1303, 1309, 1304, 1313, + 1305, 1313, 1306, 1317, // NOLINT + 1307, 1317, 1308, 1321, 1309, 1321, + 1310, 1325, 1311, 1325, 1312, 1329, + 1313, 1329, 1314, 1333, // NOLINT + 1315, 1333, 1316, 1337, 1317, 1337, + 1318, 1341, 1319, 1341, 1320, 1345, + 1321, 1345, 1322, 1349, // NOLINT + 1323, 1349, 1324, 1353, 1325, 1353, + 1326, 1357, 1327, 1357, 1073743153, 1361, + 1366, 1365, 1073743201, 1361, // NOLINT + 1414, 1365, 1073746080, 1369, 4293, 1373, + 4295, 1377, 4301, 1381, 7545, 1385, + 7549, 1389, 7680, 1393, // NOLINT + 7681, 1393, 7682, 1397, 7683, 1397, + 7684, 1401, 7685, 1401, 7686, 1405, + 7687, 1405, 7688, 1409, // NOLINT + 7689, 1409, 7690, 1413, 7691, 1413, + 7692, 1417, 7693, 1417, 7694, 1421, + 7695, 1421, 7696, 1425, // NOLINT + 7697, 1425, 7698, 1429, 7699, 1429, + 7700, 1433, 7701, 1433, 7702, 1437, + 7703, 1437, 7704, 1441, // NOLINT + 7705, 1441, 7706, 1445, 7707, 1445, + 7708, 1449, 7709, 1449, 7710, 1453, + 7711, 1453, 7712, 1457, // NOLINT + 7713, 1457, 7714, 1461, 7715, 1461, + 7716, 1465, 7717, 1465, 7718, 1469, + 7719, 1469, 7720, 1473, // NOLINT + 7721, 1473, 7722, 1477, 7723, 1477, + 7724, 1481, 7725, 1481, 7726, 1485, + 7727, 1485, 7728, 1489, // NOLINT + 7729, 1489, 7730, 1493, 7731, 1493, + 7732, 1497, 7733, 1497, 7734, 1501, + 7735, 1501, 7736, 1505, // NOLINT + 7737, 1505, 7738, 1509, 7739, 1509, + 7740, 1513, 7741, 1513, 7742, 1517, + 7743, 1517, 7744, 1521, // NOLINT + 7745, 1521, 7746, 1525, 7747, 1525, + 7748, 1529, 7749, 1529, 7750, 1533, + 7751, 1533, 7752, 1537, // NOLINT + 
7753, 1537, 7754, 1541, 7755, 1541, + 7756, 1545, 7757, 1545, 7758, 1549, + 7759, 1549, 7760, 1553, // NOLINT + 7761, 1553, 7762, 1557, 7763, 1557, + 7764, 1561, 7765, 1561, 7766, 1565, + 7767, 1565, 7768, 1569, // NOLINT + 7769, 1569, 7770, 1573, 7771, 1573, + 7772, 1577, 7773, 1577, 7774, 1581, + 7775, 1581, 7776, 1585, // NOLINT + 7777, 1585, 7778, 1589, 7779, 1589, + 7780, 1593, 7781, 1593, 7782, 1597, + 7783, 1597, 7784, 1601, // NOLINT + 7785, 1601, 7786, 1605, 7787, 1605, + 7788, 1609, 7789, 1609, 7790, 1613, + 7791, 1613, 7792, 1617, // NOLINT + 7793, 1617, 7794, 1621, 7795, 1621, + 7796, 1625, 7797, 1625, 7798, 1629, + 7799, 1629, 7800, 1633, // NOLINT + 7801, 1633, 7802, 1637, 7803, 1637, + 7804, 1641, 7805, 1641, 7806, 1645, + 7807, 1645, 7808, 1649, // NOLINT + 7809, 1649, 7810, 1653, 7811, 1653, + 7812, 1657, 7813, 1657, 7814, 1661, + 7815, 1661, 7816, 1665, // NOLINT + 7817, 1665, 7818, 1669, 7819, 1669, + 7820, 1673, 7821, 1673, 7822, 1677, + 7823, 1677, 7824, 1681, // NOLINT + 7825, 1681, 7826, 1685, 7827, 1685, + 7828, 1689, 7829, 1689, 7835, 1585, + 7840, 1693, 7841, 1693, // NOLINT + 7842, 1697, 7843, 1697, 7844, 1701, + 7845, 1701, 7846, 1705, 7847, 1705, + 7848, 1709, 7849, 1709, // NOLINT + 7850, 1713, 7851, 1713, 7852, 1717, + 7853, 1717, 7854, 1721, 7855, 1721, + 7856, 1725, 7857, 1725, // NOLINT + 7858, 1729, 7859, 1729, 7860, 1733, + 7861, 1733, 7862, 1737, 7863, 1737, + 7864, 1741, 7865, 1741, // NOLINT + 7866, 1745, 7867, 1745, 7868, 1749, + 7869, 1749, 7870, 1753, 7871, 1753, + 7872, 1757, 7873, 1757, // NOLINT + 7874, 1761, 7875, 1761, 7876, 1765, + 7877, 1765, 7878, 1769, 7879, 1769, + 7880, 1773, 7881, 1773, // NOLINT + 7882, 1777, 7883, 1777, 7884, 1781, + 7885, 1781, 7886, 1785, 7887, 1785, + 7888, 1789, 7889, 1789, // NOLINT + 7890, 1793, 7891, 1793, 7892, 1797, + 7893, 1797, 7894, 1801, 7895, 1801, + 7896, 1805, 7897, 1805, // NOLINT + 7898, 1809, 7899, 1809, 7900, 1813, + 7901, 1813, 7902, 1817, 7903, 1817, + 7904, 1821, 7905, 
1821, // NOLINT + 7906, 1825, 7907, 1825, 7908, 1829, + 7909, 1829, 7910, 1833, 7911, 1833, + 7912, 1837, 7913, 1837, // NOLINT + 7914, 1841, 7915, 1841, 7916, 1845, + 7917, 1845, 7918, 1849, 7919, 1849, + 7920, 1853, 7921, 1853, // NOLINT + 7922, 1857, 7923, 1857, 7924, 1861, + 7925, 1861, 7926, 1865, 7927, 1865, + 7928, 1869, 7929, 1869, // NOLINT + 7930, 1873, 7931, 1873, 7932, 1877, + 7933, 1877, 7934, 1881, 7935, 1881, + 1073749760, 1885, 7943, 1889, // NOLINT + 1073749768, 1885, 7951, 1889, 1073749776, 1893, + 7957, 1897, 1073749784, 1893, 7965, 1897, + 1073749792, 1901, 7975, 1905, // NOLINT + 1073749800, 1901, 7983, 1905, 1073749808, 1909, + 7991, 1913, 1073749816, 1909, 7999, 1913, + 1073749824, 1917, 8005, 1921, // NOLINT + 1073749832, 1917, 8013, 1921, 8017, 1925, + 8019, 1929, 8021, 1933, 8023, 1937, + 8025, 1925, 8027, 1929, // NOLINT + 8029, 1933, 8031, 1937, 1073749856, 1941, + 8039, 1945, 1073749864, 1941, 8047, 1945, + 1073749872, 1949, 8049, 1953, // NOLINT + 1073749874, 1957, 8053, 1961, 1073749878, 1965, + 8055, 1969, 1073749880, 1973, 8057, 1977, + 1073749882, 1981, 8059, 1985, // NOLINT + 1073749884, 1989, 8061, 1993, 1073749936, 1997, + 8113, 2001, 1073749944, 1997, 8121, 2001, + 1073749946, 1949, 8123, 1953, // NOLINT + 8126, 749, 1073749960, 1957, 8139, 1961, + 1073749968, 2005, 8145, 2009, 1073749976, 2005, + 8153, 2009, 1073749978, 1965, // NOLINT + 8155, 1969, 1073749984, 2013, 8161, 2017, + 8165, 2021, 1073749992, 2013, 8169, 2017, + 1073749994, 1981, 8171, 1985, // NOLINT + 8172, 2021, 1073750008, 1973, 8185, 1977, + 1073750010, 1989, 8187, 1993}; // NOLINT +static const uint16_t kEcma262UnCanonicalizeMultiStrings0Size = 507; // NOLINT +static const MultiCharacterSpecialCase<2> + kEcma262UnCanonicalizeMultiStrings1[83] = { // NOLINT + {{8498, 8526}}, {{8544, 8560}}, {{8559, 8575}}, + {{8579, 8580}}, // NOLINT + {{9398, 9424}}, {{9423, 9449}}, {{11264, 11312}}, + {{11310, 11358}}, // NOLINT + {{11360, 11361}}, {{619, 11362}}, {{7549, 
11363}}, + {{637, 11364}}, // NOLINT + {{570, 11365}}, {{574, 11366}}, {{11367, 11368}}, + {{11369, 11370}}, // NOLINT + {{11371, 11372}}, {{593, 11373}}, {{625, 11374}}, + {{592, 11375}}, // NOLINT + {{594, 11376}}, {{11378, 11379}}, {{11381, 11382}}, + {{575, 11390}}, // NOLINT + {{576, 11391}}, {{11392, 11393}}, {{11394, 11395}}, + {{11396, 11397}}, // NOLINT + {{11398, 11399}}, {{11400, 11401}}, {{11402, 11403}}, + {{11404, 11405}}, // NOLINT + {{11406, 11407}}, {{11408, 11409}}, {{11410, 11411}}, + {{11412, 11413}}, // NOLINT + {{11414, 11415}}, {{11416, 11417}}, {{11418, 11419}}, + {{11420, 11421}}, // NOLINT + {{11422, 11423}}, {{11424, 11425}}, {{11426, 11427}}, + {{11428, 11429}}, // NOLINT + {{11430, 11431}}, {{11432, 11433}}, {{11434, 11435}}, + {{11436, 11437}}, // NOLINT + {{11438, 11439}}, {{11440, 11441}}, {{11442, 11443}}, + {{11444, 11445}}, // NOLINT + {{11446, 11447}}, {{11448, 11449}}, {{11450, 11451}}, + {{11452, 11453}}, // NOLINT + {{11454, 11455}}, {{11456, 11457}}, {{11458, 11459}}, + {{11460, 11461}}, // NOLINT + {{11462, 11463}}, {{11464, 11465}}, {{11466, 11467}}, + {{11468, 11469}}, // NOLINT + {{11470, 11471}}, {{11472, 11473}}, {{11474, 11475}}, + {{11476, 11477}}, // NOLINT + {{11478, 11479}}, {{11480, 11481}}, {{11482, 11483}}, + {{11484, 11485}}, // NOLINT + {{11486, 11487}}, {{11488, 11489}}, {{11490, 11491}}, + {{11499, 11500}}, // NOLINT + {{11501, 11502}}, {{11506, 11507}}, {{4256, 11520}}, + {{4293, 11557}}, // NOLINT + {{4295, 11559}}, {{4301, 11565}}, {{kSentinel}}}; // NOLINT +static const uint16_t kEcma262UnCanonicalizeTable1Size = 149; // NOLINT +static const int32_t kEcma262UnCanonicalizeTable1[298] = { + 306, 1, 334, 1, 1073742176, 5, 367, 9, + 1073742192, 5, 383, 9, 387, 13, 388, 13, // NOLINT + 1073743030, 17, 1231, 21, 1073743056, 17, 1257, 21, + 1073744896, 25, 3118, 29, 1073744944, 25, 3166, 29, // NOLINT + 3168, 33, 3169, 33, 3170, 37, 3171, 41, + 3172, 45, 3173, 49, 3174, 53, 3175, 57, // NOLINT + 3176, 57, 3177, 
61, 3178, 61, 3179, 65, + 3180, 65, 3181, 69, 3182, 73, 3183, 77, // NOLINT + 3184, 81, 3186, 85, 3187, 85, 3189, 89, + 3190, 89, 1073745022, 93, 3199, 97, 3200, 101, // NOLINT + 3201, 101, 3202, 105, 3203, 105, 3204, 109, + 3205, 109, 3206, 113, 3207, 113, 3208, 117, // NOLINT + 3209, 117, 3210, 121, 3211, 121, 3212, 125, + 3213, 125, 3214, 129, 3215, 129, 3216, 133, // NOLINT + 3217, 133, 3218, 137, 3219, 137, 3220, 141, + 3221, 141, 3222, 145, 3223, 145, 3224, 149, // NOLINT + 3225, 149, 3226, 153, 3227, 153, 3228, 157, + 3229, 157, 3230, 161, 3231, 161, 3232, 165, // NOLINT + 3233, 165, 3234, 169, 3235, 169, 3236, 173, + 3237, 173, 3238, 177, 3239, 177, 3240, 181, // NOLINT + 3241, 181, 3242, 185, 3243, 185, 3244, 189, + 3245, 189, 3246, 193, 3247, 193, 3248, 197, // NOLINT + 3249, 197, 3250, 201, 3251, 201, 3252, 205, + 3253, 205, 3254, 209, 3255, 209, 3256, 213, // NOLINT + 3257, 213, 3258, 217, 3259, 217, 3260, 221, + 3261, 221, 3262, 225, 3263, 225, 3264, 229, // NOLINT + 3265, 229, 3266, 233, 3267, 233, 3268, 237, + 3269, 237, 3270, 241, 3271, 241, 3272, 245, // NOLINT + 3273, 245, 3274, 249, 3275, 249, 3276, 253, + 3277, 253, 3278, 257, 3279, 257, 3280, 261, // NOLINT + 3281, 261, 3282, 265, 3283, 265, 3284, 269, + 3285, 269, 3286, 273, 3287, 273, 3288, 277, // NOLINT + 3289, 277, 3290, 281, 3291, 281, 3292, 285, + 3293, 285, 3294, 289, 3295, 289, 3296, 293, // NOLINT + 3297, 293, 3298, 297, 3299, 297, 3307, 301, + 3308, 301, 3309, 305, 3310, 305, 3314, 309, // NOLINT + 3315, 309, 1073745152, 313, 3365, 317, 3367, 321, + 3373, 325}; // NOLINT +static const uint16_t kEcma262UnCanonicalizeMultiStrings1Size = 83; // NOLINT +static const MultiCharacterSpecialCase<2> + kEcma262UnCanonicalizeMultiStrings5[104] = { // NOLINT + {{42560, 42561}}, {{42562, 42563}}, + {{42564, 42565}}, {{42566, 42567}}, // NOLINT + {{42568, 42569}}, {{42570, 42571}}, + {{42572, 42573}}, {{42574, 42575}}, // NOLINT + {{42576, 42577}}, {{42578, 42579}}, + {{42580, 42581}}, {{42582, 
42583}}, // NOLINT + {{42584, 42585}}, {{42586, 42587}}, + {{42588, 42589}}, {{42590, 42591}}, // NOLINT + {{42592, 42593}}, {{42594, 42595}}, + {{42596, 42597}}, {{42598, 42599}}, // NOLINT + {{42600, 42601}}, {{42602, 42603}}, + {{42604, 42605}}, {{42624, 42625}}, // NOLINT + {{42626, 42627}}, {{42628, 42629}}, + {{42630, 42631}}, {{42632, 42633}}, // NOLINT + {{42634, 42635}}, {{42636, 42637}}, + {{42638, 42639}}, {{42640, 42641}}, // NOLINT + {{42642, 42643}}, {{42644, 42645}}, + {{42646, 42647}}, {{42648, 42649}}, // NOLINT + {{42650, 42651}}, {{42786, 42787}}, + {{42788, 42789}}, {{42790, 42791}}, // NOLINT + {{42792, 42793}}, {{42794, 42795}}, + {{42796, 42797}}, {{42798, 42799}}, // NOLINT + {{42802, 42803}}, {{42804, 42805}}, + {{42806, 42807}}, {{42808, 42809}}, // NOLINT + {{42810, 42811}}, {{42812, 42813}}, + {{42814, 42815}}, {{42816, 42817}}, // NOLINT + {{42818, 42819}}, {{42820, 42821}}, + {{42822, 42823}}, {{42824, 42825}}, // NOLINT + {{42826, 42827}}, {{42828, 42829}}, + {{42830, 42831}}, {{42832, 42833}}, // NOLINT + {{42834, 42835}}, {{42836, 42837}}, + {{42838, 42839}}, {{42840, 42841}}, // NOLINT + {{42842, 42843}}, {{42844, 42845}}, + {{42846, 42847}}, {{42848, 42849}}, // NOLINT + {{42850, 42851}}, {{42852, 42853}}, + {{42854, 42855}}, {{42856, 42857}}, // NOLINT + {{42858, 42859}}, {{42860, 42861}}, + {{42862, 42863}}, {{42873, 42874}}, // NOLINT + {{42875, 42876}}, {{7545, 42877}}, + {{42878, 42879}}, {{42880, 42881}}, // NOLINT + {{42882, 42883}}, {{42884, 42885}}, + {{42886, 42887}}, {{42891, 42892}}, // NOLINT + {{613, 42893}}, {{42896, 42897}}, + {{42898, 42899}}, {{42902, 42903}}, // NOLINT + {{42904, 42905}}, {{42906, 42907}}, + {{42908, 42909}}, {{42910, 42911}}, // NOLINT + {{42912, 42913}}, {{42914, 42915}}, + {{42916, 42917}}, {{42918, 42919}}, // NOLINT + {{42920, 42921}}, {{614, 42922}}, + {{604, 42923}}, {{609, 42924}}, // NOLINT + {{620, 42925}}, {{670, 42928}}, + {{647, 42929}}, {{kSentinel}}}; // NOLINT +static const 
uint16_t kEcma262UnCanonicalizeTable5Size = 198; // NOLINT +static const int32_t + kEcma262UnCanonicalizeTable5[396] = + {1600, 1, 1601, 1, 1602, 5, 1603, 5, + 1604, 9, 1605, 9, 1606, 13, 1607, 13, // NOLINT + 1608, 17, 1609, 17, 1610, 21, 1611, 21, + 1612, 25, 1613, 25, 1614, 29, 1615, 29, // NOLINT + 1616, 33, 1617, 33, 1618, 37, 1619, 37, + 1620, 41, 1621, 41, 1622, 45, 1623, 45, // NOLINT + 1624, 49, 1625, 49, 1626, 53, 1627, 53, + 1628, 57, 1629, 57, 1630, 61, 1631, 61, // NOLINT + 1632, 65, 1633, 65, 1634, 69, 1635, 69, + 1636, 73, 1637, 73, 1638, 77, 1639, 77, // NOLINT + 1640, 81, 1641, 81, 1642, 85, 1643, 85, + 1644, 89, 1645, 89, 1664, 93, 1665, 93, // NOLINT + 1666, 97, 1667, 97, 1668, 101, 1669, 101, + 1670, 105, 1671, 105, 1672, 109, 1673, 109, // NOLINT + 1674, 113, 1675, 113, 1676, 117, 1677, 117, + 1678, 121, 1679, 121, 1680, 125, 1681, 125, // NOLINT + 1682, 129, 1683, 129, 1684, 133, 1685, 133, + 1686, 137, 1687, 137, 1688, 141, 1689, 141, // NOLINT + 1690, 145, 1691, 145, 1826, 149, 1827, 149, + 1828, 153, 1829, 153, 1830, 157, 1831, 157, // NOLINT + 1832, 161, 1833, 161, 1834, 165, 1835, 165, + 1836, 169, 1837, 169, 1838, 173, 1839, 173, // NOLINT + 1842, 177, 1843, 177, 1844, 181, 1845, 181, + 1846, 185, 1847, 185, 1848, 189, 1849, 189, // NOLINT + 1850, 193, 1851, 193, 1852, 197, 1853, 197, + 1854, 201, 1855, 201, 1856, 205, 1857, 205, // NOLINT + 1858, 209, 1859, 209, 1860, 213, 1861, 213, + 1862, 217, 1863, 217, 1864, 221, 1865, 221, // NOLINT + 1866, 225, 1867, 225, 1868, 229, 1869, 229, + 1870, 233, 1871, 233, 1872, 237, 1873, 237, // NOLINT + 1874, 241, 1875, 241, 1876, 245, 1877, 245, + 1878, 249, 1879, 249, 1880, 253, 1881, 253, // NOLINT + 1882, 257, 1883, 257, 1884, 261, 1885, 261, + 1886, 265, 1887, 265, 1888, 269, 1889, 269, // NOLINT + 1890, 273, 1891, 273, 1892, 277, 1893, 277, + 1894, 281, 1895, 281, 1896, 285, 1897, 285, // NOLINT + 1898, 289, 1899, 289, 1900, 293, 1901, 293, + 1902, 297, 1903, 297, 1913, 301, 1914, 301, // 
NOLINT + 1915, 305, 1916, 305, 1917, 309, 1918, 313, + 1919, 313, 1920, 317, 1921, 317, 1922, 321, // NOLINT + 1923, 321, 1924, 325, 1925, 325, 1926, 329, + 1927, 329, 1931, 333, 1932, 333, 1933, 337, // NOLINT + 1936, 341, 1937, 341, 1938, 345, 1939, 345, + 1942, 349, 1943, 349, 1944, 353, 1945, 353, // NOLINT + 1946, 357, 1947, 357, 1948, 361, 1949, 361, + 1950, 365, 1951, 365, 1952, 369, 1953, 369, // NOLINT + 1954, 373, 1955, 373, 1956, 377, 1957, 377, + 1958, 381, 1959, 381, 1960, 385, 1961, 385, // NOLINT + 1962, 389, 1963, 393, 1964, 397, 1965, 401, + 1968, 405, 1969, 409}; // NOLINT +static const uint16_t kEcma262UnCanonicalizeMultiStrings5Size = 104; // NOLINT +static const MultiCharacterSpecialCase<2> + kEcma262UnCanonicalizeMultiStrings7[3] = { // NOLINT + {{65313, 65345}}, + {{65338, 65370}}, + {{kSentinel}}}; // NOLINT +static const uint16_t kEcma262UnCanonicalizeTable7Size = 4; // NOLINT +static const int32_t kEcma262UnCanonicalizeTable7[8] = { + 1073749793, 1, 7994, 5, 1073749825, 1, 8026, 5}; // NOLINT +static const uint16_t kEcma262UnCanonicalizeMultiStrings7Size = 3; // NOLINT +int Ecma262UnCanonicalize::Convert(uchar c, uchar n, uchar* result, + bool* allow_caching_ptr) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupMapping( + kEcma262UnCanonicalizeTable0, kEcma262UnCanonicalizeTable0Size, + kEcma262UnCanonicalizeMultiStrings0, c, n, result, allow_caching_ptr); + case 1: + return LookupMapping( + kEcma262UnCanonicalizeTable1, kEcma262UnCanonicalizeTable1Size, + kEcma262UnCanonicalizeMultiStrings1, c, n, result, allow_caching_ptr); + case 5: + return LookupMapping( + kEcma262UnCanonicalizeTable5, kEcma262UnCanonicalizeTable5Size, + kEcma262UnCanonicalizeMultiStrings5, c, n, result, allow_caching_ptr); + case 7: + return LookupMapping( + kEcma262UnCanonicalizeTable7, kEcma262UnCanonicalizeTable7Size, + kEcma262UnCanonicalizeMultiStrings7, c, n, result, allow_caching_ptr); + default: + return 0; + } +} + +static 
const MultiCharacterSpecialCase<1> + kCanonicalizationRangeMultiStrings0[1] = { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kCanonicalizationRangeTable0Size = 70; // NOLINT +static const int32_t kCanonicalizationRangeTable0[140] = { + 1073741889, 100, 90, 0, 1073741921, 100, 122, 0, + 1073742016, 88, 214, 0, 1073742040, 24, 222, 0, // NOLINT + 1073742048, 88, 246, 0, 1073742072, 24, 254, 0, + 1073742715, 8, 893, 0, 1073742728, 8, 906, 0, // NOLINT + 1073742749, 8, 927, 0, 1073742759, 16, 939, 0, + 1073742765, 8, 943, 0, 1073742781, 8, 959, 0, // NOLINT + 1073742791, 16, 971, 0, 1073742845, 8, 1023, 0, + 1073742848, 60, 1039, 0, 1073742864, 124, 1071, 0, // NOLINT + 1073742896, 124, 1103, 0, 1073742928, 60, 1119, 0, + 1073743153, 148, 1366, 0, 1073743201, 148, 1414, 0, // NOLINT + 1073746080, 148, 4293, 0, 1073749760, 28, 7943, 0, + 1073749768, 28, 7951, 0, 1073749776, 20, 7957, 0, // NOLINT + 1073749784, 20, 7965, 0, 1073749792, 28, 7975, 0, + 1073749800, 28, 7983, 0, 1073749808, 28, 7991, 0, // NOLINT + 1073749816, 28, 7999, 0, 1073749824, 20, 8005, 0, + 1073749832, 20, 8013, 0, 1073749856, 28, 8039, 0, // NOLINT + 1073749864, 28, 8047, 0, 1073749874, 12, 8053, 0, + 1073749960, 12, 8139, 0}; // NOLINT +static const uint16_t kCanonicalizationRangeMultiStrings0Size = 1; // NOLINT +static const MultiCharacterSpecialCase<1> + kCanonicalizationRangeMultiStrings1[1] = { // NOLINT + {{kSentinel}}}; // NOLINT +static const uint16_t kCanonicalizationRangeTable1Size = 14; // NOLINT +static const int32_t kCanonicalizationRangeTable1[28] = { + 1073742176, 60, 367, 0, 1073742192, 60, 383, 0, + 1073743030, 100, 1231, 0, 1073743056, 100, 1257, 0, // NOLINT + 1073744896, 184, 3118, 0, 1073744944, 184, 3166, 0, + 1073745152, 148, 3365, 0}; // NOLINT +static const uint16_t kCanonicalizationRangeMultiStrings1Size = 1; // NOLINT +static const MultiCharacterSpecialCase<1> + kCanonicalizationRangeMultiStrings7[1] = { // NOLINT + {{kSentinel}}}; // NOLINT +static const 
uint16_t kCanonicalizationRangeTable7Size = 4; // NOLINT +static const int32_t kCanonicalizationRangeTable7[8] = { + 1073749793, 100, 7994, 0, 1073749825, 100, 8026, 0}; // NOLINT +static const uint16_t kCanonicalizationRangeMultiStrings7Size = 1; // NOLINT +int CanonicalizationRange::Convert(uchar c, uchar n, uchar* result, + bool* allow_caching_ptr) { + int chunk_index = c >> 13; + switch (chunk_index) { + case 0: + return LookupMapping( + kCanonicalizationRangeTable0, kCanonicalizationRangeTable0Size, + kCanonicalizationRangeMultiStrings0, c, n, result, allow_caching_ptr); + case 1: + return LookupMapping( + kCanonicalizationRangeTable1, kCanonicalizationRangeTable1Size, + kCanonicalizationRangeMultiStrings1, c, n, result, allow_caching_ptr); + case 7: + return LookupMapping( + kCanonicalizationRangeTable7, kCanonicalizationRangeTable7Size, + kCanonicalizationRangeMultiStrings7, c, n, result, allow_caching_ptr); + default: + return 0; + } +} + +const uchar UnicodeData::kMaxCodePoint = 0xFFFD; + +int UnicodeData::GetByteCount() { + return kUppercaseTable0Size * sizeof(int32_t) // NOLINT + + kUppercaseTable1Size * sizeof(int32_t) // NOLINT + + kUppercaseTable5Size * sizeof(int32_t) // NOLINT + + kUppercaseTable7Size * sizeof(int32_t) // NOLINT + + kLetterTable0Size * sizeof(int32_t) // NOLINT + + kLetterTable1Size * sizeof(int32_t) // NOLINT + + kLetterTable2Size * sizeof(int32_t) // NOLINT + + kLetterTable3Size * sizeof(int32_t) // NOLINT + + kLetterTable4Size * sizeof(int32_t) // NOLINT + + kLetterTable5Size * sizeof(int32_t) // NOLINT + + kLetterTable6Size * sizeof(int32_t) // NOLINT + + kLetterTable7Size * sizeof(int32_t) // NOLINT + + kID_StartTable0Size * sizeof(int32_t) // NOLINT + + kID_StartTable1Size * sizeof(int32_t) // NOLINT + + kID_StartTable2Size * sizeof(int32_t) // NOLINT + + kID_StartTable3Size * sizeof(int32_t) // NOLINT + + kID_StartTable4Size * sizeof(int32_t) // NOLINT + + kID_StartTable5Size * sizeof(int32_t) // NOLINT + + 
kID_StartTable6Size * sizeof(int32_t) // NOLINT + + kID_StartTable7Size * sizeof(int32_t) // NOLINT + + kID_ContinueTable0Size * sizeof(int32_t) // NOLINT + + kID_ContinueTable1Size * sizeof(int32_t) // NOLINT + + kID_ContinueTable5Size * sizeof(int32_t) // NOLINT + + kID_ContinueTable7Size * sizeof(int32_t) // NOLINT + + kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT + + kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT + + kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT + + kToLowercaseMultiStrings0Size * + sizeof(MultiCharacterSpecialCase<2>) // NOLINT + + kToLowercaseMultiStrings1Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kToLowercaseMultiStrings5Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kToLowercaseMultiStrings7Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kToUppercaseMultiStrings0Size * + sizeof(MultiCharacterSpecialCase<3>) // NOLINT + + kToUppercaseMultiStrings1Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kToUppercaseMultiStrings5Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kToUppercaseMultiStrings7Size * + sizeof(MultiCharacterSpecialCase<3>) // NOLINT + + kEcma262CanonicalizeMultiStrings0Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kEcma262CanonicalizeMultiStrings1Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kEcma262CanonicalizeMultiStrings5Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kEcma262CanonicalizeMultiStrings7Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kEcma262UnCanonicalizeMultiStrings0Size * + sizeof(MultiCharacterSpecialCase<4>) // NOLINT + + kEcma262UnCanonicalizeMultiStrings1Size * + sizeof(MultiCharacterSpecialCase<2>) // NOLINT + + kEcma262UnCanonicalizeMultiStrings5Size * + sizeof(MultiCharacterSpecialCase<2>) // NOLINT + + kEcma262UnCanonicalizeMultiStrings7Size * + sizeof(MultiCharacterSpecialCase<2>) // NOLINT + + kCanonicalizationRangeMultiStrings0Size * + sizeof(MultiCharacterSpecialCase<1>) 
// NOLINT + + kCanonicalizationRangeMultiStrings1Size * + sizeof(MultiCharacterSpecialCase<1>) // NOLINT + + kCanonicalizationRangeMultiStrings7Size * + sizeof(MultiCharacterSpecialCase<1>); // NOLINT +} +#endif // !V8_INTL_SUPPORT + +} // namespace unibrow diff --git a/deps/v8/src/strings/unicode.h b/deps/v8/src/strings/unicode.h new file mode 100644 index 0000000000..bd94300e34 --- /dev/null +++ b/deps/v8/src/strings/unicode.h @@ -0,0 +1,257 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_STRINGS_UNICODE_H_ +#define V8_STRINGS_UNICODE_H_ + +#include +#include "src/common/globals.h" +#include "src/third_party/utf8-decoder/utf8-decoder.h" +#include "src/utils/utils.h" +/** + * \file + * Definitions and convenience functions for working with unicode. + */ + +namespace unibrow { + +using uchar = unsigned int; +using byte = unsigned char; + +/** + * The max length of the result of converting the case of a single + * character. 
+ */ +const int kMaxMappingSize = 4; + +#ifndef V8_INTL_SUPPORT +template +class Predicate { + public: + inline Predicate() = default; + inline bool get(uchar c); + + private: + friend class Test; + bool CalculateValue(uchar c); + class CacheEntry { + public: + inline CacheEntry() + : bit_field_(CodePointField::encode(0) | ValueField::encode(0)) {} + inline CacheEntry(uchar code_point, bool value) + : bit_field_( + CodePointField::encode(CodePointField::kMask & code_point) | + ValueField::encode(value)) { + DCHECK_IMPLIES((CodePointField::kMask & code_point) != code_point, + code_point == static_cast(-1)); + } + + uchar code_point() const { return CodePointField::decode(bit_field_); } + bool value() const { return ValueField::decode(bit_field_); } + + private: + class CodePointField : public v8::internal::BitField {}; + class ValueField : public v8::internal::BitField {}; + + uint32_t bit_field_; + }; + static const int kSize = size; + static const int kMask = kSize - 1; + CacheEntry entries_[kSize]; +}; + +// A cache used in case conversion. It caches the value for characters +// that either have no mapping or map to a single character independent +// of context. Characters that map to more than one character or that +// map differently depending on context are always looked up. 
+template +class Mapping { + public: + inline Mapping() = default; + inline int get(uchar c, uchar n, uchar* result); + + private: + friend class Test; + int CalculateValue(uchar c, uchar n, uchar* result); + struct CacheEntry { + inline CacheEntry() : code_point_(kNoChar), offset_(0) {} + inline CacheEntry(uchar code_point, signed offset) + : code_point_(code_point), offset_(offset) {} + uchar code_point_; + signed offset_; + static const int kNoChar = (1 << 21) - 1; + }; + static const int kSize = size; + static const int kMask = kSize - 1; + CacheEntry entries_[kSize]; +}; + +class UnicodeData { + private: + friend class Test; + static int GetByteCount(); + static const uchar kMaxCodePoint; +}; + +#endif // !V8_INTL_SUPPORT + +class Utf16 { + public: + static const int kNoPreviousCharacter = -1; + static inline bool IsSurrogatePair(int lead, int trail) { + return IsLeadSurrogate(lead) && IsTrailSurrogate(trail); + } + static inline bool IsLeadSurrogate(int code) { + return (code & 0xfc00) == 0xd800; + } + static inline bool IsTrailSurrogate(int code) { + return (code & 0xfc00) == 0xdc00; + } + + static inline int CombineSurrogatePair(uchar lead, uchar trail) { + return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff); + } + static const uchar kMaxNonSurrogateCharCode = 0xffff; + // Encoding a single UTF-16 code unit will produce 1, 2 or 3 bytes + // of UTF-8 data. The special case where the unit is a surrogate + // trail produces 1 byte net, because the encoding of the pair is + // 4 bytes and the 3 bytes that were used to encode the lead surrogate + // can be reclaimed. + static const int kMaxExtraUtf8BytesForOneUtf16CodeUnit = 3; + // One UTF-16 surrogate is endoded (illegally) as 3 UTF-8 bytes. + // The illegality stems from the surrogate not being part of a pair. 
+ static const int kUtf8BytesToCodeASurrogate = 3; + static inline uint16_t LeadSurrogate(uint32_t char_code) { + return 0xd800 + (((char_code - 0x10000) >> 10) & 0x3ff); + } + static inline uint16_t TrailSurrogate(uint32_t char_code) { + return 0xdc00 + (char_code & 0x3ff); + } +}; + +class Latin1 { + public: + static const uint16_t kMaxChar = 0xff; + // Convert the character to Latin-1 case equivalent if possible. + static inline uint16_t TryConvertToLatin1(uint16_t c) { + switch (c) { + // This are equivalent characters in unicode. + case 0x39c: + case 0x3bc: + return 0xb5; + // This is an uppercase of a Latin-1 character + // outside of Latin-1. + case 0x178: + return 0xff; + } + return c; + } +}; + +class V8_EXPORT_PRIVATE Utf8 { + public: + using State = Utf8DfaDecoder::State; + + static inline uchar Length(uchar chr, int previous); + static inline unsigned EncodeOneByte(char* out, uint8_t c); + static inline unsigned Encode(char* out, uchar c, int previous, + bool replace_invalid = false); + static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); + + // The unicode replacement character, used to signal invalid unicode + // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. + static const uchar kBadChar = 0xFFFD; + static const uchar kBufferEmpty = 0x0; + static const uchar kIncomplete = 0xFFFFFFFC; // any non-valid code point. + static const unsigned kMaxEncodedSize = 4; + static const unsigned kMaxOneByteChar = 0x7f; + static const unsigned kMaxTwoByteChar = 0x7ff; + static const unsigned kMaxThreeByteChar = 0xffff; + static const unsigned kMaxFourByteChar = 0x1fffff; + + // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together + // that match are coded as a 4 byte UTF-8 sequence. + static const unsigned kBytesSavedByCombiningSurrogates = 2; + static const unsigned kSizeOfUnmatchedSurrogate = 3; + // The maximum size a single UTF-16 code unit may take up when encoded as + // UTF-8. 
+ static const unsigned kMax16BitCodeUnitSize = 3; + static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); + + using Utf8IncrementalBuffer = uint32_t; + static inline uchar ValueOfIncremental(const byte** cursor, State* state, + Utf8IncrementalBuffer* buffer); + static uchar ValueOfIncrementalFinish(State* state); + + // Excludes non-characters from the set of valid code points. + static inline bool IsValidCharacter(uchar c); + + // Validate if the input has a valid utf-8 encoding. Unlike JS source code + // this validation function will accept any unicode code point, including + // kBadChar and BOMs. + // + // This method checks for: + // - valid utf-8 endcoding (e.g. no over-long encodings), + // - absence of surrogates, + // - valid code point range. + static bool ValidateEncoding(const byte* str, size_t length); +}; + +struct Uppercase { + static bool Is(uchar c); +}; +struct Letter { + static bool Is(uchar c); +}; +#ifndef V8_INTL_SUPPORT +struct V8_EXPORT_PRIVATE ID_Start { + static bool Is(uchar c); +}; +struct V8_EXPORT_PRIVATE ID_Continue { + static bool Is(uchar c); +}; +struct V8_EXPORT_PRIVATE WhiteSpace { + static bool Is(uchar c); +}; +#endif // !V8_INTL_SUPPORT + +// LineTerminator: 'JS_Line_Terminator' in point.properties +// ES#sec-line-terminators lists exactly 4 code points: +// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029) +V8_INLINE bool IsLineTerminator(uchar c) { + return c == 0x000A || c == 0x000D || c == 0x2028 || c == 0x2029; +} + +V8_INLINE bool IsStringLiteralLineTerminator(uchar c) { + return c == 0x000A || c == 0x000D; +} + +#ifndef V8_INTL_SUPPORT +struct ToLowercase { + static const int kMaxWidth = 3; + static const bool kIsToLower = true; + static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); +}; +struct ToUppercase { + static const int kMaxWidth = 3; + static const bool kIsToLower = false; + static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); +}; +struct 
V8_EXPORT_PRIVATE Ecma262Canonicalize { + static const int kMaxWidth = 1; + static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); +}; +struct V8_EXPORT_PRIVATE Ecma262UnCanonicalize { + static const int kMaxWidth = 4; + static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); +}; +struct V8_EXPORT_PRIVATE CanonicalizationRange { + static const int kMaxWidth = 1; + static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); +}; +#endif // !V8_INTL_SUPPORT + +} // namespace unibrow + +#endif // V8_STRINGS_UNICODE_H_ diff --git a/deps/v8/src/strings/uri.cc b/deps/v8/src/strings/uri.cc new file mode 100644 index 0000000000..430c8dd0eb --- /dev/null +++ b/deps/v8/src/strings/uri.cc @@ -0,0 +1,510 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/strings/uri.h" + +#include + +#include "src/execution/isolate-inl.h" +#include "src/strings/char-predicates-inl.h" +#include "src/strings/string-search.h" +#include "src/strings/unicode-inl.h" + +namespace v8 { +namespace internal { + +namespace { // anonymous namespace for DecodeURI helper functions +bool IsReservedPredicate(uc16 c) { + switch (c) { + case '#': + case '$': + case '&': + case '+': + case ',': + case '/': + case ':': + case ';': + case '=': + case '?': + case '@': + return true; + default: + return false; + } +} + +bool IsReplacementCharacter(const uint8_t* octets, int length) { + // The replacement character is at codepoint U+FFFD in the Unicode Specials + // table. Its UTF-8 encoding is 0xEF 0xBF 0xBD. 
+ if (length != 3 || octets[0] != 0xEF || octets[1] != 0xBF || + octets[2] != 0xBD) { + return false; + } + return true; +} + +bool DecodeOctets(const uint8_t* octets, int length, + std::vector* buffer) { + size_t cursor = 0; + uc32 value = unibrow::Utf8::ValueOf(octets, length, &cursor); + if (value == unibrow::Utf8::kBadChar && + !IsReplacementCharacter(octets, length)) { + return false; + } + + if (value <= static_cast(unibrow::Utf16::kMaxNonSurrogateCharCode)) { + buffer->push_back(value); + } else { + buffer->push_back(unibrow::Utf16::LeadSurrogate(value)); + buffer->push_back(unibrow::Utf16::TrailSurrogate(value)); + } + return true; +} + +int TwoDigitHex(uc16 character1, uc16 character2) { + if (character1 > 'f') return -1; + int high = HexValue(character1); + if (high == -1) return -1; + if (character2 > 'f') return -1; + int low = HexValue(character2); + if (low == -1) return -1; + return (high << 4) + low; +} + +template +void AddToBuffer(uc16 decoded, String::FlatContent* uri_content, int index, + bool is_uri, std::vector* buffer) { + if (is_uri && IsReservedPredicate(decoded)) { + buffer->push_back('%'); + uc16 first = uri_content->Get(index + 1); + uc16 second = uri_content->Get(index + 2); + DCHECK_GT(std::numeric_limits::max(), first); + DCHECK_GT(std::numeric_limits::max(), second); + + buffer->push_back(first); + buffer->push_back(second); + } else { + buffer->push_back(decoded); + } +} + +bool IntoTwoByte(int index, bool is_uri, int uri_length, + String::FlatContent* uri_content, std::vector* buffer) { + for (int k = index; k < uri_length; k++) { + uc16 code = uri_content->Get(k); + if (code == '%') { + int two_digits; + if (k + 2 >= uri_length || + (two_digits = TwoDigitHex(uri_content->Get(k + 1), + uri_content->Get(k + 2))) < 0) { + return false; + } + k += 2; + uc16 decoded = static_cast(two_digits); + if (decoded > unibrow::Utf8::kMaxOneByteChar) { + uint8_t octets[unibrow::Utf8::kMaxEncodedSize]; + octets[0] = decoded; + + int 
number_of_continuation_bytes = 0; + while ((decoded << ++number_of_continuation_bytes) & 0x80) { + if (number_of_continuation_bytes > 3 || k + 3 >= uri_length) { + return false; + } + if (uri_content->Get(++k) != '%' || + (two_digits = TwoDigitHex(uri_content->Get(k + 1), + uri_content->Get(k + 2))) < 0) { + return false; + } + k += 2; + uc16 continuation_byte = static_cast(two_digits); + octets[number_of_continuation_bytes] = continuation_byte; + } + + if (!DecodeOctets(octets, number_of_continuation_bytes, buffer)) { + return false; + } + } else { + AddToBuffer(decoded, uri_content, k - 2, is_uri, buffer); + } + } else { + buffer->push_back(code); + } + } + return true; +} + +bool IntoOneAndTwoByte(Handle uri, bool is_uri, + std::vector* one_byte_buffer, + std::vector* two_byte_buffer) { + DisallowHeapAllocation no_gc; + String::FlatContent uri_content = uri->GetFlatContent(no_gc); + + int uri_length = uri->length(); + for (int k = 0; k < uri_length; k++) { + uc16 code = uri_content.Get(k); + if (code == '%') { + int two_digits; + if (k + 2 >= uri_length || + (two_digits = TwoDigitHex(uri_content.Get(k + 1), + uri_content.Get(k + 2))) < 0) { + return false; + } + + uc16 decoded = static_cast(two_digits); + if (decoded > unibrow::Utf8::kMaxOneByteChar) { + return IntoTwoByte(k, is_uri, uri_length, &uri_content, + two_byte_buffer); + } + + AddToBuffer(decoded, &uri_content, k, is_uri, one_byte_buffer); + k += 2; + } else { + if (code > unibrow::Utf8::kMaxOneByteChar) { + return IntoTwoByte(k, is_uri, uri_length, &uri_content, + two_byte_buffer); + } + one_byte_buffer->push_back(code); + } + } + return true; +} + +} // anonymous namespace + +MaybeHandle Uri::Decode(Isolate* isolate, Handle uri, + bool is_uri) { + uri = String::Flatten(isolate, uri); + std::vector one_byte_buffer; + std::vector two_byte_buffer; + + if (!IntoOneAndTwoByte(uri, is_uri, &one_byte_buffer, &two_byte_buffer)) { + THROW_NEW_ERROR(isolate, NewURIError(), String); + } + + if 
(two_byte_buffer.empty()) { + return isolate->factory()->NewStringFromOneByte(Vector( + one_byte_buffer.data(), static_cast(one_byte_buffer.size()))); + } + + Handle result; + int result_length = + static_cast(one_byte_buffer.size() + two_byte_buffer.size()); + ASSIGN_RETURN_ON_EXCEPTION( + isolate, result, isolate->factory()->NewRawTwoByteString(result_length), + String); + + DisallowHeapAllocation no_gc; + CopyChars(result->GetChars(no_gc), one_byte_buffer.data(), + one_byte_buffer.size()); + CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(), + two_byte_buffer.data(), two_byte_buffer.size()); + + return result; +} + +namespace { // anonymous namespace for EncodeURI helper functions +bool IsUnescapePredicateInUriComponent(uc16 c) { + if (IsAlphaNumeric(c)) { + return true; + } + + switch (c) { + case '!': + case '\'': + case '(': + case ')': + case '*': + case '-': + case '.': + case '_': + case '~': + return true; + default: + return false; + } +} + +bool IsUriSeparator(uc16 c) { + switch (c) { + case '#': + case ':': + case ';': + case '/': + case '?': + case '$': + case '&': + case '+': + case ',': + case '@': + case '=': + return true; + default: + return false; + } +} + +void AddEncodedOctetToBuffer(uint8_t octet, std::vector* buffer) { + buffer->push_back('%'); + buffer->push_back(HexCharOfValue(octet >> 4)); + buffer->push_back(HexCharOfValue(octet & 0x0F)); +} + +void EncodeSingle(uc16 c, std::vector* buffer) { + char s[4] = {}; + int number_of_bytes; + number_of_bytes = + unibrow::Utf8::Encode(s, c, unibrow::Utf16::kNoPreviousCharacter, false); + for (int k = 0; k < number_of_bytes; k++) { + AddEncodedOctetToBuffer(s[k], buffer); + } +} + +void EncodePair(uc16 cc1, uc16 cc2, std::vector* buffer) { + char s[4] = {}; + int number_of_bytes = + unibrow::Utf8::Encode(s, unibrow::Utf16::CombineSurrogatePair(cc1, cc2), + unibrow::Utf16::kNoPreviousCharacter, false); + for (int k = 0; k < number_of_bytes; k++) { + AddEncodedOctetToBuffer(s[k], buffer); + 
} +} + +} // anonymous namespace + +MaybeHandle Uri::Encode(Isolate* isolate, Handle uri, + bool is_uri) { + uri = String::Flatten(isolate, uri); + int uri_length = uri->length(); + std::vector buffer; + buffer.reserve(uri_length); + + { + DisallowHeapAllocation no_gc; + String::FlatContent uri_content = uri->GetFlatContent(no_gc); + + for (int k = 0; k < uri_length; k++) { + uc16 cc1 = uri_content.Get(k); + if (unibrow::Utf16::IsLeadSurrogate(cc1)) { + k++; + if (k < uri_length) { + uc16 cc2 = uri->Get(k); + if (unibrow::Utf16::IsTrailSurrogate(cc2)) { + EncodePair(cc1, cc2, &buffer); + continue; + } + } + } else if (!unibrow::Utf16::IsTrailSurrogate(cc1)) { + if (IsUnescapePredicateInUriComponent(cc1) || + (is_uri && IsUriSeparator(cc1))) { + buffer.push_back(cc1); + } else { + EncodeSingle(cc1, &buffer); + } + continue; + } + + AllowHeapAllocation allocate_error_and_return; + THROW_NEW_ERROR(isolate, NewURIError(), String); + } + } + + return isolate->factory()->NewStringFromOneByte(VectorOf(buffer)); +} + +namespace { // Anonymous namespace for Escape and Unescape + +template +int UnescapeChar(Vector vector, int i, int length, int* step) { + uint16_t character = vector[i]; + int32_t hi = 0; + int32_t lo = 0; + if (character == '%' && i <= length - 6 && vector[i + 1] == 'u' && + (hi = TwoDigitHex(vector[i + 2], vector[i + 3])) > -1 && + (lo = TwoDigitHex(vector[i + 4], vector[i + 5])) > -1) { + *step = 6; + return (hi << 8) + lo; + } else if (character == '%' && i <= length - 3 && + (lo = TwoDigitHex(vector[i + 1], vector[i + 2])) > -1) { + *step = 3; + return lo; + } else { + *step = 1; + return character; + } +} + +template +MaybeHandle UnescapeSlow(Isolate* isolate, Handle string, + int start_index) { + bool one_byte = true; + int length = string->length(); + + int unescaped_length = 0; + { + DisallowHeapAllocation no_allocation; + Vector vector = string->GetCharVector(no_allocation); + for (int i = start_index; i < length; unescaped_length++) { + int step; + 
if (UnescapeChar(vector, i, length, &step) > + String::kMaxOneByteCharCode) { + one_byte = false; + } + i += step; + } + } + + DCHECK(start_index < length); + Handle first_part = + isolate->factory()->NewProperSubString(string, 0, start_index); + + int dest_position = 0; + Handle second_part; + DCHECK_LE(unescaped_length, String::kMaxLength); + if (one_byte) { + Handle dest = isolate->factory() + ->NewRawOneByteString(unescaped_length) + .ToHandleChecked(); + DisallowHeapAllocation no_allocation; + Vector vector = string->GetCharVector(no_allocation); + for (int i = start_index; i < length; dest_position++) { + int step; + dest->SeqOneByteStringSet(dest_position, + UnescapeChar(vector, i, length, &step)); + i += step; + } + second_part = dest; + } else { + Handle dest = isolate->factory() + ->NewRawTwoByteString(unescaped_length) + .ToHandleChecked(); + DisallowHeapAllocation no_allocation; + Vector vector = string->GetCharVector(no_allocation); + for (int i = start_index; i < length; dest_position++) { + int step; + dest->SeqTwoByteStringSet(dest_position, + UnescapeChar(vector, i, length, &step)); + i += step; + } + second_part = dest; + } + return isolate->factory()->NewConsString(first_part, second_part); +} + +bool IsNotEscaped(uint16_t c) { + if (IsAlphaNumeric(c)) { + return true; + } + // @*_+-./ + switch (c) { + case '@': + case '*': + case '_': + case '+': + case '-': + case '.': + case '/': + return true; + default: + return false; + } +} + +template +static MaybeHandle UnescapePrivate(Isolate* isolate, + Handle source) { + int index; + { + DisallowHeapAllocation no_allocation; + StringSearch search(isolate, StaticCharVector("%")); + index = search.Search(source->GetCharVector(no_allocation), 0); + if (index < 0) return source; + } + return UnescapeSlow(isolate, source, index); +} + +template +static MaybeHandle EscapePrivate(Isolate* isolate, + Handle string) { + DCHECK(string->IsFlat()); + int escaped_length = 0; + int length = string->length(); + + { 
+ DisallowHeapAllocation no_allocation; + Vector vector = string->GetCharVector(no_allocation); + for (int i = 0; i < length; i++) { + uint16_t c = vector[i]; + if (c >= 256) { + escaped_length += 6; + } else if (IsNotEscaped(c)) { + escaped_length++; + } else { + escaped_length += 3; + } + + // We don't allow strings that are longer than a maximal length. + DCHECK_LT(String::kMaxLength, 0x7FFFFFFF - 6); // Cannot overflow. + if (escaped_length > String::kMaxLength) break; // Provoke exception. + } + } + + // No length change implies no change. Return original string if no change. + if (escaped_length == length) return string; + + Handle dest; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, dest, isolate->factory()->NewRawOneByteString(escaped_length), + String); + int dest_position = 0; + + { + DisallowHeapAllocation no_allocation; + Vector vector = string->GetCharVector(no_allocation); + for (int i = 0; i < length; i++) { + uint16_t c = vector[i]; + if (c >= 256) { + dest->SeqOneByteStringSet(dest_position, '%'); + dest->SeqOneByteStringSet(dest_position + 1, 'u'); + dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c >> 12)); + dest->SeqOneByteStringSet(dest_position + 3, + HexCharOfValue((c >> 8) & 0xF)); + dest->SeqOneByteStringSet(dest_position + 4, + HexCharOfValue((c >> 4) & 0xF)); + dest->SeqOneByteStringSet(dest_position + 5, HexCharOfValue(c & 0xF)); + dest_position += 6; + } else if (IsNotEscaped(c)) { + dest->SeqOneByteStringSet(dest_position, c); + dest_position++; + } else { + dest->SeqOneByteStringSet(dest_position, '%'); + dest->SeqOneByteStringSet(dest_position + 1, HexCharOfValue(c >> 4)); + dest->SeqOneByteStringSet(dest_position + 2, HexCharOfValue(c & 0xF)); + dest_position += 3; + } + } + } + + return dest; +} + +} // Anonymous namespace + +MaybeHandle Uri::Escape(Isolate* isolate, Handle string) { + Handle result; + string = String::Flatten(isolate, string); + return String::IsOneByteRepresentationUnderneath(*string) + ? 
EscapePrivate(isolate, string) + : EscapePrivate(isolate, string); +} + +MaybeHandle Uri::Unescape(Isolate* isolate, Handle string) { + Handle result; + string = String::Flatten(isolate, string); + return String::IsOneByteRepresentationUnderneath(*string) + ? UnescapePrivate(isolate, string) + : UnescapePrivate(isolate, string); +} + +} // namespace internal +} // namespace v8 diff --git a/deps/v8/src/strings/uri.h b/deps/v8/src/strings/uri.h new file mode 100644 index 0000000000..cb159c3aeb --- /dev/null +++ b/deps/v8/src/strings/uri.h @@ -0,0 +1,55 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_STRINGS_URI_H_ +#define V8_STRINGS_URI_H_ + +#include "src/utils/allocation.h" +#include "src/handles/maybe-handles.h" +#include "src/objects/objects.h" + +namespace v8 { +namespace internal { + +class Uri : public AllStatic { + public: + // ES6 section 18.2.6.2 decodeURI (encodedURI) + static MaybeHandle DecodeUri(Isolate* isolate, Handle uri) { + return Decode(isolate, uri, true); + } + + // ES6 section 18.2.6.3 decodeURIComponent (encodedURIComponent) + static MaybeHandle DecodeUriComponent(Isolate* isolate, + Handle component) { + return Decode(isolate, component, false); + } + + // ES6 section 18.2.6.4 encodeURI (uri) + static MaybeHandle EncodeUri(Isolate* isolate, Handle uri) { + return Encode(isolate, uri, true); + } + + // ES6 section 18.2.6.5 encodeURIComponenet (uriComponent) + static MaybeHandle EncodeUriComponent(Isolate* isolate, + Handle component) { + return Encode(isolate, component, false); + } + + // ES6 section B.2.1.1 escape (string) + static MaybeHandle Escape(Isolate* isolate, Handle string); + + // ES6 section B.2.1.2 unescape (string) + static MaybeHandle Unescape(Isolate* isolate, Handle string); + + private: + static MaybeHandle Decode(Isolate* isolate, Handle uri, + bool is_uri); + static MaybeHandle 
Encode(Isolate* isolate, Handle uri, + bool is_uri); +}; + +} // namespace internal +} // namespace v8 + +#endif // V8_STRINGS_URI_H_ -- cgit v1.2.3