aboutsummaryrefslogtreecommitdiff
path: root/deps/v8/src/objects/intl-objects.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/objects/intl-objects.cc')
-rw-r--r--deps/v8/src/objects/intl-objects.cc1561
1 files changed, 957 insertions, 604 deletions
diff --git a/deps/v8/src/objects/intl-objects.cc b/deps/v8/src/objects/intl-objects.cc
index dcacb4dd2f..dfabb29af9 100644
--- a/deps/v8/src/objects/intl-objects.cc
+++ b/deps/v8/src/objects/intl-objects.cc
@@ -7,7 +7,6 @@
#endif // V8_INTL_SUPPORT
#include "src/objects/intl-objects.h"
-#include "src/objects/intl-objects-inl.h"
#include <algorithm>
#include <memory>
@@ -17,31 +16,382 @@
#include "src/api-inl.h"
#include "src/global-handles.h"
#include "src/heap/factory.h"
-#include "src/intl.h"
#include "src/isolate.h"
#include "src/objects-inl.h"
#include "src/objects/js-collator-inl.h"
#include "src/objects/js-date-time-format-inl.h"
+#include "src/objects/js-locale-inl.h"
#include "src/objects/js-number-format-inl.h"
#include "src/objects/string.h"
#include "src/property-descriptor.h"
+#include "src/string-case.h"
+#include "unicode/basictz.h"
#include "unicode/brkiter.h"
+#include "unicode/calendar.h"
#include "unicode/coll.h"
#include "unicode/decimfmt.h"
#include "unicode/locid.h"
+#include "unicode/normalizer2.h"
#include "unicode/numfmt.h"
#include "unicode/numsys.h"
-#include "unicode/regex.h"
-#include "unicode/smpdtfmt.h"
#include "unicode/timezone.h"
-#include "unicode/ucol.h"
-#include "unicode/ures.h"
-#include "unicode/uvernum.h"
-#include "unicode/uversion.h"
+#include "unicode/ustring.h"
+#include "unicode/uvernum.h" // U_ICU_VERSION_MAJOR_NUM
+
+#define XSTR(s) STR(s)
+#define STR(s) #s
+static_assert(
+ V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
+ "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
+#undef STR
+#undef XSTR
namespace v8 {
namespace internal {
+namespace {
+inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; }
+
+const uint8_t kToLower[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
+ 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
+ 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73,
+ 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
+ 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83,
+ 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
+ 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+ 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
+ 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB,
+ 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3,
+ 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
+ 0xFC, 0xFD, 0xFE, 0xFF,
+};
+
+inline uint16_t ToLatin1Lower(uint16_t ch) {
+ return static_cast<uint16_t>(kToLower[ch]);
+}
+
+inline uint16_t ToASCIIUpper(uint16_t ch) {
+ return ch & ~((ch >= 'a' && ch <= 'z') << 5);
+}
+
+// Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
+inline uint16_t ToLatin1Upper(uint16_t ch) {
+ DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
+ return ch &
+ ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
+ << 5);
+}
+
+template <typename Char>
+bool ToUpperFastASCII(const Vector<const Char>& src,
+ Handle<SeqOneByteString> result) {
+ // Do a faster loop for the case where all the characters are ASCII.
+ uint16_t ored = 0;
+ int32_t index = 0;
+ for (auto it = src.begin(); it != src.end(); ++it) {
+ uint16_t ch = static_cast<uint16_t>(*it);
+ ored |= ch;
+ result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
+ }
+ return !(ored & ~0x7F);
+}
+
+const uint16_t sharp_s = 0xDF;
+
+template <typename Char>
+bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
+ int* sharp_s_count) {
+ // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
+
+ // There are two special cases.
+ // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
+ // 2. Lower case sharp-S converts to "SS" (two characters)
+ *sharp_s_count = 0;
+ for (auto it = src.begin(); it != src.end(); ++it) {
+ uint16_t ch = static_cast<uint16_t>(*it);
+ if (V8_UNLIKELY(ch == sharp_s)) {
+ ++(*sharp_s_count);
+ continue;
+ }
+ if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
+ // Since this upper-cased character does not fit in an 8-bit string, we
+ // need to take the 16-bit path.
+ return false;
+ }
+ *dest++ = ToLatin1Upper(ch);
+ }
+
+ return true;
+}
+
+template <typename Char>
+void ToUpperWithSharpS(const Vector<const Char>& src,
+ Handle<SeqOneByteString> result) {
+ int32_t dest_index = 0;
+ for (auto it = src.begin(); it != src.end(); ++it) {
+ uint16_t ch = static_cast<uint16_t>(*it);
+ if (ch == sharp_s) {
+ result->SeqOneByteStringSet(dest_index++, 'S');
+ result->SeqOneByteStringSet(dest_index++, 'S');
+ } else {
+ result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
+ }
+ }
+}
+
+inline int FindFirstUpperOrNonAscii(String s, int length) {
+ for (int index = 0; index < length; ++index) {
+ uint16_t ch = s->Get(index);
+ if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
+ return index;
+ }
+ }
+ return length;
+}
+
+const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
+ std::unique_ptr<uc16[]>* dest,
+ int32_t length) {
+ DCHECK(flat.IsFlat());
+ if (flat.IsOneByte()) {
+ if (!*dest) {
+ dest->reset(NewArray<uc16>(length));
+ CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
+ }
+ return reinterpret_cast<const UChar*>(dest->get());
+ } else {
+ return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
+ }
+}
+
+template <typename T>
+MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
+ Handle<Object> locales, Handle<Object> options) {
+ Handle<JSObject> result;
+ ASSIGN_RETURN_ON_EXCEPTION(
+ isolate, result,
+ JSObject::New(constructor, constructor, Handle<AllocationSite>::null()),
+ T);
+ return T::Initialize(isolate, Handle<T>::cast(result), locales, options);
+}
+} // namespace
+
+const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; }
+
+icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
+ Handle<String> string) {
+ string = String::Flatten(isolate, string);
+ {
+ DisallowHeapAllocation no_gc;
+ std::unique_ptr<uc16[]> sap;
+ return icu::UnicodeString(
+ GetUCharBufferFromFlat(string->GetFlatContent(no_gc), &sap,
+ string->length()),
+ string->length());
+ }
+}
+
+namespace {
+MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
+ bool is_to_upper, const char* lang) {
+ auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
+ int32_t src_length = s->length();
+ int32_t dest_length = src_length;
+ UErrorCode status;
+ Handle<SeqTwoByteString> result;
+ std::unique_ptr<uc16[]> sap;
+
+ if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();
+
+ // This is not a real loop. It'll be executed only once (no overflow) or
+ // twice (overflow).
+ for (int i = 0; i < 2; ++i) {
+ // Case conversion can increase the string length (e.g. sharp-S => SS) so
+ // that we have to handle RangeError exceptions here.
+ ASSIGN_RETURN_ON_EXCEPTION(
+ isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
+ String);
+ DisallowHeapAllocation no_gc;
+ DCHECK(s->IsFlat());
+ String::FlatContent flat = s->GetFlatContent(no_gc);
+ const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
+ status = U_ZERO_ERROR;
+ dest_length =
+ case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
+ dest_length, src, src_length, lang, &status);
+ if (status != U_BUFFER_OVERFLOW_ERROR) break;
+ }
+
+ // In most cases, the output will fill the destination buffer completely
+ // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
+ // Only in rare cases, it'll be shorter than the destination buffer and
+ // |result| has to be truncated.
+ DCHECK(U_SUCCESS(status));
+ if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
+ DCHECK(dest_length == result->length());
+ return result;
+ }
+ DCHECK(dest_length < result->length());
+ return SeqString::Truncate(result, dest_length);
+}
+
+} // namespace
+
+// A stripped-down version of ConvertToLower that can only handle flat one-byte
+// strings and does not allocate. Note that {src} could still be, e.g., a
+// one-byte sliced string with a two-byte parent string.
+// Called from TF builtins.
+String Intl::ConvertOneByteToLower(String src, String dst) {
+ DCHECK_EQ(src->length(), dst->length());
+ DCHECK(src->HasOnlyOneByteChars());
+ DCHECK(src->IsFlat());
+ DCHECK(dst->IsSeqOneByteString());
+
+ DisallowHeapAllocation no_gc;
+
+ const int length = src->length();
+ String::FlatContent src_flat = src->GetFlatContent(no_gc);
+ uint8_t* dst_data = SeqOneByteString::cast(dst)->GetChars(no_gc);
+
+ if (src_flat.IsOneByte()) {
+ const uint8_t* src_data = src_flat.ToOneByteVector().start();
+
+ bool has_changed_character = false;
+ int index_to_first_unprocessed =
+ FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
+ reinterpret_cast<const char*>(src_data), length,
+ &has_changed_character);
+
+ if (index_to_first_unprocessed == length) {
+ return has_changed_character ? dst : src;
+ }
+
+ // If not ASCII, we keep the result up to index_to_first_unprocessed and
+ // process the rest.
+ for (int index = index_to_first_unprocessed; index < length; ++index) {
+ dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
+ }
+ } else {
+ DCHECK(src_flat.IsTwoByte());
+ int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
+ if (index_to_first_unprocessed == length) return src;
+
+ const uint16_t* src_data = src_flat.ToUC16Vector().start();
+ CopyChars(dst_data, src_data, index_to_first_unprocessed);
+ for (int index = index_to_first_unprocessed; index < length; ++index) {
+ dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
+ }
+ }
+
+ return dst;
+}
+
+MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
+ if (!s->HasOnlyOneByteChars()) {
+ // Use a slower implementation for strings with characters beyond U+00FF.
+ return LocaleConvertCase(isolate, s, false, "");
+ }
+
+ int length = s->length();
+
+ // We depend here on the invariant that the length of a Latin1
+ // string is invariant under ToLowerCase, and the result always
+ // fits in the Latin1 range in the *root locale*. It does not hold
+ // for ToUpperCase even in the root locale.
+
+ // Scan the string for uppercase and non-ASCII characters for strings
+ // shorter than a machine-word without any memory allocation overhead.
+ // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
+ // to two parts, one for scanning the prefix with no change and the other for
+ // handling ASCII-only characters.
+
+ bool is_short = length < static_cast<int>(sizeof(uintptr_t));
+ if (is_short) {
+ bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
+ if (is_lower_ascii) return s;
+ }
+
+ Handle<SeqOneByteString> result =
+ isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
+
+ return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
+}
+
+MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
+ int32_t length = s->length();
+ if (s->HasOnlyOneByteChars() && length > 0) {
+ Handle<SeqOneByteString> result =
+ isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
+
+ DCHECK(s->IsFlat());
+ int sharp_s_count;
+ bool is_result_single_byte;
+ {
+ DisallowHeapAllocation no_gc;
+ String::FlatContent flat = s->GetFlatContent(no_gc);
+ uint8_t* dest = result->GetChars(no_gc);
+ if (flat.IsOneByte()) {
+ Vector<const uint8_t> src = flat.ToOneByteVector();
+ bool has_changed_character = false;
+ int index_to_first_unprocessed = FastAsciiConvert<false>(
+ reinterpret_cast<char*>(result->GetChars(no_gc)),
+ reinterpret_cast<const char*>(src.start()), length,
+ &has_changed_character);
+ if (index_to_first_unprocessed == length) {
+ return has_changed_character ? result : s;
+ }
+ // If not ASCII, we keep the result up to index_to_first_unprocessed and
+ // process the rest.
+ is_result_single_byte =
+ ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
+ dest + index_to_first_unprocessed, &sharp_s_count);
+ } else {
+ DCHECK(flat.IsTwoByte());
+ Vector<const uint16_t> src = flat.ToUC16Vector();
+ if (ToUpperFastASCII(src, result)) return result;
+ is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
+ }
+ }
+
+ // Go to the full Unicode path if there are characters whose uppercase
+ // is beyond the Latin-1 range (cannot be represented in OneByteString).
+ if (V8_UNLIKELY(!is_result_single_byte)) {
+ return LocaleConvertCase(isolate, s, true, "");
+ }
+
+ if (sharp_s_count == 0) return result;
+
+ // We have sharp_s_count sharp-s characters, but the result is still
+ // in the Latin-1 range.
+ ASSIGN_RETURN_ON_EXCEPTION(
+ isolate, result,
+ isolate->factory()->NewRawOneByteString(length + sharp_s_count),
+ String);
+ DisallowHeapAllocation no_gc;
+ String::FlatContent flat = s->GetFlatContent(no_gc);
+ if (flat.IsOneByte()) {
+ ToUpperWithSharpS(flat.ToOneByteVector(), result);
+ } else {
+ ToUpperWithSharpS(flat.ToUC16Vector(), result);
+ }
+
+ return result;
+ }
+
+ return LocaleConvertCase(isolate, s, true, "");
+}
+
std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
// Ugly hack. ICU doesn't expose numbering system in any way, so we have
// to assume that for given locale NumberingSystem constructor produces the
@@ -49,54 +399,18 @@ std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::NumberingSystem> numbering_system(
icu::NumberingSystem::createInstance(icu_locale, status));
- std::string value;
- if (U_SUCCESS(status)) {
- value = numbering_system->getName();
- }
- return value;
+ if (U_SUCCESS(status)) return numbering_system->getName();
+ return "latn";
}
-MaybeHandle<JSObject> Intl::CachedOrNewService(
- Isolate* isolate, Handle<String> service, Handle<Object> locales,
- Handle<Object> options, Handle<Object> internal_options) {
- Handle<Object> result;
- Handle<Object> undefined_value(ReadOnlyRoots(isolate).undefined_value(),
- isolate);
- Handle<Object> args[] = {service, locales, options, internal_options};
- ASSIGN_RETURN_ON_EXCEPTION(
- isolate, result,
- Execution::Call(isolate, isolate->cached_or_new_service(),
- undefined_value, arraysize(args), args),
- JSArray);
- return Handle<JSObject>::cast(result);
-}
-
-icu::Locale Intl::CreateICULocale(Isolate* isolate,
- Handle<String> bcp47_locale_str) {
- v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
- v8::String::Utf8Value bcp47_locale(v8_isolate,
- v8::Utils::ToLocal(bcp47_locale_str));
- CHECK_NOT_NULL(*bcp47_locale);
-
+icu::Locale Intl::CreateICULocale(const std::string& bcp47_locale) {
DisallowHeapAllocation no_gc;
// Convert BCP47 into ICU locale format.
UErrorCode status = U_ZERO_ERROR;
- char icu_result[ULOC_FULLNAME_CAPACITY];
- int parsed_length = 0;
- // bcp47_locale_str should be a canonicalized language tag, which
- // means this shouldn't fail.
- uloc_forLanguageTag(*bcp47_locale, icu_result, ULOC_FULLNAME_CAPACITY,
- &parsed_length, &status);
+ icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
CHECK(U_SUCCESS(status));
-
- // bcp47_locale is already checked for its structural validity
- // so that it should be parsed completely.
- int bcp47length = bcp47_locale.length();
- CHECK_EQ(bcp47length, parsed_length);
-
- icu::Locale icu_locale(icu_result);
if (icu_locale.isBogus()) {
FATAL("Failed to create ICU locale, are ICU data files missing?");
}
@@ -184,62 +498,16 @@ bool RemoveLocaleScriptTag(const std::string& icu_locale,
} // namespace
-std::set<std::string> Intl::GetAvailableLocales(const ICUService service) {
- const icu::Locale* icu_available_locales = nullptr;
- int32_t count = 0;
+std::set<std::string> Intl::BuildLocaleSet(
+ const icu::Locale* icu_available_locales, int32_t count) {
std::set<std::string> locales;
-
- switch (service) {
- case ICUService::kBreakIterator:
- case ICUService::kSegmenter:
- icu_available_locales = icu::BreakIterator::getAvailableLocales(count);
- break;
- case ICUService::kCollator:
- icu_available_locales = icu::Collator::getAvailableLocales(count);
- break;
- case ICUService::kRelativeDateTimeFormatter:
- case ICUService::kDateFormat:
- icu_available_locales = icu::DateFormat::getAvailableLocales(count);
- break;
- case ICUService::kNumberFormat:
- icu_available_locales = icu::NumberFormat::getAvailableLocales(count);
- break;
- case ICUService::kPluralRules:
- // TODO(littledan): For PluralRules, filter out locales that
- // don't support PluralRules.
- // PluralRules is missing an appropriate getAvailableLocales method,
- // so we should filter from all locales, but it's not clear how; see
- // https://ssl.icu-project.org/trac/ticket/12756
- icu_available_locales = icu::Locale::getAvailableLocales(count);
- break;
- case ICUService::kListFormatter: {
- // TODO(ftang): for now just use
- // icu::Locale::getAvailableLocales(count) until we migrate to
- // Intl::GetAvailableLocales().
- // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015
- icu_available_locales = icu::Locale::getAvailableLocales(count);
- break;
- }
- }
-
- UErrorCode error = U_ZERO_ERROR;
- char result[ULOC_FULLNAME_CAPACITY];
-
for (int32_t i = 0; i < count; ++i) {
- const char* icu_name = icu_available_locales[i].getName();
-
- error = U_ZERO_ERROR;
- // No need to force strict BCP47 rules.
- uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
- if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
- // This shouldn't happen, but lets not break the user.
- continue;
- }
- std::string locale(result);
+ std::string locale =
+ Intl::ToLanguageTag(icu_available_locales[i]).FromJust();
locales.insert(locale);
std::string shortened_locale;
- if (RemoveLocaleScriptTag(icu_name, &shortened_locale)) {
+ if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
locales.insert(shortened_locale);
}
@@ -248,75 +516,39 @@ std::set<std::string> Intl::GetAvailableLocales(const ICUService service) {
return locales;
}
-namespace {
-
-// TODO(gsathya): Remove this once we port ResolveLocale to C++.
-ICUService StringToICUService(Handle<String> service) {
- std::unique_ptr<char[]> service_cstr = service->ToCString();
- if (strcmp(service_cstr.get(), "collator") == 0) {
- return ICUService::kCollator;
- } else if (strcmp(service_cstr.get(), "numberformat") == 0) {
- return ICUService::kNumberFormat;
- } else if (strcmp(service_cstr.get(), "dateformat") == 0) {
- return ICUService::kDateFormat;
- } else if (strcmp(service_cstr.get(), "breakiterator") == 0) {
- return ICUService::kBreakIterator;
- } else if (strcmp(service_cstr.get(), "pluralrules") == 0) {
- return ICUService::kPluralRules;
- } else if (strcmp(service_cstr.get(), "relativetimeformat") == 0) {
- return ICUService::kRelativeDateTimeFormatter;
- } else if (strcmp(service_cstr.get(), "listformat") == 0) {
- return ICUService::kListFormatter;
- } else if (service->IsUtf8EqualTo(CStrVector("segmenter"))) {
- return ICUService::kSegmenter;
- }
- UNREACHABLE();
-}
-
-const char* ICUServiceToString(ICUService service) {
- switch (service) {
- case ICUService::kCollator:
- return "Intl.Collator";
- case ICUService::kNumberFormat:
- return "Intl.NumberFormat";
- case ICUService::kDateFormat:
- return "Intl.DateFormat";
- case ICUService::kBreakIterator:
- return "Intl.v8BreakIterator";
- case ICUService::kPluralRules:
- return "Intl.PluralRules";
- case ICUService::kRelativeDateTimeFormatter:
- return "Intl.RelativeTimeFormat";
- case ICUService::kListFormatter:
- return "Intl.kListFormat";
- case ICUService::kSegmenter:
- return "Intl.kSegmenter";
- }
- UNREACHABLE();
-}
-
-} // namespace
-
-V8_WARN_UNUSED_RESULT MaybeHandle<JSObject> Intl::AvailableLocalesOf(
- Isolate* isolate, Handle<String> service) {
- Factory* factory = isolate->factory();
- std::set<std::string> results =
- Intl::GetAvailableLocales(StringToICUService(service));
- Handle<JSObject> locales = factory->NewJSObjectWithNullProto();
+Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
+ UErrorCode status = U_ZERO_ERROR;
+ std::string res = locale.toLanguageTag<std::string>(status);
+ if (U_FAILURE(status)) {
+ return Nothing<std::string>();
+ }
+ CHECK(U_SUCCESS(status));
- int32_t i = 0;
- for (auto iter = results.begin(); iter != results.end(); ++iter) {
- RETURN_ON_EXCEPTION(
- isolate,
- JSObject::SetOwnPropertyIgnoreAttributes(
- locales, factory->NewStringFromAsciiChecked(iter->c_str()),
- factory->NewNumber(i++), NONE),
- JSObject);
+ // Hack to remove -true and -yes from unicode extensions
+ // Address https://crbug.com/v8/8565
+ // TODO(ftang): Move the following "remove true" logic into ICU toLanguageTag
+ // by fixing ICU-20310.
+ size_t u_ext_start = res.find("-u-");
+ if (u_ext_start != std::string::npos) {
+ // remove "-true" and "-yes" after -u-
+ const std::vector<std::string> remove_items({"-true", "-yes"});
+ for (auto item = remove_items.begin(); item != remove_items.end(); item++) {
+ for (size_t sep_remove =
+ res.find(*item, u_ext_start + 5 /* strlen("-u-xx") == 5 */);
+ sep_remove != std::string::npos; sep_remove = res.find(*item)) {
+ size_t end_of_sep_remove = sep_remove + item->length();
+ if (res.length() == end_of_sep_remove ||
+ res.at(end_of_sep_remove) == '-') {
+ res.erase(sep_remove, item->length());
+ }
+ }
+ }
}
- return locales;
+ return Just(res);
}
-std::string Intl::DefaultLocale(Isolate* isolate) {
+namespace {
+std::string DefaultLocale(Isolate* isolate) {
if (isolate->default_locale().empty()) {
icu::Locale default_locale;
// Translate ICU's fallback locale to a well-known locale.
@@ -324,32 +556,16 @@ std::string Intl::DefaultLocale(Isolate* isolate) {
isolate->set_default_locale("en-US");
} else {
// Set the locale
- char result[ULOC_FULLNAME_CAPACITY];
- UErrorCode status = U_ZERO_ERROR;
- int32_t length =
- uloc_toLanguageTag(default_locale.getName(), result,
- ULOC_FULLNAME_CAPACITY, FALSE, &status);
isolate->set_default_locale(
- U_SUCCESS(status) ? std::string(result, length) : "und");
+ default_locale.isBogus()
+ ? "und"
+ : Intl::ToLanguageTag(default_locale).FromJust());
}
DCHECK(!isolate->default_locale().empty());
}
return isolate->default_locale();
}
-
-bool Intl::IsObjectOfType(Isolate* isolate, Handle<Object> input,
- Intl::Type expected_type) {
- if (!input->IsJSObject()) return false;
- Handle<JSObject> obj = Handle<JSObject>::cast(input);
-
- Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
- Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker);
-
- if (!tag->IsSmi()) return false;
-
- Intl::Type type = Intl::TypeFromSmi(Smi::cast(*tag));
- return type == expected_type;
-}
+} // namespace
// See ecma402/#legacy-constructor.
MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
@@ -378,161 +594,6 @@ MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
return receiver;
}
-namespace {
-
-#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
-// Define general regexp macros.
-// Note "(?:" means the regexp group a non-capture group.
-#define REGEX_ALPHA "[a-z]"
-#define REGEX_DIGIT "[0-9]"
-#define REGEX_ALPHANUM "(?:" REGEX_ALPHA "|" REGEX_DIGIT ")"
-
-void BuildLanguageTagRegexps(Isolate* isolate) {
-// Define the language tag regexp macros.
-// For info on BCP 47 see https://tools.ietf.org/html/bcp47 .
-// Because language tags are case insensitive per BCP 47 2.1.1 and regexp's
-// defined below will always be used after lowercasing the input, uppercase
-// ranges in BCP 47 2.1 are dropped and grandfathered tags are all lowercased.
-// clang-format off
-#define BCP47_REGULAR \
- "(?:art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|" \
- "zh-min|zh-min-nan|zh-xiang)"
-#define BCP47_IRREGULAR \
- "(?:en-gb-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|" \
- "i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|" \
- "i-tsu|sgn-be-fr|sgn-be-nl|sgn-ch-de)"
-#define BCP47_GRANDFATHERED "(?:" BCP47_IRREGULAR "|" BCP47_REGULAR ")"
-#define BCP47_PRIVATE_USE "(?:x(?:-" REGEX_ALPHANUM "{1,8})+)"
-
-#define BCP47_SINGLETON "(?:" REGEX_DIGIT "|" "[a-wy-z])"
-
-#define BCP47_EXTENSION "(?:" BCP47_SINGLETON "(?:-" REGEX_ALPHANUM "{2,8})+)"
-#define BCP47_VARIANT \
- "(?:" REGEX_ALPHANUM "{5,8}" "|" "(?:" REGEX_DIGIT REGEX_ALPHANUM "{3}))"
-
-#define BCP47_REGION "(?:" REGEX_ALPHA "{2}" "|" REGEX_DIGIT "{3})"
-#define BCP47_SCRIPT "(?:" REGEX_ALPHA "{4})"
-#define BCP47_EXT_LANG "(?:" REGEX_ALPHA "{3}(?:-" REGEX_ALPHA "{3}){0,2})"
-#define BCP47_LANGUAGE "(?:" REGEX_ALPHA "{2,3}(?:-" BCP47_EXT_LANG ")?" \
- "|" REGEX_ALPHA "{4}" "|" REGEX_ALPHA "{5,8})"
-#define BCP47_LANG_TAG \
- BCP47_LANGUAGE \
- "(?:-" BCP47_SCRIPT ")?" \
- "(?:-" BCP47_REGION ")?" \
- "(?:-" BCP47_VARIANT ")*" \
- "(?:-" BCP47_EXTENSION ")*" \
- "(?:-" BCP47_PRIVATE_USE ")?"
- // clang-format on
-
- constexpr char kLanguageTagSingletonRegexp[] = "^" BCP47_SINGLETON "$";
- constexpr char kLanguageTagVariantRegexp[] = "^" BCP47_VARIANT "$";
- constexpr char kLanguageTagRegexp[] =
- "^(?:" BCP47_LANG_TAG "|" BCP47_PRIVATE_USE "|" BCP47_GRANDFATHERED ")$";
-
- UErrorCode status = U_ZERO_ERROR;
- icu::RegexMatcher* language_singleton_regexp_matcher = new icu::RegexMatcher(
- icu::UnicodeString(kLanguageTagSingletonRegexp, -1, US_INV), 0, status);
- icu::RegexMatcher* language_tag_regexp_matcher = new icu::RegexMatcher(
- icu::UnicodeString(kLanguageTagRegexp, -1, US_INV), 0, status);
- icu::RegexMatcher* language_variant_regexp_matcher = new icu::RegexMatcher(
- icu::UnicodeString(kLanguageTagVariantRegexp, -1, US_INV), 0, status);
- CHECK(U_SUCCESS(status));
-
- isolate->set_language_tag_regexp_matchers(language_singleton_regexp_matcher,
- language_tag_regexp_matcher,
- language_variant_regexp_matcher);
-// Undefine the language tag regexp macros.
-#undef BCP47_EXTENSION
-#undef BCP47_EXT_LANG
-#undef BCP47_GRANDFATHERED
-#undef BCP47_IRREGULAR
-#undef BCP47_LANG_TAG
-#undef BCP47_LANGUAGE
-#undef BCP47_PRIVATE_USE
-#undef BCP47_REGION
-#undef BCP47_REGULAR
-#undef BCP47_SCRIPT
-#undef BCP47_SINGLETON
-#undef BCP47_VARIANT
-}
-
-// Undefine the general regexp macros.
-#undef REGEX_ALPHA
-#undef REGEX_DIGIT
-#undef REGEX_ALPHANUM
-
-icu::RegexMatcher* GetLanguageSingletonRegexMatcher(Isolate* isolate) {
- icu::RegexMatcher* language_singleton_regexp_matcher =
- isolate->language_singleton_regexp_matcher();
- if (language_singleton_regexp_matcher == nullptr) {
- BuildLanguageTagRegexps(isolate);
- language_singleton_regexp_matcher =
- isolate->language_singleton_regexp_matcher();
- }
- return language_singleton_regexp_matcher;
-}
-
-icu::RegexMatcher* GetLanguageTagRegexMatcher(Isolate* isolate) {
- icu::RegexMatcher* language_tag_regexp_matcher =
- isolate->language_tag_regexp_matcher();
- if (language_tag_regexp_matcher == nullptr) {
- BuildLanguageTagRegexps(isolate);
- language_tag_regexp_matcher = isolate->language_tag_regexp_matcher();
- }
- return language_tag_regexp_matcher;
-}
-
-icu::RegexMatcher* GetLanguageVariantRegexMatcher(Isolate* isolate) {
- icu::RegexMatcher* language_variant_regexp_matcher =
- isolate->language_variant_regexp_matcher();
- if (language_variant_regexp_matcher == nullptr) {
- BuildLanguageTagRegexps(isolate);
- language_variant_regexp_matcher =
- isolate->language_variant_regexp_matcher();
- }
- return language_variant_regexp_matcher;
-}
-#endif // USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
-
-} // anonymous namespace
-
-MaybeHandle<JSObject> Intl::ResolveLocale(Isolate* isolate, const char* service,
- Handle<Object> requestedLocales,
- Handle<Object> options) {
- Handle<String> service_str =
- isolate->factory()->NewStringFromAsciiChecked(service);
-
- Handle<JSFunction> resolve_locale_function = isolate->resolve_locale();
-
- Handle<Object> result;
- Handle<Object> undefined_value = isolate->factory()->undefined_value();
- Handle<Object> args[] = {service_str, requestedLocales, options};
- ASSIGN_RETURN_ON_EXCEPTION(
- isolate, result,
- Execution::Call(isolate, resolve_locale_function, undefined_value,
- arraysize(args), args),
- JSObject);
-
- return Handle<JSObject>::cast(result);
-}
-
-MaybeHandle<JSObject> Intl::CanonicalizeLocaleListJS(Isolate* isolate,
- Handle<Object> locales) {
- Handle<JSFunction> canonicalize_locale_list_function =
- isolate->canonicalize_locale_list();
-
- Handle<Object> result;
- Handle<Object> undefined_value = isolate->factory()->undefined_value();
- Handle<Object> args[] = {locales};
- ASSIGN_RETURN_ON_EXCEPTION(
- isolate, result,
- Execution::Call(isolate, canonicalize_locale_list_function,
- undefined_value, arraysize(args), args),
- JSObject);
-
- return Handle<JSObject>::cast(result);
-}
-
Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
const char* property,
std::vector<const char*> values,
@@ -618,111 +679,6 @@ char AsciiToLower(char c) {
return c | (1 << 5);
}
-#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
-/**
- * Check the structural Validity of the language tag per ECMA 402 6.2.2:
- * - Well-formed per RFC 5646 2.1
- * - There are no duplicate variant subtags
- * - There are no duplicate singleton (extension) subtags
- *
- * One extra-check is done (from RFC 5646 2.2.9): the tag is compared
- * against the list of grandfathered tags. However, subtags for
- * primary/extended language, script, region, variant are not checked
- * against the IANA language subtag registry.
- *
- * ICU 62 or earlier is too permissible and lets invalid tags, like
- * hant-cmn-cn, through.
- *
- * Returns false if the language tag is invalid.
- */
-bool IsStructurallyValidLanguageTag(Isolate* isolate,
- const std::string& locale_in) {
- if (!String::IsAscii(locale_in.c_str(),
- static_cast<int>(locale_in.length()))) {
- return false;
- }
- std::string locale(locale_in);
- icu::RegexMatcher* language_tag_regexp_matcher =
- GetLanguageTagRegexMatcher(isolate);
-
- // Check if it's well-formed, including grandfathered tags.
- icu::UnicodeString locale_uni(locale.c_str(), -1, US_INV);
- // Note: icu::RegexMatcher::reset does not make a copy of the input string
- // so cannot use a temp value; ie: cannot create it as a call parameter.
- language_tag_regexp_matcher->reset(locale_uni);
- UErrorCode status = U_ZERO_ERROR;
- bool is_valid_lang_tag = language_tag_regexp_matcher->matches(status);
- if (!is_valid_lang_tag || V8_UNLIKELY(U_FAILURE(status))) {
- return false;
- }
-
- // Just return if it's a x- form. It's all private.
- if (locale.find("x-") == 0) {
- return true;
- }
-
- // Check if there are any duplicate variants or singletons (extensions).
-
- // Remove private use section.
- locale = locale.substr(0, locale.find("-x-"));
-
- // Skip language since it can match variant regex, so we start from 1.
- // We are matching i-klingon here, but that's ok, since i-klingon-klingon
- // is not valid and would fail LANGUAGE_TAG_RE test.
- size_t pos = 0;
- std::vector<std::string> parts;
- while ((pos = locale.find('-')) != std::string::npos) {
- std::string token = locale.substr(0, pos);
- parts.push_back(token);
- locale = locale.substr(pos + 1);
- }
- if (locale.length() != 0) {
- parts.push_back(locale);
- }
-
- icu::RegexMatcher* language_variant_regexp_matcher =
- GetLanguageVariantRegexMatcher(isolate);
-
- icu::RegexMatcher* language_singleton_regexp_matcher =
- GetLanguageSingletonRegexMatcher(isolate);
-
- std::vector<std::string> variants;
- std::vector<std::string> extensions;
- for (auto it = parts.begin() + 1; it != parts.end(); it++) {
- icu::UnicodeString part(it->data(), -1, US_INV);
- language_variant_regexp_matcher->reset(part);
- bool is_language_variant = language_variant_regexp_matcher->matches(status);
- if (V8_UNLIKELY(U_FAILURE(status))) {
- return false;
- }
- if (is_language_variant && extensions.size() == 0) {
- if (std::find(variants.begin(), variants.end(), *it) == variants.end()) {
- variants.push_back(*it);
- } else {
- return false;
- }
- }
-
- language_singleton_regexp_matcher->reset(part);
- bool is_language_singleton =
- language_singleton_regexp_matcher->matches(status);
- if (V8_UNLIKELY(U_FAILURE(status))) {
- return false;
- }
- if (is_language_singleton) {
- if (std::find(extensions.begin(), extensions.end(), *it) ==
- extensions.end()) {
- extensions.push_back(*it);
- } else {
- return false;
- }
- }
- }
-
- return true;
-}
-#endif // USE_CHROMIUM_ICU == 0 || U_ICU_VERSION_MAJOR_NUM < 63
-
bool IsLowerAscii(char c) { return c >= 'a' && c < 'z'; }
bool IsTwoLetterLanguage(const std::string& locale) {
@@ -795,15 +751,6 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
// the input before any more check.
std::transform(locale.begin(), locale.end(), locale.begin(), AsciiToLower);
-#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
- if (!IsStructurallyValidLanguageTag(isolate, locale)) {
- THROW_NEW_ERROR_RETURN_VALUE(
- isolate,
- NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
- Nothing<std::string>());
- }
-#endif
-
// ICU maps a few grandfathered tags to what looks like a regular language
// tag even though IANA language tag registry does not have a preferred
// entry map for them. Return them as they're with lowercasing.
@@ -819,38 +766,26 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
// handle long locale names better. See
// https://unicode-org.atlassian.net/browse/ICU-13417
UErrorCode error = U_ZERO_ERROR;
- char icu_result[ULOC_FULLNAME_CAPACITY];
// uloc_forLanguageTag checks the structrual validity. If the input BCP47
// language tag is parsed all the way to the end, it indicates that the input
// is structurally valid. Due to a couple of bugs, we can't use it
// without Chromium patches or ICU 62 or earlier.
- int parsed_length;
- uloc_forLanguageTag(locale.c_str(), icu_result, ULOC_FULLNAME_CAPACITY,
- &parsed_length, &error);
- if (U_FAILURE(error) ||
-#if USE_CHROMIUM_ICU == 1 || U_ICU_VERSION_MAJOR_NUM >= 63
- static_cast<size_t>(parsed_length) < locale.length() ||
-#endif
- error == U_STRING_NOT_TERMINATED_WARNING) {
+ icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
+ if (U_FAILURE(error) || icu_locale.isBogus()) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
Nothing<std::string>());
}
-
- // Force strict BCP47 rules.
- char result[ULOC_FULLNAME_CAPACITY];
- int32_t result_len = uloc_toLanguageTag(icu_result, result,
- ULOC_FULLNAME_CAPACITY, TRUE, &error);
-
- if (U_FAILURE(error)) {
+ Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
+ if (maybe_to_language_tag.IsNothing()) {
THROW_NEW_ERROR_RETURN_VALUE(
isolate,
NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
Nothing<std::string>());
}
- return Just(std::string(result, result_len));
+ return Intl::ToLanguageTag(icu_locale);
}
Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
@@ -862,7 +797,15 @@ Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
}
// 2. Let seen be a new empty List.
std::vector<std::string> seen;
- // 3. If Type(locales) is String, then
+ // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
+ // internal slot, then
+ if (locales->IsJSLocale()) {
+ // Since this value came from JSLocale, which is already went though the
+ // CanonializeLanguageTag process once, therefore there are no need to
+ // call CanonializeLanguageTag again.
+ seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
+ return Just(seen);
+ }
if (locales->IsString()) {
// 3a. Let O be CreateArrayFromList(« locales »).
// Instead of creating a one-element array and then iterating over it,
@@ -898,21 +841,31 @@ Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
// 7a. Let Pk be ToString(k).
// 7b. Let kPresent be ? HasProperty(O, Pk).
LookupIterator it(isolate, o, k);
+ Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
+ MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
// 7c. If kPresent is true, then
- if (!it.IsFound()) continue;
+ if (!maybe_found.FromJust()) continue;
// 7c i. Let kValue be ? Get(O, Pk).
Handle<Object> k_value;
ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
Nothing<std::vector<std::string>>());
// 7c ii. If Type(kValue) is not String or Object, throw a TypeError
// exception.
- // 7c iii. Let tag be ? ToString(kValue).
- // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
- // RangeError exception.
- // 7c v. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
+ // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]]
+ // internal slot, then
std::string canonicalized_tag;
- if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
- return Nothing<std::vector<std::string>>();
+ if (k_value->IsJSLocale()) {
+ // 7c iii. 1. Let tag be kValue.[[Locale]].
+ canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
+ // 7c iv. Else,
+ } else {
+ // 7c iv 1. Let tag be ? ToString(kValue).
+ // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a
+ // RangeError exception.
+ // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
+ if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
+ return Nothing<std::vector<std::string>>();
+ }
}
// 7c vi. If canonicalizedTag is not an element of seen, append
// canonicalizedTag as the last element of seen.
@@ -938,7 +891,7 @@ MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
return MaybeHandle<String>();
}
std::string requested_locale = requested_locales.size() == 0
- ? Intl::DefaultLocale(isolate)
+ ? DefaultLocale(isolate)
: requested_locales[0];
size_t dash = requested_locale.find('-');
if (dash != std::string::npos) {
@@ -955,7 +908,10 @@ MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
// tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
// only 'x' or 'i' when they get here.
if (V8_UNLIKELY(requested_locale.length() != 2)) {
- return ConvertCase(s, to_upper, isolate);
+ if (to_upper) {
+ return ConvertToUpper(isolate, s);
+ }
+ return ConvertToLower(isolate, s);
}
// TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
// in the root locale needs to be adjusted for az, lt and tr because even case
@@ -963,9 +919,12 @@ MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
// Greek (el) does not require any adjustment.
if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
(requested_locale == "lt") || (requested_locale == "az"))) {
- return LocaleConvertCase(s, isolate, to_upper, requested_locale.c_str());
+ return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
} else {
- return ConvertCase(s, to_upper, isolate);
+ if (to_upper) {
+ return ConvertToUpper(isolate, s);
+ }
+ return ConvertToLower(isolate, s);
}
}
@@ -974,46 +933,57 @@ MaybeHandle<Object> Intl::StringLocaleCompare(Isolate* isolate,
Handle<String> string2,
Handle<Object> locales,
Handle<Object> options) {
- Factory* factory = isolate->factory();
- Handle<JSObject> collator;
+ // We only cache the instance when both locales and options are undefined,
+ // as that is the only case when the specified side-effects of examining
+ // those arguments are unobservable.
+ bool can_cache =
+ locales->IsUndefined(isolate) && options->IsUndefined(isolate);
+ if (can_cache) {
+ // Both locales and options are undefined, check the cache.
+ icu::Collator* cached_icu_collator =
+ static_cast<icu::Collator*>(isolate->get_cached_icu_object(
+ Isolate::ICUObjectCacheType::kDefaultCollator));
+ // We may use the cached icu::Collator for a fast path.
+ if (cached_icu_collator != nullptr) {
+ return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
+ string2);
+ }
+ }
+
+ Handle<JSFunction> constructor = Handle<JSFunction>(
+ JSFunction::cast(
+ isolate->context()->native_context()->intl_collator_function()),
+ isolate);
+
+ Handle<JSCollator> collator;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, collator,
- CachedOrNewService(isolate, factory->NewStringFromStaticChars("collator"),
- locales, options, factory->undefined_value()),
- Object);
- CHECK(collator->IsJSCollator());
- return Intl::CompareStrings(isolate, Handle<JSCollator>::cast(collator),
- string1, string2);
+ New<JSCollator>(isolate, constructor, locales, options), Object);
+ if (can_cache) {
+ isolate->set_icu_object_in_cache(
+ Isolate::ICUObjectCacheType::kDefaultCollator,
+ std::static_pointer_cast<icu::UObject>(
+ collator->icu_collator()->get()));
+ }
+ icu::Collator* icu_collator = collator->icu_collator()->raw();
+ return Intl::CompareStrings(isolate, *icu_collator, string1, string2);
}
// ecma402/#sec-collator-comparestrings
Handle<Object> Intl::CompareStrings(Isolate* isolate,
- Handle<JSCollator> collator,
+ const icu::Collator& icu_collator,
Handle<String> string1,
Handle<String> string2) {
Factory* factory = isolate->factory();
- icu::Collator* icu_collator = collator->icu_collator()->raw();
- CHECK_NOT_NULL(icu_collator);
string1 = String::Flatten(isolate, string1);
string2 = String::Flatten(isolate, string2);
UCollationResult result;
UErrorCode status = U_ZERO_ERROR;
- {
- DisallowHeapAllocation no_gc;
- int32_t length1 = string1->length();
- int32_t length2 = string2->length();
- String::FlatContent flat1 = string1->GetFlatContent();
- String::FlatContent flat2 = string2->GetFlatContent();
- std::unique_ptr<uc16[]> sap1;
- std::unique_ptr<uc16[]> sap2;
- icu::UnicodeString string_val1(
- FALSE, GetUCharBufferFromFlat(flat1, &sap1, length1), length1);
- icu::UnicodeString string_val2(
- FALSE, GetUCharBufferFromFlat(flat2, &sap2, length2), length2);
- result = icu_collator->compare(string_val1, string_val2, status);
- }
+ icu::UnicodeString string_val1 = Intl::ToICUUnicodeString(isolate, string1);
+ icu::UnicodeString string_val2 = Intl::ToICUUnicodeString(isolate, string2);
+ result = icu_collator.compare(string_val1, string_val2, status);
DCHECK(U_SUCCESS(status));
return factory->NewNumberFromInt(result);
@@ -1024,28 +994,49 @@ MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
Handle<Object> num,
Handle<Object> locales,
Handle<Object> options) {
- Factory* factory = isolate->factory();
- Handle<JSObject> number_format_holder;
- // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
- ASSIGN_RETURN_ON_EXCEPTION(
- isolate, number_format_holder,
- CachedOrNewService(isolate,
- factory->NewStringFromStaticChars("numberformat"),
- locales, options, factory->undefined_value()),
- String);
- DCHECK(number_format_holder->IsJSNumberFormat());
- Handle<JSNumberFormat> number_format = Handle<JSNumberFormat>(
- JSNumberFormat::cast(*number_format_holder), isolate);
-
Handle<Object> number_obj;
ASSIGN_RETURN_ON_EXCEPTION(isolate, number_obj,
Object::ToNumber(isolate, num), String);
- // Spec treats -0 and +0 as 0.
- double number = number_obj->Number() + 0;
+ double number = number_obj->Number();
+
+ // We only cache the instance when both locales and options are undefined,
+ // as that is the only case when the specified side-effects of examining
+ // those arguments are unobservable.
+ bool can_cache =
+ locales->IsUndefined(isolate) && options->IsUndefined(isolate);
+ if (can_cache) {
+ icu::NumberFormat* cached_number_format =
+ static_cast<icu::NumberFormat*>(isolate->get_cached_icu_object(
+ Isolate::ICUObjectCacheType::kDefaultNumberFormat));
+ // We may use the cached icu::NumberFormat for a fast path.
+ if (cached_number_format != nullptr) {
+ return JSNumberFormat::FormatNumber(isolate, *cached_number_format,
+ number);
+ }
+ }
+
+ Handle<JSFunction> constructor = Handle<JSFunction>(
+ JSFunction::cast(
+ isolate->context()->native_context()->intl_number_format_function()),
+ isolate);
+ Handle<JSNumberFormat> number_format;
+ // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
+ ASSIGN_RETURN_ON_EXCEPTION(
+ isolate, number_format,
+ New<JSNumberFormat>(isolate, constructor, locales, options), String);
+
+ if (can_cache) {
+ isolate->set_icu_object_in_cache(
+ Isolate::ICUObjectCacheType::kDefaultNumberFormat,
+ std::static_pointer_cast<icu::UObject>(
+ number_format->icu_number_format()->get()));
+ }
// Return FormatNumber(numberFormat, x).
- return JSNumberFormat::FormatNumber(isolate, number_format, number);
+ icu::NumberFormat* icu_number_format =
+ number_format->icu_number_format()->raw();
+ return JSNumberFormat::FormatNumber(isolate, *icu_number_format, number);
}
namespace {
@@ -1232,10 +1223,19 @@ std::string BestAvailableLocale(const std::set<std::string>& available_locales,
}
}
-// Removes unicode extensions from a given bcp47 language tag.
-// For example, converts 'en-US-u-co-emoji' to 'en-US'.
-std::string RemoveUnicodeExtensions(const std::string& locale) {
+struct ParsedLocale {
+ std::string no_extensions_locale;
+ std::string extension;
+};
+
+// Returns a struct containing a bcp47 tag without unicode extensions
+// and the removed unicode extensions.
+//
+// For example, given 'en-US-u-co-emoji' returns 'en-US' and
+// 'u-co-emoji'.
+ParsedLocale ParseBCP47Locale(const std::string& locale) {
size_t length = locale.length();
+ ParsedLocale parsed_locale;
// Privateuse or grandfathered locales have no extension sequences.
if ((length > 1) && (locale[1] == '-')) {
@@ -1243,20 +1243,25 @@ std::string RemoveUnicodeExtensions(const std::string& locale) {
// privateuse extension. ICU can sometimes mess up the
// canonicalization.
CHECK(locale[0] == 'x' || locale[0] == 'i');
- return locale;
+ parsed_locale.no_extensions_locale = locale;
+ return parsed_locale;
}
size_t unicode_extension_start = locale.find("-u-");
// No unicode extensions found.
- if (unicode_extension_start == std::string::npos) return locale;
+ if (unicode_extension_start == std::string::npos) {
+ parsed_locale.no_extensions_locale = locale;
+ return parsed_locale;
+ }
size_t private_extension_start = locale.find("-x-");
// Unicode extensions found within privateuse subtags don't count.
if (private_extension_start != std::string::npos &&
private_extension_start < unicode_extension_start) {
- return locale;
+ parsed_locale.no_extensions_locale = locale;
+ return parsed_locale;
}
const std::string beginning = locale.substr(0, unicode_extension_start);
@@ -1277,7 +1282,10 @@ std::string RemoveUnicodeExtensions(const std::string& locale) {
}
const std::string end = locale.substr(unicode_extension_end);
- return beginning + end;
+ parsed_locale.no_extensions_locale = beginning + end;
+ parsed_locale.extension = locale.substr(
+ unicode_extension_start, unicode_extension_end - unicode_extension_start);
+ return parsed_locale;
}
// ecma402/#sec-lookupsupportedlocales
@@ -1291,7 +1299,8 @@ std::vector<std::string> LookupSupportedLocales(
for (const std::string& locale : requested_locales) {
// 2. a. Let noExtensionsLocale be the String value that is locale
// with all Unicode locale extension sequences removed.
- std::string no_extension_locale = RemoveUnicodeExtensions(locale);
+ std::string no_extension_locale =
+ ParseBCP47Locale(locale).no_extensions_locale;
// 2. b. Let availableLocale be
// BestAvailableLocale(availableLocales, noExtensionsLocale).
@@ -1317,56 +1326,38 @@ std::vector<std::string> BestFitSupportedLocales(
return LookupSupportedLocales(available_locales, requested_locales);
}
-enum MatcherOption { kBestFit, kLookup };
-
-// TODO(bstell): should this be moved somewhere where it is reusable?
-// Implement steps 5, 6, 7 for ECMA 402 9.2.9 SupportedLocales
-// https://tc39.github.io/ecma402/#sec-supportedlocales
-MaybeHandle<JSObject> CreateReadOnlyArray(Isolate* isolate,
- std::vector<std::string> elements) {
+// ecma262 #sec-createarrayfromlist
+Handle<JSArray> CreateArrayFromList(Isolate* isolate,
+ std::vector<std::string> elements,
+ PropertyAttributes attr) {
Factory* factory = isolate->factory();
- if (elements.size() >= kMaxUInt32) {
- THROW_NEW_ERROR(
- isolate, NewRangeError(MessageTemplate::kInvalidArrayLength), JSObject);
- }
-
- PropertyAttributes attr =
- static_cast<PropertyAttributes>(READ_ONLY | DONT_DELETE);
-
- // 5. Let subset be CreateArrayFromList(elements).
- // 6. Let keys be subset.[[OwnPropertyKeys]]().
- Handle<JSArray> subset = factory->NewJSArray(0);
+ // Let array be ! ArrayCreate(0).
+ Handle<JSArray> array = factory->NewJSArray(0);
- // 7. For each element P of keys in List order, do
uint32_t length = static_cast<uint32_t>(elements.size());
+ // 3. Let n be 0.
+ // 4. For each element e of elements, do
for (uint32_t i = 0; i < length; i++) {
+ // a. Let status be CreateDataProperty(array, ! ToString(n), e).
const std::string& part = elements[i];
Handle<String> value =
factory->NewStringFromUtf8(CStrVector(part.c_str())).ToHandleChecked();
- JSObject::AddDataElement(subset, i, value, attr);
+ JSObject::AddDataElement(array, i, value, attr);
}
-
- // 7.a. Let desc be PropertyDescriptor { [[Configurable]]: false,
- // [[Writable]]: false }.
- PropertyDescriptor desc;
- desc.set_writable(false);
- desc.set_configurable(false);
-
- // 7.b. Perform ! DefinePropertyOrThrow(subset, P, desc).
- JSArray::ArraySetLength(isolate, subset, &desc, kThrowOnError).ToChecked();
- return subset;
+ // 5. Return array.
+ return array;
}
// ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
// https://tc39.github.io/ecma402/#sec-supportedlocales
MaybeHandle<JSObject> SupportedLocales(
- Isolate* isolate, ICUService service,
+ Isolate* isolate, const char* method,
const std::set<std::string>& available_locales,
const std::vector<std::string>& requested_locales, Handle<Object> options) {
std::vector<std::string> supported_locales;
// 2. Else, let matcher be "best fit".
- MatcherOption matcher = kBestFit;
+ Intl::MatcherOption matcher = Intl::MatcherOption::kBestFit;
// 1. If options is not undefined, then
if (!options->IsUndefined(isolate)) {
@@ -1377,60 +1368,53 @@ MaybeHandle<JSObject> SupportedLocales(
// 1. b. Let matcher be ? GetOption(options, "localeMatcher", "string",
// « "lookup", "best fit" », "best fit").
- std::unique_ptr<char[]> matcher_str = nullptr;
- std::vector<const char*> matcher_values = {"lookup", "best fit"};
- Maybe<bool> maybe_found_matcher = Intl::GetStringOption(
- isolate, options_obj, "localeMatcher", matcher_values,
- ICUServiceToString(service), &matcher_str);
- MAYBE_RETURN(maybe_found_matcher, MaybeHandle<JSObject>());
- if (maybe_found_matcher.FromJust()) {
- DCHECK_NOT_NULL(matcher_str.get());
- if (strcmp(matcher_str.get(), "lookup") == 0) {
- matcher = kLookup;
- }
- }
+ Maybe<Intl::MatcherOption> maybe_locale_matcher =
+ Intl::GetLocaleMatcher(isolate, options_obj, method);
+ MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
+ matcher = maybe_locale_matcher.FromJust();
}
// 3. If matcher is "best fit", then
// a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
// requestedLocales).
- if (matcher == kBestFit) {
+ if (matcher == Intl::MatcherOption::kBestFit) {
supported_locales =
BestFitSupportedLocales(available_locales, requested_locales);
} else {
// 4. Else,
// a. Let supportedLocales be LookupSupportedLocales(availableLocales,
// requestedLocales).
- DCHECK_EQ(matcher, kLookup);
+ DCHECK_EQ(matcher, Intl::MatcherOption::kLookup);
supported_locales =
LookupSupportedLocales(available_locales, requested_locales);
}
- // TODO(jkummerow): Possibly revisit why the spec has the individual entries
- // readonly but the array is not frozen.
- // https://github.com/tc39/ecma402/issues/258
+ // 5. Return CreateArrayFromList(supportedLocales).
+ PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
+ return CreateArrayFromList(isolate, supported_locales, attr);
+}
- // 5. Let subset be CreateArrayFromList(supportedLocales).
- // 6. Let keys be subset.[[OwnPropertyKeys]]().
- // 7. For each element P of keys in List order, do
- // a. Let desc be PropertyDescriptor { [[Configurable]]: false,
- // [[Writable]]: false }.
- // b. Perform ! DefinePropertyOrThrow(subset, P, desc).
- MaybeHandle<JSObject> subset =
- CreateReadOnlyArray(isolate, supported_locales);
+} // namespace
- // 8. Return subset.
- return subset;
+// ecma-402 #sec-intl.getcanonicallocales
+MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
+ Handle<Object> locales) {
+ // 1. Let ll be ? CanonicalizeLocaleList(locales).
+ Maybe<std::vector<std::string>> maybe_ll =
+ CanonicalizeLocaleList(isolate, locales, false);
+ MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>());
+
+ // 2. Return CreateArrayFromList(ll).
+ PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
+ return CreateArrayFromList(isolate, maybe_ll.FromJust(), attr);
}
-} // namespace
// ECMA 402 Intl.*.supportedLocalesOf
-MaybeHandle<JSObject> Intl::SupportedLocalesOf(Isolate* isolate,
- ICUService service,
- Handle<Object> locales,
- Handle<Object> options) {
+MaybeHandle<JSObject> Intl::SupportedLocalesOf(
+ Isolate* isolate, const char* method,
+ const std::set<std::string>& available_locales, Handle<Object> locales,
+ Handle<Object> options) {
// Let availableLocales be %Collator%.[[AvailableLocales]].
- std::set<std::string> available_locales = GetAvailableLocales(service);
// Let requestedLocales be ? CanonicalizeLocaleList(locales).
Maybe<std::vector<std::string>> requested_locales =
@@ -1438,17 +1422,57 @@ MaybeHandle<JSObject> Intl::SupportedLocalesOf(Isolate* isolate,
MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
// Return ? SupportedLocales(availableLocales, requestedLocales, options).
- return SupportedLocales(isolate, service, available_locales,
+ return SupportedLocales(isolate, method, available_locales,
requested_locales.FromJust(), options);
}
-std::map<std::string, std::string> Intl::LookupUnicodeExtensions(
- const icu::Locale& icu_locale, const std::set<std::string>& relevant_keys) {
+namespace {
+template <typename T>
+bool IsValidExtension(const icu::Locale& locale, const char* key,
+ const std::string& value) {
+ UErrorCode status = U_ZERO_ERROR;
+ std::unique_ptr<icu::StringEnumeration> enumeration(
+ T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
+ false, status));
+ if (U_SUCCESS(status)) {
+ int32_t length;
+ std::string legacy_type(uloc_toLegacyType(key, value.c_str()));
+ for (const char* item = enumeration->next(&length, status); item != nullptr;
+ item = enumeration->next(&length, status)) {
+ if (U_SUCCESS(status) && legacy_type == item) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool IsValidCalendar(const icu::Locale& locale, const std::string& value) {
+ return IsValidExtension<icu::Calendar>(locale, "calendar", value);
+}
+
+bool IsValidCollation(const icu::Locale& locale, const std::string& value) {
+ std::set<std::string> invalid_values = {"standard", "search"};
+ if (invalid_values.find(value) != invalid_values.end()) return false;
+ return IsValidExtension<icu::Collator>(locale, "collation", value);
+}
+
+bool IsValidNumberingSystem(const std::string& value) {
+ std::set<std::string> invalid_values = {"native", "traditio", "finance"};
+ if (invalid_values.find(value) != invalid_values.end()) return false;
+ UErrorCode status = U_ZERO_ERROR;
+ std::unique_ptr<icu::NumberingSystem> numbering_system(
+ icu::NumberingSystem::createInstanceByName(value.c_str(), status));
+ return U_SUCCESS(status) && numbering_system.get() != nullptr;
+}
+
+std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
+ icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
std::map<std::string, std::string> extensions;
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::StringEnumeration> keywords(
- icu_locale.createKeywords(status));
+ icu_locale->createKeywords(status));
if (U_FAILURE(status)) return extensions;
if (!keywords) return extensions;
@@ -1466,7 +1490,7 @@ std::map<std::string, std::string> Intl::LookupUnicodeExtensions(
continue;
}
- icu_locale.getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
+ icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
// Ignore failures in ICU and skip to the next keyword.
//
@@ -1478,16 +1502,345 @@ std::map<std::string, std::string> Intl::LookupUnicodeExtensions(
const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
- // Ignore keywords that we don't recognize - spec allows that.
if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
- extensions.insert(
- std::pair<std::string, std::string>(bcp47_key, bcp47_value));
+ bool is_valid_value = false;
+ // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
+ if (strcmp("ca", bcp47_key) == 0) {
+ is_valid_value = IsValidCalendar(*icu_locale, bcp47_value);
+ } else if (strcmp("co", bcp47_key) == 0) {
+ is_valid_value = IsValidCollation(*icu_locale, bcp47_value);
+ } else if (strcmp("hc", bcp47_key) == 0) {
+ // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
+ std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
+ is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
+ } else if (strcmp("lb", bcp47_key) == 0) {
+ // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
+ std::set<std::string> valid_values = {"strict", "normal", "loose"};
+ is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
+ } else if (strcmp("kn", bcp47_key) == 0) {
+ // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
+ std::set<std::string> valid_values = {"true", "false"};
+ is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
+ } else if (strcmp("kf", bcp47_key) == 0) {
+ // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
+ std::set<std::string> valid_values = {"upper", "lower", "false"};
+ is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
+ } else if (strcmp("nu", bcp47_key) == 0) {
+ is_valid_value = IsValidNumberingSystem(bcp47_value);
+ }
+ if (is_valid_value) {
+ extensions.insert(
+ std::pair<std::string, std::string>(bcp47_key, bcp47_value));
+ continue;
+ }
}
+ status = U_ZERO_ERROR;
+ icu_locale->setKeywordValue(keyword, nullptr, status);
+ CHECK(U_SUCCESS(status));
}
return extensions;
}
+// ecma402/#sec-lookupmatcher
+std::string LookupMatcher(Isolate* isolate,
+ const std::set<std::string>& available_locales,
+ const std::vector<std::string>& requested_locales) {
+ // 1. Let result be a new Record.
+ std::string result;
+
+ // 2. For each element locale of requestedLocales in List order, do
+ for (const std::string& locale : requested_locales) {
+ // 2. a. Let noExtensionsLocale be the String value that is locale
+ // with all Unicode locale extension sequences removed.
+ ParsedLocale parsed_locale = ParseBCP47Locale(locale);
+ std::string no_extensions_locale = parsed_locale.no_extensions_locale;
+
+ // 2. b. Let availableLocale be
+ // BestAvailableLocale(availableLocales, noExtensionsLocale).
+ std::string available_locale =
+ BestAvailableLocale(available_locales, no_extensions_locale);
+
+ // 2. c. If availableLocale is not undefined, append locale to the
+ // end of subset.
+ if (!available_locale.empty()) {
+ // Note: The following steps are not performed here because we
+ // can use ICU to parse the unicode locale extension sequence
+ // as part of Intl::ResolveLocale.
+ //
+ // There's no need to separate the unicode locale extensions
+ // right here. Instead just return the available locale with the
+ // extensions.
+ //
+ // 2. c. i. Set result.[[locale]] to availableLocale.
+ // 2. c. ii. If locale and noExtensionsLocale are not the same
+ // String value, then
+ // 2. c. ii. 1. Let extension be the String value consisting of
+ // the first substring of locale that is a Unicode locale
+ // extension sequence.
+ // 2. c. ii. 2. Set result.[[extension]] to extension.
+ // 2. c. iii. Return result.
+ return available_locale + parsed_locale.extension;
+ }
+ }
+
+ // 3. Let defLocale be DefaultLocale();
+ // 4. Set result.[[locale]] to defLocale.
+ // 5. Return result.
+ return DefaultLocale(isolate);
+}
+
+} // namespace
+
+// This function doesn't correspond exactly with the spec. Instead
+// we use ICU to do all the string manipulations that the spec
+// peforms.
+//
+// The spec uses this function to normalize values for various
+// relevant extension keys (such as disallowing "search" for
+// collation). Instead of doing this here, we let the callers of
+// this method perform such normalization.
+//
+// ecma402/#sec-resolvelocale
+Intl::ResolvedLocale Intl::ResolveLocale(
+ Isolate* isolate, const std::set<std::string>& available_locales,
+ const std::vector<std::string>& requested_locales, MatcherOption matcher,
+ const std::set<std::string>& relevant_extension_keys) {
+ std::string locale;
+ if (matcher == Intl::MatcherOption::kLookup) {
+ locale = LookupMatcher(isolate, available_locales, requested_locales);
+ } else if (matcher == Intl::MatcherOption::kBestFit) {
+ // TODO(intl): Implement better lookup algorithm.
+ locale = LookupMatcher(isolate, available_locales, requested_locales);
+ }
+
+ icu::Locale icu_locale = CreateICULocale(locale);
+ std::map<std::string, std::string> extensions =
+ LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
+
+ std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
+
+ // TODO(gsathya): Remove privateuse subtags from extensions.
+
+ return Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions};
+}
+
+Managed<icu::UnicodeString> Intl::SetTextToBreakIterator(
+ Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
+ icu::UnicodeString* u_text =
+ (icu::UnicodeString*)(Intl::ToICUUnicodeString(isolate, text).clone());
+
+ Handle<Managed<icu::UnicodeString>> new_u_text =
+ Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text);
+
+ break_iterator->setText(*u_text);
+ return *new_u_text;
+}
+
+// ecma262 #sec-string.prototype.normalize
+MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
+ Handle<Object> form_input) {
+ const char* form_name;
+ UNormalization2Mode form_mode;
+ if (form_input->IsUndefined(isolate)) {
+ // default is FNC
+ form_name = "nfc";
+ form_mode = UNORM2_COMPOSE;
+ } else {
+ Handle<String> form;
+ ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
+ Object::ToString(isolate, form_input), String);
+
+ if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
+ form_name = "nfc";
+ form_mode = UNORM2_COMPOSE;
+ } else if (String::Equals(isolate, form,
+ isolate->factory()->NFD_string())) {
+ form_name = "nfc";
+ form_mode = UNORM2_DECOMPOSE;
+ } else if (String::Equals(isolate, form,
+ isolate->factory()->NFKC_string())) {
+ form_name = "nfkc";
+ form_mode = UNORM2_COMPOSE;
+ } else if (String::Equals(isolate, form,
+ isolate->factory()->NFKD_string())) {
+ form_name = "nfkc";
+ form_mode = UNORM2_DECOMPOSE;
+ } else {
+ Handle<String> valid_forms =
+ isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
+ THROW_NEW_ERROR(
+ isolate,
+ NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
+ String);
+ }
+ }
+
+ int length = string->length();
+ string = String::Flatten(isolate, string);
+ icu::UnicodeString result;
+ std::unique_ptr<uc16[]> sap;
+ UErrorCode status = U_ZERO_ERROR;
+ icu::UnicodeString input = ToICUUnicodeString(isolate, string);
+ // Getting a singleton. Should not free it.
+ const icu::Normalizer2* normalizer =
+ icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
+ DCHECK(U_SUCCESS(status));
+ CHECK_NOT_NULL(normalizer);
+ int32_t normalized_prefix_length =
+ normalizer->spanQuickCheckYes(input, status);
+ // Quick return if the input is already normalized.
+ if (length == normalized_prefix_length) return string;
+ icu::UnicodeString unnormalized =
+ input.tempSubString(normalized_prefix_length);
+ // Read-only alias of the normalized prefix.
+ result.setTo(false, input.getBuffer(), normalized_prefix_length);
+ // copy-on-write; normalize the suffix and append to |result|.
+ normalizer->normalizeSecondAndAppend(result, unnormalized, status);
+
+ if (U_FAILURE(status)) {
+ THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
+ }
+
+ return Intl::ToString(isolate, result);
+}
+
+// ICUTimezoneCache calls out to ICU for TimezoneCache
+// functionality in a straightforward way.
+class ICUTimezoneCache : public base::TimezoneCache {
+ public:
+ ICUTimezoneCache() : timezone_(nullptr) { Clear(); }
+
+ ~ICUTimezoneCache() override { Clear(); };
+
+ const char* LocalTimezone(double time_ms) override;
+
+ double DaylightSavingsOffset(double time_ms) override;
+
+ double LocalTimeOffset(double time_ms, bool is_utc) override;
+
+ void Clear() override;
+
+ private:
+ icu::TimeZone* GetTimeZone();
+
+ bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
+ int32_t* dst_offset);
+
+ icu::TimeZone* timezone_;
+
+ std::string timezone_name_;
+ std::string dst_timezone_name_;
+};
+
+const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
+ bool is_dst = DaylightSavingsOffset(time_ms) != 0;
+ std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
+ if (name->empty()) {
+ icu::UnicodeString result;
+ GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
+ result += '\0';
+
+ icu::StringByteSink<std::string> byte_sink(name);
+ result.toUTF8(byte_sink);
+ }
+ DCHECK(!name->empty());
+ return name->c_str();
+}
+
+icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
+ if (timezone_ == nullptr) {
+ timezone_ = icu::TimeZone::createDefault();
+ }
+ return timezone_;
+}
+
+bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
+ int32_t* raw_offset, int32_t* dst_offset) {
+ UErrorCode status = U_ZERO_ERROR;
+ // TODO(jshin): ICU TimeZone class handles skipped time differently from
+ // Ecma 262 (https://github.com/tc39/ecma262/pull/778) and icu::TimeZone
+ // class does not expose the necessary API. Fixing
+ // http://bugs.icu-project.org/trac/ticket/13268 would make it easy to
+ // implement the proposed spec change. A proposed fix for ICU is
+ // https://chromium-review.googlesource.com/851265 .
+ // In the meantime, use an internal (still public) API of icu::BasicTimeZone.
+ // Once it's accepted by the upstream, get rid of cast. Note that casting
+ // TimeZone to BasicTimeZone is safe because we know that icu::TimeZone used
+ // here is a BasicTimeZone.
+ if (is_utc) {
+ GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
+ } else {
+ static_cast<const icu::BasicTimeZone*>(GetTimeZone())
+ ->getOffsetFromLocal(time_ms, icu::BasicTimeZone::kFormer,
+ icu::BasicTimeZone::kFormer, *raw_offset,
+ *dst_offset, status);
+ }
+
+ return U_SUCCESS(status);
+}
+
+double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
+ int32_t raw_offset, dst_offset;
+ if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0;
+ return dst_offset;
+}
+
+double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
+ int32_t raw_offset, dst_offset;
+ if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0;
+ return raw_offset + dst_offset;
+}
+
+void ICUTimezoneCache::Clear() {
+ delete timezone_;
+ timezone_ = nullptr;
+ timezone_name_.clear();
+ dst_timezone_name_.clear();
+}
+
+base::TimezoneCache* Intl::CreateTimeZoneCache() {
+ return FLAG_icu_timezone_data ? new ICUTimezoneCache()
+ : base::OS::CreateTimezoneCache();
+}
+
+Maybe<Intl::CaseFirst> Intl::GetCaseFirst(Isolate* isolate,
+ Handle<JSReceiver> options,
+ const char* method) {
+ return Intl::GetStringOption<Intl::CaseFirst>(
+ isolate, options, "caseFirst", method, {"upper", "lower", "false"},
+ {Intl::CaseFirst::kUpper, Intl::CaseFirst::kLower,
+ Intl::CaseFirst::kFalse},
+ Intl::CaseFirst::kUndefined);
+}
+
+Maybe<Intl::HourCycle> Intl::GetHourCycle(Isolate* isolate,
+ Handle<JSReceiver> options,
+ const char* method) {
+ return Intl::GetStringOption<Intl::HourCycle>(
+ isolate, options, "hourCycle", method, {"h11", "h12", "h23", "h24"},
+ {Intl::HourCycle::kH11, Intl::HourCycle::kH12, Intl::HourCycle::kH23,
+ Intl::HourCycle::kH24},
+ Intl::HourCycle::kUndefined);
+}
+
+Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
+ Handle<JSReceiver> options,
+ const char* method) {
+ return Intl::GetStringOption<Intl::MatcherOption>(
+ isolate, options, "localeMatcher", method, {"best fit", "lookup"},
+ {Intl::MatcherOption::kLookup, Intl::MatcherOption::kBestFit},
+ Intl::MatcherOption::kLookup);
+}
+
+Intl::HourCycle Intl::ToHourCycle(const std::string& hc) {
+ if (hc == "h11") return Intl::HourCycle::kH11;
+ if (hc == "h12") return Intl::HourCycle::kH12;
+ if (hc == "h23") return Intl::HourCycle::kH23;
+ if (hc == "h24") return Intl::HourCycle::kH24;
+ return Intl::HourCycle::kUndefined;
+}
+
} // namespace internal
} // namespace v8