diff options
Diffstat (limited to 'deps/v8/src/objects/js-list-format.cc')
-rw-r--r-- | deps/v8/src/objects/js-list-format.cc | 401 |
1 files changed, 401 insertions, 0 deletions
diff --git a/deps/v8/src/objects/js-list-format.cc b/deps/v8/src/objects/js-list-format.cc new file mode 100644 index 0000000000..66dbe0bfd9 --- /dev/null +++ b/deps/v8/src/objects/js-list-format.cc @@ -0,0 +1,401 @@ +// Copyright 2018 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_INTL_SUPPORT +#error Internationalization is expected to be enabled. +#endif // V8_INTL_SUPPORT + +#include "src/objects/js-list-format.h" + +#include <memory> +#include <vector> + +#include "src/elements.h" +#include "src/heap/factory.h" +#include "src/isolate.h" +#include "src/objects-inl.h" +#include "src/objects/intl-objects.h" +#include "src/objects/js-array-inl.h" +#include "src/objects/js-list-format-inl.h" +#include "src/objects/managed.h" +#include "unicode/listformatter.h" + +namespace v8 { +namespace internal { + +namespace { +const char* kStandard = "standard"; +const char* kOr = "or"; +const char* kUnit = "unit"; +const char* kStandardShort = "standard-short"; +const char* kUnitShort = "unit-short"; +const char* kUnitNarrow = "unit-narrow"; + +const char* GetIcuStyleString(JSListFormat::Style style, + JSListFormat::Type type) { + switch (type) { + case JSListFormat::Type::CONJUNCTION: + switch (style) { + case JSListFormat::Style::LONG: + return kStandard; + case JSListFormat::Style::SHORT: + return kStandardShort; + case JSListFormat::Style::NARROW: + // Currently, ListFormat::createInstance on "standard-narrow" will + // fail so we use "standard-short" here. + // See https://unicode.org/cldr/trac/ticket/11254 + // TODO(ftang): change to return kStandardNarrow; after the above + // issue fixed in CLDR/ICU. + // CLDR bug: https://unicode.org/cldr/trac/ticket/11254 + // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014 + return kStandardShort; + case JSListFormat::Style::COUNT: + UNREACHABLE(); + } + case JSListFormat::Type::DISJUNCTION: + switch (style) { + // Currently, ListFormat::createInstance on "or-short" and "or-narrow" + // will fail so we use "or" here. + // See https://unicode.org/cldr/trac/ticket/11254 + // TODO(ftang): change to return kOr, kOrShort or kOrNarrow depend on + // style after the above issue fixed in CLDR/ICU. + // CLDR bug: https://unicode.org/cldr/trac/ticket/11254 + // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014 + case JSListFormat::Style::LONG: + case JSListFormat::Style::SHORT: + case JSListFormat::Style::NARROW: + return kOr; + case JSListFormat::Style::COUNT: + UNREACHABLE(); + } + case JSListFormat::Type::UNIT: + switch (style) { + case JSListFormat::Style::LONG: + return kUnit; + case JSListFormat::Style::SHORT: + return kUnitShort; + case JSListFormat::Style::NARROW: + return kUnitNarrow; + case JSListFormat::Style::COUNT: + UNREACHABLE(); + } + case JSListFormat::Type::COUNT: + UNREACHABLE(); + } +} + +} // namespace + +JSListFormat::Style get_style(const char* str) { + switch (str[0]) { + case 'n': + if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW; + break; + case 'l': + if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG; + break; + case 's': + if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT; + break; + } + UNREACHABLE(); +} + +JSListFormat::Type get_type(const char* str) { + switch (str[0]) { + case 'c': + if (strcmp(&str[1], "onjunction") == 0) + return JSListFormat::Type::CONJUNCTION; + break; + case 'd': + if (strcmp(&str[1], "isjunction") == 0) + return JSListFormat::Type::DISJUNCTION; + break; + case 'u': + if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT; + break; + } + UNREACHABLE(); +} + +MaybeHandle<JSListFormat> JSListFormat::InitializeListFormat( + Isolate* isolate, Handle<JSListFormat> list_format_holder, + Handle<Object> input_locales, Handle<Object> input_options) { + Factory* factory = isolate->factory(); + list_format_holder->set_flags(0); + + Handle<JSReceiver> options; + // 2. If options is undefined, then + if (input_options->IsUndefined(isolate)) { + // a. Let options be ObjectCreate(null). + options = isolate->factory()->NewJSObjectWithNullProto(); + // 3. Else + } else { + // a. Let options be ? ToObject(options). + ASSIGN_RETURN_ON_EXCEPTION(isolate, options, + Object::ToObject(isolate, input_options), + JSListFormat); + } + + // 5. Let t be GetOption(options, "type", "string", «"conjunction", + // "disjunction", "unit"», "conjunction"). + std::unique_ptr<char[]> type_str = nullptr; + std::vector<const char*> type_values = {"conjunction", "disjunction", "unit"}; + Maybe<bool> maybe_found_type = Intl::GetStringOption( + isolate, options, "type", type_values, "Intl.ListFormat", &type_str); + Type type_enum = Type::CONJUNCTION; + MAYBE_RETURN(maybe_found_type, MaybeHandle<JSListFormat>()); + if (maybe_found_type.FromJust()) { + DCHECK_NOT_NULL(type_str.get()); + type_enum = get_type(type_str.get()); + } + // 6. Set listFormat.[[Type]] to t. + list_format_holder->set_type(type_enum); + + // 7. Let s be ? GetOption(options, "style", "string", + // «"long", "short", "narrow"», "long"). + std::unique_ptr<char[]> style_str = nullptr; + std::vector<const char*> style_values = {"long", "short", "narrow"}; + Maybe<bool> maybe_found_style = Intl::GetStringOption( + isolate, options, "style", style_values, "Intl.ListFormat", &style_str); + Style style_enum = Style::LONG; + MAYBE_RETURN(maybe_found_style, MaybeHandle<JSListFormat>()); + if (maybe_found_style.FromJust()) { + DCHECK_NOT_NULL(style_str.get()); + style_enum = get_style(style_str.get()); + } + // 15. Set listFormat.[[Style]] to s. + list_format_holder->set_style(style_enum); + + // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]], + // requestedLocales, opt, undefined, localeData). + Handle<JSObject> r; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, r, + Intl::ResolveLocale(isolate, "listformat", input_locales, options), + JSListFormat); + + Handle<Object> locale_obj = + JSObject::GetDataProperty(r, factory->locale_string()); + Handle<String> locale; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, locale, Object::ToString(isolate, locale_obj), JSListFormat); + + // 18. Set listFormat.[[Locale]] to the value of r.[[Locale]]. + list_format_holder->set_locale(*locale); + + std::unique_ptr<char[]> locale_name = locale->ToCString(); + icu::Locale icu_locale(locale_name.get()); + UErrorCode status = U_ZERO_ERROR; + icu::ListFormatter* formatter = icu::ListFormatter::createInstance( + icu_locale, GetIcuStyleString(style_enum, type_enum), status); + if (U_FAILURE(status)) { + delete formatter; + FATAL("Failed to create ICU list formatter, are ICU data files missing?"); + } + CHECK_NOT_NULL(formatter); + + Handle<Managed<icu::ListFormatter>> managed_formatter = + Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter); + + list_format_holder->set_formatter(*managed_formatter); + return list_format_holder; +} + +Handle<JSObject> JSListFormat::ResolvedOptions( + Isolate* isolate, Handle<JSListFormat> format_holder) { + Factory* factory = isolate->factory(); + Handle<JSObject> result = factory->NewJSObject(isolate->object_function()); + Handle<String> locale(format_holder->locale(), isolate); + JSObject::AddProperty(isolate, result, factory->locale_string(), locale, + NONE); + JSObject::AddProperty(isolate, result, factory->style_string(), + format_holder->StyleAsString(), NONE); + JSObject::AddProperty(isolate, result, factory->type_string(), + format_holder->TypeAsString(), NONE); + return result; +} + +icu::ListFormatter* JSListFormat::UnpackFormatter(Isolate* isolate, + Handle<JSListFormat> holder) { + return Managed<icu::ListFormatter>::cast(holder->formatter())->raw(); +} + +Handle<String> JSListFormat::StyleAsString() const { + switch (style()) { + case Style::LONG: + return GetReadOnlyRoots().long_string_handle(); + case Style::SHORT: + return GetReadOnlyRoots().short_string_handle(); + case Style::NARROW: + return GetReadOnlyRoots().narrow_string_handle(); + case Style::COUNT: + UNREACHABLE(); + } +} + +Handle<String> JSListFormat::TypeAsString() const { + switch (type()) { + case Type::CONJUNCTION: + return GetReadOnlyRoots().conjunction_string_handle(); + case Type::DISJUNCTION: + return GetReadOnlyRoots().disjunction_string_handle(); + case Type::UNIT: + return GetReadOnlyRoots().unit_string_handle(); + case Type::COUNT: + UNREACHABLE(); + } +} + +namespace { + +// TODO(ftang) remove the following hack after icu::ListFormat support +// FieldPosition. +// This is a temporary workaround until icu::ListFormat support FieldPosition +// It is inefficient and won't work correctly on the edge case that the input +// contains fraction of the list pattern. +// For example the following under English will mark the "an" incorrectly +// since the formatted is "a, b, and an". +// listFormat.formatToParts(["a", "b", "an"]) +// https://ssl.icu-project.org/trac/ticket/13754 +MaybeHandle<JSArray> GenerateListFormatParts( + Isolate* isolate, const icu::UnicodeString& formatted, + const icu::UnicodeString items[], int length) { + Factory* factory = isolate->factory(); + int estimate_size = length * 2 + 1; + Handle<JSArray> array = factory->NewJSArray(estimate_size); + int index = 0; + int last_pos = 0; + for (int i = 0; i < length; i++) { + int found = formatted.indexOf(items[i], last_pos); + DCHECK_GE(found, 0); + if (found > last_pos) { + Handle<String> substring; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, substring, + Intl::ToString(isolate, formatted, last_pos, found), JSArray); + Intl::AddElement(isolate, array, index++, factory->literal_string(), + substring); + } + last_pos = found + items[i].length(); + Handle<String> substring; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, substring, Intl::ToString(isolate, formatted, found, last_pos), + JSArray); + Intl::AddElement(isolate, array, index++, factory->element_string(), + substring); + } + if (last_pos < formatted.length()) { + Handle<String> substring; + ASSIGN_RETURN_ON_EXCEPTION( + isolate, substring, + Intl::ToString(isolate, formatted, last_pos, formatted.length()), + JSArray); + Intl::AddElement(isolate, array, index++, factory->literal_string(), + substring); + } + return array; +} + +// Extract String from JSArray into array of UnicodeString +Maybe<bool> ToUnicodeStringArray(Isolate* isolate, Handle<JSArray> array, + icu::UnicodeString items[], uint32_t length) { + Factory* factory = isolate->factory(); + // In general, ElementsAccessor::Get actually isn't guaranteed to give us the + // elements in order. But given that it was created by a builtin we control, + // it shouldn't be possible for it to be problematic. Add DCHECK to ensure + // that. + DCHECK(array->HasFastPackedElements()); + auto* accessor = array->GetElementsAccessor(); + DCHECK(length == accessor->NumberOfElements(*array)); + // ecma402 #sec-createpartsfromlist + // 2. If list contains any element value such that Type(value) is not String, + // throw a TypeError exception. + // + // Per spec it looks like we're supposed to throw a TypeError exception if the + // item isn't already a string, rather than coercing to a string. Moreover, + // the way the spec's written it looks like we're supposed to run through the + // whole list to check that they're all strings before going further. + for (uint32_t i = 0; i < length; i++) { + Handle<Object> item = accessor->Get(array, i); + DCHECK(!item.is_null()); + if (!item->IsString()) { + THROW_NEW_ERROR_RETURN_VALUE( + isolate, + NewTypeError(MessageTemplate::kArrayItemNotType, + factory->NewStringFromStaticChars("list"), + factory->NewNumber(i), + factory->NewStringFromStaticChars("String")), + Nothing<bool>()); + } + } + for (uint32_t i = 0; i < length; i++) { + Handle<String> string = Handle<String>::cast(accessor->Get(array, i)); + DisallowHeapAllocation no_gc; + string = String::Flatten(isolate, string); + std::unique_ptr<uc16[]> sap; + items[i] = + icu::UnicodeString(GetUCharBufferFromFlat(string->GetFlatContent(), + &sap, string->length()), + string->length()); + } + return Just(true); +} + +} // namespace + +Maybe<bool> FormatListCommon(Isolate* isolate, + Handle<JSListFormat> format_holder, + Handle<JSArray> list, + icu::UnicodeString& formatted, uint32_t* length, + std::unique_ptr<icu::UnicodeString[]>& array) { + DCHECK(!list->IsUndefined()); + + icu::ListFormatter* formatter = + JSListFormat::UnpackFormatter(isolate, format_holder); + CHECK_NOT_NULL(formatter); + + *length = list->GetElementsAccessor()->NumberOfElements(*list); + array.reset(new icu::UnicodeString[*length]); + + // ecma402 #sec-createpartsfromlist + // 2. If list contains any element value such that Type(value) is not String, + // throw a TypeError exception. + MAYBE_RETURN(ToUnicodeStringArray(isolate, list, array.get(), *length), + Nothing<bool>()); + + UErrorCode status = U_ZERO_ERROR; + formatter->format(array.get(), *length, formatted, status); + DCHECK(U_SUCCESS(status)); + return Just(true); +} + +// ecma402 #sec-formatlist +MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate, + Handle<JSListFormat> format_holder, + Handle<JSArray> list) { + icu::UnicodeString formatted; + uint32_t length; + std::unique_ptr<icu::UnicodeString[]> array; + MAYBE_RETURN( + FormatListCommon(isolate, format_holder, list, formatted, &length, array), + Handle<String>()); + return Intl::ToString(isolate, formatted); +} + +// ecma42 #sec-formatlisttoparts +MaybeHandle<JSArray> JSListFormat::FormatListToParts( + Isolate* isolate, Handle<JSListFormat> format_holder, + Handle<JSArray> list) { + icu::UnicodeString formatted; + uint32_t length; + std::unique_ptr<icu::UnicodeString[]> array; + MAYBE_RETURN( + FormatListCommon(isolate, format_holder, list, formatted, &length, array), + Handle<JSArray>()); + return GenerateListFormatParts(isolate, formatted, array.get(), length); +} + +} // namespace internal +} // namespace v8 |