summaryrefslogtreecommitdiff
path: root/deps/v8/src/objects/js-list-format.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/objects/js-list-format.cc')
-rw-r--r--deps/v8/src/objects/js-list-format.cc401
1 files changed, 401 insertions, 0 deletions
diff --git a/deps/v8/src/objects/js-list-format.cc b/deps/v8/src/objects/js-list-format.cc
new file mode 100644
index 0000000000..66dbe0bfd9
--- /dev/null
+++ b/deps/v8/src/objects/js-list-format.cc
@@ -0,0 +1,401 @@
+// Copyright 2018 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_INTL_SUPPORT
+#error Internationalization is expected to be enabled.
+#endif // V8_INTL_SUPPORT
+
+#include "src/objects/js-list-format.h"
+
+#include <memory>
+#include <vector>
+
+#include "src/elements.h"
+#include "src/heap/factory.h"
+#include "src/isolate.h"
+#include "src/objects-inl.h"
+#include "src/objects/intl-objects.h"
+#include "src/objects/js-array-inl.h"
+#include "src/objects/js-list-format-inl.h"
+#include "src/objects/managed.h"
+#include "unicode/listformatter.h"
+
+namespace v8 {
+namespace internal {
+
+namespace {
+// ICU list-pattern style identifiers. GetIcuStyleString() below returns one of
+// these, and InitializeListFormat() passes it on to
+// icu::ListFormatter::createInstance(). They are declared constexpr so the
+// identifiers are genuine constants rather than reassignable global pointers.
+constexpr char kStandard[] = "standard";
+constexpr char kOr[] = "or";
+constexpr char kUnit[] = "unit";
+constexpr char kStandardShort[] = "standard-short";
+constexpr char kUnitShort[] = "unit-short";
+constexpr char kUnitNarrow[] = "unit-narrow";
+
+// Maps a (style, type) pair onto the ICU style identifier used to create the
+// underlying icu::ListFormatter.
+//
+// Both enums carry a COUNT sentinel that is never a valid input; hitting it,
+// or any value outside the enumerators, ends in UNREACHABLE(). Every inner
+// switch is followed by an explicit UNREACHABLE() so that an invalid style can
+// never silently fall through into the mapping of the next type.
+const char* GetIcuStyleString(JSListFormat::Style style,
+                              JSListFormat::Type type) {
+  switch (type) {
+    case JSListFormat::Type::CONJUNCTION:
+      switch (style) {
+        case JSListFormat::Style::LONG:
+          return kStandard;
+        case JSListFormat::Style::SHORT:
+          return kStandardShort;
+        case JSListFormat::Style::NARROW:
+          // Currently, ListFormat::createInstance on "standard-narrow" will
+          // fail so we use "standard-short" here.
+          // See https://unicode.org/cldr/trac/ticket/11254
+          // TODO(ftang): change to return kStandardNarrow; after the above
+          // issue fixed in CLDR/ICU.
+          // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
+          // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
+          return kStandardShort;
+        case JSListFormat::Style::COUNT:
+          UNREACHABLE();
+      }
+      UNREACHABLE();
+    case JSListFormat::Type::DISJUNCTION:
+      switch (style) {
+        // Currently, ListFormat::createInstance on "or-short" and "or-narrow"
+        // will fail so we use "or" here.
+        // See https://unicode.org/cldr/trac/ticket/11254
+        // TODO(ftang): change to return kOr, kOrShort or kOrNarrow depend on
+        // style after the above issue fixed in CLDR/ICU.
+        // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
+        // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
+        case JSListFormat::Style::LONG:
+        case JSListFormat::Style::SHORT:
+        case JSListFormat::Style::NARROW:
+          return kOr;
+        case JSListFormat::Style::COUNT:
+          UNREACHABLE();
+      }
+      UNREACHABLE();
+    case JSListFormat::Type::UNIT:
+      switch (style) {
+        case JSListFormat::Style::LONG:
+          return kUnit;
+        case JSListFormat::Style::SHORT:
+          return kUnitShort;
+        case JSListFormat::Style::NARROW:
+          return kUnitNarrow;
+        case JSListFormat::Style::COUNT:
+          UNREACHABLE();
+      }
+      UNREACHABLE();
+    case JSListFormat::Type::COUNT:
+      UNREACHABLE();
+  }
+  UNREACHABLE();
+}
+
+}  // namespace
+
+// Converts the textual "style" option value ("narrow", "long" or "short")
+// into the matching JSListFormat::Style. Any other string is a caller bug and
+// ends in UNREACHABLE().
+JSListFormat::Style get_style(const char* str) {
+  if (strcmp(str, "narrow") == 0) return JSListFormat::Style::NARROW;
+  if (strcmp(str, "long") == 0) return JSListFormat::Style::LONG;
+  if (strcmp(str, "short") == 0) return JSListFormat::Style::SHORT;
+  UNREACHABLE();
+}
+
+// Converts the textual "type" option value ("conjunction", "disjunction" or
+// "unit") into the matching JSListFormat::Type. Any other string is a caller
+// bug and ends in UNREACHABLE().
+JSListFormat::Type get_type(const char* str) {
+  if (strcmp(str, "conjunction") == 0) return JSListFormat::Type::CONJUNCTION;
+  if (strcmp(str, "disjunction") == 0) return JSListFormat::Type::DISJUNCTION;
+  if (strcmp(str, "unit") == 0) return JSListFormat::Type::UNIT;
+  UNREACHABLE();
+}
+
+// Fills in |list_format_holder| from the given locales and options: reads the
+// "type" and "style" options, resolves the locale, creates the matching
+// icu::ListFormatter and stores all of them on the holder. Returns the holder,
+// or an empty MaybeHandle if reading the options or resolving the locale left
+// an exception pending. Failure to create the ICU formatter is fatal.
+MaybeHandle<JSListFormat> JSListFormat::InitializeListFormat(
+    Isolate* isolate, Handle<JSListFormat> list_format_holder,
+    Handle<Object> input_locales, Handle<Object> input_options) {
+  Factory* factory = isolate->factory();
+  list_format_holder->set_flags(0);
+
+  // 2. If options is undefined, let options be ObjectCreate(null);
+  // 3. otherwise let options be ? ToObject(options).
+  Handle<JSReceiver> options;
+  if (!input_options->IsUndefined(isolate)) {
+    ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
+                               Object::ToObject(isolate, input_options),
+                               JSListFormat);
+  } else {
+    options = factory->NewJSObjectWithNullProto();
+  }
+
+  // 5. Let t be GetOption(options, "type", "string", «"conjunction",
+  //    "disjunction", "unit"», "conjunction").
+  std::vector<const char*> allowed_types = {"conjunction", "disjunction",
+                                            "unit"};
+  std::unique_ptr<char[]> requested_type;
+  Maybe<bool> type_found =
+      Intl::GetStringOption(isolate, options, "type", allowed_types,
+                            "Intl.ListFormat", &requested_type);
+  MAYBE_RETURN(type_found, MaybeHandle<JSListFormat>());
+  // "conjunction" is the default when the option is absent.
+  Type type_enum = Type::CONJUNCTION;
+  if (type_found.FromJust()) {
+    DCHECK_NOT_NULL(requested_type.get());
+    type_enum = get_type(requested_type.get());
+  }
+  // 6. Set listFormat.[[Type]] to t.
+  list_format_holder->set_type(type_enum);
+
+  // 7. Let s be ? GetOption(options, "style", "string",
+  //    «"long", "short", "narrow"», "long").
+  std::vector<const char*> allowed_styles = {"long", "short", "narrow"};
+  std::unique_ptr<char[]> requested_style;
+  Maybe<bool> style_found =
+      Intl::GetStringOption(isolate, options, "style", allowed_styles,
+                            "Intl.ListFormat", &requested_style);
+  MAYBE_RETURN(style_found, MaybeHandle<JSListFormat>());
+  // "long" is the default when the option is absent.
+  Style style_enum = Style::LONG;
+  if (style_found.FromJust()) {
+    DCHECK_NOT_NULL(requested_style.get());
+    style_enum = get_style(requested_style.get());
+  }
+  // Set listFormat.[[Style]] to s.
+  list_format_holder->set_style(style_enum);
+
+  // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]],
+  //     requestedLocales, opt, undefined, localeData).
+  Handle<JSObject> resolved;
+  ASSIGN_RETURN_ON_EXCEPTION(
+      isolate, resolved,
+      Intl::ResolveLocale(isolate, "listformat", input_locales, options),
+      JSListFormat);
+
+  Handle<Object> resolved_locale =
+      JSObject::GetDataProperty(resolved, factory->locale_string());
+  Handle<String> locale;
+  ASSIGN_RETURN_ON_EXCEPTION(isolate, locale,
+                             Object::ToString(isolate, resolved_locale),
+                             JSListFormat);
+
+  // 18. Set listFormat.[[Locale]] to the value of r.[[Locale]].
+  list_format_holder->set_locale(*locale);
+
+  // Create the ICU formatter for the resolved locale and requested style.
+  std::unique_ptr<char[]> locale_cstr = locale->ToCString();
+  icu::Locale icu_locale(locale_cstr.get());
+  const char* icu_style = GetIcuStyleString(style_enum, type_enum);
+  UErrorCode status = U_ZERO_ERROR;
+  icu::ListFormatter* formatter =
+      icu::ListFormatter::createInstance(icu_locale, icu_style, status);
+  if (U_FAILURE(status)) {
+    delete formatter;
+    FATAL("Failed to create ICU list formatter, are ICU data files missing?");
+  }
+  CHECK_NOT_NULL(formatter);
+
+  // Wrap the raw formatter in a Managed object and store it on the holder.
+  Handle<Managed<icu::ListFormatter>> managed_formatter =
+      Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter);
+  list_format_holder->set_formatter(*managed_formatter);
+
+  return list_format_holder;
+}
+
+// Builds a fresh plain object with the "locale", "style" and "type"
+// properties (added in that order) describing |format_holder|.
+Handle<JSObject> JSListFormat::ResolvedOptions(
+    Isolate* isolate, Handle<JSListFormat> format_holder) {
+  Factory* factory = isolate->factory();
+  Handle<JSObject> options = factory->NewJSObject(isolate->object_function());
+
+  Handle<String> locale(format_holder->locale(), isolate);
+  JSObject::AddProperty(isolate, options, factory->locale_string(), locale,
+                        NONE);
+
+  Handle<String> style = format_holder->StyleAsString();
+  JSObject::AddProperty(isolate, options, factory->style_string(), style,
+                        NONE);
+
+  Handle<String> type = format_holder->TypeAsString();
+  JSObject::AddProperty(isolate, options, factory->type_string(), type, NONE);
+
+  return options;
+}
+
+// Returns the raw icu::ListFormatter held by the Managed wrapper stored in
+// |holder|'s formatter field. |isolate| is unused here.
+icu::ListFormatter* JSListFormat::UnpackFormatter(Isolate* isolate,
+                                                  Handle<JSListFormat> holder) {
+  auto managed_formatter =
+      Managed<icu::ListFormatter>::cast(holder->formatter());
+  return managed_formatter->raw();
+}
+
+// Returns the read-only root string ("long", "short" or "narrow") that names
+// this object's style. Style::COUNT is a sentinel and must never be stored.
+Handle<String> JSListFormat::StyleAsString() const {
+  auto roots = GetReadOnlyRoots();
+  switch (style()) {
+    case Style::LONG:
+      return roots.long_string_handle();
+    case Style::SHORT:
+      return roots.short_string_handle();
+    case Style::NARROW:
+      return roots.narrow_string_handle();
+    case Style::COUNT:
+      UNREACHABLE();
+  }
+}
+
+// Returns the read-only root string ("conjunction", "disjunction" or "unit")
+// that names this object's type. Type::COUNT is a sentinel and must never be
+// stored.
+Handle<String> JSListFormat::TypeAsString() const {
+  auto roots = GetReadOnlyRoots();
+  switch (type()) {
+    case Type::CONJUNCTION:
+      return roots.conjunction_string_handle();
+    case Type::DISJUNCTION:
+      return roots.disjunction_string_handle();
+    case Type::UNIT:
+      return roots.unit_string_handle();
+    case Type::COUNT:
+      UNREACHABLE();
+  }
+}
+
+namespace {
+
+// TODO(ftang): remove this workaround once icu::ListFormat supports
+// FieldPosition. See https://ssl.icu-project.org/trac/ticket/13754
+//
+// Splits |formatted| into "element" and "literal" parts by searching for each
+// of the |length| input |items| in turn, starting after the previous match.
+// Text between matches, and any text after the last match, becomes a
+// "literal" part.
+//
+// This is inefficient and gives wrong results in the edge case where an item
+// also occurs inside the list pattern text. For example, under English
+//   listFormat.formatToParts(["a", "b", "an"])
+// formats to "a, b, and an", and the "an" is marked incorrectly.
+MaybeHandle<JSArray> GenerateListFormatParts(
+    Isolate* isolate, const icu::UnicodeString& formatted,
+    const icu::UnicodeString items[], int length) {
+  Factory* factory = isolate->factory();
+  // Size estimate: one element part per item plus one literal around each.
+  Handle<JSArray> parts = factory->NewJSArray(length * 2 + 1);
+  int part_index = 0;
+  // Position in |formatted| just past the previously emitted part.
+  int cursor = 0;
+
+  for (int i = 0; i < length; i++) {
+    const icu::UnicodeString& item = items[i];
+    const int item_start = formatted.indexOf(item, cursor);
+    DCHECK_GE(item_start, 0);
+    const int item_end = item_start + item.length();
+
+    // Separator text preceding this item, if any.
+    if (cursor < item_start) {
+      Handle<String> literal;
+      ASSIGN_RETURN_ON_EXCEPTION(
+          isolate, literal,
+          Intl::ToString(isolate, formatted, cursor, item_start), JSArray);
+      Intl::AddElement(isolate, parts, part_index++, factory->literal_string(),
+                       literal);
+    }
+
+    // The item itself.
+    Handle<String> element;
+    ASSIGN_RETURN_ON_EXCEPTION(
+        isolate, element,
+        Intl::ToString(isolate, formatted, item_start, item_end), JSArray);
+    Intl::AddElement(isolate, parts, part_index++, factory->element_string(),
+                     element);
+
+    cursor = item_end;
+  }
+
+  // Trailing text after the last item, if any.
+  const int formatted_length = formatted.length();
+  if (cursor < formatted_length) {
+    Handle<String> literal;
+    ASSIGN_RETURN_ON_EXCEPTION(
+        isolate, literal,
+        Intl::ToString(isolate, formatted, cursor, formatted_length), JSArray);
+    Intl::AddElement(isolate, parts, part_index++, factory->literal_string(),
+                     literal);
+  }
+  return parts;
+}
+
+// Extracts the strings held by |array| into |items|, which must have room for
+// |length| icu::UnicodeString entries.
+//
+// Returns Just(true) on success. If any element is not a String, a TypeError
+// (kArrayItemNotType) is thrown and Nothing<bool>() is returned; in that case
+// nothing has been written to |items|, because all elements are validated
+// before any is converted.
+Maybe<bool> ToUnicodeStringArray(Isolate* isolate, Handle<JSArray> array,
+                                 icu::UnicodeString items[], uint32_t length) {
+  Factory* factory = isolate->factory();
+  // In general, ElementsAccessor::Get actually isn't guaranteed to give us the
+  // elements in order. But given that it was created by a builtin we control,
+  // it shouldn't be possible for it to be problematic. Add DCHECK to ensure
+  // that.
+  DCHECK(array->HasFastPackedElements());
+  auto* accessor = array->GetElementsAccessor();
+  DCHECK(length == accessor->NumberOfElements(*array));
+  // ecma402 #sec-createpartsfromlist
+  // 2. If list contains any element value such that Type(value) is not String,
+  // throw a TypeError exception.
+  //
+  // Per spec it looks like we're supposed to throw a TypeError exception if the
+  // item isn't already a string, rather than coercing to a string. Moreover,
+  // the way the spec's written it looks like we're supposed to run through the
+  // whole list to check that they're all strings before going further.
+  for (uint32_t i = 0; i < length; i++) {
+    Handle<Object> item = accessor->Get(array, i);
+    DCHECK(!item.is_null());
+    if (!item->IsString()) {
+      THROW_NEW_ERROR_RETURN_VALUE(
+          isolate,
+          NewTypeError(MessageTemplate::kArrayItemNotType,
+                       factory->NewStringFromStaticChars("list"),
+                       factory->NewNumber(i),
+                       factory->NewStringFromStaticChars("String")),
+          Nothing<bool>());
+    }
+  }
+  for (uint32_t i = 0; i < length; i++) {
+    Handle<String> string = Handle<String>::cast(accessor->Get(array, i));
+    // Flattening a non-flat string can allocate on the heap, so it has to
+    // happen before the no-allocation scope is entered. Only the read of the
+    // flat content below needs that protection.
+    string = String::Flatten(isolate, string);
+    DisallowHeapAllocation no_gc;
+    // |sap| owns the temporary buffer (if one is needed) only until the end of
+    // this iteration. NOTE(review): this relies on icu::UnicodeString copying
+    // the characters it is constructed from.
+    std::unique_ptr<uc16[]> sap;
+    items[i] =
+        icu::UnicodeString(GetUCharBufferFromFlat(string->GetFlatContent(),
+                                                  &sap, string->length()),
+                           string->length());
+  }
+  return Just(true);
+}
+
+} // namespace
+
+// Shared front half of FormatList and FormatListToParts.
+//
+// Converts the elements of |list| into ICU strings and formats them with the
+// formatter stored in |format_holder|. On success returns Just(true) with:
+//   |formatted| - the formatted list text,
+//   |length|    - the number of elements in |list|,
+//   |array|     - the converted elements (newly allocated, |length| entries).
+// Returns Nothing<bool>() with a pending TypeError if |list| contains a
+// non-String element. The ICU status of format() is only verified by a DCHECK.
+Maybe<bool> FormatListCommon(Isolate* isolate,
+                             Handle<JSListFormat> format_holder,
+                             Handle<JSArray> list,
+                             icu::UnicodeString& formatted, uint32_t* length,
+                             std::unique_ptr<icu::UnicodeString[]>& array) {
+  DCHECK(!list->IsUndefined());
+
+  icu::ListFormatter* formatter =
+      JSListFormat::UnpackFormatter(isolate, format_holder);
+  CHECK_NOT_NULL(formatter);
+
+  const uint32_t item_count =
+      list->GetElementsAccessor()->NumberOfElements(*list);
+  *length = item_count;
+  array.reset(new icu::UnicodeString[item_count]);
+  icu::UnicodeString* items = array.get();
+
+  // ecma402 #sec-createpartsfromlist
+  // 2. If list contains any element value such that Type(value) is not String,
+  // throw a TypeError exception.
+  Maybe<bool> converted =
+      ToUnicodeStringArray(isolate, list, items, item_count);
+  MAYBE_RETURN(converted, Nothing<bool>());
+
+  UErrorCode status = U_ZERO_ERROR;
+  formatter->format(items, item_count, formatted, status);
+  DCHECK(U_SUCCESS(status));
+  return Just(true);
+}
+
+// ecma402 #sec-formatlist
+// Formats |list| with the formatter stored in |format_holder| and returns the
+// result as a single string. Returns an empty handle if FormatListCommon
+// fails.
+MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
+                                             Handle<JSListFormat> format_holder,
+                                             Handle<JSArray> list) {
+  std::unique_ptr<icu::UnicodeString[]> items;
+  uint32_t item_count = 0;
+  icu::UnicodeString text;
+  Maybe<bool> formatted_ok = FormatListCommon(isolate, format_holder, list,
+                                              text, &item_count, items);
+  MAYBE_RETURN(formatted_ok, Handle<String>());
+  return Intl::ToString(isolate, text);
+}
+
+// ecma402 #sec-formatlisttoparts
+// Formats |list| with the formatter stored in |format_holder| and returns the
+// result split into parts by GenerateListFormatParts. Returns an empty handle
+// if FormatListCommon fails.
+MaybeHandle<JSArray> JSListFormat::FormatListToParts(
+    Isolate* isolate, Handle<JSListFormat> format_holder,
+    Handle<JSArray> list) {
+  std::unique_ptr<icu::UnicodeString[]> items;
+  uint32_t item_count = 0;
+  icu::UnicodeString text;
+  Maybe<bool> formatted_ok = FormatListCommon(isolate, format_holder, list,
+                                              text, &item_count, items);
+  MAYBE_RETURN(formatted_ok, Handle<JSArray>());
+  return GenerateListFormatParts(isolate, text, items.get(), item_count);
+}
+
+} // namespace internal
+} // namespace v8