summaryrefslogtreecommitdiff
path: root/deps/v8/src/runtime/runtime-i18n.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/runtime/runtime-i18n.cc')
-rw-r--r--deps/v8/src/runtime/runtime-i18n.cc453
1 files changed, 192 insertions, 261 deletions
diff --git a/deps/v8/src/runtime/runtime-i18n.cc b/deps/v8/src/runtime/runtime-i18n.cc
index 75e0952581..6630fadc10 100644
--- a/deps/v8/src/runtime/runtime-i18n.cc
+++ b/deps/v8/src/runtime/runtime-i18n.cc
@@ -8,13 +8,15 @@
#include <memory>
-#include "src/api.h"
#include "src/api-natives.h"
+#include "src/api.h"
#include "src/arguments.h"
#include "src/factory.h"
#include "src/i18n.h"
#include "src/isolate-inl.h"
#include "src/messages.h"
+#include "src/string-case.h"
+#include "src/utils.h"
#include "unicode/brkiter.h"
#include "unicode/calendar.h"
@@ -70,7 +72,7 @@ RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
HandleScope scope(isolate);
Factory* factory = isolate->factory();
- DCHECK(args.length() == 1);
+ DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);
v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));
@@ -107,7 +109,7 @@ RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
HandleScope scope(isolate);
Factory* factory = isolate->factory();
- DCHECK(args.length() == 1);
+ DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, service, 0);
const icu::Locale* available_locales = NULL;
@@ -152,7 +154,7 @@ RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
HandleScope scope(isolate);
Factory* factory = isolate->factory();
- DCHECK(args.length() == 0);
+ DCHECK_EQ(0, args.length());
icu::Locale default_locale;
@@ -173,7 +175,7 @@ RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
HandleScope scope(isolate);
Factory* factory = isolate->factory();
- DCHECK(args.length() == 1);
+ DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);
@@ -257,7 +259,7 @@ RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
HandleScope scope(isolate);
- DCHECK(args.length() == 1);
+ DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
@@ -273,7 +275,7 @@ RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
HandleScope scope(isolate);
- DCHECK(args.length() == 2);
+ DCHECK_EQ(2, args.length());
CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1);
@@ -291,63 +293,33 @@ RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) {
HandleScope scope(isolate);
- DCHECK(args.length() == 3);
+ DCHECK_EQ(2, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
CONVERT_ARG_HANDLE_CHECKED(String, type, 1);
- CONVERT_ARG_HANDLE_CHECKED(JSObject, impl, 2);
Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
JSObject::SetProperty(input, marker, type, STRICT).Assert();
- marker = isolate->factory()->intl_impl_object_symbol();
- JSObject::SetProperty(input, marker, impl, STRICT).Assert();
-
return isolate->heap()->undefined_value();
}
-RUNTIME_FUNCTION(Runtime_GetImplFromInitializedIntlObject) {
- HandleScope scope(isolate);
-
- DCHECK(args.length() == 1);
-
- CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
-
- if (!input->IsJSObject()) {
- THROW_NEW_ERROR_RETURN_FAILURE(
- isolate, NewTypeError(MessageTemplate::kNotIntlObject, input));
- }
-
- Handle<JSObject> obj = Handle<JSObject>::cast(input);
-
- Handle<Symbol> marker = isolate->factory()->intl_impl_object_symbol();
-
- Handle<Object> impl = JSReceiver::GetDataProperty(obj, marker);
- if (!impl->IsJSObject()) {
- THROW_NEW_ERROR_RETURN_FAILURE(
- isolate, NewTypeError(MessageTemplate::kNotIntlObject, obj));
- }
- return *impl;
-}
-
-
RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
HandleScope scope(isolate);
- DCHECK(args.length() == 3);
+ DCHECK_EQ(3, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
- Handle<ObjectTemplateInfo> date_format_template = I18N::GetTemplate(isolate);
+ Handle<JSFunction> constructor(
+ isolate->native_context()->intl_date_time_format_function());
- // Create an empty object wrapper.
Handle<JSObject> local_object;
- ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
- isolate, local_object,
- ApiNatives::InstantiateObject(date_format_template));
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, local_object,
+ JSObject::New(constructor, constructor));
// Set date time formatter as internal field of the resulting JS object.
icu::SimpleDateFormat* date_format =
@@ -357,11 +329,6 @@ RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
local_object->SetInternalField(0, reinterpret_cast<Smi*>(date_format));
- Factory* factory = isolate->factory();
- Handle<String> key = factory->NewStringFromStaticChars("dateFormat");
- Handle<String> value = factory->NewStringFromStaticChars("valid");
- JSObject::AddProperty(local_object, key, value, NONE);
-
// Make object handle weak so we can delete the data format once GC kicks in.
Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
@@ -374,7 +341,7 @@ RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
HandleScope scope(isolate);
- DCHECK(args.length() == 2);
+ DCHECK_EQ(2, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);
@@ -384,7 +351,7 @@ RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
icu::SimpleDateFormat* date_format =
DateFormat::UnpackDateFormat(isolate, date_format_holder);
- if (!date_format) return isolate->ThrowIllegalOperation();
+ CHECK_NOT_NULL(date_format);
icu::UnicodeString result;
date_format->format(value->Number(), result);
@@ -475,7 +442,7 @@ RUNTIME_FUNCTION(Runtime_InternalDateFormatToParts) {
HandleScope scope(isolate);
Factory* factory = isolate->factory();
- DCHECK(args.length() == 2);
+ DCHECK_EQ(2, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);
@@ -485,7 +452,7 @@ RUNTIME_FUNCTION(Runtime_InternalDateFormatToParts) {
icu::SimpleDateFormat* date_format =
DateFormat::UnpackDateFormat(isolate, date_format_holder);
- if (!date_format) return isolate->ThrowIllegalOperation();
+ CHECK_NOT_NULL(date_format);
icu::UnicodeString formatted;
icu::FieldPositionIterator fp_iter;
@@ -528,47 +495,21 @@ RUNTIME_FUNCTION(Runtime_InternalDateFormatToParts) {
return *result;
}
-RUNTIME_FUNCTION(Runtime_InternalDateParse) {
- HandleScope scope(isolate);
-
- DCHECK(args.length() == 2);
-
- CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
- CONVERT_ARG_HANDLE_CHECKED(String, date_string, 1);
-
- v8::String::Utf8Value utf8_date(v8::Utils::ToLocal(date_string));
- icu::UnicodeString u_date(icu::UnicodeString::fromUTF8(*utf8_date));
- icu::SimpleDateFormat* date_format =
- DateFormat::UnpackDateFormat(isolate, date_format_holder);
- if (!date_format) return isolate->ThrowIllegalOperation();
-
- UErrorCode status = U_ZERO_ERROR;
- UDate date = date_format->parse(u_date, status);
- if (U_FAILURE(status)) return isolate->heap()->undefined_value();
-
- RETURN_RESULT_OR_FAILURE(
- isolate, JSDate::New(isolate->date_function(), isolate->date_function(),
- static_cast<double>(date)));
-}
-
-
RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
HandleScope scope(isolate);
- DCHECK(args.length() == 3);
+ DCHECK_EQ(3, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
- Handle<ObjectTemplateInfo> number_format_template =
- I18N::GetTemplate(isolate);
+ Handle<JSFunction> constructor(
+ isolate->native_context()->intl_number_format_function());
- // Create an empty object wrapper.
Handle<JSObject> local_object;
- ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
- isolate, local_object,
- ApiNatives::InstantiateObject(number_format_template));
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, local_object,
+ JSObject::New(constructor, constructor));
// Set number formatter as internal field of the resulting JS object.
icu::DecimalFormat* number_format =
@@ -578,11 +519,6 @@ RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
local_object->SetInternalField(0, reinterpret_cast<Smi*>(number_format));
- Factory* factory = isolate->factory();
- Handle<String> key = factory->NewStringFromStaticChars("numberFormat");
- Handle<String> value = factory->NewStringFromStaticChars("valid");
- JSObject::AddProperty(local_object, key, value, NONE);
-
Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
NumberFormat::DeleteNumberFormat,
@@ -594,7 +530,7 @@ RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
HandleScope scope(isolate);
- DCHECK(args.length() == 2);
+ DCHECK_EQ(2, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
CONVERT_ARG_HANDLE_CHECKED(Object, number, 1);
@@ -604,7 +540,7 @@ RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
icu::DecimalFormat* number_format =
NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
- if (!number_format) return isolate->ThrowIllegalOperation();
+ CHECK_NOT_NULL(number_format);
icu::UnicodeString result;
number_format->format(value->Number(), result);
@@ -616,62 +552,21 @@ RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
}
-RUNTIME_FUNCTION(Runtime_InternalNumberParse) {
- HandleScope scope(isolate);
-
- DCHECK(args.length() == 2);
-
- CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
- CONVERT_ARG_HANDLE_CHECKED(String, number_string, 1);
-
- isolate->CountUsage(v8::Isolate::UseCounterFeature::kIntlV8Parse);
-
- v8::String::Utf8Value utf8_number(v8::Utils::ToLocal(number_string));
- icu::UnicodeString u_number(icu::UnicodeString::fromUTF8(*utf8_number));
- icu::DecimalFormat* number_format =
- NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
- if (!number_format) return isolate->ThrowIllegalOperation();
-
- UErrorCode status = U_ZERO_ERROR;
- icu::Formattable result;
- // ICU 4.6 doesn't support parseCurrency call. We need to wait for ICU49
- // to be part of Chrome.
- // TODO(cira): Include currency parsing code using parseCurrency call.
- // We need to check if the formatter parses all currencies or only the
- // one it was constructed with (it will impact the API - how to return ISO
- // code and the value).
- number_format->parse(u_number, result, status);
- if (U_FAILURE(status)) return isolate->heap()->undefined_value();
-
- switch (result.getType()) {
- case icu::Formattable::kDouble:
- return *isolate->factory()->NewNumber(result.getDouble());
- case icu::Formattable::kLong:
- return *isolate->factory()->NewNumberFromInt(result.getLong());
- case icu::Formattable::kInt64:
- return *isolate->factory()->NewNumber(
- static_cast<double>(result.getInt64()));
- default:
- return isolate->heap()->undefined_value();
- }
-}
-
-
RUNTIME_FUNCTION(Runtime_CreateCollator) {
HandleScope scope(isolate);
- DCHECK(args.length() == 3);
+ DCHECK_EQ(3, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
- Handle<ObjectTemplateInfo> collator_template = I18N::GetTemplate(isolate);
+ Handle<JSFunction> constructor(
+ isolate->native_context()->intl_collator_function());
- // Create an empty object wrapper.
Handle<JSObject> local_object;
- ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
- isolate, local_object, ApiNatives::InstantiateObject(collator_template));
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, local_object,
+ JSObject::New(constructor, constructor));
// Set collator as internal field of the resulting JS object.
icu::Collator* collator =
@@ -681,11 +576,6 @@ RUNTIME_FUNCTION(Runtime_CreateCollator) {
local_object->SetInternalField(0, reinterpret_cast<Smi*>(collator));
- Factory* factory = isolate->factory();
- Handle<String> key = factory->NewStringFromStaticChars("collator");
- Handle<String> value = factory->NewStringFromStaticChars("valid");
- JSObject::AddProperty(local_object, key, value, NONE);
-
Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
Collator::DeleteCollator,
@@ -697,14 +587,14 @@ RUNTIME_FUNCTION(Runtime_CreateCollator) {
RUNTIME_FUNCTION(Runtime_InternalCompare) {
HandleScope scope(isolate);
- DCHECK(args.length() == 3);
+ DCHECK_EQ(3, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);
icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder);
- if (!collator) return isolate->ThrowIllegalOperation();
+ CHECK_NOT_NULL(collator);
string1 = String::Flatten(string1);
string2 = String::Flatten(string2);
@@ -742,7 +632,7 @@ RUNTIME_FUNCTION(Runtime_StringNormalize) {
{"nfkc", UNORM2_DECOMPOSE},
};
- DCHECK(args.length() == 2);
+ DCHECK_EQ(2, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
@@ -791,23 +681,21 @@ RUNTIME_FUNCTION(Runtime_StringNormalize) {
RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
HandleScope scope(isolate);
- DCHECK(args.length() == 3);
+ DCHECK_EQ(3, args.length());
CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);
- Handle<ObjectTemplateInfo> break_iterator_template =
- I18N::GetTemplate2(isolate);
+ Handle<JSFunction> constructor(
+ isolate->native_context()->intl_v8_break_iterator_function());
- // Create an empty object wrapper.
Handle<JSObject> local_object;
- ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
- isolate, local_object,
- ApiNatives::InstantiateObject(break_iterator_template));
+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, local_object,
+ JSObject::New(constructor, constructor));
// Set break iterator as internal field of the resulting JS object.
- icu::BreakIterator* break_iterator = BreakIterator::InitializeBreakIterator(
+ icu::BreakIterator* break_iterator = V8BreakIterator::InitializeBreakIterator(
isolate, locale, options, resolved);
if (!break_iterator) return isolate->ThrowIllegalOperation();
@@ -816,16 +704,11 @@ RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
// Make sure that the pointer to adopted text is NULL.
local_object->SetInternalField(1, static_cast<Smi*>(nullptr));
- Factory* factory = isolate->factory();
- Handle<String> key = factory->NewStringFromStaticChars("breakIterator");
- Handle<String> value = factory->NewStringFromStaticChars("valid");
- JSObject::AddProperty(local_object, key, value, NONE);
-
// Make object handle weak so we can delete the break iterator once GC kicks
// in.
Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
- BreakIterator::DeleteBreakIterator,
+ V8BreakIterator::DeleteBreakIterator,
WeakCallbackType::kInternalFields);
return *local_object;
}
@@ -834,14 +717,14 @@ RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
HandleScope scope(isolate);
- DCHECK(args.length() == 2);
+ DCHECK_EQ(2, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
CONVERT_ARG_HANDLE_CHECKED(String, text, 1);
icu::BreakIterator* break_iterator =
- BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
- if (!break_iterator) return isolate->ThrowIllegalOperation();
+ V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
+ CHECK_NOT_NULL(break_iterator);
icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>(
break_iterator_holder->GetInternalField(1));
@@ -865,13 +748,13 @@ RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
HandleScope scope(isolate);
- DCHECK(args.length() == 1);
+ DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
icu::BreakIterator* break_iterator =
- BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
- if (!break_iterator) return isolate->ThrowIllegalOperation();
+ V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
+ CHECK_NOT_NULL(break_iterator);
return *isolate->factory()->NewNumberFromInt(break_iterator->first());
}
@@ -880,13 +763,13 @@ RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
HandleScope scope(isolate);
- DCHECK(args.length() == 1);
+ DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
icu::BreakIterator* break_iterator =
- BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
- if (!break_iterator) return isolate->ThrowIllegalOperation();
+ V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
+ CHECK_NOT_NULL(break_iterator);
return *isolate->factory()->NewNumberFromInt(break_iterator->next());
}
@@ -895,13 +778,13 @@ RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
HandleScope scope(isolate);
- DCHECK(args.length() == 1);
+ DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
icu::BreakIterator* break_iterator =
- BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
- if (!break_iterator) return isolate->ThrowIllegalOperation();
+ V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
+ CHECK_NOT_NULL(break_iterator);
return *isolate->factory()->NewNumberFromInt(break_iterator->current());
}
@@ -910,13 +793,13 @@ RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
HandleScope scope(isolate);
- DCHECK(args.length() == 1);
+ DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
icu::BreakIterator* break_iterator =
- BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
- if (!break_iterator) return isolate->ThrowIllegalOperation();
+ V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
+ CHECK_NOT_NULL(break_iterator);
// TODO(cira): Remove cast once ICU fixes base BreakIterator class.
icu::RuleBasedBreakIterator* rule_based_iterator =
@@ -956,6 +839,7 @@ MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, isolate->factory()->NewRawTwoByteString(dest_length));
DisallowHeapAllocation no_gc;
+ DCHECK(s->IsFlat());
String::FlatContent flat = s->GetFlatContent();
const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
status = U_ZERO_ERROR;
@@ -1041,15 +925,14 @@ bool ToUpperFastASCII(const Vector<const Char>& src,
const uint16_t sharp_s = 0xDF;
template <typename Char>
-bool ToUpperOneByte(const Vector<const Char>& src,
- Handle<SeqOneByteString> result, int* sharp_s_count) {
+bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
+ int* sharp_s_count) {
// Still pretty-fast path for the input with non-ASCII Latin-1 characters.
// There are two special cases.
// 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
// 2. Lower case sharp-S converts to "SS" (two characters)
*sharp_s_count = 0;
- int32_t index = 0;
for (auto it = src.begin(); it != src.end(); ++it) {
uint16_t ch = static_cast<uint16_t>(*it);
if (V8_UNLIKELY(ch == sharp_s)) {
@@ -1061,7 +944,7 @@ bool ToUpperOneByte(const Vector<const Char>& src,
// need to take the 16-bit path.
return false;
}
- result->SeqOneByteStringSet(index++, ToLatin1Upper(ch));
+ *dest++ = ToLatin1Upper(ch);
}
return true;
@@ -1082,105 +965,112 @@ void ToUpperWithSharpS(const Vector<const Char>& src,
}
}
-} // namespace
+inline int FindFirstUpperOrNonAscii(Handle<String> s, int length) {
+ for (int index = 0; index < length; ++index) {
+ uint16_t ch = s->Get(index);
+ if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
+ return index;
+ }
+ }
+ return length;
+}
-RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
- HandleScope scope(isolate);
- DCHECK_EQ(args.length(), 1);
- CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
+MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {
+ if (!s->HasOnlyOneByteChars()) {
+ // Use a slower implementation for strings with characters beyond U+00FF.
+ return LocaleConvertCase(s, isolate, false, "");
+ }
int length = s->length();
- s = String::Flatten(s);
- // First scan the string for uppercase and non-ASCII characters:
- if (s->HasOnlyOneByteChars()) {
- int first_index_to_lower = length;
- for (int index = 0; index < length; ++index) {
- // Blink specializes this path for one-byte strings, so it
- // does not need to do a generic get, but can do the equivalent
- // of SeqOneByteStringGet.
- uint16_t ch = s->Get(index);
- if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
- first_index_to_lower = index;
- break;
- }
- }
+ // We depend here on the invariant that the length of a Latin1
+ // string is invariant under ToLowerCase, and the result always
+ // fits in the Latin1 range in the *root locale*. It does not hold
+ // for ToUpperCase even in the root locale.
+
+ // Scan the string for uppercase and non-ASCII characters for strings
+ // shorter than a machine-word without any memory allocation overhead.
+ // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
+ // to two parts, one for scanning the prefix with no change and the other for
+ // handling ASCII-only characters.
+ int index_to_first_unprocessed = length;
+ const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
+ if (is_short) {
+ index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
// Nothing to do if the string is all ASCII with no uppercase.
- if (first_index_to_lower == length) return *s;
+ if (index_to_first_unprocessed == length) return *s;
+ }
- // We depend here on the invariant that the length of a Latin1
- // string is invariant under ToLowerCase, and the result always
- // fits in the Latin1 range in the *root locale*. It does not hold
- // for ToUpperCase even in the root locale.
- Handle<SeqOneByteString> result;
- ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
- isolate, result, isolate->factory()->NewRawOneByteString(length));
+ Handle<SeqOneByteString> result =
+ isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
- DisallowHeapAllocation no_gc;
- String::FlatContent flat = s->GetFlatContent();
- if (flat.IsOneByte()) {
- const uint8_t* src = flat.ToOneByteVector().start();
- CopyChars(result->GetChars(), src,
- static_cast<size_t>(first_index_to_lower));
- for (int index = first_index_to_lower; index < length; ++index) {
- uint16_t ch = static_cast<uint16_t>(src[index]);
- result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
- }
- } else {
- const uint16_t* src = flat.ToUC16Vector().start();
- CopyChars(result->GetChars(), src,
- static_cast<size_t>(first_index_to_lower));
- for (int index = first_index_to_lower; index < length; ++index) {
- uint16_t ch = src[index];
- result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
- }
+ DisallowHeapAllocation no_gc;
+ DCHECK(s->IsFlat());
+ String::FlatContent flat = s->GetFlatContent();
+ uint8_t* dest = result->GetChars();
+ if (flat.IsOneByte()) {
+ const uint8_t* src = flat.ToOneByteVector().start();
+ bool has_changed_character = false;
+ index_to_first_unprocessed = FastAsciiConvert<true>(
+ reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src),
+ length, &has_changed_character);
+ // If not ASCII, we keep the result up to index_to_first_unprocessed and
+ // process the rest.
+ if (index_to_first_unprocessed == length)
+ return has_changed_character ? *result : *s;
+
+ for (int index = index_to_first_unprocessed; index < length; ++index) {
+ dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
+ }
+ } else {
+ if (index_to_first_unprocessed == length) {
+ DCHECK(!is_short);
+ index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
+ }
+ // Nothing to do if the string is all ASCII with no uppercase.
+ if (index_to_first_unprocessed == length) return *s;
+ const uint16_t* src = flat.ToUC16Vector().start();
+ CopyChars(dest, src, index_to_first_unprocessed);
+ for (int index = index_to_first_unprocessed; index < length; ++index) {
+ dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
}
-
- return *result;
}
- // Blink had an additional case here for ASCII 2-byte strings, but
- // that is subsumed by the above code (assuming there isn't a false
- // negative for HasOnlyOneByteChars).
-
- // Do a slower implementation for cases that include non-ASCII characters.
- return LocaleConvertCase(s, isolate, false, "");
+ return *result;
}
-RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
- HandleScope scope(isolate);
- DCHECK_EQ(args.length(), 1);
- CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
-
- // This function could be optimized for no-op cases the way lowercase
- // counterpart is, but in empirical testing, few actual calls to upper()
- // are no-ops. So, it wouldn't be worth the extra time for pre-scanning.
-
+MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {
int32_t length = s->length();
- s = String::Flatten(s);
-
if (s->HasOnlyOneByteChars()) {
- Handle<SeqOneByteString> result;
- ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
- isolate, result, isolate->factory()->NewRawOneByteString(length));
+ Handle<SeqOneByteString> result =
+ isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
+ DCHECK(s->IsFlat());
int sharp_s_count;
bool is_result_single_byte;
{
DisallowHeapAllocation no_gc;
String::FlatContent flat = s->GetFlatContent();
- // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
- // could be removed because ToUpperOneByte is pretty fast now (it
- // does not call ICU API any more.).
+ uint8_t* dest = result->GetChars();
if (flat.IsOneByte()) {
Vector<const uint8_t> src = flat.ToOneByteVector();
- if (ToUpperFastASCII(src, result)) return *result;
- is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
+ bool has_changed_character = false;
+ int index_to_first_unprocessed =
+ FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()),
+ reinterpret_cast<const char*>(src.start()),
+ length, &has_changed_character);
+ if (index_to_first_unprocessed == length)
+ return has_changed_character ? *result : *s;
+ // If not ASCII, we keep the result up to index_to_first_unprocessed and
+ // process the rest.
+ is_result_single_byte =
+ ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
+ dest + index_to_first_unprocessed, &sharp_s_count);
} else {
DCHECK(flat.IsTwoByte());
Vector<const uint16_t> src = flat.ToUC16Vector();
if (ToUpperFastASCII(src, result)) return *result;
- is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
+ is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
}
}
@@ -1211,26 +1101,67 @@ RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
return LocaleConvertCase(s, isolate, true, "");
}
+MUST_USE_RESULT Object* ConvertCase(Handle<String> s, bool is_upper,
+ Isolate* isolate) {
+ return is_upper ? ConvertToUpper(s, isolate) : ConvertToLower(s, isolate);
+}
+
+} // namespace
+
+RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
+ HandleScope scope(isolate);
+ DCHECK_EQ(args.length(), 1);
+ CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
+ s = String::Flatten(s);
+ return ConvertToLower(s, isolate);
+}
+
+RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
+ HandleScope scope(isolate);
+ DCHECK_EQ(args.length(), 1);
+ CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
+ s = String::Flatten(s);
+ return ConvertToUpper(s, isolate);
+}
+
RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
HandleScope scope(isolate);
DCHECK_EQ(args.length(), 3);
CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
- CONVERT_ARG_HANDLE_CHECKED(SeqOneByteString, lang, 2);
-
- // All the languages requiring special handling ("az", "el", "lt", "tr")
- // have a 2-letter language code.
- DCHECK(lang->length() == 2);
- uint8_t lang_str[3];
- memcpy(lang_str, lang->GetChars(), 2);
- lang_str[2] = 0;
+ CONVERT_ARG_HANDLE_CHECKED(String, lang_arg, 2);
+
+ // Primary language tag can be up to 8 characters long in theory.
+ // https://tools.ietf.org/html/bcp47#section-2.2.1
+ DCHECK(lang_arg->length() <= 8);
+ lang_arg = String::Flatten(lang_arg);
s = String::Flatten(s);
+
+ // All the languages requiring special-handling have two-letter codes.
+ if (V8_UNLIKELY(lang_arg->length() > 2))
+ return ConvertCase(s, is_upper, isolate);
+
+ char c1, c2;
+ {
+ DisallowHeapAllocation no_gc;
+ String::FlatContent lang = lang_arg->GetFlatContent();
+ c1 = lang.Get(0);
+ c2 = lang.Get(1);
+ }
// TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
// in the root locale needs to be adjusted for az, lt and tr because even case
// mapping of ASCII range characters are different in those locales.
- // Greek (el) does not require any adjustment, though.
- return LocaleConvertCase(s, isolate, is_upper,
- reinterpret_cast<const char*>(lang_str));
+ // Greek (el) does not require any adjustment.
+ if (V8_UNLIKELY(c1 == 't' && c2 == 'r'))
+ return LocaleConvertCase(s, isolate, is_upper, "tr");
+ if (V8_UNLIKELY(c1 == 'e' && c2 == 'l'))
+ return LocaleConvertCase(s, isolate, is_upper, "el");
+ if (V8_UNLIKELY(c1 == 'l' && c2 == 't'))
+ return LocaleConvertCase(s, isolate, is_upper, "lt");
+ if (V8_UNLIKELY(c1 == 'a' && c2 == 'z'))
+ return LocaleConvertCase(s, isolate, is_upper, "az");
+
+ return ConvertCase(s, is_upper, isolate);
}
RUNTIME_FUNCTION(Runtime_DateCacheVersion) {