summaryrefslogtreecommitdiff
path: root/deps/v8/src/objects/js-segment-iterator.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/objects/js-segment-iterator.cc')
-rw-r--r--deps/v8/src/objects/js-segment-iterator.cc290
1 files changed, 290 insertions, 0 deletions
diff --git a/deps/v8/src/objects/js-segment-iterator.cc b/deps/v8/src/objects/js-segment-iterator.cc
new file mode 100644
index 0000000000..74b0330719
--- /dev/null
+++ b/deps/v8/src/objects/js-segment-iterator.cc
@@ -0,0 +1,290 @@
+// Copyright 2018 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_INTL_SUPPORT
+#error Internationalization is expected to be enabled.
+#endif // V8_INTL_SUPPORT
+
+#include "src/objects/js-segment-iterator.h"
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "src/heap/factory.h"
+#include "src/isolate.h"
+#include "src/objects-inl.h"
+#include "src/objects/intl-objects.h"
+#include "src/objects/js-segment-iterator-inl.h"
+#include "src/objects/managed.h"
+#include "unicode/brkiter.h"
+
+namespace v8 {
+namespace internal {
+
+// Converts the portion of the stored ICU text delimited by |start| and |end|
+// into a V8 string by delegating to Intl::ToString. The offsets are passed
+// through untouched, so their exact meaning is whatever Intl::ToString
+// defines.
+MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate,
+                                                  int32_t start,
+                                                  int32_t end) const {
+  auto& text = *(unicode_string()->raw());
+  return Intl::ToString(isolate, text, start, end);
+}
+
+// Returns the read-only root string handle that names this iterator's
+// granularity: grapheme, word, or sentence. COUNT has no string form and is
+// treated as unreachable.
+Handle<String> JSSegmentIterator::GranularityAsString() const {
+  auto roots = GetReadOnlyRoots();
+  switch (granularity()) {
+    case JSSegmenter::Granularity::GRAPHEME:
+      return roots.grapheme_string_handle();
+    case JSSegmenter::Granularity::WORD:
+      return roots.word_string_handle();
+    case JSSegmenter::Granularity::SENTENCE:
+      return roots.sentence_string_handle();
+    case JSSegmenter::Granularity::COUNT:
+      break;
+  }
+  UNREACHABLE();
+}
+
+// Builds a new JSSegmentIterator from the native context's
+// intl_segment_iterator_map. |break_iterator| must be non-null; it is wrapped
+// in a Managed<> object stored on the iterator and is pointed at |text| via
+// Intl::SetTextToBreakIterator. The new iterator starts with its break type
+// marked as not set.
+// NOTE(review): presumably the Managed<> wrapper takes over ownership of
+// |break_iterator| -- confirm against Managed<>::FromRawPtr.
+MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
+    Isolate* isolate, icu::BreakIterator* break_iterator,
+    JSSegmenter::Granularity granularity, Handle<String> text) {
+  CHECK_NOT_NULL(break_iterator);
+
+  // 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%).
+  Handle<Map> iterator_map(
+      isolate->native_context()->intl_segment_iterator_map(), isolate);
+  Handle<JSSegmentIterator> iterator = Handle<JSSegmentIterator>::cast(
+      isolate->factory()->NewJSObjectFromMap(iterator_map));
+
+  // Reset the flags word, then record the requested granularity.
+  iterator->set_flags(0);
+  iterator->set_granularity(granularity);
+
+  // 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter.
+  // The wrapper is created in its own statement, before the iterator handle
+  // is dereferenced for the store.
+  Handle<Managed<icu::BreakIterator>> managed_iterator =
+      Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
+  iterator->set_icu_break_iterator(*managed_iterator);
+
+  // 3. Let iterator.[[SegmentIteratorString]] be string.
+  Managed<icu::UnicodeString> managed_text =
+      Intl::SetTextToBreakIterator(isolate, text, break_iterator);
+  iterator->set_unicode_string(managed_text);
+
+  // 4. Let iterator.[[SegmentIteratorIndex]] be 0.
+  // Nothing to store: the index lives inside the ICU break iterator itself.
+
+  // 5. Let iterator.[[SegmentIteratorBreakType]] be undefined.
+  iterator->set_is_break_type_set(false);
+
+  return iterator;
+}
+
+// ecma402 #sec-segment-iterator-prototype-breakType
+// Translates the ICU rule status of the most recent break into the value
+// exposed as breakType. Returns undefined when no break type has been set
+// yet, for grapheme granularity, and for any rule status outside the ranges
+// tested below.
+Handle<Object> JSSegmentIterator::BreakType() const {
+  auto roots = GetReadOnlyRoots();
+  if (!is_break_type_set()) {
+    return roots.undefined_value_handle();
+  }
+
+  const int32_t status = icu_break_iterator()->raw()->getRuleStatus();
+  // True when |status| falls inside the half-open tag range [first, limit).
+  const auto status_in = [status](int32_t first, int32_t limit) {
+    return first <= status && status < limit;
+  };
+
+  switch (granularity()) {
+    case JSSegmenter::Granularity::GRAPHEME:
+      return roots.undefined_value_handle();
+
+    case JSSegmenter::Granularity::WORD:
+      // "Words" that fit none of the other categories; includes spaces and
+      // most punctuation.
+      if (status_in(UBRK_WORD_NONE, UBRK_WORD_NONE_LIMIT)) {
+        return roots.none_string_handle();
+      }
+      // Words that look like numbers, letters, kana characters or
+      // ideographic characters.
+      if (status_in(UBRK_WORD_NUMBER, UBRK_WORD_NUMBER_LIMIT) ||
+          status_in(UBRK_WORD_LETTER, UBRK_WORD_LETTER_LIMIT) ||
+          status_in(UBRK_WORD_KANA, UBRK_WORD_KANA_LIMIT) ||
+          status_in(UBRK_WORD_IDEO, UBRK_WORD_IDEO_LIMIT)) {
+        return roots.word_string_handle();
+      }
+      return roots.undefined_value_handle();
+
+    case JSSegmenter::Granularity::SENTENCE:
+      // Sentences ending with a sentence terminator ('.', '?', '!', etc.),
+      // possibly followed by a hard separator (CR, LF, PS, etc.).
+      if (status_in(UBRK_SENTENCE_TERM, UBRK_SENTENCE_TERM_LIMIT)) {
+        return roots.term_string_handle();
+      }
+      // Sentences with no ending terminator that are closed only by a hard
+      // separator (CR, LF, PS, etc.), i.e. hard or mandatory line breaks.
+      if (status_in(UBRK_SENTENCE_SEP, UBRK_SENTENCE_SEP_LIMIT)) {
+        return roots.sep_string_handle();
+      }
+      return roots.undefined_value_handle();
+
+    case JSSegmenter::Granularity::COUNT:
+      break;
+  }
+  UNREACHABLE();
+}
+
+// ecma402 #sec-segment-iterator-prototype-index
+// Returns the ICU break iterator's current position as a Number. The
+// underlying break iterator is required to be non-null.
+Handle<Object> JSSegmentIterator::Index(
+    Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
+  icu::BreakIterator* break_iterator =
+      segment_iterator->icu_break_iterator()->raw();
+  CHECK_NOT_NULL(break_iterator);
+  const int32_t position = break_iterator->current();
+  return isolate->factory()->NewNumberFromInt(position);
+}
+
+// ecma402 #sec-segment-iterator-prototype-next
+// Advances the ICU break iterator by one boundary and returns an iterator
+// result object. When ICU reports DONE the result is {undefined, done: true}.
+// Otherwise the value is a fresh object carrying "segment", "breakType" and
+// "index" data properties, and done is false. Returns an empty MaybeHandle if
+// extracting the segment string raises an exception.
+MaybeHandle<JSReceiver> JSSegmentIterator::Next(
+    Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
+  Factory* factory = isolate->factory();
+  icu::BreakIterator* break_iterator =
+      segment_iterator->icu_break_iterator()->raw();
+
+  // 3. Let _previousIndex be iterator.[[SegmentIteratorIndex]].
+  const int32_t previous_index = break_iterator->current();
+  // 4. Let done be AdvanceSegmentIterator(iterator, forwards).
+  const int32_t next_index = break_iterator->next();
+  // The break type counts as set from here on, even when iteration is done.
+  segment_iterator->set_is_break_type_set(true);
+
+  // 5. If done is true, return CreateIterResultObject(undefined, true).
+  if (next_index == icu::BreakIterator::DONE) {
+    return factory->NewJSIteratorResult(factory->undefined_value(), true);
+  }
+
+  // 6. Let newIndex be iterator.[[SegmentIteratorIndex]].
+  Handle<Object> index_value = factory->NewNumberFromInt(next_index);
+
+  // 8. Let segment be the substring of string from previousIndex to
+  //    newIndex, inclusive of previousIndex and exclusive of newIndex.
+  Handle<String> segment;
+  ASSIGN_RETURN_ON_EXCEPTION(
+      isolate, segment,
+      segment_iterator->GetSegment(isolate, previous_index, next_index),
+      JSReceiver);
+
+  // 9. Let breakType be iterator.[[SegmentIteratorBreakType]].
+  Handle<Object> break_type = segment_iterator->BreakType();
+
+  // 10. Let result be ! ObjectCreate(%ObjectPrototype%).
+  Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
+
+  // Defines one data property on |result|; the definition must succeed.
+  const auto define = [&](Handle<Name> key, Handle<Object> value) {
+    CHECK(JSReceiver::CreateDataProperty(isolate, result, key, value,
+                                         kDontThrow)
+              .FromJust());
+  };
+  // 11. Perform ! CreateDataProperty(result "segment", segment).
+  define(factory->segment_string(), segment);
+  // 12. Perform ! CreateDataProperty(result, "breakType", breakType).
+  define(factory->breakType_string(), break_type);
+  // 13. Perform ! CreateDataProperty(result, "index", newIndex).
+  define(factory->index_string(), index_value);
+
+  // 14. Return CreateIterResultObject(result, false).
+  return factory->NewJSIteratorResult(result, false);
+}
+
+// ecma402 #sec-segment-iterator-prototype-following
+// Moves the ICU break iterator forwards.
+//  - |from_obj| undefined: steps to the next boundary and returns
+//    Just(true) exactly when ICU reports DONE.
+//  - otherwise: converts |from_obj| with ToIndex, throws a RangeError unless
+//    it is an array index strictly below the text length, repositions with
+//    following(from) and returns Just(false).
+// Returns Nothing<bool>() whenever an exception has been thrown. In every
+// non-throwing path the break type is marked as set.
+Maybe<bool> JSSegmentIterator::Following(
+    Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
+    Handle<Object> from_obj) {
+  Factory* factory = isolate->factory();
+  icu::BreakIterator* break_iterator =
+      segment_iterator->icu_break_iterator()->raw();
+
+  // 4. No explicit position: return AdvanceSegmentIterator(iterator,
+  //    forwards), which is true once the end of the string has been reached.
+  if (from_obj->IsUndefined()) {
+    segment_iterator->set_is_break_type_set(true);
+    const bool done = break_iterator->next() == icu::BreakIterator::DONE;
+    return Just(done);
+  }
+
+  // 3.a. Let from be ? ToIndex(from).
+  Handle<Object> index;
+  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
+      isolate, index,
+      Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
+      Nothing<bool>());
+  uint32_t from = 0;
+  if (!index->ToArrayIndex(&from)) {
+    THROW_NEW_ERROR_RETURN_VALUE(
+        isolate,
+        NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
+                      factory->NewStringFromStaticChars("from"),
+                      factory->NewStringFromStaticChars("following"), index),
+        Nothing<bool>());
+  }
+
+  // 3.b. Let length be the length of iterator.[[SegmentIteratorString]].
+  const uint32_t length =
+      static_cast<uint32_t>(break_iterator->getText().getLength());
+
+  // 3.c. If from >= length, throw a RangeError exception.
+  if (from >= length) {
+    THROW_NEW_ERROR_RETURN_VALUE(
+        isolate,
+        NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
+                      factory->NewStringFromStaticChars("from"),
+                      factory->NewStringFromStaticChars("following"),
+                      from_obj),
+        Nothing<bool>());
+  }
+
+  // 3.d. Let iterator.[[SegmentIteratorPosition]] be from.
+  segment_iterator->set_is_break_type_set(true);
+  break_iterator->following(from);
+  return Just(false);
+}
+
+// ecma402 #sec-segment-iterator-prototype-preceding
+// Moves the ICU break iterator backwards.
+//  - |from_obj| undefined: steps to the previous boundary and returns
+//    Just(true) exactly when ICU reports DONE.
+//  - otherwise: converts |from_obj| with ToIndex, throws a RangeError unless
+//    it is an array index in the range 1..length (inclusive), repositions
+//    with preceding(from) and returns Just(false).
+// Returns Nothing<bool>() whenever an exception has been thrown. In every
+// non-throwing path the break type is marked as set.
+Maybe<bool> JSSegmentIterator::Preceding(
+    Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
+    Handle<Object> from_obj) {
+  Factory* factory = isolate->factory();
+  icu::BreakIterator* break_iterator =
+      segment_iterator->icu_break_iterator()->raw();
+
+  // 4. No explicit position: return AdvanceSegmentIterator(iterator,
+  //    backwards), which is true once position 0 has been reached.
+  if (from_obj->IsUndefined()) {
+    segment_iterator->set_is_break_type_set(true);
+    const bool done = break_iterator->previous() == icu::BreakIterator::DONE;
+    return Just(done);
+  }
+
+  // 3.a. Let from be ? ToIndex(from).
+  Handle<Object> index;
+  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
+      isolate, index,
+      Object::ToIndex(isolate, from_obj, MessageTemplate::kInvalidIndex),
+      Nothing<bool>());
+  uint32_t from = 0;
+  if (!index->ToArrayIndex(&from)) {
+    THROW_NEW_ERROR_RETURN_VALUE(
+        isolate,
+        NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
+                      factory->NewStringFromStaticChars("from"),
+                      factory->NewStringFromStaticChars("preceding"), index),
+        Nothing<bool>());
+  }
+
+  // 3.b. Let length be the length of iterator.[[SegmentIteratorString]].
+  const uint32_t length =
+      static_cast<uint32_t>(break_iterator->getText().getLength());
+
+  // 3.c. If from > length or from = 0, throw a RangeError exception.
+  if (from == 0 || from > length) {
+    THROW_NEW_ERROR_RETURN_VALUE(
+        isolate,
+        NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
+                      factory->NewStringFromStaticChars("from"),
+                      factory->NewStringFromStaticChars("preceding"),
+                      from_obj),
+        Nothing<bool>());
+  }
+
+  // 3.d. Let iterator.[[SegmentIteratorIndex]] be from.
+  segment_iterator->set_is_break_type_set(true);
+  break_iterator->preceding(from);
+  return Just(false);
+}
+
+} // namespace internal
+} // namespace v8