diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/i18n/uitercollationiterator.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/i18n/uitercollationiterator.cpp | 450 |
1 files changed, 0 insertions, 450 deletions
diff --git a/deps/node/deps/icu-small/source/i18n/uitercollationiterator.cpp b/deps/node/deps/icu-small/source/i18n/uitercollationiterator.cpp deleted file mode 100644 index 103c91ca..00000000 --- a/deps/node/deps/icu-small/source/i18n/uitercollationiterator.cpp +++ /dev/null @@ -1,450 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2012-2014, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* uitercollationiterator.cpp -* -* created on: 2012sep23 (from utf16collationiterator.cpp) -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION - -#include "unicode/uiter.h" -#include "charstr.h" -#include "cmemory.h" -#include "collation.h" -#include "collationdata.h" -#include "collationfcd.h" -#include "collationiterator.h" -#include "normalizer2impl.h" -#include "uassert.h" -#include "uitercollationiterator.h" - -U_NAMESPACE_BEGIN - -UIterCollationIterator::~UIterCollationIterator() {} - -void -UIterCollationIterator::resetToOffset(int32_t newOffset) { - reset(); - iter.move(&iter, newOffset, UITER_START); -} - -int32_t -UIterCollationIterator::getOffset() const { - return iter.getIndex(&iter, UITER_CURRENT); -} - -uint32_t -UIterCollationIterator::handleNextCE32(UChar32 &c, UErrorCode & /*errorCode*/) { - c = iter.next(&iter); - if(c < 0) { - return Collation::FALLBACK_CE32; - } - return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); -} - -UChar -UIterCollationIterator::handleGetTrailSurrogate() { - UChar32 trail = iter.next(&iter); - if(!U16_IS_TRAIL(trail) && trail >= 0) { iter.previous(&iter); } - return (UChar)trail; -} - -UChar32 -UIterCollationIterator::nextCodePoint(UErrorCode & /*errorCode*/) { - return uiter_next32(&iter); -} - -UChar32 -UIterCollationIterator::previousCodePoint(UErrorCode & /*errorCode*/) { - return uiter_previous32(&iter); -} - -void -UIterCollationIterator::forwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) { - while(num > 0 && (uiter_next32(&iter)) >= 0) { - --num; - } -} - -void -UIterCollationIterator::backwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) { - while(num > 0 && (uiter_previous32(&iter)) >= 0) { - --num; - } -} - -// FCDUIterCollationIterator ----------------------------------------------- *** - -FCDUIterCollationIterator::~FCDUIterCollationIterator() {} - -void -FCDUIterCollationIterator::resetToOffset(int32_t newOffset) { - UIterCollationIterator::resetToOffset(newOffset); - start = newOffset; - state = ITER_CHECK_FWD; -} - -int32_t -FCDUIterCollationIterator::getOffset() const { - if(state <= ITER_CHECK_BWD) { - return iter.getIndex(&iter, UITER_CURRENT); - } else if(state == ITER_IN_FCD_SEGMENT) { - return pos; - } else if(pos == 0) { - return start; - } else { - return limit; - } -} - -uint32_t -FCDUIterCollationIterator::handleNextCE32(UChar32 &c, UErrorCode &errorCode) { - for(;;) { - if(state == ITER_CHECK_FWD) { - c = iter.next(&iter); - if(c < 0) { - return Collation::FALLBACK_CE32; - } - if(CollationFCD::hasTccc(c)) { - if(CollationFCD::maybeTibetanCompositeVowel(c) || - CollationFCD::hasLccc(iter.current(&iter))) { - iter.previous(&iter); - if(!nextSegment(errorCode)) { - c = U_SENTINEL; - return Collation::FALLBACK_CE32; - } - continue; - } - } - break; - } else if(state == ITER_IN_FCD_SEGMENT && pos != limit) { - c = iter.next(&iter); - ++pos; - U_ASSERT(c >= 0); - break; - } else if(state >= IN_NORM_ITER_AT_LIMIT && pos != normalized.length()) { - c = normalized[pos++]; - break; - } else { - switchToForward(); - } - } - return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); -} - -UChar -FCDUIterCollationIterator::handleGetTrailSurrogate() { - if(state <= ITER_IN_FCD_SEGMENT) { - UChar32 trail = iter.next(&iter); - if(U16_IS_TRAIL(trail)) { - if(state == ITER_IN_FCD_SEGMENT) { ++pos; } - } else if(trail >= 0) { - iter.previous(&iter); - } - return (UChar)trail; - } else { - U_ASSERT(pos < normalized.length()); - UChar trail; - if(U16_IS_TRAIL(trail = normalized[pos])) { ++pos; } - return trail; - } -} - -UChar32 -FCDUIterCollationIterator::nextCodePoint(UErrorCode &errorCode) { - UChar32 c; - for(;;) { - if(state == ITER_CHECK_FWD) { - c = iter.next(&iter); - if(c < 0) { - return c; - } - if(CollationFCD::hasTccc(c)) { - if(CollationFCD::maybeTibetanCompositeVowel(c) || - CollationFCD::hasLccc(iter.current(&iter))) { - iter.previous(&iter); - if(!nextSegment(errorCode)) { - return U_SENTINEL; - } - continue; - } - } - if(U16_IS_LEAD(c)) { - UChar32 trail = iter.next(&iter); - if(U16_IS_TRAIL(trail)) { - return U16_GET_SUPPLEMENTARY(c, trail); - } else if(trail >= 0) { - iter.previous(&iter); - } - } - return c; - } else if(state == ITER_IN_FCD_SEGMENT && pos != limit) { - c = uiter_next32(&iter); - pos += U16_LENGTH(c); - U_ASSERT(c >= 0); - return c; - } else if(state >= IN_NORM_ITER_AT_LIMIT && pos != normalized.length()) { - c = normalized.char32At(pos); - pos += U16_LENGTH(c); - return c; - } else { - switchToForward(); - } - } -} - -UChar32 -FCDUIterCollationIterator::previousCodePoint(UErrorCode &errorCode) { - UChar32 c; - for(;;) { - if(state == ITER_CHECK_BWD) { - c = iter.previous(&iter); - if(c < 0) { - start = pos = 0; - state = ITER_IN_FCD_SEGMENT; - return U_SENTINEL; - } - if(CollationFCD::hasLccc(c)) { - UChar32 prev = U_SENTINEL; - if(CollationFCD::maybeTibetanCompositeVowel(c) || - CollationFCD::hasTccc(prev = iter.previous(&iter))) { - iter.next(&iter); - if(prev >= 0) { - iter.next(&iter); - } - if(!previousSegment(errorCode)) { - return U_SENTINEL; - } - continue; - } - // hasLccc(trail)=true for all trail surrogates - if(U16_IS_TRAIL(c)) { - if(prev < 0) { - prev = iter.previous(&iter); - } - if(U16_IS_LEAD(prev)) { - return U16_GET_SUPPLEMENTARY(prev, c); - } - } - if(prev >= 0) { - iter.next(&iter); - } - } - return c; - } else if(state == ITER_IN_FCD_SEGMENT && pos != start) { - c = uiter_previous32(&iter); - pos -= U16_LENGTH(c); - U_ASSERT(c >= 0); - return c; - } else if(state >= IN_NORM_ITER_AT_LIMIT && pos != 0) { - c = normalized.char32At(pos - 1); - pos -= U16_LENGTH(c); - return c; - } else { - switchToBackward(); - } - } -} - -void -FCDUIterCollationIterator::forwardNumCodePoints(int32_t num, UErrorCode &errorCode) { - // Specify the class to avoid a virtual-function indirection. - // In Java, we would declare this class final. - while(num > 0 && FCDUIterCollationIterator::nextCodePoint(errorCode) >= 0) { - --num; - } -} - -void -FCDUIterCollationIterator::backwardNumCodePoints(int32_t num, UErrorCode &errorCode) { - // Specify the class to avoid a virtual-function indirection. - // In Java, we would declare this class final. - while(num > 0 && FCDUIterCollationIterator::previousCodePoint(errorCode) >= 0) { - --num; - } -} - -void -FCDUIterCollationIterator::switchToForward() { - U_ASSERT(state == ITER_CHECK_BWD || - (state == ITER_IN_FCD_SEGMENT && pos == limit) || - (state >= IN_NORM_ITER_AT_LIMIT && pos == normalized.length())); - if(state == ITER_CHECK_BWD) { - // Turn around from backward checking. - start = pos = iter.getIndex(&iter, UITER_CURRENT); - if(pos == limit) { - state = ITER_CHECK_FWD; // Check forward. - } else { // pos < limit - state = ITER_IN_FCD_SEGMENT; // Stay in FCD segment. - } - } else { - // Reached the end of the FCD segment. - if(state == ITER_IN_FCD_SEGMENT) { - // The input text segment is FCD, extend it forward. - } else { - // The input text segment needed to be normalized. - // Switch to checking forward from it. - if(state == IN_NORM_ITER_AT_START) { - iter.move(&iter, limit - start, UITER_CURRENT); - } - start = limit; - } - state = ITER_CHECK_FWD; - } -} - -UBool -FCDUIterCollationIterator::nextSegment(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return FALSE; } - U_ASSERT(state == ITER_CHECK_FWD); - // The input text [start..(iter index)[ passes the FCD check. - pos = iter.getIndex(&iter, UITER_CURRENT); - // Collect the characters being checked, in case they need to be normalized. - UnicodeString s; - uint8_t prevCC = 0; - for(;;) { - // Fetch the next character and its fcd16 value. - UChar32 c = uiter_next32(&iter); - if(c < 0) { break; } - uint16_t fcd16 = nfcImpl.getFCD16(c); - uint8_t leadCC = (uint8_t)(fcd16 >> 8); - if(leadCC == 0 && !s.isEmpty()) { - // FCD boundary before this character. - uiter_previous32(&iter); - break; - } - s.append(c); - if(leadCC != 0 && (prevCC > leadCC || CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16))) { - // Fails FCD check. Find the next FCD boundary and normalize. - for(;;) { - c = uiter_next32(&iter); - if(c < 0) { break; } - if(nfcImpl.getFCD16(c) <= 0xff) { - uiter_previous32(&iter); - break; - } - s.append(c); - } - if(!normalize(s, errorCode)) { return FALSE; } - start = pos; - limit = pos + s.length(); - state = IN_NORM_ITER_AT_LIMIT; - pos = 0; - return TRUE; - } - prevCC = (uint8_t)fcd16; - if(prevCC == 0) { - // FCD boundary after the last character. - break; - } - } - limit = pos + s.length(); - U_ASSERT(pos != limit); - iter.move(&iter, -s.length(), UITER_CURRENT); - state = ITER_IN_FCD_SEGMENT; - return TRUE; -} - -void -FCDUIterCollationIterator::switchToBackward() { - U_ASSERT(state == ITER_CHECK_FWD || - (state == ITER_IN_FCD_SEGMENT && pos == start) || - (state >= IN_NORM_ITER_AT_LIMIT && pos == 0)); - if(state == ITER_CHECK_FWD) { - // Turn around from forward checking. - limit = pos = iter.getIndex(&iter, UITER_CURRENT); - if(pos == start) { - state = ITER_CHECK_BWD; // Check backward. - } else { // pos > start - state = ITER_IN_FCD_SEGMENT; // Stay in FCD segment. - } - } else { - // Reached the start of the FCD segment. - if(state == ITER_IN_FCD_SEGMENT) { - // The input text segment is FCD, extend it backward. - } else { - // The input text segment needed to be normalized. - // Switch to checking backward from it. - if(state == IN_NORM_ITER_AT_LIMIT) { - iter.move(&iter, start - limit, UITER_CURRENT); - } - limit = start; - } - state = ITER_CHECK_BWD; - } -} - -UBool -FCDUIterCollationIterator::previousSegment(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return FALSE; } - U_ASSERT(state == ITER_CHECK_BWD); - // The input text [(iter index)..limit[ passes the FCD check. - pos = iter.getIndex(&iter, UITER_CURRENT); - // Collect the characters being checked, in case they need to be normalized. - UnicodeString s; - uint8_t nextCC = 0; - for(;;) { - // Fetch the previous character and its fcd16 value. - UChar32 c = uiter_previous32(&iter); - if(c < 0) { break; } - uint16_t fcd16 = nfcImpl.getFCD16(c); - uint8_t trailCC = (uint8_t)fcd16; - if(trailCC == 0 && !s.isEmpty()) { - // FCD boundary after this character. - uiter_next32(&iter); - break; - } - s.append(c); - if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) || - CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16))) { - // Fails FCD check. Find the previous FCD boundary and normalize. - while(fcd16 > 0xff) { - c = uiter_previous32(&iter); - if(c < 0) { break; } - fcd16 = nfcImpl.getFCD16(c); - if(fcd16 == 0) { - (void)uiter_next32(&iter); - break; - } - s.append(c); - } - s.reverse(); - if(!normalize(s, errorCode)) { return FALSE; } - limit = pos; - start = pos - s.length(); - state = IN_NORM_ITER_AT_START; - pos = normalized.length(); - return TRUE; - } - nextCC = (uint8_t)(fcd16 >> 8); - if(nextCC == 0) { - // FCD boundary before the following character. - break; - } - } - start = pos - s.length(); - U_ASSERT(pos != start); - iter.move(&iter, s.length(), UITER_CURRENT); - state = ITER_IN_FCD_SEGMENT; - return TRUE; -} - -UBool -FCDUIterCollationIterator::normalize(const UnicodeString &s, UErrorCode &errorCode) { - // NFD without argument checking. - U_ASSERT(U_SUCCESS(errorCode)); - nfcImpl.decompose(s, normalized, errorCode); - return U_SUCCESS(errorCode); -} - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_COLLATION |