summaryrefslogtreecommitdiff
path: root/deps/icu-small/source/common/ustrcase.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/icu-small/source/common/ustrcase.cpp')
-rw-r--r--deps/icu-small/source/common/ustrcase.cpp305
1 files changed, 228 insertions, 77 deletions
diff --git a/deps/icu-small/source/common/ustrcase.cpp b/deps/icu-small/source/common/ustrcase.cpp
index b1beb34277..978bd3b7b8 100644
--- a/deps/icu-small/source/common/ustrcase.cpp
+++ b/deps/icu-small/source/common/ustrcase.cpp
@@ -52,16 +52,8 @@ int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
return destIndex;
}
-} // namespace
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-/* string casing ------------------------------------------------------------ */
-
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
-static inline int32_t
+inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
int32_t result, const UChar *s,
int32_t cpLength, uint32_t options, icu::Edits *edits) {
@@ -134,7 +126,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
return destIndex;
}
-static inline int32_t
+inline int32_t
appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
if(destIndex<destCapacity) {
dest[destIndex]=c;
@@ -144,28 +136,34 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
return destIndex+1;
}
-static inline int32_t
+int32_t
+appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
+ const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
+ if(edits!=NULL) {
+ edits->addUnchanged(length);
+ }
+ if(options & U_OMIT_UNCHANGED_TEXT) {
+ return destIndex;
+ }
+ if(length>(INT32_MAX-destIndex)) {
+ return -1; // integer overflow
+ }
+ if((destIndex+length)<=destCapacity) {
+ u_memcpy(dest+destIndex, s, length);
+ }
+ return destIndex + length;
+}
+
+inline int32_t
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
- if(length>0) {
- if(edits!=NULL) {
- edits->addUnchanged(length);
- }
- if(options & U_OMIT_UNCHANGED_TEXT) {
- return destIndex;
- }
- if(length>(INT32_MAX-destIndex)) {
- return -1; // integer overflow
- }
- if((destIndex+length)<=destCapacity) {
- u_memcpy(dest+destIndex, s, length);
- }
- destIndex+=length;
+ if (length <= 0) {
+ return destIndex;
}
- return destIndex;
+ return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits);
}
-static UChar32 U_CALLCONV
+UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
UChar32 c;
@@ -197,39 +195,205 @@ utf16_caseContextIterator(void *context, int8_t dir) {
return U_SENTINEL;
}
-/*
- * Case-maps [srcStart..srcLimit[ but takes
- * context [0..srcLength[ into account.
+/**
+ * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
+ * caseLocale < 0: Case-folds [srcStart..srcLimit[.
*/
-static int32_t
-_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
- UChar *dest, int32_t destCapacity,
- const UChar *src, UCaseContext *csc,
- int32_t srcStart, int32_t srcLimit,
- icu::Edits *edits,
- UErrorCode &errorCode) {
- /* case mapping loop */
- int32_t srcIndex=srcStart;
- int32_t destIndex=0;
- while(srcIndex<srcLimit) {
- int32_t cpStart;
- csc->cpStart=cpStart=srcIndex;
+int32_t toLower(int32_t caseLocale, uint32_t options,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
+ icu::Edits *edits, UErrorCode &errorCode) {
+ const int8_t *latinToLower;
+ if (caseLocale == UCASE_LOC_ROOT ||
+ (caseLocale >= 0 ?
+ !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
+ (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
+ latinToLower = LatinCase::TO_LOWER_NORMAL;
+ } else {
+ latinToLower = LatinCase::TO_LOWER_TR_LT;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t destIndex = 0;
+ int32_t prev = srcStart;
+ int32_t srcIndex = srcStart;
+ for (;;) {
+ // fast path for simple cases
+ UChar lead;
+ while (srcIndex < srcLimit) {
+ lead = src[srcIndex];
+ int32_t delta;
+ if (lead < LatinCase::LONG_S) {
+ int8_t d = latinToLower[lead];
+ if (d == LatinCase::EXC) { break; }
+ ++srcIndex;
+ if (d == 0) { continue; }
+ delta = d;
+ } else if (lead >= 0xd800) {
+ break; // surrogate or higher
+ } else {
+ uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ ++srcIndex;
+ if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ }
+ lead += delta;
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - 1 - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendUChar(dest, destIndex, destCapacity, lead);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ if (srcIndex >= srcLimit) {
+ break;
+ }
+ // slow path
+ int32_t cpStart = srcIndex++;
+ UChar trail;
UChar32 c;
- U16_NEXT(src, srcIndex, srcLimit, c);
- csc->cpLimit=srcIndex;
+ if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
+ c = U16_GET_SUPPLEMENTARY(lead, trail);
+ ++srcIndex;
+ } else {
+ c = lead;
+ }
const UChar *s;
- c=map(c, utf16_caseContextIterator, csc, &s, caseLocale);
- destIndex = appendResult(dest, destIndex, destCapacity, c, s,
- srcIndex - cpStart, options, edits);
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
+ if (caseLocale >= 0) {
+ csc->cpStart = cpStart;
+ csc->cpLimit = srcIndex;
+ c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
+ } else {
+ c = ucase_toFullFolding(c, &s, options);
}
+ if (c >= 0) {
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, cpStart - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+ srcIndex - cpStart, options, edits);
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ }
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - prev, options, edits);
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
}
+ return destIndex;
+}
+int32_t toUpper(int32_t caseLocale, uint32_t options,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, UCaseContext *csc, int32_t srcLength,
+ icu::Edits *edits, UErrorCode &errorCode) {
+ const int8_t *latinToUpper;
+ if (caseLocale == UCASE_LOC_TURKISH) {
+ latinToUpper = LatinCase::TO_UPPER_TR;
+ } else {
+ latinToUpper = LatinCase::TO_UPPER_NORMAL;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t destIndex = 0;
+ int32_t prev = 0;
+ int32_t srcIndex = 0;
+ for (;;) {
+ // fast path for simple cases
+ UChar lead;
+ while (srcIndex < srcLength) {
+ lead = src[srcIndex];
+ int32_t delta;
+ if (lead < LatinCase::LONG_S) {
+ int8_t d = latinToUpper[lead];
+ if (d == LatinCase::EXC) { break; }
+ ++srcIndex;
+ if (d == 0) { continue; }
+ delta = d;
+ } else if (lead >= 0xd800) {
+ break; // surrogate or higher
+ } else {
+ uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ ++srcIndex;
+ if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ }
+ lead += delta;
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - 1 - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendUChar(dest, destIndex, destCapacity, lead);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ if (srcIndex >= srcLength) {
+ break;
+ }
+ // slow path
+ int32_t cpStart;
+ csc->cpStart = cpStart = srcIndex++;
+ UChar trail;
+ UChar32 c;
+ if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
+ c = U16_GET_SUPPLEMENTARY(lead, trail);
+ ++srcIndex;
+ } else {
+ c = lead;
+ }
+ csc->cpLimit = srcIndex;
+ const UChar *s;
+ c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
+ if (c >= 0) {
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, cpStart - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+ srcIndex - cpStart, options, edits);
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ }
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - prev, options, edits);
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
return destIndex;
}
+} // namespace
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV
@@ -344,11 +508,10 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
destIndex+=
- _caseMap(
- caseLocale, options, ucase_toFullLower,
+ toLower(
+ caseLocale, options,
dest+destIndex, destCapacity-destIndex,
- src, &csc,
- titleLimit, index,
+ src, &csc, titleLimit, index,
edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
@@ -1013,8 +1176,8 @@ ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
- int32_t destIndex = _caseMap(
- caseLocale, options, ucase_toFullLower,
+ int32_t destIndex = toLower(
+ caseLocale, options,
dest, destCapacity,
src, &csc, 0, srcLength,
edits, errorCode);
@@ -1035,10 +1198,10 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
- destIndex = _caseMap(
- caseLocale, options, ucase_toFullUpper,
+ destIndex = toUpper(
+ caseLocale, options,
dest, destCapacity,
- src, &csc, 0, srcLength,
+ src, &csc, srcLength,
edits, errorCode);
}
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
@@ -1050,23 +1213,11 @@ ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode) {
- /* case mapping loop */
- int32_t srcIndex = 0;
- int32_t destIndex = 0;
- while (srcIndex < srcLength) {
- int32_t cpStart = srcIndex;
- UChar32 c;
- U16_NEXT(src, srcIndex, srcLength, c);
- const UChar *s;
- c = ucase_toFullFolding(c, &s, options);
- destIndex = appendResult(dest, destIndex, destCapacity, c, s,
- srcIndex - cpStart, options, edits);
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
+ int32_t destIndex = toLower(
+ -1, options,
+ dest, destCapacity,
+ src, nullptr, 0, srcLength,
+ edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}