diff options
Diffstat (limited to 'deps/icu-small/source/common')
95 files changed, 2077 insertions, 1469 deletions
diff --git a/deps/icu-small/source/common/bmpset.cpp b/deps/icu-small/source/common/bmpset.cpp index f84bfd7f5b..35bc80dce3 100644 --- a/deps/icu-small/source/common/bmpset.cpp +++ b/deps/icu-small/source/common/bmpset.cpp @@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) { ++lead; } if(lead<limitLead) { - bits=~((1<<lead)-1); + bits=~(((unsigned)1<<lead)-1); if(limitLead<0x20) { - bits&=(1<<limitLead)-1; + bits&=((unsigned)1<<limitLead)-1; } for(trail=0; trail<64; ++trail) { table[trail]|=bits; diff --git a/deps/icu-small/source/common/brkeng.cpp b/deps/icu-small/source/common/brkeng.cpp index da64b3bdef..a513bafb16 100644 --- a/deps/icu-small/source/common/brkeng.cpp +++ b/deps/icu-small/source/common/brkeng.cpp @@ -59,58 +59,47 @@ LanguageBreakFactory::~LanguageBreakFactory() { ****************************************************************** */ -UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { - for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) { - fHandled[i] = 0; - } +UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) { + (void)status; } UnhandledEngine::~UnhandledEngine() { - for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) { - if (fHandled[i] != 0) { - delete fHandled[i]; - } - } + delete fHandled; + fHandled = nullptr; } UBool -UnhandledEngine::handles(UChar32 c, int32_t breakType) const { - return (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled) - && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); +UnhandledEngine::handles(UChar32 c) const { + return fHandled && fHandled->contains(c); } int32_t UnhandledEngine::findBreaks( UText *text, int32_t /* startPos */, int32_t endPos, - int32_t breakType, UVector32 &/*foundBreaks*/ ) const { - if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) { - UChar32 c = utext_current32(text); - while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { - utext_next32(text); // 
TODO: recast loop to work with post-increment operations. - c = utext_current32(text); - } + UChar32 c = utext_current32(text); + while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { + utext_next32(text); // TODO: recast loop to work with post-increment operations. + c = utext_current32(text); } return 0; } void -UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { - if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) { - if (fHandled[breakType] == 0) { - fHandled[breakType] = new UnicodeSet(); - if (fHandled[breakType] == 0) { - return; - } - } - if (!fHandled[breakType]->contains(c)) { - UErrorCode status = U_ZERO_ERROR; - // Apply the entire script of the character. - int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); - fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); +UnhandledEngine::handleCharacter(UChar32 c) { + if (fHandled == nullptr) { + fHandled = new UnicodeSet(); + if (fHandled == nullptr) { + return; } } + if (!fHandled->contains(c)) { + UErrorCode status = U_ZERO_ERROR; + // Apply the entire script of the character. + int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); + fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status); + } } /* @@ -138,7 +127,7 @@ U_NAMESPACE_BEGIN static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER; const LanguageBreakEngine * -ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { +ICULanguageBreakFactory::getEngineFor(UChar32 c) { const LanguageBreakEngine *lbe = NULL; UErrorCode status = U_ZERO_ERROR; @@ -156,14 +145,14 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { int32_t i = fEngines->size(); while (--i >= 0) { lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); - if (lbe != NULL && lbe->handles(c, breakType)) { + if (lbe != NULL && lbe->handles(c)) { return lbe; } } } // We didn't find an engine. Create one. 
- lbe = loadEngineFor(c, breakType); + lbe = loadEngineFor(c); if (lbe != NULL) { fEngines->push((void *)lbe, status); } @@ -171,11 +160,11 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { } const LanguageBreakEngine * -ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { +ICULanguageBreakFactory::loadEngineFor(UChar32 c) { UErrorCode status = U_ZERO_ERROR; UScriptCode code = uscript_getScript(c, &status); if (U_SUCCESS(status)) { - DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType); + DictionaryMatcher *m = loadDictionaryMatcherFor(code); if (m != NULL) { const LanguageBreakEngine *engine = NULL; switch(code) { @@ -236,7 +225,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { } DictionaryMatcher * -ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) { +ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) { UErrorCode status = U_ZERO_ERROR; // open root from brkitr tree. UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); diff --git a/deps/icu-small/source/common/brkeng.h b/deps/icu-small/source/common/brkeng.h index 5c61d2ed5d..e40fce13f6 100644 --- a/deps/icu-small/source/common/brkeng.h +++ b/deps/icu-small/source/common/brkeng.h @@ -54,11 +54,10 @@ class LanguageBreakEngine : public UMemory { * a particular kind of break.</p> * * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c, int32_t breakType) const = 0; + virtual UBool handles(UChar32 c) const = 0; /** * <p>Find any breaks within a run in the supplied text.</p> @@ -68,14 +67,12 @@ class LanguageBreakEngine : public UMemory { * is capable of handling. * @param startPos The start of the run within the supplied text. 
* @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks A Vector of int32_t to receive the breaks. * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const = 0; }; @@ -125,11 +122,9 @@ class LanguageBreakFactory : public UMemory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0; + virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0; }; @@ -152,11 +147,11 @@ class UnhandledEngine : public LanguageBreakEngine { private: /** - * The sets of characters handled, for each break type + * The sets of characters handled. * @internal */ - UnicodeSet *fHandled[4]; + UnicodeSet *fHandled; public: @@ -176,11 +171,10 @@ class UnhandledEngine : public LanguageBreakEngine { * a particular kind of break.</p> * * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c, int32_t breakType) const; + virtual UBool handles(UChar32 c) const; /** * <p>Find any breaks within a run in the supplied text.</p> @@ -190,23 +184,20 @@ class UnhandledEngine : public LanguageBreakEngine { * is capable of handling. * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks An allocated C array of the breaks found, if any * @return The number of breaks found. 
*/ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const; /** * <p>Tell the engine to handle a particular character and break type.</p> * * @param c A character which the engine should handle - * @param breakType The type of text break for which the engine should handle c */ - virtual void handleCharacter(UChar32 c, int32_t breakType); + virtual void handleCharacter(UChar32 c); }; @@ -250,11 +241,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType); + virtual const LanguageBreakEngine *getEngineFor(UChar32 c); protected: /** @@ -263,21 +252,17 @@ protected: * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. - * @param breakType The kind of text break for which a LanguageBreakEngine is - * sought. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType); + virtual const LanguageBreakEngine *loadEngineFor(UChar32 c); /** * <p>Create a DictionaryMatcher for the specified script and break type.</p> * @param script An ISO 15924 script code that identifies the dictionary to be * created. - * @param breakType The kind of text break for which a dictionary is - * sought. * @return A DictionaryMatcher with the desired characteristics, or NULL. 
*/ - virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType); + virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script); }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/brkiter.cpp b/deps/icu-small/source/common/brkiter.cpp index a509ff10c9..23e0cc3c15 100644 --- a/deps/icu-small/source/common/brkiter.cpp +++ b/deps/icu-small/source/common/brkiter.cpp @@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN // ------------------------------------- BreakIterator* -BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) +BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status) { char fnbuff[256]; char ext[4]={'\0'}; @@ -121,7 +121,6 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, U_LOCALE_BASED(locBased, *(BreakIterator*)result); locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale.data()); - result->setBreakType(kind); } ures_close(b); @@ -413,10 +412,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) BreakIterator *result = NULL; switch (kind) { case UBRK_CHARACTER: - result = BreakIterator::buildInstance(loc, "grapheme", kind, status); + result = BreakIterator::buildInstance(loc, "grapheme", status); break; case UBRK_WORD: - result = BreakIterator::buildInstance(loc, "word", kind, status); + result = BreakIterator::buildInstance(loc, "word", status); break; case UBRK_LINE: uprv_strcpy(lbType, "line"); @@ -429,10 +428,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) uprv_strcat(lbType, lbKeyValue); } } - result = BreakIterator::buildInstance(loc, lbType, kind, status); + result = BreakIterator::buildInstance(loc, lbType, status); break; case UBRK_SENTENCE: - result = BreakIterator::buildInstance(loc, "sentence", kind, status); + result = BreakIterator::buildInstance(loc, "sentence", status); #if 
!UCONFIG_NO_FILTERED_BREAK_ITERATION { char ssKeyValue[kKeyValueLenMax] = {0}; @@ -449,7 +448,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) #endif break; case UBRK_TITLE: - result = BreakIterator::buildInstance(loc, "title", kind, status); + result = BreakIterator::buildInstance(loc, "title", status); break; default: status = U_ILLEGAL_ARGUMENT_ERROR; diff --git a/deps/icu-small/source/common/bytesinkutil.cpp b/deps/icu-small/source/common/bytesinkutil.cpp index bf1a2d45f8..6af7ddfd59 100644 --- a/deps/icu-small/source/common/bytesinkutil.cpp +++ b/deps/icu-small/source/common/bytesinkutil.cpp @@ -92,20 +92,16 @@ ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) { sink.Append(s8, 2); } -UBool -ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length, - ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return FALSE; } - if (length > 0) { - if (edits != nullptr) { - edits->addUnchanged(length); - } - if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { - sink.Append(reinterpret_cast<const char *>(s), length); - } +void +ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits) { + U_ASSERT(length > 0); + if (edits != nullptr) { + edits->addUnchanged(length); + } + if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { + sink.Append(reinterpret_cast<const char *>(s), length); } - return TRUE; } UBool @@ -117,7 +113,11 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit, errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return FALSE; } - return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode); + int32_t length = (int32_t)(limit - s); + if (length > 0) { + appendNonEmptyUnchanged(s, length, sink, options, edits); + } + return TRUE; } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/bytesinkutil.h b/deps/icu-small/source/common/bytesinkutil.h index 004b49c4ce..8287ffea4c 
100644 --- a/deps/icu-small/source/common/bytesinkutil.h +++ b/deps/icu-small/source/common/bytesinkutil.h @@ -43,11 +43,19 @@ public: static UBool appendUnchanged(const uint8_t *s, int32_t length, ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode); + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); } + return TRUE; + } static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit, ByteSink &sink, uint32_t options, Edits *edits, UErrorCode &errorCode); + +private: + static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits); }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/cmemory.cpp b/deps/icu-small/source/common/cmemory.cpp index 300279c243..0b7e432c4d 100644 --- a/deps/icu-small/source/common/cmemory.cpp +++ b/deps/icu-small/source/common/cmemory.cpp @@ -41,30 +41,6 @@ static int n=0; static long b=0; #endif -#if U_DEBUG - -static char gValidMemorySink = 0; - -U_CAPI void uprv_checkValidMemory(const void *p, size_t n) { - /* - * Access the memory to ensure that it's all valid. - * Load and save a computed value to try to ensure that the compiler - * does not throw away the whole loop. - * A thread analyzer might complain about un-mutexed access to gValidMemorySink - * which is true but harmless because no one ever uses the value in gValidMemorySink. 
- */ - const char *s = (const char *)p; - char c = gValidMemorySink; - size_t i; - U_ASSERT(p != NULL); - for(i = 0; i < n; ++i) { - c ^= s[i]; - } - gValidMemorySink = c; -} - -#endif /* U_DEBUG */ - U_CAPI void * U_EXPORT2 uprv_malloc(size_t s) { #if U_DEBUG && defined(UPRV_MALLOC_COUNT) diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h index 83a0129651..a44f9a1902 100644 --- a/deps/icu-small/source/common/cmemory.h +++ b/deps/icu-small/source/common/cmemory.h @@ -36,31 +36,10 @@ #include <stdio.h> #endif -#if U_DEBUG - -/* - * The C++ standard requires that the source pointer for memcpy() & memmove() - * is valid, not NULL, and not at the end of an allocated memory block. - * In debug mode, we read one byte from the source point to verify that it's - * a valid, readable pointer. - */ - -U_CAPI void uprv_checkValidMemory(const void *p, size_t n); - -#define uprv_memcpy(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)) -#define uprv_memmove(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)) - -#else #define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) #define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size) -#endif /* U_DEBUG */ - /** * \def UPRV_LENGTHOF * Convenience macro to determine the length of a fixed array at compile-time. 
diff --git a/deps/icu-small/source/common/cstring.h b/deps/icu-small/source/common/cstring.h index 2232efcda5..ed0b1a7c8b 100644 --- a/deps/icu-small/source/common/cstring.h +++ b/deps/icu-small/source/common/cstring.h @@ -40,28 +40,10 @@ #define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c) #define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c) #define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c) - -#if U_DEBUG - -#define uprv_strncpy(dst, src, size) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)) -#define uprv_strncmp(s1, s2, n) ( \ - uprv_checkValidMemory(s1, 1), \ - uprv_checkValidMemory(s2, 1), \ - U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)) -#define uprv_strncat(dst, src, n) ( \ - uprv_checkValidMemory(src, 1), \ - U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)) - -#else - #define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size) #define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n) #define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n) -#endif /* U_DEBUG */ - /** * Is c an ASCII-repertoire letter a-z or A-Z? 
* Note: The implementation is specific to whether ICU is compiled for diff --git a/deps/icu-small/source/common/dictbe.cpp b/deps/icu-small/source/common/dictbe.cpp index 02fc8a4726..419d062ef2 100644 --- a/deps/icu-small/source/common/dictbe.cpp +++ b/deps/icu-small/source/common/dictbe.cpp @@ -29,24 +29,21 @@ U_NAMESPACE_BEGIN ****************************************************************** */ -DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) { - fTypes = breakTypes; +DictionaryBreakEngine::DictionaryBreakEngine() { } DictionaryBreakEngine::~DictionaryBreakEngine() { } UBool -DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const { - return (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes) - && fSet.contains(c)); +DictionaryBreakEngine::handles(UChar32 c) const { + return fSet.contains(c); } int32_t DictionaryBreakEngine::findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const { (void)startPos; // TODO: remove this param? 
int32_t result = 0; @@ -66,10 +63,8 @@ DictionaryBreakEngine::findBreaks( UText *text, } rangeStart = start; rangeEnd = current; - if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) { - result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); - utext_setNativeIndex(text, current); - } + result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); + utext_setNativeIndex(text, current); return result; } @@ -194,7 +189,7 @@ static const int32_t THAI_MIN_WORD = 2; static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2; ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)), + : DictionaryBreakEngine(), fDictionary(adoptDictionary) { fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status); @@ -436,7 +431,7 @@ static const int32_t LAO_MIN_WORD = 2; static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2; LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)), + : DictionaryBreakEngine(), fDictionary(adoptDictionary) { fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status); @@ -632,7 +627,7 @@ static const int32_t BURMESE_MIN_WORD = 2; static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2; BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)), + : DictionaryBreakEngine(), fDictionary(adoptDictionary) { fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status); @@ -825,7 +820,7 @@ static const int32_t KHMER_MIN_WORD = 2; static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << 
UBRK_LINE)), + : DictionaryBreakEngine(), fDictionary(adoptDictionary) { fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status); @@ -1047,7 +1042,7 @@ foundBest: */ static const uint32_t kuint32max = 0xFFFFFFFF; CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status) -: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) { +: DictionaryBreakEngine(), fDictionary(adoptDictionary) { // Korean dictionary only includes Hangul syllables fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status); fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status); @@ -1324,8 +1319,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, } if (katakanaRunLength < kMaxKatakanaGroupLength) { uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength); - if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) { - bestSnlp.setElementAt(newSnlp, j); + if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) { + bestSnlp.setElementAt(newSnlp, i+katakanaRunLength); prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i; } } diff --git a/deps/icu-small/source/common/dictbe.h b/deps/icu-small/source/common/dictbe.h index ffc1ae9f26..99d176cc2e 100644 --- a/deps/icu-small/source/common/dictbe.h +++ b/deps/icu-small/source/common/dictbe.h @@ -42,27 +42,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine { UnicodeSet fSet; - /** - * The set of break types handled by this engine - * @internal - */ - - uint32_t fTypes; - - /** - * <p>Default constructor.</p> - * - */ - DictionaryBreakEngine(); - public: /** - * <p>Constructor setting the break types handled.</p> - * - * @param breakTypes A bitmap of types handled by the engine. 
+ * <p>Constructor </p> */ - DictionaryBreakEngine( uint32_t breakTypes ); + DictionaryBreakEngine(); /** * <p>Virtual destructor.</p> @@ -74,11 +59,10 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * a particular kind of break.</p> * * @param c A character which begins a run that the engine might handle - * @param breakType The type of text break which the caller wants to determine * @return TRUE if this engine handles the particular character and break * type. */ - virtual UBool handles( UChar32 c, int32_t breakType ) const; + virtual UBool handles(UChar32 c) const; /** * <p>Find any breaks within a run in the supplied text.</p> @@ -88,14 +72,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * that starts from the first character in the range. * @param startPos The start of the run within the supplied text. * @param endPos The end of the run within the supplied text. - * @param breakType The type of break desired, or -1. * @param foundBreaks vector of int32_t to receive the break positions * @return The number of breaks found. */ virtual int32_t findBreaks( UText *text, int32_t startPos, int32_t endPos, - int32_t breakType, UVector32 &foundBreaks ) const; protected: @@ -108,13 +90,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine { virtual void setCharacters( const UnicodeSet &set ); /** - * <p>Set the break types handled by this engine.</p> - * - * @param breakTypes A bitmap of types handled by the engine. 
- */ -// virtual void setBreakTypes( uint32_t breakTypes ); - - /** * <p>Divide up a range of known dictionary characters handled by this break engine.</p> * * @param text A UText representing the text diff --git a/deps/icu-small/source/common/filteredbrk.cpp b/deps/icu-small/source/common/filteredbrk.cpp index 6a38b1bf3b..162b38de5d 100644 --- a/deps/icu-small/source/common/filteredbrk.cpp +++ b/deps/icu-small/source/common/filteredbrk.cpp @@ -694,6 +694,11 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st } FilteredBreakIteratorBuilder * +FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) { + return createEmptyInstance(status); +} + +FilteredBreakIteratorBuilder * FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) { if(U_FAILURE(status)) return NULL; LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status); diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp index 54b289e24d..69f92d94c6 100644 --- a/deps/icu-small/source/common/rbbi.cpp +++ b/deps/icu-small/source/common/rbbi.cpp @@ -64,7 +64,9 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator) * Constructs a RuleBasedBreakIterator that uses the already-created * tables object that is passed in as a parameter. 
*/ -RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) { +RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) + : fSCharIter(UnicodeString()) +{ init(status); fData = new RBBIDataWrapper(data, status); // status checked in constructor if (U_FAILURE(status)) {return;} @@ -80,7 +82,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode // RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, - UErrorCode &status) { + UErrorCode &status) + : fSCharIter(UnicodeString()) +{ init(status); if (U_FAILURE(status)) { return; @@ -110,6 +114,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, // //------------------------------------------------------------------------------- RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status) + : fSCharIter(UnicodeString()) { init(status); fData = new RBBIDataWrapper(udm, status); // status checked in constructor @@ -130,6 +135,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &sta RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, UParseError &parseError, UErrorCode &status) + : fSCharIter(UnicodeString()) { init(status); if (U_FAILURE(status)) {return;} @@ -152,7 +158,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, // Used when creating a RuleBasedBreakIterator from a set // of rules. 
//------------------------------------------------------------------------------- -RuleBasedBreakIterator::RuleBasedBreakIterator() { +RuleBasedBreakIterator::RuleBasedBreakIterator() + : fSCharIter(UnicodeString()) +{ UErrorCode status = U_ZERO_ERROR; init(status); } @@ -165,7 +173,8 @@ RuleBasedBreakIterator::RuleBasedBreakIterator() { // //------------------------------------------------------------------------------- RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other) -: BreakIterator(other) +: BreakIterator(other), + fSCharIter(UnicodeString()) { UErrorCode status = U_ZERO_ERROR; this->init(status); @@ -177,17 +186,13 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& oth * Destructor */ RuleBasedBreakIterator::~RuleBasedBreakIterator() { - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // fCharIter was adopted from the outside. delete fCharIter; } fCharIter = NULL; - delete fSCharIter; - fSCharIter = NULL; - delete fDCharIter; - fDCharIter = NULL; - utext_close(fText); + utext_close(&fText); if (fData != NULL) { fData->removeReference(); @@ -217,26 +222,29 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { } BreakIterator::operator=(that); - fBreakType = that.fBreakType; if (fLanguageBreakEngines != NULL) { delete fLanguageBreakEngines; fLanguageBreakEngines = NULL; // Just rebuild for now } // TODO: clone fLanguageBreakEngines from "that" UErrorCode status = U_ZERO_ERROR; - fText = utext_clone(fText, that.fText, FALSE, TRUE, &status); + utext_clone(&fText, &that.fText, FALSE, TRUE, &status); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { delete fCharIter; } - fCharIter = NULL; + fCharIter = &fSCharIter; - if (that.fCharIter != NULL ) { + if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) { // This is a little bit tricky - it will intially appear that // this->fCharIter is 
adopted, even if that->fCharIter was // not adopted. That's ok. fCharIter = that.fCharIter->clone(); } + fSCharIter = that.fSCharIter; + if (fCharIter == NULL) { + fCharIter = &fSCharIter; + } if (fData != NULL) { fData->removeReference(); @@ -269,33 +277,30 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { // //----------------------------------------------------------------------------- void RuleBasedBreakIterator::init(UErrorCode &status) { - fText = NULL; fCharIter = NULL; - fSCharIter = NULL; - fDCharIter = NULL; fData = NULL; fPosition = 0; fRuleStatusIndex = 0; fDone = false; fDictionaryCharCount = 0; - fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable - // dictionary behavior for Break Iterators that are - // built from rules. Even better would be the ability to - // declare the type in the rules. - fLanguageBreakEngines = NULL; fUnhandledBreakEngine = NULL; fBreakCache = NULL; fDictionaryCache = NULL; - if (U_FAILURE(status)) { + // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER. 
+ // fText = UTEXT_INITIALIZER; + static const UText initializedUText = UTEXT_INITIALIZER; + uprv_memcpy(&fText, &initializedUText, sizeof(UText)); + + if (U_FAILURE(status)) { return; } - fText = utext_openUChars(NULL, NULL, 0, &status); + utext_openUChars(&fText, NULL, 0, &status); fDictionaryCache = new DictionaryCache(this, status); fBreakCache = new BreakCache(this, status); - if (U_SUCCESS(status) && (fText == NULL || fDictionaryCache == NULL || fBreakCache == NULL)) { + if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) { status = U_MEMORY_ALLOCATION_ERROR; } @@ -344,7 +349,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const { const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that; - if (!utext_equals(fText, that2.fText)) { + if (!utext_equals(&fText, &that2.fText)) { // The two break iterators are operating on different text, // or have a different iteration position. // Note that fText's position is always the same as the break iterator's position. @@ -385,7 +390,7 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { } fBreakCache->reset(); fDictionaryCache->reset(); - fText = utext_clone(fText, ut, FALSE, TRUE, &status); + utext_clone(&fText, ut, FALSE, TRUE, &status); // Set up a dummy CharacterIterator to be returned if anyone // calls getText(). With input from UText, there is no reasonable @@ -393,27 +398,20 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { // Return one over an empty string instead - this is the closest // we can come to signaling a failure. 
// (GetText() is obsolete, this failure is sort of OK) - if (fDCharIter == NULL) { - static const UChar c = 0; - fDCharIter = new UCharCharacterIterator(&c, 0); - if (fDCharIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - } + fSCharIter.setText(UnicodeString()); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // existing fCharIter was adopted from the outside. Delete it now. delete fCharIter; } - fCharIter = fDCharIter; + fCharIter = &fSCharIter; this->first(); } UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const { - UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status); + UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status); return result; } @@ -439,7 +437,7 @@ void RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { // If we are holding a CharacterIterator adopted from a // previous call to this function, delete it now. - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { delete fCharIter; } @@ -450,9 +448,9 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { if (newText==NULL || newText->startIndex() != 0) { // startIndex !=0 wants to be an error, but there's no way to report it. // Make the iterator text be an empty string. - fText = utext_openUChars(fText, NULL, 0, &status); + utext_openUChars(&fText, NULL, 0, &status); } else { - fText = utext_openCharacterIterator(fText, newText, &status); + utext_openCharacterIterator(&fText, newText, &status); } this->first(); } @@ -467,23 +465,19 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) { UErrorCode status = U_ZERO_ERROR; fBreakCache->reset(); fDictionaryCache->reset(); - fText = utext_openConstUnicodeString(fText, &newText, &status); + utext_openConstUnicodeString(&fText, &newText, &status); // Set up a character iterator on the string. // Needed in case someone calls getText(). 
// Can not, unfortunately, do this lazily on the (probably never) // call to getText(), because getText is const. - if (fSCharIter == NULL) { - fSCharIter = new StringCharacterIterator(newText); - } else { - fSCharIter->setText(newText); - } + fSCharIter.setText(newText); - if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { + if (fCharIter != &fSCharIter) { // old fCharIter was adopted from the outside. Delete it. delete fCharIter; } - fCharIter = fSCharIter; + fCharIter = &fSCharIter; this->first(); } @@ -503,14 +497,14 @@ RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, U status = U_ILLEGAL_ARGUMENT_ERROR; return *this; } - int64_t pos = utext_getNativeIndex(fText); + int64_t pos = utext_getNativeIndex(&fText); // Shallow read-only clone of the new UText into the existing input UText - fText = utext_clone(fText, input, FALSE, TRUE, &status); + utext_clone(&fText, input, FALSE, TRUE, &status); if (U_FAILURE(status)) { return *this; } - utext_setNativeIndex(fText, pos); - if (utext_getNativeIndex(fText) != pos) { + utext_setNativeIndex(&fText, pos); + if (utext_getNativeIndex(&fText) != pos) { // Sanity check. The new input utext is supposed to have the exact same // contents as the old. If we can't set to the same position, it doesn't. // The contents underlying the old utext might be invalid at this point, @@ -540,7 +534,7 @@ int32_t RuleBasedBreakIterator::first(void) { * @return The text's past-the-end offset. */ int32_t RuleBasedBreakIterator::last(void) { - int32_t endPos = (int32_t)utext_nativeLength(fText); + int32_t endPos = (int32_t)utext_nativeLength(&fText); UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position. (void)endShouldBeBoundary; U_ASSERT(endShouldBeBoundary); @@ -611,8 +605,8 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) { // Move requested offset to a code point start. It might be on a trail surrogate, // or on a trail byte if the input is UTF-8. 
Or it may be beyond the end of the text. - utext_setNativeIndex(fText, startPos); - startPos = (int32_t)utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, startPos); + startPos = (int32_t)utext_getNativeIndex(&fText); UErrorCode status = U_ZERO_ERROR; fBreakCache->following(startPos, status); @@ -626,15 +620,15 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) { * @return The position of the last boundary before the starting position. */ int32_t RuleBasedBreakIterator::preceding(int32_t offset) { - if (fText == NULL || offset > utext_nativeLength(fText)) { + if (offset > utext_nativeLength(&fText)) { return last(); } // Move requested offset to a code point start. It might be on a trail surrogate, // or on a trail byte if the input is UTF-8. - utext_setNativeIndex(fText, offset); - int32_t adjustedOffset = utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, offset); + int32_t adjustedOffset = utext_getNativeIndex(&fText); UErrorCode status = U_ZERO_ERROR; fBreakCache->preceding(adjustedOffset, status); @@ -660,8 +654,8 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { // Note that isBoundary() is always be false for offsets that are not on code point boundaries. // But we still need the side effect of leaving iteration at the following boundary. - utext_setNativeIndex(fText, offset); - int32_t adjustedOffset = utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, offset); + int32_t adjustedOffset = utext_getNativeIndex(&fText); bool result = false; UErrorCode status = U_ZERO_ERROR; @@ -669,7 +663,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { result = (fBreakCache->current() == offset); } - if (result && adjustedOffset < offset && utext_char32At(fText, offset) == U_SENTINEL) { + if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) { // Original offset is beyond the end of the text. 
Return FALSE, it's not a boundary, // but the iteration position remains set to the end of the text, which is a boundary. return FALSE; @@ -789,9 +783,9 @@ int32_t RuleBasedBreakIterator::handleNext() { // if we're already at the end of the text, return DONE. initialPosition = fPosition; - UTEXT_SETNATIVEINDEX(fText, initialPosition); + UTEXT_SETNATIVEINDEX(&fText, initialPosition); result = initialPosition; - c = UTEXT_NEXT32(fText); + c = UTEXT_NEXT32(&fText); if (c==U_SENTINEL) { fDone = TRUE; return UBRK_DONE; @@ -854,7 +848,7 @@ int32_t RuleBasedBreakIterator::handleNext() { #ifdef RBBI_DEBUG if (gTrace) { - RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText)); + RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { @@ -867,9 +861,7 @@ int32_t RuleBasedBreakIterator::handleNext() { // State Transition - move machine to its next state // - // Note: fNextState is defined as uint16_t[2], but we are casting - // a generated RBBI table to RBBIStateTableRow and some tables - // actually have more than 2 categories. + // fNextState is a variable-length array. U_ASSERT(category<fData->fHeader->fCatCount); state = row->fNextState[category]; /*Not accessing beyond memory*/ row = (RBBIStateTableRow *) @@ -880,7 +872,7 @@ int32_t RuleBasedBreakIterator::handleNext() { if (row->fAccepting == -1) { // Match found, common case. if (mode != RBBI_START) { - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values. } @@ -898,7 +890,7 @@ int32_t RuleBasedBreakIterator::handleNext() { int16_t rule = row->fLookAhead; if (rule != 0) { // At the position of a '/' in a look-ahead match. Record it. 
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText); + int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText); lookAheadMatches.setPosition(rule, pos); } @@ -914,7 +906,7 @@ int32_t RuleBasedBreakIterator::handleNext() { // the input position. The next iteration will be processing the // first real input character. if (mode == RBBI_RUN) { - c = UTEXT_NEXT32(fText); + c = UTEXT_NEXT32(&fText); } else { if (mode == RBBI_START) { mode = RBBI_RUN; @@ -928,9 +920,9 @@ int32_t RuleBasedBreakIterator::handleNext() { // (This really indicates a defect in the break rules. They should always match // at least one character.) if (result == initialPosition) { - utext_setNativeIndex(fText, initialPosition); - utext_next32(fText); - result = (int32_t)utext_getNativeIndex(fText); + utext_setNativeIndex(&fText, initialPosition); + utext_next32(&fText); + result = (int32_t)utext_getNativeIndex(&fText); fRuleStatusIndex = 0; } @@ -965,7 +957,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { int32_t initialPosition = 0; const RBBIStateTable *stateTable = fData->fSafeRevTable; - UTEXT_SETNATIVEINDEX(fText, fromPosition); + UTEXT_SETNATIVEINDEX(&fText, fromPosition); #ifdef RBBI_DEBUG if (gTrace) { RBBIDebugPuts("Handle Previous pos char state category"); @@ -973,14 +965,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { #endif // if we're already at the start of the text, return DONE. - if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) { + if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) { return BreakIterator::DONE; } // Set up the starting char. 
- initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText); + initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(&fText); result = initialPosition; - c = UTEXT_PREVIOUS32(fText); + c = UTEXT_PREVIOUS32(&fText); // Set the initial state for the state machine state = START_STATE; @@ -1028,7 +1020,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { #ifdef RBBI_DEBUG if (gTrace) { - RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText)); + RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { @@ -1041,9 +1033,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // State Transition - move machine to its next state // - // Note: fNextState is defined as uint16_t[2], but we are casting - // a generated RBBI table to RBBIStateTableRow and some tables - // actually have more than 2 categories. + // fNextState is a variable-length array. U_ASSERT(category<fData->fHeader->fCatCount); state = row->fNextState[category]; /*Not accessing beyond memory*/ row = (RBBIStateTableRow *) @@ -1051,7 +1041,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { if (row->fAccepting == -1) { // Match found, common case. - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } int16_t completedRule = row->fAccepting; @@ -1059,14 +1049,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // Lookahead match is completed. int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule); if (lookaheadResult >= 0) { - UTEXT_SETNATIVEINDEX(fText, lookaheadResult); + UTEXT_SETNATIVEINDEX(&fText, lookaheadResult); return lookaheadResult; } } int16_t rule = row->fLookAhead; if (rule != 0) { // At the position of a '/' in a look-ahead match. Record it. 
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText); + int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText); lookAheadMatches.setPosition(rule, pos); } @@ -1082,7 +1072,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // the input position. The next iteration will be processing the // first real input character. if (mode == RBBI_RUN) { - c = UTEXT_PREVIOUS32(fText); + c = UTEXT_PREVIOUS32(&fText); } else { if (mode == RBBI_START) { mode = RBBI_RUN; @@ -1096,9 +1086,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) { // (This really indicates a defect in the break rules. They should always match // at least one character.) if (result == initialPosition) { - UTEXT_SETNATIVEINDEX(fText, initialPosition); - UTEXT_PREVIOUS32(fText); - result = (int32_t)UTEXT_GETNATIVEINDEX(fText); + UTEXT_SETNATIVEINDEX(&fText, initialPosition); + UTEXT_PREVIOUS32(&fText); + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); } #ifdef RBBI_DEBUG @@ -1247,7 +1237,7 @@ static void U_CALLCONV initLanguageFactories() { static const LanguageBreakEngine* -getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType) +getLanguageBreakEngineFromFactory(UChar32 c) { umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories); if (gLanguageBreakFactories == NULL) { @@ -1258,7 +1248,7 @@ getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType) const LanguageBreakEngine *lbe = NULL; while (--i >= 0) { LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i)); - lbe = factory->getEngineFor(c, breakType); + lbe = factory->getEngineFor(c); if (lbe != NULL) { break; } @@ -1290,14 +1280,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { int32_t i = fLanguageBreakEngines->size(); while (--i >= 0) { lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i)); - if (lbe->handles(c, fBreakType)) { + if (lbe->handles(c)) { return lbe; } } // No existing dictionary took the 
character. See if a factory wants to // give us a new LanguageBreakEngine for this character. - lbe = getLanguageBreakEngineFromFactory(c, fBreakType); + lbe = getLanguageBreakEngineFromFactory(c); // If we got one, use it and push it on our stack. if (lbe != NULL) { @@ -1313,6 +1303,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { fUnhandledBreakEngine = new UnhandledEngine(status); if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) { status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; } // Put it last so that scripts for which we have an engine get tried // first. @@ -1327,25 +1318,19 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { // Tell the reject engine about the character; at its discretion, it may // add more than just the one character. - fUnhandledBreakEngine->handleCharacter(c, fBreakType); + fUnhandledBreakEngine->handleCharacter(c); return fUnhandledBreakEngine; } - - -/*int32_t RuleBasedBreakIterator::getBreakType() const { - return fBreakType; -}*/ - -void RuleBasedBreakIterator::setBreakType(int32_t type) { - fBreakType = type; -} - void RuleBasedBreakIterator::dumpCache() { fBreakCache->dumpCache(); } +void RuleBasedBreakIterator::dumpTables() { + fData->printData(); +} + /** * Returns the description used to create this iterator */ diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp index 9d716bb342..ba9329d477 100644 --- a/deps/icu-small/source/common/rbbi_cache.cpp +++ b/deps/icu-small/source/common/rbbi_cache.cpp @@ -26,14 +26,11 @@ U_NAMESPACE_BEGIN */ RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) : - fBI(bi), fBreaks(NULL), fPositionInCache(-1), + fBI(bi), fBreaks(status), fPositionInCache(-1), fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) { - fBreaks = new UVector32(status); } RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() { - delete fBreaks; - fBreaks = 
NULL; } void RuleBasedBreakIterator::DictionaryCache::reset() { @@ -42,7 +39,7 @@ void RuleBasedBreakIterator::DictionaryCache::reset() { fLimit = 0; fFirstRuleStatusIndex = 0; fOtherRuleStatusIndex = 0; - fBreaks->removeAllElements(); + fBreaks.removeAllElements(); } UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) { @@ -54,13 +51,13 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_ // Sequential iteration, move from previous boundary to the following int32_t r = 0; - if (fPositionInCache >= 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) { + if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { ++fPositionInCache; - if (fPositionInCache >= fBreaks->size()) { + if (fPositionInCache >= fBreaks.size()) { fPositionInCache = -1; return FALSE; } - r = fBreaks->elementAti(fPositionInCache); + r = fBreaks.elementAti(fPositionInCache); U_ASSERT(r > fromPos); *result = r; *statusIndex = fOtherRuleStatusIndex; @@ -69,8 +66,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_ // Random indexing. Linear search for the boundary following the given position. 
- for (fPositionInCache = 0; fPositionInCache < fBreaks->size(); ++fPositionInCache) { - r= fBreaks->elementAti(fPositionInCache); + for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) { + r= fBreaks.elementAti(fPositionInCache); if (r > fromPos) { *result = r; *statusIndex = fOtherRuleStatusIndex; @@ -90,16 +87,16 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_ } if (fromPos == fLimit) { - fPositionInCache = fBreaks->size() - 1; + fPositionInCache = fBreaks.size() - 1; if (fPositionInCache >= 0) { - U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos); + U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos); } } int32_t r; - if (fPositionInCache > 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) { + if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { --fPositionInCache; - r = fBreaks->elementAti(fPositionInCache); + r = fBreaks.elementAti(fPositionInCache); U_ASSERT(r < fromPos); *result = r; *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; @@ -111,8 +108,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_ return FALSE; } - for (fPositionInCache = fBreaks->size()-1; fPositionInCache >= 0; --fPositionInCache) { - r = fBreaks->elementAti(fPositionInCache); + for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) { + r = fBreaks.elementAti(fPositionInCache); if (r < fromPos) { *result = r; *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; @@ -141,7 +138,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo int32_t current; UErrorCode status = U_ZERO_ERROR; int32_t foundBreakCount = 0; - UText *text = fBI->fText; + UText *text = &fBI->fText; // Loop through the text, looking for ranges of dictionary characters. 
// For each span, find the appropriate break engine, and ask it to find @@ -168,7 +165,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo // Ask the language object if there are any breaks. It will add them to the cache and // leave the text pointer on the other side of its range, ready to search for the next one. if (lbe != NULL) { - foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks); + foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks); } // Reload the loop variables for the next go-round @@ -182,21 +179,21 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo // printf("foundBreakCount = %d\n", foundBreakCount); if (foundBreakCount > 0) { - U_ASSERT(foundBreakCount == fBreaks->size()); - if (startPos < fBreaks->elementAti(0)) { + U_ASSERT(foundBreakCount == fBreaks.size()); + if (startPos < fBreaks.elementAti(0)) { // The dictionary did not place a boundary at the start of the segment of text. // Add one now. This should not commonly happen, but it would be easy for interactions // of the rules for dictionary segments and the break engine implementations to // inadvertently cause it. Cover it here, just in case. - fBreaks->insertElementAt(startPos, 0, status); + fBreaks.insertElementAt(startPos, 0, status); } - if (endPos > fBreaks->peeki()) { - fBreaks->push(endPos, status); + if (endPos > fBreaks.peeki()) { + fBreaks.push(endPos, status); } fPositionInCache = 0; // Note: Dictionary matching may extend beyond the original limit. - fStart = fBreaks->elementAti(0); - fLimit = fBreaks->peeki(); + fStart = fBreaks.elementAti(0); + fLimit = fBreaks.peeki(); } else { // there were no language-based breaks, even though the segment contained // dictionary characters. 
Subsequent attempts to fetch boundaries from the dictionary cache diff --git a/deps/icu-small/source/common/rbbi_cache.h b/deps/icu-small/source/common/rbbi_cache.h index 8dc7320db9..fd6deb4333 100644 --- a/deps/icu-small/source/common/rbbi_cache.h +++ b/deps/icu-small/source/common/rbbi_cache.h @@ -56,7 +56,7 @@ class RuleBasedBreakIterator::DictionaryCache: public UMemory { RuleBasedBreakIterator *fBI; - UVector32 *fBreaks; // A vector containing the boundaries. + UVector32 fBreaks; // A vector containing the boundaries. int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following() // or preceding(). Optimizes sequential access. int32_t fStart; // Text position of first boundary in cache. diff --git a/deps/icu-small/source/common/rbbidata.cpp b/deps/icu-small/source/common/rbbidata.cpp index d66eca82f8..18912a6a7b 100644 --- a/deps/icu-small/source/common/rbbidata.cpp +++ b/deps/icu-small/source/common/rbbidata.cpp @@ -267,8 +267,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab #endif -#ifdef RBBI_DEBUG void RBBIDataWrapper::printData() { +#ifdef RBBI_DEBUG RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader); RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1], fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]); @@ -285,8 +285,8 @@ void RBBIDataWrapper::printData() { RBBIDebugPrintf("%c", fRuleSource[c]); } RBBIDebugPrintf("\n\n"); -} #endif +} U_NAMESPACE_END diff --git a/deps/icu-small/source/common/rbbidata.h b/deps/icu-small/source/common/rbbidata.h index 75427863d9..8b21acca30 100644 --- a/deps/icu-small/source/common/rbbidata.h +++ b/deps/icu-small/source/common/rbbidata.h @@ -116,9 +116,10 @@ struct RBBIStateTableRow { /* StatusTable of the set of matching */ /* tags (rule status values) */ int16_t fReserved; - uint16_t fNextState[2]; /* Next State, indexed by char category. 
*/ - /* This array does not have two elements */ - /* Array Size is actually fData->fHeader->fCatCount */ + uint16_t fNextState[1]; /* Next State, indexed by char category. */ + /* Variable-length array declared with length 1 */ + /* to disable bounds checkers. */ + /* Array Size is actually fData->fHeader->fCatCount*/ /* CAUTION: see RBBITableBuilder::getTableSize() */ /* before changing anything here. */ }; @@ -129,7 +130,9 @@ struct RBBIStateTable { uint32_t fRowLen; /* Length of a state table row, in bytes. */ uint32_t fFlags; /* Option Flags for this state table */ uint32_t fReserved; /* reserved */ - char fTableData[4]; /* First RBBIStateTableRow begins here. */ + char fTableData[1]; /* First RBBIStateTableRow begins here. */ + /* Variable-length array declared with length 1 */ + /* to disable bounds checkers. */ /* (making it char[] simplifies ugly address */ /* arithmetic for indexing variable length rows.) */ }; @@ -162,13 +165,8 @@ public: UBool operator ==(const RBBIDataWrapper &other) const; int32_t hashCode(); const UnicodeString &getRuleSourceString() const; -#ifdef RBBI_DEBUG void printData(); void printTable(const char *heading, const RBBIStateTable *table); -#else - #define printData() - #define printTable(heading, table) -#endif /* */ /* Pointers to items within the data */ diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp index c67f6f8166..9fc8f8e814 100644 --- a/deps/icu-small/source/common/rbbirb.cpp +++ b/deps/icu-small/source/common/rbbirb.cpp @@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, UParseError *parseErr, UErrorCode &status) - : fRules(rules) + : fRules(rules), fStrippedRules(rules) { fStatus = &status; // status is checked below fParseError = parseErr; @@ -147,8 +147,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { return NULL; } - // Remove comments and whitespace from the rules to make it smaller. 
- UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules)); + // Remove whitespace from the rules to make it smaller. + // The rule parser has already removed comments. + fStrippedRules = fScanner->stripRules(fStrippedRules); // Calculate the size of each section in the data. // Sizes here are padded up to a multiple of 8 for better memory alignment. @@ -162,7 +163,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize()); int32_t trieSize = align8(fSetBuilder->getTrieSize()); int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); - int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar)); + int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar)); (void)safeFwdTableSize; @@ -225,7 +226,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { data->fStatusTable = data->fTrie + trieSize; data->fStatusTableLen= statusTableSize; data->fRuleSource = data->fStatusTable + statusTableSize; - data->fRuleSourceLen = strippedRules.length() * sizeof(UChar); + data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar); uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); @@ -245,7 +246,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { ruleStatusTable[i] = fRuleStatusVals->elementAti(i); } - strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); + fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); return data; } @@ -281,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, // // UnicodeSet processing. // Munge the Unicode Sets to create a set of character categories. - // Generate the mapping tables (TRIE) from input 32-bit characters to + // Generate the mapping tables (TRIE) from input code points to // the character categories. 
// - builder.fSetBuilder->build(); + builder.fSetBuilder->buildRanges(); // @@ -316,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, } #endif + builder.optimizeTables(); + builder.fSetBuilder->buildTrie(); + + + // // Package up the compiled data into a memory image // in the run-time format. @@ -347,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, return This; } +void RBBIRuleBuilder::optimizeTables() { + int32_t leftClass; + int32_t rightClass; + + leftClass = 3; + rightClass = 0; + while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) { + fSetBuilder->mergeCategories(leftClass, rightClass); + fForwardTables->removeColumn(rightClass); + fReverseTables->removeColumn(rightClass); + fSafeFwdTables->removeColumn(rightClass); + fSafeRevTables->removeColumn(rightClass); + } + + fForwardTables->removeDuplicateStates(); + fReverseTables->removeDuplicateStates(); + fSafeFwdTables->removeDuplicateStates(); + fSafeRevTables->removeDuplicateStates(); + + + +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/icu-small/source/common/rbbirb.h b/deps/icu-small/source/common/rbbirb.h index 6fbdbff744..511f394b45 100644 --- a/deps/icu-small/source/common/rbbirb.h +++ b/deps/icu-small/source/common/rbbirb.h @@ -126,10 +126,19 @@ public: ); virtual ~RBBIRuleBuilder(); + + /** + * Fold together redundant character classes (table columns) and + * redundant states (table rows). Done after initial table generation, + * before serializing the result. + */ + void optimizeTables(); + char *fDebugEnv; // controls debug trace output UErrorCode *fStatus; // Error reporting. Keeping status UParseError *fParseError; // here avoids passing it everywhere. const UnicodeString &fRules; // The rule string that we are compiling + UnicodeString fStrippedRules; // The rule string, with comments stripped. RBBIRuleScanner *fScanner; // The scanner. 
RBBINode *fForwardTree; // The parse trees, generated by the scanner, diff --git a/deps/icu-small/source/common/rbbiscan.cpp b/deps/icu-small/source/common/rbbiscan.cpp index 1653a0c7bc..e3472ed599 100644 --- a/deps/icu-small/source/common/rbbiscan.cpp +++ b/deps/icu-small/source/common/rbbiscan.cpp @@ -822,27 +822,24 @@ static const UChar chRParen = 0x29; //------------------------------------------------------------------------------ // -// stripRules Return a rules string without unnecessary -// characters. +// stripRules Return a rules string without extra spaces. +// (Comments are removed separately, during rule parsing.) // //------------------------------------------------------------------------------ UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { UnicodeString strippedRules; - int rulesLength = rules.length(); - for (int idx = 0; idx < rulesLength; ) { - UChar ch = rules[idx++]; - if (ch == chPound) { - while (idx < rulesLength - && ch != chCR && ch != chLF && ch != chNEL) - { - ch = rules[idx++]; - } - } - if (!u_isISOControl(ch)) { - strippedRules.append(ch); + int32_t rulesLength = rules.length(); + bool skippingSpaces = false; + + for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) { + UChar32 cp = rules.char32At(idx); + bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE); + if (skippingSpaces && whiteSpace) { + continue; } + strippedRules.append(cp); + skippingSpaces = whiteSpace; } - // strippedRules = strippedRules.unescape(); return strippedRules; } @@ -942,6 +939,7 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { // It will be treated as white-space, and serves to break up anything // that might otherwise incorrectly clump together with a comment in // the middle (a variable name, for example.) 
+ int32_t commentStart = fScanIndex; for (;;) { c.fChar = nextCharLL(); if (c.fChar == (UChar32)-1 || // EOF @@ -950,6 +948,9 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { c.fChar == chNEL || c.fChar == chLS) {break;} } + for (int32_t i=commentStart; i<fNextIndex-1; ++i) { + fRB->fStrippedRules.setCharAt(i, u' '); + } } if (c.fChar == (UChar32)-1) { return; diff --git a/deps/icu-small/source/common/rbbisetb.cpp b/deps/icu-small/source/common/rbbisetb.cpp index c172da00df..4e7389b4af 100644 --- a/deps/icu-small/source/common/rbbisetb.cpp +++ b/deps/icu-small/source/common/rbbisetb.cpp @@ -91,7 +91,7 @@ RBBISetBuilder::~RBBISetBuilder() // from the Unicode Sets. // //------------------------------------------------------------------------ -void RBBISetBuilder::build() { +void RBBISetBuilder::buildRanges() { RBBINode *usetNode; RangeDescriptor *rlRange; @@ -245,11 +245,16 @@ void RBBISetBuilder::build() { if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();} if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();} +} + + +// +// Build the Trie table for mapping UChar32 values to the corresponding +// range group number. +// +void RBBISetBuilder::buildTrie() { + RangeDescriptor *rlRange; - // - // Build the Trie table for mapping UChar32 values to the corresponding - // range group number - // fTrie = utrie2_open(0, // Initial value for all code points. 0, // Error value for out-of-range input. 
fStatus); @@ -265,6 +270,22 @@ void RBBISetBuilder::build() { } +void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) { + U_ASSERT(left >= 1); + U_ASSERT(right > left); + for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) { + int32_t rangeNum = rd->fNum & ~DICT_BIT; + int32_t rangeDict = rd->fNum & DICT_BIT; + if (rangeNum == right) { + rd->fNum = left | rangeDict; + } else if (rangeNum > right) { + rd->fNum--; + } + } + --fGroupCount; +} + + //----------------------------------------------------------------------------------- // // getTrieSize() Return the size that will be required to serialize the Trie. @@ -446,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() { lastPrintedGroupNum = groupNum; RBBIDebugPrintf("%2i ", groupNum); - if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");} + if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");} for (i=0; i<rlRange->fIncludesSets->size(); i++) { RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); @@ -639,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) { void RangeDescriptor::setDictionaryFlag() { int i; - for (i=0; i<this->fIncludesSets->size(); i++) { - RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); - UnicodeString setName; - RBBINode *setRef = usetNode->fParent; - if (setRef != NULL) { + static const char16_t *dictionary = u"dictionary"; + for (i=0; i<fIncludesSets->size(); i++) { + RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); + RBBINode *setRef = usetNode->fParent; + if (setRef != nullptr) { RBBINode *varRef = setRef->fParent; - if (varRef != NULL && varRef->fType == RBBINode::varRef) { - setName = varRef->fText; + if (varRef && varRef->fType == RBBINode::varRef) { + const UnicodeString *setName = &varRef->fText; + if (setName->compare(dictionary, -1) == 0) { + fNum |= RBBISetBuilder::DICT_BIT; + break; + } } } - if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no 
string literals. - this->fNum |= 0x4000; - break; - } } } diff --git a/deps/icu-small/source/common/rbbisetb.h b/deps/icu-small/source/common/rbbisetb.h index 7cedb45b33..a7a91b3b37 100644 --- a/deps/icu-small/source/common/rbbisetb.h +++ b/deps/icu-small/source/common/rbbisetb.h @@ -82,7 +82,8 @@ public: RBBISetBuilder(RBBIRuleBuilder *rb); ~RBBISetBuilder(); - void build(); + void buildRanges(); + void buildTrie(); void addValToSets(UVector *sets, uint32_t val); void addValToSet (RBBINode *usetNode, uint32_t val); int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the @@ -93,6 +94,13 @@ public: UChar32 getFirstChar(int32_t val) const; UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo // character were encountered. + /** merge two character categories that have been identified as having equivalent behavior. + * The ranges belonging to the right category (table column) will be added to the left. + */ + void mergeCategories(int32_t left, int32_t right); + + static constexpr int32_t DICT_BIT = 0x4000; + #ifdef RBBI_DEBUG void printSets(); void printRanges(); diff --git a/deps/icu-small/source/common/rbbitblb.cpp b/deps/icu-small/source/common/rbbitblb.cpp index b3e6ca51d1..61661a5442 100644 --- a/deps/icu-small/source/common/rbbitblb.cpp +++ b/deps/icu-small/source/common/rbbitblb.cpp @@ -22,6 +22,7 @@ #include "rbbidata.h" #include "cstring.h" #include "uassert.h" +#include "uvectr32.h" #include "cmemory.h" U_NAMESPACE_BEGIN @@ -761,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() { // if sd->fAccepting already had a value other than 0 or -1, leave it be. // If the end marker node is from a look-ahead rule, set - // the fLookAhead field or this state also. + // the fLookAhead field for this state also. if (endMarker->fLookAheadEnd) { // TODO: don't change value if already set? // TODO: allow for more than one active look-ahead rule in engine. 
@@ -1077,7 +1078,128 @@ void RBBITableBuilder::printPosSets(RBBINode *n) { } #endif +// +// findDuplCharClassFrom() +// +bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) { + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + + uint16_t table_base; + uint16_t table_dupl; + for (; baseCategory < numCols-1; ++baseCategory) { + for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) { + for (int32_t state=0; state<numStates; state++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + table_base = (uint16_t)sd->fDtran->elementAti(baseCategory); + table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory); + if (table_base != table_dupl) { + break; + } + } + if (table_base == table_dupl) { + return true; + } + } + } + return false; +} + + +// +// removeColumn() +// +void RBBITableBuilder::removeColumn(int32_t column) { + int32_t numStates = fDStates->size(); + for (int32_t state=0; state<numStates; state++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + U_ASSERT(column < sd->fDtran->size()); + sd->fDtran->removeElementAt(column); + } +} + +/* + * findDuplicateState + */ +bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) { + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + + for (; firstState<numStates-1; ++firstState) { + RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState); + for (duplState=firstState+1; duplState<numStates; ++duplState) { + RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState); + if (firstSD->fAccepting != duplSD->fAccepting || + firstSD->fLookAhead != duplSD->fLookAhead || + firstSD->fTagsIdx != duplSD->fTagsIdx) { + continue; + } + bool rowsMatch = true; + for (int32_t col=0; col < numCols; ++col) { + int32_t firstVal = 
firstSD->fDtran->elementAti(col); + int32_t duplVal = duplSD->fDtran->elementAti(col); + if (!((firstVal == duplVal) || + ((firstVal == firstState || firstVal == duplState) && + (duplVal == firstState || duplVal == duplState)))) { + rowsMatch = false; + break; + } + } + if (rowsMatch) { + return true; + } + } + } + return false; +} + +void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) { + U_ASSERT(keepState < duplState); + U_ASSERT(duplState < fDStates->size()); + RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState); + fDStates->removeElementAt(duplState); + delete duplSD; + + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + for (int32_t state=0; state<numStates; ++state) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + for (int32_t col=0; col<numCols; col++) { + int32_t existingVal = sd->fDtran->elementAti(col); + int32_t newVal = existingVal; + if (existingVal == duplState) { + newVal = keepState; + } else if (existingVal > duplState) { + newVal = existingVal - 1; + } + sd->fDtran->setElementAt(newVal, col); + } + if (sd->fAccepting == duplState) { + sd->fAccepting = keepState; + } else if (sd->fAccepting > duplState) { + sd->fAccepting--; + } + if (sd->fLookAhead == duplState) { + sd->fLookAhead = keepState; + } else if (sd->fLookAhead > duplState) { + sd->fLookAhead--; + } + } +} + + +/* + * RemoveDuplicateStates + */ +void RBBITableBuilder::removeDuplicateStates() { + int32_t firstState = 3; + int32_t duplicateState = 0; + while (findDuplicateState(firstState, duplicateState)) { + // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState); + removeState(firstState, duplicateState); + } +} //----------------------------------------------------------------------------- // @@ -1095,21 +1217,17 @@ int32_t RBBITableBuilder::getTableSize() const { return 0; } - size = sizeof(RBBIStateTable) - 4; // The header, 
with no rows to the table. + size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table. numRows = fDStates->size(); numCols = fRB->fSetBuilder->getNumCharCategories(); - // Note The declaration of RBBIStateTableRow is for a table of two columns. - // Therefore we subtract two from numCols when determining - // how much storage to add to a row for the total columns. - rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2); + rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols; size += numRows * rowSize; return size; } - //----------------------------------------------------------------------------- // // exportTable() export the state transition table in the format required @@ -1126,14 +1244,14 @@ void RBBITableBuilder::exportTable(void *where) { return; } - if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff || + int32_t catCount = fRB->fSetBuilder->getNumCharCategories(); + if (catCount > 0x7fff || fDStates->size() > 0x7fff) { *fStatus = U_BRK_INTERNAL_ERROR; return; } - table->fRowLen = sizeof(RBBIStateTableRow) + - sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2); + table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount; table->fNumStates = fDStates->size(); table->fFlags = 0; if (fRB->fLookAheadHardBreak) { @@ -1152,7 +1270,7 @@ void RBBITableBuilder::exportTable(void *where) { row->fAccepting = (int16_t)sd->fAccepting; row->fLookAhead = (int16_t)sd->fLookAhead; row->fTagIdx = (int16_t)sd->fTagsIdx; - for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) { + for (col=0; col<catCount; col++) { row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col); } } @@ -1259,7 +1377,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu fPositions = NULL; fDtran = NULL; - fDtran = new UVector(lastInputSymbol+1, *fStatus); + fDtran = new UVector32(lastInputSymbol+1, *fStatus); if (U_FAILURE(*fStatus)) { return; } @@ 
-1267,7 +1385,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu *fStatus = U_MEMORY_ALLOCATION_ERROR; return; } - fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized. + fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized. // It is indexed by input symbols, and will // hold the next state number for each // symbol. diff --git a/deps/icu-small/source/common/rbbitblb.h b/deps/icu-small/source/common/rbbitblb.h index 1041501878..09b57b5cf0 100644 --- a/deps/icu-small/source/common/rbbitblb.h +++ b/deps/icu-small/source/common/rbbitblb.h @@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN class RBBIRuleScanner; class RBBIRuleBuilder; +class UVector32; // // class RBBITableBuilder is part of the RBBI rule compiler. @@ -42,9 +43,24 @@ public: void build(); int32_t getTableSize() const; // Return the runtime size in bytes of // the built state table - void exportTable(void *where); // fill in the runtime state table. - // Sufficient memory must exist at - // the specified location. + + /** Fill in the runtime state table. Sufficient memory must exist at the specified location. + */ + void exportTable(void *where); + + /** Find duplicate (redundant) character classes, beginning after the specifed + * pair, within this state table. This is an iterator-like function, used to + * identify char classes (state table columns) that can be eliminated. + */ + bool findDuplCharClassFrom(int &baseClass, int &duplClass); + + /** Remove a column from the state table. Used when two character categories + * have been found equivalent, and merged together, to eliminate the uneeded table column. + */ + void removeColumn(int32_t column); + + /** Check for, and remove dupicate states (table rows). */ + void removeDuplicateStates(); private: @@ -60,8 +76,29 @@ private: void flagTaggedStates(); void mergeRuleStatusVals(); + /** + * Merge redundant state table columns, eliminating character classes with identical behavior. 
+ * Done after the state tables are generated, just before converting to their run-time format. + */ + int32_t mergeColumns(); + void addRuleRootNodes(UVector *dest, RBBINode *node); + /** Find the next duplicate state. An iterator function. + * @param firstState (in/out) begin looking at this state, return the first of the + * pair of duplicates. + * @param duplicateState returns the duplicate state of fistState + * @return true if a duplicate pair of states was found. + */ + bool findDuplicateState(int32_t &firstState, int32_t &duplicateState); + + /** Remove a duplicate state/ + * @param keepState First of the duplicate pair. Keep it. + * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state + * to refer to keepState instead. + */ + void removeState(int32_t keepState, int32_t duplState); + // Set functions for UVector. // TODO: make a USet subclass of UVector @@ -112,7 +149,7 @@ public: // with this state. Unordered (it's a set). // UVector contents are RBBINode * - UVector *fDtran; // Transitions out of this state. + UVector32 *fDtran; // Transitions out of this state. // indexed by input character // contents is int index of dest state // in RBBITableBuilder.fDStates diff --git a/deps/icu-small/source/common/sharedobject.cpp b/deps/icu-small/source/common/sharedobject.cpp index 37aa458e00..6eeca8605f 100644 --- a/deps/icu-small/source/common/sharedobject.cpp +++ b/deps/icu-small/source/common/sharedobject.cpp @@ -8,7 +8,10 @@ * sharedobject.cpp */ #include "sharedobject.h" +#include "mutex.h" #include "uassert.h" +#include "umutex.h" +#include "unifiedcache.h" U_NAMESPACE_BEGIN @@ -17,69 +20,41 @@ SharedObject::~SharedObject() {} UnifiedCacheBase::~UnifiedCacheBase() {} void -SharedObject::addRef(UBool fromWithinCache) const { - umtx_atomic_inc(&totalRefCount); - - // Although items in use may not be correct immediately, it - // will be correct eventually. 
- if (umtx_atomic_inc(&hardRefCount) == 1 && cachePtr != NULL) { - // If this object is cached, and the hardRefCount goes from 0 to 1, - // then the increment must happen from within the cache while the - // cache global mutex is locked. In this way, we can be rest assured - // that data races can't happen if the cache performs some task if - // the hardRefCount is zero while the global cache mutex is locked. - (void)fromWithinCache; // Suppress unused variable warning in non-debug builds. - U_ASSERT(fromWithinCache); - cachePtr->incrementItemsInUse(); - } +SharedObject::addRef() const { + umtx_atomic_inc(&hardRefCount); } +// removeRef Decrement the reference count and delete if it is zero. +// Note that SharedObjects with a non-null cachePtr are owned by the +// unified cache, and the cache will be responsible for the actual deletion. +// The deletion could be as soon as immediately following the +// update to the reference count, if another thread is running +// a cache eviction cycle concurrently. +// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects. +// THE OBJECT MAY ALREADY BE GONE. void -SharedObject::removeRef(UBool fromWithinCache) const { - UBool decrementItemsInUse = (umtx_atomic_dec(&hardRefCount) == 0); - UBool allReferencesGone = (umtx_atomic_dec(&totalRefCount) == 0); - - // Although items in use may not be correct immediately, it - // will be correct eventually. 
- if (decrementItemsInUse && cachePtr != NULL) { - if (fromWithinCache) { - cachePtr->decrementItemsInUse(); +SharedObject::removeRef() const { + const UnifiedCacheBase *cache = this->cachePtr; + int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount); + U_ASSERT(updatedRefCount >= 0); + if (updatedRefCount == 0) { + if (cache) { + cache->handleUnreferencedObject(); } else { - cachePtr->decrementItemsInUseWithLockingAndEviction(); + delete this; } } - if (allReferencesGone) { - delete this; - } } -void -SharedObject::addSoftRef() const { - umtx_atomic_inc(&totalRefCount); - ++softRefCount; -} - -void -SharedObject::removeSoftRef() const { - --softRefCount; - if (umtx_atomic_dec(&totalRefCount) == 0) { - delete this; - } -} int32_t SharedObject::getRefCount() const { - return umtx_loadAcquire(totalRefCount); -} - -int32_t -SharedObject::getHardRefCount() const { return umtx_loadAcquire(hardRefCount); } void SharedObject::deleteIfZeroRefCount() const { - if(getRefCount() == 0) { + if (this->cachePtr == nullptr && getRefCount() == 0) { delete this; } } diff --git a/deps/icu-small/source/common/sharedobject.h b/deps/icu-small/source/common/sharedobject.h index 783b55948a..54655d0d71 100644 --- a/deps/icu-small/source/common/sharedobject.h +++ b/deps/icu-small/source/common/sharedobject.h @@ -17,6 +17,8 @@ U_NAMESPACE_BEGIN +class SharedObject; + /** * Base class for unified cache exposing enough methods to SharedObject * instances to allow their addRef() and removeRef() methods to @@ -28,22 +30,12 @@ public: UnifiedCacheBase() { } /** - * Called by addRefWhileHoldingCacheLock() when the hard reference count - * of its instance goes from 0 to 1. + * Notify the cache implementation that an object was seen transitioning to + * zero hard references. The cache may use this to keep track the number of + * unreferenced SharedObjects, and to trigger evictions. 
*/ - virtual void incrementItemsInUse() const = 0; + virtual void handleUnreferencedObject() const = 0; - /** - * Called by removeRef() when the hard reference count of its instance - * drops from 1 to 0. - */ - virtual void decrementItemsInUseWithLockingAndEviction() const = 0; - - /** - * Called by removeRefWhileHoldingCacheLock() when the hard reference - * count of its instance drops from 1 to 0. - */ - virtual void decrementItemsInUse() const = 0; virtual ~UnifiedCacheBase(); private: UnifiedCacheBase(const UnifiedCacheBase &); @@ -63,7 +55,6 @@ class U_COMMON_API SharedObject : public UObject { public: /** Initializes totalRefCount, softRefCount to 0. */ SharedObject() : - totalRefCount(0), softRefCount(0), hardRefCount(0), cachePtr(NULL) {} @@ -71,7 +62,6 @@ public: /** Initializes totalRefCount, softRefCount to 0. */ SharedObject(const SharedObject &other) : UObject(other), - totalRefCount(0), softRefCount(0), hardRefCount(0), cachePtr(NULL) {} @@ -79,93 +69,45 @@ public: virtual ~SharedObject(); /** - * Increments the number of references to this object. Thread-safe. + * Increments the number of hard references to this object. Thread-safe. + * Not for use from within the Unified Cache implementation. */ - void addRef() const { addRef(FALSE); } + void addRef() const; /** - * Increments the number of references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void addRefWhileHoldingCacheLock() const { addRef(TRUE); } - - /** - * Increments the number of soft references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void addSoftRef() const; - - /** - * Decrements the number of references to this object. Thread-safe. - */ - void removeRef() const { removeRef(FALSE); } - - /** - * Decrements the number of references to this object. 
- * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - void removeRefWhileHoldingCacheLock() const { removeRef(TRUE); } - - /** - * Decrements the number of soft references to this object. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. + * Decrements the number of hard references to this object, and + * arrange for possible cache-eviction and/or deletion if ref + * count goes to zero. Thread-safe. + * + * Not for use from within the UnifiedCache implementation. */ - void removeSoftRef() const; + void removeRef() const; /** - * Returns the reference counter including soft references. + * Returns the number of hard references for this object. * Uses a memory barrier. */ int32_t getRefCount() const; /** - * Returns the count of soft references only. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. - */ - int32_t getSoftRefCount() const { return softRefCount; } - - /** - * Returns the count of hard references only. Uses a memory barrier. - * Used for testing the cache. Regular clients won't need this. - */ - int32_t getHardRefCount() const; - - /** * If noHardReferences() == TRUE then this object has no hard references. * Must be called only from within the internals of UnifiedCache. */ - inline UBool noHardReferences() const { return getHardRefCount() == 0; } + inline UBool noHardReferences() const { return getRefCount() == 0; } /** * If hasHardReferences() == TRUE then this object has hard references. * Must be called only from within the internals of UnifiedCache. */ - inline UBool hasHardReferences() const { return getHardRefCount() != 0; } - - /** - * If noSoftReferences() == TRUE then this object has no soft references. - * Must be called only from within the internals of UnifiedCache and - * only while the cache global mutex is held. 
- */ - UBool noSoftReferences() const { return (softRefCount == 0); } + inline UBool hasHardReferences() const { return getRefCount() != 0; } /** - * Deletes this object if it has no references or soft references. + * Deletes this object if it has no references. + * Available for non-cached SharedObjects only. Ownership of cached objects + * is with the UnifiedCache, which is soley responsible for eviction and deletion. */ void deleteIfZeroRefCount() const; - /** - * @internal For UnifedCache use only to register this object with itself. - * Must be called before this object is exposed to multiple threads. - */ - void registerWithCache(const UnifiedCacheBase *ptr) const { - cachePtr = ptr; - } /** * Returns a writable version of ptr. @@ -219,15 +161,21 @@ public: } private: - mutable u_atomic_int32_t totalRefCount; - - // Any thread modifying softRefCount must hold the global cache mutex + /** + * The number of references from the UnifiedCache, which is + * the number of times that the sharedObject is stored as a hash table value. + * For use by UnifiedCache implementation code only. + * All access is synchronized by UnifiedCache's gCacheMutex + */ mutable int32_t softRefCount; + friend class UnifiedCache; + /** + * Reference count, excluding references from within the UnifiedCache implementation. 
+ */ mutable u_atomic_int32_t hardRefCount; + mutable const UnifiedCacheBase *cachePtr; - void addRef(UBool withCacheLock) const; - void removeRef(UBool withCacheLock) const; }; diff --git a/deps/icu-small/source/common/sprpimpl.h b/deps/icu-small/source/common/sprpimpl.h index aff40ad0da..26de904b1f 100644 --- a/deps/icu-small/source/common/sprpimpl.h +++ b/deps/icu-small/source/common/sprpimpl.h @@ -90,7 +90,6 @@ struct UStringPrepProfile{ UTrie sprepTrie; const uint16_t* mappingData; UDataMemory* sprepData; - const UBiDiProps *bdp; /* used only if checkBiDi is set */ int32_t refCount; UBool isDataLoaded; UBool doNFKC; diff --git a/deps/icu-small/source/common/ubidi.cpp b/deps/icu-small/source/common/ubidi.cpp index 8e2fc36e5f..531ed64cff 100644 --- a/deps/icu-small/source/common/ubidi.cpp +++ b/deps/icu-small/source/common/ubidi.cpp @@ -152,9 +152,6 @@ ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ uprv_memset(pBiDi, 0, sizeof(UBiDi)); - /* get BiDi properties */ - pBiDi->bdp=ubidi_getSingleton(); - /* allocate memory for arrays as requested */ if(maxLength>0) { if( !getInitialDirPropsMemory(pBiDi, maxLength) || @@ -925,7 +922,7 @@ bracketProcessChar(BracketData *bd, int32_t position) { else match=0; if(match!=c && /* has a matching char */ - ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */ + ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */ /* special case: process synonyms create an opening entry for each synonym */ if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ @@ -3033,7 +3030,7 @@ ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) if( pBiDi->fnClassCallback == NULL || (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) { - dir = ubidi_getClass(pBiDi->bdp, c); + dir = ubidi_getClass(c); } if(dir >= U_CHAR_DIRECTION_COUNT) { dir = (UCharDirection)ON; diff --git 
a/deps/icu-small/source/common/ubidi_props.cpp b/deps/icu-small/source/common/ubidi_props.cpp index dcfb52c897..4141c21938 100644 --- a/deps/icu-small/source/common/ubidi_props.cpp +++ b/deps/icu-small/source/common/ubidi_props.cpp @@ -44,13 +44,6 @@ struct UBiDiProps { #define INCLUDED_FROM_UBIDI_PROPS_C #include "ubidi_props_data.h" -/* UBiDiProps singleton ----------------------------------------------------- */ - -U_CFUNC const UBiDiProps * -ubidi_getSingleton() { - return &ubidi_props_singleton; -} - /* set of property starts for UnicodeSet ------------------------------------ */ static UBool U_CALLCONV @@ -64,7 +57,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32 } U_CFUNC void -ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) { +ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { int32_t i, length; UChar32 c, start, limit; @@ -76,19 +69,19 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode * } /* add the start code point of each same-value range of the trie */ - utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa); + utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); /* add the code points from the bidi mirroring table */ - length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; + length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; for(i=0; i<length; ++i) { - c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]); + c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]); sa->addRange(sa->set, c, c+1); } /* add the code points from the Joining_Group array where the value changes */ - start=bdp->indexes[UBIDI_IX_JG_START]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; - jgArray=bdp->jgArray; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; + jgArray=ubidi_props_singleton.jgArray; for(;;) { prev=0; while(start<limit) { @@ -103,11 +96,11 @@ 
ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode * /* add the limit code point if the last value was not 0 (it is now start==limit) */ sa->add(sa->set, limit); } - if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) { + if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) { /* switch to the second Joining_Group range */ - start=bdp->indexes[UBIDI_IX_JG_START2]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; - jgArray=bdp->jgArray2; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; + jgArray=ubidi_props_singleton.jgArray2; } else { break; } @@ -121,14 +114,8 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode * /* property access functions ------------------------------------------------ */ U_CFUNC int32_t -ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { - int32_t max; - - if(bdp==NULL) { - return -1; - } - - max=bdp->indexes[UBIDI_MAX_VALUES_INDEX]; +ubidi_getMaxValue(UProperty which) { + int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX]; switch(which) { case UCHAR_BIDI_CLASS: return (max&UBIDI_CLASS_MASK); @@ -144,19 +131,19 @@ ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { } U_CAPI UCharDirection -ubidi_getClass(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getClass(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UCharDirection)UBIDI_GET_CLASS(props); } U_CFUNC UBool -ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isMirrored(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); } static UChar32 -getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { +getMirror(UChar32 c, uint16_t props) { int32_t delta=UBIDI_GET_MIRROR_DELTA(props); if(delta!=UBIDI_ESC_MIRROR_DELTA) { return 
c+delta; @@ -167,8 +154,8 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { int32_t i, length; UChar32 c2; - mirrors=bdp->mirrors; - length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; + mirrors=ubidi_props_singleton.mirrors; + length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; /* linear search */ for(i=0; i<length; ++i) { @@ -188,59 +175,59 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { } U_CFUNC UChar32 -ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); - return getMirror(bdp, c, props); +ubidi_getMirror(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return getMirror(c, props); } U_CFUNC UBool -ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isBidiControl(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT); } U_CFUNC UBool -ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_isJoinControl(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT); } U_CFUNC UJoiningType -ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getJoiningType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT); } U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) { +ubidi_getJoiningGroup(UChar32 c) { UChar32 start, limit; - start=bdp->indexes[UBIDI_IX_JG_START]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; if(start<=c && c<limit) { - return (UJoiningGroup)bdp->jgArray[c-start]; + return 
(UJoiningGroup)ubidi_props_singleton.jgArray[c-start]; } - start=bdp->indexes[UBIDI_IX_JG_START2]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; if(start<=c && c<limit) { - return (UJoiningGroup)bdp->jgArray2[c-start]; + return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start]; } return U_JG_NO_JOINING_GROUP; } U_CFUNC UBidiPairedBracketType -ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getPairedBracketType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT); } U_CFUNC UChar32 -ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); +ubidi_getPairedBracket(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); if((props&UBIDI_BPT_MASK)==0) { return c; } else { - return getMirror(bdp, c, props); + return getMirror(c, props); } } @@ -248,20 +235,20 @@ ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) { U_CFUNC UCharDirection u_charDirection(UChar32 c) { - return ubidi_getClass(&ubidi_props_singleton, c); + return ubidi_getClass(c); } U_CFUNC UBool u_isMirrored(UChar32 c) { - return ubidi_isMirrored(&ubidi_props_singleton, c); + return ubidi_isMirrored(c); } U_CFUNC UChar32 u_charMirror(UChar32 c) { - return ubidi_getMirror(&ubidi_props_singleton, c); + return ubidi_getMirror(c); } U_STABLE UChar32 U_EXPORT2 u_getBidiPairedBracket(UChar32 c) { - return ubidi_getPairedBracket(&ubidi_props_singleton, c); + return ubidi_getPairedBracket(c); } diff --git a/deps/icu-small/source/common/ubidi_props.h b/deps/icu-small/source/common/ubidi_props.h index 69e8853e69..698ee9c52b 100644 --- a/deps/icu-small/source/common/ubidi_props.h +++ b/deps/icu-small/source/common/ubidi_props.h @@ -31,46 +31,40 @@ U_CDECL_BEGIN /* 
library API -------------------------------------------------------------- */ -struct UBiDiProps; -typedef struct UBiDiProps UBiDiProps; - -U_CFUNC const UBiDiProps * -ubidi_getSingleton(void); - U_CFUNC void -ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode); +ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); /* property access functions */ U_CFUNC int32_t -ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which); +ubidi_getMaxValue(UProperty which); U_CAPI UCharDirection -ubidi_getClass(const UBiDiProps *bdp, UChar32 c); +ubidi_getClass(UChar32 c); U_CFUNC UBool -ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c); +ubidi_isMirrored(UChar32 c); U_CFUNC UChar32 -ubidi_getMirror(const UBiDiProps *bdp, UChar32 c); +ubidi_getMirror(UChar32 c); U_CFUNC UBool -ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c); +ubidi_isBidiControl(UChar32 c); U_CFUNC UBool -ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c); +ubidi_isJoinControl(UChar32 c); U_CFUNC UJoiningType -ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c); +ubidi_getJoiningType(UChar32 c); U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c); +ubidi_getJoiningGroup(UChar32 c); U_CFUNC UBidiPairedBracketType -ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c); +ubidi_getPairedBracketType(UChar32 c); U_CFUNC UChar32 -ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c); +ubidi_getPairedBracket(UChar32 c); /* file definitions --------------------------------------------------------- */ diff --git a/deps/icu-small/source/common/ubidiimp.h b/deps/icu-small/source/common/ubidiimp.h index fd64fac34d..a5d0727495 100644 --- a/deps/icu-small/source/common/ubidiimp.h +++ b/deps/icu-small/source/common/ubidiimp.h @@ -254,8 +254,6 @@ struct UBiDi { */ const UBiDi * pParaBiDi; - const UBiDiProps *bdp; - /* alias pointer to the current text */ const UChar *text; diff --git a/deps/icu-small/source/common/ucase.cpp 
b/deps/icu-small/source/common/ucase.cpp index 1f41dbf6de..28d5a4cac6 100644 --- a/deps/icu-small/source/common/ucase.cpp +++ b/deps/icu-small/source/common/ucase.cpp @@ -77,9 +77,12 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { /* data access primitives --------------------------------------------------- */ -#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) +U_CFUNC const UTrie2 * U_EXPORT2 +ucase_getTrie() { + return &ucase_props_singleton.trie; +} -#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) +#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) /* number of bits in an 8-bit integer value */ static const uint8_t flagsOffset[256]={ @@ -128,8 +131,8 @@ static const uint8_t flagsOffset[256]={ U_CAPI UChar32 U_EXPORT2 ucase_tolower(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { c+=UCASE_GET_DELTA(props); } } else { @@ -145,7 +148,7 @@ ucase_tolower(UChar32 c) { U_CAPI UChar32 U_EXPORT2 ucase_toupper(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } @@ -162,7 +165,7 @@ ucase_toupper(UChar32 c) { U_CAPI UChar32 U_EXPORT2 ucase_totitle(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } @@ -223,7 +226,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { } props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)!=UCASE_NONE) { /* add the one simple case mapping, no 
matter what type it is */ int32_t delta=UCASE_GET_DELTA(props); @@ -419,6 +422,138 @@ FullCaseFoldingIterator::next(UnicodeString &full) { return c; } +namespace LatinCase { + +const int8_t TO_LOWER_NORMAL[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, + + 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC +}; + +const int8_t TO_LOWER_TR_LT[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0, + EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, + + 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC +}; + +const int8_t TO_UPPER_NORMAL[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, + + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, EXC, 0, -1, 0, -1, 
0, -1, 0, 0, -1, 0, -1, 0, -1, 0, + + -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC +}; + +const int8_t TO_UPPER_TR[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, + + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, + + -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC +}; + +} // namespace LatinCase + U_NAMESPACE_END /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ @@ -439,7 +574,7 @@ ucase_getTypeOrIgnorable(UChar32 c) { static inline int32_t getDotType(UChar32 c) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - 
if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { return props&UCASE_DOT_MASK; } else { const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); @@ -878,8 +1013,8 @@ ucase_toFullLower(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { result=c+UCASE_GET_DELTA(props); } } else { @@ -1024,7 +1159,7 @@ toUpperOrTitle(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { + if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { result=c+UCASE_GET_DELTA(props); } @@ -1169,8 +1304,8 @@ ucase_toFullTitle(UChar32 c, U_CAPI UChar32 U_EXPORT2 ucase_fold(UChar32 c, uint32_t options) { uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { c+=UCASE_GET_DELTA(props); } } else { @@ -1234,8 +1369,8 @@ ucase_toFullFolding(UChar32 c, U_ASSERT(c >= 0); UChar32 result=c; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!PROPS_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { result=c+UCASE_GET_DELTA(props); } } else { diff --git a/deps/icu-small/source/common/ucase.h b/deps/icu-small/source/common/ucase.h index 9d6365eadf..a7a8c9f00d 100644 --- a/deps/icu-small/source/common/ucase.h +++ b/deps/icu-small/source/common/ucase.h @@ -26,6 +26,7 @@ #include "putilimp.h" #include "uset_imp.h" #include "udataswp.h" +#include "utrie2.h" #ifdef __cplusplus U_NAMESPACE_BEGIN @@ -148,6 +149,33 @@ private: int32_t rowCpIndex; }; +/** + * Fast case mapping data for ASCII/Latin. 
+ * Linear arrays of delta bytes: 0=no mapping; EXC=exception. + * Deltas must not cross the ASCII boundary, or else they cannot be easily used + * in simple UTF-8 code. + */ +namespace LatinCase { + +/** Case mapping/folding data for code points up to U+017F. */ +constexpr UChar LIMIT = 0x180; +/** U+017F case-folds and uppercases crossing the ASCII boundary. */ +constexpr UChar LONG_S = 0x17f; +/** Exception: Complex mapping, or too-large delta. */ +constexpr int8_t EXC = -0x80; + +/** Deltas for lowercasing for most locales, and default case folding. */ +extern const int8_t TO_LOWER_NORMAL[LIMIT]; +/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */ +extern const int8_t TO_LOWER_TR_LT[LIMIT]; + +/** Deltas for uppercasing for most locales. */ +extern const int8_t TO_UPPER_NORMAL[LIMIT]; +/** Deltas for uppercasing for tr/az. */ +extern const int8_t TO_UPPER_TR[LIMIT]; + +} // namespace LatinCase + U_NAMESPACE_END #endif @@ -308,6 +336,9 @@ enum { /* definitions for 16-bit case properties word ------------------------------ */ +U_CFUNC const UTrie2 * U_EXPORT2 +ucase_getTrie(); + /* 2-bit constants for types of cased characters */ #define UCASE_TYPE_MASK 3 enum { @@ -320,10 +351,14 @@ enum { #define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK) #define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7) +#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2) + #define UCASE_IGNORABLE 4 #define UCASE_SENSITIVE 8 #define UCASE_EXCEPTION 0x10 +#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) + #define UCASE_DOT_MASK 0x60 enum { UCASE_NO_DOT=0, /* normal characters with cc=0 */ diff --git a/deps/icu-small/source/common/ucasemap.cpp b/deps/icu-small/source/common/ucasemap.cpp index 8eec93c6e3..99e30c9fc6 100644 --- a/deps/icu-small/source/common/ucasemap.cpp +++ b/deps/icu-small/source/common/ucasemap.cpp @@ -165,9 +165,7 @@ appendResult(int32_t cpLength, int32_t result, const UChar *s, inline uint8_t getTwoByteLead(UChar32 c) { return 
(uint8_t)((c >> 6) | 0xc0); } inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } -} // namespace - -static UChar32 U_CALLCONV +UChar32 U_CALLCONV utf8_caseContextIterator(void *context, int8_t dir) { UCaseContext *csc=(UCaseContext *)context; UChar32 c; @@ -199,36 +197,227 @@ utf8_caseContextIterator(void *context, int8_t dir) { return U_SENTINEL; } -/* - * Case-maps [srcStart..srcLimit[ but takes - * context [0..srcLength[ into account. +/** + * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. + * caseLocale < 0: Case-folds [srcStart..srcLimit[. */ -static void -_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, - const uint8_t *src, UCaseContext *csc, - int32_t srcStart, int32_t srcLimit, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex=srcStart; - while (U_SUCCESS(errorCode) && srcIndex<srcLimit) { +void toLower(int32_t caseLocale, uint32_t options, + const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, + icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToLower; + if (caseLocale == UCASE_LOC_ROOT || + (caseLocale >= 0 ? + !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : + (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { + latinToLower = LatinCase::TO_LOWER_NORMAL; + } else { + latinToLower = LatinCase::TO_LOWER_TR_LT; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t prev = srcStart; + int32_t srcIndex = srcStart; + for (;;) { + // fast path for simple cases int32_t cpStart; - csc->cpStart=cpStart=srcIndex; UChar32 c; - U8_NEXT(src, srcIndex, srcLimit, c); - csc->cpLimit=srcIndex; - if(c<0) { - // Malformed UTF-8. 
- ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart, + for (;;) { + if (U_FAILURE(errorCode) || srcIndex >= srcLimit) { + c = U_SENTINEL; + break; + } + uint8_t lead = src[srcIndex++]; + if (lead <= 0x7f) { + int8_t d = latinToLower[lead]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 1; + c = lead; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, + sink, options, edits, errorCode); + char ascii = (char)(lead + d); + sink.Append(&ascii, 1); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + prev = srcIndex; + continue; + } else if (lead < 0xe3) { + uint8_t t; + if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit && + (t = src[srcIndex] - 0x80) <= 0x3f) { + // U+0080..U+017F + ++srcIndex; + c = ((lead - 0xc0) << 6) | t; + int8_t d = latinToLower[c]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 2; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendTwoBytes(c + d, sink); + if (edits != nullptr) { + edits->addReplace(2, 2); + } + prev = srcIndex; + continue; + } + } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && + (srcIndex + 2) <= srcLimit && + U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { + // most of CJK: no case mappings + srcIndex += 2; + continue; + } + cpStart = --srcIndex; + U8_NEXT(src, srcIndex, srcLimit, c); + if (c < 0) { + // ill-formed UTF-8 + continue; + } + uint16_t props = UTRIE2_GET16(trie, c); + if (UCASE_HAS_EXCEPTION(props)) { break; } + int32_t delta; + if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, sink, options, edits, errorCode); + ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); + prev = srcIndex; + } + if (c < 0) { + break; + } + // slow path + const UChar *s; + if (caseLocale >= 
0) { + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale); } else { - const UChar *s; - c=map(c, utf8_caseContextIterator, csc, &s, caseLocale); + c = ucase_toFullFolding(c, &s, options); + } + if (c >= 0) { + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); + prev = srcIndex; } } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, + sink, options, edits, errorCode); } +void toUpper(int32_t caseLocale, uint32_t options, + const uint8_t *src, UCaseContext *csc, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToUpper; + if (caseLocale == UCASE_LOC_TURKISH) { + latinToUpper = LatinCase::TO_UPPER_TR; + } else { + latinToUpper = LatinCase::TO_UPPER_NORMAL; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t prev = 0; + int32_t srcIndex = 0; + for (;;) { + // fast path for simple cases + int32_t cpStart; + UChar32 c; + for (;;) { + if (U_FAILURE(errorCode) || srcIndex >= srcLength) { + c = U_SENTINEL; + break; + } + uint8_t lead = src[srcIndex++]; + if (lead <= 0x7f) { + int8_t d = latinToUpper[lead]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 1; + c = lead; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, + sink, options, edits, errorCode); + char ascii = (char)(lead + d); + sink.Append(&ascii, 1); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + prev = srcIndex; + continue; + } else if (lead < 0xe3) { + uint8_t t; + if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength && + (t = src[srcIndex] - 0x80) <= 0x3f) { + // U+0080..U+017F + ++srcIndex; + c = ((lead - 0xc0) << 6) | t; + int8_t d = latinToUpper[c]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 2; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src 
+ prev, srcIndex - 2 - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendTwoBytes(c + d, sink); + if (edits != nullptr) { + edits->addReplace(2, 2); + } + prev = srcIndex; + continue; + } + } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && + (srcIndex + 2) <= srcLength && + U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { + // most of CJK: no case mappings + srcIndex += 2; + continue; + } + cpStart = --srcIndex; + U8_NEXT(src, srcIndex, srcLength, c); + if (c < 0) { + // ill-formed UTF-8 + continue; + } + uint16_t props = UTRIE2_GET16(trie, c); + if (UCASE_HAS_EXCEPTION(props)) { break; } + int32_t delta; + if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); + prev = srcIndex; + } + if (c < 0) { + break; + } + // slow path + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + const UChar *s; + c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale); + if (c >= 0) { + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); + prev = srcIndex; + } + } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, + sink, options, edits, errorCode); +} + +} // namespace + #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC void U_CALLCONV @@ -335,10 +524,9 @@ ucasemap_internalUTF8ToTitle( if(titleLimit<index) { if((options&U_TITLECASE_NO_LOWERCASE)==0) { /* Normal operation: Lowercase the rest of the word. 
*/ - _caseMap(caseLocale, options, ucase_toFullLower, - src, &csc, - titleLimit, index, - sink, edits, errorCode); + toLower(caseLocale, options, + src, &csc, titleLimit, index, + sink, edits, errorCode); if(U_FAILURE(errorCode)) { return; } @@ -538,8 +726,8 @@ ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREA UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - _caseMap( - caseLocale, options, ucase_toFullLower, + toLower( + caseLocale, options, src, &csc, 0, srcLength, sink, edits, errorCode); } @@ -555,9 +743,9 @@ ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREA UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - _caseMap( - caseLocale, options, ucase_toFullUpper, - src, &csc, 0, srcLength, + toUpper( + caseLocale, options, + src, &csc, srcLength, sink, edits, errorCode); } } @@ -567,22 +755,10 @@ ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_B const uint8_t *src, int32_t srcLength, icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex = 0; - while (U_SUCCESS(errorCode) && srcIndex < srcLength) { - int32_t cpStart = srcIndex; - UChar32 c; - U8_NEXT(src, srcIndex, srcLength, c); - if(c<0) { - // Malformed UTF-8. 
- ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart, - sink, options, edits, errorCode); - } else { - const UChar *s; - c = ucase_toFullFolding(c, &s, options); - appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); - } - } + toLower( + -1, options, + src, nullptr, 0, srcLength, + sink, edits, errorCode); } void diff --git a/deps/icu-small/source/common/ucasemap_imp.h b/deps/icu-small/source/common/ucasemap_imp.h index 99a6490279..7788fd9371 100644 --- a/deps/icu-small/source/common/ucasemap_imp.h +++ b/deps/icu-small/source/common/ucasemap_imp.h @@ -60,15 +60,6 @@ u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, int32_t *matchLen1, int32_t *matchLen2, UErrorCode *pErrorCode); -/** - * Are the Unicode properties loaded? - * This must be used before internal functions are called that do - * not perform this check. - * Generate a debug assertion failure if data is not loaded. - */ -U_CFUNC UBool -uprv_haveProperties(UErrorCode *pErrorCode); - #ifdef __cplusplus U_NAMESPACE_BEGIN diff --git a/deps/icu-small/source/common/uchar.cpp b/deps/icu-small/source/common/uchar.cpp index c3f037d73e..996c3fdc40 100644 --- a/deps/icu-small/source/common/uchar.cpp +++ b/deps/icu-small/source/common/uchar.cpp @@ -42,14 +42,6 @@ /* getting a uint32_t properties word from the data */ #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); -U_CFUNC UBool -uprv_haveProperties(UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return FALSE; - } - return TRUE; -} - /* API functions ------------------------------------------------------------ */ /* Gets the Unicode character's general category.*/ diff --git a/deps/icu-small/source/common/ucmndata.cpp b/deps/icu-small/source/common/ucmndata.cpp index 251c7ba182..ba2310bb7a 100644 --- a/deps/icu-small/source/common/ucmndata.cpp +++ b/deps/icu-small/source/common/ucmndata.cpp @@ -77,7 +77,11 @@ typedef struct { typedef struct { uint32_t count; uint32_t reserved; - 
PointerTOCEntry entry[2]; /* Actual size is from count. */ + /** + * Variable-length array declared with length 1 to disable bounds checkers. + * The actual array length is in the count field. + */ + PointerTOCEntry entry[1]; } PointerTOC; diff --git a/deps/icu-small/source/common/ucmndata.h b/deps/icu-small/source/common/ucmndata.h index 8c36897f16..1684441432 100644 --- a/deps/icu-small/source/common/ucmndata.h +++ b/deps/icu-small/source/common/ucmndata.h @@ -52,7 +52,11 @@ typedef struct { typedef struct { uint32_t count; - UDataOffsetTOCEntry entry[2]; /* Actual size of array is from count. */ + /** + * Variable-length array declared with length 1 to disable bounds checkers. + * The actual array length is in the count field. + */ + UDataOffsetTOCEntry entry[1]; } UDataOffsetTOC; /** diff --git a/deps/icu-small/source/common/ucnv2022.cpp b/deps/icu-small/source/common/ucnv2022.cpp index 1b625ea06c..854ca60cc3 100644 --- a/deps/icu-small/source/common/ucnv2022.cpp +++ b/deps/icu-small/source/common/ucnv2022.cpp @@ -3512,14 +3512,14 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC case 'k': if(myConverterData->version == 0) { if(length == 1) { - if((UBool)args->converter->fromUnicodeStatus) { + if(args->converter->fromUnicodeStatus) { /* in DBCS mode: switch to SBCS */ args->converter->fromUnicodeStatus = 0; *p++ = UCNV_SI; } *p++ = subchar[0]; } else /* length == 2*/ { - if(!(UBool)args->converter->fromUnicodeStatus) { + if(!args->converter->fromUnicodeStatus) { /* in SBCS mode: switch to DBCS */ args->converter->fromUnicodeStatus = 1; *p++ = UCNV_SO; diff --git a/deps/icu-small/source/common/ucnv_err.cpp b/deps/icu-small/source/common/ucnv_err.cpp index 18218835a2..63794d2334 100644 --- a/deps/icu-small/source/common/ucnv_err.cpp +++ b/deps/icu-small/source/common/ucnv_err.cpp @@ -60,11 +60,12 @@ * To avoid dependency on other code, this list is hard coded here. 
* When an ignorable code point is found and is unmappable, the default callbacks * will ignore them. - * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g= + * For a list of the default ignorable code points, use this link: + * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i= * * This list should be sync with the one in CharsetCallback.java */ -#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\ +#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \ (c == 0x00AD) || \ (c == 0x034F) || \ (c == 0x061C) || \ @@ -74,26 +75,15 @@ (0x180B <= c && c <= 0x180E) || \ (0x200B <= c && c <= 0x200F) || \ (0x202A <= c && c <= 0x202E) || \ - (c == 0x2060) || \ - (0x2066 <= c && c <= 0x2069) || \ - (0x2061 <= c && c <= 0x2064) || \ - (0x206A <= c && c <= 0x206F) || \ + (0x2060 <= c && c <= 0x206F) || \ (c == 0x3164) || \ - (0x0FE00 <= c && c <= 0x0FE0F) || \ - (c == 0x0FEFF) || \ - (c == 0x0FFA0) || \ - (0x01BCA0 <= c && c <= 0x01BCA3) || \ - (0x01D173 <= c && c <= 0x01D17A) || \ - (c == 0x0E0001) || \ - (0x0E0020 <= c && c <= 0x0E007F) || \ - (0x0E0100 <= c && c <= 0x0E01EF) || \ - (c == 0x2065) || \ - (0x0FFF0 <= c && c <= 0x0FFF8) || \ - (c == 0x0E0000) || \ - (0x0E0002 <= c && c <= 0x0E001F) || \ - (0x0E0080 <= c && c <= 0x0E00FF) || \ - (0x0E01F0 <= c && c <= 0x0E0FFF) \ - ) + (0xFE00 <= c && c <= 0xFE0F) || \ + (c == 0xFEFF) || \ + (c == 0xFFA0) || \ + (0xFFF0 <= c && c <= 0xFFF8) || \ + (0x1BCA0 <= c && c <= 0x1BCA3) || \ + (0x1D173 <= c && c <= 0x1D17A) || \ + (0xE0000 <= c && c <= 0xE0FFF)) /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ diff --git a/deps/icu-small/source/common/ucnv_u32.cpp b/deps/icu-small/source/common/ucnv_u32.cpp index 3fac04b300..ca8c6788d3 100644 --- a/deps/icu-small/source/common/ucnv_u32.cpp +++ b/deps/icu-small/source/common/ucnv_u32.cpp @@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, uint32_t ch, i; /* 
Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) { + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, int32_t offsetNum = 0; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) { + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, uint32_t ch, i; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; @@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, int32_t offsetNum = 0; /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) + if (args->converter->toULength > 0 && myTarget < targetLimit) { i = args->converter->toULength; /* restore # of bytes consumed */ args->converter->toULength = 0; diff --git a/deps/icu-small/source/common/ucnv_u8.cpp b/deps/icu-small/source/common/ucnv_u8.cpp index c7ef87fd50..5a07244b02 100644 --- a/deps/icu-small/source/common/ucnv_u8.cpp +++ b/deps/icu-small/source/common/ucnv_u8.cpp @@ -76,7 +76,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, int32_t i, inBytes; /* Restore size of current sequence */ - if (cnv->toUnicodeStatus && myTarget < targetLimit) + if (cnv->toULength > 0 && myTarget < targetLimit) { inBytes = cnv->mode; /* restore # of bytes to consume */ i = cnv->toULength; /* restore # 
of bytes consumed */ @@ -194,7 +194,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr int32_t i, inBytes; /* Restore size of current sequence */ - if (cnv->toUnicodeStatus && myTarget < targetLimit) + if (cnv->toULength > 0 && myTarget < targetLimit) { inBytes = cnv->mode; /* restore # of bytes to consume */ i = cnv->toULength; /* restore # of bytes consumed */ @@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } count=(int32_t)(sourceLimit-source)+oldToULength; @@ -695,36 +696,20 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, // Use a single counter for source and target, counting the minimum of // the source length and the target capacity. // Let the standard converter handle edge cases. - const uint8_t *limit=sourceLimit; if(count>targetCapacity) { - limit-=(count-targetCapacity); count=targetCapacity; } - // The conversion loop checks count>0 only once per 1/2/3-byte character. - // If the buffer ends with a truncated 2- or 3-byte sequence, + // The conversion loop checks count>0 only once per character. + // If the buffer ends with a truncated sequence, // then we reduce the count to stop before that, // and collect the remaining bytes after the conversion loop. - { - // Do not go back into the bytes that will be read for finishing a partial - // sequence from the previous buffer. 
- int32_t length=count-toULimit; - if(length>0) { - uint8_t b1=*(limit-1); - if(U8_IS_SINGLE(b1)) { - // common ASCII character - } else if(U8_IS_TRAIL(b1) && length>=2) { - uint8_t b2=*(limit-2); - if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - // truncated 3-byte sequence - count-=2; - } - } else if(0xc2<=b1 && b1<0xf0) { - // truncated 2- or 3-byte sequence - --count; - } - } - } + + // Do not go back into the bytes that will be read for finishing a partial + // sequence from the previous buffer. + int32_t length=count-toULimit; + U8_TRUNCATE_IF_INCOMPLETE(source, 0, length); + count=toULimit+length; } if(c!=0) { @@ -814,7 +799,7 @@ moreBytes: } /* copy the legal byte sequence to the target */ - if(count>=toULength) { + { int8_t i; for(i=0; i<oldToULength; ++i) { @@ -825,14 +810,6 @@ moreBytes: *target++=*source++; } count-=toULength; - } else { - // A supplementary character that does not fit into the target. - // Let the standard converter handle this. - source-=(toULength-oldToULength); - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - *pErrorCode=U_USING_DEFAULT_WARNING; - return; } } } @@ -856,8 +833,7 @@ moreBytes: utf8->toULength=toULength; utf8->mode=toULimit; break; - } else if(!U8_IS_TRAIL(b=*source)) { - /* lead byte in trail byte position */ + } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) { utf8->toULength=toULength; *pErrorCode=U_ILLEGAL_CHAR_FOUND; break; diff --git a/deps/icu-small/source/common/ucnvlat1.cpp b/deps/icu-small/source/common/ucnvlat1.cpp index 9855ebe6e7..15eeb5c51f 100644 --- a/deps/icu-small/source/common/ucnvlat1.cpp +++ b/deps/icu-small/source/common/ucnvlat1.cpp @@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; + if (utf8->toULength > 0) { + c=(UChar32)utf8->toUnicodeStatus; 
+ } else { + c = 0; + } if(c!=0 && source<sourceLimit) { if(targetCapacity==0) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; @@ -620,7 +624,7 @@ ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, uint8_t c; - if(pToUArgs->converter->toUnicodeStatus!=0) { + if(pToUArgs->converter->toULength > 0) { /* no handling of partial UTF-8 characters here, fall back to pivoting */ *pErrorCode=U_USING_DEFAULT_WARNING; return; diff --git a/deps/icu-small/source/common/ucnvmbcs.cpp b/deps/icu-small/source/common/ucnvmbcs.cpp index e5efa7fc1b..9052394b4f 100644 --- a/deps/icu-small/source/common/ucnvmbcs.cpp +++ b/deps/icu-small/source/common/ucnvmbcs.cpp @@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character. @@ -5359,12 +5360,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { + if(utf8->toULength > 0) { toULength=oldToULength=utf8->toULength; toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; } else { toULength=oldToULength=toULimit=0; + c = 0; } // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character. 
diff --git a/deps/icu-small/source/common/ucurr.cpp b/deps/icu-small/source/common/ucurr.cpp index aa9d855f50..d1e5f62a9a 100644 --- a/deps/icu-small/source/common/ucurr.cpp +++ b/deps/icu-small/source/common/ucurr.cpp @@ -17,6 +17,7 @@ #include "unicode/ustring.h" #include "unicode/parsepos.h" #include "ustr_imp.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "uassert.h" @@ -28,9 +29,12 @@ #include "uinvchar.h" #include "uresimp.h" #include "ulist.h" +#include "uresimp.h" #include "ureslocs.h" #include "ulocimp.h" +using namespace icu; + //#define UCURR_DEBUG_EQUIV 1 #ifdef UCURR_DEBUG_EQUIV #include "stdio.h" @@ -104,6 +108,7 @@ static const char VAR_DELIM_STR[] = "_"; // Tag for localized display names (symbols) of currencies static const char CURRENCIES[] = "Currencies"; +static const char CURRENCIES_NARROW[] = "Currencies%narrow"; static const char CURRENCYPLURALS[] = "CurrencyPlurals"; static const UChar EUR_STR[] = {0x0045,0x0055,0x0052,0}; @@ -698,7 +703,7 @@ ucurr_getName(const UChar* currency, } int32_t choice = (int32_t) nameStyle; - if (choice < 0 || choice > 1) { + if (choice < 0 || choice > 2) { *ec = U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -731,15 +736,19 @@ ucurr_getName(const UChar* currency, const UChar* s = NULL; ec2 = U_ZERO_ERROR; - UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2); - - rb = ures_getByKey(rb, CURRENCIES, rb, &ec2); - - // Fetch resource with multi-level resource inheritance fallback - rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2); - - s = ures_getStringByIndex(rb, choice, len, &ec2); - ures_close(rb); + LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2)); + + if (nameStyle == UCURR_NARROW_SYMBOL_NAME) { + CharString key; + key.append(CURRENCIES_NARROW, ec2); + key.append("/", ec2); + key.append(buf, ec2); + s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2); + } else { + ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2); + 
ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2); + s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2); + } // If we've succeeded we're done. Otherwise, try to fallback. // If that fails (because we are already at root) then exit. diff --git a/deps/icu-small/source/common/unicode/brkiter.h b/deps/icu-small/source/common/unicode/brkiter.h index c64bb71222..607f3ec625 100644 --- a/deps/icu-small/source/common/unicode/brkiter.h +++ b/deps/icu-small/source/common/unicode/brkiter.h @@ -298,15 +298,14 @@ public: virtual int32_t next(int32_t n) = 0; /** - * For RuleBasedBreakIterators, return the status tag from the - * break rule that determined the most recently - * returned break position. + * For RuleBasedBreakIterators, return the status tag from the break rule + * that determined the boundary at the current iteration position. * <p> * For break iterator types that do not support a rule status, * a default value of 0 is returned. * <p> - * @return the status from the break rule that determined the most recently - * returned break position. + * @return the status from the break rule that determined the boundary at + * the current iteration position. * @see RuleBaseBreakIterator::getRuleStatus() * @see UWordBreak * @stable ICU 52 @@ -315,7 +314,7 @@ public: /** * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) - * that determined the most recently returned break position. + * that determined the boundary at the current iteration position. * <p> * For break iterator types that do not support rule status, * no values are returned. @@ -334,7 +333,7 @@ public: * normal way, without attempting to store any values. * @param status receives error codes. * @return The number of rule status values from rules that determined - * the most recent boundary returned by the break iterator. + * the boundary at the current iteration position. 
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value * is the total number of status values that were available, * not the reduced number that were actually returned. @@ -616,7 +615,7 @@ public: virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; private: - static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status); + static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status); static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); diff --git a/deps/icu-small/source/common/unicode/bytestriebuilder.h b/deps/icu-small/source/common/unicode/bytestriebuilder.h index 0f9f5e2c06..7a806bb7f0 100644 --- a/deps/icu-small/source/common/unicode/bytestriebuilder.h +++ b/deps/icu-small/source/common/unicode/bytestriebuilder.h @@ -154,7 +154,6 @@ private: const char *s; }; - // don't use #ifndef U_HIDE_INTERNAL_API with private class members or virtual methods. virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, Node *nextNode) const; diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h index 4a4917bdca..4b77256d74 100644 --- a/deps/icu-small/source/common/unicode/casemap.h +++ b/deps/icu-small/source/common/unicode/casemap.h @@ -18,8 +18,6 @@ U_NAMESPACE_BEGIN -#ifndef U_HIDE_DRAFT_API - class BreakIterator; class ByteSink; class Edits; @@ -27,7 +25,7 @@ class Edits; /** * Low-level C++ case mapping functions. * - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API CaseMap U_FINAL : public UMemory { public: @@ -59,7 +57,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
* * @see u_strToLower - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toLower( const char *locale, uint32_t options, @@ -95,7 +93,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strToUpper - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toUpper( const char *locale, uint32_t options, @@ -146,7 +144,7 @@ public: * * @see u_strToTitle * @see ucasemap_toTitle - * @draft ICU 59 + * @stable ICU 59 */ static int32_t toTitle( const char *locale, uint32_t options, BreakIterator *iter, @@ -188,7 +186,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see u_strFoldCase - * @draft ICU 59 + * @stable ICU 59 */ static int32_t fold( uint32_t options, @@ -196,6 +194,7 @@ public: char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); +#ifndef U_HIDE_DRAFT_API /** * Lowercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. @@ -318,6 +317,7 @@ public: uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API /** * Lowercases a UTF-8 string and optionally records edits. @@ -347,7 +347,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToLower - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToLower( const char *locale, uint32_t options, @@ -383,7 +383,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8ToUpper - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToUpper( const char *locale, uint32_t options, @@ -433,7 +433,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
* * @see ucasemap_utf8ToTitle - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8ToTitle( const char *locale, uint32_t options, BreakIterator *iter, @@ -475,7 +475,7 @@ public: * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. * * @see ucasemap_utf8FoldCase - * @draft ICU 59 + * @stable ICU 59 */ static int32_t utf8Fold( uint32_t options, @@ -489,8 +489,6 @@ private: CaseMap &operator=(const CaseMap &other) = delete; }; -#endif // U_HIDE_DRAFT_API - U_NAMESPACE_END #endif // __CASEMAP_H__ diff --git a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h index fbce177591..49d0e029a9 100644 --- a/deps/icu-small/source/common/unicode/char16ptr.h +++ b/deps/icu-small/source/common/unicode/char16ptr.h @@ -30,25 +30,23 @@ U_NAMESPACE_BEGIN # define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") #endif -// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it -// is now used in place of UChar* in several stable C++ methods /** * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API Char16Ptr U_FINAL { public: /** * Copies the pointer. * @param p pointer - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(char16_t *p); #if !U_CHAR16_IS_TYPEDEF /** * Converts the pointer to char16_t *. * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(uint16_t *p); #endif @@ -57,32 +55,32 @@ public: * Converts the pointer to char16_t *. * (Only defined if U_SIZEOF_WCHAR_T==2.) * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(wchar_t *p); #endif /** * nullptr constructor. * @param p nullptr - * @draft ICU 59 + * @stable ICU 59 */ inline Char16Ptr(std::nullptr_t p); /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ inline ~Char16Ptr(); /** * Pointer access. 
* @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline char16_t *get() const; /** * char16_t pointer access via type conversion (e.g., static_cast). * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline operator char16_t *() const { return get(); } @@ -137,25 +135,23 @@ char16_t *Char16Ptr::get() const { return u_.cp; } #endif -// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it is -// now used in place of const UChar* in several stable C++ methods /** * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API ConstChar16Ptr U_FINAL { public: /** * Copies the pointer. * @param p pointer - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const char16_t *p); #if !U_CHAR16_IS_TYPEDEF /** * Converts the pointer to char16_t *. * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const uint16_t *p); #endif @@ -164,33 +160,33 @@ public: * Converts the pointer to char16_t *. * (Only defined if U_SIZEOF_WCHAR_T==2.) * @param p pointer to be converted - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const wchar_t *p); #endif /** * nullptr constructor. * @param p nullptr - * @draft ICU 59 + * @stable ICU 59 */ inline ConstChar16Ptr(const std::nullptr_t p); /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ inline ~ConstChar16Ptr(); /** * Pointer access. * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline const char16_t *get() const; /** * char16_t pointer access via type conversion (e.g., static_cast). * @return the wrapped pointer - * @draft ICU 59 + * @stable ICU 59 */ inline operator const char16_t *() const { return get(); } @@ -250,7 +246,7 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; } * Includes an aliasing barrier if available. 
* @param p pointer * @return p as const UChar * - * @draft ICU 59 + * @stable ICU 59 */ inline const UChar *toUCharPtr(const char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -264,7 +260,7 @@ inline const UChar *toUCharPtr(const char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as UChar * - * @draft ICU 59 + * @stable ICU 59 */ inline UChar *toUCharPtr(char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -278,7 +274,7 @@ inline UChar *toUCharPtr(char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as const OldUChar * - * @draft ICU 59 + * @stable ICU 59 */ inline const OldUChar *toOldUCharPtr(const char16_t *p) { #ifdef U_ALIASING_BARRIER @@ -292,7 +288,7 @@ inline const OldUChar *toOldUCharPtr(const char16_t *p) { * Includes an aliasing barrier if available. * @param p pointer * @return p as OldUChar * - * @draft ICU 59 + * @stable ICU 59 */ inline OldUChar *toOldUCharPtr(char16_t *p) { #ifdef U_ALIASING_BARRIER diff --git a/deps/icu-small/source/common/unicode/chariter.h b/deps/icu-small/source/common/unicode/chariter.h index dbed89dbe6..292794f6d6 100644 --- a/deps/icu-small/source/common/unicode/chariter.h +++ b/deps/icu-small/source/common/unicode/chariter.h @@ -569,7 +569,7 @@ public: * Returns the numeric index in the underlying text-storage * object of the character the iterator currently refers to * (i.e., the character returned by current()). - * @return the numberic index in the text-storage object of + * @return the numeric index in the text-storage object of * the character the iterator currently refers to * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/common/unicode/dtintrv.h b/deps/icu-small/source/common/unicode/dtintrv.h index 2221b36c9b..c99011e26c 100644 --- a/deps/icu-small/source/common/unicode/dtintrv.h +++ b/deps/icu-small/source/common/unicode/dtintrv.h @@ -69,7 +69,7 @@ public: * <pre> * . Base* polymorphic_pointer = createPolymorphicObject(); * . 
if (polymorphic_pointer->getDynamicClassID() == - * . erived::getStaticClassID()) ... + * . derived::getStaticClassID()) ... * </pre> * @return The class ID for all objects of this class. * @stable ICU 4.0 diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h index 082c3733a8..5a72574c14 100644 --- a/deps/icu-small/source/common/unicode/edits.h +++ b/deps/icu-small/source/common/unicode/edits.h @@ -17,8 +17,6 @@ U_NAMESPACE_BEGIN -#ifndef U_HIDE_DRAFT_API - /** * Records lengths of string edits but not replacement text. * Supports replacements, insertions, deletions in linear progression. @@ -27,13 +25,13 @@ U_NAMESPACE_BEGIN * An Edits object tracks a separate UErrorCode, but ICU string transformation functions * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. * - * @draft ICU 59 + * @stable ICU 59 */ class U_COMMON_API Edits U_FINAL : public UMemory { public: /** * Constructs an empty object. - * @draft ICU 59 + * @stable ICU 59 */ Edits() : array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0), @@ -64,7 +62,7 @@ public: /** * Destructor. - * @draft ICU 59 + * @stable ICU 59 */ ~Edits(); @@ -88,20 +86,20 @@ public: /** * Resets the data but may not release memory. - * @draft ICU 59 + * @stable ICU 59 */ void reset() U_NOEXCEPT; /** * Adds a record for an unchanged segment of text. * Normally called from inside ICU string transformation functions, not user code. - * @draft ICU 59 + * @stable ICU 59 */ void addUnchanged(int32_t unchangedLength); /** * Adds a record for a text replacement/insertion/deletion. * Normally called from inside ICU string transformation functions, not user code. - * @draft ICU 59 + * @stable ICU 59 */ void addReplace(int32_t oldLength, int32_t newLength); /** @@ -112,33 +110,35 @@ public: * and an error occurred while recording edits. * Otherwise unchanged. 
* @return TRUE if U_FAILURE(outErrorCode) - * @draft ICU 59 + * @stable ICU 59 */ UBool copyErrorTo(UErrorCode &outErrorCode); /** * How much longer is the new text compared with the old text? * @return new length minus old length - * @draft ICU 59 + * @stable ICU 59 */ int32_t lengthDelta() const { return delta; } /** * @return TRUE if there are any change edits - * @draft ICU 59 + * @stable ICU 59 */ UBool hasChanges() const { return numChanges != 0; } +#ifndef U_HIDE_DRAFT_API /** * @return the number of change edits * @draft ICU 60 */ int32_t numberOfChanges() const { return numChanges; } +#endif // U_HIDE_DRAFT_API /** * Access to the list of edits. * @see getCoarseIterator * @see getFineIterator - * @draft ICU 59 + * @stable ICU 59 */ struct U_COMMON_API Iterator U_FINAL : public UMemory { /** @@ -152,12 +152,12 @@ public: srcIndex(0), replIndex(0), destIndex(0) {} /** * Copy constructor. - * @draft ICU 59 + * @stable ICU 59 */ Iterator(const Iterator &other) = default; /** * Assignment operator. - * @draft ICU 59 + * @stable ICU 59 */ Iterator &operator=(const Iterator &other) = default; @@ -167,7 +167,7 @@ public: * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return TRUE if there is another edit - * @draft ICU 59 + * @stable ICU 59 */ UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } @@ -188,12 +188,13 @@ public: * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return TRUE if the edit for the source index was found - * @draft ICU 59 + * @stable ICU 59 */ UBool findSourceIndex(int32_t i, UErrorCode &errorCode) { return findIndex(i, TRUE, errorCode) == 0; } +#ifndef U_HIDE_DRAFT_API /** * Finds the edit that contains the destination index. 
* The destination index may be found in a non-change @@ -264,39 +265,40 @@ public: * @draft ICU 60 */ int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API /** * @return TRUE if this edit replaces oldLength() units with newLength() different ones. * FALSE if oldLength units remain unchanged. - * @draft ICU 59 + * @stable ICU 59 */ UBool hasChange() const { return changed; } /** * @return the number of units in the original string which are replaced or remain unchanged. - * @draft ICU 59 + * @stable ICU 59 */ int32_t oldLength() const { return oldLength_; } /** * @return the number of units in the modified string, if hasChange() is TRUE. * Same as oldLength if hasChange() is FALSE. - * @draft ICU 59 + * @stable ICU 59 */ int32_t newLength() const { return newLength_; } /** * @return the current index into the source string - * @draft ICU 59 + * @stable ICU 59 */ int32_t sourceIndex() const { return srcIndex; } /** * @return the current index into the replacement-characters-only string, * not counting unchanged spans - * @draft ICU 59 + * @stable ICU 59 */ int32_t replacementIndex() const { return replIndex; } /** * @return the current index into the full destination string - * @draft ICU 59 + * @stable ICU 59 */ int32_t destinationIndex() const { return destIndex; } @@ -331,7 +333,7 @@ public: * Returns an Iterator for coarse-grained changes for simple string updates. * Skips non-changes. * @return an Iterator that merges adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getCoarseChangesIterator() const { return Iterator(array, length, TRUE, TRUE); @@ -340,7 +342,7 @@ public: /** * Returns an Iterator for coarse-grained changes and non-changes for simple string updates. * @return an Iterator that merges adjacent changes. 
- * @draft ICU 59 + * @stable ICU 59 */ Iterator getCoarseIterator() const { return Iterator(array, length, FALSE, TRUE); @@ -350,7 +352,7 @@ public: * Returns an Iterator for fine-grained changes for modifying styled text. * Skips non-changes. * @return an Iterator that separates adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getFineChangesIterator() const { return Iterator(array, length, TRUE, FALSE); @@ -359,12 +361,13 @@ public: /** * Returns an Iterator for fine-grained changes and non-changes for modifying styled text. * @return an Iterator that separates adjacent changes. - * @draft ICU 59 + * @stable ICU 59 */ Iterator getFineIterator() const { return Iterator(array, length, FALSE, FALSE); } +#ifndef U_HIDE_DRAFT_API /** * Merges the two input Edits and appends the result to this object. * @@ -393,6 +396,7 @@ public: * @draft ICU 60 */ Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); +#endif // U_HIDE_DRAFT_API private: void releaseArray() U_NOEXCEPT; @@ -415,8 +419,6 @@ private: uint16_t stackArray[STACK_CAPACITY]; }; -#endif // U_HIDE_DRAFT_API - U_NAMESPACE_END #endif // __EDITS_H__ diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h index a0319bf0a7..751d1faf40 100644 --- a/deps/icu-small/source/common/unicode/filteredbrk.h +++ b/deps/icu-small/source/common/unicode/filteredbrk.h @@ -64,9 +64,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * @deprecated ICU 60 use createEmptyInstance instead * @see createEmptyInstance() */ - static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) { - return createEmptyInstance(status); - } + static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); #endif /* U_HIDE_DEPRECATED_API */ #ifndef U_HIDE_DRAFT_API @@ -105,7 +103,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { */ virtual UBool unsuppressBreakAfter(const 
UnicodeString& string, UErrorCode& status) = 0; -#ifndef U_HIDE_DEPRECATED_API /** * This function has been deprecated in favor of wrapIteratorWithFilter() * The behavior is identical. @@ -116,7 +113,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * @see wrapBreakIteratorWithFilter() */ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; -#endif /* U_HIDE_DEPRECATED_API */ #ifndef U_HIDE_DRAFT_API /** diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h index c752344f33..c84774e07f 100644 --- a/deps/icu-small/source/common/unicode/locid.h +++ b/deps/icu-small/source/common/unicode/locid.h @@ -353,7 +353,7 @@ public: * the default locale ID of the runtime environment. * * @param newLocale Locale to set to. If NULL, set to the value obtained - * from the runtime environement. + * from the runtime environment. * @param success The error code. * @system * @stable ICU 2.0 @@ -629,7 +629,7 @@ public: /** * Fills in "name" with the name of this locale in a format suitable for user display - * in the locale specfied by "displayLocale". This function uses getDisplayLanguage(), + * in the locale specified by "displayLocale". This function uses getDisplayLanguage(), * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display * name in the format "language (country[,variant])". For example, if displayLocale is * fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's diff --git a/deps/icu-small/source/common/unicode/parseerr.h b/deps/icu-small/source/common/unicode/parseerr.h index c8283bfcc9..c05487601c 100644 --- a/deps/icu-small/source/common/unicode/parseerr.h +++ b/deps/icu-small/source/common/unicode/parseerr.h @@ -58,9 +58,9 @@ enum { U_PARSE_CONTEXT_LEN = 16 }; typedef struct UParseError { /** - * The line on which the error occured. If the parser uses this + * The line on which the error occurred. 
If the parser uses this * field, it sets it to the line number of the source text line on - * which the error appears, which will be be a value >= 1. If the + * which the error appears, which will be a value >= 1. If the * parse does not support line numbers, the value will be <= 0. * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h index 12e2929d24..a3f8d32f89 100644 --- a/deps/icu-small/source/common/unicode/platform.h +++ b/deps/icu-small/source/common/unicode/platform.h @@ -482,9 +482,9 @@ /* Otherwise use the predefined value. */ #elif !defined(__cplusplus) # define U_CPLUSPLUS_VERSION 0 -#elif __cplusplus >= 201402L +#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) # define U_CPLUSPLUS_VERSION 14 -#elif __cplusplus >= 201103L +#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) # define U_CPLUSPLUS_VERSION 11 #else // C++98 or C++03 @@ -631,7 +631,7 @@ namespace std { */ #ifdef U_CHARSET_IS_UTF8 /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED +#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED # define U_CHARSET_IS_UTF8 1 #else # define U_CHARSET_IS_UTF8 0 @@ -749,8 +749,10 @@ namespace std { #else /* * Notes: - * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but - * does not support u"abc" string literals. + * Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef + * and does not support u"abc" string literals. + * Visual Studio 2015 (_MSC_VER>=1900) and above adds support for + * both char16_t and u"abc" string literals. * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but * does not support u"abc" string literals. 
* C++11 and C11 require support for UTF-16 literals diff --git a/deps/icu-small/source/common/unicode/putil.h b/deps/icu-small/source/common/unicode/putil.h index 91d6bb10f7..14bb99ccc5 100644 --- a/deps/icu-small/source/common/unicode/putil.h +++ b/deps/icu-small/source/common/unicode/putil.h @@ -38,7 +38,7 @@ /** * Platform utilities isolates the platform dependencies of the - * libarary. For each platform which this code is ported to, these + * library. For each platform which this code is ported to, these * functions may have to be re-implemented. */ @@ -53,7 +53,7 @@ * The data directory is determined as follows: * If u_setDataDirectory() has been called, that is it, otherwise * if the ICU_DATA environment variable is set, use that, otherwise - * If a data directory was specifed at ICU build time + * If a data directory was specified at ICU build time * <code> * \code * #define ICU_DATA_DIR "path" @@ -93,7 +93,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); #ifndef U_HIDE_INTERNAL_API /** * Return the time zone files override directory, or an empty string if - * no directory was specified. Certain time zone resources will be preferrentially + * no directory was specified. Certain time zone resources will be preferentially * loaded from individual files in this directory. * * @return the time zone data override directory. diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h index c3c201dd35..0c41d69d23 100644 --- a/deps/icu-small/source/common/unicode/rbbi.h +++ b/deps/icu-small/source/common/unicode/rbbi.h @@ -29,7 +29,6 @@ #include "unicode/udata.h" #include "unicode/parseerr.h" #include "unicode/schriter.h" -#include "unicode/uchriter.h" U_NAMESPACE_BEGIN @@ -58,34 +57,18 @@ private: * The UText through which this BreakIterator accesses the text * @internal */ - UText *fText; - - /** - * A character iterator that refers to the same text as the UText, above. 
- * Only included for compatibility with old API, which was based on CharacterIterators. - * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. - */ - CharacterIterator *fCharIter; - - /** - * When the input text is provided by a UnicodeString, this will point to - * a characterIterator that wraps that data. Needed only for the - * implementation of getText(), a backwards compatibility issue. - */ - StringCharacterIterator *fSCharIter; - - /** - * When the input text is provided by a UText, this - * dummy CharacterIterator over an empty string will - * be returned from getText() - */ - UCharCharacterIterator *fDCharIter; + UText fText; +#ifndef U_HIDE_INTERNAL_API +public: +#endif /* U_HIDE_INTERNAL_API */ /** - * The rule data for this BreakIterator instance + * The rule data for this BreakIterator instance. + * Not for general use; Public only for testing purposes. * @internal */ RBBIDataWrapper *fData; +private: /** * The iteration state - current position, rule status for the current position, @@ -106,23 +89,10 @@ private: int32_t fRuleStatusIndex; /** - * True when iteration has run off the end, and iterator functions should return UBRK_DONE. - */ - UBool fDone; - - /** * Cache of previously determined boundary positions. */ - public: // TODO: debug, return to private. class BreakCache; BreakCache *fBreakCache; - private: - /** - * Counter for the number of characters encountered with the "dictionary" - * flag set. - * @internal - */ - uint32_t fDictionaryCharCount; /** * Cache of boundary positions within a region of text that has been @@ -150,11 +120,30 @@ private: UnhandledEngine *fUnhandledBreakEngine; /** - * - * The type of the break iterator, or -1 if it has not been set. + * Counter for the number of characters encountered with the "dictionary" + * flag set. * @internal */ - int32_t fBreakType; + uint32_t fDictionaryCharCount; + + /** + * A character iterator that refers to the same text as the UText, above. 
+ * Only included for compatibility with old API, which was based on CharacterIterators. + * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. + */ + CharacterIterator *fCharIter; + + /** + * When the input text is provided by a UnicodeString, this will point to + * a characterIterator that wraps that data. Needed only for the + * implementation of getText(), a backwards compatibility issue. + */ + StringCharacterIterator fSCharIter; + + /** + * True when iteration has run off the end, and iterator functions should return UBRK_DONE. + */ + UBool fDone; //======================================================================= // constructors @@ -206,17 +195,17 @@ public: UErrorCode &status); /** - * Contruct a RuleBasedBreakIterator from a set of precompiled binary rules. + * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. * Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules(). * Construction of a break iterator in this way is substantially faster than - * constuction from source rules. + * construction from source rules. * * Ownership of the storage containing the compiled rules remains with the * caller of this function. The compiled rules must not be modified or * deleted during the life of the break iterator. * * The compiled rules are not compatible across different major versions of ICU. - * The compiled rules are comaptible only between machines with the same + * The compiled rules are compatible only between machines with the same * byte ordering (little or big endian) and the same base character set family * (ASCII or EBCDIC). * @@ -285,7 +274,7 @@ public: * behavior, and iterating over the same text, as this one. * Differs from the copy constructor in that it is polymorphic, and * will correctly clone (copy) a derived class. - * clone() is thread safe. Multiple threads may simultaeneously + * clone() is thread safe. 
Multiple threads may simultaneously * clone the same source break iterator. * @return a newly-constructed RuleBasedBreakIterator * @stable ICU 2.0 @@ -450,7 +439,7 @@ public: virtual int32_t preceding(int32_t offset); /** - * Returns true if the specfied position is a boundary position. As a side + * Returns true if the specified position is a boundary position. As a side * effect, leaves the iterator pointing to the first boundary position at * or after "offset". * @param offset the offset to check. @@ -471,8 +460,8 @@ public: /** - * Return the status tag from the break rule that determined the most recently - * returned break position. For break rules that do not specify a + * Return the status tag from the break rule that determined the boundary at + * the current iteration position. For break rules that do not specify a * status, a default value of 0 is returned. If more than one break rule * would cause a boundary to be located at some position in the text, * the numerically largest of the applicable status values is returned. @@ -489,16 +478,14 @@ public: * position from <code>next()</code>, <code>previous()</code>, or * any other break iterator functions that returns a boundary position. * <p> + * Note that <code>getRuleStatus()</code> returns the value corresponding to + * <code>current()</code> index even after <code>next()</code> has returned DONE. + * <p> * When creating custom break rules, one is free to define whatever * status values may be convenient for the application. * <p> - * Note: this function is not thread safe. It should not have been - * declared const, and the const remains only for compatibility - * reasons. (The function is logically const, but not bit-wise const). - * TODO: check this. Probably thread safe now. - * <p> - * @return the status from the break rule that determined the most recently - * returned break position. + * @return the status from the break rule that determined the boundary + * at the current iteration position. 
* * @see UWordBreak * @stable ICU 2.2 @@ -506,8 +493,8 @@ public: virtual int32_t getRuleStatus() const; /** - * Get the status (tag) values from the break rule(s) that determined the most - * recently returned break position. + * Get the status (tag) values from the break rule(s) that determined the boundary + * at the current iteration position. * <p> * The returned status value(s) are stored into an array provided by the caller. * The values are stored in sorted (ascending) order. @@ -518,10 +505,10 @@ public: * @param fillInVec an array to be filled in with the status values. * @param capacity the length of the supplied vector. A length of zero causes * the function to return the number of status values, in the - * normal way, without attemtping to store any values. + * normal way, without attempting to store any values. * @param status receives error codes. - * @return The number of rule status values from rules that determined - * the most recent boundary returned by the break iterator. + * @return The number of rule status values from the rules that determined + * the boundary at the current iteration position. * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value * is the total number of status values that were available, * not the reduced number that were actually returned. @@ -561,7 +548,7 @@ public: * * Create a clone (copy) of this break iterator in memory provided * by the caller. The idea is to increase performance by avoiding - * a storage allocation. Use of this functoin is NOT RECOMMENDED. + * a storage allocation. Use of this function is NOT RECOMMENDED. * Performance gains are minimal, and correct buffer management is * tricky. Use clone() instead. * @@ -574,7 +561,7 @@ public: * storage for the cloned object. * * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be - * returned if the the provided buffer was too small, and + * returned if the provided buffer was too small, and * the clone was therefore put on the heap. 
* * @return Pointer to the clone object. This may differ from the stackBuffer @@ -597,7 +584,7 @@ public: * The binary data can only be used with the same version of ICU * and on the same platform type (processor endian-ness) * - * @param length Returns the length of the binary data. (Out paramter.) + * @param length Returns the length of the binary data. (Out parameter.) * * @return A pointer to the binary (compiled) rule data. The storage * belongs to the RulesBasedBreakIterator object, not the @@ -646,12 +633,6 @@ private: void reset(void); /** - * Set the type of the break iterator. - * @internal - */ - void setBreakType(int32_t type); - - /** * Common initialization function, used by constructors and bufferClone. * @internal */ @@ -697,6 +678,13 @@ private: * @internal */ void dumpCache(); + + /** + * Debugging function only. + * @internal + */ + void dumpTables(); + #endif /* U_HIDE_INTERNAL_API */ }; diff --git a/deps/icu-small/source/common/unicode/resbund.h b/deps/icu-small/source/common/unicode/resbund.h index 358ed7eeb9..ab0b60bbb2 100644 --- a/deps/icu-small/source/common/unicode/resbund.h +++ b/deps/icu-small/source/common/unicode/resbund.h @@ -132,7 +132,7 @@ public: ResourceBundle(UErrorCode &err); /** - * Standard constructor, onstructs a resource bundle for the locale-specific + * Standard constructor, constructs a resource bundle for the locale-specific * bundle in the specified package. * * @param packageName The packageName and locale together point to an ICU udata object, diff --git a/deps/icu-small/source/common/unicode/schriter.h b/deps/icu-small/source/common/unicode/schriter.h index d83a57f8d0..1a12769e8d 100644 --- a/deps/icu-small/source/common/unicode/schriter.h +++ b/deps/icu-small/source/common/unicode/schriter.h @@ -69,7 +69,7 @@ public: * Create an iterator over the UnicodeString referred to by "textStr". * The UnicodeString object is copied. 
* The iteration range begins with the code unit specified by - * "textBegin" and ends with the code unit BEFORE the code unit specfied + * "textBegin" and ends with the code unit BEFORE the code unit specified * by "textEnd". The starting position is specified by "textPos". If * "textBegin" and "textEnd" don't form a valid range on "text" (i.e., * textBegin >= textEnd or either is negative or greater than text.size()), diff --git a/deps/icu-small/source/common/unicode/ubidi.h b/deps/icu-small/source/common/unicode/ubidi.h index ef21f24206..254a5bf9ef 100644 --- a/deps/icu-small/source/common/unicode/ubidi.h +++ b/deps/icu-small/source/common/unicode/ubidi.h @@ -692,7 +692,7 @@ typedef enum UBiDiReorderingMode { * @stable ICU 3.6 */ UBIDI_REORDER_DEFAULT = 0, /** Logical to Visual algorithm which handles numbers in a way which - * mimicks the behavior of Windows XP. + * mimics the behavior of Windows XP. * @stable ICU 3.6 */ UBIDI_REORDER_NUMBERS_SPECIAL, /** Logical to Visual algorithm grouping numbers with adjacent R characters @@ -1142,7 +1142,7 @@ ubidi_setContext(UBiDi *pBiDi, /** * Perform the Unicode Bidi algorithm. It is defined in the - * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>, + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, * version 13, * also described in The Unicode Standard, Version 4.0 .<p> * diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h index 600328c49c..73c1553b24 100644 --- a/deps/icu-small/source/common/unicode/ubrk.h +++ b/deps/icu-small/source/common/unicode/ubrk.h @@ -268,7 +268,6 @@ ubrk_openRules(const UChar *rules, UParseError *parseErr, UErrorCode *status); -#ifndef U_HIDE_DRAFT_API /** * Open a new UBreakIterator for locating text boundaries using precompiled binary rules. * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules. 
@@ -287,15 +286,13 @@ ubrk_openRules(const UChar *rules, * @param status Pointer to UErrorCode to receive any errors. * @return UBreakIterator for the specified rules. * @see ubrk_getBinaryRules - * @draft ICU 59 + * @stable ICU 59 */ -U_DRAFT UBreakIterator* U_EXPORT2 +U_STABLE UBreakIterator* U_EXPORT2 ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, const UChar * text, int32_t textLength, UErrorCode * status); -#endif /* U_HIDE_DRAFT_API */ - /** * Thread safe cloning operation * @param bi iterator to be cloned @@ -510,7 +507,7 @@ ubrk_countAvailable(void); /** -* Returns true if the specfied position is a boundary position. As a side +* Returns true if the specified position is a boundary position. As a side * effect, leaves the iterator pointing to the first boundary position at * or after "offset". * @param bi The break iterator to use. @@ -544,7 +541,7 @@ ubrk_getRuleStatus(UBreakIterator *bi); * @param fillInVec an array to be filled in with the status values. * @param capacity the length of the supplied vector. A length of zero causes * the function to return the number of status values, in the - * normal way, without attemtping to store any values. + * normal way, without attempting to store any values. * @param status receives error codes. * @return The number of rule status values from rules that determined * the most recent boundary returned by the break iterator. @@ -596,7 +593,6 @@ ubrk_refreshUText(UBreakIterator *bi, UErrorCode *status); -#ifndef U_HIDE_DRAFT_API /** * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator. * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator @@ -620,15 +616,13 @@ ubrk_refreshUText(UBreakIterator *bi, * otherwise 0. If not preflighting and this is larger than * rulesCapacity, *status will be set to an error. 
* @see ubrk_openBinaryRules - * @draft ICU 59 + * @stable ICU 59 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ubrk_getBinaryRules(UBreakIterator *bi, uint8_t * binaryRules, int32_t rulesCapacity, UErrorCode * status); -#endif /* U_HIDE_DRAFT_API */ - #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ #endif diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h index 3613374d9a..4b72ecfc26 100644 --- a/deps/icu-small/source/common/unicode/uchar.h +++ b/deps/icu-small/source/common/unicode/uchar.h @@ -112,11 +112,11 @@ U_CDECL_BEGIN * Comparison: * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; * most of general categories "Z" (separators) + most whitespace ISO controls - * (including no-break spaces, but excluding IS1..IS4 and ZWSP) + * (including no-break spaces, but excluding IS1..IS4) * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) * - u_isspace: Z + whitespace ISO controls (including no-break spaces) - * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP + * - u_isblank: "horizontal spaces" = TAB + Zs */ /** @@ -2702,8 +2702,7 @@ u_isgraph(UChar32 c); * * same as * - * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators) - * except Zero Width Space (ZWSP, U+200B). + * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators). * * Note: There are several ICU whitespace functions; please see the uchar.h * file documentation for a detailed comparison. 
diff --git a/deps/icu-small/source/common/unicode/uclean.h b/deps/icu-small/source/common/unicode/uclean.h index 3f73af37b8..ab0cd6da6b 100644 --- a/deps/icu-small/source/common/unicode/uclean.h +++ b/deps/icu-small/source/common/unicode/uclean.h @@ -70,7 +70,7 @@ u_init(UErrorCode *status); * This has the effect of restoring ICU to its initial condition, before * any of these override functions were installed. Refer to * u_setMemoryFunctions(), u_setMutexFunctions and - * utrace_setFunctions(). If ICU is to be reinitialized after after + * utrace_setFunctions(). If ICU is to be reinitialized after * calling u_cleanup(), these runtime override functions will need to * be set up again if they are still required. * <p> @@ -104,7 +104,7 @@ u_cleanup(void); U_CDECL_BEGIN /** * Pointer type for a user supplied memory allocation function. - * @param context user supplied value, obtained from from u_setMemoryFunctions(). + * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param size The number of bytes to be allocated * @return Pointer to the newly allocated memory, or NULL if the allocation failed. * @stable ICU 2.8 @@ -113,7 +113,7 @@ U_CDECL_BEGIN typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size); /** * Pointer type for a user supplied memory re-allocation function. - * @param context user supplied value, obtained from from u_setMemoryFunctions(). + * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param size The number of bytes to be allocated * @return Pointer to the newly allocated memory, or NULL if the allocation failed. * @stable ICU 2.8 @@ -123,7 +123,7 @@ typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t si /** * Pointer type for a user supplied memory free function. Behavior should be * similar the standard C library free(). - * @param context user supplied value, obtained from from u_setMemoryFunctions(). 
+ * @param context user supplied value, obtained from u_setMemoryFunctions(). * @param mem Pointer to the memory block to be resized * @param size The new size for the block * @return Pointer to the resized memory block, or NULL if the resizing failed. @@ -179,8 +179,8 @@ U_CDECL_BEGIN * The user-supplied function will be called by ICU whenever ICU needs to create a * new mutex. The function implementation should create a mutex, and store a pointer * to something that uniquely identifies the mutex into the UMTX that is supplied - * as a paramter. - * @param context user supplied value, obtained from from u_setMutexFunctions(). + * as a parameter. + * @param context user supplied value, obtained from u_setMutexFunctions(). * @param mutex Receives a pointer that identifies the new mutex. * The mutex init function must set the UMTX to a non-null value. * Subsequent calls by ICU to lock, unlock, or destroy a mutex will @@ -197,7 +197,7 @@ typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCod * Function Pointer type for a user supplied mutex functions. * One of the user-supplied functions with this signature will be called by ICU * whenever ICU needs to lock, unlock, or destroy a mutex. - * @param context user supplied value, obtained from from u_setMutexFunctions(). + * @param context user supplied value, obtained from u_setMutexFunctions(). * @param mutex specify the mutex on which to operate. * @deprecated ICU 52. This function is no longer supported. * @system @@ -229,7 +229,7 @@ u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtx /** * Pointer type for a user supplied atomic increment or decrement function. - * @param context user supplied value, obtained from from u_setAtomicIncDecFunctions(). + * @param context user supplied value, obtained from u_setAtomicIncDecFunctions(). * @param p Pointer to a 32 bit int to be incremented or decremented * @return The value of the variable after the inc or dec operation. 
* @deprecated ICU 52. This function is no longer supported. diff --git a/deps/icu-small/source/common/unicode/ucnv.h b/deps/icu-small/source/common/unicode/ucnv.h index 05d0050f4a..53b4c6f073 100644 --- a/deps/icu-small/source/common/unicode/ucnv.h +++ b/deps/icu-small/source/common/unicode/ucnv.h @@ -207,7 +207,7 @@ typedef void (U_EXPORT2 *UConverterToUCallback) ( /** * Function pointer for error callback in the unicode to codepage direction. - * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason). + * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason). * @param context Pointer to the callback's private data * @param args Information about the conversion in progress * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence @@ -353,7 +353,7 @@ ucnv_compareNames(const char *name1, const char *name2); * ucnv_getAlias for a complete list that is available. * If this parameter is NULL, the default converter will be used. * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> - * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred * @see ucnv_openU * @see ucnv_openCCSID * @see ucnv_getAvailableName @@ -386,7 +386,7 @@ ucnv_open(const char *converterName, UErrorCode *err); * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, * U_FILE_ACCESS_ERROR</TT> * @return the created Unicode converter object, or <TT>NULL</TT> if an - * error occured + * error occurred * @see ucnv_open * @see ucnv_openCCSID * @see ucnv_close @@ -489,7 +489,7 @@ ucnv_openCCSID(int32_t codepage, * @param packageName name of the package (equivalent to 'path' in udata_open() call) * @param converterName name of the data item to be used, without suffix. 
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> - * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred * @see udata_open * @see ucnv_open * @see ucnv_safeClone diff --git a/deps/icu-small/source/common/unicode/ucnv_err.h b/deps/icu-small/source/common/unicode/ucnv_err.h index e8a79bcd81..08c96c1440 100644 --- a/deps/icu-small/source/common/unicode/ucnv_err.h +++ b/deps/icu-small/source/common/unicode/ucnv_err.h @@ -119,19 +119,19 @@ typedef struct UConverter UConverter; #define UCNV_ESCAPE_JAVA "J" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) - * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) + * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX) * @stable ICU 2.0 */ #define UCNV_ESCAPE_C "C" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly * @stable ICU 2.0 */ #define UCNV_ESCAPE_XML_DEC "D" /** * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly * @stable ICU 2.0 */ #define UCNV_ESCAPE_XML_HEX "X" @@ -171,7 +171,7 @@ typedef enum { code points. The error code U_INVALID_CHAR_FOUND will be set. 
*/ UCNV_RESET = 3, /**< The callback is called with this reason when a - 'reset' has occured. Callback should reset all + 'reset' has occurred. Callback should reset all state. */ UCNV_CLOSE = 4, /**< Called when the converter is closed. The callback should release any allocated memory.*/ @@ -199,7 +199,7 @@ typedef struct { const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ } UConverterFromUnicodeArgs; @@ -215,7 +215,7 @@ typedef struct { const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ } UConverterToUnicodeArgs; diff --git a/deps/icu-small/source/common/unicode/ucurr.h b/deps/icu-small/source/common/unicode/ucurr.h index ecb54d146f..adfaf0023b 100644 --- a/deps/icu-small/source/common/unicode/ucurr.h +++ b/deps/icu-small/source/common/unicode/ucurr.h @@ -103,6 +103,19 @@ typedef enum UCurrNameStyle { * @stable ICU 2.6 */ UCURR_LONG_NAME + +#ifndef U_HIDE_DRAFT_API + , + /** + * Selector for getName() indicating the narrow currency symbol. 
+ * The narrow currency symbol is similar to the regular currency + * symbol, but it always takes the shortest form: for example, + * "$" instead of "US$" for USD in en-CA. + * + * @draft ICU 61 + */ + UCURR_NARROW_SYMBOL_NAME +#endif // U_HIDE_DRAFT_API } UCurrNameStyle; #if !UCONFIG_NO_SERVICE diff --git a/deps/icu-small/source/common/unicode/umachine.h b/deps/icu-small/source/common/unicode/umachine.h index 30de4dba0d..a9dc1631b0 100644 --- a/deps/icu-small/source/common/unicode/umachine.h +++ b/deps/icu-small/source/common/unicode/umachine.h @@ -299,6 +299,10 @@ typedef int8_t UBool; // for AIX, uchar.h needs to be included # include <uchar.h> # define U_CHAR16_IS_TYPEDEF 1 +#elif defined(_MSC_VER) && (_MSC_VER < 1900) +// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, +// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx +# define U_CHAR16_IS_TYPEDEF 1 #else # define U_CHAR16_IS_TYPEDEF 0 #endif @@ -366,7 +370,7 @@ typedef int8_t UBool; * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. * The current UChar responds to UCHAR_TYPE but OldUChar does not. 
* - * @draft ICU 59 + * @stable ICU 59 */ #if U_SIZEOF_WCHAR_T==2 typedef wchar_t OldUChar; diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h index 914818a00e..c2e0ad48bd 100644 --- a/deps/icu-small/source/common/unicode/uniset.h +++ b/deps/icu-small/source/common/unicode/uniset.h @@ -1521,6 +1521,7 @@ private: UnicodeString& rebuiltPat, uint32_t options, UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), + int32_t depth, UErrorCode& ec); //---------------------------------------------------------------- diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h index b99a686126..d0b271754b 100644 --- a/deps/icu-small/source/common/unicode/unistr.h +++ b/deps/icu-small/source/common/unicode/unistr.h @@ -2995,10 +2995,6 @@ public: */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * uint16_t * constructor. @@ -3008,16 +3004,12 @@ public: * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> * on the compiler command line or similar. * @param text NUL-terminated UTF-16 string - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : UnicodeString(ConstChar16Ptr(text)) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * wchar_t * constructor. @@ -3028,16 +3020,12 @@ public: * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> * on the compiler command line or similar. 
* @param text NUL-terminated UTF-16 string - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : UnicodeString(ConstChar16Ptr(text)) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. @@ -3046,7 +3034,7 @@ public: * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> * on the compiler command line or similar. * @param text nullptr - * @draft ICU 59 + * @stable ICU 59 */ UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); @@ -3060,26 +3048,18 @@ public: UnicodeString(const char16_t *text, int32_t textLength); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * uint16_t * constructor. * Delegates to UnicodeString(const char16_t *, int32_t). * @param text UTF-16 string * @param length string length - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(const uint16_t *text, int32_t length) : UnicodeString(ConstChar16Ptr(text), length) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * wchar_t * constructor. @@ -3087,22 +3067,18 @@ public: * Delegates to UnicodeString(const char16_t *, int32_t). 
* @param text NUL-terminated UTF-16 string * @param length string length - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(const wchar_t *text, int32_t length) : UnicodeString(ConstChar16Ptr(text), length) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. * @param text nullptr * @param length ignored - * @draft ICU 59 + * @stable ICU 59 */ inline UnicodeString(const std::nullptr_t text, int32_t length); @@ -3152,10 +3128,6 @@ public: */ UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if !U_CHAR16_IS_TYPEDEF /** * Writable-aliasing uint16_t * constructor. @@ -3163,16 +3135,12 @@ public: * @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents * @param buffCapacity buffer capacity - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) /** * Writable-aliasing wchar_t * constructor. 
@@ -3181,23 +3149,19 @@ public: * @param buffer writable buffer of/for UTF-16 text * @param buffLength length of the current buffer contents * @param buffCapacity buffer capacity - * @draft ICU 59 + * @stable ICU 59 */ UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} #endif - /* - * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, - * it should always be available regardless of U_HIDE_DRAFT_API status - */ /** * Writable-aliasing nullptr_t constructor. * Effectively the same as the default constructor, makes an empty string object. * @param buffer nullptr * @param buffLength ignored * @param buffCapacity ignored - * @draft ICU 59 + * @stable ICU 59 */ inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index 982655c442..d8ab85091f 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -107,7 +107,6 @@ #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) #define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) -#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl) #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) #define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) #define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) @@ -446,7 +445,6 @@ #define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions) #define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength) #define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns) -#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton) #define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText) #define 
ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex) #define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap) @@ -551,6 +549,7 @@ #define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) #define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) #define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) +#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie) #define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) #define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) #define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) @@ -862,6 +861,7 @@ #define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) #define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) #define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) +#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName) #define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton) #define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton) #define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open) @@ -1326,7 +1326,6 @@ #define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime) #define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName) #define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) -#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties) #define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) #define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) #define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) diff --git a/deps/icu-small/source/common/unicode/ures.h b/deps/icu-small/source/common/unicode/ures.h index 918b9f208e..af0ce76f25 100644 --- a/deps/icu-small/source/common/unicode/ures.h +++ 
b/deps/icu-small/source/common/unicode/ures.h @@ -16,7 +16,7 @@ * 04/04/99 helena Fixed internal header inclusion. * 04/15/99 Madhu Updated Javadoc * 06/14/99 stephen Removed functions taking a filename suffix. -* 07/20/99 stephen Language-independent ypedef to void* +* 07/20/99 stephen Language-independent typedef to void* * 11/09/99 weiv Added ures_getLocale() * 06/24/02 weiv Added support for resource sharing ****************************************************************************** @@ -138,7 +138,7 @@ typedef enum { /** * Opens a UResourceBundle, from which users can extract strings by using * their corresponding keys. - * Note that the caller is responsible of calling <TT>ures_close</TT> on each succesfully + * Note that the caller is responsible of calling <TT>ures_close</TT> on each successfully * opened resource bundle. * @param packageName The packageName and locale together point to an ICU udata object, * as defined by <code> udata_open( packageName, "res", locale, err) </code> @@ -301,7 +301,7 @@ ures_getVersion(const UResourceBundle* resB, * you to query for the real locale of the resource. For example, if you requested * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. * For subresources, the locale where this resource comes from will be returned. - * If fallback has occured, getLocale will reflect this. + * If fallback has occurred, getLocale will reflect this. * * @param resourceBundle resource bundle in question * @param status just for catching illegal arguments @@ -580,7 +580,7 @@ ures_hasNext(const UResourceBundle *resourceBundle); * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. * Alternatively, you can supply a struct to be filled by this function. * @param status fills in the outgoing error code. You may still get a non NULL result even if an - * error occured. Check status instead. + * error occurred. Check status instead. 
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it * @stable ICU 2.0 */ @@ -596,7 +596,7 @@ ures_getNextResource(UResourceBundle *resourceBundle, * @param resourceBundle a resource * @param len fill in length of the string * @param key fill in for key associated with this string. NULL if no key - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. * @stable ICU 2.0 @@ -615,7 +615,7 @@ ures_getNextString(UResourceBundle *resourceBundle, * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. * Alternatively, you can supply a struct to be filled by this function. * @param status fills in the outgoing error code. Don't count on NULL being returned if an error has - * occured. Check status instead. + * occurred. Check status instead. * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it * @stable ICU 2.0 */ @@ -631,7 +631,7 @@ ures_getByIndex(const UResourceBundle *resourceBundle, * @param resourceBundle a resource * @param indexS an index to the wanted string. * @param len fill in length of the string - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. 
* @stable ICU 2.0 @@ -722,7 +722,7 @@ ures_getByKey(const UResourceBundle *resourceBundle, * @param resB a resource * @param key a key associated with the wanted string * @param len fill in length of the string - * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't * count on it. Check status instead! * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. * @stable ICU 2.0 diff --git a/deps/icu-small/source/common/unicode/uscript.h b/deps/icu-small/source/common/unicode/uscript.h index 3ec235d50c..0befa1cd42 100644 --- a/deps/icu-small/source/common/unicode/uscript.h +++ b/deps/icu-small/source/common/unicode/uscript.h @@ -476,7 +476,7 @@ typedef enum UScriptCode { * @param nameOrAbbrOrLocale name of the script, as given in * PropertyValueAliases.txt, or ISO 15924 code or locale * @param fillIn the UScriptCode buffer to fill in the script code - * @param capacity the capacity (size) fo UScriptCode buffer passed in. + * @param capacity the capacity (size) of UScriptCode buffer passed in. * @param err the error status code. * @return The number of script codes filled in the buffer passed in * @stable ICU 2.4 diff --git a/deps/icu-small/source/common/unicode/ushape.h b/deps/icu-small/source/common/unicode/ushape.h index 5af8ffe1c5..3064e08572 100644 --- a/deps/icu-small/source/common/unicode/ushape.h +++ b/deps/icu-small/source/common/unicode/ushape.h @@ -93,7 +93,7 @@ * which must not indicate a failure before the function call. * * @return The number of UChars written to the destination buffer. - * If an error occured, then no output was written, or it may be + * If an error occurred, then no output was written, or it may be * incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then * the return value indicates the necessary destination buffer size. 
* @stable ICU 2.0 diff --git a/deps/icu-small/source/common/unicode/usprep.h b/deps/icu-small/source/common/unicode/usprep.h index 33ca1461ce..7cdc6cdd18 100644 --- a/deps/icu-small/source/common/unicode/usprep.h +++ b/deps/icu-small/source/common/unicode/usprep.h @@ -33,14 +33,14 @@ * StringPrep prepares Unicode strings for use in network protocols. * Profiles of StringPrep are sets of rules and data according to which the * Unicode Strings are prepared. Each profile contains tables which describe - * how a code point should be treated. The tables are broadly classied into + * how a code point should be treated. The tables are broadly classified into * <ul> - * <li> Unassinged Table: Contains code points that are unassigned + * <li> Unassigned Table: Contains code points that are unassigned * in the Unicode Version supported by StringPrep. Currently * RFC 3454 supports Unicode 3.2. </li> - * <li> Prohibited Table: Contains code points that are prohibted from + * <li> Prohibited Table: Contains code points that are prohibited from * the output of the StringPrep processing function. </li> - * <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li> + * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li> * </ul> * * The procedure for preparing Unicode strings: @@ -230,7 +230,7 @@ U_NAMESPACE_END /** * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC), - * checks for prohited and BiDi characters in the order defined by RFC 3454 + * checks for prohibited and BiDi characters in the order defined by RFC 3454 * depending on the options specified in the profile. 
* * @param prep The profile to use diff --git a/deps/icu-small/source/common/unicode/ustring.h b/deps/icu-small/source/common/unicode/ustring.h index 1ea27126cc..cf6ec0b6b4 100644 --- a/deps/icu-small/source/common/unicode/ustring.h +++ b/deps/icu-small/source/common/unicode/ustring.h @@ -403,7 +403,7 @@ u_strspn(const UChar *string, const UChar *matchSet); * @param saveState The current pointer within the original string, * which is set by this function. The saveState * parameter should be the address of a local variable of type - * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use + * UChar *. (i.e. defined "UChar *myLocalSaveState" and use * &myLocalSaveState for this parameter). * @return A pointer to the next token found in src, or NULL * when there are no more tokens. @@ -884,7 +884,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count); * Unicode String literals in C. * We need one macro to declare a variable for the string * and to statically preinitialize it if possible, - * and a second macro to dynamically intialize such a string variable if necessary. + * and a second macro to dynamically initialize such a string variable if necessary. * * The macros are defined for maximum performance. * They work only for strings that contain "invariant characters", i.e., diff --git a/deps/icu-small/source/common/unicode/utext.h b/deps/icu-small/source/common/unicode/utext.h index 7eea1da240..51d11a2e00 100644 --- a/deps/icu-small/source/common/unicode/utext.h +++ b/deps/icu-small/source/common/unicode/utext.h @@ -655,10 +655,10 @@ utext_getPreviousNativeIndex(UText *ut); * @param ut the UText from which to extract data. * @param nativeStart the native index of the first character to extract.\ * If the specified index is out of range, - * it will be pinned to to be within 0 <= index <= textLength + * it will be pinned to be within 0 <= index <= textLength * @param nativeLimit the native string index of the position following the last * character to extract. 
If the specified index is out of range, - * it will be pinned to to be within 0 <= index <= textLength. + * it will be pinned to be within 0 <= index <= textLength. * nativeLimit must be >= nativeStart. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed * @param destCapacity The size, in UChars, of the destination buffer. May be zero @@ -906,7 +906,7 @@ utext_copy(UText *ut, * Caution: freezing a UText will disable changes made via the specific * frozen UText wrapper only; it will not have any effect on the ability to * directly modify the text by bypassing the UText. Any such backdoor modifications - * are always an error while UText access is occuring because the underlying + * are always an error while UText access is occurring because the underlying * text can get out of sync with UText's buffering. * </p> * @@ -1452,7 +1452,7 @@ struct UText { void *pExtra; /** - * (protected) Pointer to string or text-containin object or similar. + * (protected) Pointer to string or text-containing object or similar. * This is the source of the text that this UText is wrapping, in a format * that is known to the text provider functions. 
* @stable ICU 3.4 diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h index 59b4b25570..1f07634359 100644 --- a/deps/icu-small/source/common/unicode/utf8.h +++ b/deps/icu-small/source/common/unicode/utf8.h @@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT_UNSAFE * @stable ICU 2.4 */ -#define U8_NEXT(s, i, length, c) { \ - (c)=(uint8_t)(s)[(i)++]; \ - if(!U8_IS_SINGLE(c)) { \ - uint8_t __t1, __t2; \ - if( /* handle U+0800..U+FFFF inline */ \ - (0xe0<=(c) && (c)<0xf0) && \ - (((i)+1)<(length) || (length)<0) && \ - U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ - (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ - (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(s)[i]-0x80)<=0x3f) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ - } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \ - } \ - } \ -} +#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) /** * Get a code point from a string at a code point boundary offset, @@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT * @stable ICU 51 */ -#define U8_NEXT_OR_FFFD(s, i, length, c) { \ +#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) + +/** @internal */ +#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \ (c)=(uint8_t)(s)[(i)++]; \ if(!U8_IS_SINGLE(c)) { \ - uint8_t __t1, __t2; \ - if( /* handle U+0800..U+FFFF inline */ \ - (0xe0<=(c) && (c)<0xf0) && \ - (((i)+1)<(length) || (length)<0) && \ - U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ - (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ - (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && 
(c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(s)[i]-0x80)<=0x3f) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ + uint8_t __t = 0; \ + if((i)!=(length) && \ + /* fetch/validate/assemble all but last trail byte */ \ + ((c)>=0xe0 ? \ + ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ + (__t&=0x3f, 1) \ + : /* U+10000..U+10FFFF */ \ + ((c)-=0xf0)<=4 && \ + U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ + ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ + (__t=(s)[i]-0x80)<=0x3f) && \ + /* valid second-to-last trail byte */ \ + ((c)=((c)<<6)|__t, ++(i)!=(length)) \ + : /* U+0080..U+07FF */ \ + (c)>=0xc2 && ((c)&=0x1f, 1)) && \ + /* last trail byte */ \ + (__t=(s)[i]-0x80)<=0x3f && \ + ((c)=((c)<<6)|__t, ++(i), 1)) { \ } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \ + (c)=(sub); /* ill-formed*/ \ } \ } \ } @@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_APPEND_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ } else { \ - if((uint32_t)(c)<=0x7ff) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + if(__uc<=0x7ff) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ } else { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + if(__uc<=0xffff) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ } else { \ - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ } \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ } \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } \ } @@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, 
int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_APPEND(s, i, capacity, c, isError) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else if(__uc<=0x7ff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } else { \ - (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \ + (isError)=TRUE; \ } \ } @@ -600,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * If the offset points to a UTF-8 trail byte, * then the offset is moved backward to the corresponding lead byte. * Otherwise, it is not modified. + * * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i]. 
* * @param s const uint8_t * string * @param start int32_t starting string offset (usually 0) * @param i int32_t string offset, must be start<=i * @see U8_SET_CP_START_UNSAFE + * @see U8_TRUNCATE_IF_INCOMPLETE * @stable ICU 2.4 */ #define U8_SET_CP_START(s, start, i) { \ @@ -614,6 +609,57 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); } \ } +#ifndef U_HIDE_DRAFT_API +/** + * If the string ends with a UTF-8 byte sequence that is valid so far + * but incomplete, then reduce the length of the string to end before + * the lead byte of that incomplete sequence. + * For example, if the string ends with E1 80, the length is reduced by 2. + * + * In all other cases (the string ends with a complete sequence, or it is not + * possible for any further trail byte to extend the trailing sequence) + * the length remains unchanged. + * + * Useful for processing text split across multiple buffers + * (save the incomplete sequence for later) + * and for optimizing iteration + * (check for string length only once per character). + * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_SET_CP_START(), this macro never reads s[length]. + * + * (In UTF-16, simply check for U16_IS_LEAD(last code unit).) + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param length int32_t string length (usually start<=length) + * @see U8_SET_CP_START + * @draft ICU 61 + */ +#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) \ + if((length)>(start)) { \ + uint8_t __b1=s[(length)-1]; \ + if(U8_IS_SINGLE(__b1)) { \ + /* common ASCII character */ \ + } else if(U8_IS_LEAD(__b1)) { \ + --(length); \ + } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \ + uint8_t __b2=s[(length)-2]; \ + if(0xe0<=__b2 && __b2<=0xf4) { \ + if(__b2<0xf0 ? 
U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \ + U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \ + (length)-=2; \ + } \ + } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \ + uint8_t __b3=s[(length)-3]; \ + if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \ + (length)-=3; \ + } \ + } \ + } \ + } +#endif // U_HIDE_DRAFT_API + /* definitions with backward iteration -------------------------------------- */ /** diff --git a/deps/icu-small/source/common/unicode/utrace.h b/deps/icu-small/source/common/unicode/utrace.h index 5d561109c7..bf6fd036f0 100644 --- a/deps/icu-small/source/common/unicode/utrace.h +++ b/deps/icu-small/source/common/unicode/utrace.h @@ -183,7 +183,7 @@ UTraceData(const void *context, int32_t fnNumber, int32_t level, * tracing functions must themselves filter by checking that the * current thread is the desired thread. * - * @param context an uninterpretted pointer. Whatever is passed in + * @param context an uninterpreted pointer. Whatever is passed in * here will in turn be passed to each of the tracing * functions UTraceEntry, UTraceExit and UTraceData. * ICU does not use or alter this pointer. @@ -320,7 +320,7 @@ utrace_getFunctions(const void **context, * human readable form. Note that a UTraceData function may choose * to not format the data; it could, for example, save it in * in the raw form it was received (more compact), leaving - * formatting for a later trace analyis tool. + * formatting for a later trace analysis tool. * @param outBuf pointer to a buffer to receive the formatted output. Output * will be nul terminated if there is space in the buffer - * if the length of the requested output < the output buffer size. 
diff --git a/deps/icu-small/source/common/unicode/utypes.h b/deps/icu-small/source/common/unicode/utypes.h index 4c40e6a87c..b6cf496511 100644 --- a/deps/icu-small/source/common/unicode/utypes.h +++ b/deps/icu-small/source/common/unicode/utypes.h @@ -145,7 +145,7 @@ /** * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library. * Defined as a literal, not a string. - * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string + * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string * from the corresponding macro invocation, _before_ other macro substitutions. * Need a nested \#defines to get the actual version numbers rather than * the literal text U_ICU_VERSION_MAJOR_NUM into the name. @@ -446,14 +446,14 @@ typedef enum UErrorCode { U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ - U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */ + U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */ U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. 
- It is very possible that a circular alias definition has occured */ + It is very possible that a circular alias definition has occurred */ U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ @@ -499,7 +499,7 @@ typedef enum UErrorCode { U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */ U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ - U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */ + U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */ U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ @@ -539,12 +539,15 @@ typedef enum UErrorCode { U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ +#ifndef U_HIDE_DRAFT_API + U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */ +#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_FMT_PARSE_ERROR_LIMIT, + U_FMT_PARSE_ERROR_LIMIT = 0x10113, #endif // U_HIDE_DEPRECATED_API /* @@ -555,7 +558,7 @@ typedef enum UErrorCode { U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. 
*/ U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ - U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */ + U_BRK_UNCLOSED_SET, /**< UnicodeSet within an RBBI rule missing a closing ']'. */ U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */ U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ @@ -564,7 +567,7 @@ typedef enum UErrorCode { U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ - U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */ + U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal BreakIterator error code. diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index d905a0f50d..0427bcb03d 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -58,13 +58,13 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 60 +#define U_ICU_VERSION_MAJOR_NUM 61 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 2 +#define U_ICU_VERSION_MINOR_NUM 1 /** The current ICU patchlevel version as an integer. 
* This value will change in the subsequent releases of ICU @@ -84,7 +84,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _60 +#define U_ICU_VERSION_SUFFIX _61 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -119,19 +119,26 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "60.2" +#define U_ICU_VERSION "61.1" -/** The current ICU library major/minor version as a string without dots, for library name suffixes. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.6 +/** + * The current ICU library major version number as a string, for library name suffixes. + * This value will change in subsequent releases of ICU. + * + * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers + * into one string without dots ("48"). + * Since ICU 49, it is the double-digit major ICU version number. + * See http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU + * + * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "60" +#define U_ICU_VERSION_SHORT "61" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "60.2" +#define U_ICU_DATA_VERSION "61.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/deps/icu-small/source/common/unicode/uversion.h b/deps/icu-small/source/common/unicode/uversion.h index cda24b6e0f..3f0251d399 100644 --- a/deps/icu-small/source/common/unicode/uversion.h +++ b/deps/icu-small/source/common/unicode/uversion.h @@ -105,7 +105,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; * @stable ICU 2.4 */ -/* Define namespace symbols if the compiler supports it. */ +/* Define C++ namespace symbols. 
*/ #ifdef __cplusplus # if U_DISABLE_RENAMING # define U_ICU_NAMESPACE icu @@ -122,7 +122,13 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; # define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: # ifndef U_USING_ICU_NAMESPACE -# define U_USING_ICU_NAMESPACE 1 +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ + defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_USING_ICU_NAMESPACE 0 +# else +# define U_USING_ICU_NAMESPACE 0 +# endif # endif # if U_USING_ICU_NAMESPACE U_NAMESPACE_USE diff --git a/deps/icu-small/source/common/unifiedcache.cpp b/deps/icu-small/source/common/unifiedcache.cpp index fd0be593d7..f0f660ed06 100644 --- a/deps/icu-small/source/common/unifiedcache.cpp +++ b/deps/icu-small/source/common/unifiedcache.cpp @@ -6,24 +6,26 @@ * others. All Rights Reserved. ****************************************************************************** * -* File UNIFIEDCACHE.CPP +* File unifiedcache.cpp ****************************************************************************** */ -#include "uhash.h" #include "unifiedcache.h" -#include "umutex.h" + +#include <algorithm> // For std::max() + #include "mutex.h" #include "uassert.h" +#include "uhash.h" #include "ucln_cmn.h" +#include "umutex.h" static icu::UnifiedCache *gCache = NULL; -static icu::SharedObject *gNoValue = NULL; static UMutex gCacheMutex = U_MUTEX_INITIALIZER; static UConditionVar gInProgressValueAddedCond = U_CONDITION_INITIALIZER; static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; -static const int32_t MAX_EVICT_ITERATIONS = 10; +static const int32_t MAX_EVICT_ITERATIONS = 10; static const int32_t DEFAULT_MAX_UNUSED = 1000; static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100; @@ -35,10 +37,6 @@ static UBool U_CALLCONV unifiedcache_cleanup() { delete gCache; gCache = NULL; } - if (gNoValue) { - delete gNoValue; - gNoValue = NULL; - } return TRUE; } 
U_CDECL_END @@ -73,23 +71,15 @@ static void U_CALLCONV cacheInit(UErrorCode &status) { ucln_common_registerCleanup( UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup); - // gNoValue must be created first to avoid assertion error in - // cache constructor. - gNoValue = new SharedObject(); gCache = new UnifiedCache(status); if (gCache == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } if (U_FAILURE(status)) { delete gCache; - delete gNoValue; gCache = NULL; - gNoValue = NULL; return; } - // We add a softref because we want hash elements with gNoValue to be - // elligible for purging but we don't ever want gNoValue to be deleted. - gNoValue->addSoftRef(); } UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { @@ -104,14 +94,24 @@ UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { UnifiedCache::UnifiedCache(UErrorCode &status) : fHashtable(NULL), fEvictPos(UHASH_FIRST), - fItemsInUseCount(0), + fNumValuesTotal(0), + fNumValuesInUse(0), fMaxUnused(DEFAULT_MAX_UNUSED), fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE), - fAutoEvictedCount(0) { + fAutoEvictedCount(0), + fNoValue(nullptr) { if (U_FAILURE(status)) { return; } - U_ASSERT(gNoValue != NULL); + fNoValue = new SharedObject(); + if (fNoValue == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted + fNoValue->hardRefCount = 1; // when other references to it are removed. 
+ fNoValue->cachePtr = this; + fHashtable = uhash_open( &ucache_hashKeys, &ucache_compareKeys, @@ -139,7 +139,7 @@ void UnifiedCache::setEvictionPolicy( int32_t UnifiedCache::unusedCount() const { Mutex lock(&gCacheMutex); - return uhash_count(fHashtable) - fItemsInUseCount; + return uhash_count(fHashtable) - fNumValuesInUse; } int64_t UnifiedCache::autoEvictedCount() const { @@ -161,6 +161,12 @@ void UnifiedCache::flush() const { while (_flush(FALSE)); } +void UnifiedCache::handleUnreferencedObject() const { + Mutex lock(&gCacheMutex); + --fNumValuesInUse; + _runEvictionSlice(); +} + #ifdef UNIFIED_CACHE_DEBUG #include <stdio.h> @@ -199,7 +205,7 @@ void UnifiedCache::_dumpContents() const { "Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n", key->writeDescription(buffer, 256), key->creationStatus, - sharedObject == gNoValue ? NULL :sharedObject, + sharedObject == fNoValue ? NULL :sharedObject, sharedObject->getRefCount(), sharedObject->getSoftRefCount()); } @@ -219,10 +225,11 @@ UnifiedCache::~UnifiedCache() { _flush(TRUE); } uhash_close(fHashtable); + fHashtable = nullptr; + delete fNoValue; + fNoValue = nullptr; } -// Returns the next element in the cache round robin style. -// On entry, gCacheMutex must be held. const UHashElement * UnifiedCache::_nextElement() const { const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos); @@ -233,46 +240,36 @@ UnifiedCache::_nextElement() const { return element; } -// Flushes the contents of the cache. If cache values hold references to other -// cache values then _flush should be called in a loop until it returns FALSE. -// On entry, gCacheMutex must be held. -// On exit, those values with are evictable are flushed. If all is true -// then every value is flushed even if it is not evictable. -// Returns TRUE if any value in cache was flushed or FALSE otherwise. 
UBool UnifiedCache::_flush(UBool all) const { UBool result = FALSE; int32_t origSize = uhash_count(fHashtable); for (int32_t i = 0; i < origSize; ++i) { const UHashElement *element = _nextElement(); + if (element == nullptr) { + break; + } if (all || _isEvictable(element)) { const SharedObject *sharedObject = (const SharedObject *) element->value.pointer; + U_ASSERT(sharedObject->cachePtr == this); /* compare, do not assign: checks that the value is tagged with this cache */ uhash_removeElement(fHashtable, element); - sharedObject->removeSoftRef(); + removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero. result = TRUE; } } return result; } -// Computes how many items should be evicted. -// On entry, gCacheMutex must be held. -// Returns number of items that should be evicted or a value <= 0 if no -// items need to be evicted. int32_t UnifiedCache::_computeCountOfItemsToEvict() const { - int32_t maxPercentageOfInUseCount = - fItemsInUseCount * fMaxPercentageOfInUse / 100; - int32_t maxUnusedCount = fMaxUnused; - if (maxUnusedCount < maxPercentageOfInUseCount) { - maxUnusedCount = maxPercentageOfInUseCount; - } - return uhash_count(fHashtable) - fItemsInUseCount - maxUnusedCount; + int32_t totalItems = uhash_count(fHashtable); + int32_t evictableItems = totalItems - fNumValuesInUse; + + int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100; + int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused); + int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit); + return countOfItemsToEvict; } -// Run an eviction slice. -// On entry, gCacheMutex must be held. -// _runEvictionSlice runs a slice of the evict pipeline by examining the next -// 10 entries in the cache round robin style evicting them if they are eligible. 
void UnifiedCache::_runEvictionSlice() const { int32_t maxItemsToEvict = _computeCountOfItemsToEvict(); if (maxItemsToEvict <= 0) { @@ -280,11 +277,14 @@ void UnifiedCache::_runEvictionSlice() const { } for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) { const UHashElement *element = _nextElement(); + if (element == nullptr) { + break; + } if (_isEvictable(element)) { const SharedObject *sharedObject = (const SharedObject *) element->value.pointer; uhash_removeElement(fHashtable, element); - sharedObject->removeSoftRef(); + removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero. ++fAutoEvictedCount; if (--maxItemsToEvict == 0) { break; @@ -293,11 +293,6 @@ void UnifiedCache::_runEvictionSlice() const { } } - -// Places a new value and creationStatus in the cache for the given key. -// On entry, gCacheMutex must be held. key must not exist in the cache. -// On exit, value and creation status placed under key. Soft reference added -// to value on successful add. On error sets status. void UnifiedCache::_putNew( const CacheKeyBase &key, const SharedObject *value, @@ -312,24 +307,17 @@ void UnifiedCache::_putNew( return; } keyToAdopt->fCreationStatus = creationStatus; - if (value->noSoftReferences()) { + if (value->softRefCount == 0) { _registerMaster(keyToAdopt, value); } - uhash_put(fHashtable, keyToAdopt, (void *) value, &status); + void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status); + U_ASSERT(oldValue == nullptr); + (void)oldValue; if (U_SUCCESS(status)) { - value->addSoftRef(); + value->softRefCount++; } } -// Places value and status at key if there is no value at key or if cache -// entry for key is in progress. Otherwise, it leaves the current value and -// status there. -// On entry. gCacheMutex must not be held. value must be -// included in the reference count of the object to which it points. 
-// On exit, value and status are changed to what was already in the cache if -// something was there and not in progress. Otherwise, value and status are left -// unchanged in which case they are placed in the cache on a best-effort basis. -// Caller must call removeRef() on value. void UnifiedCache::_putIfAbsentAndGet( const CacheKeyBase &key, const SharedObject *&value, @@ -352,15 +340,7 @@ void UnifiedCache::_putIfAbsentAndGet( _runEvictionSlice(); } -// Attempts to fetch value and status for key from cache. -// On entry, gCacheMutex must not be held value must be NULL and status must -// be U_ZERO_ERROR. -// On exit, either returns FALSE (In this -// case caller should try to create the object) or returns TRUE with value -// pointing to the fetched value and status set to fetched status. When -// FALSE is returned status may be set to failure if an in progress hash -// entry could not be made but value will remain unchanged. When TRUE is -// returned, caler must call removeRef() on value. + UBool UnifiedCache::_poll( const CacheKeyBase &key, const SharedObject *&value, @@ -369,27 +349,29 @@ UBool UnifiedCache::_poll( U_ASSERT(status == U_ZERO_ERROR); Mutex lock(&gCacheMutex); const UHashElement *element = uhash_find(fHashtable, &key); - while (element != NULL && _inProgress(element)) { + + // If the hash table contains an inProgress placeholder entry for this key, + // this means that another thread is currently constructing the value object. + // Loop, waiting for that construction to complete. + while (element != NULL && _inProgress(element)) { umtx_condWait(&gInProgressValueAddedCond, &gCacheMutex); element = uhash_find(fHashtable, &key); } + + // If the hash table contains an entry for the key, + // fetch out the contents and return them. if (element != NULL) { - _fetch(element, value, status); + _fetch(element, value, status); return TRUE; } - _putNew(key, gNoValue, U_ZERO_ERROR, status); + + // The hash table contained nothing for this key. 
+ // Insert an inProgress place holder value. + // Our caller will create the final value and update the hash table. + _putNew(key, fNoValue, U_ZERO_ERROR, status); return FALSE; } -// Gets value out of cache. -// On entry. gCacheMutex must not be held. value must be NULL. status -// must be U_ZERO_ERROR. -// On exit. value and status set to what is in cache at key or on cache -// miss the key's createObject() is called and value and status are set to -// the result of that. In this latter case, best effort is made to add the -// value and status to the cache. If createObject() fails to create a value, -// gNoValue is stored in cache, and value is set to NULL. Caller must call -// removeRef on value if non NULL. void UnifiedCache::_get( const CacheKeyBase &key, const SharedObject *&value, @@ -398,7 +380,7 @@ void UnifiedCache::_get( U_ASSERT(value == NULL); U_ASSERT(status == U_ZERO_ERROR); if (_poll(key, value, status)) { - if (value == gNoValue) { + if (value == fNoValue) { SharedObject::clearPtr(value); } return; @@ -410,46 +392,22 @@ void UnifiedCache::_get( U_ASSERT(value == NULL || value->hasHardReferences()); U_ASSERT(value != NULL || status != U_ZERO_ERROR); if (value == NULL) { - SharedObject::copyPtr(gNoValue, value); + SharedObject::copyPtr(fNoValue, value); } _putIfAbsentAndGet(key, value, status); - if (value == gNoValue) { + if (value == fNoValue) { SharedObject::clearPtr(value); } } -void UnifiedCache::decrementItemsInUseWithLockingAndEviction() const { - Mutex mutex(&gCacheMutex); - decrementItemsInUse(); - _runEvictionSlice(); -} - -void UnifiedCache::incrementItemsInUse() const { - ++fItemsInUseCount; -} - -void UnifiedCache::decrementItemsInUse() const { - --fItemsInUseCount; +void UnifiedCache::_registerMaster( + const CacheKeyBase *theKey, const SharedObject *value) const { + theKey->fIsMaster = true; + value->cachePtr = this; + ++fNumValuesTotal; + ++fNumValuesInUse; } -// Register a master cache entry. -// On entry, gCacheMutex must be held. 
-// On exit, items in use count incremented, entry is marked as a master -// entry, and value registered with cache so that subsequent calls to -// addRef() and removeRef() on it correctly updates items in use count -void UnifiedCache::_registerMaster( - const CacheKeyBase *theKey, const SharedObject *value) const { - theKey->fIsMaster = TRUE; - ++fItemsInUseCount; - value->registerWithCache(this); -} - -// Store a value and error in given hash entry. -// On entry, gCacheMutex must be held. Hash entry element must be in progress. -// value must be non NULL. -// On Exit, soft reference added to value. value and status stored in hash -// entry. Soft reference removed from previous stored value. Waiting -// threads notified. void UnifiedCache::_put( const UHashElement *element, const SharedObject *value, @@ -458,86 +416,52 @@ void UnifiedCache::_put( const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; const SharedObject *oldValue = (const SharedObject *) element->value.pointer; theKey->fCreationStatus = status; - if (value->noSoftReferences()) { + if (value->softRefCount == 0) { _registerMaster(theKey, value); } - value->addSoftRef(); + value->softRefCount++; UHashElement *ptr = const_cast<UHashElement *>(element); ptr->value.pointer = (void *) value; - oldValue->removeSoftRef(); + U_ASSERT(oldValue == fNoValue); + removeSoftRef(oldValue); // Tell waiting threads that we replace in-progress status with // an error. umtx_condBroadcast(&gInProgressValueAddedCond); } -void -UnifiedCache::copyPtr(const SharedObject *src, const SharedObject *&dest) { - if(src != dest) { - if(dest != NULL) { - dest->removeRefWhileHoldingCacheLock(); - } - dest = src; - if(src != NULL) { - src->addRefWhileHoldingCacheLock(); - } - } -} - -void -UnifiedCache::clearPtr(const SharedObject *&ptr) { - if (ptr != NULL) { - ptr->removeRefWhileHoldingCacheLock(); - ptr = NULL; - } -} - - -// Fetch value and error code from a particular hash entry. 
-// On entry, gCacheMutex must be held. value must be either NULL or must be -// included in the ref count of the object to which it points. -// On exit, value and status set to what is in the hash entry. Caller must -// eventually call removeRef on value. -// If hash entry is in progress, value will be set to gNoValue and status will -// be set to U_ZERO_ERROR. void UnifiedCache::_fetch( const UHashElement *element, const SharedObject *&value, - UErrorCode &status) { + UErrorCode &status) const { const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; status = theKey->fCreationStatus; - // Since we have the cache lock, calling regular SharedObject methods + // Since we have the cache lock, calling regular SharedObject add/removeRef // could cause us to deadlock on ourselves since they may need to lock // the cache mutex. - UnifiedCache::copyPtr((const SharedObject *) element->value.pointer, value); + removeHardRef(value); + value = static_cast<const SharedObject *>(element->value.pointer); + addHardRef(value); } -// Determine if given hash entry is in progress. -// On entry, gCacheMutex must be held. -UBool UnifiedCache::_inProgress(const UHashElement *element) { - const SharedObject *value = NULL; + +UBool UnifiedCache::_inProgress(const UHashElement* element) const { UErrorCode status = U_ZERO_ERROR; + const SharedObject * value = NULL; _fetch(element, value, status); UBool result = _inProgress(value, status); - - // Since we have the cache lock, calling regular SharedObject methods - // could cause us to deadlock on ourselves since they may need to lock - // the cache mutex. - UnifiedCache::clearPtr(value); + removeHardRef(value); return result; } -// Determine if given hash entry is in progress. -// On entry, gCacheMutex must be held. 
UBool UnifiedCache::_inProgress( - const SharedObject *theValue, UErrorCode creationStatus) { - return (theValue == gNoValue && creationStatus == U_ZERO_ERROR); + const SharedObject* theValue, UErrorCode creationStatus) const { + return (theValue == fNoValue && creationStatus == U_ZERO_ERROR); } -// Determine if given hash entry is eligible for eviction. -// On entry, gCacheMutex must be held. -UBool UnifiedCache::_isEvictable(const UHashElement *element) { +UBool UnifiedCache::_isEvictable(const UHashElement *element) const +{ const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; const SharedObject *theValue = (const SharedObject *) element->value.pointer; @@ -549,7 +473,47 @@ UBool UnifiedCache::_isEvictable(const UHashElement *element) { // We can evict entries that are either not a master or have just // one reference (The one reference being from the cache itself). - return (!theKey->fIsMaster || (theValue->getSoftRefCount() == 1 && theValue->noHardReferences())); + return (!theKey->fIsMaster || (theValue->softRefCount == 1 && theValue->noHardReferences())); +} + +void UnifiedCache::removeSoftRef(const SharedObject *value) const { + U_ASSERT(value->cachePtr == this); + U_ASSERT(value->softRefCount > 0); + if (--value->softRefCount == 0) { + --fNumValuesTotal; + if (value->noHardReferences()) { + delete value; + } else { + // This path only happens from flush(all). Which only happens from the + // UnifiedCache destructor. Nulling out value.cacheptr changes the behavior + // of value.removeRef(), causing the deletion to be done there. 
+ value->cachePtr = nullptr; + } + } +} + +int32_t UnifiedCache::removeHardRef(const SharedObject *value) const { + int refCount = 0; + if (value) { + refCount = umtx_atomic_dec(&value->hardRefCount); + U_ASSERT(refCount >= 0); + if (refCount == 0) { + --fNumValuesInUse; + } + } + return refCount; +} + +int32_t UnifiedCache::addHardRef(const SharedObject *value) const { + int refCount = 0; + if (value) { + refCount = umtx_atomic_inc(&value->hardRefCount); + U_ASSERT(refCount >= 1); + if (refCount == 1) { + fNumValuesInUse++; + } + } + return refCount; } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/unifiedcache.h b/deps/icu-small/source/common/unifiedcache.h index 947ebbdc78..b3ccd60d17 100644 --- a/deps/icu-small/source/common/unifiedcache.h +++ b/deps/icu-small/source/common/unifiedcache.h @@ -190,7 +190,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { UnifiedCache(UErrorCode &status); /** - * Returns the cache instance. + * Return a pointer to the global cache instance. */ static UnifiedCache *getInstance(UErrorCode &status); @@ -294,7 +294,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { /** * Configures at what point evcition of unused entries will begin. - * Eviction is triggered whenever the number of unused entries exeeds + * Eviction is triggered whenever the number of evictable keys exeeds * BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100). * Once the number of unused entries drops below one of these, * eviction ceases. 
Because eviction happens incrementally, @@ -341,60 +341,214 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase { */ int32_t unusedCount() const; - virtual void incrementItemsInUse() const; - virtual void decrementItemsInUseWithLockingAndEviction() const; - virtual void decrementItemsInUse() const; + virtual void handleUnreferencedObject() const; virtual ~UnifiedCache(); + private: UHashtable *fHashtable; mutable int32_t fEvictPos; - mutable int32_t fItemsInUseCount; + mutable int32_t fNumValuesTotal; + mutable int32_t fNumValuesInUse; int32_t fMaxUnused; int32_t fMaxPercentageOfInUse; mutable int64_t fAutoEvictedCount; + SharedObject *fNoValue; + UnifiedCache(const UnifiedCache &other); UnifiedCache &operator=(const UnifiedCache &other); + + /** + * Flushes the contents of the cache. If cache values hold references to other + * cache values then _flush should be called in a loop until it returns FALSE. + * + * On entry, gCacheMutex must be held. + * On exit, those values with are evictable are flushed. + * + * @param all if false flush evictable items only, which are those with no external + * references, plus those that can be safely recreated.<br> + * if true, flush all elements. Any values (sharedObjects) with remaining + * hard (external) references are not deleted, but are detached from + * the cache, so that a subsequent removeRefs can delete them. + * _flush is not thread safe when all is true. + * @return TRUE if any value in cache was flushed or FALSE otherwise. + */ UBool _flush(UBool all) const; + + /** + * Gets value out of cache. + * On entry. gCacheMutex must not be held. value must be NULL. status + * must be U_ZERO_ERROR. + * On exit. value and status set to what is in cache at key or on cache + * miss the key's createObject() is called and value and status are set to + * the result of that. In this latter case, best effort is made to add the + * value and status to the cache. 
If createObject() fails to create a value, + * fNoValue is stored in cache, and value is set to NULL. Caller must call + * removeRef on value if non NULL. + */ void _get( const CacheKeyBase &key, const SharedObject *&value, const void *creationContext, UErrorCode &status) const; - UBool _poll( - const CacheKeyBase &key, - const SharedObject *&value, - UErrorCode &status) const; - void _putNew( - const CacheKeyBase &key, - const SharedObject *value, - const UErrorCode creationStatus, - UErrorCode &status) const; + + /** + * Attempts to fetch value and status for key from cache. + * On entry, gCacheMutex must not be held value must be NULL and status must + * be U_ZERO_ERROR. + * On exit, either returns FALSE (In this + * case caller should try to create the object) or returns TRUE with value + * pointing to the fetched value and status set to fetched status. When + * FALSE is returned status may be set to failure if an in progress hash + * entry could not be made but value will remain unchanged. When TRUE is + * returned, caller must call removeRef() on value. + */ + UBool _poll( + const CacheKeyBase &key, + const SharedObject *&value, + UErrorCode &status) const; + + /** + * Places a new value and creationStatus in the cache for the given key. + * On entry, gCacheMutex must be held. key must not exist in the cache. + * On exit, value and creation status placed under key. Soft reference added + * to value on successful add. On error sets status. + */ + void _putNew( + const CacheKeyBase &key, + const SharedObject *value, + const UErrorCode creationStatus, + UErrorCode &status) const; + + /** + * Places value and status at key if there is no value at key or if cache + * entry for key is in progress. Otherwise, it leaves the current value and + * status there. + * + * On entry. gCacheMutex must not be held. Value must be + * included in the reference count of the object to which it points. 
+ * + * On exit, value and status are changed to what was already in the cache if + * something was there and not in progress. Otherwise, value and status are left + * unchanged in which case they are placed in the cache on a best-effort basis. + * Caller must call removeRef() on value. + */ void _putIfAbsentAndGet( const CacheKeyBase &key, const SharedObject *&value, UErrorCode &status) const; - const UHashElement *_nextElement() const; + + /** + * Returns the next element in the cache round robin style. + * Returns nullptr if the cache is empty. + * On entry, gCacheMutex must be held. + */ + const UHashElement *_nextElement() const; + + /** + * Return the number of cache items that would need to be evicted + * to bring usage into conformance with eviction policy. + * + * An item corresponds to an entry in the hash table, a hash table element. + * + * On entry, gCacheMutex must be held. + */ int32_t _computeCountOfItemsToEvict() const; + + /** + * Run an eviction slice. + * On entry, gCacheMutex must be held. + * _runEvictionSlice runs a slice of the evict pipeline by examining the next + * 10 entries in the cache round robin style evicting them if they are eligible. + */ void _runEvictionSlice() const; - void _registerMaster( - const CacheKeyBase *theKey, const SharedObject *value) const; + + /** + * Register a master cache entry. A master key is the first key to create + * a given SharedObject value. Subsequent keys whose create function + * produce referneces to an already existing SharedObject are not masters - + * they can be evicted and subsequently recreated. + * + * On entry, gCacheMutex must be held. + * On exit, items in use count incremented, entry is marked as a master + * entry, and value registered with cache so that subsequent calls to + * addRef() and removeRef() on it correctly interact with the cache. 
+ */ + void _registerMaster(const CacheKeyBase *theKey, const SharedObject *value) const; + + /** + * Store a value and creation error status in given hash entry. + * On entry, gCacheMutex must be held. Hash entry element must be in progress. + * value must be non NULL. + * On Exit, soft reference added to value. value and status stored in hash + * entry. Soft reference removed from previous stored value. Waiting + * threads notified. + */ void _put( const UHashElement *element, const SharedObject *value, const UErrorCode status) const; + /** + * Remove a soft reference, and delete the SharedObject if no references remain. + * To be used from within the UnifiedCache implementation only. + * gCacheMutex must be held by caller. + * @param value the SharedObject to be acted on. + */ + void removeSoftRef(const SharedObject *value) const; + + /** + * Increment the hard reference count of the given SharedObject. + * gCacheMutex must be held by the caller. + * Update numValuesEvictable on transitions between zero and one reference. + * + * @param value The SharedObject to be referenced. + * @return the hard reference count after the addition. + */ + int32_t addHardRef(const SharedObject *value) const; + + /** + * Decrement the hard reference count of the given SharedObject. + * gCacheMutex must be held by the caller. + * Update numValuesEvictable on transitions between one and zero reference. + * + * @param value The SharedObject to be referenced. + * @return the hard reference count after the removal. 
+ */ + int32_t removeHardRef(const SharedObject *value) const; + + #ifdef UNIFIED_CACHE_DEBUG void _dumpContents() const; #endif - static void copyPtr(const SharedObject *src, const SharedObject *&dest); - static void clearPtr(const SharedObject *&ptr); - static void _fetch( - const UHashElement *element, - const SharedObject *&value, - UErrorCode &status); - static UBool _inProgress(const UHashElement *element); - static UBool _inProgress( - const SharedObject *theValue, UErrorCode creationStatus); - static UBool _isEvictable(const UHashElement *element); + + /** + * Fetch value and error code from a particular hash entry. + * On entry, gCacheMutex must be held. value must be either NULL or must be + * included in the ref count of the object to which it points. + * On exit, value and status set to what is in the hash entry. Caller must + * eventually call removeRef on value. + * If hash entry is in progress, value will be set to gNoValue and status will + * be set to U_ZERO_ERROR. + */ + void _fetch(const UHashElement *element, const SharedObject *&value, + UErrorCode &status) const; + + /** + * Determine if given hash entry is in progress. + * On entry, gCacheMutex must be held. + */ + UBool _inProgress(const UHashElement *element) const; + + /** + * Determine if given hash entry is in progress. + * On entry, gCacheMutex must be held. + */ + UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const; + + /** + * Determine if given hash entry is eligible for eviction. + * On entry, gCacheMutex must be held. 
+ */ + UBool _isEvictable(const UHashElement *element) const; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/uniset_closure.cpp b/deps/icu-small/source/common/uniset_closure.cpp index b5cc213941..97c7bc9d35 100644 --- a/deps/icu-small/source/common/uniset_closure.cpp +++ b/deps/icu-small/source/common/uniset_closure.cpp @@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, // _applyPattern calls add() etc., which set pat to empty. UnicodeString rebuiltPat; RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status); + applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status); if (U_FAILURE(status)) return *this; if (chars.inVariable()) { // syntaxError(chars, "Extra chars in variable value"); diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp index d0ed074a9b..ef5d6a32b2 100644 --- a/deps/icu-small/source/common/uniset_props.cpp +++ b/deps/icu-small/source/common/uniset_props.cpp @@ -231,7 +231,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) { ucase_addPropertyStarts(&sa, &status); break; case UPROPS_SRC_BIDI: - ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status); + ubidi_addPropertyStarts(&sa, &status); break; default: status = U_INTERNAL_PROGRAM_ERROR; @@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { return i.fSet; } +namespace { // Cache some sets for other services -------------------------------------- *** void U_CALLCONV createUni32Set(UErrorCode &errorCode) { @@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) { // memory leak checker tools #define _dbgct(me) +} // namespace + //---------------------------------------------------------------- // Constructors &c //---------------------------------------------------------------- @@ -382,7 +385,7 
@@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, // _applyPattern calls add() etc., which set pat to empty. UnicodeString rebuiltPat; RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status); + applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status); if (U_FAILURE(status)) return; if (chars.inVariable()) { // syntaxError(chars, "Extra chars in variable value"); @@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) { // Implementation: Pattern parsing //---------------------------------------------------------------- +namespace { + /** * A small all-inline class to manage a UnicodeSet pointer. Add * operator->() etc. as needed. @@ -424,6 +429,10 @@ public: } }; +constexpr int32_t MAX_DEPTH = 100; + +} // namespace + /** * Parse the pattern from the given RuleCharacterIterator. The * iterator is advanced over the parsed pattern. @@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, UnicodeString& rebuiltPat, uint32_t options, UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), + int32_t depth, UErrorCode& ec) { if (U_FAILURE(ec)) return; + if (depth > MAX_DEPTH) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + return; + } // Syntax characters: [ ] ^ - & { } @@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, } switch (setMode) { case 1: - nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec); + nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec); break; case 2: chars.skipIgnored(opts); @@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, // Property set implementation //---------------------------------------------------------------- +namespace { + static UBool numericValueFilter(UChar32 ch, void* context) { return u_getNumericValue(ch) == *(double*)context; } @@ -868,6 +884,8 @@ static 
UBool scriptExtensionsFilter(UChar32 ch, void* context) { return uscript_hasScript(ch, *(UScriptCode*)context); } +} // namespace + /** * Generic filter-based scanning code for UCD property UnicodeSets. */ @@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, } } +namespace { + static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { /* Note: we use ' ' in compiler code page */ int32_t j = 0; @@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { return TRUE; } +} // namespace + //---------------------------------------------------------------- // Property set API //---------------------------------------------------------------- diff --git a/deps/icu-small/source/common/uprops.cpp b/deps/icu-small/source/common/uprops.cpp index ace3c4d6d0..b76896db1b 100644 --- a/deps/icu-small/source/common/uprops.cpp +++ b/deps/icu-small/source/common/uprops.cpp @@ -38,8 +38,6 @@ U_NAMESPACE_USE -#define GET_BIDI_PROPS() ubidi_getSingleton() - /* general properties API functions ----------------------------------------- */ struct BinaryProperty; @@ -62,15 +60,15 @@ static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 } static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isBidiControl(GET_BIDI_PROPS(), c); + return ubidi_isBidiControl(c); } static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isMirrored(GET_BIDI_PROPS(), c); + return ubidi_isMirrored(c); } static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isJoinControl(GET_BIDI_PROPS(), c); + return ubidi_isJoinControl(c); } #if UCONFIG_NO_NORMALIZATION @@ -329,11 +327,11 @@ static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /* } static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) 
{ - return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c); + return (int32_t)ubidi_getPairedBracketType(c); } static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { - return ubidi_getMaxValue(GET_BIDI_PROPS(), which); + return ubidi_getMaxValue(which); } #if UCONFIG_NO_NORMALIZATION @@ -351,11 +349,11 @@ static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UPrope } static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); + return ubidi_getJoiningGroup(c); } static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_getJoiningType(GET_BIDI_PROPS(), c); + return ubidi_getJoiningType(c); } static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { diff --git a/deps/icu-small/source/common/ushape.cpp b/deps/icu-small/source/common/ushape.cpp index d7886ac06c..c3f3ef9e20 100644 --- a/deps/icu-small/source/common/ushape.cpp +++ b/deps/icu-small/source/common/ushape.cpp @@ -342,18 +342,16 @@ static void _shapeToArabicDigitsWithContext(UChar *s, int32_t length, UChar digitBase, UBool isLogical, UBool lastStrongWasAL) { - const UBiDiProps *bdp; int32_t i; UChar c; - bdp=ubidi_getSingleton(); digitBase-=0x30; /* the iteration direction depends on the type of input */ if(isLogical) { for(i=0; i<length; ++i) { c=s[i]; - switch(ubidi_getClass(bdp, c)) { + switch(ubidi_getClass(c)) { case U_LEFT_TO_RIGHT: /* L */ case U_RIGHT_TO_LEFT: /* R */ lastStrongWasAL=FALSE; @@ -373,7 +371,7 @@ _shapeToArabicDigitsWithContext(UChar *s, int32_t length, } else { for(i=length; i>0; /* pre-decrement in the body */) { c=s[--i]; - switch(ubidi_getClass(bdp, c)) { + switch(ubidi_getClass(c)) { case U_LEFT_TO_RIGHT: /* L */ case U_RIGHT_TO_LEFT: /* R */ lastStrongWasAL=FALSE; diff --git a/deps/icu-small/source/common/usprep.cpp b/deps/icu-small/source/common/usprep.cpp 
index c4f831be2e..54a77172fe 100644 --- a/deps/icu-small/source/common/usprep.cpp +++ b/deps/icu-small/source/common/usprep.cpp @@ -347,10 +347,6 @@ usprep_getProfile(const char* path, newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); - if(newProfile->checkBiDi) { - newProfile->bdp = ubidi_getSingleton(); - } - LocalMemory<UStringPrepKey> key; LocalMemory<char> keyName; LocalMemory<char> keyPath; @@ -735,7 +731,7 @@ usprep_prepare( const UStringPrepProfile* profile, } if(profile->checkBiDi) { - direction = ubidi_getClass(profile->bdp, ch); + direction = ubidi_getClass(ch); if(firstCharDir == U_CHAR_DIRECTION_COUNT){ firstCharDir = direction; } diff --git a/deps/icu-small/source/common/ustr_wcs.cpp b/deps/icu-small/source/common/ustr_wcs.cpp index 8b6e99221e..0372824f21 100644 --- a/deps/icu-small/source/common/ustr_wcs.cpp +++ b/deps/icu-small/source/common/ustr_wcs.cpp @@ -342,7 +342,7 @@ _strFromWCS( UChar *dest, pSrcLimit = src + srcLength; for(;;){ - register int32_t nulLen = 0; + int32_t nulLen = 0; /* find nulls in the string */ while(nulLen<srcLength && pSrc[nulLen++]!=0){ diff --git a/deps/icu-small/source/common/ustrcase.cpp b/deps/icu-small/source/common/ustrcase.cpp index b1beb34277..978bd3b7b8 100644 --- a/deps/icu-small/source/common/ustrcase.cpp +++ b/deps/icu-small/source/common/ustrcase.cpp @@ -52,16 +52,8 @@ int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity, return destIndex; } -} // namespace - -U_NAMESPACE_END - -U_NAMESPACE_USE - -/* string casing ------------------------------------------------------------ */ - /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. 
*/ -static inline int32_t +inline int32_t appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, int32_t result, const UChar *s, int32_t cpLength, uint32_t options, icu::Edits *edits) { @@ -134,7 +126,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, return destIndex; } -static inline int32_t +inline int32_t appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { if(destIndex<destCapacity) { dest[destIndex]=c; @@ -144,28 +136,34 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { return destIndex+1; } -static inline int32_t +int32_t +appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, + const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { + if(edits!=NULL) { + edits->addUnchanged(length); + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + if((destIndex+length)<=destCapacity) { + u_memcpy(dest+destIndex, s, length); + } + return destIndex + length; +} + +inline int32_t appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { - if(length>0) { - if(edits!=NULL) { - edits->addUnchanged(length); - } - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } - if(length>(INT32_MAX-destIndex)) { - return -1; // integer overflow - } - if((destIndex+length)<=destCapacity) { - u_memcpy(dest+destIndex, s, length); - } - destIndex+=length; + if (length <= 0) { + return destIndex; } - return destIndex; + return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits); } -static UChar32 U_CALLCONV +UChar32 U_CALLCONV utf16_caseContextIterator(void *context, int8_t dir) { UCaseContext *csc=(UCaseContext *)context; UChar32 c; @@ -197,39 +195,205 @@ utf16_caseContextIterator(void *context, int8_t dir) { return U_SENTINEL; } -/* - * Case-maps 
[srcStart..srcLimit[ but takes - * context [0..srcLength[ into account. +/** + * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. + * caseLocale < 0: Case-folds [srcStart..srcLimit[. */ -static int32_t -_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, - UChar *dest, int32_t destCapacity, - const UChar *src, UCaseContext *csc, - int32_t srcStart, int32_t srcLimit, - icu::Edits *edits, - UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex=srcStart; - int32_t destIndex=0; - while(srcIndex<srcLimit) { - int32_t cpStart; - csc->cpStart=cpStart=srcIndex; +int32_t toLower(int32_t caseLocale, uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, + icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToLower; + if (caseLocale == UCASE_LOC_ROOT || + (caseLocale >= 0 ? + !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : + (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { + latinToLower = LatinCase::TO_LOWER_NORMAL; + } else { + latinToLower = LatinCase::TO_LOWER_TR_LT; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t destIndex = 0; + int32_t prev = srcStart; + int32_t srcIndex = srcStart; + for (;;) { + // fast path for simple cases + UChar lead; + while (srcIndex < srcLimit) { + lead = src[srcIndex]; + int32_t delta; + if (lead < LatinCase::LONG_S) { + int8_t d = latinToLower[lead]; + if (d == LatinCase::EXC) { break; } + ++srcIndex; + if (d == 0) { continue; } + delta = d; + } else if (lead >= 0xd800) { + break; // surrogate or higher + } else { + uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); + if (UCASE_HAS_EXCEPTION(props)) { break; } + ++srcIndex; + if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + } + lead += delta; + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, 
srcIndex - 1 - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendUChar(dest, destIndex, destCapacity, lead); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + if (srcIndex >= srcLimit) { + break; + } + // slow path + int32_t cpStart = srcIndex++; + UChar trail; UChar32 c; - U16_NEXT(src, srcIndex, srcLimit, c); - csc->cpLimit=srcIndex; + if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) { + c = U16_GET_SUPPLEMENTARY(lead, trail); + ++srcIndex; + } else { + c = lead; + } const UChar *s; - c=map(c, utf16_caseContextIterator, csc, &s, caseLocale); - destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; + if (caseLocale >= 0) { + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale); + } else { + c = ucase_toFullFolding(c, &s, options); } + if (c >= 0) { + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, cpStart - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + srcIndex - cpStart, options, edits); + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + } + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - prev, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; } + return destIndex; +} +int32_t toUpper(int32_t caseLocale, uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, UCaseContext *csc, int32_t srcLength, + icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToUpper; + if (caseLocale == UCASE_LOC_TURKISH) { + latinToUpper = LatinCase::TO_UPPER_TR; + } else { 
+ latinToUpper = LatinCase::TO_UPPER_NORMAL; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t destIndex = 0; + int32_t prev = 0; + int32_t srcIndex = 0; + for (;;) { + // fast path for simple cases + UChar lead; + while (srcIndex < srcLength) { + lead = src[srcIndex]; + int32_t delta; + if (lead < LatinCase::LONG_S) { + int8_t d = latinToUpper[lead]; + if (d == LatinCase::EXC) { break; } + ++srcIndex; + if (d == 0) { continue; } + delta = d; + } else if (lead >= 0xd800) { + break; // surrogate or higher + } else { + uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); + if (UCASE_HAS_EXCEPTION(props)) { break; } + ++srcIndex; + if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + } + lead += delta; + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - 1 - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendUChar(dest, destIndex, destCapacity, lead); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + if (srcIndex >= srcLength) { + break; + } + // slow path + int32_t cpStart; + csc->cpStart = cpStart = srcIndex++; + UChar trail; + UChar32 c; + if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) { + c = U16_GET_SUPPLEMENTARY(lead, trail); + ++srcIndex; + } else { + c = lead; + } + csc->cpLimit = srcIndex; + const UChar *s; + c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale); + if (c >= 0) { + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, cpStart - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + srcIndex - cpStart, options, edits); + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + } + destIndex = appendUnchanged(dest, destIndex, 
destCapacity, + src + prev, srcIndex - prev, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } return destIndex; } +} // namespace + +U_NAMESPACE_END + +U_NAMESPACE_USE + #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC int32_t U_CALLCONV @@ -344,11 +508,10 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it if((options&U_TITLECASE_NO_LOWERCASE)==0) { /* Normal operation: Lowercase the rest of the word. */ destIndex+= - _caseMap( - caseLocale, options, ucase_toFullLower, + toLower( + caseLocale, options, dest+destIndex, destCapacity-destIndex, - src, &csc, - titleLimit, index, + src, &csc, titleLimit, index, edits, errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR) { errorCode=U_ZERO_ERROR; @@ -1013,8 +1176,8 @@ ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - int32_t destIndex = _caseMap( - caseLocale, options, ucase_toFullLower, + int32_t destIndex = toLower( + caseLocale, options, dest, destCapacity, src, &csc, 0, srcLength, edits, errorCode); @@ -1035,10 +1198,10 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - destIndex = _caseMap( - caseLocale, options, ucase_toFullUpper, + destIndex = toUpper( + caseLocale, options, dest, destCapacity, - src, &csc, 0, srcLength, + src, &csc, srcLength, edits, errorCode); } return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); @@ -1050,23 +1213,11 @@ ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK const UChar *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode) { - /* case mapping loop */ - int32_t srcIndex = 0; - int32_t destIndex = 0; - while (srcIndex < srcLength) { - int32_t cpStart = srcIndex; - UChar32 c; - U16_NEXT(src, srcIndex, srcLength, c); - 
const UChar *s; - c = ucase_toFullFolding(c, &s, options); - destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - + int32_t destIndex = toLower( + -1, options, + dest, destCapacity, + src, nullptr, 0, srcLength, + edits, errorCode); return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } diff --git a/deps/icu-small/source/common/utf_impl.cpp b/deps/icu-small/source/common/utf_impl.cpp index f78c566e09..9dd241a12b 100644 --- a/deps/icu-small/source/common/utf_impl.cpp +++ b/deps/icu-small/source/common/utf_impl.cpp @@ -238,33 +238,45 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U int32_t i=*pi; if(U8_IS_TRAIL(c) && i>start) { uint8_t b1=s[--i]; - if(0xc2<=b1 && b1<0xe0) { - *pi=i; - return ((b1-0xc0)<<6)|(c&0x3f); + if(U8_IS_LEAD(b1)) { + if(b1<0xe0) { + *pi=i; + return ((b1-0xc0)<<6)|(c&0x3f); + } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) { + // Truncated 3- or 4-byte sequence. + *pi=i; + return errorValue(1, strict); + } } else if(U8_IS_TRAIL(b1) && i>start) { // Extract the value bits from the last trail byte. 
c&=0x3f; uint8_t b2=s[--i]; - if(0xe0<=b2 && b2<0xf0) { - b2&=0xf; - if(strict!=-2) { - if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - *pi=i; - c=(b2<<12)|((b1&0x3f)<<6)|c; - if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { - return c; - } else { - // strict: forbid non-characters like U+fffe - return errorValue(2, strict); + if(0xe0<=b2 && b2<=0xf4) { + if(b2<0xf0) { + b2&=0xf; + if(strict!=-2) { + if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + *pi=i; + c=(b2<<12)|((b1&0x3f)<<6)|c; + if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { + return c; + } else { + // strict: forbid non-characters like U+fffe + return errorValue(2, strict); + } + } + } else { + // strict=-2 -> lenient: allow surrogates + b1-=0x80; + if((b2>0 || b1>=0x20)) { + *pi=i; + return (b2<<12)|(b1<<6)|c; } } - } else { - // strict=-2 -> lenient: allow surrogates - b1-=0x80; - if((b2>0 || b1>=0x20)) { - *pi=i; - return (b2<<12)|(b1<<6)|c; - } + } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { + // Truncated 4-byte sequence. + *pi=i; + return errorValue(2, strict); } } else if(U8_IS_TRAIL(b2) && i>start) { uint8_t b3=s[--i]; @@ -281,16 +293,7 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U } } } - } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { - // Truncated 4-byte sequence. - *pi=i; - return errorValue(2, strict); } - } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) || - (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) { - // Truncated 3- or 4-byte sequence. - *pi=i; - return errorValue(1, strict); } } return errorValue(0, strict); @@ -303,29 +306,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) { uint8_t c=s[i]; if(U8_IS_TRAIL(c) && i>start) { uint8_t b1=s[--i]; - if(0xc2<=b1 && b1<0xe0) { - return i; + if(U8_IS_LEAD(b1)) { + if(b1<0xe0 || + (b1<0xf0 ? 
U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) { + return i; + } } else if(U8_IS_TRAIL(b1) && i>start) { uint8_t b2=s[--i]; - if(0xe0<=b2 && b2<0xf0) { - if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + if(0xe0<=b2 && b2<=0xf4) { + if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { return i; } } else if(U8_IS_TRAIL(b2) && i>start) { uint8_t b3=s[--i]; - if(0xf0<=b3 && b3<=0xf4) { - if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { - return i; - } + if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { + return i; } - } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { - // Truncated 4-byte sequence. - return i; } - } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) || - (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) { - // Truncated 3- or 4-byte sequence. - return i; } } return orig_i; diff --git a/deps/icu-small/source/common/utrie.h b/deps/icu-small/source/common/utrie.h index 9c5382c594..641027a1a3 100644 --- a/deps/icu-small/source/common/utrie.h +++ b/deps/icu-small/source/common/utrie.h @@ -556,7 +556,7 @@ struct UNewTrie { * Index values at build-time are 32 bits wide for easier processing. * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()). 
*/ - int32_t index[UTRIE_MAX_INDEX_LENGTH]; + int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT]; uint32_t *data; uint32_t leadUnitValue; diff --git a/deps/icu-small/source/common/uts46.cpp b/deps/icu-small/source/common/uts46.cpp index 9b8d3ded2f..5a23572eb6 100644 --- a/deps/icu-small/source/common/uts46.cpp +++ b/deps/icu-small/source/common/uts46.cpp @@ -1126,7 +1126,6 @@ isASCIIOkBiDi(const char *s, int32_t length) { UBool UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { - const UBiDiProps *bdp=ubidi_getSingleton(); // [IDNA2008-Tables] // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER for(int32_t i=0; i<labelLength; ++i) { @@ -1148,7 +1147,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { } // check precontext (Joining_Type:{L,D})(Joining_Type:T)* for(;;) { - UJoiningType type=ubidi_getJoiningType(bdp, c); + UJoiningType type=ubidi_getJoiningType(c); if(type==U_JT_TRANSPARENT) { if(j==0) { return FALSE; @@ -1166,7 +1165,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { return FALSE; } U16_NEXT_UNSAFE(label, j, c); - UJoiningType type=ubidi_getJoiningType(bdp, c); + UJoiningType type=ubidi_getJoiningType(c); if(type==U_JT_TRANSPARENT) { // just skip this character } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) { diff --git a/deps/icu-small/source/common/utypes.cpp b/deps/icu-small/source/common/utypes.cpp index 8f5791be16..5d6a0504ba 100644 --- a/deps/icu-small/source/common/utypes.cpp +++ b/deps/icu-small/source/common/utypes.cpp @@ -125,7 +125,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_UNDEFINED_KEYWORD", "U_DEFAULT_KEYWORD_MISSING", "U_DECIMAL_NUMBER_SYNTAX_ERROR", - "U_FORMAT_INEXACT_ERROR" + "U_FORMAT_INEXACT_ERROR", + "U_NUMBER_ARG_OUTOFBOUNDS_ERROR" }; static const char * const |