summaryrefslogtreecommitdiff
path: root/deps/icu-small/source/common
diff options
context:
space:
mode:
Diffstat (limited to 'deps/icu-small/source/common')
-rw-r--r--deps/icu-small/source/common/bmpset.cpp4
-rw-r--r--deps/icu-small/source/common/brkeng.cpp65
-rw-r--r--deps/icu-small/source/common/brkeng.h33
-rw-r--r--deps/icu-small/source/common/brkiter.cpp13
-rw-r--r--deps/icu-small/source/common/bytesinkutil.cpp28
-rw-r--r--deps/icu-small/source/common/bytesinkutil.h10
-rw-r--r--deps/icu-small/source/common/cmemory.cpp24
-rw-r--r--deps/icu-small/source/common/cmemory.h21
-rw-r--r--deps/icu-small/source/common/cstring.h18
-rw-r--r--deps/icu-small/source/common/dictbe.cpp29
-rw-r--r--deps/icu-small/source/common/dictbe.h31
-rw-r--r--deps/icu-small/source/common/filteredbrk.cpp5
-rw-r--r--deps/icu-small/source/common/rbbi.cpp195
-rw-r--r--deps/icu-small/source/common/rbbi_cache.cpp47
-rw-r--r--deps/icu-small/source/common/rbbi_cache.h2
-rw-r--r--deps/icu-small/source/common/rbbidata.cpp4
-rw-r--r--deps/icu-small/source/common/rbbidata.h16
-rw-r--r--deps/icu-small/source/common/rbbirb.cpp45
-rw-r--r--deps/icu-small/source/common/rbbirb.h9
-rw-r--r--deps/icu-small/source/common/rbbiscan.cpp31
-rw-r--r--deps/icu-small/source/common/rbbisetb.cpp55
-rw-r--r--deps/icu-small/source/common/rbbisetb.h10
-rw-r--r--deps/icu-small/source/common/rbbitblb.cpp144
-rw-r--r--deps/icu-small/source/common/rbbitblb.h45
-rw-r--r--deps/icu-small/source/common/sharedobject.cpp69
-rw-r--r--deps/icu-small/source/common/sharedobject.h116
-rw-r--r--deps/icu-small/source/common/sprpimpl.h1
-rw-r--r--deps/icu-small/source/common/ubidi.cpp7
-rw-r--r--deps/icu-small/source/common/ubidi_props.cpp103
-rw-r--r--deps/icu-small/source/common/ubidi_props.h28
-rw-r--r--deps/icu-small/source/common/ubidiimp.h2
-rw-r--r--deps/icu-small/source/common/ucase.cpp165
-rw-r--r--deps/icu-small/source/common/ucase.h35
-rw-r--r--deps/icu-small/source/common/ucasemap.cpp272
-rw-r--r--deps/icu-small/source/common/ucasemap_imp.h9
-rw-r--r--deps/icu-small/source/common/uchar.cpp8
-rw-r--r--deps/icu-small/source/common/ucmndata.cpp6
-rw-r--r--deps/icu-small/source/common/ucmndata.h6
-rw-r--r--deps/icu-small/source/common/ucnv2022.cpp4
-rw-r--r--deps/icu-small/source/common/ucnv_err.cpp32
-rw-r--r--deps/icu-small/source/common/ucnv_u32.cpp8
-rw-r--r--deps/icu-small/source/common/ucnv_u8.cpp54
-rw-r--r--deps/icu-small/source/common/ucnvlat1.cpp8
-rw-r--r--deps/icu-small/source/common/ucnvmbcs.cpp10
-rw-r--r--deps/icu-small/source/common/ucurr.cpp29
-rw-r--r--deps/icu-small/source/common/unicode/brkiter.h15
-rw-r--r--deps/icu-small/source/common/unicode/bytestriebuilder.h1
-rw-r--r--deps/icu-small/source/common/unicode/casemap.h24
-rw-r--r--deps/icu-small/source/common/unicode/char16ptr.h44
-rw-r--r--deps/icu-small/source/common/unicode/chariter.h2
-rw-r--r--deps/icu-small/source/common/unicode/dtintrv.h2
-rw-r--r--deps/icu-small/source/common/unicode/edits.h58
-rw-r--r--deps/icu-small/source/common/unicode/filteredbrk.h6
-rw-r--r--deps/icu-small/source/common/unicode/locid.h4
-rw-r--r--deps/icu-small/source/common/unicode/parseerr.h4
-rw-r--r--deps/icu-small/source/common/unicode/platform.h12
-rw-r--r--deps/icu-small/source/common/unicode/putil.h6
-rw-r--r--deps/icu-small/source/common/unicode/rbbi.h124
-rw-r--r--deps/icu-small/source/common/unicode/resbund.h2
-rw-r--r--deps/icu-small/source/common/unicode/schriter.h2
-rw-r--r--deps/icu-small/source/common/unicode/ubidi.h4
-rw-r--r--deps/icu-small/source/common/unicode/ubrk.h18
-rw-r--r--deps/icu-small/source/common/unicode/uchar.h7
-rw-r--r--deps/icu-small/source/common/unicode/uclean.h16
-rw-r--r--deps/icu-small/source/common/unicode/ucnv.h8
-rw-r--r--deps/icu-small/source/common/unicode/ucnv_err.h12
-rw-r--r--deps/icu-small/source/common/unicode/ucurr.h13
-rw-r--r--deps/icu-small/source/common/unicode/umachine.h6
-rw-r--r--deps/icu-small/source/common/unicode/uniset.h1
-rw-r--r--deps/icu-small/source/common/unicode/unistr.h54
-rw-r--r--deps/icu-small/source/common/unicode/urename.h5
-rw-r--r--deps/icu-small/source/common/unicode/ures.h16
-rw-r--r--deps/icu-small/source/common/unicode/uscript.h2
-rw-r--r--deps/icu-small/source/common/unicode/ushape.h2
-rw-r--r--deps/icu-small/source/common/unicode/usprep.h10
-rw-r--r--deps/icu-small/source/common/unicode/ustring.h4
-rw-r--r--deps/icu-small/source/common/unicode/utext.h8
-rw-r--r--deps/icu-small/source/common/unicode/utf8.h166
-rw-r--r--deps/icu-small/source/common/unicode/utrace.h4
-rw-r--r--deps/icu-small/source/common/unicode/utypes.h17
-rw-r--r--deps/icu-small/source/common/unicode/uvernum.h25
-rw-r--r--deps/icu-small/source/common/unicode/uversion.h10
-rw-r--r--deps/icu-small/source/common/unifiedcache.cpp304
-rw-r--r--deps/icu-small/source/common/unifiedcache.h210
-rw-r--r--deps/icu-small/source/common/uniset_closure.cpp2
-rw-r--r--deps/icu-small/source/common/uniset_props.cpp28
-rw-r--r--deps/icu-small/source/common/uprops.cpp16
-rw-r--r--deps/icu-small/source/common/ushape.cpp6
-rw-r--r--deps/icu-small/source/common/usprep.cpp6
-rw-r--r--deps/icu-small/source/common/ustr_wcs.cpp2
-rw-r--r--deps/icu-small/source/common/ustrcase.cpp305
-rw-r--r--deps/icu-small/source/common/utf_impl.cpp87
-rw-r--r--deps/icu-small/source/common/utrie.h2
-rw-r--r--deps/icu-small/source/common/uts46.cpp5
-rw-r--r--deps/icu-small/source/common/utypes.cpp3
95 files changed, 2077 insertions, 1469 deletions
diff --git a/deps/icu-small/source/common/bmpset.cpp b/deps/icu-small/source/common/bmpset.cpp
index f84bfd7f5b..35bc80dce3 100644
--- a/deps/icu-small/source/common/bmpset.cpp
+++ b/deps/icu-small/source/common/bmpset.cpp
@@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
++lead;
}
if(lead<limitLead) {
- bits=~((1<<lead)-1);
+ bits=~(((unsigned)1<<lead)-1);
if(limitLead<0x20) {
- bits&=(1<<limitLead)-1;
+ bits&=((unsigned)1<<limitLead)-1;
}
for(trail=0; trail<64; ++trail) {
table[trail]|=bits;
diff --git a/deps/icu-small/source/common/brkeng.cpp b/deps/icu-small/source/common/brkeng.cpp
index da64b3bdef..a513bafb16 100644
--- a/deps/icu-small/source/common/brkeng.cpp
+++ b/deps/icu-small/source/common/brkeng.cpp
@@ -59,58 +59,47 @@ LanguageBreakFactory::~LanguageBreakFactory() {
******************************************************************
*/
-UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
- for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
- fHandled[i] = 0;
- }
+UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) {
+ (void)status;
}
UnhandledEngine::~UnhandledEngine() {
- for (int32_t i = 0; i < UPRV_LENGTHOF(fHandled); ++i) {
- if (fHandled[i] != 0) {
- delete fHandled[i];
- }
- }
+ delete fHandled;
+ fHandled = nullptr;
}
UBool
-UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
- return (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)
- && fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
+UnhandledEngine::handles(UChar32 c) const {
+ return fHandled && fHandled->contains(c);
}
int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t endPos,
- int32_t breakType,
UVector32 &/*foundBreaks*/ ) const {
- if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
- UChar32 c = utext_current32(text);
- while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
- utext_next32(text); // TODO: recast loop to work with post-increment operations.
- c = utext_current32(text);
- }
+ UChar32 c = utext_current32(text);
+ while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
+ utext_next32(text); // TODO: recast loop to work with post-increment operations.
+ c = utext_current32(text);
}
return 0;
}
void
-UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
- if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
- if (fHandled[breakType] == 0) {
- fHandled[breakType] = new UnicodeSet();
- if (fHandled[breakType] == 0) {
- return;
- }
- }
- if (!fHandled[breakType]->contains(c)) {
- UErrorCode status = U_ZERO_ERROR;
- // Apply the entire script of the character.
- int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
- fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
+UnhandledEngine::handleCharacter(UChar32 c) {
+ if (fHandled == nullptr) {
+ fHandled = new UnicodeSet();
+ if (fHandled == nullptr) {
+ return;
}
}
+ if (!fHandled->contains(c)) {
+ UErrorCode status = U_ZERO_ERROR;
+ // Apply the entire script of the character.
+ int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
+ fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
+ }
}
/*
@@ -138,7 +127,7 @@ U_NAMESPACE_BEGIN
static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
const LanguageBreakEngine *
-ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
+ICULanguageBreakFactory::getEngineFor(UChar32 c) {
const LanguageBreakEngine *lbe = NULL;
UErrorCode status = U_ZERO_ERROR;
@@ -156,14 +145,14 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
- if (lbe != NULL && lbe->handles(c, breakType)) {
+ if (lbe != NULL && lbe->handles(c)) {
return lbe;
}
}
}
// We didn't find an engine. Create one.
- lbe = loadEngineFor(c, breakType);
+ lbe = loadEngineFor(c);
if (lbe != NULL) {
fEngines->push((void *)lbe, status);
}
@@ -171,11 +160,11 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
}
const LanguageBreakEngine *
-ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
+ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
- DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
+ DictionaryMatcher *m = loadDictionaryMatcherFor(code);
if (m != NULL) {
const LanguageBreakEngine *engine = NULL;
switch(code) {
@@ -236,7 +225,7 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
}
DictionaryMatcher *
-ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
+ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
UErrorCode status = U_ZERO_ERROR;
// open root from brkitr tree.
UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
diff --git a/deps/icu-small/source/common/brkeng.h b/deps/icu-small/source/common/brkeng.h
index 5c61d2ed5d..e40fce13f6 100644
--- a/deps/icu-small/source/common/brkeng.h
+++ b/deps/icu-small/source/common/brkeng.h
@@ -54,11 +54,10 @@ class LanguageBreakEngine : public UMemory {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
- * @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
- virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
+ virtual UBool handles(UChar32 c) const = 0;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@@ -68,14 +67,12 @@ class LanguageBreakEngine : public UMemory {
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
- * @param breakType The type of break desired, or -1.
* @param foundBreaks A Vector of int32_t to receive the breaks.
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
- int32_t breakType,
UVector32 &foundBreaks ) const = 0;
};
@@ -125,11 +122,9 @@ class LanguageBreakFactory : public UMemory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
- * @param breakType The kind of text break for which a LanguageBreakEngine is
- * sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
- virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
+ virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
};
@@ -152,11 +147,11 @@ class UnhandledEngine : public LanguageBreakEngine {
private:
/**
- * The sets of characters handled, for each break type
+ * The sets of characters handled.
* @internal
*/
- UnicodeSet *fHandled[4];
+ UnicodeSet *fHandled;
public:
@@ -176,11 +171,10 @@ class UnhandledEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
- * @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
- virtual UBool handles(UChar32 c, int32_t breakType) const;
+ virtual UBool handles(UChar32 c) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@@ -190,23 +184,20 @@ class UnhandledEngine : public LanguageBreakEngine {
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
- * @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
- int32_t breakType,
UVector32 &foundBreaks ) const;
/**
* <p>Tell the engine to handle a particular character and break type.</p>
*
* @param c A character which the engine should handle
- * @param breakType The type of text break for which the engine should handle c
*/
- virtual void handleCharacter(UChar32 c, int32_t breakType);
+ virtual void handleCharacter(UChar32 c);
};
@@ -250,11 +241,9 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
- * @param breakType The kind of text break for which a LanguageBreakEngine is
- * sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
- virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
+ virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
protected:
/**
@@ -263,21 +252,17 @@ protected:
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
- * @param breakType The kind of text break for which a LanguageBreakEngine is
- * sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
- virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
+ virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
/**
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
* @param script An ISO 15924 script code that identifies the dictionary to be
* created.
- * @param breakType The kind of text break for which a dictionary is
- * sought.
* @return A DictionaryMatcher with the desired characteristics, or NULL.
*/
- virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
+ virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
};
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/brkiter.cpp b/deps/icu-small/source/common/brkiter.cpp
index a509ff10c9..23e0cc3c15 100644
--- a/deps/icu-small/source/common/brkiter.cpp
+++ b/deps/icu-small/source/common/brkiter.cpp
@@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN
// -------------------------------------
BreakIterator*
-BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
+BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status)
{
char fnbuff[256];
char ext[4]={'\0'};
@@ -121,7 +121,6 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind,
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actualLocale.data());
- result->setBreakType(kind);
}
ures_close(b);
@@ -413,10 +412,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
BreakIterator *result = NULL;
switch (kind) {
case UBRK_CHARACTER:
- result = BreakIterator::buildInstance(loc, "grapheme", kind, status);
+ result = BreakIterator::buildInstance(loc, "grapheme", status);
break;
case UBRK_WORD:
- result = BreakIterator::buildInstance(loc, "word", kind, status);
+ result = BreakIterator::buildInstance(loc, "word", status);
break;
case UBRK_LINE:
uprv_strcpy(lbType, "line");
@@ -429,10 +428,10 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
uprv_strcat(lbType, lbKeyValue);
}
}
- result = BreakIterator::buildInstance(loc, lbType, kind, status);
+ result = BreakIterator::buildInstance(loc, lbType, status);
break;
case UBRK_SENTENCE:
- result = BreakIterator::buildInstance(loc, "sentence", kind, status);
+ result = BreakIterator::buildInstance(loc, "sentence", status);
#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
{
char ssKeyValue[kKeyValueLenMax] = {0};
@@ -449,7 +448,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
#endif
break;
case UBRK_TITLE:
- result = BreakIterator::buildInstance(loc, "title", kind, status);
+ result = BreakIterator::buildInstance(loc, "title", status);
break;
default:
status = U_ILLEGAL_ARGUMENT_ERROR;
diff --git a/deps/icu-small/source/common/bytesinkutil.cpp b/deps/icu-small/source/common/bytesinkutil.cpp
index bf1a2d45f8..6af7ddfd59 100644
--- a/deps/icu-small/source/common/bytesinkutil.cpp
+++ b/deps/icu-small/source/common/bytesinkutil.cpp
@@ -92,20 +92,16 @@ ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
sink.Append(s8, 2);
}
-UBool
-ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
- ByteSink &sink, uint32_t options, Edits *edits,
- UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return FALSE; }
- if (length > 0) {
- if (edits != nullptr) {
- edits->addUnchanged(length);
- }
- if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
- sink.Append(reinterpret_cast<const char *>(s), length);
- }
+void
+ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
+ ByteSink &sink, uint32_t options, Edits *edits) {
+ U_ASSERT(length > 0);
+ if (edits != nullptr) {
+ edits->addUnchanged(length);
+ }
+ if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
+ sink.Append(reinterpret_cast<const char *>(s), length);
}
- return TRUE;
}
UBool
@@ -117,7 +113,11 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
}
- return appendUnchanged(s, (int32_t)(limit - s), sink, options, edits, errorCode);
+ int32_t length = (int32_t)(limit - s);
+ if (length > 0) {
+ appendNonEmptyUnchanged(s, length, sink, options, edits);
+ }
+ return TRUE;
}
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/bytesinkutil.h b/deps/icu-small/source/common/bytesinkutil.h
index 004b49c4ce..8287ffea4c 100644
--- a/deps/icu-small/source/common/bytesinkutil.h
+++ b/deps/icu-small/source/common/bytesinkutil.h
@@ -43,11 +43,19 @@ public:
static UBool appendUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits,
- UErrorCode &errorCode);
+ UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return FALSE; }
+ if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
+ return TRUE;
+ }
static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
+
+private:
+ static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
+ ByteSink &sink, uint32_t options, Edits *edits);
};
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/cmemory.cpp b/deps/icu-small/source/common/cmemory.cpp
index 300279c243..0b7e432c4d 100644
--- a/deps/icu-small/source/common/cmemory.cpp
+++ b/deps/icu-small/source/common/cmemory.cpp
@@ -41,30 +41,6 @@ static int n=0;
static long b=0;
#endif
-#if U_DEBUG
-
-static char gValidMemorySink = 0;
-
-U_CAPI void uprv_checkValidMemory(const void *p, size_t n) {
- /*
- * Access the memory to ensure that it's all valid.
- * Load and save a computed value to try to ensure that the compiler
- * does not throw away the whole loop.
- * A thread analyzer might complain about un-mutexed access to gValidMemorySink
- * which is true but harmless because no one ever uses the value in gValidMemorySink.
- */
- const char *s = (const char *)p;
- char c = gValidMemorySink;
- size_t i;
- U_ASSERT(p != NULL);
- for(i = 0; i < n; ++i) {
- c ^= s[i];
- }
- gValidMemorySink = c;
-}
-
-#endif /* U_DEBUG */
-
U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h
index 83a0129651..a44f9a1902 100644
--- a/deps/icu-small/source/common/cmemory.h
+++ b/deps/icu-small/source/common/cmemory.h
@@ -36,31 +36,10 @@
#include <stdio.h>
#endif
-#if U_DEBUG
-
-/*
- * The C++ standard requires that the source pointer for memcpy() & memmove()
- * is valid, not NULL, and not at the end of an allocated memory block.
- * In debug mode, we read one byte from the source point to verify that it's
- * a valid, readable pointer.
- */
-
-U_CAPI void uprv_checkValidMemory(const void *p, size_t n);
-
-#define uprv_memcpy(dst, src, size) ( \
- uprv_checkValidMemory(src, 1), \
- U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size))
-#define uprv_memmove(dst, src, size) ( \
- uprv_checkValidMemory(src, 1), \
- U_STANDARD_CPP_NAMESPACE memmove(dst, src, size))
-
-#else
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
-#endif /* U_DEBUG */
-
/**
* \def UPRV_LENGTHOF
* Convenience macro to determine the length of a fixed array at compile-time.
diff --git a/deps/icu-small/source/common/cstring.h b/deps/icu-small/source/common/cstring.h
index 2232efcda5..ed0b1a7c8b 100644
--- a/deps/icu-small/source/common/cstring.h
+++ b/deps/icu-small/source/common/cstring.h
@@ -40,28 +40,10 @@
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
-
-#if U_DEBUG
-
-#define uprv_strncpy(dst, src, size) ( \
- uprv_checkValidMemory(src, 1), \
- U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size))
-#define uprv_strncmp(s1, s2, n) ( \
- uprv_checkValidMemory(s1, 1), \
- uprv_checkValidMemory(s2, 1), \
- U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n))
-#define uprv_strncat(dst, src, n) ( \
- uprv_checkValidMemory(src, 1), \
- U_STANDARD_CPP_NAMESPACE strncat(dst, src, n))
-
-#else
-
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
-#endif /* U_DEBUG */
-
/**
* Is c an ASCII-repertoire letter a-z or A-Z?
* Note: The implementation is specific to whether ICU is compiled for
diff --git a/deps/icu-small/source/common/dictbe.cpp b/deps/icu-small/source/common/dictbe.cpp
index 02fc8a4726..419d062ef2 100644
--- a/deps/icu-small/source/common/dictbe.cpp
+++ b/deps/icu-small/source/common/dictbe.cpp
@@ -29,24 +29,21 @@ U_NAMESPACE_BEGIN
******************************************************************
*/
-DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) {
- fTypes = breakTypes;
+DictionaryBreakEngine::DictionaryBreakEngine() {
}
DictionaryBreakEngine::~DictionaryBreakEngine() {
}
UBool
-DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
- return (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)
- && fSet.contains(c));
+DictionaryBreakEngine::handles(UChar32 c) const {
+ return fSet.contains(c);
}
int32_t
DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
- int32_t breakType,
UVector32 &foundBreaks ) const {
(void)startPos; // TODO: remove this param?
int32_t result = 0;
@@ -66,10 +63,8 @@ DictionaryBreakEngine::findBreaks( UText *text,
}
rangeStart = start;
rangeEnd = current;
- if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
- result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
- utext_setNativeIndex(text, current);
- }
+ result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
+ utext_setNativeIndex(text, current);
return result;
}
@@ -194,7 +189,7 @@ static const int32_t THAI_MIN_WORD = 2;
static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
- : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
+ : DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
@@ -436,7 +431,7 @@ static const int32_t LAO_MIN_WORD = 2;
static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2;
LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
- : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
+ : DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
@@ -632,7 +627,7 @@ static const int32_t BURMESE_MIN_WORD = 2;
static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2;
BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
- : DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
+ : DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
@@ -825,7 +820,7 @@ static const int32_t KHMER_MIN_WORD = 2;
static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
- : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
+ : DictionaryBreakEngine(),
fDictionary(adoptDictionary)
{
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
@@ -1047,7 +1042,7 @@ foundBest:
*/
static const uint32_t kuint32max = 0xFFFFFFFF;
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
-: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
+: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
// Korean dictionary only includes Hangul syllables
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
@@ -1324,8 +1319,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
if (katakanaRunLength < kMaxKatakanaGroupLength) {
uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
- if (newSnlp < (uint32_t)bestSnlp.elementAti(j)) {
- bestSnlp.setElementAt(newSnlp, j);
+ if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
+ bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i;
}
}
diff --git a/deps/icu-small/source/common/dictbe.h b/deps/icu-small/source/common/dictbe.h
index ffc1ae9f26..99d176cc2e 100644
--- a/deps/icu-small/source/common/dictbe.h
+++ b/deps/icu-small/source/common/dictbe.h
@@ -42,27 +42,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
UnicodeSet fSet;
- /**
- * The set of break types handled by this engine
- * @internal
- */
-
- uint32_t fTypes;
-
- /**
- * <p>Default constructor.</p>
- *
- */
- DictionaryBreakEngine();
-
public:
/**
- * <p>Constructor setting the break types handled.</p>
- *
- * @param breakTypes A bitmap of types handled by the engine.
+ * <p>Constructor </p>
*/
- DictionaryBreakEngine( uint32_t breakTypes );
+ DictionaryBreakEngine();
/**
* <p>Virtual destructor.</p>
@@ -74,11 +59,10 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
- * @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
- virtual UBool handles( UChar32 c, int32_t breakType ) const;
+ virtual UBool handles(UChar32 c) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
@@ -88,14 +72,12 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* that starts from the first character in the range.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
- * @param breakType The type of break desired, or -1.
* @param foundBreaks vector of int32_t to receive the break positions
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
- int32_t breakType,
UVector32 &foundBreaks ) const;
protected:
@@ -108,13 +90,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
virtual void setCharacters( const UnicodeSet &set );
/**
- * <p>Set the break types handled by this engine.</p>
- *
- * @param breakTypes A bitmap of types handled by the engine.
- */
-// virtual void setBreakTypes( uint32_t breakTypes );
-
- /**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
diff --git a/deps/icu-small/source/common/filteredbrk.cpp b/deps/icu-small/source/common/filteredbrk.cpp
index 6a38b1bf3b..162b38de5d 100644
--- a/deps/icu-small/source/common/filteredbrk.cpp
+++ b/deps/icu-small/source/common/filteredbrk.cpp
@@ -694,6 +694,11 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
}
FilteredBreakIteratorBuilder *
+FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {
+ return createEmptyInstance(status);
+}
+
+FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
if(U_FAILURE(status)) return NULL;
LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp
index 54b289e24d..69f92d94c6 100644
--- a/deps/icu-small/source/common/rbbi.cpp
+++ b/deps/icu-small/source/common/rbbi.cpp
@@ -64,7 +64,9 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
* Constructs a RuleBasedBreakIterator that uses the already-created
* tables object that is passed in as a parameter.
*/
-RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) {
+RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
+ : fSCharIter(UnicodeString())
+{
init(status);
fData = new RBBIDataWrapper(data, status); // status checked in constructor
if (U_FAILURE(status)) {return;}
@@ -80,7 +82,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
//
RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
uint32_t ruleLength,
- UErrorCode &status) {
+ UErrorCode &status)
+ : fSCharIter(UnicodeString())
+{
init(status);
if (U_FAILURE(status)) {
return;
@@ -110,6 +114,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
+ : fSCharIter(UnicodeString())
{
init(status);
fData = new RBBIDataWrapper(udm, status); // status checked in constructor
@@ -130,6 +135,7 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &sta
RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
UParseError &parseError,
UErrorCode &status)
+ : fSCharIter(UnicodeString())
{
init(status);
if (U_FAILURE(status)) {return;}
@@ -152,7 +158,9 @@ RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
// Used when creating a RuleBasedBreakIterator from a set
// of rules.
//-------------------------------------------------------------------------------
-RuleBasedBreakIterator::RuleBasedBreakIterator() {
+RuleBasedBreakIterator::RuleBasedBreakIterator()
+ : fSCharIter(UnicodeString())
+{
UErrorCode status = U_ZERO_ERROR;
init(status);
}
@@ -165,7 +173,8 @@ RuleBasedBreakIterator::RuleBasedBreakIterator() {
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
-: BreakIterator(other)
+: BreakIterator(other),
+ fSCharIter(UnicodeString())
{
UErrorCode status = U_ZERO_ERROR;
this->init(status);
@@ -177,17 +186,13 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& oth
* Destructor
*/
RuleBasedBreakIterator::~RuleBasedBreakIterator() {
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
// fCharIter was adopted from the outside.
delete fCharIter;
}
fCharIter = NULL;
- delete fSCharIter;
- fSCharIter = NULL;
- delete fDCharIter;
- fDCharIter = NULL;
- utext_close(fText);
+ utext_close(&fText);
if (fData != NULL) {
fData->removeReference();
@@ -217,26 +222,29 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
}
BreakIterator::operator=(that);
- fBreakType = that.fBreakType;
if (fLanguageBreakEngines != NULL) {
delete fLanguageBreakEngines;
fLanguageBreakEngines = NULL; // Just rebuild for now
}
// TODO: clone fLanguageBreakEngines from "that"
UErrorCode status = U_ZERO_ERROR;
- fText = utext_clone(fText, that.fText, FALSE, TRUE, &status);
+ utext_clone(&fText, &that.fText, FALSE, TRUE, &status);
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
delete fCharIter;
}
- fCharIter = NULL;
+ fCharIter = &fSCharIter;
- if (that.fCharIter != NULL ) {
+ if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
// This is a little bit tricky - it will intially appear that
// this->fCharIter is adopted, even if that->fCharIter was
// not adopted. That's ok.
fCharIter = that.fCharIter->clone();
}
+ fSCharIter = that.fSCharIter;
+ if (fCharIter == NULL) {
+ fCharIter = &fSCharIter;
+ }
if (fData != NULL) {
fData->removeReference();
@@ -269,33 +277,30 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
//
//-----------------------------------------------------------------------------
void RuleBasedBreakIterator::init(UErrorCode &status) {
- fText = NULL;
fCharIter = NULL;
- fSCharIter = NULL;
- fDCharIter = NULL;
fData = NULL;
fPosition = 0;
fRuleStatusIndex = 0;
fDone = false;
fDictionaryCharCount = 0;
- fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable
- // dictionary behavior for Break Iterators that are
- // built from rules. Even better would be the ability to
- // declare the type in the rules.
-
fLanguageBreakEngines = NULL;
fUnhandledBreakEngine = NULL;
fBreakCache = NULL;
fDictionaryCache = NULL;
- if (U_FAILURE(status)) {
+ // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
+ // fText = UTEXT_INITIALIZER;
+ static const UText initializedUText = UTEXT_INITIALIZER;
+ uprv_memcpy(&fText, &initializedUText, sizeof(UText));
+
+ if (U_FAILURE(status)) {
return;
}
- fText = utext_openUChars(NULL, NULL, 0, &status);
+ utext_openUChars(&fText, NULL, 0, &status);
fDictionaryCache = new DictionaryCache(this, status);
fBreakCache = new BreakCache(this, status);
- if (U_SUCCESS(status) && (fText == NULL || fDictionaryCache == NULL || fBreakCache == NULL)) {
+ if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
@@ -344,7 +349,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
- if (!utext_equals(fText, that2.fText)) {
+ if (!utext_equals(&fText, &that2.fText)) {
// The two break iterators are operating on different text,
// or have a different iteration position.
// Note that fText's position is always the same as the break iterator's position.
@@ -385,7 +390,7 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
}
fBreakCache->reset();
fDictionaryCache->reset();
- fText = utext_clone(fText, ut, FALSE, TRUE, &status);
+ utext_clone(&fText, ut, FALSE, TRUE, &status);
// Set up a dummy CharacterIterator to be returned if anyone
// calls getText(). With input from UText, there is no reasonable
@@ -393,27 +398,20 @@ void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
// Return one over an empty string instead - this is the closest
// we can come to signaling a failure.
// (GetText() is obsolete, this failure is sort of OK)
- if (fDCharIter == NULL) {
- static const UChar c = 0;
- fDCharIter = new UCharCharacterIterator(&c, 0);
- if (fDCharIter == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- }
+ fSCharIter.setText(UnicodeString());
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
// existing fCharIter was adopted from the outside. Delete it now.
delete fCharIter;
}
- fCharIter = fDCharIter;
+ fCharIter = &fSCharIter;
this->first();
}
UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
- UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status);
+ UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status);
return result;
}
@@ -439,7 +437,7 @@ void
RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
// If we are holding a CharacterIterator adopted from a
// previous call to this function, delete it now.
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
delete fCharIter;
}
@@ -450,9 +448,9 @@ RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
if (newText==NULL || newText->startIndex() != 0) {
// startIndex !=0 wants to be an error, but there's no way to report it.
// Make the iterator text be an empty string.
- fText = utext_openUChars(fText, NULL, 0, &status);
+ utext_openUChars(&fText, NULL, 0, &status);
} else {
- fText = utext_openCharacterIterator(fText, newText, &status);
+ utext_openCharacterIterator(&fText, newText, &status);
}
this->first();
}
@@ -467,23 +465,19 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) {
UErrorCode status = U_ZERO_ERROR;
fBreakCache->reset();
fDictionaryCache->reset();
- fText = utext_openConstUnicodeString(fText, &newText, &status);
+ utext_openConstUnicodeString(&fText, &newText, &status);
// Set up a character iterator on the string.
// Needed in case someone calls getText().
// Can not, unfortunately, do this lazily on the (probably never)
// call to getText(), because getText is const.
- if (fSCharIter == NULL) {
- fSCharIter = new StringCharacterIterator(newText);
- } else {
- fSCharIter->setText(newText);
- }
+ fSCharIter.setText(newText);
- if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
+ if (fCharIter != &fSCharIter) {
// old fCharIter was adopted from the outside. Delete it.
delete fCharIter;
}
- fCharIter = fSCharIter;
+ fCharIter = &fSCharIter;
this->first();
}
@@ -503,14 +497,14 @@ RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, U
status = U_ILLEGAL_ARGUMENT_ERROR;
return *this;
}
- int64_t pos = utext_getNativeIndex(fText);
+ int64_t pos = utext_getNativeIndex(&fText);
// Shallow read-only clone of the new UText into the existing input UText
- fText = utext_clone(fText, input, FALSE, TRUE, &status);
+ utext_clone(&fText, input, FALSE, TRUE, &status);
if (U_FAILURE(status)) {
return *this;
}
- utext_setNativeIndex(fText, pos);
- if (utext_getNativeIndex(fText) != pos) {
+ utext_setNativeIndex(&fText, pos);
+ if (utext_getNativeIndex(&fText) != pos) {
// Sanity check. The new input utext is supposed to have the exact same
// contents as the old. If we can't set to the same position, it doesn't.
// The contents underlying the old utext might be invalid at this point,
@@ -540,7 +534,7 @@ int32_t RuleBasedBreakIterator::first(void) {
* @return The text's past-the-end offset.
*/
int32_t RuleBasedBreakIterator::last(void) {
- int32_t endPos = (int32_t)utext_nativeLength(fText);
+ int32_t endPos = (int32_t)utext_nativeLength(&fText);
UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
(void)endShouldBeBoundary;
U_ASSERT(endShouldBeBoundary);
@@ -611,8 +605,8 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
// Move requested offset to a code point start. It might be on a trail surrogate,
// or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
- utext_setNativeIndex(fText, startPos);
- startPos = (int32_t)utext_getNativeIndex(fText);
+ utext_setNativeIndex(&fText, startPos);
+ startPos = (int32_t)utext_getNativeIndex(&fText);
UErrorCode status = U_ZERO_ERROR;
fBreakCache->following(startPos, status);
@@ -626,15 +620,15 @@ int32_t RuleBasedBreakIterator::following(int32_t startPos) {
* @return The position of the last boundary before the starting position.
*/
int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
- if (fText == NULL || offset > utext_nativeLength(fText)) {
+ if (offset > utext_nativeLength(&fText)) {
return last();
}
// Move requested offset to a code point start. It might be on a trail surrogate,
// or on a trail byte if the input is UTF-8.
- utext_setNativeIndex(fText, offset);
- int32_t adjustedOffset = utext_getNativeIndex(fText);
+ utext_setNativeIndex(&fText, offset);
+ int32_t adjustedOffset = utext_getNativeIndex(&fText);
UErrorCode status = U_ZERO_ERROR;
fBreakCache->preceding(adjustedOffset, status);
@@ -660,8 +654,8 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
// Note that isBoundary() is always be false for offsets that are not on code point boundaries.
// But we still need the side effect of leaving iteration at the following boundary.
- utext_setNativeIndex(fText, offset);
- int32_t adjustedOffset = utext_getNativeIndex(fText);
+ utext_setNativeIndex(&fText, offset);
+ int32_t adjustedOffset = utext_getNativeIndex(&fText);
bool result = false;
UErrorCode status = U_ZERO_ERROR;
@@ -669,7 +663,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
result = (fBreakCache->current() == offset);
}
- if (result && adjustedOffset < offset && utext_char32At(fText, offset) == U_SENTINEL) {
+ if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) {
// Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
// but the iteration position remains set to the end of the text, which is a boundary.
return FALSE;
@@ -789,9 +783,9 @@ int32_t RuleBasedBreakIterator::handleNext() {
// if we're already at the end of the text, return DONE.
initialPosition = fPosition;
- UTEXT_SETNATIVEINDEX(fText, initialPosition);
+ UTEXT_SETNATIVEINDEX(&fText, initialPosition);
result = initialPosition;
- c = UTEXT_NEXT32(fText);
+ c = UTEXT_NEXT32(&fText);
if (c==U_SENTINEL) {
fDone = TRUE;
return UBRK_DONE;
@@ -854,7 +848,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
#ifdef RBBI_DEBUG
if (gTrace) {
- RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText));
+ RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText));
if (0x20<=c && c<0x7f) {
RBBIDebugPrintf("\"%c\" ", c);
} else {
@@ -867,9 +861,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
// State Transition - move machine to its next state
//
- // Note: fNextState is defined as uint16_t[2], but we are casting
- // a generated RBBI table to RBBIStateTableRow and some tables
- // actually have more than 2 categories.
+ // fNextState is a variable-length array.
U_ASSERT(category<fData->fHeader->fCatCount);
state = row->fNextState[category]; /*Not accessing beyond memory*/
row = (RBBIStateTableRow *)
@@ -880,7 +872,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
if (row->fAccepting == -1) {
// Match found, common case.
if (mode != RBBI_START) {
- result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
+ result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values.
}
@@ -898,7 +890,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
int16_t rule = row->fLookAhead;
if (rule != 0) {
// At the position of a '/' in a look-ahead match. Record it.
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
+ int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
lookAheadMatches.setPosition(rule, pos);
}
@@ -914,7 +906,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
// the input position. The next iteration will be processing the
// first real input character.
if (mode == RBBI_RUN) {
- c = UTEXT_NEXT32(fText);
+ c = UTEXT_NEXT32(&fText);
} else {
if (mode == RBBI_START) {
mode = RBBI_RUN;
@@ -928,9 +920,9 @@ int32_t RuleBasedBreakIterator::handleNext() {
// (This really indicates a defect in the break rules. They should always match
// at least one character.)
if (result == initialPosition) {
- utext_setNativeIndex(fText, initialPosition);
- utext_next32(fText);
- result = (int32_t)utext_getNativeIndex(fText);
+ utext_setNativeIndex(&fText, initialPosition);
+ utext_next32(&fText);
+ result = (int32_t)utext_getNativeIndex(&fText);
fRuleStatusIndex = 0;
}
@@ -965,7 +957,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
int32_t initialPosition = 0;
const RBBIStateTable *stateTable = fData->fSafeRevTable;
- UTEXT_SETNATIVEINDEX(fText, fromPosition);
+ UTEXT_SETNATIVEINDEX(&fText, fromPosition);
#ifdef RBBI_DEBUG
if (gTrace) {
RBBIDebugPuts("Handle Previous pos char state category");
@@ -973,14 +965,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
#endif
// if we're already at the start of the text, return DONE.
- if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) {
+ if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) {
return BreakIterator::DONE;
}
// Set up the starting char.
- initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
+ initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
result = initialPosition;
- c = UTEXT_PREVIOUS32(fText);
+ c = UTEXT_PREVIOUS32(&fText);
// Set the initial state for the state machine
state = START_STATE;
@@ -1028,7 +1020,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
#ifdef RBBI_DEBUG
if (gTrace) {
- RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText));
+ RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText));
if (0x20<=c && c<0x7f) {
RBBIDebugPrintf("\"%c\" ", c);
} else {
@@ -1041,9 +1033,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// State Transition - move machine to its next state
//
- // Note: fNextState is defined as uint16_t[2], but we are casting
- // a generated RBBI table to RBBIStateTableRow and some tables
- // actually have more than 2 categories.
+ // fNextState is a variable-length array.
U_ASSERT(category<fData->fHeader->fCatCount);
state = row->fNextState[category]; /*Not accessing beyond memory*/
row = (RBBIStateTableRow *)
@@ -1051,7 +1041,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
if (row->fAccepting == -1) {
// Match found, common case.
- result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
+ result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
int16_t completedRule = row->fAccepting;
@@ -1059,14 +1049,14 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// Lookahead match is completed.
int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
if (lookaheadResult >= 0) {
- UTEXT_SETNATIVEINDEX(fText, lookaheadResult);
+ UTEXT_SETNATIVEINDEX(&fText, lookaheadResult);
return lookaheadResult;
}
}
int16_t rule = row->fLookAhead;
if (rule != 0) {
// At the position of a '/' in a look-ahead match. Record it.
- int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
+ int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
lookAheadMatches.setPosition(rule, pos);
}
@@ -1082,7 +1072,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// the input position. The next iteration will be processing the
// first real input character.
if (mode == RBBI_RUN) {
- c = UTEXT_PREVIOUS32(fText);
+ c = UTEXT_PREVIOUS32(&fText);
} else {
if (mode == RBBI_START) {
mode = RBBI_RUN;
@@ -1096,9 +1086,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// (This really indicates a defect in the break rules. They should always match
// at least one character.)
if (result == initialPosition) {
- UTEXT_SETNATIVEINDEX(fText, initialPosition);
- UTEXT_PREVIOUS32(fText);
- result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
+ UTEXT_SETNATIVEINDEX(&fText, initialPosition);
+ UTEXT_PREVIOUS32(&fText);
+ result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
#ifdef RBBI_DEBUG
@@ -1247,7 +1237,7 @@ static void U_CALLCONV initLanguageFactories() {
static const LanguageBreakEngine*
-getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
+getLanguageBreakEngineFromFactory(UChar32 c)
{
umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
if (gLanguageBreakFactories == NULL) {
@@ -1258,7 +1248,7 @@ getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
const LanguageBreakEngine *lbe = NULL;
while (--i >= 0) {
LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
- lbe = factory->getEngineFor(c, breakType);
+ lbe = factory->getEngineFor(c);
if (lbe != NULL) {
break;
}
@@ -1290,14 +1280,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
int32_t i = fLanguageBreakEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
- if (lbe->handles(c, fBreakType)) {
+ if (lbe->handles(c)) {
return lbe;
}
}
// No existing dictionary took the character. See if a factory wants to
// give us a new LanguageBreakEngine for this character.
- lbe = getLanguageBreakEngineFromFactory(c, fBreakType);
+ lbe = getLanguageBreakEngineFromFactory(c);
// If we got one, use it and push it on our stack.
if (lbe != NULL) {
@@ -1313,6 +1303,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
fUnhandledBreakEngine = new UnhandledEngine(status);
if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
+ return nullptr;
}
// Put it last so that scripts for which we have an engine get tried
// first.
@@ -1327,25 +1318,19 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
// Tell the reject engine about the character; at its discretion, it may
// add more than just the one character.
- fUnhandledBreakEngine->handleCharacter(c, fBreakType);
+ fUnhandledBreakEngine->handleCharacter(c);
return fUnhandledBreakEngine;
}
-
-
-/*int32_t RuleBasedBreakIterator::getBreakType() const {
- return fBreakType;
-}*/
-
-void RuleBasedBreakIterator::setBreakType(int32_t type) {
- fBreakType = type;
-}
-
void RuleBasedBreakIterator::dumpCache() {
fBreakCache->dumpCache();
}
+void RuleBasedBreakIterator::dumpTables() {
+ fData->printData();
+}
+
/**
* Returns the description used to create this iterator
*/
diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp
index 9d716bb342..ba9329d477 100644
--- a/deps/icu-small/source/common/rbbi_cache.cpp
+++ b/deps/icu-small/source/common/rbbi_cache.cpp
@@ -26,14 +26,11 @@ U_NAMESPACE_BEGIN
*/
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
- fBI(bi), fBreaks(NULL), fPositionInCache(-1),
+ fBI(bi), fBreaks(status), fPositionInCache(-1),
fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
- fBreaks = new UVector32(status);
}
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
- delete fBreaks;
- fBreaks = NULL;
}
void RuleBasedBreakIterator::DictionaryCache::reset() {
@@ -42,7 +39,7 @@ void RuleBasedBreakIterator::DictionaryCache::reset() {
fLimit = 0;
fFirstRuleStatusIndex = 0;
fOtherRuleStatusIndex = 0;
- fBreaks->removeAllElements();
+ fBreaks.removeAllElements();
}
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
@@ -54,13 +51,13 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_
// Sequential iteration, move from previous boundary to the following
int32_t r = 0;
- if (fPositionInCache >= 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
+ if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
++fPositionInCache;
- if (fPositionInCache >= fBreaks->size()) {
+ if (fPositionInCache >= fBreaks.size()) {
fPositionInCache = -1;
return FALSE;
}
- r = fBreaks->elementAti(fPositionInCache);
+ r = fBreaks.elementAti(fPositionInCache);
U_ASSERT(r > fromPos);
*result = r;
*statusIndex = fOtherRuleStatusIndex;
@@ -69,8 +66,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_
// Random indexing. Linear search for the boundary following the given position.
- for (fPositionInCache = 0; fPositionInCache < fBreaks->size(); ++fPositionInCache) {
- r= fBreaks->elementAti(fPositionInCache);
+ for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
+ r= fBreaks.elementAti(fPositionInCache);
if (r > fromPos) {
*result = r;
*statusIndex = fOtherRuleStatusIndex;
@@ -90,16 +87,16 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_
}
if (fromPos == fLimit) {
- fPositionInCache = fBreaks->size() - 1;
+ fPositionInCache = fBreaks.size() - 1;
if (fPositionInCache >= 0) {
- U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos);
+ U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
}
}
int32_t r;
- if (fPositionInCache > 0 && fPositionInCache < fBreaks->size() && fBreaks->elementAti(fPositionInCache) == fromPos) {
+ if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
--fPositionInCache;
- r = fBreaks->elementAti(fPositionInCache);
+ r = fBreaks.elementAti(fPositionInCache);
U_ASSERT(r < fromPos);
*result = r;
*statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
@@ -111,8 +108,8 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_
return FALSE;
}
- for (fPositionInCache = fBreaks->size()-1; fPositionInCache >= 0; --fPositionInCache) {
- r = fBreaks->elementAti(fPositionInCache);
+ for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
+ r = fBreaks.elementAti(fPositionInCache);
if (r < fromPos) {
*result = r;
*statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
@@ -141,7 +138,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
int32_t current;
UErrorCode status = U_ZERO_ERROR;
int32_t foundBreakCount = 0;
- UText *text = fBI->fText;
+ UText *text = &fBI->fText;
// Loop through the text, looking for ranges of dictionary characters.
// For each span, find the appropriate break engine, and ask it to find
@@ -168,7 +165,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
// Ask the language object if there are any breaks. It will add them to the cache and
// leave the text pointer on the other side of its range, ready to search for the next one.
if (lbe != NULL) {
- foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
+ foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
}
// Reload the loop variables for the next go-round
@@ -182,21 +179,21 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
// printf("foundBreakCount = %d\n", foundBreakCount);
if (foundBreakCount > 0) {
- U_ASSERT(foundBreakCount == fBreaks->size());
- if (startPos < fBreaks->elementAti(0)) {
+ U_ASSERT(foundBreakCount == fBreaks.size());
+ if (startPos < fBreaks.elementAti(0)) {
// The dictionary did not place a boundary at the start of the segment of text.
// Add one now. This should not commonly happen, but it would be easy for interactions
// of the rules for dictionary segments and the break engine implementations to
// inadvertently cause it. Cover it here, just in case.
- fBreaks->insertElementAt(startPos, 0, status);
+ fBreaks.insertElementAt(startPos, 0, status);
}
- if (endPos > fBreaks->peeki()) {
- fBreaks->push(endPos, status);
+ if (endPos > fBreaks.peeki()) {
+ fBreaks.push(endPos, status);
}
fPositionInCache = 0;
// Note: Dictionary matching may extend beyond the original limit.
- fStart = fBreaks->elementAti(0);
- fLimit = fBreaks->peeki();
+ fStart = fBreaks.elementAti(0);
+ fLimit = fBreaks.peeki();
} else {
// there were no language-based breaks, even though the segment contained
// dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
diff --git a/deps/icu-small/source/common/rbbi_cache.h b/deps/icu-small/source/common/rbbi_cache.h
index 8dc7320db9..fd6deb4333 100644
--- a/deps/icu-small/source/common/rbbi_cache.h
+++ b/deps/icu-small/source/common/rbbi_cache.h
@@ -56,7 +56,7 @@ class RuleBasedBreakIterator::DictionaryCache: public UMemory {
RuleBasedBreakIterator *fBI;
- UVector32 *fBreaks; // A vector containing the boundaries.
+ UVector32 fBreaks; // A vector containing the boundaries.
int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
// or preceding(). Optimizes sequential access.
int32_t fStart; // Text position of first boundary in cache.
diff --git a/deps/icu-small/source/common/rbbidata.cpp b/deps/icu-small/source/common/rbbidata.cpp
index d66eca82f8..18912a6a7b 100644
--- a/deps/icu-small/source/common/rbbidata.cpp
+++ b/deps/icu-small/source/common/rbbidata.cpp
@@ -267,8 +267,8 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
#endif
-#ifdef RBBI_DEBUG
void RBBIDataWrapper::printData() {
+#ifdef RBBI_DEBUG
RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
@@ -285,8 +285,8 @@ void RBBIDataWrapper::printData() {
RBBIDebugPrintf("%c", fRuleSource[c]);
}
RBBIDebugPrintf("\n\n");
-}
#endif
+}
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/rbbidata.h b/deps/icu-small/source/common/rbbidata.h
index 75427863d9..8b21acca30 100644
--- a/deps/icu-small/source/common/rbbidata.h
+++ b/deps/icu-small/source/common/rbbidata.h
@@ -116,9 +116,10 @@ struct RBBIStateTableRow {
/* StatusTable of the set of matching */
/* tags (rule status values) */
int16_t fReserved;
- uint16_t fNextState[2]; /* Next State, indexed by char category. */
- /* This array does not have two elements */
- /* Array Size is actually fData->fHeader->fCatCount */
+ uint16_t fNextState[1]; /* Next State, indexed by char category. */
+ /* Variable-length array declared with length 1 */
+ /* to disable bounds checkers. */
+ /* Array Size is actually fData->fHeader->fCatCount*/
/* CAUTION: see RBBITableBuilder::getTableSize() */
/* before changing anything here. */
};
@@ -129,7 +130,9 @@ struct RBBIStateTable {
uint32_t fRowLen; /* Length of a state table row, in bytes. */
uint32_t fFlags; /* Option Flags for this state table */
uint32_t fReserved; /* reserved */
- char fTableData[4]; /* First RBBIStateTableRow begins here. */
+ char fTableData[1]; /* First RBBIStateTableRow begins here. */
+ /* Variable-length array declared with length 1 */
+ /* to disable bounds checkers. */
/* (making it char[] simplifies ugly address */
/* arithmetic for indexing variable length rows.) */
};
@@ -162,13 +165,8 @@ public:
UBool operator ==(const RBBIDataWrapper &other) const;
int32_t hashCode();
const UnicodeString &getRuleSourceString() const;
-#ifdef RBBI_DEBUG
void printData();
void printTable(const char *heading, const RBBIStateTable *table);
-#else
- #define printData()
- #define printTable(heading, table)
-#endif
/* */
/* Pointers to items within the data */
diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp
index c67f6f8166..9fc8f8e814 100644
--- a/deps/icu-small/source/common/rbbirb.cpp
+++ b/deps/icu-small/source/common/rbbirb.cpp
@@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN
RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
UParseError *parseErr,
UErrorCode &status)
- : fRules(rules)
+ : fRules(rules), fStrippedRules(rules)
{
fStatus = &status; // status is checked below
fParseError = parseErr;
@@ -147,8 +147,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
return NULL;
}
- // Remove comments and whitespace from the rules to make it smaller.
- UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));
+ // Remove whitespace from the rules to make it smaller.
+ // The rule parser has already removed comments.
+ fStrippedRules = fScanner->stripRules(fStrippedRules);
// Calculate the size of each section in the data.
// Sizes here are padded up to a multiple of 8 for better memory alignment.
@@ -162,7 +163,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
int32_t trieSize = align8(fSetBuilder->getTrieSize());
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
- int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
+ int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
(void)safeFwdTableSize;
@@ -225,7 +226,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fStatusTable = data->fTrie + trieSize;
data->fStatusTableLen= statusTableSize;
data->fRuleSource = data->fStatusTable + statusTableSize;
- data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);
+ data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
@@ -245,7 +246,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
}
- strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
+ fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
return data;
}
@@ -281,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
//
// UnicodeSet processing.
// Munge the Unicode Sets to create a set of character categories.
- // Generate the mapping tables (TRIE) from input 32-bit characters to
+ // Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
- builder.fSetBuilder->build();
+ builder.fSetBuilder->buildRanges();
//
@@ -316,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
}
#endif
+ builder.optimizeTables();
+ builder.fSetBuilder->buildTrie();
+
+
+
//
// Package up the compiled data into a memory image
// in the run-time format.
@@ -347,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
return This;
}
+void RBBIRuleBuilder::optimizeTables() {
+ int32_t leftClass;
+ int32_t rightClass;
+
+ leftClass = 3;
+ rightClass = 0;
+ while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
+ fSetBuilder->mergeCategories(leftClass, rightClass);
+ fForwardTables->removeColumn(rightClass);
+ fReverseTables->removeColumn(rightClass);
+ fSafeFwdTables->removeColumn(rightClass);
+ fSafeRevTables->removeColumn(rightClass);
+ }
+
+ fForwardTables->removeDuplicateStates();
+ fReverseTables->removeDuplicateStates();
+ fSafeFwdTables->removeDuplicateStates();
+ fSafeRevTables->removeDuplicateStates();
+
+
+
+}
+
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/deps/icu-small/source/common/rbbirb.h b/deps/icu-small/source/common/rbbirb.h
index 6fbdbff744..511f394b45 100644
--- a/deps/icu-small/source/common/rbbirb.h
+++ b/deps/icu-small/source/common/rbbirb.h
@@ -126,10 +126,19 @@ public:
);
virtual ~RBBIRuleBuilder();
+
+ /**
+ * Fold together redundant character classes (table columns) and
+ * redundant states (table rows). Done after initial table generation,
+ * before serializing the result.
+ */
+ void optimizeTables();
+
char *fDebugEnv; // controls debug trace output
UErrorCode *fStatus; // Error reporting. Keeping status
UParseError *fParseError; // here avoids passing it everywhere.
const UnicodeString &fRules; // The rule string that we are compiling
+ UnicodeString fStrippedRules; // The rule string, with comments stripped.
RBBIRuleScanner *fScanner; // The scanner.
RBBINode *fForwardTree; // The parse trees, generated by the scanner,
diff --git a/deps/icu-small/source/common/rbbiscan.cpp b/deps/icu-small/source/common/rbbiscan.cpp
index 1653a0c7bc..e3472ed599 100644
--- a/deps/icu-small/source/common/rbbiscan.cpp
+++ b/deps/icu-small/source/common/rbbiscan.cpp
@@ -822,27 +822,24 @@ static const UChar chRParen = 0x29;
//------------------------------------------------------------------------------
//
-// stripRules Return a rules string without unnecessary
-// characters.
+// stripRules Return a rules string without extra spaces.
+// (Comments are removed separately, during rule parsing.)
//
//------------------------------------------------------------------------------
UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
UnicodeString strippedRules;
- int rulesLength = rules.length();
- for (int idx = 0; idx < rulesLength; ) {
- UChar ch = rules[idx++];
- if (ch == chPound) {
- while (idx < rulesLength
- && ch != chCR && ch != chLF && ch != chNEL)
- {
- ch = rules[idx++];
- }
- }
- if (!u_isISOControl(ch)) {
- strippedRules.append(ch);
+ int32_t rulesLength = rules.length();
+ bool skippingSpaces = false;
+
+ for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) {
+ UChar32 cp = rules.char32At(idx);
+ bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
+ if (skippingSpaces && whiteSpace) {
+ continue;
}
+ strippedRules.append(cp);
+ skippingSpaces = whiteSpace;
}
- // strippedRules = strippedRules.unescape();
return strippedRules;
}
@@ -942,6 +939,7 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
// It will be treated as white-space, and serves to break up anything
// that might otherwise incorrectly clump together with a comment in
// the middle (a variable name, for example.)
+ int32_t commentStart = fScanIndex;
for (;;) {
c.fChar = nextCharLL();
if (c.fChar == (UChar32)-1 || // EOF
@@ -950,6 +948,9 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
c.fChar == chNEL ||
c.fChar == chLS) {break;}
}
+ for (int32_t i=commentStart; i<fNextIndex-1; ++i) {
+ fRB->fStrippedRules.setCharAt(i, u' ');
+ }
}
if (c.fChar == (UChar32)-1) {
return;
diff --git a/deps/icu-small/source/common/rbbisetb.cpp b/deps/icu-small/source/common/rbbisetb.cpp
index c172da00df..4e7389b4af 100644
--- a/deps/icu-small/source/common/rbbisetb.cpp
+++ b/deps/icu-small/source/common/rbbisetb.cpp
@@ -91,7 +91,7 @@ RBBISetBuilder::~RBBISetBuilder()
// from the Unicode Sets.
//
//------------------------------------------------------------------------
-void RBBISetBuilder::build() {
+void RBBISetBuilder::buildRanges() {
RBBINode *usetNode;
RangeDescriptor *rlRange;
@@ -245,11 +245,16 @@ void RBBISetBuilder::build() {
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
+}
+
+
+//
+// Build the Trie table for mapping UChar32 values to the corresponding
+// range group number.
+//
+void RBBISetBuilder::buildTrie() {
+ RangeDescriptor *rlRange;
- //
- // Build the Trie table for mapping UChar32 values to the corresponding
- // range group number
- //
fTrie = utrie2_open(0, // Initial value for all code points.
0, // Error value for out-of-range input.
fStatus);
@@ -265,6 +270,22 @@ void RBBISetBuilder::build() {
}
+void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
+ U_ASSERT(left >= 1);
+ U_ASSERT(right > left);
+ for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
+ int32_t rangeNum = rd->fNum & ~DICT_BIT;
+ int32_t rangeDict = rd->fNum & DICT_BIT;
+ if (rangeNum == right) {
+ rd->fNum = left | rangeDict;
+ } else if (rangeNum > right) {
+ rd->fNum--;
+ }
+ }
+ --fGroupCount;
+}
+
+
//-----------------------------------------------------------------------------------
//
// getTrieSize() Return the size that will be required to serialize the Trie.
@@ -446,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() {
lastPrintedGroupNum = groupNum;
RBBIDebugPrintf("%2i ", groupNum);
- if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
+ if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
@@ -639,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
void RangeDescriptor::setDictionaryFlag() {
int i;
- for (i=0; i<this->fIncludesSets->size(); i++) {
- RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
- UnicodeString setName;
- RBBINode *setRef = usetNode->fParent;
- if (setRef != NULL) {
+ static const char16_t *dictionary = u"dictionary";
+ for (i=0; i<fIncludesSets->size(); i++) {
+ RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
+ RBBINode *setRef = usetNode->fParent;
+ if (setRef != nullptr) {
RBBINode *varRef = setRef->fParent;
- if (varRef != NULL && varRef->fType == RBBINode::varRef) {
- setName = varRef->fText;
+ if (varRef && varRef->fType == RBBINode::varRef) {
+ const UnicodeString *setName = &varRef->fText;
+ if (setName->compare(dictionary, -1) == 0) {
+ fNum |= RBBISetBuilder::DICT_BIT;
+ break;
+ }
}
}
- if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals.
- this->fNum |= 0x4000;
- break;
- }
}
}
diff --git a/deps/icu-small/source/common/rbbisetb.h b/deps/icu-small/source/common/rbbisetb.h
index 7cedb45b33..a7a91b3b37 100644
--- a/deps/icu-small/source/common/rbbisetb.h
+++ b/deps/icu-small/source/common/rbbisetb.h
@@ -82,7 +82,8 @@ public:
RBBISetBuilder(RBBIRuleBuilder *rb);
~RBBISetBuilder();
- void build();
+ void buildRanges();
+ void buildTrie();
void addValToSets(UVector *sets, uint32_t val);
void addValToSet (RBBINode *usetNode, uint32_t val);
int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
@@ -93,6 +94,13 @@ public:
UChar32 getFirstChar(int32_t val) const;
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
// character were encountered.
+ /** merge two character categories that have been identified as having equivalent behavior.
+ * The ranges belonging to the right category (table column) will be added to the left.
+ */
+ void mergeCategories(int32_t left, int32_t right);
+
+ static constexpr int32_t DICT_BIT = 0x4000;
+
#ifdef RBBI_DEBUG
void printSets();
void printRanges();
diff --git a/deps/icu-small/source/common/rbbitblb.cpp b/deps/icu-small/source/common/rbbitblb.cpp
index b3e6ca51d1..61661a5442 100644
--- a/deps/icu-small/source/common/rbbitblb.cpp
+++ b/deps/icu-small/source/common/rbbitblb.cpp
@@ -22,6 +22,7 @@
#include "rbbidata.h"
#include "cstring.h"
#include "uassert.h"
+#include "uvectr32.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
@@ -761,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() {
// if sd->fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
- // the fLookAhead field or this state also.
+ // the fLookAhead field for this state also.
if (endMarker->fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
@@ -1077,7 +1078,128 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
}
#endif
+//
+// findDuplCharClassFrom()
+//
+bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) {
+ int32_t numStates = fDStates->size();
+ int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+
+ uint16_t table_base;
+ uint16_t table_dupl;
+ for (; baseCategory < numCols-1; ++baseCategory) {
+ for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
+ for (int32_t state=0; state<numStates; state++) {
+ RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+ table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);
+ table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory);
+ if (table_base != table_dupl) {
+ break;
+ }
+ }
+ if (table_base == table_dupl) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+//
+// removeColumn()
+//
+void RBBITableBuilder::removeColumn(int32_t column) {
+ int32_t numStates = fDStates->size();
+ for (int32_t state=0; state<numStates; state++) {
+ RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+ U_ASSERT(column < sd->fDtran->size());
+ sd->fDtran->removeElementAt(column);
+ }
+}
+
+/*
+ * findDuplicateState
+ */
+bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
+ int32_t numStates = fDStates->size();
+ int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+
+ for (; firstState<numStates-1; ++firstState) {
+ RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
+ for (duplState=firstState+1; duplState<numStates; ++duplState) {
+ RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
+ if (firstSD->fAccepting != duplSD->fAccepting ||
+ firstSD->fLookAhead != duplSD->fLookAhead ||
+ firstSD->fTagsIdx != duplSD->fTagsIdx) {
+ continue;
+ }
+ bool rowsMatch = true;
+ for (int32_t col=0; col < numCols; ++col) {
+ int32_t firstVal = firstSD->fDtran->elementAti(col);
+ int32_t duplVal = duplSD->fDtran->elementAti(col);
+ if (!((firstVal == duplVal) ||
+ ((firstVal == firstState || firstVal == duplState) &&
+ (duplVal == firstState || duplVal == duplState)))) {
+ rowsMatch = false;
+ break;
+ }
+ }
+ if (rowsMatch) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
+ U_ASSERT(keepState < duplState);
+ U_ASSERT(duplState < fDStates->size());
+ RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
+ fDStates->removeElementAt(duplState);
+ delete duplSD;
+
+ int32_t numStates = fDStates->size();
+ int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
+ for (int32_t state=0; state<numStates; ++state) {
+ RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
+ for (int32_t col=0; col<numCols; col++) {
+ int32_t existingVal = sd->fDtran->elementAti(col);
+ int32_t newVal = existingVal;
+ if (existingVal == duplState) {
+ newVal = keepState;
+ } else if (existingVal > duplState) {
+ newVal = existingVal - 1;
+ }
+ sd->fDtran->setElementAt(newVal, col);
+ }
+ if (sd->fAccepting == duplState) {
+ sd->fAccepting = keepState;
+ } else if (sd->fAccepting > duplState) {
+ sd->fAccepting--;
+ }
+ if (sd->fLookAhead == duplState) {
+ sd->fLookAhead = keepState;
+ } else if (sd->fLookAhead > duplState) {
+ sd->fLookAhead--;
+ }
+ }
+}
+
+
+/*
+ * RemoveDuplicateStates
+ */
+void RBBITableBuilder::removeDuplicateStates() {
+ int32_t firstState = 3;
+ int32_t duplicateState = 0;
+ while (findDuplicateState(firstState, duplicateState)) {
+ // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
+ removeState(firstState, duplicateState);
+ }
+}
//-----------------------------------------------------------------------------
//
@@ -1095,21 +1217,17 @@ int32_t RBBITableBuilder::getTableSize() const {
return 0;
}
- size = sizeof(RBBIStateTable) - 4; // The header, with no rows to the table.
+ size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table.
numRows = fDStates->size();
numCols = fRB->fSetBuilder->getNumCharCategories();
- // Note The declaration of RBBIStateTableRow is for a table of two columns.
- // Therefore we subtract two from numCols when determining
- // how much storage to add to a row for the total columns.
- rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2);
+ rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols;
size += numRows * rowSize;
return size;
}
-
//-----------------------------------------------------------------------------
//
// exportTable() export the state transition table in the format required
@@ -1126,14 +1244,14 @@ void RBBITableBuilder::exportTable(void *where) {
return;
}
- if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff ||
+ int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
+ if (catCount > 0x7fff ||
fDStates->size() > 0x7fff) {
*fStatus = U_BRK_INTERNAL_ERROR;
return;
}
- table->fRowLen = sizeof(RBBIStateTableRow) +
- sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2);
+ table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
table->fNumStates = fDStates->size();
table->fFlags = 0;
if (fRB->fLookAheadHardBreak) {
@@ -1152,7 +1270,7 @@ void RBBITableBuilder::exportTable(void *where) {
row->fAccepting = (int16_t)sd->fAccepting;
row->fLookAhead = (int16_t)sd->fLookAhead;
row->fTagIdx = (int16_t)sd->fTagsIdx;
- for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) {
+ for (col=0; col<catCount; col++) {
row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col);
}
}
@@ -1259,7 +1377,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
fPositions = NULL;
fDtran = NULL;
- fDtran = new UVector(lastInputSymbol+1, *fStatus);
+ fDtran = new UVector32(lastInputSymbol+1, *fStatus);
if (U_FAILURE(*fStatus)) {
return;
}
@@ -1267,7 +1385,7 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
*fStatus = U_MEMORY_ALLOCATION_ERROR;
return;
}
- fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized.
+ fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized.
// It is indexed by input symbols, and will
// hold the next state number for each
// symbol.
diff --git a/deps/icu-small/source/common/rbbitblb.h b/deps/icu-small/source/common/rbbitblb.h
index 1041501878..09b57b5cf0 100644
--- a/deps/icu-small/source/common/rbbitblb.h
+++ b/deps/icu-small/source/common/rbbitblb.h
@@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN
class RBBIRuleScanner;
class RBBIRuleBuilder;
+class UVector32;
//
// class RBBITableBuilder is part of the RBBI rule compiler.
@@ -42,9 +43,24 @@ public:
void build();
int32_t getTableSize() const; // Return the runtime size in bytes of
// the built state table
- void exportTable(void *where); // fill in the runtime state table.
- // Sufficient memory must exist at
- // the specified location.
+
+ /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
+ */
+ void exportTable(void *where);
+
+ /** Find duplicate (redundant) character classes, beginning after the specifed
+ * pair, within this state table. This is an iterator-like function, used to
+ * identify char classes (state table columns) that can be eliminated.
+ */
+ bool findDuplCharClassFrom(int &baseClass, int &duplClass);
+
+ /** Remove a column from the state table. Used when two character categories
+ * have been found equivalent, and merged together, to eliminate the uneeded table column.
+ */
+ void removeColumn(int32_t column);
+
+ /** Check for, and remove dupicate states (table rows). */
+ void removeDuplicateStates();
private:
@@ -60,8 +76,29 @@ private:
void flagTaggedStates();
void mergeRuleStatusVals();
+ /**
+ * Merge redundant state table columns, eliminating character classes with identical behavior.
+ * Done after the state tables are generated, just before converting to their run-time format.
+ */
+ int32_t mergeColumns();
+
void addRuleRootNodes(UVector *dest, RBBINode *node);
+ /** Find the next duplicate state. An iterator function.
+ * @param firstState (in/out) begin looking at this state, return the first of the
+ * pair of duplicates.
+ * @param duplicateState returns the duplicate state of fistState
+ * @return true if a duplicate pair of states was found.
+ */
+ bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
+
+ /** Remove a duplicate state/
+ * @param keepState First of the duplicate pair. Keep it.
+ * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
+ * to refer to keepState instead.
+ */
+ void removeState(int32_t keepState, int32_t duplState);
+
// Set functions for UVector.
// TODO: make a USet subclass of UVector
@@ -112,7 +149,7 @@ public:
// with this state. Unordered (it's a set).
// UVector contents are RBBINode *
- UVector *fDtran; // Transitions out of this state.
+ UVector32 *fDtran; // Transitions out of this state.
// indexed by input character
// contents is int index of dest state
// in RBBITableBuilder.fDStates
diff --git a/deps/icu-small/source/common/sharedobject.cpp b/deps/icu-small/source/common/sharedobject.cpp
index 37aa458e00..6eeca8605f 100644
--- a/deps/icu-small/source/common/sharedobject.cpp
+++ b/deps/icu-small/source/common/sharedobject.cpp
@@ -8,7 +8,10 @@
* sharedobject.cpp
*/
#include "sharedobject.h"
+#include "mutex.h"
#include "uassert.h"
+#include "umutex.h"
+#include "unifiedcache.h"
U_NAMESPACE_BEGIN
@@ -17,69 +20,41 @@ SharedObject::~SharedObject() {}
UnifiedCacheBase::~UnifiedCacheBase() {}
void
-SharedObject::addRef(UBool fromWithinCache) const {
- umtx_atomic_inc(&totalRefCount);
-
- // Although items in use may not be correct immediately, it
- // will be correct eventually.
- if (umtx_atomic_inc(&hardRefCount) == 1 && cachePtr != NULL) {
- // If this object is cached, and the hardRefCount goes from 0 to 1,
- // then the increment must happen from within the cache while the
- // cache global mutex is locked. In this way, we can be rest assured
- // that data races can't happen if the cache performs some task if
- // the hardRefCount is zero while the global cache mutex is locked.
- (void)fromWithinCache; // Suppress unused variable warning in non-debug builds.
- U_ASSERT(fromWithinCache);
- cachePtr->incrementItemsInUse();
- }
+SharedObject::addRef() const {
+ umtx_atomic_inc(&hardRefCount);
}
+// removeRef Decrement the reference count and delete if it is zero.
+// Note that SharedObjects with a non-null cachePtr are owned by the
+// unified cache, and the cache will be responsible for the actual deletion.
+// The deletion could be as soon as immediately following the
+// update to the reference count, if another thread is running
+// a cache eviction cycle concurrently.
+// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects.
+// THE OBJECT MAY ALREADY BE GONE.
void
-SharedObject::removeRef(UBool fromWithinCache) const {
- UBool decrementItemsInUse = (umtx_atomic_dec(&hardRefCount) == 0);
- UBool allReferencesGone = (umtx_atomic_dec(&totalRefCount) == 0);
-
- // Although items in use may not be correct immediately, it
- // will be correct eventually.
- if (decrementItemsInUse && cachePtr != NULL) {
- if (fromWithinCache) {
- cachePtr->decrementItemsInUse();
+SharedObject::removeRef() const {
+ const UnifiedCacheBase *cache = this->cachePtr;
+ int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount);
+ U_ASSERT(updatedRefCount >= 0);
+ if (updatedRefCount == 0) {
+ if (cache) {
+ cache->handleUnreferencedObject();
} else {
- cachePtr->decrementItemsInUseWithLockingAndEviction();
+ delete this;
}
}
- if (allReferencesGone) {
- delete this;
- }
}
-void
-SharedObject::addSoftRef() const {
- umtx_atomic_inc(&totalRefCount);
- ++softRefCount;
-}
-
-void
-SharedObject::removeSoftRef() const {
- --softRefCount;
- if (umtx_atomic_dec(&totalRefCount) == 0) {
- delete this;
- }
-}
int32_t
SharedObject::getRefCount() const {
- return umtx_loadAcquire(totalRefCount);
-}
-
-int32_t
-SharedObject::getHardRefCount() const {
return umtx_loadAcquire(hardRefCount);
}
void
SharedObject::deleteIfZeroRefCount() const {
- if(getRefCount() == 0) {
+ if (this->cachePtr == nullptr && getRefCount() == 0) {
delete this;
}
}
diff --git a/deps/icu-small/source/common/sharedobject.h b/deps/icu-small/source/common/sharedobject.h
index 783b55948a..54655d0d71 100644
--- a/deps/icu-small/source/common/sharedobject.h
+++ b/deps/icu-small/source/common/sharedobject.h
@@ -17,6 +17,8 @@
U_NAMESPACE_BEGIN
+class SharedObject;
+
/**
* Base class for unified cache exposing enough methods to SharedObject
* instances to allow their addRef() and removeRef() methods to
@@ -28,22 +30,12 @@ public:
UnifiedCacheBase() { }
/**
- * Called by addRefWhileHoldingCacheLock() when the hard reference count
- * of its instance goes from 0 to 1.
+ * Notify the cache implementation that an object was seen transitioning to
+ * zero hard references. The cache may use this to keep track the number of
+ * unreferenced SharedObjects, and to trigger evictions.
*/
- virtual void incrementItemsInUse() const = 0;
+ virtual void handleUnreferencedObject() const = 0;
- /**
- * Called by removeRef() when the hard reference count of its instance
- * drops from 1 to 0.
- */
- virtual void decrementItemsInUseWithLockingAndEviction() const = 0;
-
- /**
- * Called by removeRefWhileHoldingCacheLock() when the hard reference
- * count of its instance drops from 1 to 0.
- */
- virtual void decrementItemsInUse() const = 0;
virtual ~UnifiedCacheBase();
private:
UnifiedCacheBase(const UnifiedCacheBase &);
@@ -63,7 +55,6 @@ class U_COMMON_API SharedObject : public UObject {
public:
/** Initializes totalRefCount, softRefCount to 0. */
SharedObject() :
- totalRefCount(0),
softRefCount(0),
hardRefCount(0),
cachePtr(NULL) {}
@@ -71,7 +62,6 @@ public:
/** Initializes totalRefCount, softRefCount to 0. */
SharedObject(const SharedObject &other) :
UObject(other),
- totalRefCount(0),
softRefCount(0),
hardRefCount(0),
cachePtr(NULL) {}
@@ -79,93 +69,45 @@ public:
virtual ~SharedObject();
/**
- * Increments the number of references to this object. Thread-safe.
+ * Increments the number of hard references to this object. Thread-safe.
+ * Not for use from within the Unified Cache implementation.
*/
- void addRef() const { addRef(FALSE); }
+ void addRef() const;
/**
- * Increments the number of references to this object.
- * Must be called only from within the internals of UnifiedCache and
- * only while the cache global mutex is held.
- */
- void addRefWhileHoldingCacheLock() const { addRef(TRUE); }
-
- /**
- * Increments the number of soft references to this object.
- * Must be called only from within the internals of UnifiedCache and
- * only while the cache global mutex is held.
- */
- void addSoftRef() const;
-
- /**
- * Decrements the number of references to this object. Thread-safe.
- */
- void removeRef() const { removeRef(FALSE); }
-
- /**
- * Decrements the number of references to this object.
- * Must be called only from within the internals of UnifiedCache and
- * only while the cache global mutex is held.
- */
- void removeRefWhileHoldingCacheLock() const { removeRef(TRUE); }
-
- /**
- * Decrements the number of soft references to this object.
- * Must be called only from within the internals of UnifiedCache and
- * only while the cache global mutex is held.
+ * Decrements the number of hard references to this object, and
+ * arrange for possible cache-eviction and/or deletion if ref
+ * count goes to zero. Thread-safe.
+ *
+ * Not for use from within the UnifiedCache implementation.
*/
- void removeSoftRef() const;
+ void removeRef() const;
/**
- * Returns the reference counter including soft references.
+ * Returns the number of hard references for this object.
* Uses a memory barrier.
*/
int32_t getRefCount() const;
/**
- * Returns the count of soft references only.
- * Must be called only from within the internals of UnifiedCache and
- * only while the cache global mutex is held.
- */
- int32_t getSoftRefCount() const { return softRefCount; }
-
- /**
- * Returns the count of hard references only. Uses a memory barrier.
- * Used for testing the cache. Regular clients won't need this.
- */
- int32_t getHardRefCount() const;
-
- /**
* If noHardReferences() == TRUE then this object has no hard references.
* Must be called only from within the internals of UnifiedCache.
*/
- inline UBool noHardReferences() const { return getHardRefCount() == 0; }
+ inline UBool noHardReferences() const { return getRefCount() == 0; }
/**
* If hasHardReferences() == TRUE then this object has hard references.
* Must be called only from within the internals of UnifiedCache.
*/
- inline UBool hasHardReferences() const { return getHardRefCount() != 0; }
-
- /**
- * If noSoftReferences() == TRUE then this object has no soft references.
- * Must be called only from within the internals of UnifiedCache and
- * only while the cache global mutex is held.
- */
- UBool noSoftReferences() const { return (softRefCount == 0); }
+ inline UBool hasHardReferences() const { return getRefCount() != 0; }
/**
- * Deletes this object if it has no references or soft references.
+ * Deletes this object if it has no references.
+ * Available for non-cached SharedObjects only. Ownership of cached objects
+ * is with the UnifiedCache, which is soley responsible for eviction and deletion.
*/
void deleteIfZeroRefCount() const;
- /**
- * @internal For UnifedCache use only to register this object with itself.
- * Must be called before this object is exposed to multiple threads.
- */
- void registerWithCache(const UnifiedCacheBase *ptr) const {
- cachePtr = ptr;
- }
/**
* Returns a writable version of ptr.
@@ -219,15 +161,21 @@ public:
}
private:
- mutable u_atomic_int32_t totalRefCount;
-
- // Any thread modifying softRefCount must hold the global cache mutex
+ /**
+ * The number of references from the UnifiedCache, which is
+ * the number of times that the sharedObject is stored as a hash table value.
+ * For use by UnifiedCache implementation code only.
+ * All access is synchronized by UnifiedCache's gCacheMutex
+ */
mutable int32_t softRefCount;
+ friend class UnifiedCache;
+ /**
+ * Reference count, excluding references from within the UnifiedCache implementation.
+ */
mutable u_atomic_int32_t hardRefCount;
+
mutable const UnifiedCacheBase *cachePtr;
- void addRef(UBool withCacheLock) const;
- void removeRef(UBool withCacheLock) const;
};
diff --git a/deps/icu-small/source/common/sprpimpl.h b/deps/icu-small/source/common/sprpimpl.h
index aff40ad0da..26de904b1f 100644
--- a/deps/icu-small/source/common/sprpimpl.h
+++ b/deps/icu-small/source/common/sprpimpl.h
@@ -90,7 +90,6 @@ struct UStringPrepProfile{
UTrie sprepTrie;
const uint16_t* mappingData;
UDataMemory* sprepData;
- const UBiDiProps *bdp; /* used only if checkBiDi is set */
int32_t refCount;
UBool isDataLoaded;
UBool doNFKC;
diff --git a/deps/icu-small/source/common/ubidi.cpp b/deps/icu-small/source/common/ubidi.cpp
index 8e2fc36e5f..531ed64cff 100644
--- a/deps/icu-small/source/common/ubidi.cpp
+++ b/deps/icu-small/source/common/ubidi.cpp
@@ -152,9 +152,6 @@ ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode)
/* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
uprv_memset(pBiDi, 0, sizeof(UBiDi));
- /* get BiDi properties */
- pBiDi->bdp=ubidi_getSingleton();
-
/* allocate memory for arrays as requested */
if(maxLength>0) {
if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
@@ -925,7 +922,7 @@ bracketProcessChar(BracketData *bd, int32_t position) {
else
match=0;
if(match!=c && /* has a matching char */
- ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
+ ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
/* special case: process synonyms
create an opening entry for each synonym */
if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
@@ -3033,7 +3030,7 @@ ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
if( pBiDi->fnClassCallback == NULL ||
(dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
{
- dir = ubidi_getClass(pBiDi->bdp, c);
+ dir = ubidi_getClass(c);
}
if(dir >= U_CHAR_DIRECTION_COUNT) {
dir = (UCharDirection)ON;
diff --git a/deps/icu-small/source/common/ubidi_props.cpp b/deps/icu-small/source/common/ubidi_props.cpp
index dcfb52c897..4141c21938 100644
--- a/deps/icu-small/source/common/ubidi_props.cpp
+++ b/deps/icu-small/source/common/ubidi_props.cpp
@@ -44,13 +44,6 @@ struct UBiDiProps {
#define INCLUDED_FROM_UBIDI_PROPS_C
#include "ubidi_props_data.h"
-/* UBiDiProps singleton ----------------------------------------------------- */
-
-U_CFUNC const UBiDiProps *
-ubidi_getSingleton() {
- return &ubidi_props_singleton;
-}
-
/* set of property starts for UnicodeSet ------------------------------------ */
static UBool U_CALLCONV
@@ -64,7 +57,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32
}
U_CFUNC void
-ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) {
+ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
int32_t i, length;
UChar32 c, start, limit;
@@ -76,19 +69,19 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
}
/* add the start code point of each same-value range of the trie */
- utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa);
+ utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
/* add the code points from the bidi mirroring table */
- length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
+ length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
for(i=0; i<length; ++i) {
- c=UBIDI_GET_MIRROR_CODE_POINT(bdp->mirrors[i]);
+ c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]);
sa->addRange(sa->set, c, c+1);
}
/* add the code points from the Joining_Group array where the value changes */
- start=bdp->indexes[UBIDI_IX_JG_START];
- limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
- jgArray=bdp->jgArray;
+ start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
+ limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
+ jgArray=ubidi_props_singleton.jgArray;
for(;;) {
prev=0;
while(start<limit) {
@@ -103,11 +96,11 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
/* add the limit code point if the last value was not 0 (it is now start==limit) */
sa->add(sa->set, limit);
}
- if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) {
+ if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) {
/* switch to the second Joining_Group range */
- start=bdp->indexes[UBIDI_IX_JG_START2];
- limit=bdp->indexes[UBIDI_IX_JG_LIMIT2];
- jgArray=bdp->jgArray2;
+ start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
+ limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
+ jgArray=ubidi_props_singleton.jgArray2;
} else {
break;
}
@@ -121,14 +114,8 @@ ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *
/* property access functions ------------------------------------------------ */
U_CFUNC int32_t
-ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
- int32_t max;
-
- if(bdp==NULL) {
- return -1;
- }
-
- max=bdp->indexes[UBIDI_MAX_VALUES_INDEX];
+ubidi_getMaxValue(UProperty which) {
+ int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX];
switch(which) {
case UCHAR_BIDI_CLASS:
return (max&UBIDI_CLASS_MASK);
@@ -144,19 +131,19 @@ ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) {
}
U_CAPI UCharDirection
-ubidi_getClass(const UBiDiProps *bdp, UChar32 c) {
- uint16_t props=UTRIE2_GET16(&bdp->trie, c);
+ubidi_getClass(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UCharDirection)UBIDI_GET_CLASS(props);
}
U_CFUNC UBool
-ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) {
- uint16_t props=UTRIE2_GET16(&bdp->trie, c);
+ubidi_isMirrored(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT);
}
static UChar32
-getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
+getMirror(UChar32 c, uint16_t props) {
int32_t delta=UBIDI_GET_MIRROR_DELTA(props);
if(delta!=UBIDI_ESC_MIRROR_DELTA) {
return c+delta;
@@ -167,8 +154,8 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
int32_t i, length;
UChar32 c2;
- mirrors=bdp->mirrors;
- length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH];
+ mirrors=ubidi_props_singleton.mirrors;
+ length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH];
/* linear search */
for(i=0; i<length; ++i) {
@@ -188,59 +175,59 @@ getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) {
}
U_CFUNC UChar32
-ubidi_getMirror(const UBiDiProps *bdp, UChar32 c) {
- uint16_t props=UTRIE2_GET16(&bdp->trie, c);
- return getMirror(bdp, c, props);
+ubidi_getMirror(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
+ return getMirror(c, props);
}
U_CFUNC UBool
-ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) {
- uint16_t props=UTRIE2_GET16(&bdp->trie, c);
+ubidi_isBidiControl(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT);
}
U_CFUNC UBool
-ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) {
- uint16_t props=UTRIE2_GET16(&bdp->trie, c);
+ubidi_isJoinControl(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT);
}
U_CFUNC UJoiningType
-ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) {
- uint16_t props=UTRIE2_GET16(&bdp->trie, c);
+ubidi_getJoiningType(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT);
}
U_CFUNC UJoiningGroup
-ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) {
+ubidi_getJoiningGroup(UChar32 c) {
UChar32 start, limit;
- start=bdp->indexes[UBIDI_IX_JG_START];
- limit=bdp->indexes[UBIDI_IX_JG_LIMIT];
+ start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START];
+ limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT];
if(start<=c && c<limit) {
- return (UJoiningGroup)bdp->jgArray[c-start];
+ return (UJoiningGroup)ubidi_props_singleton.jgArray[c-start];
}
- start=bdp->indexes[UBIDI_IX_JG_START2];
- limit=bdp->indexes[UBIDI_IX_JG_LIMIT2];
+ start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2];
+ limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2];
if(start<=c && c<limit) {
- return (UJoiningGroup)bdp->jgArray2[c-start];
+ return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start];
}
return U_JG_NO_JOINING_GROUP;
}
U_CFUNC UBidiPairedBracketType
-ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c) {
- uint16_t props=UTRIE2_GET16(&bdp->trie, c);
+ubidi_getPairedBracketType(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT);
}
U_CFUNC UChar32
-ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) {
- uint16_t props=UTRIE2_GET16(&bdp->trie, c);
+ubidi_getPairedBracket(UChar32 c) {
+ uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c);
if((props&UBIDI_BPT_MASK)==0) {
return c;
} else {
- return getMirror(bdp, c, props);
+ return getMirror(c, props);
}
}
@@ -248,20 +235,20 @@ ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) {
U_CFUNC UCharDirection
u_charDirection(UChar32 c) {
- return ubidi_getClass(&ubidi_props_singleton, c);
+ return ubidi_getClass(c);
}
U_CFUNC UBool
u_isMirrored(UChar32 c) {
- return ubidi_isMirrored(&ubidi_props_singleton, c);
+ return ubidi_isMirrored(c);
}
U_CFUNC UChar32
u_charMirror(UChar32 c) {
- return ubidi_getMirror(&ubidi_props_singleton, c);
+ return ubidi_getMirror(c);
}
U_STABLE UChar32 U_EXPORT2
u_getBidiPairedBracket(UChar32 c) {
- return ubidi_getPairedBracket(&ubidi_props_singleton, c);
+ return ubidi_getPairedBracket(c);
}
diff --git a/deps/icu-small/source/common/ubidi_props.h b/deps/icu-small/source/common/ubidi_props.h
index 69e8853e69..698ee9c52b 100644
--- a/deps/icu-small/source/common/ubidi_props.h
+++ b/deps/icu-small/source/common/ubidi_props.h
@@ -31,46 +31,40 @@ U_CDECL_BEGIN
/* library API -------------------------------------------------------------- */
-struct UBiDiProps;
-typedef struct UBiDiProps UBiDiProps;
-
-U_CFUNC const UBiDiProps *
-ubidi_getSingleton(void);
-
U_CFUNC void
-ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode);
+ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
/* property access functions */
U_CFUNC int32_t
-ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which);
+ubidi_getMaxValue(UProperty which);
U_CAPI UCharDirection
-ubidi_getClass(const UBiDiProps *bdp, UChar32 c);
+ubidi_getClass(UChar32 c);
U_CFUNC UBool
-ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c);
+ubidi_isMirrored(UChar32 c);
U_CFUNC UChar32
-ubidi_getMirror(const UBiDiProps *bdp, UChar32 c);
+ubidi_getMirror(UChar32 c);
U_CFUNC UBool
-ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c);
+ubidi_isBidiControl(UChar32 c);
U_CFUNC UBool
-ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c);
+ubidi_isJoinControl(UChar32 c);
U_CFUNC UJoiningType
-ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c);
+ubidi_getJoiningType(UChar32 c);
U_CFUNC UJoiningGroup
-ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c);
+ubidi_getJoiningGroup(UChar32 c);
U_CFUNC UBidiPairedBracketType
-ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c);
+ubidi_getPairedBracketType(UChar32 c);
U_CFUNC UChar32
-ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c);
+ubidi_getPairedBracket(UChar32 c);
/* file definitions --------------------------------------------------------- */
diff --git a/deps/icu-small/source/common/ubidiimp.h b/deps/icu-small/source/common/ubidiimp.h
index fd64fac34d..a5d0727495 100644
--- a/deps/icu-small/source/common/ubidiimp.h
+++ b/deps/icu-small/source/common/ubidiimp.h
@@ -254,8 +254,6 @@ struct UBiDi {
*/
const UBiDi * pParaBiDi;
- const UBiDiProps *bdp;
-
/* alias pointer to the current text */
const UChar *text;
diff --git a/deps/icu-small/source/common/ucase.cpp b/deps/icu-small/source/common/ucase.cpp
index 1f41dbf6de..28d5a4cac6 100644
--- a/deps/icu-small/source/common/ucase.cpp
+++ b/deps/icu-small/source/common/ucase.cpp
@@ -77,9 +77,12 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
/* data access primitives --------------------------------------------------- */
-#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
+U_CFUNC const UTrie2 * U_EXPORT2
+ucase_getTrie() {
+ return &ucase_props_singleton.trie;
+}
-#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
+#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
/* number of bits in an 8-bit integer value */
static const uint8_t flagsOffset[256]={
@@ -128,8 +131,8 @@ static const uint8_t flagsOffset[256]={
U_CAPI UChar32 U_EXPORT2
ucase_tolower(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
- if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ if(UCASE_IS_UPPER_OR_TITLE(props)) {
c+=UCASE_GET_DELTA(props);
}
} else {
@@ -145,7 +148,7 @@ ucase_tolower(UChar32 c) {
U_CAPI UChar32 U_EXPORT2
ucase_toupper(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
c+=UCASE_GET_DELTA(props);
}
@@ -162,7 +165,7 @@ ucase_toupper(UChar32 c) {
U_CAPI UChar32 U_EXPORT2
ucase_totitle(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
c+=UCASE_GET_DELTA(props);
}
@@ -223,7 +226,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
}
props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
/* add the one simple case mapping, no matter what type it is */
int32_t delta=UCASE_GET_DELTA(props);
@@ -419,6 +422,138 @@ FullCaseFoldingIterator::next(UnicodeString &full) {
return c;
}
+namespace LatinCase {
+
+const int8_t TO_LOWER_NORMAL[LIMIT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
+
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
+};
+
+const int8_t TO_LOWER_TR_LT[LIMIT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
+ EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
+
+ 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
+ 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
+};
+
+const int8_t TO_UPPER_NORMAL[LIMIT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
+
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
+
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
+};
+
+const int8_t TO_UPPER_TR[LIMIT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
+ -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
+ -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
+
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
+
+ -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
+ 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
+};
+
+} // namespace LatinCase
+
U_NAMESPACE_END
/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
@@ -439,7 +574,7 @@ ucase_getTypeOrIgnorable(UChar32 c) {
static inline int32_t
getDotType(UChar32 c) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
return props&UCASE_DOT_MASK;
} else {
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
@@ -878,8 +1013,8 @@ ucase_toFullLower(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
- if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ if(UCASE_IS_UPPER_OR_TITLE(props)) {
result=c+UCASE_GET_DELTA(props);
}
} else {
@@ -1024,7 +1159,7 @@ toUpperOrTitle(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
result=c+UCASE_GET_DELTA(props);
}
@@ -1169,8 +1304,8 @@ ucase_toFullTitle(UChar32 c,
U_CAPI UChar32 U_EXPORT2
ucase_fold(UChar32 c, uint32_t options) {
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
- if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ if(UCASE_IS_UPPER_OR_TITLE(props)) {
c+=UCASE_GET_DELTA(props);
}
} else {
@@ -1234,8 +1369,8 @@ ucase_toFullFolding(UChar32 c,
U_ASSERT(c >= 0);
UChar32 result=c;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
- if(!PROPS_HAS_EXCEPTION(props)) {
- if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ if(UCASE_IS_UPPER_OR_TITLE(props)) {
result=c+UCASE_GET_DELTA(props);
}
} else {
diff --git a/deps/icu-small/source/common/ucase.h b/deps/icu-small/source/common/ucase.h
index 9d6365eadf..a7a8c9f00d 100644
--- a/deps/icu-small/source/common/ucase.h
+++ b/deps/icu-small/source/common/ucase.h
@@ -26,6 +26,7 @@
#include "putilimp.h"
#include "uset_imp.h"
#include "udataswp.h"
+#include "utrie2.h"
#ifdef __cplusplus
U_NAMESPACE_BEGIN
@@ -148,6 +149,33 @@ private:
int32_t rowCpIndex;
};
+/**
+ * Fast case mapping data for ASCII/Latin.
+ * Linear arrays of delta bytes: 0=no mapping; EXC=exception.
+ * Deltas must not cross the ASCII boundary, or else they cannot be easily used
+ * in simple UTF-8 code.
+ */
+namespace LatinCase {
+
+/** Case mapping/folding data for code points up to U+017F. */
+constexpr UChar LIMIT = 0x180;
+/** U+017F case-folds and uppercases crossing the ASCII boundary. */
+constexpr UChar LONG_S = 0x17f;
+/** Exception: Complex mapping, or too-large delta. */
+constexpr int8_t EXC = -0x80;
+
+/** Deltas for lowercasing for most locales, and default case folding. */
+extern const int8_t TO_LOWER_NORMAL[LIMIT];
+/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
+extern const int8_t TO_LOWER_TR_LT[LIMIT];
+
+/** Deltas for uppercasing for most locales. */
+extern const int8_t TO_UPPER_NORMAL[LIMIT];
+/** Deltas for uppercasing for tr/az. */
+extern const int8_t TO_UPPER_TR[LIMIT];
+
+} // namespace LatinCase
+
U_NAMESPACE_END
#endif
@@ -308,6 +336,9 @@ enum {
/* definitions for 16-bit case properties word ------------------------------ */
+U_CFUNC const UTrie2 * U_EXPORT2
+ucase_getTrie();
+
/* 2-bit constants for types of cased characters */
#define UCASE_TYPE_MASK 3
enum {
@@ -320,10 +351,14 @@ enum {
#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
+#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)
+
#define UCASE_IGNORABLE 4
#define UCASE_SENSITIVE 8
#define UCASE_EXCEPTION 0x10
+#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
+
#define UCASE_DOT_MASK 0x60
enum {
UCASE_NO_DOT=0, /* normal characters with cc=0 */
diff --git a/deps/icu-small/source/common/ucasemap.cpp b/deps/icu-small/source/common/ucasemap.cpp
index 8eec93c6e3..99e30c9fc6 100644
--- a/deps/icu-small/source/common/ucasemap.cpp
+++ b/deps/icu-small/source/common/ucasemap.cpp
@@ -165,9 +165,7 @@ appendResult(int32_t cpLength, int32_t result, const UChar *s,
inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
-} // namespace
-
-static UChar32 U_CALLCONV
+UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
UChar32 c;
@@ -199,36 +197,227 @@ utf8_caseContextIterator(void *context, int8_t dir) {
return U_SENTINEL;
}
-/*
- * Case-maps [srcStart..srcLimit[ but takes
- * context [0..srcLength[ into account.
+/**
+ * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
+ * caseLocale < 0: Case-folds [srcStart..srcLimit[.
*/
-static void
-_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
- const uint8_t *src, UCaseContext *csc,
- int32_t srcStart, int32_t srcLimit,
- icu::ByteSink &sink, icu::Edits *edits,
- UErrorCode &errorCode) {
- /* case mapping loop */
- int32_t srcIndex=srcStart;
- while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
+void toLower(int32_t caseLocale, uint32_t options,
+ const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
+ icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
+ const int8_t *latinToLower;
+ if (caseLocale == UCASE_LOC_ROOT ||
+ (caseLocale >= 0 ?
+ !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
+ (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
+ latinToLower = LatinCase::TO_LOWER_NORMAL;
+ } else {
+ latinToLower = LatinCase::TO_LOWER_TR_LT;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t prev = srcStart;
+ int32_t srcIndex = srcStart;
+ for (;;) {
+ // fast path for simple cases
int32_t cpStart;
- csc->cpStart=cpStart=srcIndex;
UChar32 c;
- U8_NEXT(src, srcIndex, srcLimit, c);
- csc->cpLimit=srcIndex;
- if(c<0) {
- // Malformed UTF-8.
- ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
+ for (;;) {
+ if (U_FAILURE(errorCode) || srcIndex >= srcLimit) {
+ c = U_SENTINEL;
+ break;
+ }
+ uint8_t lead = src[srcIndex++];
+ if (lead <= 0x7f) {
+ int8_t d = latinToLower[lead];
+ if (d == LatinCase::EXC) {
+ cpStart = srcIndex - 1;
+ c = lead;
+ break;
+ }
+ if (d == 0) { continue; }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
+ sink, options, edits, errorCode);
+ char ascii = (char)(lead + d);
+ sink.Append(&ascii, 1);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ prev = srcIndex;
+ continue;
+ } else if (lead < 0xe3) {
+ uint8_t t;
+ if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit &&
+ (t = src[srcIndex] - 0x80) <= 0x3f) {
+ // U+0080..U+017F
+ ++srcIndex;
+ c = ((lead - 0xc0) << 6) | t;
+ int8_t d = latinToLower[c];
+ if (d == LatinCase::EXC) {
+ cpStart = srcIndex - 2;
+ break;
+ }
+ if (d == 0) { continue; }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
+ sink, options, edits, errorCode);
+ ByteSinkUtil::appendTwoBytes(c + d, sink);
+ if (edits != nullptr) {
+ edits->addReplace(2, 2);
+ }
+ prev = srcIndex;
+ continue;
+ }
+ } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
+ (srcIndex + 2) <= srcLimit &&
+ U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
+ // most of CJK: no case mappings
+ srcIndex += 2;
+ continue;
+ }
+ cpStart = --srcIndex;
+ U8_NEXT(src, srcIndex, srcLimit, c);
+ if (c < 0) {
+ // ill-formed UTF-8
+ continue;
+ }
+ uint16_t props = UTRIE2_GET16(trie, c);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ int32_t delta;
+ if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
sink, options, edits, errorCode);
+ ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
+ prev = srcIndex;
+ }
+ if (c < 0) {
+ break;
+ }
+ // slow path
+ const UChar *s;
+ if (caseLocale >= 0) {
+ csc->cpStart = cpStart;
+ csc->cpLimit = srcIndex;
+ c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale);
} else {
- const UChar *s;
- c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
+ c = ucase_toFullFolding(c, &s, options);
+ }
+ if (c >= 0) {
+ ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
+ sink, options, edits, errorCode);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
+ prev = srcIndex;
}
}
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
+ sink, options, edits, errorCode);
}
+void toUpper(int32_t caseLocale, uint32_t options,
+ const uint8_t *src, UCaseContext *csc, int32_t srcLength,
+ icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
+ const int8_t *latinToUpper;
+ if (caseLocale == UCASE_LOC_TURKISH) {
+ latinToUpper = LatinCase::TO_UPPER_TR;
+ } else {
+ latinToUpper = LatinCase::TO_UPPER_NORMAL;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t prev = 0;
+ int32_t srcIndex = 0;
+ for (;;) {
+ // fast path for simple cases
+ int32_t cpStart;
+ UChar32 c;
+ for (;;) {
+ if (U_FAILURE(errorCode) || srcIndex >= srcLength) {
+ c = U_SENTINEL;
+ break;
+ }
+ uint8_t lead = src[srcIndex++];
+ if (lead <= 0x7f) {
+ int8_t d = latinToUpper[lead];
+ if (d == LatinCase::EXC) {
+ cpStart = srcIndex - 1;
+ c = lead;
+ break;
+ }
+ if (d == 0) { continue; }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
+ sink, options, edits, errorCode);
+ char ascii = (char)(lead + d);
+ sink.Append(&ascii, 1);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ prev = srcIndex;
+ continue;
+ } else if (lead < 0xe3) {
+ uint8_t t;
+ if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength &&
+ (t = src[srcIndex] - 0x80) <= 0x3f) {
+ // U+0080..U+017F
+ ++srcIndex;
+ c = ((lead - 0xc0) << 6) | t;
+ int8_t d = latinToUpper[c];
+ if (d == LatinCase::EXC) {
+ cpStart = srcIndex - 2;
+ break;
+ }
+ if (d == 0) { continue; }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
+ sink, options, edits, errorCode);
+ ByteSinkUtil::appendTwoBytes(c + d, sink);
+ if (edits != nullptr) {
+ edits->addReplace(2, 2);
+ }
+ prev = srcIndex;
+ continue;
+ }
+ } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
+ (srcIndex + 2) <= srcLength &&
+ U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
+ // most of CJK: no case mappings
+ srcIndex += 2;
+ continue;
+ }
+ cpStart = --srcIndex;
+ U8_NEXT(src, srcIndex, srcLength, c);
+ if (c < 0) {
+ // ill-formed UTF-8
+ continue;
+ }
+ uint16_t props = UTRIE2_GET16(trie, c);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ int32_t delta;
+ if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
+ sink, options, edits, errorCode);
+ ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
+ prev = srcIndex;
+ }
+ if (c < 0) {
+ break;
+ }
+ // slow path
+ csc->cpStart = cpStart;
+ csc->cpLimit = srcIndex;
+ const UChar *s;
+ c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale);
+ if (c >= 0) {
+ ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
+ sink, options, edits, errorCode);
+ appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
+ prev = srcIndex;
+ }
+ }
+ ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
+ sink, options, edits, errorCode);
+}
+
+} // namespace
+
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC void U_CALLCONV
@@ -335,10 +524,9 @@ ucasemap_internalUTF8ToTitle(
if(titleLimit<index) {
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
- _caseMap(caseLocale, options, ucase_toFullLower,
- src, &csc,
- titleLimit, index,
- sink, edits, errorCode);
+ toLower(caseLocale, options,
+ src, &csc, titleLimit, index,
+ sink, edits, errorCode);
if(U_FAILURE(errorCode)) {
return;
}
@@ -538,8 +726,8 @@ ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREA
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
- _caseMap(
- caseLocale, options, ucase_toFullLower,
+ toLower(
+ caseLocale, options,
src, &csc, 0, srcLength,
sink, edits, errorCode);
}
@@ -555,9 +743,9 @@ ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREA
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
- _caseMap(
- caseLocale, options, ucase_toFullUpper,
- src, &csc, 0, srcLength,
+ toUpper(
+ caseLocale, options,
+ src, &csc, srcLength,
sink, edits, errorCode);
}
}
@@ -567,22 +755,10 @@ ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_B
const uint8_t *src, int32_t srcLength,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
- /* case mapping loop */
- int32_t srcIndex = 0;
- while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
- int32_t cpStart = srcIndex;
- UChar32 c;
- U8_NEXT(src, srcIndex, srcLength, c);
- if(c<0) {
- // Malformed UTF-8.
- ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
- sink, options, edits, errorCode);
- } else {
- const UChar *s;
- c = ucase_toFullFolding(c, &s, options);
- appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
- }
- }
+ toLower(
+ -1, options,
+ src, nullptr, 0, srcLength,
+ sink, edits, errorCode);
}
void
diff --git a/deps/icu-small/source/common/ucasemap_imp.h b/deps/icu-small/source/common/ucasemap_imp.h
index 99a6490279..7788fd9371 100644
--- a/deps/icu-small/source/common/ucasemap_imp.h
+++ b/deps/icu-small/source/common/ucasemap_imp.h
@@ -60,15 +60,6 @@ u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
int32_t *matchLen1, int32_t *matchLen2,
UErrorCode *pErrorCode);
-/**
- * Are the Unicode properties loaded?
- * This must be used before internal functions are called that do
- * not perform this check.
- * Generate a debug assertion failure if data is not loaded.
- */
-U_CFUNC UBool
-uprv_haveProperties(UErrorCode *pErrorCode);
-
#ifdef __cplusplus
U_NAMESPACE_BEGIN
diff --git a/deps/icu-small/source/common/uchar.cpp b/deps/icu-small/source/common/uchar.cpp
index c3f037d73e..996c3fdc40 100644
--- a/deps/icu-small/source/common/uchar.cpp
+++ b/deps/icu-small/source/common/uchar.cpp
@@ -42,14 +42,6 @@
/* getting a uint32_t properties word from the data */
#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
-U_CFUNC UBool
-uprv_haveProperties(UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return FALSE;
- }
- return TRUE;
-}
-
/* API functions ------------------------------------------------------------ */
/* Gets the Unicode character's general category.*/
diff --git a/deps/icu-small/source/common/ucmndata.cpp b/deps/icu-small/source/common/ucmndata.cpp
index 251c7ba182..ba2310bb7a 100644
--- a/deps/icu-small/source/common/ucmndata.cpp
+++ b/deps/icu-small/source/common/ucmndata.cpp
@@ -77,7 +77,11 @@ typedef struct {
typedef struct {
uint32_t count;
uint32_t reserved;
- PointerTOCEntry entry[2]; /* Actual size is from count. */
+ /**
+ * Variable-length array declared with length 1 to disable bounds checkers.
+ * The actual array length is in the count field.
+ */
+ PointerTOCEntry entry[1];
} PointerTOC;
diff --git a/deps/icu-small/source/common/ucmndata.h b/deps/icu-small/source/common/ucmndata.h
index 8c36897f16..1684441432 100644
--- a/deps/icu-small/source/common/ucmndata.h
+++ b/deps/icu-small/source/common/ucmndata.h
@@ -52,7 +52,11 @@ typedef struct {
typedef struct {
uint32_t count;
- UDataOffsetTOCEntry entry[2]; /* Actual size of array is from count. */
+ /**
+ * Variable-length array declared with length 1 to disable bounds checkers.
+ * The actual array length is in the count field.
+ */
+ UDataOffsetTOCEntry entry[1];
} UDataOffsetTOC;
/**
diff --git a/deps/icu-small/source/common/ucnv2022.cpp b/deps/icu-small/source/common/ucnv2022.cpp
index 1b625ea06c..854ca60cc3 100644
--- a/deps/icu-small/source/common/ucnv2022.cpp
+++ b/deps/icu-small/source/common/ucnv2022.cpp
@@ -3512,14 +3512,14 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC
case 'k':
if(myConverterData->version == 0) {
if(length == 1) {
- if((UBool)args->converter->fromUnicodeStatus) {
+ if(args->converter->fromUnicodeStatus) {
/* in DBCS mode: switch to SBCS */
args->converter->fromUnicodeStatus = 0;
*p++ = UCNV_SI;
}
*p++ = subchar[0];
} else /* length == 2*/ {
- if(!(UBool)args->converter->fromUnicodeStatus) {
+ if(!args->converter->fromUnicodeStatus) {
/* in SBCS mode: switch to DBCS */
args->converter->fromUnicodeStatus = 1;
*p++ = UCNV_SO;
diff --git a/deps/icu-small/source/common/ucnv_err.cpp b/deps/icu-small/source/common/ucnv_err.cpp
index 18218835a2..63794d2334 100644
--- a/deps/icu-small/source/common/ucnv_err.cpp
+++ b/deps/icu-small/source/common/ucnv_err.cpp
@@ -60,11 +60,12 @@
* To avoid dependency on other code, this list is hard coded here.
* When an ignorable code point is found and is unmappable, the default callbacks
* will ignore them.
- * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
+ * For a list of the default ignorable code points, use this link:
+ * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
*
* This list should be sync with the one in CharsetCallback.java
*/
-#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
+#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
(c == 0x00AD) || \
(c == 0x034F) || \
(c == 0x061C) || \
@@ -74,26 +75,15 @@
(0x180B <= c && c <= 0x180E) || \
(0x200B <= c && c <= 0x200F) || \
(0x202A <= c && c <= 0x202E) || \
- (c == 0x2060) || \
- (0x2066 <= c && c <= 0x2069) || \
- (0x2061 <= c && c <= 0x2064) || \
- (0x206A <= c && c <= 0x206F) || \
+ (0x2060 <= c && c <= 0x206F) || \
(c == 0x3164) || \
- (0x0FE00 <= c && c <= 0x0FE0F) || \
- (c == 0x0FEFF) || \
- (c == 0x0FFA0) || \
- (0x01BCA0 <= c && c <= 0x01BCA3) || \
- (0x01D173 <= c && c <= 0x01D17A) || \
- (c == 0x0E0001) || \
- (0x0E0020 <= c && c <= 0x0E007F) || \
- (0x0E0100 <= c && c <= 0x0E01EF) || \
- (c == 0x2065) || \
- (0x0FFF0 <= c && c <= 0x0FFF8) || \
- (c == 0x0E0000) || \
- (0x0E0002 <= c && c <= 0x0E001F) || \
- (0x0E0080 <= c && c <= 0x0E00FF) || \
- (0x0E01F0 <= c && c <= 0x0E0FFF) \
- )
+ (0xFE00 <= c && c <= 0xFE0F) || \
+ (c == 0xFEFF) || \
+ (c == 0xFFA0) || \
+ (0xFFF0 <= c && c <= 0xFFF8) || \
+ (0x1BCA0 <= c && c <= 0x1BCA3) || \
+ (0x1D173 <= c && c <= 0x1D17A) || \
+ (0xE0000 <= c && c <= 0xE0FFF))
/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
diff --git a/deps/icu-small/source/common/ucnv_u32.cpp b/deps/icu-small/source/common/ucnv_u32.cpp
index 3fac04b300..ca8c6788d3 100644
--- a/deps/icu-small/source/common/ucnv_u32.cpp
+++ b/deps/icu-small/source/common/ucnv_u32.cpp
@@ -55,7 +55,7 @@ T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
uint32_t ch, i;
/* Restore state of current sequence */
- if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
+ if (args->converter->toULength > 0 && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@@ -136,7 +136,7 @@ T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
int32_t offsetNum = 0;
/* Restore state of current sequence */
- if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
+ if (args->converter->toULength > 0 && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@@ -517,7 +517,7 @@ T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
uint32_t ch, i;
/* Restore state of current sequence */
- if (args->converter->toUnicodeStatus && myTarget < targetLimit)
+ if (args->converter->toULength > 0 && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
@@ -604,7 +604,7 @@ T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
int32_t offsetNum = 0;
/* Restore state of current sequence */
- if (args->converter->toUnicodeStatus && myTarget < targetLimit)
+ if (args->converter->toULength > 0 && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
args->converter->toULength = 0;
diff --git a/deps/icu-small/source/common/ucnv_u8.cpp b/deps/icu-small/source/common/ucnv_u8.cpp
index c7ef87fd50..5a07244b02 100644
--- a/deps/icu-small/source/common/ucnv_u8.cpp
+++ b/deps/icu-small/source/common/ucnv_u8.cpp
@@ -76,7 +76,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
int32_t i, inBytes;
/* Restore size of current sequence */
- if (cnv->toUnicodeStatus && myTarget < targetLimit)
+ if (cnv->toULength > 0 && myTarget < targetLimit)
{
inBytes = cnv->mode; /* restore # of bytes to consume */
i = cnv->toULength; /* restore # of bytes consumed */
@@ -194,7 +194,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
int32_t i, inBytes;
/* Restore size of current sequence */
- if (cnv->toUnicodeStatus && myTarget < targetLimit)
+ if (cnv->toULength > 0 && myTarget < targetLimit)
{
inBytes = cnv->mode; /* restore # of bytes to consume */
i = cnv->toULength; /* restore # of bytes consumed */
@@ -670,12 +670,13 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
/* get the converter state from the UTF-8 UConverter */
- c=(UChar32)utf8->toUnicodeStatus;
- if(c!=0) {
+ if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
+ c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
+ c = 0;
}
count=(int32_t)(sourceLimit-source)+oldToULength;
@@ -695,36 +696,20 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
// Use a single counter for source and target, counting the minimum of
// the source length and the target capacity.
// Let the standard converter handle edge cases.
- const uint8_t *limit=sourceLimit;
if(count>targetCapacity) {
- limit-=(count-targetCapacity);
count=targetCapacity;
}
- // The conversion loop checks count>0 only once per 1/2/3-byte character.
- // If the buffer ends with a truncated 2- or 3-byte sequence,
+ // The conversion loop checks count>0 only once per character.
+ // If the buffer ends with a truncated sequence,
// then we reduce the count to stop before that,
// and collect the remaining bytes after the conversion loop.
- {
- // Do not go back into the bytes that will be read for finishing a partial
- // sequence from the previous buffer.
- int32_t length=count-toULimit;
- if(length>0) {
- uint8_t b1=*(limit-1);
- if(U8_IS_SINGLE(b1)) {
- // common ASCII character
- } else if(U8_IS_TRAIL(b1) && length>=2) {
- uint8_t b2=*(limit-2);
- if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
- // truncated 3-byte sequence
- count-=2;
- }
- } else if(0xc2<=b1 && b1<0xf0) {
- // truncated 2- or 3-byte sequence
- --count;
- }
- }
- }
+
+ // Do not go back into the bytes that will be read for finishing a partial
+ // sequence from the previous buffer.
+ int32_t length=count-toULimit;
+ U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
+ count=toULimit+length;
}
if(c!=0) {
@@ -814,7 +799,7 @@ moreBytes:
}
/* copy the legal byte sequence to the target */
- if(count>=toULength) {
+ {
int8_t i;
for(i=0; i<oldToULength; ++i) {
@@ -825,14 +810,6 @@ moreBytes:
*target++=*source++;
}
count-=toULength;
- } else {
- // A supplementary character that does not fit into the target.
- // Let the standard converter handle this.
- source-=(toULength-oldToULength);
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
- *pErrorCode=U_USING_DEFAULT_WARNING;
- return;
}
}
}
@@ -856,8 +833,7 @@ moreBytes:
utf8->toULength=toULength;
utf8->mode=toULimit;
break;
- } else if(!U8_IS_TRAIL(b=*source)) {
- /* lead byte in trail byte position */
+ } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) {
utf8->toULength=toULength;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
break;
diff --git a/deps/icu-small/source/common/ucnvlat1.cpp b/deps/icu-small/source/common/ucnvlat1.cpp
index 9855ebe6e7..15eeb5c51f 100644
--- a/deps/icu-small/source/common/ucnvlat1.cpp
+++ b/deps/icu-small/source/common/ucnvlat1.cpp
@@ -340,7 +340,11 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
/* get the converter state from the UTF-8 UConverter */
- c=(UChar32)utf8->toUnicodeStatus;
+ if (utf8->toULength > 0) {
+ c=(UChar32)utf8->toUnicodeStatus;
+ } else {
+ c = 0;
+ }
if(c!=0 && source<sourceLimit) {
if(targetCapacity==0) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
@@ -620,7 +624,7 @@ ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint8_t c;
- if(pToUArgs->converter->toUnicodeStatus!=0) {
+ if(pToUArgs->converter->toULength > 0) {
/* no handling of partial UTF-8 characters here, fall back to pivoting */
*pErrorCode=U_USING_DEFAULT_WARNING;
return;
diff --git a/deps/icu-small/source/common/ucnvmbcs.cpp b/deps/icu-small/source/common/ucnvmbcs.cpp
index e5efa7fc1b..9052394b4f 100644
--- a/deps/icu-small/source/common/ucnvmbcs.cpp
+++ b/deps/icu-small/source/common/ucnvmbcs.cpp
@@ -5064,12 +5064,13 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
/* get the converter state from the UTF-8 UConverter */
- c=(UChar32)utf8->toUnicodeStatus;
- if(c!=0) {
+ if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
+ c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
+ c = 0;
}
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
@@ -5359,12 +5360,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
/* get the converter state from the UTF-8 UConverter */
- c=(UChar32)utf8->toUnicodeStatus;
- if(c!=0) {
+ if(utf8->toULength > 0) {
toULength=oldToULength=utf8->toULength;
toULimit=(int8_t)utf8->mode;
+ c=(UChar32)utf8->toUnicodeStatus;
} else {
toULength=oldToULength=toULimit=0;
+ c = 0;
}
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
diff --git a/deps/icu-small/source/common/ucurr.cpp b/deps/icu-small/source/common/ucurr.cpp
index aa9d855f50..d1e5f62a9a 100644
--- a/deps/icu-small/source/common/ucurr.cpp
+++ b/deps/icu-small/source/common/ucurr.cpp
@@ -17,6 +17,7 @@
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
#include "ustr_imp.h"
+#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
@@ -28,9 +29,12 @@
#include "uinvchar.h"
#include "uresimp.h"
#include "ulist.h"
+#include "uresimp.h"
#include "ureslocs.h"
#include "ulocimp.h"
+using namespace icu;
+
//#define UCURR_DEBUG_EQUIV 1
#ifdef UCURR_DEBUG_EQUIV
#include "stdio.h"
@@ -104,6 +108,7 @@ static const char VAR_DELIM_STR[] = "_";
// Tag for localized display names (symbols) of currencies
static const char CURRENCIES[] = "Currencies";
+static const char CURRENCIES_NARROW[] = "Currencies%narrow";
static const char CURRENCYPLURALS[] = "CurrencyPlurals";
static const UChar EUR_STR[] = {0x0045,0x0055,0x0052,0};
@@ -698,7 +703,7 @@ ucurr_getName(const UChar* currency,
}
int32_t choice = (int32_t) nameStyle;
- if (choice < 0 || choice > 1) {
+ if (choice < 0 || choice > 2) {
*ec = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
@@ -731,15 +736,19 @@ ucurr_getName(const UChar* currency,
const UChar* s = NULL;
ec2 = U_ZERO_ERROR;
- UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
-
- rb = ures_getByKey(rb, CURRENCIES, rb, &ec2);
-
- // Fetch resource with multi-level resource inheritance fallback
- rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2);
-
- s = ures_getStringByIndex(rb, choice, len, &ec2);
- ures_close(rb);
+ LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2));
+
+ if (nameStyle == UCURR_NARROW_SYMBOL_NAME) {
+ CharString key;
+ key.append(CURRENCIES_NARROW, ec2);
+ key.append("/", ec2);
+ key.append(buf, ec2);
+ s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2);
+ } else {
+ ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2);
+ ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2);
+ s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2);
+ }
// If we've succeeded we're done. Otherwise, try to fallback.
// If that fails (because we are already at root) then exit.
diff --git a/deps/icu-small/source/common/unicode/brkiter.h b/deps/icu-small/source/common/unicode/brkiter.h
index c64bb71222..607f3ec625 100644
--- a/deps/icu-small/source/common/unicode/brkiter.h
+++ b/deps/icu-small/source/common/unicode/brkiter.h
@@ -298,15 +298,14 @@ public:
virtual int32_t next(int32_t n) = 0;
/**
- * For RuleBasedBreakIterators, return the status tag from the
- * break rule that determined the most recently
- * returned break position.
+ * For RuleBasedBreakIterators, return the status tag from the break rule
+ * that determined the boundary at the current iteration position.
* <p>
* For break iterator types that do not support a rule status,
* a default value of 0 is returned.
* <p>
- * @return the status from the break rule that determined the most recently
- * returned break position.
+ * @return the status from the break rule that determined the boundary at
+ * the current iteration position.
* @see RuleBaseBreakIterator::getRuleStatus()
* @see UWordBreak
* @stable ICU 52
@@ -315,7 +314,7 @@ public:
/**
* For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
- * that determined the most recently returned break position.
+ * that determined the boundary at the current iteration position.
* <p>
* For break iterator types that do not support rule status,
* no values are returned.
@@ -334,7 +333,7 @@ public:
* normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
- * the most recent boundary returned by the break iterator.
+ * the boundary at the current iteration position.
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
* is the total number of status values that were available,
* not the reduced number that were actually returned.
@@ -616,7 +615,7 @@ public:
virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
private:
- static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
+ static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
diff --git a/deps/icu-small/source/common/unicode/bytestriebuilder.h b/deps/icu-small/source/common/unicode/bytestriebuilder.h
index 0f9f5e2c06..7a806bb7f0 100644
--- a/deps/icu-small/source/common/unicode/bytestriebuilder.h
+++ b/deps/icu-small/source/common/unicode/bytestriebuilder.h
@@ -154,7 +154,6 @@ private:
const char *s;
};
- // don't use #ifndef U_HIDE_INTERNAL_API with private class members or virtual methods.
virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
Node *nextNode) const;
diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h
index 4a4917bdca..4b77256d74 100644
--- a/deps/icu-small/source/common/unicode/casemap.h
+++ b/deps/icu-small/source/common/unicode/casemap.h
@@ -18,8 +18,6 @@
U_NAMESPACE_BEGIN
-#ifndef U_HIDE_DRAFT_API
-
class BreakIterator;
class ByteSink;
class Edits;
@@ -27,7 +25,7 @@ class Edits;
/**
* Low-level C++ case mapping functions.
*
- * @draft ICU 59
+ * @stable ICU 59
*/
class U_COMMON_API CaseMap U_FINAL : public UMemory {
public:
@@ -59,7 +57,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strToLower
- * @draft ICU 59
+ * @stable ICU 59
*/
static int32_t toLower(
const char *locale, uint32_t options,
@@ -95,7 +93,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strToUpper
- * @draft ICU 59
+ * @stable ICU 59
*/
static int32_t toUpper(
const char *locale, uint32_t options,
@@ -146,7 +144,7 @@ public:
*
* @see u_strToTitle
* @see ucasemap_toTitle
- * @draft ICU 59
+ * @stable ICU 59
*/
static int32_t toTitle(
const char *locale, uint32_t options, BreakIterator *iter,
@@ -188,7 +186,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see u_strFoldCase
- * @draft ICU 59
+ * @stable ICU 59
*/
static int32_t fold(
uint32_t options,
@@ -196,6 +194,7 @@ public:
char16_t *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
+#ifndef U_HIDE_DRAFT_API
/**
* Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
@@ -318,6 +317,7 @@ public:
uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
+#endif // U_HIDE_DRAFT_API
/**
* Lowercases a UTF-8 string and optionally records edits.
@@ -347,7 +347,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see ucasemap_utf8ToLower
- * @draft ICU 59
+ * @stable ICU 59
*/
static int32_t utf8ToLower(
const char *locale, uint32_t options,
@@ -383,7 +383,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see ucasemap_utf8ToUpper
- * @draft ICU 59
+ * @stable ICU 59
*/
static int32_t utf8ToUpper(
const char *locale, uint32_t options,
@@ -433,7 +433,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see ucasemap_utf8ToTitle
- * @draft ICU 59
+ * @stable ICU 59
*/
static int32_t utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
@@ -475,7 +475,7 @@ public:
* the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
*
* @see ucasemap_utf8FoldCase
- * @draft ICU 59
+ * @stable ICU 59
*/
static int32_t utf8Fold(
uint32_t options,
@@ -489,8 +489,6 @@ private:
CaseMap &operator=(const CaseMap &other) = delete;
};
-#endif // U_HIDE_DRAFT_API
-
U_NAMESPACE_END
#endif // __CASEMAP_H__
diff --git a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h
index fbce177591..49d0e029a9 100644
--- a/deps/icu-small/source/common/unicode/char16ptr.h
+++ b/deps/icu-small/source/common/unicode/char16ptr.h
@@ -30,25 +30,23 @@ U_NAMESPACE_BEGIN
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
#endif
-// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it
-// is now used in place of UChar* in several stable C++ methods
/**
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
- * @draft ICU 59
+ * @stable ICU 59
*/
class U_COMMON_API Char16Ptr U_FINAL {
public:
/**
* Copies the pointer.
* @param p pointer
- * @draft ICU 59
+ * @stable ICU 59
*/
inline Char16Ptr(char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* @param p pointer to be converted
- * @draft ICU 59
+ * @stable ICU 59
*/
inline Char16Ptr(uint16_t *p);
#endif
@@ -57,32 +55,32 @@ public:
* Converts the pointer to char16_t *.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* @param p pointer to be converted
- * @draft ICU 59
+ * @stable ICU 59
*/
inline Char16Ptr(wchar_t *p);
#endif
/**
* nullptr constructor.
* @param p nullptr
- * @draft ICU 59
+ * @stable ICU 59
*/
inline Char16Ptr(std::nullptr_t p);
/**
* Destructor.
- * @draft ICU 59
+ * @stable ICU 59
*/
inline ~Char16Ptr();
/**
* Pointer access.
* @return the wrapped pointer
- * @draft ICU 59
+ * @stable ICU 59
*/
inline char16_t *get() const;
/**
* char16_t pointer access via type conversion (e.g., static_cast).
* @return the wrapped pointer
- * @draft ICU 59
+ * @stable ICU 59
*/
inline operator char16_t *() const { return get(); }
@@ -137,25 +135,23 @@ char16_t *Char16Ptr::get() const { return u_.cp; }
#endif
-// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it is
-// now used in place of const UChar* in several stable C++ methods
/**
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
- * @draft ICU 59
+ * @stable ICU 59
*/
class U_COMMON_API ConstChar16Ptr U_FINAL {
public:
/**
* Copies the pointer.
* @param p pointer
- * @draft ICU 59
+ * @stable ICU 59
*/
inline ConstChar16Ptr(const char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* @param p pointer to be converted
- * @draft ICU 59
+ * @stable ICU 59
*/
inline ConstChar16Ptr(const uint16_t *p);
#endif
@@ -164,33 +160,33 @@ public:
* Converts the pointer to char16_t *.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* @param p pointer to be converted
- * @draft ICU 59
+ * @stable ICU 59
*/
inline ConstChar16Ptr(const wchar_t *p);
#endif
/**
* nullptr constructor.
* @param p nullptr
- * @draft ICU 59
+ * @stable ICU 59
*/
inline ConstChar16Ptr(const std::nullptr_t p);
/**
* Destructor.
- * @draft ICU 59
+ * @stable ICU 59
*/
inline ~ConstChar16Ptr();
/**
* Pointer access.
* @return the wrapped pointer
- * @draft ICU 59
+ * @stable ICU 59
*/
inline const char16_t *get() const;
/**
* char16_t pointer access via type conversion (e.g., static_cast).
* @return the wrapped pointer
- * @draft ICU 59
+ * @stable ICU 59
*/
inline operator const char16_t *() const { return get(); }
@@ -250,7 +246,7 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as const UChar *
- * @draft ICU 59
+ * @stable ICU 59
*/
inline const UChar *toUCharPtr(const char16_t *p) {
#ifdef U_ALIASING_BARRIER
@@ -264,7 +260,7 @@ inline const UChar *toUCharPtr(const char16_t *p) {
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as UChar *
- * @draft ICU 59
+ * @stable ICU 59
*/
inline UChar *toUCharPtr(char16_t *p) {
#ifdef U_ALIASING_BARRIER
@@ -278,7 +274,7 @@ inline UChar *toUCharPtr(char16_t *p) {
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as const OldUChar *
- * @draft ICU 59
+ * @stable ICU 59
*/
inline const OldUChar *toOldUCharPtr(const char16_t *p) {
#ifdef U_ALIASING_BARRIER
@@ -292,7 +288,7 @@ inline const OldUChar *toOldUCharPtr(const char16_t *p) {
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as OldUChar *
- * @draft ICU 59
+ * @stable ICU 59
*/
inline OldUChar *toOldUCharPtr(char16_t *p) {
#ifdef U_ALIASING_BARRIER
diff --git a/deps/icu-small/source/common/unicode/chariter.h b/deps/icu-small/source/common/unicode/chariter.h
index dbed89dbe6..292794f6d6 100644
--- a/deps/icu-small/source/common/unicode/chariter.h
+++ b/deps/icu-small/source/common/unicode/chariter.h
@@ -569,7 +569,7 @@ public:
* Returns the numeric index in the underlying text-storage
* object of the character the iterator currently refers to
* (i.e., the character returned by current()).
- * @return the numberic index in the text-storage object of
+ * @return the numeric index in the text-storage object of
* the character the iterator currently refers to
* @stable ICU 2.0
*/
diff --git a/deps/icu-small/source/common/unicode/dtintrv.h b/deps/icu-small/source/common/unicode/dtintrv.h
index 2221b36c9b..c99011e26c 100644
--- a/deps/icu-small/source/common/unicode/dtintrv.h
+++ b/deps/icu-small/source/common/unicode/dtintrv.h
@@ -69,7 +69,7 @@ public:
* <pre>
* . Base* polymorphic_pointer = createPolymorphicObject();
* . if (polymorphic_pointer->getDynamicClassID() ==
- * . erived::getStaticClassID()) ...
+ * . derived::getStaticClassID()) ...
* </pre>
* @return The class ID for all objects of this class.
* @stable ICU 4.0
diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h
index 082c3733a8..5a72574c14 100644
--- a/deps/icu-small/source/common/unicode/edits.h
+++ b/deps/icu-small/source/common/unicode/edits.h
@@ -17,8 +17,6 @@
U_NAMESPACE_BEGIN
-#ifndef U_HIDE_DRAFT_API
-
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
@@ -27,13 +25,13 @@ U_NAMESPACE_BEGIN
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
*
- * @draft ICU 59
+ * @stable ICU 59
*/
class U_COMMON_API Edits U_FINAL : public UMemory {
public:
/**
* Constructs an empty object.
- * @draft ICU 59
+ * @stable ICU 59
*/
Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
@@ -64,7 +62,7 @@ public:
/**
* Destructor.
- * @draft ICU 59
+ * @stable ICU 59
*/
~Edits();
@@ -88,20 +86,20 @@ public:
/**
* Resets the data but may not release memory.
- * @draft ICU 59
+ * @stable ICU 59
*/
void reset() U_NOEXCEPT;
/**
* Adds a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
- * @draft ICU 59
+ * @stable ICU 59
*/
void addUnchanged(int32_t unchangedLength);
/**
* Adds a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
- * @draft ICU 59
+ * @stable ICU 59
*/
void addReplace(int32_t oldLength, int32_t newLength);
/**
@@ -112,33 +110,35 @@ public:
* and an error occurred while recording edits.
* Otherwise unchanged.
* @return TRUE if U_FAILURE(outErrorCode)
- * @draft ICU 59
+ * @stable ICU 59
*/
UBool copyErrorTo(UErrorCode &outErrorCode);
/**
* How much longer is the new text compared with the old text?
* @return new length minus old length
- * @draft ICU 59
+ * @stable ICU 59
*/
int32_t lengthDelta() const { return delta; }
/**
* @return TRUE if there are any change edits
- * @draft ICU 59
+ * @stable ICU 59
*/
UBool hasChanges() const { return numChanges != 0; }
+#ifndef U_HIDE_DRAFT_API
/**
* @return the number of change edits
* @draft ICU 60
*/
int32_t numberOfChanges() const { return numChanges; }
+#endif // U_HIDE_DRAFT_API
/**
* Access to the list of edits.
* @see getCoarseIterator
* @see getFineIterator
- * @draft ICU 59
+ * @stable ICU 59
*/
struct U_COMMON_API Iterator U_FINAL : public UMemory {
/**
@@ -152,12 +152,12 @@ public:
srcIndex(0), replIndex(0), destIndex(0) {}
/**
* Copy constructor.
- * @draft ICU 59
+ * @stable ICU 59
*/
Iterator(const Iterator &other) = default;
/**
* Assignment operator.
- * @draft ICU 59
+ * @stable ICU 59
*/
Iterator &operator=(const Iterator &other) = default;
@@ -167,7 +167,7 @@ public:
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if there is another edit
- * @draft ICU 59
+ * @stable ICU 59
*/
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
@@ -188,12 +188,13 @@ public:
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the source index was found
- * @draft ICU 59
+ * @stable ICU 59
*/
UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
return findIndex(i, TRUE, errorCode) == 0;
}
+#ifndef U_HIDE_DRAFT_API
/**
* Finds the edit that contains the destination index.
* The destination index may be found in a non-change
@@ -264,39 +265,40 @@ public:
* @draft ICU 60
*/
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
+#endif // U_HIDE_DRAFT_API
/**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
* FALSE if oldLength units remain unchanged.
- * @draft ICU 59
+ * @stable ICU 59
*/
UBool hasChange() const { return changed; }
/**
* @return the number of units in the original string which are replaced or remain unchanged.
- * @draft ICU 59
+ * @stable ICU 59
*/
int32_t oldLength() const { return oldLength_; }
/**
* @return the number of units in the modified string, if hasChange() is TRUE.
* Same as oldLength if hasChange() is FALSE.
- * @draft ICU 59
+ * @stable ICU 59
*/
int32_t newLength() const { return newLength_; }
/**
* @return the current index into the source string
- * @draft ICU 59
+ * @stable ICU 59
*/
int32_t sourceIndex() const { return srcIndex; }
/**
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
- * @draft ICU 59
+ * @stable ICU 59
*/
int32_t replacementIndex() const { return replIndex; }
/**
* @return the current index into the full destination string
- * @draft ICU 59
+ * @stable ICU 59
*/
int32_t destinationIndex() const { return destIndex; }
@@ -331,7 +333,7 @@ public:
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* @return an Iterator that merges adjacent changes.
- * @draft ICU 59
+ * @stable ICU 59
*/
Iterator getCoarseChangesIterator() const {
return Iterator(array, length, TRUE, TRUE);
@@ -340,7 +342,7 @@ public:
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* @return an Iterator that merges adjacent changes.
- * @draft ICU 59
+ * @stable ICU 59
*/
Iterator getCoarseIterator() const {
return Iterator(array, length, FALSE, TRUE);
@@ -350,7 +352,7 @@ public:
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* @return an Iterator that separates adjacent changes.
- * @draft ICU 59
+ * @stable ICU 59
*/
Iterator getFineChangesIterator() const {
return Iterator(array, length, TRUE, FALSE);
@@ -359,12 +361,13 @@ public:
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* @return an Iterator that separates adjacent changes.
- * @draft ICU 59
+ * @stable ICU 59
*/
Iterator getFineIterator() const {
return Iterator(array, length, FALSE, FALSE);
}
+#ifndef U_HIDE_DRAFT_API
/**
* Merges the two input Edits and appends the result to this object.
*
@@ -393,6 +396,7 @@ public:
* @draft ICU 60
*/
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
+#endif // U_HIDE_DRAFT_API
private:
void releaseArray() U_NOEXCEPT;
@@ -415,8 +419,6 @@ private:
uint16_t stackArray[STACK_CAPACITY];
};
-#endif // U_HIDE_DRAFT_API
-
U_NAMESPACE_END
#endif // __EDITS_H__
diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h
index a0319bf0a7..751d1faf40 100644
--- a/deps/icu-small/source/common/unicode/filteredbrk.h
+++ b/deps/icu-small/source/common/unicode/filteredbrk.h
@@ -64,9 +64,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* @deprecated ICU 60 use createEmptyInstance instead
* @see createEmptyInstance()
*/
- static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) {
- return createEmptyInstance(status);
- }
+ static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
@@ -105,7 +103,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/
virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
-#ifndef U_HIDE_DEPRECATED_API
/**
* This function has been deprecated in favor of wrapIteratorWithFilter()
* The behavior is identical.
@@ -116,7 +113,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* @see wrapBreakIteratorWithFilter()
*/
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
-#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/**
diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h
index c752344f33..c84774e07f 100644
--- a/deps/icu-small/source/common/unicode/locid.h
+++ b/deps/icu-small/source/common/unicode/locid.h
@@ -353,7 +353,7 @@ public:
* the default locale ID of the runtime environment.
*
* @param newLocale Locale to set to. If NULL, set to the value obtained
- * from the runtime environement.
+ * from the runtime environment.
* @param success The error code.
* @system
* @stable ICU 2.0
@@ -629,7 +629,7 @@ public:
/**
* Fills in "name" with the name of this locale in a format suitable for user display
- * in the locale specfied by "displayLocale". This function uses getDisplayLanguage(),
+ * in the locale specified by "displayLocale". This function uses getDisplayLanguage(),
* getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
* name in the format "language (country[,variant])". For example, if displayLocale is
* fr_FR, then en_US's display name would be "Anglais (&Eacute;tats-Unis)", and no_NO_NY's
diff --git a/deps/icu-small/source/common/unicode/parseerr.h b/deps/icu-small/source/common/unicode/parseerr.h
index c8283bfcc9..c05487601c 100644
--- a/deps/icu-small/source/common/unicode/parseerr.h
+++ b/deps/icu-small/source/common/unicode/parseerr.h
@@ -58,9 +58,9 @@ enum { U_PARSE_CONTEXT_LEN = 16 };
typedef struct UParseError {
/**
- * The line on which the error occured. If the parser uses this
+ * The line on which the error occurred. If the parser uses this
* field, it sets it to the line number of the source text line on
- * which the error appears, which will be be a value >= 1. If the
+ * which the error appears, which will be a value >= 1. If the
* parse does not support line numbers, the value will be <= 0.
* @stable ICU 2.0
*/
diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h
index 12e2929d24..a3f8d32f89 100644
--- a/deps/icu-small/source/common/unicode/platform.h
+++ b/deps/icu-small/source/common/unicode/platform.h
@@ -482,9 +482,9 @@
/* Otherwise use the predefined value. */
#elif !defined(__cplusplus)
# define U_CPLUSPLUS_VERSION 0
-#elif __cplusplus >= 201402L
+#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
# define U_CPLUSPLUS_VERSION 14
-#elif __cplusplus >= 201103L
+#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
# define U_CPLUSPLUS_VERSION 11
#else
// C++98 or C++03
@@ -631,7 +631,7 @@ namespace std {
*/
#ifdef U_CHARSET_IS_UTF8
/* Use the predefined value. */
-#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
+#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED
# define U_CHARSET_IS_UTF8 1
#else
# define U_CHARSET_IS_UTF8 0
@@ -749,8 +749,10 @@ namespace std {
#else
/*
* Notes:
- * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but
- * does not support u"abc" string literals.
+ * Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
+ * and does not support u"abc" string literals.
+ * Visual Studio 2015 (_MSC_VER>=1900) and above adds support for
+ * both char16_t and u"abc" string literals.
* gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
* does not support u"abc" string literals.
* C++11 and C11 require support for UTF-16 literals
diff --git a/deps/icu-small/source/common/unicode/putil.h b/deps/icu-small/source/common/unicode/putil.h
index 91d6bb10f7..14bb99ccc5 100644
--- a/deps/icu-small/source/common/unicode/putil.h
+++ b/deps/icu-small/source/common/unicode/putil.h
@@ -38,7 +38,7 @@
/**
* Platform utilities isolates the platform dependencies of the
- * libarary. For each platform which this code is ported to, these
+ * library. For each platform which this code is ported to, these
* functions may have to be re-implemented.
*/
@@ -53,7 +53,7 @@
* The data directory is determined as follows:
* If u_setDataDirectory() has been called, that is it, otherwise
* if the ICU_DATA environment variable is set, use that, otherwise
- * If a data directory was specifed at ICU build time
+ * If a data directory was specified at ICU build time
* <code>
* \code
* #define ICU_DATA_DIR "path"
@@ -93,7 +93,7 @@ U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
#ifndef U_HIDE_INTERNAL_API
/**
* Return the time zone files override directory, or an empty string if
- * no directory was specified. Certain time zone resources will be preferrentially
+ * no directory was specified. Certain time zone resources will be preferentially
* loaded from individual files in this directory.
*
* @return the time zone data override directory.
diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h
index c3c201dd35..0c41d69d23 100644
--- a/deps/icu-small/source/common/unicode/rbbi.h
+++ b/deps/icu-small/source/common/unicode/rbbi.h
@@ -29,7 +29,6 @@
#include "unicode/udata.h"
#include "unicode/parseerr.h"
#include "unicode/schriter.h"
-#include "unicode/uchriter.h"
U_NAMESPACE_BEGIN
@@ -58,34 +57,18 @@ private:
* The UText through which this BreakIterator accesses the text
* @internal
*/
- UText *fText;
-
- /**
- * A character iterator that refers to the same text as the UText, above.
- * Only included for compatibility with old API, which was based on CharacterIterators.
- * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
- */
- CharacterIterator *fCharIter;
-
- /**
- * When the input text is provided by a UnicodeString, this will point to
- * a characterIterator that wraps that data. Needed only for the
- * implementation of getText(), a backwards compatibility issue.
- */
- StringCharacterIterator *fSCharIter;
-
- /**
- * When the input text is provided by a UText, this
- * dummy CharacterIterator over an empty string will
- * be returned from getText()
- */
- UCharCharacterIterator *fDCharIter;
+ UText fText;
+#ifndef U_HIDE_INTERNAL_API
+public:
+#endif /* U_HIDE_INTERNAL_API */
/**
- * The rule data for this BreakIterator instance
+ * The rule data for this BreakIterator instance.
+ * Not for general use; Public only for testing purposes.
* @internal
*/
RBBIDataWrapper *fData;
+private:
/**
* The iteration state - current position, rule status for the current position,
@@ -106,23 +89,10 @@ private:
int32_t fRuleStatusIndex;
/**
- * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
- */
- UBool fDone;
-
- /**
* Cache of previously determined boundary positions.
*/
- public: // TODO: debug, return to private.
class BreakCache;
BreakCache *fBreakCache;
- private:
- /**
- * Counter for the number of characters encountered with the "dictionary"
- * flag set.
- * @internal
- */
- uint32_t fDictionaryCharCount;
/**
* Cache of boundary positions within a region of text that has been
@@ -150,11 +120,30 @@ private:
UnhandledEngine *fUnhandledBreakEngine;
/**
- *
- * The type of the break iterator, or -1 if it has not been set.
+ * Counter for the number of characters encountered with the "dictionary"
+ * flag set.
* @internal
*/
- int32_t fBreakType;
+ uint32_t fDictionaryCharCount;
+
+ /**
+ * A character iterator that refers to the same text as the UText, above.
+ * Only included for compatibility with old API, which was based on CharacterIterators.
+ * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
+ */
+ CharacterIterator *fCharIter;
+
+ /**
+ * When the input text is provided by a UnicodeString, this will point to
+ * a characterIterator that wraps that data. Needed only for the
+ * implementation of getText(), a backwards compatibility issue.
+ */
+ StringCharacterIterator fSCharIter;
+
+ /**
+ * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
+ */
+ UBool fDone;
//=======================================================================
// constructors
@@ -206,17 +195,17 @@ public:
UErrorCode &status);
/**
- * Contruct a RuleBasedBreakIterator from a set of precompiled binary rules.
+ * Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
* Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules().
* Construction of a break iterator in this way is substantially faster than
- * constuction from source rules.
+ * construction from source rules.
*
* Ownership of the storage containing the compiled rules remains with the
* caller of this function. The compiled rules must not be modified or
* deleted during the life of the break iterator.
*
* The compiled rules are not compatible across different major versions of ICU.
- * The compiled rules are comaptible only between machines with the same
+ * The compiled rules are compatible only between machines with the same
* byte ordering (little or big endian) and the same base character set family
* (ASCII or EBCDIC).
*
@@ -285,7 +274,7 @@ public:
* behavior, and iterating over the same text, as this one.
* Differs from the copy constructor in that it is polymorphic, and
* will correctly clone (copy) a derived class.
- * clone() is thread safe. Multiple threads may simultaeneously
+ * clone() is thread safe. Multiple threads may simultaneously
* clone the same source break iterator.
* @return a newly-constructed RuleBasedBreakIterator
* @stable ICU 2.0
@@ -450,7 +439,7 @@ public:
virtual int32_t preceding(int32_t offset);
/**
- * Returns true if the specfied position is a boundary position. As a side
+ * Returns true if the specified position is a boundary position. As a side
* effect, leaves the iterator pointing to the first boundary position at
* or after "offset".
* @param offset the offset to check.
@@ -471,8 +460,8 @@ public:
/**
- * Return the status tag from the break rule that determined the most recently
- * returned break position. For break rules that do not specify a
+ * Return the status tag from the break rule that determined the boundary at
+ * the current iteration position. For break rules that do not specify a
* status, a default value of 0 is returned. If more than one break rule
* would cause a boundary to be located at some position in the text,
* the numerically largest of the applicable status values is returned.
@@ -489,16 +478,14 @@ public:
* position from <code>next()</code>, <code>previous()</code>, or
* any other break iterator functions that returns a boundary position.
* <p>
+ * Note that <code>getRuleStatus()</code> returns the value corresponding to
+ * <code>current()</code> index even after <code>next()</code> has returned DONE.
+ * <p>
* When creating custom break rules, one is free to define whatever
* status values may be convenient for the application.
* <p>
- * Note: this function is not thread safe. It should not have been
- * declared const, and the const remains only for compatibility
- * reasons. (The function is logically const, but not bit-wise const).
- * TODO: check this. Probably thread safe now.
- * <p>
- * @return the status from the break rule that determined the most recently
- * returned break position.
+ * @return the status from the break rule that determined the boundary
+ * at the current iteration position.
*
* @see UWordBreak
* @stable ICU 2.2
@@ -506,8 +493,8 @@ public:
virtual int32_t getRuleStatus() const;
/**
- * Get the status (tag) values from the break rule(s) that determined the most
- * recently returned break position.
+ * Get the status (tag) values from the break rule(s) that determined the boundary
+ * at the current iteration position.
* <p>
* The returned status value(s) are stored into an array provided by the caller.
* The values are stored in sorted (ascending) order.
@@ -518,10 +505,10 @@ public:
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
- * normal way, without attemtping to store any values.
+ * normal way, without attempting to store any values.
* @param status receives error codes.
- * @return The number of rule status values from rules that determined
- * the most recent boundary returned by the break iterator.
+ * @return The number of rule status values from the rules that determined
+ * the boundary at the current iteration position.
* In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
* is the total number of status values that were available,
* not the reduced number that were actually returned.
@@ -561,7 +548,7 @@ public:
*
* Create a clone (copy) of this break iterator in memory provided
* by the caller. The idea is to increase performance by avoiding
- * a storage allocation. Use of this functoin is NOT RECOMMENDED.
+ * a storage allocation. Use of this function is NOT RECOMMENDED.
* Performance gains are minimal, and correct buffer management is
* tricky. Use clone() instead.
*
@@ -574,7 +561,7 @@ public:
* storage for the cloned object.
*
* @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
- * returned if the the provided buffer was too small, and
+ * returned if the provided buffer was too small, and
* the clone was therefore put on the heap.
*
* @return Pointer to the clone object. This may differ from the stackBuffer
@@ -597,7 +584,7 @@ public:
* The binary data can only be used with the same version of ICU
* and on the same platform type (processor endian-ness)
*
- * @param length Returns the length of the binary data. (Out paramter.)
+ * @param length Returns the length of the binary data. (Out parameter.)
*
* @return A pointer to the binary (compiled) rule data. The storage
* belongs to the RulesBasedBreakIterator object, not the
@@ -646,12 +633,6 @@ private:
void reset(void);
/**
- * Set the type of the break iterator.
- * @internal
- */
- void setBreakType(int32_t type);
-
- /**
* Common initialization function, used by constructors and bufferClone.
* @internal
*/
@@ -697,6 +678,13 @@ private:
* @internal
*/
void dumpCache();
+
+ /**
+ * Debugging function only.
+ * @internal
+ */
+ void dumpTables();
+
#endif /* U_HIDE_INTERNAL_API */
};
diff --git a/deps/icu-small/source/common/unicode/resbund.h b/deps/icu-small/source/common/unicode/resbund.h
index 358ed7eeb9..ab0b60bbb2 100644
--- a/deps/icu-small/source/common/unicode/resbund.h
+++ b/deps/icu-small/source/common/unicode/resbund.h
@@ -132,7 +132,7 @@ public:
ResourceBundle(UErrorCode &err);
/**
- * Standard constructor, onstructs a resource bundle for the locale-specific
+ * Standard constructor, constructs a resource bundle for the locale-specific
* bundle in the specified package.
*
* @param packageName The packageName and locale together point to an ICU udata object,
diff --git a/deps/icu-small/source/common/unicode/schriter.h b/deps/icu-small/source/common/unicode/schriter.h
index d83a57f8d0..1a12769e8d 100644
--- a/deps/icu-small/source/common/unicode/schriter.h
+++ b/deps/icu-small/source/common/unicode/schriter.h
@@ -69,7 +69,7 @@ public:
* Create an iterator over the UnicodeString referred to by "textStr".
* The UnicodeString object is copied.
* The iteration range begins with the code unit specified by
- * "textBegin" and ends with the code unit BEFORE the code unit specfied
+ * "textBegin" and ends with the code unit BEFORE the code unit specified
* by "textEnd". The starting position is specified by "textPos". If
* "textBegin" and "textEnd" don't form a valid range on "text" (i.e.,
* textBegin >= textEnd or either is negative or greater than text.size()),
diff --git a/deps/icu-small/source/common/unicode/ubidi.h b/deps/icu-small/source/common/unicode/ubidi.h
index ef21f24206..254a5bf9ef 100644
--- a/deps/icu-small/source/common/unicode/ubidi.h
+++ b/deps/icu-small/source/common/unicode/ubidi.h
@@ -692,7 +692,7 @@ typedef enum UBiDiReorderingMode {
* @stable ICU 3.6 */
UBIDI_REORDER_DEFAULT = 0,
/** Logical to Visual algorithm which handles numbers in a way which
- * mimicks the behavior of Windows XP.
+ * mimics the behavior of Windows XP.
* @stable ICU 3.6 */
UBIDI_REORDER_NUMBERS_SPECIAL,
/** Logical to Visual algorithm grouping numbers with adjacent R characters
@@ -1142,7 +1142,7 @@ ubidi_setContext(UBiDi *pBiDi,
/**
* Perform the Unicode Bidi algorithm. It is defined in the
- * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>,
+ * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
* version 13,
* also described in The Unicode Standard, Version 4.0 .<p>
*
diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h
index 600328c49c..73c1553b24 100644
--- a/deps/icu-small/source/common/unicode/ubrk.h
+++ b/deps/icu-small/source/common/unicode/ubrk.h
@@ -268,7 +268,6 @@ ubrk_openRules(const UChar *rules,
UParseError *parseErr,
UErrorCode *status);
-#ifndef U_HIDE_DRAFT_API
/**
* Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
* Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
@@ -287,15 +286,13 @@ ubrk_openRules(const UChar *rules,
* @param status Pointer to UErrorCode to receive any errors.
* @return UBreakIterator for the specified rules.
* @see ubrk_getBinaryRules
- * @draft ICU 59
+ * @stable ICU 59
*/
-U_DRAFT UBreakIterator* U_EXPORT2
+U_STABLE UBreakIterator* U_EXPORT2
ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
const UChar * text, int32_t textLength,
UErrorCode * status);
-#endif /* U_HIDE_DRAFT_API */
-
/**
* Thread safe cloning operation
* @param bi iterator to be cloned
@@ -510,7 +507,7 @@ ubrk_countAvailable(void);
/**
-* Returns true if the specfied position is a boundary position. As a side
+* Returns true if the specified position is a boundary position. As a side
* effect, leaves the iterator pointing to the first boundary position at
* or after "offset".
* @param bi The break iterator to use.
@@ -544,7 +541,7 @@ ubrk_getRuleStatus(UBreakIterator *bi);
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
- * normal way, without attemtping to store any values.
+ * normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator.
@@ -596,7 +593,6 @@ ubrk_refreshUText(UBreakIterator *bi,
UErrorCode *status);
-#ifndef U_HIDE_DRAFT_API
/**
* Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
* The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
@@ -620,15 +616,13 @@ ubrk_refreshUText(UBreakIterator *bi,
* otherwise 0. If not preflighting and this is larger than
* rulesCapacity, *status will be set to an error.
* @see ubrk_openBinaryRules
- * @draft ICU 59
+ * @stable ICU 59
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
ubrk_getBinaryRules(UBreakIterator *bi,
uint8_t * binaryRules, int32_t rulesCapacity,
UErrorCode * status);
-#endif /* U_HIDE_DRAFT_API */
-
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif
diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h
index 3613374d9a..4b72ecfc26 100644
--- a/deps/icu-small/source/common/unicode/uchar.h
+++ b/deps/icu-small/source/common/unicode/uchar.h
@@ -112,11 +112,11 @@ U_CDECL_BEGIN
* Comparison:
* - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
* most of general categories "Z" (separators) + most whitespace ISO controls
- * (including no-break spaces, but excluding IS1..IS4 and ZWSP)
+ * (including no-break spaces, but excluding IS1..IS4)
* - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
* - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
* - u_isspace: Z + whitespace ISO controls (including no-break spaces)
- * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
+ * - u_isblank: "horizontal spaces" = TAB + Zs
*/
/**
@@ -2702,8 +2702,7 @@ u_isgraph(UChar32 c);
*
* same as
*
- * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
- * except Zero Width Space (ZWSP, U+200B).
+ * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators).
*
* Note: There are several ICU whitespace functions; please see the uchar.h
* file documentation for a detailed comparison.
diff --git a/deps/icu-small/source/common/unicode/uclean.h b/deps/icu-small/source/common/unicode/uclean.h
index 3f73af37b8..ab0cd6da6b 100644
--- a/deps/icu-small/source/common/unicode/uclean.h
+++ b/deps/icu-small/source/common/unicode/uclean.h
@@ -70,7 +70,7 @@ u_init(UErrorCode *status);
* This has the effect of restoring ICU to its initial condition, before
* any of these override functions were installed. Refer to
* u_setMemoryFunctions(), u_setMutexFunctions and
- * utrace_setFunctions(). If ICU is to be reinitialized after after
+ * utrace_setFunctions(). If ICU is to be reinitialized after
* calling u_cleanup(), these runtime override functions will need to
* be set up again if they are still required.
* <p>
@@ -104,7 +104,7 @@ u_cleanup(void);
U_CDECL_BEGIN
/**
* Pointer type for a user supplied memory allocation function.
- * @param context user supplied value, obtained from from u_setMemoryFunctions().
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
* @param size The number of bytes to be allocated
* @return Pointer to the newly allocated memory, or NULL if the allocation failed.
* @stable ICU 2.8
@@ -113,7 +113,7 @@ U_CDECL_BEGIN
typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
/**
* Pointer type for a user supplied memory re-allocation function.
- * @param context user supplied value, obtained from from u_setMemoryFunctions().
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
* @param size The number of bytes to be allocated
* @return Pointer to the newly allocated memory, or NULL if the allocation failed.
* @stable ICU 2.8
@@ -123,7 +123,7 @@ typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t si
/**
* Pointer type for a user supplied memory free function. Behavior should be
* similar the standard C library free().
- * @param context user supplied value, obtained from from u_setMemoryFunctions().
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
* @param mem Pointer to the memory block to be resized
* @param size The new size for the block
* @return Pointer to the resized memory block, or NULL if the resizing failed.
@@ -179,8 +179,8 @@ U_CDECL_BEGIN
* The user-supplied function will be called by ICU whenever ICU needs to create a
* new mutex. The function implementation should create a mutex, and store a pointer
* to something that uniquely identifies the mutex into the UMTX that is supplied
- * as a paramter.
- * @param context user supplied value, obtained from from u_setMutexFunctions().
+ * as a parameter.
+ * @param context user supplied value, obtained from u_setMutexFunctions().
* @param mutex Receives a pointer that identifies the new mutex.
* The mutex init function must set the UMTX to a non-null value.
* Subsequent calls by ICU to lock, unlock, or destroy a mutex will
@@ -197,7 +197,7 @@ typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCod
* Function Pointer type for a user supplied mutex functions.
* One of the user-supplied functions with this signature will be called by ICU
* whenever ICU needs to lock, unlock, or destroy a mutex.
- * @param context user supplied value, obtained from from u_setMutexFunctions().
+ * @param context user supplied value, obtained from u_setMutexFunctions().
* @param mutex specify the mutex on which to operate.
* @deprecated ICU 52. This function is no longer supported.
* @system
@@ -229,7 +229,7 @@ u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtx
/**
* Pointer type for a user supplied atomic increment or decrement function.
- * @param context user supplied value, obtained from from u_setAtomicIncDecFunctions().
+ * @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
* @param p Pointer to a 32 bit int to be incremented or decremented
* @return The value of the variable after the inc or dec operation.
* @deprecated ICU 52. This function is no longer supported.
diff --git a/deps/icu-small/source/common/unicode/ucnv.h b/deps/icu-small/source/common/unicode/ucnv.h
index 05d0050f4a..53b4c6f073 100644
--- a/deps/icu-small/source/common/unicode/ucnv.h
+++ b/deps/icu-small/source/common/unicode/ucnv.h
@@ -207,7 +207,7 @@ typedef void (U_EXPORT2 *UConverterToUCallback) (
/**
* Function pointer for error callback in the unicode to codepage direction.
- * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason).
+ * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).
* @param context Pointer to the callback's private data
* @param args Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
@@ -353,7 +353,7 @@ ucnv_compareNames(const char *name1, const char *name2);
* ucnv_getAlias for a complete list that is available.
* If this parameter is NULL, the default converter will be used.
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
- * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see ucnv_openU
* @see ucnv_openCCSID
* @see ucnv_getAvailableName
@@ -386,7 +386,7 @@ ucnv_open(const char *converterName, UErrorCode *err);
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
* U_FILE_ACCESS_ERROR</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an
- * error occured
+ * error occurred
* @see ucnv_open
* @see ucnv_openCCSID
* @see ucnv_close
@@ -489,7 +489,7 @@ ucnv_openCCSID(int32_t codepage,
* @param packageName name of the package (equivalent to 'path' in udata_open() call)
* @param converterName name of the data item to be used, without suffix.
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
- * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see udata_open
* @see ucnv_open
* @see ucnv_safeClone
diff --git a/deps/icu-small/source/common/unicode/ucnv_err.h b/deps/icu-small/source/common/unicode/ucnv_err.h
index e8a79bcd81..08c96c1440 100644
--- a/deps/icu-small/source/common/unicode/ucnv_err.h
+++ b/deps/icu-small/source/common/unicode/ucnv_err.h
@@ -119,19 +119,19 @@ typedef struct UConverter UConverter;
#define UCNV_ESCAPE_JAVA "J"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
- * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_C "C"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
- * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_DEC "D"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
- * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_HEX "X"
@@ -171,7 +171,7 @@ typedef enum {
code points.
The error code U_INVALID_CHAR_FOUND will be set. */
UCNV_RESET = 3, /**< The callback is called with this reason when a
- 'reset' has occured. Callback should reset all
+ 'reset' has occurred. Callback should reset all
state. */
UCNV_CLOSE = 4, /**< Called when the converter is closed. The
callback should release any allocated memory.*/
@@ -199,7 +199,7 @@ typedef struct {
const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
- int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
} UConverterFromUnicodeArgs;
@@ -215,7 +215,7 @@ typedef struct {
const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
- int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
} UConverterToUnicodeArgs;
diff --git a/deps/icu-small/source/common/unicode/ucurr.h b/deps/icu-small/source/common/unicode/ucurr.h
index ecb54d146f..adfaf0023b 100644
--- a/deps/icu-small/source/common/unicode/ucurr.h
+++ b/deps/icu-small/source/common/unicode/ucurr.h
@@ -103,6 +103,19 @@ typedef enum UCurrNameStyle {
* @stable ICU 2.6
*/
UCURR_LONG_NAME
+
+#ifndef U_HIDE_DRAFT_API
+ ,
+ /**
+ * Selector for getName() indicating the narrow currency symbol.
+ * The narrow currency symbol is similar to the regular currency
+ * symbol, but it always takes the shortest form: for example,
+ * "$" instead of "US$" for USD in en-CA.
+ *
+ * @draft ICU 61
+ */
+ UCURR_NARROW_SYMBOL_NAME
+#endif // U_HIDE_DRAFT_API
} UCurrNameStyle;
#if !UCONFIG_NO_SERVICE
diff --git a/deps/icu-small/source/common/unicode/umachine.h b/deps/icu-small/source/common/unicode/umachine.h
index 30de4dba0d..a9dc1631b0 100644
--- a/deps/icu-small/source/common/unicode/umachine.h
+++ b/deps/icu-small/source/common/unicode/umachine.h
@@ -299,6 +299,10 @@ typedef int8_t UBool;
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
+#elif defined(_MSC_VER) && (_MSC_VER < 1900)
+// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
+// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
+# define U_CHAR16_IS_TYPEDEF 1
#else
# define U_CHAR16_IS_TYPEDEF 0
#endif
@@ -366,7 +370,7 @@ typedef int8_t UBool;
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
*
- * @draft ICU 59
+ * @stable ICU 59
*/
#if U_SIZEOF_WCHAR_T==2
typedef wchar_t OldUChar;
diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h
index 914818a00e..c2e0ad48bd 100644
--- a/deps/icu-small/source/common/unicode/uniset.h
+++ b/deps/icu-small/source/common/unicode/uniset.h
@@ -1521,6 +1521,7 @@ private:
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+ int32_t depth,
UErrorCode& ec);
//----------------------------------------------------------------
diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h
index b99a686126..d0b271754b 100644
--- a/deps/icu-small/source/common/unicode/unistr.h
+++ b/deps/icu-small/source/common/unicode/unistr.h
@@ -2995,10 +2995,6 @@ public:
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
#if !U_CHAR16_IS_TYPEDEF
/**
* uint16_t * constructor.
@@ -3008,16 +3004,12 @@ public:
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
* on the compiler command line or similar.
* @param text NUL-terminated UTF-16 string
- * @draft ICU 59
+ * @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
#endif
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* wchar_t * constructor.
@@ -3028,16 +3020,12 @@ public:
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
* on the compiler command line or similar.
* @param text NUL-terminated UTF-16 string
- * @draft ICU 59
+ * @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
#endif
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
/**
* nullptr_t constructor.
* Effectively the same as the default constructor, makes an empty string object.
@@ -3046,7 +3034,7 @@ public:
* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
* on the compiler command line or similar.
* @param text nullptr
- * @draft ICU 59
+ * @stable ICU 59
*/
UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
@@ -3060,26 +3048,18 @@ public:
UnicodeString(const char16_t *text,
int32_t textLength);
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
#if !U_CHAR16_IS_TYPEDEF
/**
* uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *, int32_t).
* @param text UTF-16 string
* @param length string length
- * @draft ICU 59
+ * @stable ICU 59
*/
UnicodeString(const uint16_t *text, int32_t length) :
UnicodeString(ConstChar16Ptr(text), length) {}
#endif
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* wchar_t * constructor.
@@ -3087,22 +3067,18 @@ public:
* Delegates to UnicodeString(const char16_t *, int32_t).
* @param text NUL-terminated UTF-16 string
* @param length string length
- * @draft ICU 59
+ * @stable ICU 59
*/
UnicodeString(const wchar_t *text, int32_t length) :
UnicodeString(ConstChar16Ptr(text), length) {}
#endif
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
/**
* nullptr_t constructor.
* Effectively the same as the default constructor, makes an empty string object.
* @param text nullptr
* @param length ignored
- * @draft ICU 59
+ * @stable ICU 59
*/
inline UnicodeString(const std::nullptr_t text, int32_t length);
@@ -3152,10 +3128,6 @@ public:
*/
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
#if !U_CHAR16_IS_TYPEDEF
/**
* Writable-aliasing uint16_t * constructor.
@@ -3163,16 +3135,12 @@ public:
* @param buffer writable buffer of/for UTF-16 text
* @param buffLength length of the current buffer contents
* @param buffCapacity buffer capacity
- * @draft ICU 59
+ * @stable ICU 59
*/
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Writable-aliasing wchar_t * constructor.
@@ -3181,23 +3149,19 @@ public:
* @param buffer writable buffer of/for UTF-16 text
* @param buffLength length of the current buffer contents
* @param buffCapacity buffer capacity
- * @draft ICU 59
+ * @stable ICU 59
*/
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif
- /*
- * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
- * it should always be available regardless of U_HIDE_DRAFT_API status
- */
/**
* Writable-aliasing nullptr_t constructor.
* Effectively the same as the default constructor, makes an empty string object.
* @param buffer nullptr
* @param buffLength ignored
* @param buffCapacity ignored
- * @draft ICU 59
+ * @stable ICU 59
*/
inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h
index 982655c442..d8ab85091f 100644
--- a/deps/icu-small/source/common/unicode/urename.h
+++ b/deps/icu-small/source/common/unicode/urename.h
@@ -107,7 +107,6 @@
#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup)
-#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl)
#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup)
#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats)
@@ -446,7 +445,6 @@
#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions)
#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength)
#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns)
-#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton)
#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText)
#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex)
#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap)
@@ -551,6 +549,7 @@
#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
+#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie)
#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty)
@@ -862,6 +861,7 @@
#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
+#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName)
#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton)
#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton)
#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open)
@@ -1326,7 +1326,6 @@
#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime)
#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName)
#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime)
-#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties)
#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator)
#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter)
#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite)
diff --git a/deps/icu-small/source/common/unicode/ures.h b/deps/icu-small/source/common/unicode/ures.h
index 918b9f208e..af0ce76f25 100644
--- a/deps/icu-small/source/common/unicode/ures.h
+++ b/deps/icu-small/source/common/unicode/ures.h
@@ -16,7 +16,7 @@
* 04/04/99 helena Fixed internal header inclusion.
* 04/15/99 Madhu Updated Javadoc
* 06/14/99 stephen Removed functions taking a filename suffix.
-* 07/20/99 stephen Language-independent ypedef to void*
+* 07/20/99 stephen Language-independent typedef to void*
* 11/09/99 weiv Added ures_getLocale()
* 06/24/02 weiv Added support for resource sharing
******************************************************************************
@@ -138,7 +138,7 @@ typedef enum {
/**
* Opens a UResourceBundle, from which users can extract strings by using
* their corresponding keys.
- * Note that the caller is responsible of calling <TT>ures_close</TT> on each succesfully
+ * Note that the caller is responsible of calling <TT>ures_close</TT> on each successfully
* opened resource bundle.
* @param packageName The packageName and locale together point to an ICU udata object,
* as defined by <code> udata_open( packageName, "res", locale, err) </code>
@@ -301,7 +301,7 @@ ures_getVersion(const UResourceBundle* resB,
* you to query for the real locale of the resource. For example, if you requested
* "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
* For subresources, the locale where this resource comes from will be returned.
- * If fallback has occured, getLocale will reflect this.
+ * If fallback has occurred, getLocale will reflect this.
*
* @param resourceBundle resource bundle in question
* @param status just for catching illegal arguments
@@ -580,7 +580,7 @@ ures_hasNext(const UResourceBundle *resourceBundle);
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
* Alternatively, you can supply a struct to be filled by this function.
* @param status fills in the outgoing error code. You may still get a non NULL result even if an
- * error occured. Check status instead.
+ * error occurred. Check status instead.
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
* @stable ICU 2.0
*/
@@ -596,7 +596,7 @@ ures_getNextResource(UResourceBundle *resourceBundle,
* @param resourceBundle a resource
* @param len fill in length of the string
* @param key fill in for key associated with this string. NULL if no key
- * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0
@@ -615,7 +615,7 @@ ures_getNextString(UResourceBundle *resourceBundle,
* @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
* Alternatively, you can supply a struct to be filled by this function.
* @param status fills in the outgoing error code. Don't count on NULL being returned if an error has
- * occured. Check status instead.
+ * occurred. Check status instead.
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
* @stable ICU 2.0
*/
@@ -631,7 +631,7 @@ ures_getByIndex(const UResourceBundle *resourceBundle,
* @param resourceBundle a resource
* @param indexS an index to the wanted string.
* @param len fill in length of the string
- * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0
@@ -722,7 +722,7 @@ ures_getByKey(const UResourceBundle *resourceBundle,
* @param resB a resource
* @param key a key associated with the wanted string
* @param len fill in length of the string
- * @param status fills in the outgoing error code. If an error occured, we may return NULL, but don't
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
* count on it. Check status instead!
* @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
* @stable ICU 2.0
diff --git a/deps/icu-small/source/common/unicode/uscript.h b/deps/icu-small/source/common/unicode/uscript.h
index 3ec235d50c..0befa1cd42 100644
--- a/deps/icu-small/source/common/unicode/uscript.h
+++ b/deps/icu-small/source/common/unicode/uscript.h
@@ -476,7 +476,7 @@ typedef enum UScriptCode {
* @param nameOrAbbrOrLocale name of the script, as given in
* PropertyValueAliases.txt, or ISO 15924 code or locale
* @param fillIn the UScriptCode buffer to fill in the script code
- * @param capacity the capacity (size) fo UScriptCode buffer passed in.
+ * @param capacity the capacity (size) of UScriptCode buffer passed in.
* @param err the error status code.
* @return The number of script codes filled in the buffer passed in
* @stable ICU 2.4
diff --git a/deps/icu-small/source/common/unicode/ushape.h b/deps/icu-small/source/common/unicode/ushape.h
index 5af8ffe1c5..3064e08572 100644
--- a/deps/icu-small/source/common/unicode/ushape.h
+++ b/deps/icu-small/source/common/unicode/ushape.h
@@ -93,7 +93,7 @@
* which must not indicate a failure before the function call.
*
* @return The number of UChars written to the destination buffer.
- * If an error occured, then no output was written, or it may be
+ * If an error occurred, then no output was written, or it may be
* incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
* the return value indicates the necessary destination buffer size.
* @stable ICU 2.0
diff --git a/deps/icu-small/source/common/unicode/usprep.h b/deps/icu-small/source/common/unicode/usprep.h
index 33ca1461ce..7cdc6cdd18 100644
--- a/deps/icu-small/source/common/unicode/usprep.h
+++ b/deps/icu-small/source/common/unicode/usprep.h
@@ -33,14 +33,14 @@
* StringPrep prepares Unicode strings for use in network protocols.
* Profiles of StingPrep are set of rules and data according to with the
* Unicode Strings are prepared. Each profiles contains tables which describe
- * how a code point should be treated. The tables are broadly classied into
+ * how a code point should be treated. The tables are broadly classified into
* <ul>
- * <li> Unassinged Table: Contains code points that are unassigned
+ * <li> Unassigned Table: Contains code points that are unassigned
* in the Unicode Version supported by StringPrep. Currently
* RFC 3454 supports Unicode 3.2. </li>
- * <li> Prohibited Table: Contains code points that are prohibted from
+ * <li> Prohibited Table: Contains code points that are prohibited from
* the output of the StringPrep processing function. </li>
- * <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
+ * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
* </ul>
*
* The procedure for preparing Unicode strings:
@@ -230,7 +230,7 @@ U_NAMESPACE_END
/**
* Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
- * checks for prohited and BiDi characters in the order defined by RFC 3454
+ * checks for prohibited and BiDi characters in the order defined by RFC 3454
* depending on the options specified in the profile.
*
* @param prep The profile to use
diff --git a/deps/icu-small/source/common/unicode/ustring.h b/deps/icu-small/source/common/unicode/ustring.h
index 1ea27126cc..cf6ec0b6b4 100644
--- a/deps/icu-small/source/common/unicode/ustring.h
+++ b/deps/icu-small/source/common/unicode/ustring.h
@@ -403,7 +403,7 @@ u_strspn(const UChar *string, const UChar *matchSet);
* @param saveState The current pointer within the original string,
* which is set by this function. The saveState
* parameter should the address of a local variable of type
- * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use
+ * UChar *. (i.e. defined "UChar *myLocalSaveState" and use
* &myLocalSaveState for this parameter).
* @return A pointer to the next token found in src, or NULL
* when there are no more tokens.
@@ -884,7 +884,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
* Unicode String literals in C.
* We need one macro to declare a variable for the string
* and to statically preinitialize it if possible,
- * and a second macro to dynamically intialize such a string variable if necessary.
+ * and a second macro to dynamically initialize such a string variable if necessary.
*
* The macros are defined for maximum performance.
* They work only for strings that contain "invariant characters", i.e.,
diff --git a/deps/icu-small/source/common/unicode/utext.h b/deps/icu-small/source/common/unicode/utext.h
index 7eea1da240..51d11a2e00 100644
--- a/deps/icu-small/source/common/unicode/utext.h
+++ b/deps/icu-small/source/common/unicode/utext.h
@@ -655,10 +655,10 @@ utext_getPreviousNativeIndex(UText *ut);
* @param ut the UText from which to extract data.
* @param nativeStart the native index of the first character to extract.\
* If the specified index is out of range,
- * it will be pinned to to be within 0 <= index <= textLength
+ * it will be pinned to be within 0 <= index <= textLength
* @param nativeLimit the native string index of the position following the last
* character to extract. If the specified index is out of range,
- * it will be pinned to to be within 0 <= index <= textLength.
+ * it will be pinned to be within 0 <= index <= textLength.
* nativeLimit must be >= nativeStart.
* @param dest the UChar (UTF-16) buffer into which the extracted text is placed
* @param destCapacity The size, in UChars, of the destination buffer. May be zero
@@ -906,7 +906,7 @@ utext_copy(UText *ut,
* Caution: freezing a UText will disable changes made via the specific
* frozen UText wrapper only; it will not have any effect on the ability to
* directly modify the text by bypassing the UText. Any such backdoor modifications
- * are always an error while UText access is occuring because the underlying
+ * are always an error while UText access is occurring because the underlying
* text can get out of sync with UText's buffering.
* </p>
*
@@ -1452,7 +1452,7 @@ struct UText {
void *pExtra;
/**
- * (protected) Pointer to string or text-containin object or similar.
+ * (protected) Pointer to string or text-containing object or similar.
* This is the source of the text that this UText is wrapping, in a format
* that is known to the text provider functions.
* @stable ICU 3.4
diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h
index 59b4b25570..1f07634359 100644
--- a/deps/icu-small/source/common/unicode/utf8.h
+++ b/deps/icu-small/source/common/unicode/utf8.h
@@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @see U8_NEXT_UNSAFE
* @stable ICU 2.4
*/
-#define U8_NEXT(s, i, length, c) { \
- (c)=(uint8_t)(s)[(i)++]; \
- if(!U8_IS_SINGLE(c)) { \
- uint8_t __t1, __t2; \
- if( /* handle U+0800..U+FFFF inline */ \
- (0xe0<=(c) && (c)<0xf0) && \
- (((i)+1)<(length) || (length)<0) && \
- U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
- (__t2=(s)[(i)+1]-0x80)<=0x3f) { \
- (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
- (i)+=2; \
- } else if( /* handle U+0080..U+07FF inline */ \
- ((c)<0xe0 && (c)>=0xc2) && \
- ((i)!=(length)) && \
- (__t1=(s)[i]-0x80)<=0x3f) { \
- (c)=(((c)&0x1f)<<6)|__t1; \
- ++(i); \
- } else { \
- /* function call for "complicated" and error cases */ \
- (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
- } \
- } \
-}
+#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
/**
* Get a code point from a string at a code point boundary offset,
@@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @see U8_NEXT
* @stable ICU 51
*/
-#define U8_NEXT_OR_FFFD(s, i, length, c) { \
+#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
+
+/** @internal */
+#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
- uint8_t __t1, __t2; \
- if( /* handle U+0800..U+FFFF inline */ \
- (0xe0<=(c) && (c)<0xf0) && \
- (((i)+1)<(length) || (length)<0) && \
- U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
- (__t2=(s)[(i)+1]-0x80)<=0x3f) { \
- (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
- (i)+=2; \
- } else if( /* handle U+0080..U+07FF inline */ \
- ((c)<0xe0 && (c)>=0xc2) && \
- ((i)!=(length)) && \
- (__t1=(s)[i]-0x80)<=0x3f) { \
- (c)=(((c)&0x1f)<<6)|__t1; \
- ++(i); \
+ uint8_t __t = 0; \
+ if((i)!=(length) && \
+ /* fetch/validate/assemble all but last trail byte */ \
+ ((c)>=0xe0 ? \
+ ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
+ U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
+ (__t&=0x3f, 1) \
+ : /* U+10000..U+10FFFF */ \
+ ((c)-=0xf0)<=4 && \
+ U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
+ ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
+ (__t=(s)[i]-0x80)<=0x3f) && \
+ /* valid second-to-last trail byte */ \
+ ((c)=((c)<<6)|__t, ++(i)!=(length)) \
+ : /* U+0080..U+07FF */ \
+ (c)>=0xc2 && ((c)&=0x1f, 1)) && \
+ /* last trail byte */ \
+ (__t=(s)[i]-0x80)<=0x3f && \
+ ((c)=((c)<<6)|__t, ++(i), 1)) { \
} else { \
- /* function call for "complicated" and error cases */ \
- (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
+ (c)=(sub); /* ill-formed*/ \
} \
} \
}
@@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_APPEND_UNSAFE(s, i, c) { \
- if((uint32_t)(c)<=0x7f) { \
- (s)[(i)++]=(uint8_t)(c); \
+ uint32_t __uc=(c); \
+ if(__uc<=0x7f) { \
+ (s)[(i)++]=(uint8_t)__uc; \
} else { \
- if((uint32_t)(c)<=0x7ff) { \
- (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+ if(__uc<=0x7ff) { \
+ (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
} else { \
- if((uint32_t)(c)<=0xffff) { \
- (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+ if(__uc<=0xffff) { \
+ (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
} else { \
- (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
- (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
} \
- (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
} \
- (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} \
}
@@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_APPEND(s, i, capacity, c, isError) { \
- if((uint32_t)(c)<=0x7f) { \
- (s)[(i)++]=(uint8_t)(c); \
- } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
- (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
- (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
- } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
- (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
- (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
- (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ uint32_t __uc=(c); \
+ if(__uc<=0x7f) { \
+ (s)[(i)++]=(uint8_t)__uc; \
+ } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
+ (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
+ (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
+ (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
+ (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else { \
- (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
+ (isError)=TRUE; \
} \
}
@@ -600,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
+ *
* "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
+ * @see U8_TRUNCATE_IF_INCOMPLETE
* @stable ICU 2.4
*/
#define U8_SET_CP_START(s, start, i) { \
@@ -614,6 +609,57 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
} \
}
+#ifndef U_HIDE_DRAFT_API
+/**
+ * If the string ends with a UTF-8 byte sequence that is valid so far
+ * but incomplete, then reduce the length of the string to end before
+ * the lead byte of that incomplete sequence.
+ * For example, if the string ends with E1 80, the length is reduced by 2.
+ *
+ * In all other cases (the string ends with a complete sequence, or it is not
+ * possible for any further trail byte to extend the trailing sequence)
+ * the length remains unchanged.
+ *
+ * Useful for processing text split across multiple buffers
+ * (save the incomplete sequence for later)
+ * and for optimizing iteration
+ * (check for string length only once per character).
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_SET_CP_START(), this macro never reads s[length].
+ *
+ * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param length int32_t string length (usually start<=length)
+ * @see U8_SET_CP_START
+ * @draft ICU 61
+ */
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) \
+ if((length)>(start)) { \
+ uint8_t __b1=s[(length)-1]; \
+ if(U8_IS_SINGLE(__b1)) { \
+ /* common ASCII character */ \
+ } else if(U8_IS_LEAD(__b1)) { \
+ --(length); \
+ } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+ uint8_t __b2=s[(length)-2]; \
+ if(0xe0<=__b2 && __b2<=0xf4) { \
+ if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+ U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+ (length)-=2; \
+ } \
+ } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+ uint8_t __b3=s[(length)-3]; \
+ if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+ (length)-=3; \
+ } \
+ } \
+ } \
+ }
+#endif // U_HIDE_DRAFT_API
+
/* definitions with backward iteration -------------------------------------- */
/**
diff --git a/deps/icu-small/source/common/unicode/utrace.h b/deps/icu-small/source/common/unicode/utrace.h
index 5d561109c7..bf6fd036f0 100644
--- a/deps/icu-small/source/common/unicode/utrace.h
+++ b/deps/icu-small/source/common/unicode/utrace.h
@@ -183,7 +183,7 @@ UTraceData(const void *context, int32_t fnNumber, int32_t level,
* tracing functions must themselves filter by checking that the
* current thread is the desired thread.
*
- * @param context an uninterpretted pointer. Whatever is passed in
+ * @param context an uninterpreted pointer. Whatever is passed in
* here will in turn be passed to each of the tracing
* functions UTraceEntry, UTraceExit and UTraceData.
* ICU does not use or alter this pointer.
@@ -320,7 +320,7 @@ utrace_getFunctions(const void **context,
* human readable form. Note that a UTraceData function may choose
* to not format the data; it could, for example, save it in
* in the raw form it was received (more compact), leaving
- * formatting for a later trace analyis tool.
+ * formatting for a later trace analysis tool.
* @param outBuf pointer to a buffer to receive the formatted output. Output
* will be nul terminated if there is space in the buffer -
* if the length of the requested output < the output buffer size.
diff --git a/deps/icu-small/source/common/unicode/utypes.h b/deps/icu-small/source/common/unicode/utypes.h
index 4c40e6a87c..b6cf496511 100644
--- a/deps/icu-small/source/common/unicode/utypes.h
+++ b/deps/icu-small/source/common/unicode/utypes.h
@@ -145,7 +145,7 @@
/**
* U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
* Defined as a literal, not a string.
- * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
+ * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
* from the corresponding macro invocation, _before_ other macro substitutions.
* Need a nested \#defines to get the actual version numbers rather than
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
@@ -446,14 +446,14 @@ typedef enum UErrorCode {
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
- U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */
+ U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
- It is very possible that a circular alias definition has occured */
+ It is very possible that a circular alias definition has occurred */
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
@@ -499,7 +499,7 @@ typedef enum UErrorCode {
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
- U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */
+ U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
@@ -539,12 +539,15 @@ typedef enum UErrorCode {
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
+#ifndef U_HIDE_DRAFT_API
+ U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */
+#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- U_FMT_PARSE_ERROR_LIMIT,
+ U_FMT_PARSE_ERROR_LIMIT = 0x10113,
#endif // U_HIDE_DEPRECATED_API
/*
@@ -555,7 +558,7 @@ typedef enum UErrorCode {
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
- U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */
+ U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
@@ -564,7 +567,7 @@ typedef enum UErrorCode {
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
- U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */
+ U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal BreakIterator error code.
diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h
index d905a0f50d..0427bcb03d 100644
--- a/deps/icu-small/source/common/unicode/uvernum.h
+++ b/deps/icu-small/source/common/unicode/uvernum.h
@@ -58,13 +58,13 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION_MAJOR_NUM 60
+#define U_ICU_VERSION_MAJOR_NUM 61
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_MINOR_NUM 2
+#define U_ICU_VERSION_MINOR_NUM 1
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
@@ -84,7 +84,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_SUFFIX _60
+#define U_ICU_VERSION_SUFFIX _61
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@@ -119,19 +119,26 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION "60.2"
+#define U_ICU_VERSION "61.1"
-/** The current ICU library major/minor version as a string without dots, for library name suffixes.
- * This value will change in the subsequent releases of ICU
- * @stable ICU 2.6
+/**
+ * The current ICU library major version number as a string, for library name suffixes.
+ * This value will change in subsequent releases of ICU.
+ *
+ * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
+ * into one string without dots ("48").
+ * Since ICU 49, it is the double-digit major ICU version number.
+ * See http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU
+ *
+ * @stable ICU 2.6
*/
-#define U_ICU_VERSION_SHORT "60"
+#define U_ICU_VERSION_SHORT "61"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
-#define U_ICU_DATA_VERSION "60.2"
+#define U_ICU_DATA_VERSION "61.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================
diff --git a/deps/icu-small/source/common/unicode/uversion.h b/deps/icu-small/source/common/unicode/uversion.h
index cda24b6e0f..3f0251d399 100644
--- a/deps/icu-small/source/common/unicode/uversion.h
+++ b/deps/icu-small/source/common/unicode/uversion.h
@@ -105,7 +105,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* @stable ICU 2.4
*/
-/* Define namespace symbols if the compiler supports it. */
+/* Define C++ namespace symbols. */
#ifdef __cplusplus
# if U_DISABLE_RENAMING
# define U_ICU_NAMESPACE icu
@@ -122,7 +122,13 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
# ifndef U_USING_ICU_NAMESPACE
-# define U_USING_ICU_NAMESPACE 1
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
+ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
+ defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
+# define U_USING_ICU_NAMESPACE 0
+# else
+# define U_USING_ICU_NAMESPACE 0
+# endif
# endif
# if U_USING_ICU_NAMESPACE
U_NAMESPACE_USE
diff --git a/deps/icu-small/source/common/unifiedcache.cpp b/deps/icu-small/source/common/unifiedcache.cpp
index fd0be593d7..f0f660ed06 100644
--- a/deps/icu-small/source/common/unifiedcache.cpp
+++ b/deps/icu-small/source/common/unifiedcache.cpp
@@ -6,24 +6,26 @@
* others. All Rights Reserved.
******************************************************************************
*
-* File UNIFIEDCACHE.CPP
+* File unifiedcache.cpp
******************************************************************************
*/
-#include "uhash.h"
#include "unifiedcache.h"
-#include "umutex.h"
+
+#include <algorithm> // For std::max()
+
#include "mutex.h"
#include "uassert.h"
+#include "uhash.h"
#include "ucln_cmn.h"
+#include "umutex.h"
static icu::UnifiedCache *gCache = NULL;
-static icu::SharedObject *gNoValue = NULL;
static UMutex gCacheMutex = U_MUTEX_INITIALIZER;
static UConditionVar gInProgressValueAddedCond = U_CONDITION_INITIALIZER;
static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER;
-static const int32_t MAX_EVICT_ITERATIONS = 10;
+static const int32_t MAX_EVICT_ITERATIONS = 10;
static const int32_t DEFAULT_MAX_UNUSED = 1000;
static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100;
@@ -35,10 +37,6 @@ static UBool U_CALLCONV unifiedcache_cleanup() {
delete gCache;
gCache = NULL;
}
- if (gNoValue) {
- delete gNoValue;
- gNoValue = NULL;
- }
return TRUE;
}
U_CDECL_END
@@ -73,23 +71,15 @@ static void U_CALLCONV cacheInit(UErrorCode &status) {
ucln_common_registerCleanup(
UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup);
- // gNoValue must be created first to avoid assertion error in
- // cache constructor.
- gNoValue = new SharedObject();
gCache = new UnifiedCache(status);
if (gCache == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
if (U_FAILURE(status)) {
delete gCache;
- delete gNoValue;
gCache = NULL;
- gNoValue = NULL;
return;
}
- // We add a softref because we want hash elements with gNoValue to be
- // elligible for purging but we don't ever want gNoValue to be deleted.
- gNoValue->addSoftRef();
}
UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) {
@@ -104,14 +94,24 @@ UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) {
UnifiedCache::UnifiedCache(UErrorCode &status) :
fHashtable(NULL),
fEvictPos(UHASH_FIRST),
- fItemsInUseCount(0),
+ fNumValuesTotal(0),
+ fNumValuesInUse(0),
fMaxUnused(DEFAULT_MAX_UNUSED),
fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE),
- fAutoEvictedCount(0) {
+ fAutoEvictedCount(0),
+ fNoValue(nullptr) {
if (U_FAILURE(status)) {
return;
}
- U_ASSERT(gNoValue != NULL);
+ fNoValue = new SharedObject();
+ if (fNoValue == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted
+ fNoValue->hardRefCount = 1; // when other references to it are removed.
+ fNoValue->cachePtr = this;
+
fHashtable = uhash_open(
&ucache_hashKeys,
&ucache_compareKeys,
@@ -139,7 +139,7 @@ void UnifiedCache::setEvictionPolicy(
int32_t UnifiedCache::unusedCount() const {
Mutex lock(&gCacheMutex);
- return uhash_count(fHashtable) - fItemsInUseCount;
+ return uhash_count(fHashtable) - fNumValuesInUse;
}
int64_t UnifiedCache::autoEvictedCount() const {
@@ -161,6 +161,12 @@ void UnifiedCache::flush() const {
while (_flush(FALSE));
}
+void UnifiedCache::handleUnreferencedObject() const {
+ Mutex lock(&gCacheMutex);
+ --fNumValuesInUse;
+ _runEvictionSlice();
+}
+
#ifdef UNIFIED_CACHE_DEBUG
#include <stdio.h>
@@ -199,7 +205,7 @@ void UnifiedCache::_dumpContents() const {
"Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n",
key->writeDescription(buffer, 256),
key->creationStatus,
- sharedObject == gNoValue ? NULL :sharedObject,
+ sharedObject == fNoValue ? NULL :sharedObject,
sharedObject->getRefCount(),
sharedObject->getSoftRefCount());
}
@@ -219,10 +225,11 @@ UnifiedCache::~UnifiedCache() {
_flush(TRUE);
}
uhash_close(fHashtable);
+ fHashtable = nullptr;
+ delete fNoValue;
+ fNoValue = nullptr;
}
-// Returns the next element in the cache round robin style.
-// On entry, gCacheMutex must be held.
const UHashElement *
UnifiedCache::_nextElement() const {
const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos);
@@ -233,46 +240,36 @@ UnifiedCache::_nextElement() const {
return element;
}
-// Flushes the contents of the cache. If cache values hold references to other
-// cache values then _flush should be called in a loop until it returns FALSE.
-// On entry, gCacheMutex must be held.
-// On exit, those values with are evictable are flushed. If all is true
-// then every value is flushed even if it is not evictable.
-// Returns TRUE if any value in cache was flushed or FALSE otherwise.
UBool UnifiedCache::_flush(UBool all) const {
UBool result = FALSE;
int32_t origSize = uhash_count(fHashtable);
for (int32_t i = 0; i < origSize; ++i) {
const UHashElement *element = _nextElement();
+ if (element == nullptr) {
+ break;
+ }
if (all || _isEvictable(element)) {
const SharedObject *sharedObject =
(const SharedObject *) element->value.pointer;
+ U_ASSERT(sharedObject->cachePtr = this);
uhash_removeElement(fHashtable, element);
- sharedObject->removeSoftRef();
+ removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero.
result = TRUE;
}
}
return result;
}
-// Computes how many items should be evicted.
-// On entry, gCacheMutex must be held.
-// Returns number of items that should be evicted or a value <= 0 if no
-// items need to be evicted.
int32_t UnifiedCache::_computeCountOfItemsToEvict() const {
- int32_t maxPercentageOfInUseCount =
- fItemsInUseCount * fMaxPercentageOfInUse / 100;
- int32_t maxUnusedCount = fMaxUnused;
- if (maxUnusedCount < maxPercentageOfInUseCount) {
- maxUnusedCount = maxPercentageOfInUseCount;
- }
- return uhash_count(fHashtable) - fItemsInUseCount - maxUnusedCount;
+ int32_t totalItems = uhash_count(fHashtable);
+ int32_t evictableItems = totalItems - fNumValuesInUse;
+
+ int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100;
+ int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused);
+ int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit);
+ return countOfItemsToEvict;
}
-// Run an eviction slice.
-// On entry, gCacheMutex must be held.
-// _runEvictionSlice runs a slice of the evict pipeline by examining the next
-// 10 entries in the cache round robin style evicting them if they are eligible.
void UnifiedCache::_runEvictionSlice() const {
int32_t maxItemsToEvict = _computeCountOfItemsToEvict();
if (maxItemsToEvict <= 0) {
@@ -280,11 +277,14 @@ void UnifiedCache::_runEvictionSlice() const {
}
for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) {
const UHashElement *element = _nextElement();
+ if (element == nullptr) {
+ break;
+ }
if (_isEvictable(element)) {
const SharedObject *sharedObject =
(const SharedObject *) element->value.pointer;
uhash_removeElement(fHashtable, element);
- sharedObject->removeSoftRef();
+ removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero.
++fAutoEvictedCount;
if (--maxItemsToEvict == 0) {
break;
@@ -293,11 +293,6 @@ void UnifiedCache::_runEvictionSlice() const {
}
}
-
-// Places a new value and creationStatus in the cache for the given key.
-// On entry, gCacheMutex must be held. key must not exist in the cache.
-// On exit, value and creation status placed under key. Soft reference added
-// to value on successful add. On error sets status.
void UnifiedCache::_putNew(
const CacheKeyBase &key,
const SharedObject *value,
@@ -312,24 +307,17 @@ void UnifiedCache::_putNew(
return;
}
keyToAdopt->fCreationStatus = creationStatus;
- if (value->noSoftReferences()) {
+ if (value->softRefCount == 0) {
_registerMaster(keyToAdopt, value);
}
- uhash_put(fHashtable, keyToAdopt, (void *) value, &status);
+ void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status);
+ U_ASSERT(oldValue == nullptr);
+ (void)oldValue;
if (U_SUCCESS(status)) {
- value->addSoftRef();
+ value->softRefCount++;
}
}
-// Places value and status at key if there is no value at key or if cache
-// entry for key is in progress. Otherwise, it leaves the current value and
-// status there.
-// On entry. gCacheMutex must not be held. value must be
-// included in the reference count of the object to which it points.
-// On exit, value and status are changed to what was already in the cache if
-// something was there and not in progress. Otherwise, value and status are left
-// unchanged in which case they are placed in the cache on a best-effort basis.
-// Caller must call removeRef() on value.
void UnifiedCache::_putIfAbsentAndGet(
const CacheKeyBase &key,
const SharedObject *&value,
@@ -352,15 +340,7 @@ void UnifiedCache::_putIfAbsentAndGet(
_runEvictionSlice();
}
-// Attempts to fetch value and status for key from cache.
-// On entry, gCacheMutex must not be held value must be NULL and status must
-// be U_ZERO_ERROR.
-// On exit, either returns FALSE (In this
-// case caller should try to create the object) or returns TRUE with value
-// pointing to the fetched value and status set to fetched status. When
-// FALSE is returned status may be set to failure if an in progress hash
-// entry could not be made but value will remain unchanged. When TRUE is
-// returned, caler must call removeRef() on value.
+
UBool UnifiedCache::_poll(
const CacheKeyBase &key,
const SharedObject *&value,
@@ -369,27 +349,29 @@ UBool UnifiedCache::_poll(
U_ASSERT(status == U_ZERO_ERROR);
Mutex lock(&gCacheMutex);
const UHashElement *element = uhash_find(fHashtable, &key);
- while (element != NULL && _inProgress(element)) {
+
+ // If the hash table contains an inProgress placeholder entry for this key,
+ // this means that another thread is currently constructing the value object.
+ // Loop, waiting for that construction to complete.
+ while (element != NULL && _inProgress(element)) {
umtx_condWait(&gInProgressValueAddedCond, &gCacheMutex);
element = uhash_find(fHashtable, &key);
}
+
+ // If the hash table contains an entry for the key,
+ // fetch out the contents and return them.
if (element != NULL) {
- _fetch(element, value, status);
+ _fetch(element, value, status);
return TRUE;
}
- _putNew(key, gNoValue, U_ZERO_ERROR, status);
+
+ // The hash table contained nothing for this key.
+ // Insert an inProgress place holder value.
+ // Our caller will create the final value and update the hash table.
+ _putNew(key, fNoValue, U_ZERO_ERROR, status);
return FALSE;
}
-// Gets value out of cache.
-// On entry. gCacheMutex must not be held. value must be NULL. status
-// must be U_ZERO_ERROR.
-// On exit. value and status set to what is in cache at key or on cache
-// miss the key's createObject() is called and value and status are set to
-// the result of that. In this latter case, best effort is made to add the
-// value and status to the cache. If createObject() fails to create a value,
-// gNoValue is stored in cache, and value is set to NULL. Caller must call
-// removeRef on value if non NULL.
void UnifiedCache::_get(
const CacheKeyBase &key,
const SharedObject *&value,
@@ -398,7 +380,7 @@ void UnifiedCache::_get(
U_ASSERT(value == NULL);
U_ASSERT(status == U_ZERO_ERROR);
if (_poll(key, value, status)) {
- if (value == gNoValue) {
+ if (value == fNoValue) {
SharedObject::clearPtr(value);
}
return;
@@ -410,46 +392,22 @@ void UnifiedCache::_get(
U_ASSERT(value == NULL || value->hasHardReferences());
U_ASSERT(value != NULL || status != U_ZERO_ERROR);
if (value == NULL) {
- SharedObject::copyPtr(gNoValue, value);
+ SharedObject::copyPtr(fNoValue, value);
}
_putIfAbsentAndGet(key, value, status);
- if (value == gNoValue) {
+ if (value == fNoValue) {
SharedObject::clearPtr(value);
}
}
-void UnifiedCache::decrementItemsInUseWithLockingAndEviction() const {
- Mutex mutex(&gCacheMutex);
- decrementItemsInUse();
- _runEvictionSlice();
-}
-
-void UnifiedCache::incrementItemsInUse() const {
- ++fItemsInUseCount;
-}
-
-void UnifiedCache::decrementItemsInUse() const {
- --fItemsInUseCount;
+void UnifiedCache::_registerMaster(
+ const CacheKeyBase *theKey, const SharedObject *value) const {
+ theKey->fIsMaster = true;
+ value->cachePtr = this;
+ ++fNumValuesTotal;
+ ++fNumValuesInUse;
}
-// Register a master cache entry.
-// On entry, gCacheMutex must be held.
-// On exit, items in use count incremented, entry is marked as a master
-// entry, and value registered with cache so that subsequent calls to
-// addRef() and removeRef() on it correctly updates items in use count
-void UnifiedCache::_registerMaster(
- const CacheKeyBase *theKey, const SharedObject *value) const {
- theKey->fIsMaster = TRUE;
- ++fItemsInUseCount;
- value->registerWithCache(this);
-}
-
-// Store a value and error in given hash entry.
-// On entry, gCacheMutex must be held. Hash entry element must be in progress.
-// value must be non NULL.
-// On Exit, soft reference added to value. value and status stored in hash
-// entry. Soft reference removed from previous stored value. Waiting
-// threads notified.
void UnifiedCache::_put(
const UHashElement *element,
const SharedObject *value,
@@ -458,86 +416,52 @@ void UnifiedCache::_put(
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
const SharedObject *oldValue = (const SharedObject *) element->value.pointer;
theKey->fCreationStatus = status;
- if (value->noSoftReferences()) {
+ if (value->softRefCount == 0) {
_registerMaster(theKey, value);
}
- value->addSoftRef();
+ value->softRefCount++;
UHashElement *ptr = const_cast<UHashElement *>(element);
ptr->value.pointer = (void *) value;
- oldValue->removeSoftRef();
+ U_ASSERT(oldValue == fNoValue);
+ removeSoftRef(oldValue);
// Tell waiting threads that we replace in-progress status with
// an error.
umtx_condBroadcast(&gInProgressValueAddedCond);
}
-void
-UnifiedCache::copyPtr(const SharedObject *src, const SharedObject *&dest) {
- if(src != dest) {
- if(dest != NULL) {
- dest->removeRefWhileHoldingCacheLock();
- }
- dest = src;
- if(src != NULL) {
- src->addRefWhileHoldingCacheLock();
- }
- }
-}
-
-void
-UnifiedCache::clearPtr(const SharedObject *&ptr) {
- if (ptr != NULL) {
- ptr->removeRefWhileHoldingCacheLock();
- ptr = NULL;
- }
-}
-
-
-// Fetch value and error code from a particular hash entry.
-// On entry, gCacheMutex must be held. value must be either NULL or must be
-// included in the ref count of the object to which it points.
-// On exit, value and status set to what is in the hash entry. Caller must
-// eventually call removeRef on value.
-// If hash entry is in progress, value will be set to gNoValue and status will
-// be set to U_ZERO_ERROR.
void UnifiedCache::_fetch(
const UHashElement *element,
const SharedObject *&value,
- UErrorCode &status) {
+ UErrorCode &status) const {
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
status = theKey->fCreationStatus;
- // Since we have the cache lock, calling regular SharedObject methods
+ // Since we have the cache lock, calling regular SharedObject add/removeRef
// could cause us to deadlock on ourselves since they may need to lock
// the cache mutex.
- UnifiedCache::copyPtr((const SharedObject *) element->value.pointer, value);
+ removeHardRef(value);
+ value = static_cast<const SharedObject *>(element->value.pointer);
+ addHardRef(value);
}
-// Determine if given hash entry is in progress.
-// On entry, gCacheMutex must be held.
-UBool UnifiedCache::_inProgress(const UHashElement *element) {
- const SharedObject *value = NULL;
+
+UBool UnifiedCache::_inProgress(const UHashElement* element) const {
UErrorCode status = U_ZERO_ERROR;
+ const SharedObject * value = NULL;
_fetch(element, value, status);
UBool result = _inProgress(value, status);
-
- // Since we have the cache lock, calling regular SharedObject methods
- // could cause us to deadlock on ourselves since they may need to lock
- // the cache mutex.
- UnifiedCache::clearPtr(value);
+ removeHardRef(value);
return result;
}
-// Determine if given hash entry is in progress.
-// On entry, gCacheMutex must be held.
UBool UnifiedCache::_inProgress(
- const SharedObject *theValue, UErrorCode creationStatus) {
- return (theValue == gNoValue && creationStatus == U_ZERO_ERROR);
+ const SharedObject* theValue, UErrorCode creationStatus) const {
+ return (theValue == fNoValue && creationStatus == U_ZERO_ERROR);
}
-// Determine if given hash entry is eligible for eviction.
-// On entry, gCacheMutex must be held.
-UBool UnifiedCache::_isEvictable(const UHashElement *element) {
+UBool UnifiedCache::_isEvictable(const UHashElement *element) const
+{
const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer;
const SharedObject *theValue =
(const SharedObject *) element->value.pointer;
@@ -549,7 +473,47 @@ UBool UnifiedCache::_isEvictable(const UHashElement *element) {
// We can evict entries that are either not a master or have just
// one reference (The one reference being from the cache itself).
- return (!theKey->fIsMaster || (theValue->getSoftRefCount() == 1 && theValue->noHardReferences()));
+ return (!theKey->fIsMaster || (theValue->softRefCount == 1 && theValue->noHardReferences()));
+}
+
+void UnifiedCache::removeSoftRef(const SharedObject *value) const {
+ U_ASSERT(value->cachePtr == this);
+ U_ASSERT(value->softRefCount > 0);
+ if (--value->softRefCount == 0) {
+ --fNumValuesTotal;
+ if (value->noHardReferences()) {
+ delete value;
+ } else {
+ // This path only happens from flush(all). Which only happens from the
+ // UnifiedCache destructor. Nulling out value.cacheptr changes the behavior
+ // of value.removeRef(), causing the deletion to be done there.
+ value->cachePtr = nullptr;
+ }
+ }
+}
+
+int32_t UnifiedCache::removeHardRef(const SharedObject *value) const {
+ int refCount = 0;
+ if (value) {
+ refCount = umtx_atomic_dec(&value->hardRefCount);
+ U_ASSERT(refCount >= 0);
+ if (refCount == 0) {
+ --fNumValuesInUse;
+ }
+ }
+ return refCount;
+}
+
+int32_t UnifiedCache::addHardRef(const SharedObject *value) const {
+ int refCount = 0;
+ if (value) {
+ refCount = umtx_atomic_inc(&value->hardRefCount);
+ U_ASSERT(refCount >= 1);
+ if (refCount == 1) {
+ fNumValuesInUse++;
+ }
+ }
+ return refCount;
}
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/unifiedcache.h b/deps/icu-small/source/common/unifiedcache.h
index 947ebbdc78..b3ccd60d17 100644
--- a/deps/icu-small/source/common/unifiedcache.h
+++ b/deps/icu-small/source/common/unifiedcache.h
@@ -190,7 +190,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
UnifiedCache(UErrorCode &status);
/**
- * Returns the cache instance.
+ * Return a pointer to the global cache instance.
*/
static UnifiedCache *getInstance(UErrorCode &status);
@@ -294,7 +294,7 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
/**
* Configures at what point evcition of unused entries will begin.
- * Eviction is triggered whenever the number of unused entries exeeds
+ * Eviction is triggered whenever the number of evictable keys exeeds
* BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100).
* Once the number of unused entries drops below one of these,
* eviction ceases. Because eviction happens incrementally,
@@ -341,60 +341,214 @@ class U_COMMON_API UnifiedCache : public UnifiedCacheBase {
*/
int32_t unusedCount() const;
- virtual void incrementItemsInUse() const;
- virtual void decrementItemsInUseWithLockingAndEviction() const;
- virtual void decrementItemsInUse() const;
+ virtual void handleUnreferencedObject() const;
virtual ~UnifiedCache();
+
private:
UHashtable *fHashtable;
mutable int32_t fEvictPos;
- mutable int32_t fItemsInUseCount;
+ mutable int32_t fNumValuesTotal;
+ mutable int32_t fNumValuesInUse;
int32_t fMaxUnused;
int32_t fMaxPercentageOfInUse;
mutable int64_t fAutoEvictedCount;
+ SharedObject *fNoValue;
+
UnifiedCache(const UnifiedCache &other);
UnifiedCache &operator=(const UnifiedCache &other);
+
+ /**
+ * Flushes the contents of the cache. If cache values hold references to other
+ * cache values then _flush should be called in a loop until it returns FALSE.
+ *
+ * On entry, gCacheMutex must be held.
+ * On exit, those values with are evictable are flushed.
+ *
+ * @param all if false flush evictable items only, which are those with no external
+ * references, plus those that can be safely recreated.<br>
+ * if true, flush all elements. Any values (sharedObjects) with remaining
+ * hard (external) references are not deleted, but are detached from
+ * the cache, so that a subsequent removeRefs can delete them.
+ * _flush is not thread safe when all is true.
+ * @return TRUE if any value in cache was flushed or FALSE otherwise.
+ */
UBool _flush(UBool all) const;
+
+ /**
+ * Gets value out of cache.
+ * On entry. gCacheMutex must not be held. value must be NULL. status
+ * must be U_ZERO_ERROR.
+ * On exit. value and status set to what is in cache at key or on cache
+ * miss the key's createObject() is called and value and status are set to
+ * the result of that. In this latter case, best effort is made to add the
+ * value and status to the cache. If createObject() fails to create a value,
+ * fNoValue is stored in cache, and value is set to NULL. Caller must call
+ * removeRef on value if non NULL.
+ */
void _get(
const CacheKeyBase &key,
const SharedObject *&value,
const void *creationContext,
UErrorCode &status) const;
- UBool _poll(
- const CacheKeyBase &key,
- const SharedObject *&value,
- UErrorCode &status) const;
- void _putNew(
- const CacheKeyBase &key,
- const SharedObject *value,
- const UErrorCode creationStatus,
- UErrorCode &status) const;
+
+ /**
+ * Attempts to fetch value and status for key from cache.
+ * On entry, gCacheMutex must not be held value must be NULL and status must
+ * be U_ZERO_ERROR.
+ * On exit, either returns FALSE (In this
+ * case caller should try to create the object) or returns TRUE with value
+ * pointing to the fetched value and status set to fetched status. When
+ * FALSE is returned status may be set to failure if an in progress hash
+ * entry could not be made but value will remain unchanged. When TRUE is
+ * returned, caller must call removeRef() on value.
+ */
+ UBool _poll(
+ const CacheKeyBase &key,
+ const SharedObject *&value,
+ UErrorCode &status) const;
+
+ /**
+ * Places a new value and creationStatus in the cache for the given key.
+ * On entry, gCacheMutex must be held. key must not exist in the cache.
+ * On exit, value and creation status placed under key. Soft reference added
+ * to value on successful add. On error sets status.
+ */
+ void _putNew(
+ const CacheKeyBase &key,
+ const SharedObject *value,
+ const UErrorCode creationStatus,
+ UErrorCode &status) const;
+
+ /**
+ * Places value and status at key if there is no value at key or if cache
+ * entry for key is in progress. Otherwise, it leaves the current value and
+ * status there.
+ *
+ * On entry. gCacheMutex must not be held. Value must be
+ * included in the reference count of the object to which it points.
+ *
+ * On exit, value and status are changed to what was already in the cache if
+ * something was there and not in progress. Otherwise, value and status are left
+ * unchanged in which case they are placed in the cache on a best-effort basis.
+ * Caller must call removeRef() on value.
+ */
void _putIfAbsentAndGet(
const CacheKeyBase &key,
const SharedObject *&value,
UErrorCode &status) const;
- const UHashElement *_nextElement() const;
+
+ /**
+ * Returns the next element in the cache round robin style.
+ * Returns nullptr if the cache is empty.
+ * On entry, gCacheMutex must be held.
+ */
+ const UHashElement *_nextElement() const;
+
+ /**
+ * Return the number of cache items that would need to be evicted
+ * to bring usage into conformance with eviction policy.
+ *
+ * An item corresponds to an entry in the hash table, a hash table element.
+ *
+ * On entry, gCacheMutex must be held.
+ */
int32_t _computeCountOfItemsToEvict() const;
+
+ /**
+ * Run an eviction slice.
+ * On entry, gCacheMutex must be held.
+ * _runEvictionSlice runs a slice of the evict pipeline by examining the next
+ * 10 entries in the cache round robin style evicting them if they are eligible.
+ */
void _runEvictionSlice() const;
- void _registerMaster(
- const CacheKeyBase *theKey, const SharedObject *value) const;
+
+ /**
+ * Register a master cache entry. A master key is the first key to create
+ * a given SharedObject value. Subsequent keys whose create function
+ * produce referneces to an already existing SharedObject are not masters -
+ * they can be evicted and subsequently recreated.
+ *
+ * On entry, gCacheMutex must be held.
+ * On exit, items in use count incremented, entry is marked as a master
+ * entry, and value registered with cache so that subsequent calls to
+ * addRef() and removeRef() on it correctly interact with the cache.
+ */
+ void _registerMaster(const CacheKeyBase *theKey, const SharedObject *value) const;
+
+ /**
+ * Store a value and creation error status in given hash entry.
+ * On entry, gCacheMutex must be held. Hash entry element must be in progress.
+ * value must be non NULL.
+ * On Exit, soft reference added to value. value and status stored in hash
+ * entry. Soft reference removed from previous stored value. Waiting
+ * threads notified.
+ */
void _put(
const UHashElement *element,
const SharedObject *value,
const UErrorCode status) const;
+ /**
+ * Remove a soft reference, and delete the SharedObject if no references remain.
+ * To be used from within the UnifiedCache implementation only.
+ * gCacheMutex must be held by caller.
+ * @param value the SharedObject to be acted on.
+ */
+ void removeSoftRef(const SharedObject *value) const;
+
+ /**
+ * Increment the hard reference count of the given SharedObject.
+ * gCacheMutex must be held by the caller.
+ * Update numValuesEvictable on transitions between zero and one reference.
+ *
+ * @param value The SharedObject to be referenced.
+ * @return the hard reference count after the addition.
+ */
+ int32_t addHardRef(const SharedObject *value) const;
+
+ /**
+ * Decrement the hard reference count of the given SharedObject.
+ * gCacheMutex must be held by the caller.
+ * Update numValuesEvictable on transitions between one and zero reference.
+ *
+ * @param value The SharedObject to be referenced.
+ * @return the hard reference count after the removal.
+ */
+ int32_t removeHardRef(const SharedObject *value) const;
+
+
#ifdef UNIFIED_CACHE_DEBUG
void _dumpContents() const;
#endif
- static void copyPtr(const SharedObject *src, const SharedObject *&dest);
- static void clearPtr(const SharedObject *&ptr);
- static void _fetch(
- const UHashElement *element,
- const SharedObject *&value,
- UErrorCode &status);
- static UBool _inProgress(const UHashElement *element);
- static UBool _inProgress(
- const SharedObject *theValue, UErrorCode creationStatus);
- static UBool _isEvictable(const UHashElement *element);
+
+ /**
+ * Fetch value and error code from a particular hash entry.
+ * On entry, gCacheMutex must be held. value must be either NULL or must be
+ * included in the ref count of the object to which it points.
+ * On exit, value and status set to what is in the hash entry. Caller must
+ * eventually call removeRef on value.
+ * If hash entry is in progress, value will be set to gNoValue and status will
+ * be set to U_ZERO_ERROR.
+ */
+ void _fetch(const UHashElement *element, const SharedObject *&value,
+ UErrorCode &status) const;
+
+ /**
+ * Determine if given hash entry is in progress.
+ * On entry, gCacheMutex must be held.
+ */
+ UBool _inProgress(const UHashElement *element) const;
+
+ /**
+ * Determine if given hash entry is in progress.
+ * On entry, gCacheMutex must be held.
+ */
+ UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const;
+
+ /**
+ * Determine if given hash entry is eligible for eviction.
+ * On entry, gCacheMutex must be held.
+ */
+ UBool _isEvictable(const UHashElement *element) const;
};
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/uniset_closure.cpp b/deps/icu-small/source/common/uniset_closure.cpp
index b5cc213941..97c7bc9d35 100644
--- a/deps/icu-small/source/common/uniset_closure.cpp
+++ b/deps/icu-small/source/common/uniset_closure.cpp
@@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
- applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
+ applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
if (U_FAILURE(status)) return *this;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp
index d0ed074a9b..ef5d6a32b2 100644
--- a/deps/icu-small/source/common/uniset_props.cpp
+++ b/deps/icu-small/source/common/uniset_props.cpp
@@ -231,7 +231,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
ucase_addPropertyStarts(&sa, &status);
break;
case UPROPS_SRC_BIDI:
- ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status);
+ ubidi_addPropertyStarts(&sa, &status);
break;
default:
status = U_INTERNAL_PROGRAM_ERROR;
@@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
return i.fSet;
}
+namespace {
// Cache some sets for other services -------------------------------------- ***
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
@@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
// memory leak checker tools
#define _dbgct(me)
+} // namespace
+
//----------------------------------------------------------------
// Constructors &c
//----------------------------------------------------------------
@@ -382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
- applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
+ applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
if (U_FAILURE(status)) return;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
@@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
// Implementation: Pattern parsing
//----------------------------------------------------------------
+namespace {
+
/**
* A small all-inline class to manage a UnicodeSet pointer. Add
* operator->() etc. as needed.
@@ -424,6 +429,10 @@ public:
}
};
+constexpr int32_t MAX_DEPTH = 100;
+
+} // namespace
+
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
@@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+ int32_t depth,
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
+ if (depth > MAX_DEPTH) {
+ ec = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
// Syntax characters: [ ] ^ - & { }
@@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
}
switch (setMode) {
case 1:
- nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
+ nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
break;
case 2:
chars.skipIgnored(opts);
@@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// Property set implementation
//----------------------------------------------------------------
+namespace {
+
static UBool numericValueFilter(UChar32 ch, void* context) {
return u_getNumericValue(ch) == *(double*)context;
}
@@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
return uscript_hasScript(ch, *(UScriptCode*)context);
}
+} // namespace
+
/**
* Generic filter-based scanning code for UCD property UnicodeSets.
*/
@@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
}
}
+namespace {
+
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
/* Note: we use ' ' in compiler code page */
int32_t j = 0;
@@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
return TRUE;
}
+} // namespace
+
//----------------------------------------------------------------
// Property set API
//----------------------------------------------------------------
diff --git a/deps/icu-small/source/common/uprops.cpp b/deps/icu-small/source/common/uprops.cpp
index ace3c4d6d0..b76896db1b 100644
--- a/deps/icu-small/source/common/uprops.cpp
+++ b/deps/icu-small/source/common/uprops.cpp
@@ -38,8 +38,6 @@
U_NAMESPACE_USE
-#define GET_BIDI_PROPS() ubidi_getSingleton()
-
/* general properties API functions ----------------------------------------- */
struct BinaryProperty;
@@ -62,15 +60,15 @@ static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32
}
static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_isBidiControl(GET_BIDI_PROPS(), c);
+ return ubidi_isBidiControl(c);
}
static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_isMirrored(GET_BIDI_PROPS(), c);
+ return ubidi_isMirrored(c);
}
static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_isJoinControl(GET_BIDI_PROPS(), c);
+ return ubidi_isJoinControl(c);
}
#if UCONFIG_NO_NORMALIZATION
@@ -329,11 +327,11 @@ static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*
}
static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c);
+ return (int32_t)ubidi_getPairedBracketType(c);
}
static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
- return ubidi_getMaxValue(GET_BIDI_PROPS(), which);
+ return ubidi_getMaxValue(which);
}
#if UCONFIG_NO_NORMALIZATION
@@ -351,11 +349,11 @@ static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UPrope
}
static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
+ return ubidi_getJoiningGroup(c);
}
static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
+ return ubidi_getJoiningType(c);
}
static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
diff --git a/deps/icu-small/source/common/ushape.cpp b/deps/icu-small/source/common/ushape.cpp
index d7886ac06c..c3f3ef9e20 100644
--- a/deps/icu-small/source/common/ushape.cpp
+++ b/deps/icu-small/source/common/ushape.cpp
@@ -342,18 +342,16 @@ static void
_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
UChar digitBase,
UBool isLogical, UBool lastStrongWasAL) {
- const UBiDiProps *bdp;
int32_t i;
UChar c;
- bdp=ubidi_getSingleton();
digitBase-=0x30;
/* the iteration direction depends on the type of input */
if(isLogical) {
for(i=0; i<length; ++i) {
c=s[i];
- switch(ubidi_getClass(bdp, c)) {
+ switch(ubidi_getClass(c)) {
case U_LEFT_TO_RIGHT: /* L */
case U_RIGHT_TO_LEFT: /* R */
lastStrongWasAL=FALSE;
@@ -373,7 +371,7 @@ _shapeToArabicDigitsWithContext(UChar *s, int32_t length,
} else {
for(i=length; i>0; /* pre-decrement in the body */) {
c=s[--i];
- switch(ubidi_getClass(bdp, c)) {
+ switch(ubidi_getClass(c)) {
case U_LEFT_TO_RIGHT: /* L */
case U_RIGHT_TO_LEFT: /* R */
lastStrongWasAL=FALSE;
diff --git a/deps/icu-small/source/common/usprep.cpp b/deps/icu-small/source/common/usprep.cpp
index c4f831be2e..54a77172fe 100644
--- a/deps/icu-small/source/common/usprep.cpp
+++ b/deps/icu-small/source/common/usprep.cpp
@@ -347,10 +347,6 @@ usprep_getProfile(const char* path,
newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
- if(newProfile->checkBiDi) {
- newProfile->bdp = ubidi_getSingleton();
- }
-
LocalMemory<UStringPrepKey> key;
LocalMemory<char> keyName;
LocalMemory<char> keyPath;
@@ -735,7 +731,7 @@ usprep_prepare( const UStringPrepProfile* profile,
}
if(profile->checkBiDi) {
- direction = ubidi_getClass(profile->bdp, ch);
+ direction = ubidi_getClass(ch);
if(firstCharDir == U_CHAR_DIRECTION_COUNT){
firstCharDir = direction;
}
diff --git a/deps/icu-small/source/common/ustr_wcs.cpp b/deps/icu-small/source/common/ustr_wcs.cpp
index 8b6e99221e..0372824f21 100644
--- a/deps/icu-small/source/common/ustr_wcs.cpp
+++ b/deps/icu-small/source/common/ustr_wcs.cpp
@@ -342,7 +342,7 @@ _strFromWCS( UChar *dest,
pSrcLimit = src + srcLength;
for(;;){
- register int32_t nulLen = 0;
+ int32_t nulLen = 0;
/* find nulls in the string */
while(nulLen<srcLength && pSrc[nulLen++]!=0){
diff --git a/deps/icu-small/source/common/ustrcase.cpp b/deps/icu-small/source/common/ustrcase.cpp
index b1beb34277..978bd3b7b8 100644
--- a/deps/icu-small/source/common/ustrcase.cpp
+++ b/deps/icu-small/source/common/ustrcase.cpp
@@ -52,16 +52,8 @@ int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
return destIndex;
}
-} // namespace
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-/* string casing ------------------------------------------------------------ */
-
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
-static inline int32_t
+inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
int32_t result, const UChar *s,
int32_t cpLength, uint32_t options, icu::Edits *edits) {
@@ -134,7 +126,7 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
return destIndex;
}
-static inline int32_t
+inline int32_t
appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
if(destIndex<destCapacity) {
dest[destIndex]=c;
@@ -144,28 +136,34 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
return destIndex+1;
}
-static inline int32_t
+int32_t
+appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
+ const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
+ if(edits!=NULL) {
+ edits->addUnchanged(length);
+ }
+ if(options & U_OMIT_UNCHANGED_TEXT) {
+ return destIndex;
+ }
+ if(length>(INT32_MAX-destIndex)) {
+ return -1; // integer overflow
+ }
+ if((destIndex+length)<=destCapacity) {
+ u_memcpy(dest+destIndex, s, length);
+ }
+ return destIndex + length;
+}
+
+inline int32_t
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
- if(length>0) {
- if(edits!=NULL) {
- edits->addUnchanged(length);
- }
- if(options & U_OMIT_UNCHANGED_TEXT) {
- return destIndex;
- }
- if(length>(INT32_MAX-destIndex)) {
- return -1; // integer overflow
- }
- if((destIndex+length)<=destCapacity) {
- u_memcpy(dest+destIndex, s, length);
- }
- destIndex+=length;
+ if (length <= 0) {
+ return destIndex;
}
- return destIndex;
+ return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits);
}
-static UChar32 U_CALLCONV
+UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
UCaseContext *csc=(UCaseContext *)context;
UChar32 c;
@@ -197,39 +195,205 @@ utf16_caseContextIterator(void *context, int8_t dir) {
return U_SENTINEL;
}
-/*
- * Case-maps [srcStart..srcLimit[ but takes
- * context [0..srcLength[ into account.
+/**
+ * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
+ * caseLocale < 0: Case-folds [srcStart..srcLimit[.
*/
-static int32_t
-_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
- UChar *dest, int32_t destCapacity,
- const UChar *src, UCaseContext *csc,
- int32_t srcStart, int32_t srcLimit,
- icu::Edits *edits,
- UErrorCode &errorCode) {
- /* case mapping loop */
- int32_t srcIndex=srcStart;
- int32_t destIndex=0;
- while(srcIndex<srcLimit) {
- int32_t cpStart;
- csc->cpStart=cpStart=srcIndex;
+int32_t toLower(int32_t caseLocale, uint32_t options,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
+ icu::Edits *edits, UErrorCode &errorCode) {
+ const int8_t *latinToLower;
+ if (caseLocale == UCASE_LOC_ROOT ||
+ (caseLocale >= 0 ?
+ !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
+ (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
+ latinToLower = LatinCase::TO_LOWER_NORMAL;
+ } else {
+ latinToLower = LatinCase::TO_LOWER_TR_LT;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t destIndex = 0;
+ int32_t prev = srcStart;
+ int32_t srcIndex = srcStart;
+ for (;;) {
+ // fast path for simple cases
+ UChar lead;
+ while (srcIndex < srcLimit) {
+ lead = src[srcIndex];
+ int32_t delta;
+ if (lead < LatinCase::LONG_S) {
+ int8_t d = latinToLower[lead];
+ if (d == LatinCase::EXC) { break; }
+ ++srcIndex;
+ if (d == 0) { continue; }
+ delta = d;
+ } else if (lead >= 0xd800) {
+ break; // surrogate or higher
+ } else {
+ uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ ++srcIndex;
+ if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ }
+ lead += delta;
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - 1 - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendUChar(dest, destIndex, destCapacity, lead);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ if (srcIndex >= srcLimit) {
+ break;
+ }
+ // slow path
+ int32_t cpStart = srcIndex++;
+ UChar trail;
UChar32 c;
- U16_NEXT(src, srcIndex, srcLimit, c);
- csc->cpLimit=srcIndex;
+ if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
+ c = U16_GET_SUPPLEMENTARY(lead, trail);
+ ++srcIndex;
+ } else {
+ c = lead;
+ }
const UChar *s;
- c=map(c, utf16_caseContextIterator, csc, &s, caseLocale);
- destIndex = appendResult(dest, destIndex, destCapacity, c, s,
- srcIndex - cpStart, options, edits);
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
+ if (caseLocale >= 0) {
+ csc->cpStart = cpStart;
+ csc->cpLimit = srcIndex;
+ c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
+ } else {
+ c = ucase_toFullFolding(c, &s, options);
}
+ if (c >= 0) {
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, cpStart - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+ srcIndex - cpStart, options, edits);
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ }
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - prev, options, edits);
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
}
+ return destIndex;
+}
+int32_t toUpper(int32_t caseLocale, uint32_t options,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, UCaseContext *csc, int32_t srcLength,
+ icu::Edits *edits, UErrorCode &errorCode) {
+ const int8_t *latinToUpper;
+ if (caseLocale == UCASE_LOC_TURKISH) {
+ latinToUpper = LatinCase::TO_UPPER_TR;
+ } else {
+ latinToUpper = LatinCase::TO_UPPER_NORMAL;
+ }
+ const UTrie2 *trie = ucase_getTrie();
+ int32_t destIndex = 0;
+ int32_t prev = 0;
+ int32_t srcIndex = 0;
+ for (;;) {
+ // fast path for simple cases
+ UChar lead;
+ while (srcIndex < srcLength) {
+ lead = src[srcIndex];
+ int32_t delta;
+ if (lead < LatinCase::LONG_S) {
+ int8_t d = latinToUpper[lead];
+ if (d == LatinCase::EXC) { break; }
+ ++srcIndex;
+ if (d == 0) { continue; }
+ delta = d;
+ } else if (lead >= 0xd800) {
+ break; // surrogate or higher
+ } else {
+ uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
+ if (UCASE_HAS_EXCEPTION(props)) { break; }
+ ++srcIndex;
+ if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
+ continue;
+ }
+ }
+ lead += delta;
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - 1 - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendUChar(dest, destIndex, destCapacity, lead);
+ if (edits != nullptr) {
+ edits->addReplace(1, 1);
+ }
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ if (srcIndex >= srcLength) {
+ break;
+ }
+ // slow path
+ int32_t cpStart;
+ csc->cpStart = cpStart = srcIndex++;
+ UChar trail;
+ UChar32 c;
+ if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
+ c = U16_GET_SUPPLEMENTARY(lead, trail);
+ ++srcIndex;
+ } else {
+ c = lead;
+ }
+ csc->cpLimit = srcIndex;
+ const UChar *s;
+ c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
+ if (c >= 0) {
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, cpStart - prev, options, edits);
+ if (destIndex >= 0) {
+ destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+ srcIndex - cpStart, options, edits);
+ }
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
+ prev = srcIndex;
+ }
+ }
+ destIndex = appendUnchanged(dest, destIndex, destCapacity,
+ src + prev, srcIndex - prev, options, edits);
+ if (destIndex < 0) {
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ return 0;
+ }
return destIndex;
}
+} // namespace
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV
@@ -344,11 +508,10 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
destIndex+=
- _caseMap(
- caseLocale, options, ucase_toFullLower,
+ toLower(
+ caseLocale, options,
dest+destIndex, destCapacity-destIndex,
- src, &csc,
- titleLimit, index,
+ src, &csc, titleLimit, index,
edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
@@ -1013,8 +1176,8 @@ ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
- int32_t destIndex = _caseMap(
- caseLocale, options, ucase_toFullLower,
+ int32_t destIndex = toLower(
+ caseLocale, options,
dest, destCapacity,
src, &csc, 0, srcLength,
edits, errorCode);
@@ -1035,10 +1198,10 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
- destIndex = _caseMap(
- caseLocale, options, ucase_toFullUpper,
+ destIndex = toUpper(
+ caseLocale, options,
dest, destCapacity,
- src, &csc, 0, srcLength,
+ src, &csc, srcLength,
edits, errorCode);
}
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
@@ -1050,23 +1213,11 @@ ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK
const UChar *src, int32_t srcLength,
icu::Edits *edits,
UErrorCode &errorCode) {
- /* case mapping loop */
- int32_t srcIndex = 0;
- int32_t destIndex = 0;
- while (srcIndex < srcLength) {
- int32_t cpStart = srcIndex;
- UChar32 c;
- U16_NEXT(src, srcIndex, srcLength, c);
- const UChar *s;
- c = ucase_toFullFolding(c, &s, options);
- destIndex = appendResult(dest, destIndex, destCapacity, c, s,
- srcIndex - cpStart, options, edits);
- if (destIndex < 0) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- }
-
+ int32_t destIndex = toLower(
+ -1, options,
+ dest, destCapacity,
+ src, nullptr, 0, srcLength,
+ edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
diff --git a/deps/icu-small/source/common/utf_impl.cpp b/deps/icu-small/source/common/utf_impl.cpp
index f78c566e09..9dd241a12b 100644
--- a/deps/icu-small/source/common/utf_impl.cpp
+++ b/deps/icu-small/source/common/utf_impl.cpp
@@ -238,33 +238,45 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
int32_t i=*pi;
if(U8_IS_TRAIL(c) && i>start) {
uint8_t b1=s[--i];
- if(0xc2<=b1 && b1<0xe0) {
- *pi=i;
- return ((b1-0xc0)<<6)|(c&0x3f);
+ if(U8_IS_LEAD(b1)) {
+ if(b1<0xe0) {
+ *pi=i;
+ return ((b1-0xc0)<<6)|(c&0x3f);
+ } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
+ // Truncated 3- or 4-byte sequence.
+ *pi=i;
+ return errorValue(1, strict);
+ }
} else if(U8_IS_TRAIL(b1) && i>start) {
// Extract the value bits from the last trail byte.
c&=0x3f;
uint8_t b2=s[--i];
- if(0xe0<=b2 && b2<0xf0) {
- b2&=0xf;
- if(strict!=-2) {
- if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
- *pi=i;
- c=(b2<<12)|((b1&0x3f)<<6)|c;
- if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
- return c;
- } else {
- // strict: forbid non-characters like U+fffe
- return errorValue(2, strict);
+ if(0xe0<=b2 && b2<=0xf4) {
+ if(b2<0xf0) {
+ b2&=0xf;
+ if(strict!=-2) {
+ if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+ *pi=i;
+ c=(b2<<12)|((b1&0x3f)<<6)|c;
+ if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+ return c;
+ } else {
+ // strict: forbid non-characters like U+fffe
+ return errorValue(2, strict);
+ }
+ }
+ } else {
+ // strict=-2 -> lenient: allow surrogates
+ b1-=0x80;
+ if((b2>0 || b1>=0x20)) {
+ *pi=i;
+ return (b2<<12)|(b1<<6)|c;
}
}
- } else {
- // strict=-2 -> lenient: allow surrogates
- b1-=0x80;
- if((b2>0 || b1>=0x20)) {
- *pi=i;
- return (b2<<12)|(b1<<6)|c;
- }
+ } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+ // Truncated 4-byte sequence.
+ *pi=i;
+ return errorValue(2, strict);
}
} else if(U8_IS_TRAIL(b2) && i>start) {
uint8_t b3=s[--i];
@@ -281,16 +293,7 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
}
}
}
- } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
- // Truncated 4-byte sequence.
- *pi=i;
- return errorValue(2, strict);
}
- } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
- (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
- // Truncated 3- or 4-byte sequence.
- *pi=i;
- return errorValue(1, strict);
}
}
return errorValue(0, strict);
@@ -303,29 +306,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
uint8_t c=s[i];
if(U8_IS_TRAIL(c) && i>start) {
uint8_t b1=s[--i];
- if(0xc2<=b1 && b1<0xe0) {
- return i;
+ if(U8_IS_LEAD(b1)) {
+ if(b1<0xe0 ||
+ (b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
+ return i;
+ }
} else if(U8_IS_TRAIL(b1) && i>start) {
uint8_t b2=s[--i];
- if(0xe0<=b2 && b2<0xf0) {
- if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+ if(0xe0<=b2 && b2<=0xf4) {
+ if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
return i;
}
} else if(U8_IS_TRAIL(b2) && i>start) {
uint8_t b3=s[--i];
- if(0xf0<=b3 && b3<=0xf4) {
- if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
- return i;
- }
+ if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+ return i;
}
- } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
- // Truncated 4-byte sequence.
- return i;
}
- } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
- (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
- // Truncated 3- or 4-byte sequence.
- return i;
}
}
return orig_i;
diff --git a/deps/icu-small/source/common/utrie.h b/deps/icu-small/source/common/utrie.h
index 9c5382c594..641027a1a3 100644
--- a/deps/icu-small/source/common/utrie.h
+++ b/deps/icu-small/source/common/utrie.h
@@ -556,7 +556,7 @@ struct UNewTrie {
* Index values at build-time are 32 bits wide for easier processing.
* Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()).
*/
- int32_t index[UTRIE_MAX_INDEX_LENGTH];
+ int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT];
uint32_t *data;
uint32_t leadUnitValue;
diff --git a/deps/icu-small/source/common/uts46.cpp b/deps/icu-small/source/common/uts46.cpp
index 9b8d3ded2f..5a23572eb6 100644
--- a/deps/icu-small/source/common/uts46.cpp
+++ b/deps/icu-small/source/common/uts46.cpp
@@ -1126,7 +1126,6 @@ isASCIIOkBiDi(const char *s, int32_t length) {
UBool
UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
- const UBiDiProps *bdp=ubidi_getSingleton();
// [IDNA2008-Tables]
// 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
for(int32_t i=0; i<labelLength; ++i) {
@@ -1148,7 +1147,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
}
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
for(;;) {
- UJoiningType type=ubidi_getJoiningType(bdp, c);
+ UJoiningType type=ubidi_getJoiningType(c);
if(type==U_JT_TRANSPARENT) {
if(j==0) {
return FALSE;
@@ -1166,7 +1165,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
return FALSE;
}
U16_NEXT_UNSAFE(label, j, c);
- UJoiningType type=ubidi_getJoiningType(bdp, c);
+ UJoiningType type=ubidi_getJoiningType(c);
if(type==U_JT_TRANSPARENT) {
// just skip this character
} else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {
diff --git a/deps/icu-small/source/common/utypes.cpp b/deps/icu-small/source/common/utypes.cpp
index 8f5791be16..5d6a0504ba 100644
--- a/deps/icu-small/source/common/utypes.cpp
+++ b/deps/icu-small/source/common/utypes.cpp
@@ -125,7 +125,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_UNDEFINED_KEYWORD",
"U_DEFAULT_KEYWORD_MISSING",
"U_DECIMAL_NUMBER_SYNTAX_ERROR",
- "U_FORMAT_INEXACT_ERROR"
+ "U_FORMAT_INEXACT_ERROR",
+ "U_NUMBER_ARG_OUTOFBOUNDS_ERROR"
};
static const char * const