diff options
Diffstat (limited to 'deps/icu-small/source/common')
149 files changed, 5923 insertions, 1472 deletions
diff --git a/deps/icu-small/source/common/brkeng.cpp b/deps/icu-small/source/common/brkeng.cpp index 42771b3617..68c74f2359 100644 --- a/deps/icu-small/source/common/brkeng.cpp +++ b/deps/icu-small/source/common/brkeng.cpp @@ -129,7 +129,7 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) { const LanguageBreakEngine *lbe = NULL; UErrorCode status = U_ZERO_ERROR; - static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER; + static UMutex gBreakEngineMutex; Mutex m(&gBreakEngineMutex); if (fEngines == NULL) { diff --git a/deps/icu-small/source/common/brkiter.cpp b/deps/icu-small/source/common/brkiter.cpp index 23e0cc3c15..2fc4c345c7 100644 --- a/deps/icu-small/source/common/brkiter.cpp +++ b/deps/icu-small/source/common/brkiter.cpp @@ -277,7 +277,7 @@ ICUBreakIteratorService::~ICUBreakIteratorService() {} // defined in ucln_cmn.h U_NAMESPACE_END -static icu::UInitOnce gInitOnceBrkiter; +static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER; static icu::ICULocaleService* gService = NULL; diff --git a/deps/icu-small/source/common/bytesinkutil.h b/deps/icu-small/source/common/bytesinkutil.h index 69e4cbcd26..6808fbe677 100644 --- a/deps/icu-small/source/common/bytesinkutil.h +++ b/deps/icu-small/source/common/bytesinkutil.h @@ -59,7 +59,7 @@ private: ByteSink &sink, uint32_t options, Edits *edits); }; -class CharStringByteSink : public ByteSink { +class U_COMMON_API CharStringByteSink : public ByteSink { public: CharStringByteSink(CharString* dest); ~CharStringByteSink() override; diff --git a/deps/icu-small/source/common/characterproperties.cpp b/deps/icu-small/source/common/characterproperties.cpp index 5a57364375..7b50a4e205 100644 --- a/deps/icu-small/source/common/characterproperties.cpp +++ b/deps/icu-small/source/common/characterproperties.cpp @@ -38,8 +38,8 @@ UBool U_CALLCONV characterproperties_cleanup(); constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START; struct Inclusion { - UnicodeSet *fSet; - UInitOnce 
fInitOnce; + UnicodeSet *fSet = nullptr; + UInitOnce fInitOnce = U_INITONCE_INITIALIZER; }; Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions() @@ -47,10 +47,7 @@ UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {}; UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {}; -icu::UMutex *cpMutex() { - static icu::UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +icu::UMutex cpMutex; //---------------------------------------------------------------- // Inclusions list @@ -361,7 +358,7 @@ u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) { *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return nullptr; } - Mutex m(cpMutex()); + Mutex m(&cpMutex); UnicodeSet *set = sets[property]; if (set == nullptr) { sets[property] = set = makeSet(property, *pErrorCode); @@ -377,7 +374,7 @@ u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) { *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return nullptr; } - Mutex m(cpMutex()); + Mutex m(&cpMutex); UCPMap *map = maps[property - UCHAR_INT_START]; if (map == nullptr) { maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode); diff --git a/deps/icu-small/source/common/charstr.cpp b/deps/icu-small/source/common/charstr.cpp index 852cc53945..dda29dac63 100644 --- a/deps/icu-small/source/common/charstr.cpp +++ b/deps/icu-small/source/common/charstr.cpp @@ -35,6 +35,17 @@ CharString& CharString::operator=(CharString&& src) U_NOEXCEPT { return *this; } +char *CharString::cloneData(UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return nullptr; } + char *p = static_cast<char *>(uprv_malloc(len + 1)); + if (p == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + uprv_memcpy(p, buffer.getAlias(), len + 1); + return p; +} + CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) { if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) { len=s.len; @@ -52,6 +63,18 @@ int32_t CharString::lastIndexOf(char c) const { return -1; } +bool 
CharString::contains(StringPiece s) const { + if (s.empty()) { return false; } + const char *p = buffer.getAlias(); + int32_t lastStart = len - s.length(); + for (int32_t i = 0; i <= lastStart; ++i) { + if (uprv_memcmp(p + i, s.data(), s.length()) == 0) { + return true; + } + } + return false; +} + CharString &CharString::truncate(int32_t newLength) { if(newLength<0) { newLength=0; diff --git a/deps/icu-small/source/common/charstr.h b/deps/icu-small/source/common/charstr.h index 1a97e01988..23b950ed6e 100644 --- a/deps/icu-small/source/common/charstr.h +++ b/deps/icu-small/source/common/charstr.h @@ -82,10 +82,24 @@ public: const char *data() const { return buffer.getAlias(); } char *data() { return buffer.getAlias(); } + /** + * Allocates length()+1 chars and copies the NUL-terminated data(). + * The caller must uprv_free() the result. + */ + char *cloneData(UErrorCode &errorCode) const; + + bool operator==(StringPiece other) const { + return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0); + } + bool operator!=(StringPiece other) const { + return !operator==(other); + } /** @return last index of c, or -1 if c is not in this string */ int32_t lastIndexOf(char c) const; + bool contains(StringPiece s) const; + CharString &clear() { len=0; buffer[0]=0; return *this; } CharString &truncate(int32_t newLength); diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h index f501b20a14..b24bd0ead2 100644 --- a/deps/icu-small/source/common/cmemory.h +++ b/deps/icu-small/source/common/cmemory.h @@ -65,37 +65,36 @@ U_CAPI void * U_EXPORT2 uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2); /** - * This should align the memory properly on any machine. - * This is very useful for the safeClone functions. - */ -typedef union { - long t1; - double t2; - void *t3; -} UAlignedMemory; - -/** * Get the least significant bits of a pointer (a memory address). 
* For example, with a mask of 3, the macro gets the 2 least significant bits, * which will be 0 if the pointer is 32-bit (4-byte) aligned. * - * ptrdiff_t is the most appropriate integer type to cast to. - * size_t should work too, since on most (or all?) platforms it has the same - * width as ptrdiff_t. + * uintptr_t is the most appropriate integer type to cast to. */ -#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask)) +#define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask)) /** - * Get the amount of bytes that a pointer is off by from - * the previous UAlignedMemory-aligned pointer. - */ -#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1) - -/** - * Get the amount of bytes to add to a pointer - * in order to get the next UAlignedMemory-aligned address. + * Create & return an instance of "type" in statically allocated storage. + * e.g. + * static std::mutex *myMutex = STATIC_NEW(std::mutex); + * To destroy an object created in this way, invoke the destructor explicitly, e.g. + * myMutex->~mutex(); + * DO NOT use delete. + * DO NOT use with class UMutex, which has specific support for static instances. + * + * STATIC_NEW is intended for use when + * - We want a static (or global) object. + * - We don't want it to ever be destructed, or to explicitly control destruction, + * to avoid use-after-destruction problems. + * - We want to avoid an ordinary heap allocated object, + * to avoid the possibility of memory allocation failures, and + * to avoid memory leak reports, from valgrind, for example. + * This is defined as a macro rather than a template function because each invocation + * must define distinct static storage for the object being returned. 
*/ -#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr)) +#define STATIC_NEW(type) [] () { \ + alignas(type) static char storage[sizeof(type)]; \ + return new(storage) type();} () /** * Heap clean up function, called from u_cleanup() diff --git a/deps/icu-small/source/common/edits.cpp b/deps/icu-small/source/common/edits.cpp index 00a8d601a1..95f0c19a72 100644 --- a/deps/icu-small/source/common/edits.cpp +++ b/deps/icu-small/source/common/edits.cpp @@ -243,7 +243,7 @@ UBool Edits::growArray() { return TRUE; } -UBool Edits::copyErrorTo(UErrorCode &outErrorCode) { +UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const { if (U_FAILURE(outErrorCode)) { return TRUE; } if (U_SUCCESS(errorCode_)) { return FALSE; } outErrorCode = errorCode_; diff --git a/deps/icu-small/source/common/filteredbrk.cpp b/deps/icu-small/source/common/filteredbrk.cpp index 162b38de5d..ae7cf5270a 100644 --- a/deps/icu-small/source/common/filteredbrk.cpp +++ b/deps/icu-small/source/common/filteredbrk.cpp @@ -173,7 +173,7 @@ public: status = U_SAFECLONE_ALLOCATED_WARNING; return clone(); } - virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); } + virtual SimpleFilteredSentenceBreakIterator* clone() const { return new SimpleFilteredSentenceBreakIterator(*this); } virtual UClassID getDynamicClassID(void) const { return NULL; } virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; } diff --git a/deps/icu-small/source/common/localebuilder.cpp b/deps/icu-small/source/common/localebuilder.cpp index fe931fcf75..1dd8131e58 100644 --- a/deps/icu-small/source/common/localebuilder.cpp +++ b/deps/icu-small/source/common/localebuilder.cpp @@ -157,13 +157,18 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len) } static void -_copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode) +_copyExtensions(const Locale& from, icu::StringEnumeration 
*keywords, + Locale& to, bool validate, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } - LocalPointer<icu::StringEnumeration> iter(from.createKeywords(errorCode)); - if (U_FAILURE(errorCode) || iter.isNull()) { return; } + LocalPointer<icu::StringEnumeration> ownedKeywords; + if (keywords == nullptr) { + ownedKeywords.adoptInstead(from.createKeywords(errorCode)); + if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; } + keywords = ownedKeywords.getAlias(); + } const char* key; - while ((key = iter->next(nullptr, errorCode)) != nullptr) { + while ((key = keywords->next(nullptr, errorCode)) != nullptr) { CharString value; CharStringByteSink sink(&value); from.getKeywordValue(key, sink, errorCode); @@ -176,34 +181,34 @@ _copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& error errorCode = U_ILLEGAL_ARGUMENT_ERROR; return; } - to->setKeywordValue(key, value.data(), errorCode); + to.setKeywordValue(key, value.data(), errorCode); if (U_FAILURE(errorCode)) { return; } } } void static -_clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode) +_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode) { // Clear Unicode attributes - locale->setKeywordValue(kAttributeKey, "", errorCode); + locale.setKeywordValue(kAttributeKey, "", errorCode); // Clear all Unicode keyword values - LocalPointer<icu::StringEnumeration> iter(locale->createUnicodeKeywords(errorCode)); + LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode)); if (U_FAILURE(errorCode) || iter.isNull()) { return; } const char* key; while ((key = iter->next(nullptr, errorCode)) != nullptr) { - locale->setUnicodeKeywordValue(key, nullptr, errorCode); + locale.setUnicodeKeywordValue(key, nullptr, errorCode); } } static void -_setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode) +_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode) { // Add the unicode extensions 
to extensions_ CharString locale_str("und-u-", errorCode); locale_str.append(value, errorCode); _copyExtensions( - Locale::forLanguageTag(locale_str.data(), errorCode), + Locale::forLanguageTag(locale_str.data(), errorCode), nullptr, locale, false, errorCode); } @@ -235,10 +240,10 @@ LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value) status_); return *this; } - _clearUAttributesAndKeyType(extensions_, status_); + _clearUAttributesAndKeyType(*extensions_, status_); if (U_FAILURE(status_)) { return *this; } if (!value.empty()) { - _setUnicodeExtensions(extensions_, value_str, status_); + _setUnicodeExtensions(*extensions_, value_str, status_); } return *this; } @@ -401,6 +406,24 @@ Locale makeBogusLocale() { return bogus; } +void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode) +{ + if (U_FAILURE(errorCode)) { return; } + LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode)); + if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) { + // Error, or no extensions to copy. 
+ return; + } + if (extensions_ == nullptr) { + extensions_ = new Locale(); + if (extensions_ == nullptr) { + status_ = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode); +} + Locale LocaleBuilder::build(UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { @@ -425,7 +448,7 @@ Locale LocaleBuilder::build(UErrorCode& errorCode) } Locale product(locale_str.data()); if (extensions_ != nullptr) { - _copyExtensions(*extensions_, &product, true, errorCode); + _copyExtensions(*extensions_, nullptr, product, true, errorCode); } if (U_FAILURE(errorCode)) { return makeBogusLocale(); @@ -433,4 +456,13 @@ Locale LocaleBuilder::build(UErrorCode& errorCode) return product; } +UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const { + if (U_FAILURE(outErrorCode)) { + // Do not overwrite the older error code + return TRUE; + } + outErrorCode = status_; + return U_FAILURE(outErrorCode); +} + U_NAMESPACE_END diff --git a/deps/icu-small/source/common/localematcher.cpp b/deps/icu-small/source/common/localematcher.cpp new file mode 100644 index 0000000000..d975fe759b --- /dev/null +++ b/deps/icu-small/source/common/localematcher.cpp @@ -0,0 +1,720 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// localematcher.cpp +// created: 2019may08 Markus W. Scherer + +#ifndef __LOCMATCHER_H__ +#define __LOCMATCHER_H__ + +#include "unicode/utypes.h" +#include "unicode/localebuilder.h" +#include "unicode/localematcher.h" +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "cstring.h" +#include "localeprioritylist.h" +#include "loclikelysubtags.h" +#include "locdistance.h" +#include "lsr.h" +#include "uassert.h" +#include "uhash.h" +#include "uvector.h" + +#define UND_LSR LSR("und", "", "") + +/** + * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher. 
+ * + * @draft ICU 65 + */ +enum ULocMatchLifetime { + /** + * Locale objects are temporary. + * The matcher will make a copy of a locale that will be used beyond one function call. + * + * @draft ICU 65 + */ + ULOCMATCH_TEMPORARY_LOCALES, + /** + * Locale objects are stored at least as long as the matcher is used. + * The matcher will keep only a pointer to a locale that will be used beyond one function call, + * avoiding a copy. + * + * @draft ICU 65 + */ + ULOCMATCH_STORED_LOCALES // TODO: permanent? cached? clone? +}; +#ifndef U_IN_DOXYGEN +typedef enum ULocMatchLifetime ULocMatchLifetime; +#endif + +U_NAMESPACE_BEGIN + +LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT : + desiredLocale(src.desiredLocale), + supportedLocale(src.supportedLocale), + desiredIndex(src.desiredIndex), + supportedIndex(src.supportedIndex), + desiredIsOwned(src.desiredIsOwned) { + if (desiredIsOwned) { + src.desiredLocale = nullptr; + src.desiredIndex = -1; + src.desiredIsOwned = FALSE; + } +} + +LocaleMatcher::Result::~Result() { + if (desiredIsOwned) { + delete desiredLocale; + } +} + +LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT { + this->~Result(); + + desiredLocale = src.desiredLocale; + supportedLocale = src.supportedLocale; + desiredIndex = src.desiredIndex; + supportedIndex = src.supportedIndex; + desiredIsOwned = src.desiredIsOwned; + + if (desiredIsOwned) { + src.desiredLocale = nullptr; + src.desiredIndex = -1; + src.desiredIsOwned = FALSE; + } + return *this; +} + +Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const { + if (U_FAILURE(errorCode) || supportedLocale == nullptr) { + return Locale::getRoot(); + } + const Locale *bestDesired = getDesiredLocale(); + if (bestDesired == nullptr || *supportedLocale == *bestDesired) { + return *supportedLocale; + } + LocaleBuilder b; + b.setLocale(*supportedLocale); + + // Copy the region from bestDesired, if there is one. 
+ const char *region = bestDesired->getCountry(); + if (*region != 0) { + b.setRegion(region); + } + + // Copy the variants from bestDesired, if there are any. + // Note that this will override any supportedLocale variants. + // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster). + const char *variants = bestDesired->getVariant(); + if (*variants != 0) { + b.setVariant(variants); + } + + // Copy the extensions from bestDesired, if there are any. + // C++ note: The following note, copied from Java, may not be true, + // as long as C++ copies by legacy ICU keyword, not by extension singleton. + // Note that this will override any supportedLocale extensions. + // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native" + // (replacing calendar). + b.copyExtensionsFrom(*bestDesired, errorCode); + return b.build(errorCode); +} + +LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT : + errorCode_(src.errorCode_), + supportedLocales_(src.supportedLocales_), + thresholdDistance_(src.thresholdDistance_), + demotion_(src.demotion_), + defaultLocale_(src.defaultLocale_), + favor_(src.favor_) { + src.supportedLocales_ = nullptr; + src.defaultLocale_ = nullptr; +} + +LocaleMatcher::Builder::~Builder() { + delete supportedLocales_; + delete defaultLocale_; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT { + this->~Builder(); + + errorCode_ = src.errorCode_; + supportedLocales_ = src.supportedLocales_; + thresholdDistance_ = src.thresholdDistance_; + demotion_ = src.demotion_; + defaultLocale_ = src.defaultLocale_; + favor_ = src.favor_; + + src.supportedLocales_ = nullptr; + src.defaultLocale_ = nullptr; + return *this; +} + +void LocaleMatcher::Builder::clearSupportedLocales() { + if (supportedLocales_ != nullptr) { + supportedLocales_->removeAllElements(); + } +} + +bool LocaleMatcher::Builder::ensureSupportedLocaleVector() { + if 
(U_FAILURE(errorCode_)) { return false; } + if (supportedLocales_ != nullptr) { return true; } + supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_); + if (U_FAILURE(errorCode_)) { return false; } + if (supportedLocales_ == nullptr) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return false; + } + return true; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString( + StringPiece locales) { + LocalePriorityList list(locales, errorCode_); + if (U_FAILURE(errorCode_)) { return *this; } + clearSupportedLocales(); + if (!ensureSupportedLocaleVector()) { return *this; } + int32_t length = list.getLengthIncludingRemoved(); + for (int32_t i = 0; i < length; ++i) { + Locale *locale = list.orphanLocaleAt(i); + if (locale == nullptr) { continue; } + supportedLocales_->addElement(locale, errorCode_); + if (U_FAILURE(errorCode_)) { + delete locale; + break; + } + } + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) { + if (U_FAILURE(errorCode_)) { return *this; } + clearSupportedLocales(); + if (!ensureSupportedLocaleVector()) { return *this; } + while (locales.hasNext()) { + const Locale &locale = locales.next(); + Locale *clone = locale.clone(); + if (clone == nullptr) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + break; + } + supportedLocales_->addElement(clone, errorCode_); + if (U_FAILURE(errorCode_)) { + delete clone; + break; + } + } + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) { + if (!ensureSupportedLocaleVector()) { return *this; } + Locale *clone = locale.clone(); + if (clone == nullptr) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + supportedLocales_->addElement(clone, errorCode_); + if (U_FAILURE(errorCode_)) { + delete clone; + } + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) { + if 
(U_FAILURE(errorCode_)) { return *this; } + Locale *clone = nullptr; + if (defaultLocale != nullptr) { + clone = defaultLocale->clone(); + if (clone == nullptr) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + } + delete defaultLocale_; + defaultLocale_ = clone; + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) { + if (U_FAILURE(errorCode_)) { return *this; } + favor_ = subtag; + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) { + if (U_FAILURE(errorCode_)) { return *this; } + demotion_ = demotion; + return *this; +} + +#if 0 +/** + * <i>Internal only!</i> + * + * @param thresholdDistance the thresholdDistance to set, with -1 = default + * @return this Builder object + * @internal + * @deprecated This API is ICU internal only. + */ +@Deprecated +LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) { + if (U_FAILURE(errorCode_)) { return *this; } + if (thresholdDistance > 100) { + thresholdDistance = 100; + } + thresholdDistance_ = thresholdDistance; + return *this; +} +#endif + +UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const { + if (U_FAILURE(outErrorCode)) { return TRUE; } + if (U_SUCCESS(errorCode_)) { return FALSE; } + outErrorCode = errorCode_; + return TRUE; +} + +LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const { + if (U_SUCCESS(errorCode) && U_FAILURE(errorCode_)) { + errorCode = errorCode_; + } + return LocaleMatcher(*this, errorCode); +} + +namespace { + +LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) { + return UND_LSR; + } else { + return likelySubtags.makeMaximizedLsrFrom(locale, errorCode); + } +} + +int32_t hashLSR(const UHashTok token) { + const LSR 
*lsr = static_cast<const LSR *>(token.pointer); + return lsr->hashCode; +} + +UBool compareLSRs(const UHashTok t1, const UHashTok t2) { + const LSR *lsr1 = static_cast<const LSR *>(t1.pointer); + const LSR *lsr2 = static_cast<const LSR *>(t2.pointer); + return *lsr1 == *lsr2; +} + +bool putIfAbsent(UHashtable *lsrToIndex, const LSR &lsr, int32_t i, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return false; } + U_ASSERT(i > 0); + int32_t index = uhash_geti(lsrToIndex, &lsr); + if (index != 0) { + return false; + } else { + uhash_puti(lsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode); + return U_SUCCESS(errorCode); + } +} + +} // namespace + +LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) : + likelySubtags(*XLikelySubtags::getSingleton(errorCode)), + localeDistance(*LocaleDistance::getSingleton(errorCode)), + thresholdDistance(builder.thresholdDistance_), + demotionPerDesiredLocale(0), + favorSubtag(builder.favor_), + supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0), + supportedLsrToIndex(nullptr), + supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0), + ownedDefaultLocale(nullptr), defaultLocale(nullptr), defaultLocaleIndex(-1) { + if (U_FAILURE(errorCode)) { return; } + if (thresholdDistance < 0) { + thresholdDistance = localeDistance.getDefaultScriptDistance(); + } + supportedLocalesLength = builder.supportedLocales_ != nullptr ? + builder.supportedLocales_->size() : 0; + const Locale *def = builder.defaultLocale_; + int32_t idef = -1; + if (supportedLocalesLength > 0) { + // Store the supported locales in input order, + // so that when different types are used (e.g., language tag strings) + // we can return those by parallel index. + supportedLocales = static_cast<const Locale **>( + uprv_malloc(supportedLocalesLength * sizeof(const Locale *))); + // Supported LRSs in input order. + // In C++, we store these permanently to simplify ownership management + // in the hash tables. 
Duplicate LSRs (if any) are unused overhead. + lsrs = new LSR[supportedLocalesLength]; + if (supportedLocales == nullptr || lsrs == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + // If the constructor fails partway, we need null pointers for destructibility. + uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *)); + // Also find the first supported locale whose LSR is + // the same as that for the default locale. + LSR builderDefaultLSR; + const LSR *defLSR = nullptr; + if (def != nullptr) { + builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode); + if (U_FAILURE(errorCode)) { return; } + defLSR = &builderDefaultLSR; + } + for (int32_t i = 0; i < supportedLocalesLength; ++i) { + const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i)); + supportedLocales[i] = locale.clone(); + if (supportedLocales[i] == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + const Locale &supportedLocale = *supportedLocales[i]; + LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode); + lsr.setHashCode(); + if (U_FAILURE(errorCode)) { return; } + if (idef < 0 && defLSR != nullptr && lsr == *defLSR) { + idef = i; + defLSR = &lsr; // owned pointer to put into supportedLsrToIndex + if (*def == supportedLocale) { + def = &supportedLocale; // owned pointer to keep + } + } + } + + // We need an unordered map from LSR to first supported locale with that LSR, + // and an ordered list of (LSR, supported index). + // We insert the supported locales in the following order: + // 1. Default locale, if it is supported. + // 2. Priority locales (aka "paradigm locales") in builder order. + // 3. Remaining locales in builder order. + // In Java, we use a LinkedHashMap for both map & ordered lists. + // In C++, we use separate structures. + // We over-allocate arrays of LSRs and indexes for simplicity. 
+ // We reserve slots at the array starts for the default and paradigm locales, + // plus enough for all supported locales. + // If there are few paradigm locales and few duplicate supported LSRs, + // then the amount of wasted space is small. + supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong, + supportedLocalesLength, &errorCode); + if (U_FAILURE(errorCode)) { return; } + int32_t paradigmLimit = 1 + localeDistance.getParadigmLSRsLength(); + int32_t suppLSRsCapacity = paradigmLimit + supportedLocalesLength; + supportedLSRs = static_cast<const LSR **>( + uprv_malloc(suppLSRsCapacity * sizeof(const LSR *))); + supportedIndexes = static_cast<int32_t *>( + uprv_malloc(suppLSRsCapacity * sizeof(int32_t))); + if (supportedLSRs == nullptr || supportedIndexes == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t paradigmIndex = 0; + int32_t otherIndex = paradigmLimit; + if (idef >= 0) { + uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode); + supportedLSRs[0] = defLSR; + supportedIndexes[0] = idef; + paradigmIndex = 1; + } + for (int32_t i = 0; i < supportedLocalesLength; ++i) { + if (i == idef) { continue; } + const Locale &locale = *supportedLocales[i]; + const LSR &lsr = lsrs[i]; + if (defLSR == nullptr) { + U_ASSERT(i == 0); + def = &locale; + defLSR = &lsr; + idef = 0; + uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), 0 + 1, &errorCode); + supportedLSRs[0] = &lsr; + supportedIndexes[0] = 0; + paradigmIndex = 1; + } else if (idef >= 0 && lsr == *defLSR) { + // lsr == *defLSR means that this supported locale is + // a duplicate of the default locale. + // Either an explicit default locale is supported, and we added it before the loop, + // or there is no explicit default locale, and this is + // a duplicate of the first supported locale. + // In both cases, idef >= 0 now, so otherwise we can skip the comparison. + // For a duplicate, putIfAbsent() is a no-op, so nothing to do. 
+ } else { + if (putIfAbsent(supportedLsrToIndex, lsr, i + 1, errorCode)) { + if (localeDistance.isParadigmLSR(lsr)) { + supportedLSRs[paradigmIndex] = &lsr; + supportedIndexes[paradigmIndex++] = i; + } else { + supportedLSRs[otherIndex] = &lsr; + supportedIndexes[otherIndex++] = i; + } + } + } + if (U_FAILURE(errorCode)) { return; } + } + // Squeeze out unused array slots. + if (paradigmIndex < paradigmLimit && paradigmLimit < otherIndex) { + uprv_memmove(supportedLSRs + paradigmIndex, supportedLSRs + paradigmLimit, + (otherIndex - paradigmLimit) * sizeof(const LSR *)); + uprv_memmove(supportedIndexes + paradigmIndex, supportedIndexes + paradigmLimit, + (otherIndex - paradigmLimit) * sizeof(int32_t)); + } + supportedLSRsLength = otherIndex - (paradigmLimit - paradigmIndex); + } + + if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) { + ownedDefaultLocale = def->clone(); + if (ownedDefaultLocale == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + def = ownedDefaultLocale; + } + defaultLocale = def; + defaultLocaleIndex = idef; + + if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) { + demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale(); + } +} + +LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT : + likelySubtags(src.likelySubtags), + localeDistance(src.localeDistance), + thresholdDistance(src.thresholdDistance), + demotionPerDesiredLocale(src.demotionPerDesiredLocale), + favorSubtag(src.favorSubtag), + supportedLocales(src.supportedLocales), lsrs(src.lsrs), + supportedLocalesLength(src.supportedLocalesLength), + supportedLsrToIndex(src.supportedLsrToIndex), + supportedLSRs(src.supportedLSRs), + supportedIndexes(src.supportedIndexes), + supportedLSRsLength(src.supportedLSRsLength), + ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale), + defaultLocaleIndex(src.defaultLocaleIndex) { + src.supportedLocales = nullptr; + src.lsrs = nullptr; + src.supportedLocalesLength = 
0; + src.supportedLsrToIndex = nullptr; + src.supportedLSRs = nullptr; + src.supportedIndexes = nullptr; + src.supportedLSRsLength = 0; + src.ownedDefaultLocale = nullptr; + src.defaultLocale = nullptr; + src.defaultLocaleIndex = -1; +} + +LocaleMatcher::~LocaleMatcher() { + for (int32_t i = 0; i < supportedLocalesLength; ++i) { + delete supportedLocales[i]; + } + uprv_free(supportedLocales); + delete[] lsrs; + uhash_close(supportedLsrToIndex); + uprv_free(supportedLSRs); + uprv_free(supportedIndexes); + delete ownedDefaultLocale; +} + +LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT { + this->~LocaleMatcher(); + + thresholdDistance = src.thresholdDistance; + demotionPerDesiredLocale = src.demotionPerDesiredLocale; + favorSubtag = src.favorSubtag; + supportedLocales = src.supportedLocales; + lsrs = src.lsrs; + supportedLocalesLength = src.supportedLocalesLength; + supportedLsrToIndex = src.supportedLsrToIndex; + supportedLSRs = src.supportedLSRs; + supportedIndexes = src.supportedIndexes; + supportedLSRsLength = src.supportedLSRsLength; + ownedDefaultLocale = src.ownedDefaultLocale; + defaultLocale = src.defaultLocale; + defaultLocaleIndex = src.defaultLocaleIndex; + + src.supportedLocales = nullptr; + src.lsrs = nullptr; + src.supportedLocalesLength = 0; + src.supportedLsrToIndex = nullptr; + src.supportedLSRs = nullptr; + src.supportedIndexes = nullptr; + src.supportedLSRsLength = 0; + src.ownedDefaultLocale = nullptr; + src.defaultLocale = nullptr; + src.defaultLocaleIndex = -1; + return *this; +} + +class LocaleLsrIterator { +public: + LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales, + ULocMatchLifetime lifetime) : + likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {} + + ~LocaleLsrIterator() { + if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) { + delete remembered; + } + } + + bool hasNext() const { + return locales.hasNext(); + } + + LSR next(UErrorCode &errorCode) { + current = 
&locales.next(); + return getMaximalLsrOrUnd(likelySubtags, *current, errorCode); + } + + void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + bestDesiredIndex = desiredIndex; + if (lifetime == ULOCMATCH_STORED_LOCALES) { + remembered = current; + } else { + // ULOCMATCH_TEMPORARY_LOCALES + delete remembered; + remembered = new Locale(*current); + if (remembered == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + } + + const Locale *orphanRemembered() { + const Locale *rem = remembered; + remembered = nullptr; + return rem; + } + + int32_t getBestDesiredIndex() const { + return bestDesiredIndex; + } + +private: + const XLikelySubtags &likelySubtags; + Locale::Iterator &locales; + ULocMatchLifetime lifetime; + const Locale *current = nullptr, *remembered = nullptr; + int32_t bestDesiredIndex = -1; +}; + +const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t suppIndex = getBestSuppIndex( + getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode), + nullptr, errorCode); + return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale; +} + +const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales, + UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return nullptr; } + if (!desiredLocales.hasNext()) { + return defaultLocale; + } + LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES); + int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode); + return U_SUCCESS(errorCode) && suppIndex >= 0 ? 
supportedLocales[suppIndex] : defaultLocale; +} + +const Locale *LocaleMatcher::getBestMatchForListString( + StringPiece desiredLocaleList, UErrorCode &errorCode) const { + LocalePriorityList list(desiredLocaleList, errorCode); + LocalePriorityList::Iterator iter = list.iterator(); + return getBestMatch(iter, errorCode); +} + +LocaleMatcher::Result LocaleMatcher::getBestMatchResult( + const Locale &desiredLocale, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE); + } + int32_t suppIndex = getBestSuppIndex( + getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode), + nullptr, errorCode); + if (U_FAILURE(errorCode) || suppIndex < 0) { + return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE); + } else { + return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE); + } +} + +LocaleMatcher::Result LocaleMatcher::getBestMatchResult( + Locale::Iterator &desiredLocales, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) { + return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE); + } + LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES); + int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode); + if (U_FAILURE(errorCode) || suppIndex < 0) { + return Result(nullptr, defaultLocale, -1, defaultLocaleIndex, FALSE); + } else { + return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex], + lsrIter.getBestDesiredIndex(), suppIndex, TRUE); + } +} + +int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, + UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return -1; } + int32_t desiredIndex = 0; + int32_t bestSupportedLsrIndex = -1; + for (int32_t bestDistance = thresholdDistance;;) { + // Quick check for exact maximized LSR. + // Returns suppIndex+1 where 0 means not found. 
+ if (supportedLsrToIndex != nullptr) { + desiredLSR.setHashCode(); + int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR); + if (index != 0) { + int32_t suppIndex = index - 1; + if (remainingIter != nullptr) { + remainingIter->rememberCurrent(desiredIndex, errorCode); + } + return suppIndex; + } + } + int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance( + desiredLSR, supportedLSRs, supportedLSRsLength, bestDistance, favorSubtag); + if (bestIndexAndDistance >= 0) { + bestDistance = bestIndexAndDistance & 0xff; + if (remainingIter != nullptr) { + remainingIter->rememberCurrent(desiredIndex, errorCode); + if (U_FAILURE(errorCode)) { return -1; } + } + bestSupportedLsrIndex = bestIndexAndDistance >= 0 ? bestIndexAndDistance >> 8 : -1; + } + if ((bestDistance -= demotionPerDesiredLocale) <= 0) { + break; + } + if (remainingIter == nullptr || !remainingIter->hasNext()) { + break; + } + desiredLSR = remainingIter->next(errorCode); + if (U_FAILURE(errorCode)) { return -1; } + ++desiredIndex; + } + if (bestSupportedLsrIndex < 0) { + // no good match + return -1; + } + return supportedIndexes[bestSupportedLsrIndex]; +} + +double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const { + // Returns the inverse of the distance: That is, 1-distance(desired, supported). 
+ LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode); + if (U_FAILURE(errorCode)) { return 0; } + const LSR *pSuppLSR = &suppLSR; + int32_t distance = localeDistance.getBestIndexAndDistance( + getMaximalLsrOrUnd(likelySubtags, desired, errorCode), + &pSuppLSR, 1, + thresholdDistance, favorSubtag) & 0xff; + return (100 - distance) / 100.0; +} + +U_NAMESPACE_END + +#endif // __LOCMATCHER_H__ diff --git a/deps/icu-small/source/common/localeprioritylist.cpp b/deps/icu-small/source/common/localeprioritylist.cpp new file mode 100644 index 0000000000..06442fb46a --- /dev/null +++ b/deps/icu-small/source/common/localeprioritylist.cpp @@ -0,0 +1,239 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// localeprioritylist.cpp +// created: 2019jul11 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "charstr.h" +#include "cmemory.h" +#include "localeprioritylist.h" +#include "uarrsort.h" +#include "uassert.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +namespace { + +int32_t hashLocale(const UHashTok token) { + auto *locale = static_cast<const Locale *>(token.pointer); + return locale->hashCode(); +} + +UBool compareLocales(const UHashTok t1, const UHashTok t2) { + auto *l1 = static_cast<const Locale *>(t1.pointer); + auto *l2 = static_cast<const Locale *>(t2.pointer); + return *l1 == *l2; +} + +constexpr int32_t WEIGHT_ONE = 1000; + +struct LocaleAndWeight { + Locale *locale; + int32_t weight; // 0..1000 = 0.0..1.0 + int32_t index; // force stable sort + + int32_t compare(const LocaleAndWeight &other) const { + int32_t diff = other.weight - weight; // descending: other-this + if (diff != 0) { return diff; } + return index - other.index; + } +}; + +int32_t U_CALLCONV +compareLocaleAndWeight(const void * /*context*/, const void *left, const void 
*right) { + return static_cast<const LocaleAndWeight *>(left)-> + compare(*static_cast<const LocaleAndWeight *>(right)); +} + +const char *skipSpaces(const char *p, const char *limit) { + while (p < limit && *p == ' ') { ++p; } + return p; +} + +int32_t findTagLength(const char *p, const char *limit) { + // Look for accept-language delimiters. + // Leave other validation up to the Locale constructor. + const char *q; + for (q = p; q < limit; ++q) { + char c = *q; + if (c == ' ' || c == ',' || c == ';') { break; } + } + return static_cast<int32_t>(q - p); +} + +/** + * Parses and returns a qvalue weight in millis. + * Advances p to after the parsed substring. + * Returns a negative value if parsing fails. + */ +int32_t parseWeight(const char *&p, const char *limit) { + p = skipSpaces(p, limit); + char c; + if (p == limit || ((c = *p) != '0' && c != '1')) { return -1; } + int32_t weight = (c - '0') * 1000; + if (++p == limit || *p != '.') { return weight; } + int32_t multiplier = 100; + while (++p != limit && '0' <= (c = *p) && c <= '9') { + c -= '0'; + if (multiplier > 0) { + weight += c * multiplier; + multiplier /= 10; + } else if (multiplier == 0) { + // round up + if (c >= 5) { ++weight; } + multiplier = -1; + } // else ignore further fraction digits + } + return weight <= WEIGHT_ONE ? weight : -1; // bad if > 1.0 +} + +} // namespace + +/** + * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight. + * + * This wrapper exists (and is not in an anonymous namespace) + * so that we can forward-declare it in the header file and + * don't have to expose the MaybeStackArray specialization and + * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h. + * Also, otherwise we would have to do a platform-specific + * template export declaration of some kind for the MaybeStackArray specialization + * to be properly exported from the common DLL. 
+ */ +struct LocaleAndWeightArray : public UMemory { + MaybeStackArray<LocaleAndWeight, 20> array; +}; + +LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + list = new LocaleAndWeightArray(); + if (list == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + const char *p = s.data(); + const char *limit = p + s.length(); + while ((p = skipSpaces(p, limit)) != limit) { + if (*p == ',') { // empty range field + ++p; + continue; + } + int32_t tagLength = findTagLength(p, limit); + if (tagLength == 0) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + CharString tag(p, tagLength, errorCode); + if (U_FAILURE(errorCode)) { return; } + Locale locale = Locale(tag.data()); + if (locale.isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t weight = WEIGHT_ONE; + if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') { + if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' || + (p = skipSpaces(p + 1, limit)) == limit || *p != '=' || + (++p, (weight = parseWeight(p, limit)) < 0)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + p = skipSpaces(p, limit); + } + if (p != limit && *p != ',') { // trailing junk + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + add(locale, weight, errorCode); + if (p == limit) { break; } + ++p; + } + sort(errorCode); +} + +LocalePriorityList::~LocalePriorityList() { + if (list != nullptr) { + for (int32_t i = 0; i < listLength; ++i) { + delete list->array[i].locale; + } + delete list; + } + uhash_close(map); +} + +const Locale *LocalePriorityList::localeAt(int32_t i) const { + return list->array[i].locale; +} + +Locale *LocalePriorityList::orphanLocaleAt(int32_t i) { + if (list == nullptr) { return nullptr; } + LocaleAndWeight &lw = list->array[i]; + Locale *l = lw.locale; + lw.locale = nullptr; + return l; +} + +bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) { + 
if (U_FAILURE(errorCode)) { return false; } + if (map == nullptr) { + if (weight <= 0) { return true; } // do not add q=0 + map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode); + if (U_FAILURE(errorCode)) { return false; } + } + LocalPointer<Locale> clone; + int32_t index = uhash_geti(map, &locale); + if (index != 0) { + // Duplicate: Remove the old item and append it anew. + LocaleAndWeight &lw = list->array[index - 1]; + clone.adoptInstead(lw.locale); + lw.locale = nullptr; + lw.weight = 0; + ++numRemoved; + } + if (weight <= 0) { // do not add q=0 + if (index != 0) { + // Not strictly necessary but cleaner. + uhash_removei(map, &locale); + } + return true; + } + if (clone.isNull()) { + clone.adoptInstead(locale.clone()); + if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return false; + } + } + if (listLength == list->array.getCapacity()) { + int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength; + if (list->array.resize(newCapacity, listLength) == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return false; + } + } + uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode); + if (U_FAILURE(errorCode)) { return false; } + LocaleAndWeight &lw = list->array[listLength]; + lw.locale = clone.orphan(); + lw.weight = weight; + lw.index = listLength++; + if (weight < WEIGHT_ONE) { hasWeights = true; } + U_ASSERT(uhash_count(map) == getLength()); + return true; +} + +void LocalePriorityList::sort(UErrorCode &errorCode) { + // Sort by descending weights if there is a mix of weights. + // The comparator forces a stable sort via the item index. 
+ if (U_FAILURE(errorCode) || getLength() <= 1 || !hasWeights) { return; } + uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight), + compareLocaleAndWeight, nullptr, FALSE, &errorCode); +} + +U_NAMESPACE_END diff --git a/deps/icu-small/source/common/localeprioritylist.h b/deps/icu-small/source/common/localeprioritylist.h new file mode 100644 index 0000000000..80ca38a7b5 --- /dev/null +++ b/deps/icu-small/source/common/localeprioritylist.h @@ -0,0 +1,115 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// localeprioritylist.h +// created: 2019jul11 Markus W. Scherer + +#ifndef __LOCALEPRIORITYLIST_H__ +#define __LOCALEPRIORITYLIST_H__ + +#include "unicode/utypes.h" +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" + +struct UHashtable; + +U_NAMESPACE_BEGIN + +struct LocaleAndWeightArray; + +/** + * Parses a list of locales from an accept-language string. + * We are a bit more lenient than the spec: + * We accept extra whitespace in more places, empty range fields, + * and any number of qvalue fraction digits. + * + * https://tools.ietf.org/html/rfc2616#section-14.4 + * 14.4 Accept-Language + * + * Accept-Language = "Accept-Language" ":" + * 1#( language-range [ ";" "q" "=" qvalue ] ) + * language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" ) + * + * Each language-range MAY be given an associated quality value which + * represents an estimate of the user's preference for the languages + * specified by that range. The quality value defaults to "q=1". For + * example, + * + * Accept-Language: da, en-gb;q=0.8, en;q=0.7 + * + * https://tools.ietf.org/html/rfc2616#section-3.9 + * 3.9 Quality Values + * + * HTTP content negotiation (section 12) uses short "floating point" + * numbers to indicate the relative importance ("weight") of various + * negotiable parameters. 
A weight is normalized to a real number in + * the range 0 through 1, where 0 is the minimum and 1 the maximum + * value. If a parameter has a quality value of 0, then content with + * this parameter is `not acceptable' for the client. HTTP/1.1 + * applications MUST NOT generate more than three digits after the + * decimal point. User configuration of these values SHOULD also be + * limited in this fashion. + * + * qvalue = ( "0" [ "." 0*3DIGIT ] ) + * | ( "1" [ "." 0*3("0") ] ) + */ +class U_COMMON_API LocalePriorityList : public UMemory { +public: + class Iterator : public Locale::Iterator { + public: + UBool hasNext() const override { return count < length; } + + const Locale &next() override { + for(;;) { + const Locale *locale = list.localeAt(index++); + if (locale != nullptr) { + ++count; + return *locale; + } + } + } + + private: + friend class LocalePriorityList; + + Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {} + + const LocalePriorityList &list; + int32_t index = 0; + int32_t count = 0; + const int32_t length; + }; + + LocalePriorityList(StringPiece s, UErrorCode &errorCode); + + ~LocalePriorityList(); + + int32_t getLength() const { return listLength - numRemoved; } + + int32_t getLengthIncludingRemoved() const { return listLength; } + + Iterator iterator() const { return Iterator(*this); } + + const Locale *localeAt(int32_t i) const; + + Locale *orphanLocaleAt(int32_t i); + +private: + LocalePriorityList(const LocalePriorityList &) = delete; + LocalePriorityList &operator=(const LocalePriorityList &) = delete; + + bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode); + + void sort(UErrorCode &errorCode); + + LocaleAndWeightArray *list = nullptr; + int32_t listLength = 0; + int32_t numRemoved = 0; + bool hasWeights = false; // other than 1.0 + UHashtable *map = nullptr; +}; + +U_NAMESPACE_END + +#endif // __LOCALEPRIORITYLIST_H__ diff --git a/deps/icu-small/source/common/locavailable.cpp 
b/deps/icu-small/source/common/locavailable.cpp index 1e608ffb9e..ad9d2ca8c7 100644 --- a/deps/icu-small/source/common/locavailable.cpp +++ b/deps/icu-small/source/common/locavailable.cpp @@ -19,11 +19,13 @@ * that then do not depend on resource bundle code and res_index bundles. */ +#include "unicode/errorcode.h" #include "unicode/utypes.h" #include "unicode/locid.h" #include "unicode/uloc.h" #include "unicode/ures.h" #include "cmemory.h" +#include "cstring.h" #include "ucln_cmn.h" #include "uassert.h" #include "umutex.h" @@ -95,84 +97,174 @@ U_NAMESPACE_USE /* ### Constants **************************************************/ -/* These strings describe the resources we attempt to load from - the locale ResourceBundle data file.*/ -static const char _kIndexLocaleName[] = "res_index"; -static const char _kIndexTag[] = "InstalledLocales"; +namespace { -static char** _installedLocales = NULL; -static int32_t _installedLocalesCount = 0; -static icu::UInitOnce _installedLocalesInitOnce; +// Enough capacity for the two lists in the res_index.res file +const char** gAvailableLocaleNames[2] = {}; +int32_t gAvailableLocaleCounts[2] = {}; +icu::UInitOnce ginstalledLocalesInitOnce = U_INITONCE_INITIALIZER; -/* ### Get available **************************************************/ +class AvailableLocalesSink : public ResourceSink { + public: + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { + ResourceTable resIndexTable = value.getTable(status); + if (U_FAILURE(status)) { + return; + } + for (int32_t i = 0; resIndexTable.getKeyAndValue(i, key, value); ++i) { + ULocAvailableType type; + if (uprv_strcmp(key, "InstalledLocales") == 0) { + type = ULOC_AVAILABLE_DEFAULT; + } else if (uprv_strcmp(key, "AliasLocales") == 0) { + type = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES; + } else { + // CLDRVersion, etc. 
+ continue; + } + ResourceTable availableLocalesTable = value.getTable(status); + if (U_FAILURE(status)) { + return; + } + gAvailableLocaleCounts[type] = availableLocalesTable.getSize(); + gAvailableLocaleNames[type] = static_cast<const char**>( + uprv_malloc(gAvailableLocaleCounts[type] * sizeof(const char*))); + if (gAvailableLocaleNames[type] == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t j = 0; availableLocalesTable.getKeyAndValue(j, key, value); ++j) { + gAvailableLocaleNames[type][j] = key; + } + } + } +}; -static UBool U_CALLCONV uloc_cleanup(void) { - char ** temp; +class AvailableLocalesStringEnumeration : public StringEnumeration { + public: + AvailableLocalesStringEnumeration(ULocAvailableType type) : fType(type) { + } + + const char* next(int32_t *resultLength, UErrorCode&) override { + ULocAvailableType actualType = fType; + int32_t actualIndex = fIndex++; + + // If the "combined" list was requested, resolve that now + if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) { + int32_t defaultLocalesCount = gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT]; + if (actualIndex < defaultLocalesCount) { + actualType = ULOC_AVAILABLE_DEFAULT; + } else { + actualIndex -= defaultLocalesCount; + actualType = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES; + } + } + + // Return the requested string + int32_t count = gAvailableLocaleCounts[actualType]; + const char* result; + if (actualIndex < count) { + result = gAvailableLocaleNames[actualType][actualIndex]; + if (resultLength != nullptr) { + *resultLength = static_cast<int32_t>(uprv_strlen(result)); + } + } else { + result = nullptr; + if (resultLength != nullptr) { + *resultLength = 0; + } + } + return result; + } + + void reset(UErrorCode&) override { + fIndex = 0; + } + + int32_t count(UErrorCode&) const override { + if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) { + return gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT] + + gAvailableLocaleCounts[ULOC_AVAILABLE_ONLY_LEGACY_ALIASES]; + } else 
{ + return gAvailableLocaleCounts[fType]; + } + } - if (_installedLocales) { - temp = _installedLocales; - _installedLocales = NULL; + private: + ULocAvailableType fType; + int32_t fIndex = 0; +}; - _installedLocalesCount = 0; - _installedLocalesInitOnce.reset(); +/* ### Get available **************************************************/ - uprv_free(temp); +static UBool U_CALLCONV uloc_cleanup(void) { + for (int32_t i = 0; i < UPRV_LENGTHOF(gAvailableLocaleNames); i++) { + uprv_free(gAvailableLocaleNames[i]); + gAvailableLocaleNames[i] = nullptr; + gAvailableLocaleCounts[i] = 0; } + ginstalledLocalesInitOnce.reset(); return TRUE; } // Load Installed Locales. This function will be called exactly once // via the initOnce mechanism. -static void U_CALLCONV loadInstalledLocales() { - UErrorCode status = U_ZERO_ERROR; - int32_t i = 0; - int32_t localeCount; - - U_ASSERT(_installedLocales == NULL); - U_ASSERT(_installedLocalesCount == 0); +static void U_CALLCONV loadInstalledLocales(UErrorCode& status) { + ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup); - _installedLocalesCount = 0; + icu::LocalUResourceBundlePointer rb(ures_openDirect(NULL, "res_index", &status)); + AvailableLocalesSink sink; + ures_getAllItemsWithFallback(rb.getAlias(), "", sink, status); +} - icu::LocalUResourceBundlePointer indexLocale(ures_openDirect(NULL, _kIndexLocaleName, &status)); - icu::StackUResourceBundle installed; +void _load_installedLocales(UErrorCode& status) { + umtx_initOnce(ginstalledLocalesInitOnce, &loadInstalledLocales, status); +} - ures_getByKey(indexLocale.getAlias(), _kIndexTag, installed.getAlias(), &status); +} // namespace - if(U_SUCCESS(status)) { - localeCount = ures_getSize(installed.getAlias()); - _installedLocales = (char **) uprv_malloc(sizeof(char*) * (localeCount+1)); - if (_installedLocales != NULL) { - ures_resetIterator(installed.getAlias()); - while(ures_hasNext(installed.getAlias())) { - ures_getNextString(installed.getAlias(), NULL, (const char 
**)&_installedLocales[i++], &status); - } - _installedLocales[i] = NULL; - _installedLocalesCount = localeCount; - ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup); - } +U_CAPI const char* U_EXPORT2 +uloc_getAvailable(int32_t offset) { + icu::ErrorCode status; + _load_installedLocales(status); + if (status.isFailure()) { + return nullptr; + } + if (offset > gAvailableLocaleCounts[0]) { + // *status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; } + return gAvailableLocaleNames[0][offset]; } -static void _load_installedLocales() -{ - umtx_initOnce(_installedLocalesInitOnce, &loadInstalledLocales); +U_CAPI int32_t U_EXPORT2 +uloc_countAvailable() { + icu::ErrorCode status; + _load_installedLocales(status); + if (status.isFailure()) { + return 0; + } + return gAvailableLocaleCounts[0]; } -U_CAPI const char* U_EXPORT2 -uloc_getAvailable(int32_t offset) -{ - - _load_installedLocales(); - - if (offset > _installedLocalesCount) - return NULL; - return _installedLocales[offset]; +U_CAPI UEnumeration* U_EXPORT2 +uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status) { + if (U_FAILURE(*status)) { + return nullptr; + } + if (type < 0 || type >= ULOC_AVAILABLE_COUNT) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + _load_installedLocales(*status); + if (U_FAILURE(*status)) { + return nullptr; + } + LocalPointer<AvailableLocalesStringEnumeration> result( + new AvailableLocalesStringEnumeration(type), *status); + if (U_FAILURE(*status)) { + return nullptr; + } + return uenum_openFromStringEnumeration(result.orphan(), status); } -U_CAPI int32_t U_EXPORT2 -uloc_countAvailable() -{ - _load_installedLocales(); - return _installedLocalesCount; -} diff --git a/deps/icu-small/source/common/locbased.h b/deps/icu-small/source/common/locbased.h index 6db6a41dc4..9163bd11cf 100644 --- a/deps/icu-small/source/common/locbased.h +++ b/deps/icu-small/source/common/locbased.h @@ -22,7 +22,7 @@ * `actualLocale' of size ULOC_FULLNAME_CAPACITY */ #define 
U_LOCALE_BASED(varname, objname) \ - LocaleBased varname((objname).validLocale, (objname).actualLocale); + LocaleBased varname((objname).validLocale, (objname).actualLocale) U_NAMESPACE_BEGIN diff --git a/deps/icu-small/source/common/locdistance.cpp b/deps/icu-small/source/common/locdistance.cpp new file mode 100644 index 0000000000..800d0eacf2 --- /dev/null +++ b/deps/icu-small/source/common/locdistance.cpp @@ -0,0 +1,364 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// locdistance.cpp +// created: 2019may08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/localematcher.h" +#include "unicode/locid.h" +#include "unicode/uobject.h" +#include "unicode/ures.h" +#include "cstring.h" +#include "locdistance.h" +#include "loclikelysubtags.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "uinvchar.h" +#include "umutex.h" + +U_NAMESPACE_BEGIN + +namespace { + +/** + * Bit flag used on the last character of a subtag in the trie. + * Must be set consistently by the builder and the lookup code. + */ +constexpr int32_t END_OF_SUBTAG = 0x80; +/** Distance value bit flag, set by the builder. */ +constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80; +/** Distance value bit flag, set by trieNext(). */ +constexpr int32_t DISTANCE_IS_FINAL = 0x100; +constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT; + +constexpr int32_t ABOVE_THRESHOLD = 100; + +// Indexes into array of distances. 
+enum { + IX_DEF_LANG_DISTANCE, + IX_DEF_SCRIPT_DISTANCE, + IX_DEF_REGION_DISTANCE, + IX_MIN_REGION_DISTANCE, + IX_LIMIT +}; + +LocaleDistance *gLocaleDistance = nullptr; +UInitOnce gInitOnce = U_INITONCE_INITIALIZER; + +UBool U_CALLCONV cleanup() { + delete gLocaleDistance; + gLocaleDistance = nullptr; + gInitOnce.reset(); + return TRUE; +} + +} // namespace + +void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + U_ASSERT(gLocaleDistance == nullptr); + const XLikelySubtags &likely = *XLikelySubtags::getSingleton(errorCode); + if (U_FAILURE(errorCode)) { return; } + const LocaleDistanceData &data = likely.getDistanceData(); + if (data.distanceTrieBytes == nullptr || + data.regionToPartitions == nullptr || data.partitions == nullptr || + // ok if no paradigms + data.distances == nullptr) { + errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + gLocaleDistance = new LocaleDistance(data); + if (gLocaleDistance == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup); +} + +const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode); + return gLocaleDistance; +} + +LocaleDistance::LocaleDistance(const LocaleDistanceData &data) : + trie(data.distanceTrieBytes), + regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions), + paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength), + defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]), + defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]), + defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]), + minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) { + // For the default demotion value, use the + // default region distance between unrelated Englishes. 
+ // Thus, unless demotion is turned off, + // a mere region difference for one desired locale + // is as good as a perfect match for the next following desired locale. + // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>. + LSR en("en", "Latn", "US"); + LSR enGB("en", "Latn", "GB"); + const LSR *p_enGB = &enGB; + defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, &p_enGB, 1, + 50, ULOCMATCH_FAVOR_LANGUAGE) & 0xff; +} + +int32_t LocaleDistance::getBestIndexAndDistance( + const LSR &desired, + const LSR **supportedLSRs, int32_t supportedLSRsLength, + int32_t threshold, ULocMatchFavorSubtag favorSubtag) const { + BytesTrie iter(trie); + // Look up the desired language only once for all supported LSRs. + // Its "distance" is either a match point value of 0, or a non-match negative value. + // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules. + int32_t desLangDistance = trieNext(iter, desired.language, false); + uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0; + // Index of the supported LSR with the lowest distance. + int32_t bestIndex = -1; + for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) { + const LSR &supported = *supportedLSRs[slIndex]; + bool star = false; + int32_t distance = desLangDistance; + if (distance >= 0) { + U_ASSERT((distance & DISTANCE_IS_FINAL) == 0); + if (slIndex != 0) { + iter.resetToState64(desLangState); + } + distance = trieNext(iter, supported.language, true); + } + // Note: The data builder verifies that there are no rules with "any" (*) language and + // real (non *) script or region subtags. + // This means that if the lookup for either language fails we can use + // the default distances without further lookups. 
+ int32_t flags; + if (distance >= 0) { + flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; + distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; + } else { // <*, *> + if (uprv_strcmp(desired.language, supported.language) == 0) { + distance = 0; + } else { + distance = defaultLanguageDistance; + } + flags = 0; + star = true; + } + U_ASSERT(0 <= distance && distance <= 100); + // We implement "favor subtag" by reducing the language subtag distance + // (unscientifically reducing it to a quarter of the normal value), + // so that the script distance is relatively more important. + // For example, given a default language distance of 80, we reduce it to 20, + // which is below the default threshold of 50, which is the default script distance. + if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) { + distance >>= 2; + } + if (distance >= threshold) { + continue; + } + + int32_t scriptDistance; + if (star || flags != 0) { + if (uprv_strcmp(desired.script, supported.script) == 0) { + scriptDistance = 0; + } else { + scriptDistance = defaultScriptDistance; + } + } else { + scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(), + desired.script, supported.script); + flags = scriptDistance & DISTANCE_IS_FINAL; + scriptDistance &= ~DISTANCE_IS_FINAL; + } + distance += scriptDistance; + if (distance >= threshold) { + continue; + } + + if (uprv_strcmp(desired.region, supported.region) == 0) { + // regionDistance = 0 + } else if (star || (flags & DISTANCE_IS_FINAL) != 0) { + distance += defaultRegionDistance; + } else { + int32_t remainingThreshold = threshold - distance; + if (minRegionDistance >= remainingThreshold) { + continue; + } + + // From here on we know the regions are not equal. + // Map each region to zero or more partitions. (zero = one non-matching string) + // (Each array of single-character partition strings is encoded as one string.) + // If either side has more than one, then we find the maximum distance. 
+ // This could be optimized by adding some more structure, but probably not worth it. + distance += getRegionPartitionsDistance( + iter, iter.getState64(), + partitionsForRegion(desired), + partitionsForRegion(supported), + remainingThreshold); + } + if (distance < threshold) { + if (distance == 0) { + return slIndex << 8; + } + bestIndex = slIndex; + threshold = distance; + } + } + return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD; +} + +int32_t LocaleDistance::getDesSuppScriptDistance( + BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) { + // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules. + int32_t distance = trieNext(iter, desired, false); + if (distance >= 0) { + distance = trieNext(iter, supported, true); + } + if (distance < 0) { + UStringTrieResult result = iter.resetToState64(startState).next(u'*'); // <*, *> + U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); + if (uprv_strcmp(desired, supported) == 0) { + distance = 0; // same script + } else { + distance = iter.getValue(); + U_ASSERT(distance >= 0); + } + if (result == USTRINGTRIE_FINAL_VALUE) { + distance |= DISTANCE_IS_FINAL; + } + } + return distance; +} + +int32_t LocaleDistance::getRegionPartitionsDistance( + BytesTrie &iter, uint64_t startState, + const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) { + char desired = *desiredPartitions++; + char supported = *supportedPartitions++; + U_ASSERT(desired != 0 && supported != 0); + // See if we have single desired/supported partitions, from NUL-terminated + // partition strings without explicit length. + bool suppLengthGt1 = *supportedPartitions != 0; // gt1: more than 1 character + // equivalent to: if (desLength == 1 && suppLength == 1) + if (*desiredPartitions == 0 && !suppLengthGt1) { + // Fastpath for single desired/supported partitions. 
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG); + if (USTRINGTRIE_HAS_NEXT(result)) { + result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG); + if (USTRINGTRIE_HAS_VALUE(result)) { + return iter.getValue(); + } + } + return getFallbackRegionDistance(iter, startState); + } + + const char *supportedStart = supportedPartitions - 1; // for restart of inner loop + int32_t regionDistance = 0; + // Fall back to * only once, not for each pair of partition strings. + bool star = false; + for (;;) { + // Look up each desired-partition string only once, + // not for each (desired, supported) pair. + UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG); + if (USTRINGTRIE_HAS_NEXT(result)) { + uint64_t desState = suppLengthGt1 ? iter.getState64() : 0; + for (;;) { + result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG); + int32_t d; + if (USTRINGTRIE_HAS_VALUE(result)) { + d = iter.getValue(); + } else if (star) { + d = 0; + } else { + d = getFallbackRegionDistance(iter, startState); + star = true; + } + if (d >= threshold) { + return d; + } else if (regionDistance < d) { + regionDistance = d; + } + if ((supported = *supportedPartitions++) != 0) { + iter.resetToState64(desState); + } else { + break; + } + } + } else if (!star) { + int32_t d = getFallbackRegionDistance(iter, startState); + if (d >= threshold) { + return d; + } else if (regionDistance < d) { + regionDistance = d; + } + star = true; + } + if ((desired = *desiredPartitions++) != 0) { + iter.resetToState64(startState); + supportedPartitions = supportedStart; + supported = *supportedPartitions++; + } else { + break; + } + } + return regionDistance; +} + +int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) { +#if U_DEBUG + UStringTrieResult result = +#endif + iter.resetToState64(startState).next(u'*'); // <*, *> + U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); + int32_t distance = 
iter.getValue(); + U_ASSERT(distance >= 0); + return distance; +} + +int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) { + uint8_t c; + if ((c = *s) == 0) { + return -1; // no empty subtags in the distance data + } + for (;;) { + c = uprv_invCharToAscii(c); + // EBCDIC: If *s is not an invariant character, + // then c is now 0 and will simply not match anything, which is harmless. + uint8_t next = *++s; + if (next != 0) { + if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) { + return -1; + } + } else { + // last character of this subtag + UStringTrieResult result = iter.next(c | END_OF_SUBTAG); + if (wantValue) { + if (USTRINGTRIE_HAS_VALUE(result)) { + int32_t value = iter.getValue(); + if (result == USTRINGTRIE_FINAL_VALUE) { + value |= DISTANCE_IS_FINAL; + } + return value; + } + } else { + if (USTRINGTRIE_HAS_NEXT(result)) { + return 0; + } + } + return -1; + } + c = next; + } +} + +UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const { + // Linear search for a very short list (length 6 as of 2019). + // If there are many paradigm LSRs we should use a hash set. + U_ASSERT(paradigmLSRsLength <= 15); + for (int32_t i = 0; i < paradigmLSRsLength; ++i) { + if (lsr == paradigmLSRs[i]) { return true; } + } + return false; +} + +U_NAMESPACE_END diff --git a/deps/icu-small/source/common/locdistance.h b/deps/icu-small/source/common/locdistance.h new file mode 100644 index 0000000000..7439f51c56 --- /dev/null +++ b/deps/icu-small/source/common/locdistance.h @@ -0,0 +1,109 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// locdistance.h +// created: 2019may08 Markus W. 
Scherer + +#ifndef __LOCDISTANCE_H__ +#define __LOCDISTANCE_H__ + +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/localematcher.h" +#include "unicode/locid.h" +#include "unicode/uobject.h" +#include "lsr.h" + +U_NAMESPACE_BEGIN + +struct LocaleDistanceData; + +/** + * Offline-built data for LocaleMatcher. + * Mostly but not only the data for mapping locales to their maximized forms. + */ +class LocaleDistance final : public UMemory { +public: + static const LocaleDistance *getSingleton(UErrorCode &errorCode); + + /** + * Finds the supported LSR with the smallest distance from the desired one. + * Equivalent LSR subtags must be normalized into a canonical form. + * + * <p>Returns the index of the lowest-distance supported LSR in bits 31..8 + * (negative if none has a distance below the threshold), + * and its distance (0..ABOVE_THRESHOLD) in bits 7..0. + */ + int32_t getBestIndexAndDistance(const LSR &desired, + const LSR **supportedLSRs, int32_t supportedLSRsLength, + int32_t threshold, ULocMatchFavorSubtag favorSubtag) const; + + int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; } + + UBool isParadigmLSR(const LSR &lsr) const; + + int32_t getDefaultScriptDistance() const { + return defaultScriptDistance; + } + + int32_t getDefaultDemotionPerDesiredLocale() const { + return defaultDemotionPerDesiredLocale; + } + +private: + LocaleDistance(const LocaleDistanceData &data); + LocaleDistance(const LocaleDistance &other) = delete; + LocaleDistance &operator=(const LocaleDistance &other) = delete; + + static void initLocaleDistance(UErrorCode &errorCode); + + static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState, + const char *desired, const char *supported); + + static int32_t getRegionPartitionsDistance( + BytesTrie &iter, uint64_t startState, + const char *desiredPartitions, const char *supportedPartitions, + int32_t threshold); + + static int32_t getFallbackRegionDistance(BytesTrie &iter, 
uint64_t startState); + + static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue); + + const char *partitionsForRegion(const LSR &lsr) const { + // ill-formed region -> one non-matching string + int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex]; + return partitionArrays[pIndex]; + } + + int32_t getDefaultRegionDistance() const { + return defaultRegionDistance; + } + + // The trie maps each dlang+slang+dscript+sscript+dregion+sregion + // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance. + // There is also a trie value for each subsequence of whole subtags. + // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"". + BytesTrie trie; + + /** + * Maps each region to zero or more single-character partitions. + */ + const uint8_t *regionToPartitionsIndex; + const char **partitionArrays; + + /** + * Used to get the paradigm region for a cluster, if there is one. + */ + const LSR *paradigmLSRs; + int32_t paradigmLSRsLength; + + int32_t defaultLanguageDistance; + int32_t defaultScriptDistance; + int32_t defaultRegionDistance; + int32_t minRegionDistance; + int32_t defaultDemotionPerDesiredLocale; +}; + +U_NAMESPACE_END + +#endif // __LOCDISTANCE_H__ diff --git a/deps/icu-small/source/common/locdspnm.cpp b/deps/icu-small/source/common/locdspnm.cpp index da35be9e76..43334f5196 100644 --- a/deps/icu-small/source/common/locdspnm.cpp +++ b/deps/icu-small/source/common/locdspnm.cpp @@ -291,6 +291,7 @@ class LocaleDisplayNamesImpl : public LocaleDisplayNames { UnicodeString formatCloseParen; UnicodeString formatReplaceCloseParen; UDisplayContext nameLength; + UDisplayContext substitute; // Constants for capitalization context usage types. 
enum CapContextUsage { @@ -337,7 +338,7 @@ public: UnicodeString& result) const; private: UnicodeString& localeIdName(const char* localeId, - UnicodeString& result) const; + UnicodeString& result, bool substitute) const; UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const; UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const; UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const; @@ -359,6 +360,7 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE) , capitalizationBrkIter(NULL) , nameLength(UDISPCTX_LENGTH_FULL) + , substitute(UDISPCTX_SUBSTITUTE) { initialize(); } @@ -371,6 +373,7 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE) , capitalizationBrkIter(NULL) , nameLength(UDISPCTX_LENGTH_FULL) + , substitute(UDISPCTX_SUBSTITUTE) { while (length-- > 0) { UDisplayContext value = *contexts++; @@ -385,6 +388,9 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, case UDISPCTX_TYPE_DISPLAY_LENGTH: nameLength = value; break; + case UDISPCTX_TYPE_SUBSTITUTE_HANDLING: + substitute = value; + break; default: break; } @@ -535,6 +541,8 @@ LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const { return capitalizationContext; case UDISPCTX_TYPE_DISPLAY_LENGTH: return nameLength; + case UDISPCTX_TYPE_SUBSTITUTE_HANDLING: + return substitute; default: break; } @@ -549,7 +557,7 @@ LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage, if ( result.length() > 0 && u_islower(result.char32At(0)) && capitalizationBrkIter!= NULL && ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || fCapitalization[usage] ) ) { // note fCapitalization[usage] won't be set unless capitalizationContext is UI_LIST_OR_MENU or STANDALONE - static UMutex 
capitalizationBrkIterLock = U_MUTEX_INITIALIZER; + static UMutex capitalizationBrkIterLock; Mutex lock(&capitalizationBrkIterLock); result.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); } @@ -583,7 +591,7 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc, do { // loop construct is so we can break early out of search if (hasScript && hasCountry) { ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0); - localeIdName(buffer, resultName); + localeIdName(buffer, resultName, false); if (!resultName.isBogus()) { hasScript = FALSE; hasCountry = FALSE; @@ -592,7 +600,7 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc, } if (hasScript) { ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0); - localeIdName(buffer, resultName); + localeIdName(buffer, resultName, false); if (!resultName.isBogus()) { hasScript = FALSE; break; @@ -600,7 +608,7 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc, } if (hasCountry) { ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0); - localeIdName(buffer, resultName); + localeIdName(buffer, resultName, false); if (!resultName.isBogus()) { hasCountry = FALSE; break; @@ -609,7 +617,11 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc, } while (FALSE); } if (resultName.isBogus() || resultName.isEmpty()) { - localeIdName(lang, resultName); + localeIdName(lang, resultName, substitute == UDISPCTX_SUBSTITUTE); + if (resultName.isBogus()) { + result.setToBogus(); + return result; + } } UnicodeString resultRemainder; @@ -617,13 +629,28 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc, UErrorCode status = U_ZERO_ERROR; if (hasScript) { - resultRemainder.append(scriptDisplayName(script, temp, TRUE)); + UnicodeString script_str = scriptDisplayName(script, temp, TRUE); + if (script_str.isBogus()) { + result.setToBogus(); + return result; + } + resultRemainder.append(script_str); } if 
(hasCountry) { - appendWithSep(resultRemainder, regionDisplayName(country, temp, TRUE)); + UnicodeString region_str = regionDisplayName(country, temp, TRUE); + if (region_str.isBogus()) { + result.setToBogus(); + return result; + } + appendWithSep(resultRemainder, region_str); } if (hasVariant) { - appendWithSep(resultRemainder, variantDisplayName(variant, temp, TRUE)); + UnicodeString variant_str = variantDisplayName(variant, temp, TRUE); + if (variant_str.isBogus()) { + result.setToBogus(); + return result; + } + appendWithSep(resultRemainder, variant_str); } resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen); resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen); @@ -689,14 +716,18 @@ LocaleDisplayNamesImpl::localeDisplayName(const char* localeId, // private UnicodeString& LocaleDisplayNamesImpl::localeIdName(const char* localeId, - UnicodeString& result) const { + UnicodeString& result, bool substitute) const { if (nameLength == UDISPCTX_LENGTH_SHORT) { langData.getNoFallback("Languages%short", localeId, result); if (!result.isBogus()) { return result; } } - return langData.getNoFallback("Languages", localeId, result); + if (substitute) { + return langData.get("Languages", localeId, result); + } else { + return langData.getNoFallback("Languages", localeId, result); + } } UnicodeString& @@ -706,12 +737,16 @@ LocaleDisplayNamesImpl::languageDisplayName(const char* lang, return result = UnicodeString(lang, -1, US_INV); } if (nameLength == UDISPCTX_LENGTH_SHORT) { - langData.get("Languages%short", lang, result); + langData.getNoFallback("Languages%short", lang, result); if (!result.isBogus()) { return adjustForUsageAndContext(kCapContextUsageLanguage, result); } } - langData.get("Languages", lang, result); + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Languages", lang, result); + } else { + langData.getNoFallback("Languages", lang, result); + } return adjustForUsageAndContext(kCapContextUsageLanguage, result); } 
@@ -720,12 +755,16 @@ LocaleDisplayNamesImpl::scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const { if (nameLength == UDISPCTX_LENGTH_SHORT) { - langData.get("Scripts%short", script, result); + langData.getNoFallback("Scripts%short", script, result); if (!result.isBogus()) { return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result); } } - langData.get("Scripts", script, result); + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Scripts", script, result); + } else { + langData.getNoFallback("Scripts", script, result); + } return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result); } @@ -746,12 +785,16 @@ LocaleDisplayNamesImpl::regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const { if (nameLength == UDISPCTX_LENGTH_SHORT) { - regionData.get("Countries%short", region, result); + regionData.getNoFallback("Countries%short", region, result); if (!result.isBogus()) { return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result); } } - regionData.get("Countries", region, result); + if (substitute == UDISPCTX_SUBSTITUTE) { + regionData.get("Countries", region, result); + } else { + regionData.getNoFallback("Countries", region, result); + } return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result); } @@ -767,7 +810,11 @@ LocaleDisplayNamesImpl::variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const { // don't have a resource for short variant names - langData.get("Variants", variant, result); + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Variants", variant, result); + } else { + langData.getNoFallback("Variants", variant, result); + } return skipAdjust? 
result: adjustForUsageAndContext(kCapContextUsageVariant, result); } @@ -782,7 +829,11 @@ LocaleDisplayNamesImpl::keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const { // don't have a resource for short key names - langData.get("Keys", key, result); + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Keys", key, result); + } else { + langData.getNoFallback("Keys", key, result); + } return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKey, result); } @@ -802,9 +853,8 @@ LocaleDisplayNamesImpl::keyValueDisplayName(const char* key, UErrorCode sts = U_ZERO_ERROR; UnicodeString ustrValue(value, -1, US_INV); int32_t len; - UBool isChoice = FALSE; const UChar *currencyName = ucurr_getName(ustrValue.getTerminatedBuffer(), - locale.getBaseName(), UCURR_LONG_NAME, &isChoice, &len, &sts); + locale.getBaseName(), UCURR_LONG_NAME, nullptr /* isChoiceFormat */, &len, &sts); if (U_FAILURE(sts)) { // Return the value as is on failure result = ustrValue; @@ -815,12 +865,16 @@ LocaleDisplayNamesImpl::keyValueDisplayName(const char* key, } if (nameLength == UDISPCTX_LENGTH_SHORT) { - langData.get("Types%short", key, value, result); + langData.getNoFallback("Types%short", key, value, result); if (!result.isBogus()) { return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result); } } - langData.get("Types", key, value, result); + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Types", key, value, result); + } else { + langData.getNoFallback("Types", key, value, result); + } return skipAdjust? 
result: adjustForUsageAndContext(kCapContextUsageKeyValue, result); } diff --git a/deps/icu-small/source/common/locid.cpp b/deps/icu-small/source/common/locid.cpp index 06986b636a..c6d3f88fc3 100644 --- a/deps/icu-small/source/common/locid.cpp +++ b/deps/icu-small/source/common/locid.cpp @@ -38,19 +38,19 @@ #include "unicode/strenum.h" #include "unicode/stringpiece.h" #include "unicode/uloc.h" -#include "putilimp.h" -#include "mutex.h" -#include "umutex.h" -#include "uassert.h" + +#include "bytesinkutil.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" +#include "mutex.h" +#include "putilimp.h" #include "uassert.h" +#include "ucln_cmn.h" #include "uhash.h" #include "ulocimp.h" -#include "ucln_cmn.h" +#include "umutex.h" #include "ustr_imp.h" -#include "charstr.h" -#include "bytesinkutil.h" U_CDECL_BEGIN static UBool U_CALLCONV locale_cleanup(void); @@ -62,10 +62,7 @@ static Locale *gLocaleCache = NULL; static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER; // gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale. -static UMutex *gDefaultLocaleMutex() { - static UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +static UMutex gDefaultLocaleMutex; static UHashtable *gDefaultLocalesHashT = NULL; static Locale *gDefaultLocale = NULL; @@ -174,7 +171,7 @@ U_NAMESPACE_BEGIN Locale *locale_set_default_internal(const char *id, UErrorCode& status) { // Synchronize this entire function. 
- Mutex lock(gDefaultLocaleMutex()); + Mutex lock(&gDefaultLocaleMutex); UBool canonicalize = FALSE; @@ -711,7 +708,7 @@ const Locale& U_EXPORT2 Locale::getDefault() { { - Mutex lock(gDefaultLocaleMutex()); + Mutex lock(&gDefaultLocaleMutex); if (gDefaultLocale != NULL) { return *gDefaultLocale; } @@ -1399,5 +1396,7 @@ Locale::getBaseName() const { return baseName; } +Locale::Iterator::~Iterator() = default; + //eof U_NAMESPACE_END diff --git a/deps/icu-small/source/common/loclikely.cpp b/deps/icu-small/source/common/loclikely.cpp index 50cc2a65de..3b71708e54 100644 --- a/deps/icu-small/source/common/loclikely.cpp +++ b/deps/icu-small/source/common/loclikely.cpp @@ -807,24 +807,24 @@ error: return FALSE; } -#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ - { int32_t count = 0; \ - int32_t i; \ - for (i = 0; i < trailingLength; i++) { \ - if (trailing[i] == '-' || trailing[i] == '_') { \ - count = 0; \ - if (count > 8) { \ - goto error; \ - } \ - } else if (trailing[i] == '@') { \ - break; \ - } else if (count > 8) { \ +#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t count = 0; \ + int32_t i; \ + for (i = 0; i < trailingLength; i++) { \ + if (trailing[i] == '-' || trailing[i] == '_') { \ + count = 0; \ + if (count > 8) { \ goto error; \ - } else { \ - count++; \ } \ + } else if (trailing[i] == '@') { \ + break; \ + } else if (count > 8) { \ + goto error; \ + } else { \ + count++; \ } \ - } + } \ +} UPRV_BLOCK_MACRO_END static void _uloc_addLikelySubtags(const char* localeID, diff --git a/deps/icu-small/source/common/loclikelysubtags.cpp b/deps/icu-small/source/common/loclikelysubtags.cpp new file mode 100644 index 0000000000..d7f5e124c2 --- /dev/null +++ b/deps/icu-small/source/common/loclikelysubtags.cpp @@ -0,0 +1,638 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html#License + +// loclikelysubtags.cpp +// created: 2019may08 Markus W. Scherer + +#include <utility> +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/localpointer.h" +#include "unicode/locid.h" +#include "unicode/uobject.h" +#include "unicode/ures.h" +#include "charstr.h" +#include "cstring.h" +#include "loclikelysubtags.h" +#include "lsr.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "uhash.h" +#include "uinvchar.h" +#include "umutex.h" +#include "uresdata.h" +#include "uresimp.h" + +U_NAMESPACE_BEGIN + +namespace { + +constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT +constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI +constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK + +/** + * Stores NUL-terminated strings with duplicate elimination. + * Checks for unique UTF-16 string pointers and converts to invariant characters. + */ +class UniqueCharStrings { +public: + UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { + uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode); + if (U_FAILURE(errorCode)) { return; } + strings = new CharString(); + if (strings == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + ~UniqueCharStrings() { + uhash_close(&map); + delete strings; + } + + /** Returns/orphans the CharString that contains all strings. */ + CharString *orphanCharStrings() { + CharString *result = strings; + strings = nullptr; + return result; + } + + /** Adds a string and returns a unique number for it. */ + int32_t add(const UnicodeString &s, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return 0; } + if (isFrozen) { + errorCode = U_NO_WRITE_PERMISSION; + return 0; + } + // The string points into the resource bundle. 
+ const char16_t *p = s.getBuffer(); + int32_t oldIndex = uhash_geti(&map, p); + if (oldIndex != 0) { // found duplicate + return oldIndex; + } + // Explicit NUL terminator for the previous string. + // The strings object is also terminated with one implicit NUL. + strings->append(0, errorCode); + int32_t newIndex = strings->length(); + strings->appendInvariantChars(s, errorCode); + uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode); + return newIndex; + } + + void freeze() { isFrozen = true; } + + /** + * Returns a string pointer for its unique number, if this object is frozen. + * Otherwise nullptr. + */ + const char *get(int32_t i) const { + U_ASSERT(isFrozen); + return isFrozen && i > 0 ? strings->data() + i : nullptr; + } + +private: + UHashtable map; + CharString *strings; + bool isFrozen = false; +}; + +} // namespace + +LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) : + distanceTrieBytes(data.distanceTrieBytes), + regionToPartitions(data.regionToPartitions), + partitions(data.partitions), + paradigms(data.paradigms), paradigmsLength(data.paradigmsLength), + distances(data.distances) { + data.partitions = nullptr; + data.paradigms = nullptr; +} + +LocaleDistanceData::~LocaleDistanceData() { + uprv_free(partitions); + delete[] paradigms; +} + +// TODO(ICU-20777): Rename to just LikelySubtagsData. 
+struct XLikelySubtagsData { + UResourceBundle *langInfoBundle = nullptr; + UniqueCharStrings strings; + CharStringMap languageAliases; + CharStringMap regionAliases; + const uint8_t *trieBytes = nullptr; + LSR *lsrs = nullptr; + int32_t lsrsLength = 0; + + LocaleDistanceData distanceData; + + XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {} + + ~XLikelySubtagsData() { + ures_close(langInfoBundle); + delete[] lsrs; + } + + void load(UErrorCode &errorCode) { + langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode); + if (U_FAILURE(errorCode)) { return; } + StackUResourceBundle stackTempBundle; + ResourceDataValue value; + ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(), + value, errorCode); + ResourceTable likelyTable = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + + // Read all strings in the resource bundle and convert them to invariant char *. + LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes; + int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0; + if (!readStrings(likelyTable, "languageAliases", value, + languageIndexes, languagesLength, errorCode) || + !readStrings(likelyTable, "regionAliases", value, + regionIndexes, regionsLength, errorCode) || + !readStrings(likelyTable, "lsrs", value, + lsrSubtagIndexes,lsrSubtagsLength, errorCode)) { + return; + } + if ((languagesLength & 1) != 0 || + (regionsLength & 1) != 0 || + (lsrSubtagsLength % 3) != 0) { + errorCode = U_INVALID_FORMAT_ERROR; + return; + } + if (lsrSubtagsLength == 0) { + errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + + if (!likelyTable.findValue("trie", value)) { + errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + int32_t length; + trieBytes = value.getBinary(length, errorCode); + if (U_FAILURE(errorCode)) { return; } + + // Also read distance/matcher data if available, + // to open & keep only one resource bundle pointer + // and to use one single UniqueCharStrings. 
+ UErrorCode matchErrorCode = U_ZERO_ERROR; + ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(), + value, matchErrorCode); + LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes; + int32_t partitionsLength = 0, paradigmSubtagsLength = 0; + if (U_SUCCESS(matchErrorCode)) { + ResourceTable matchTable = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + + if (matchTable.findValue("trie", value)) { + distanceData.distanceTrieBytes = value.getBinary(length, errorCode); + if (U_FAILURE(errorCode)) { return; } + } + + if (matchTable.findValue("regionToPartitions", value)) { + distanceData.regionToPartitions = value.getBinary(length, errorCode); + if (U_FAILURE(errorCode)) { return; } + if (length < LSR::REGION_INDEX_LIMIT) { + errorCode = U_INVALID_FORMAT_ERROR; + return; + } + } + + if (!readStrings(matchTable, "partitions", value, + partitionIndexes, partitionsLength, errorCode) || + !readStrings(matchTable, "paradigms", value, + paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) { + return; + } + if ((paradigmSubtagsLength % 3) != 0) { + errorCode = U_INVALID_FORMAT_ERROR; + return; + } + + if (matchTable.findValue("distances", value)) { + distanceData.distances = value.getIntVector(length, errorCode); + if (U_FAILURE(errorCode)) { return; } + if (length < 4) { // LocaleDistance IX_LIMIT + errorCode = U_INVALID_FORMAT_ERROR; + return; + } + } + } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) { + // ok for likely subtags + } else { // error other than missing resource + errorCode = matchErrorCode; + return; + } + + // Fetch & store invariant-character versions of strings + // only after we have collected and de-duplicated all of them. 
+ strings.freeze(); + + languageAliases = CharStringMap(languagesLength / 2, errorCode); + for (int32_t i = 0; i < languagesLength; i += 2) { + languageAliases.put(strings.get(languageIndexes[i]), + strings.get(languageIndexes[i + 1]), errorCode); + } + + regionAliases = CharStringMap(regionsLength / 2, errorCode); + for (int32_t i = 0; i < regionsLength; i += 2) { + regionAliases.put(strings.get(regionIndexes[i]), + strings.get(regionIndexes[i + 1]), errorCode); + } + if (U_FAILURE(errorCode)) { return; } + + lsrsLength = lsrSubtagsLength / 3; + lsrs = new LSR[lsrsLength]; + if (lsrs == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) { + lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]), + strings.get(lsrSubtagIndexes[i + 1]), + strings.get(lsrSubtagIndexes[i + 2])); + } + + if (partitionsLength > 0) { + distanceData.partitions = static_cast<const char **>( + uprv_malloc(partitionsLength * sizeof(const char *))); + if (distanceData.partitions == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t i = 0; i < partitionsLength; ++i) { + distanceData.partitions[i] = strings.get(partitionIndexes[i]); + } + } + + if (paradigmSubtagsLength > 0) { + distanceData.paradigmsLength = paradigmSubtagsLength / 3; + LSR *paradigms = new LSR[distanceData.paradigmsLength]; + if (paradigms == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) { + paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]), + strings.get(paradigmSubtagIndexes[i + 1]), + strings.get(paradigmSubtagIndexes[i + 2])); + } + distanceData.paradigms = paradigms; + } + } + +private: + bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value, + LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) { + if (table.findValue(key, value)) { + ResourceArray stringArray = 
value.getArray(errorCode); + if (U_FAILURE(errorCode)) { return false; } + length = stringArray.getSize(); + if (length == 0) { return true; } + int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length); + if (rawIndexes == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return false; + } + for (int i = 0; i < length; ++i) { + stringArray.getValue(i, value); // returns TRUE because i < length + rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode); + if (U_FAILURE(errorCode)) { return false; } + } + } + return true; + } +}; + +namespace { + +XLikelySubtags *gLikelySubtags = nullptr; +UInitOnce gInitOnce = U_INITONCE_INITIALIZER; + +UBool U_CALLCONV cleanup() { + delete gLikelySubtags; + gLikelySubtags = nullptr; + gInitOnce.reset(); + return TRUE; +} + +} // namespace + +void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + U_ASSERT(gLikelySubtags == nullptr); + XLikelySubtagsData data(errorCode); + data.load(errorCode); + if (U_FAILURE(errorCode)) { return; } + gLikelySubtags = new XLikelySubtags(data); + if (gLikelySubtags == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup); +} + +const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode); + return gLikelySubtags; +} + +XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) : + langInfoBundle(data.langInfoBundle), + strings(data.strings.orphanCharStrings()), + languageAliases(std::move(data.languageAliases)), + regionAliases(std::move(data.regionAliases)), + trie(data.trieBytes), + lsrs(data.lsrs), +#if U_DEBUG + lsrsLength(data.lsrsLength), +#endif + distanceData(std::move(data.distanceData)) { + data.langInfoBundle = nullptr; + data.lsrs = nullptr; + + // Cache the result of looking 
up language="und" encoded as "*", and "und-Zzzz" ("**"). + UStringTrieResult result = trie.next(u'*'); + U_ASSERT(USTRINGTRIE_HAS_NEXT(result)); + trieUndState = trie.getState64(); + result = trie.next(u'*'); + U_ASSERT(USTRINGTRIE_HAS_NEXT(result)); + trieUndZzzzState = trie.getState64(); + result = trie.next(u'*'); + U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); + defaultLsrIndex = trie.getValue(); + trie.reset(); + + for (char16_t c = u'a'; c <= u'z'; ++c) { + result = trie.next(c); + if (result == USTRINGTRIE_NO_VALUE) { + trieFirstLetterStates[c - u'a'] = trie.getState64(); + } + trie.reset(); + } +} + +XLikelySubtags::~XLikelySubtags() { + ures_close(langInfoBundle); + delete strings; + delete[] lsrs; +} + +LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const { + const char *name = locale.getName(); + if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=") + // Private use language tag x-subtag-subtag... + return LSR(name, "", ""); + } + return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(), + locale.getVariant(), errorCode); +} + +namespace { + +const char *getCanonical(const CharStringMap &aliases, const char *alias) { + const char *canonical = aliases.get(alias); + return canonical == nullptr ? alias : canonical; +} + +} // namespace + +LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region, + const char *variant, UErrorCode &errorCode) const { + // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK. + // They should match only themselves, + // not other locales with what looks like the same language and script subtags. 
+ char c1; + if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) { + switch (c1) { + case 'A': + return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region, errorCode); + case 'B': + return LSR(PSEUDO_BIDI_PREFIX, language, script, region, errorCode); + case 'C': + return LSR(PSEUDO_CRACKED_PREFIX, language, script, region, errorCode); + default: // normal locale + break; + } + } + + if (variant[0] == 'P' && variant[1] == 'S') { + if (uprv_strcmp(variant, "PSACCENT") == 0) { + return LSR(PSEUDO_ACCENTS_PREFIX, language, script, + *region == 0 ? "XA" : region, errorCode); + } else if (uprv_strcmp(variant, "PSBIDI") == 0) { + return LSR(PSEUDO_BIDI_PREFIX, language, script, + *region == 0 ? "XB" : region, errorCode); + } else if (uprv_strcmp(variant, "PSCRACK") == 0) { + return LSR(PSEUDO_CRACKED_PREFIX, language, script, + *region == 0 ? "XC" : region, errorCode); + } + // else normal locale + } + + language = getCanonical(languageAliases, language); + // (We have no script mappings.) + region = getCanonical(regionAliases, region); + return maximize(language, script, region); +} + +LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const { + if (uprv_strcmp(language, "und") == 0) { + language = ""; + } + if (uprv_strcmp(script, "Zzzz") == 0) { + script = ""; + } + if (uprv_strcmp(region, "ZZ") == 0) { + region = ""; + } + if (*script != 0 && *region != 0 && *language != 0) { + return LSR(language, script, region); // already maximized + } + + uint32_t retainOldMask = 0; + BytesTrie iter(trie); + uint64_t state; + int32_t value; + // Small optimization: Array lookup for first language letter. 
+ int32_t c0; + if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 && + language[1] != 0 && // language.length() >= 2 + (state = trieFirstLetterStates[c0]) != 0) { + value = trieNext(iter.resetToState64(state), language, 1); + } else { + value = trieNext(iter, language, 0); + } + if (value >= 0) { + if (*language != 0) { + retainOldMask |= 4; + } + state = iter.getState64(); + } else { + retainOldMask |= 4; + iter.resetToState64(trieUndState); // "und" ("*") + state = 0; + } + + if (value > 0) { + // Intermediate or final value from just language. + if (value == SKIP_SCRIPT) { + value = 0; + } + if (*script != 0) { + retainOldMask |= 2; + } + } else { + value = trieNext(iter, script, 0); + if (value >= 0) { + if (*script != 0) { + retainOldMask |= 2; + } + state = iter.getState64(); + } else { + retainOldMask |= 2; + if (state == 0) { + iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**") + } else { + iter.resetToState64(state); + value = trieNext(iter, "", 0); + U_ASSERT(value >= 0); + state = iter.getState64(); + } + } + } + + if (value > 0) { + // Final value from just language or language+script. + if (*region != 0) { + retainOldMask |= 1; + } + } else { + value = trieNext(iter, region, 0); + if (value >= 0) { + if (*region != 0) { + retainOldMask |= 1; + } + } else { + retainOldMask |= 1; + if (state == 0) { + value = defaultLsrIndex; + } else { + iter.resetToState64(state); + value = trieNext(iter, "", 0); + U_ASSERT(value > 0); + } + } + } + U_ASSERT(value < lsrsLength); + const LSR &result = lsrs[value]; + + if (*language == 0) { + language = "und"; + } + + if (retainOldMask == 0) { + // Quickly return a copy of the lookup-result LSR + // without new allocation of the subtags. 
+ return LSR(result.language, result.script, result.region); + } + if ((retainOldMask & 4) == 0) { + language = result.language; + } + if ((retainOldMask & 2) == 0) { + script = result.script; + } + if ((retainOldMask & 1) == 0) { + region = result.region; + } + return LSR(language, script, region); +} + +int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) { + UStringTrieResult result; + uint8_t c; + if ((c = s[i]) == 0) { + result = iter.next(u'*'); + } else { + for (;;) { + c = uprv_invCharToAscii(c); + // EBCDIC: If s[i] is not an invariant character, + // then c is now 0 and will simply not match anything, which is harmless. + uint8_t next = s[++i]; + if (next != 0) { + if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) { + return -1; + } + } else { + // last character of this subtag + result = iter.next(c | 0x80); + break; + } + c = next; + } + } + switch (result) { + case USTRINGTRIE_NO_MATCH: return -1; + case USTRINGTRIE_NO_VALUE: return 0; + case USTRINGTRIE_INTERMEDIATE_VALUE: + U_ASSERT(iter.getValue() == SKIP_SCRIPT); + return SKIP_SCRIPT; + case USTRINGTRIE_FINAL_VALUE: return iter.getValue(); + default: return -1; + } +} + +// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code +// in loclikely.cpp to this new code, including activating this +// minimizeSubtags() function. The LocaleMatcher does not minimize. 
+#if 0 +LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn, + const char *regionIn, ULocale.Minimize fieldToFavor, + UErrorCode &errorCode) const { + LSR result = maximize(languageIn, scriptIn, regionIn); + + // We could try just a series of checks, like: + // LSR result2 = addLikelySubtags(languageIn, "", ""); + // if result.equals(result2) return result2; + // However, we can optimize 2 of the cases: + // (languageIn, "", "") + // (languageIn, "", regionIn) + + // value00 = lookup(result.language, "", "") + BytesTrie iter = new BytesTrie(trie); + int value = trieNext(iter, result.language, 0); + U_ASSERT(value >= 0); + if (value == 0) { + value = trieNext(iter, "", 0); + U_ASSERT(value >= 0); + if (value == 0) { + value = trieNext(iter, "", 0); + } + } + U_ASSERT(value > 0); + LSR value00 = lsrs[value]; + boolean favorRegionOk = false; + if (result.script.equals(value00.script)) { //script is default + if (result.region.equals(value00.region)) { + return new LSR(result.language, "", ""); + } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) { + return new LSR(result.language, "", result.region); + } else { + favorRegionOk = true; + } + } + + // The last case is not as easy to optimize. + // Maybe do later, but for now use the straightforward code. + LSR result2 = maximize(languageIn, scriptIn, ""); + if (result2.equals(result)) { + return new LSR(result.language, result.script, ""); + } else if (favorRegionOk) { + return new LSR(result.language, "", result.region); + } + return result; +} +#endif + +U_NAMESPACE_END diff --git a/deps/icu-small/source/common/loclikelysubtags.h b/deps/icu-small/source/common/loclikelysubtags.h new file mode 100644 index 0000000000..8c8a08ac5e --- /dev/null +++ b/deps/icu-small/source/common/loclikelysubtags.h @@ -0,0 +1,143 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html#License + +// loclikelysubtags.h +// created: 2019may08 Markus W. Scherer + +#ifndef __LOCLIKELYSUBTAGS_H__ +#define __LOCLIKELYSUBTAGS_H__ + +#include <utility> +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/locid.h" +#include "unicode/uobject.h" +#include "unicode/ures.h" +#include "lsr.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +struct XLikelySubtagsData; + +/** + * Map of const char * keys & values. + * Stores pointers as is: Does not own/copy/adopt/release strings. + */ +class CharStringMap final : public UMemory { +public: + /** Constructs an unusable non-map. */ + CharStringMap() : map(nullptr) {} + CharStringMap(int32_t size, UErrorCode &errorCode) { + map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars, + size, &errorCode); + } + CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) { + other.map = nullptr; + } + CharStringMap(const CharStringMap &other) = delete; + ~CharStringMap() { + uhash_close(map); + } + + CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT { + map = other.map; + other.map = nullptr; + return *this; + } + CharStringMap &operator=(const CharStringMap &other) = delete; + + const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); } + void put(const char *key, const char *value, UErrorCode &errorCode) { + uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode); + } + +private: + UHashtable *map; +}; + +struct LocaleDistanceData { + LocaleDistanceData() = default; + LocaleDistanceData(LocaleDistanceData &&data); + ~LocaleDistanceData(); + + const uint8_t *distanceTrieBytes = nullptr; + const uint8_t *regionToPartitions = nullptr; + const char **partitions = nullptr; + const LSR *paradigms = nullptr; + int32_t paradigmsLength = 0; + const int32_t *distances = nullptr; + +private: + LocaleDistanceData &operator=(const LocaleDistanceData 
&) = delete; +}; + +// TODO(ICU-20777): Rename to just LikelySubtags. +class XLikelySubtags final : public UMemory { +public: + ~XLikelySubtags(); + + static constexpr int32_t SKIP_SCRIPT = 1; + + // VisibleForTesting + static const XLikelySubtags *getSingleton(UErrorCode &errorCode); + + // VisibleForTesting + LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const; + + // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code + // in loclikely.cpp to this new code, including activating this + // minimizeSubtags() function. The LocaleMatcher does not minimize. +#if 0 + LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn, + ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const; +#endif + + // visible for LocaleDistance + const LocaleDistanceData &getDistanceData() const { return distanceData; } + +private: + XLikelySubtags(XLikelySubtagsData &data); + XLikelySubtags(const XLikelySubtags &other) = delete; + XLikelySubtags &operator=(const XLikelySubtags &other) = delete; + + static void initLikelySubtags(UErrorCode &errorCode); + + LSR makeMaximizedLsr(const char *language, const char *script, const char *region, + const char *variant, UErrorCode &errorCode) const; + + /** + * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN". + */ + LSR maximize(const char *language, const char *script, const char *region) const; + + static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i); + + UResourceBundle *langInfoBundle; + // We could store the strings by value, except that if there were few enough strings, + // moving the contents could copy it to a different array, + // invalidating the pointers stored in the maps. + CharString *strings; + CharStringMap languageAliases; + CharStringMap regionAliases; + + // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs. 
+ // There is also a trie value for each intermediate lang and lang+script. + // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"". + BytesTrie trie; + uint64_t trieUndState; + uint64_t trieUndZzzzState; + int32_t defaultLsrIndex; + uint64_t trieFirstLetterStates[26]; + const LSR *lsrs; +#if U_DEBUG + int32_t lsrsLength; +#endif + + // distance/matcher data: see comment in XLikelySubtagsData::load() + LocaleDistanceData distanceData; +}; + +U_NAMESPACE_END + +#endif // __LOCLIKELYSUBTAGS_H__ diff --git a/deps/icu-small/source/common/lsr.cpp b/deps/icu-small/source/common/lsr.cpp new file mode 100644 index 0000000000..0c28eeda1b --- /dev/null +++ b/deps/icu-small/source/common/lsr.cpp @@ -0,0 +1,101 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// lsr.cpp +// created: 2019may08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "lsr.h" +#include "uinvchar.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode) : + language(nullptr), script(nullptr), region(r), + regionIndex(indexForRegion(region)) { + if (U_SUCCESS(errorCode)) { + CharString langScript; + langScript.append(prefix, errorCode).append(lang, errorCode).append('\0', errorCode); + int32_t scriptOffset = langScript.length(); + langScript.append(prefix, errorCode).append(scr, errorCode); + owned = langScript.cloneData(errorCode); + if (U_SUCCESS(errorCode)) { + language = owned; + script = owned + scriptOffset; + } + } +} + +LSR::LSR(LSR &&other) U_NOEXCEPT : + language(other.language), script(other.script), region(other.region), owned(other.owned), + regionIndex(other.regionIndex), hashCode(other.hashCode) { + if (owned != nullptr) { + other.language = other.script = ""; + other.owned = nullptr; + other.hashCode = 0; + } +} + +void LSR::deleteOwned() { + 
uprv_free(owned); +} + +LSR &LSR::operator=(LSR &&other) U_NOEXCEPT { + this->~LSR(); + language = other.language; + script = other.script; + region = other.region; + regionIndex = other.regionIndex; + owned = other.owned; + hashCode = other.hashCode; + if (owned != nullptr) { + other.language = other.script = ""; + other.owned = nullptr; + other.hashCode = 0; + } + return *this; +} + +UBool LSR::operator==(const LSR &other) const { + return + uprv_strcmp(language, other.language) == 0 && + uprv_strcmp(script, other.script) == 0 && + regionIndex == other.regionIndex && + // Compare regions if both are ill-formed (and their indexes are 0). + (regionIndex > 0 || uprv_strcmp(region, other.region) == 0); +} + +int32_t LSR::indexForRegion(const char *region) { + int32_t c = region[0]; + int32_t a = c - '0'; + if (0 <= a && a <= 9) { // digits: "419" + int32_t b = region[1] - '0'; + if (b < 0 || 9 < b) { return 0; } + c = region[2] - '0'; + if (c < 0 || 9 < c || region[3] != 0) { return 0; } + return (10 * a + b) * 10 + c + 1; + } else { // letters: "DE" + a = uprv_upperOrdinal(c); + if (a < 0 || 25 < a) { return 0; } + int32_t b = uprv_upperOrdinal(region[1]); + if (b < 0 || 25 < b || region[2] != 0) { return 0; } + return 26 * a + b + 1001; + } + return 0; +} + +LSR &LSR::setHashCode() { + if (hashCode == 0) { + hashCode = + (ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language))) * 37 + + ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)))) * 37 + + regionIndex; + } + return *this; +} + +U_NAMESPACE_END diff --git a/deps/icu-small/source/common/lsr.h b/deps/icu-small/source/common/lsr.h new file mode 100644 index 0000000000..db6cf938f4 --- /dev/null +++ b/deps/icu-small/source/common/lsr.h @@ -0,0 +1,72 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// lsr.h +// created: 2019may08 Markus W. 
Scherer + +#ifndef __LSR_H__ +#define __LSR_H__ + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "cstring.h" + +U_NAMESPACE_BEGIN + +struct LSR final : public UMemory { + static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26; + + const char *language; + const char *script; + const char *region; + char *owned = nullptr; + /** Index for region, 0 if ill-formed. @see indexForRegion */ + int32_t regionIndex = 0; + /** Only set for LSRs that will be used in a hash table. */ + int32_t hashCode = 0; + + LSR() : language("und"), script(""), region("") {} + + /** Constructor which aliases all subtag pointers. */ + LSR(const char *lang, const char *scr, const char *r) : + language(lang), script(scr), region(r), + regionIndex(indexForRegion(region)) {} + /** + * Constructor which prepends the prefix to the language and script, + * copies those into owned memory, and aliases the region. + */ + LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode); + LSR(LSR &&other) U_NOEXCEPT; + LSR(const LSR &other) = delete; + inline ~LSR() { + // Pure inline code for almost all instances. + if (owned != nullptr) { + deleteOwned(); + } + } + + LSR &operator=(LSR &&other) U_NOEXCEPT; + LSR &operator=(const LSR &other) = delete; + + /** + * Returns a positive index (>0) for a well-formed region code. + * Do not rely on a particular region->index mapping; it may change. + * Returns 0 for ill-formed strings. 
+ */ + static int32_t indexForRegion(const char *region); + + UBool operator==(const LSR &other) const; + + inline UBool operator!=(const LSR &other) const { + return !operator==(other); + } + + LSR &setHashCode(); + +private: + void deleteOwned(); +}; + +U_NAMESPACE_END + +#endif // __LSR_H__ diff --git a/deps/icu-small/source/common/mutex.h b/deps/icu-small/source/common/mutex.h index 5223397bbc..44b1f90ba0 100644 --- a/deps/icu-small/source/common/mutex.h +++ b/deps/icu-small/source/common/mutex.h @@ -28,50 +28,48 @@ U_NAMESPACE_BEGIN -//---------------------------------------------------------------------------- -// Code within that accesses shared static or global data should -// should instantiate a Mutex object while doing so. You should make your own -// private mutex where possible. - -// For example: -// -// UMutex myMutex = U_MUTEX_INITIALIZER; -// -// void Function(int arg1, int arg2) -// { -// static Object* foo; // Shared read-write object -// Mutex mutex(&myMutex); // or no args for the global lock -// foo->Method(); -// // When 'mutex' goes out of scope and gets destroyed here, the lock is released -// } -// -// Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function -// returning a Mutex. This is a common mistake which silently slips through the -// compiler!! -// +/** + * Mutex is a helper class for convenient locking and unlocking of a UMutex. + * + * Creating a local scope Mutex will lock a UMutex, holding the lock until the Mutex + * goes out of scope. + * + * If no UMutex is specified, the ICU global mutex is implied. 
+ * + * For example: + * + * static UMutex myMutex; + * + * void Function(int arg1, int arg2) + * { + * static Object* foo; // Shared read-write object + * Mutex mutex(&myMutex); // or no args for the global lock + * foo->Method(); + * // When 'mutex' goes out of scope and gets destroyed here, the lock is released + * } + * + * Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function + * returning a Mutex. This is a common mistake which silently slips through the + * compiler!! + */ class U_COMMON_API Mutex : public UMemory { public: - inline Mutex(UMutex *mutex = NULL); - inline ~Mutex(); + Mutex(UMutex *mutex = nullptr) : fMutex(mutex) { + umtx_lock(fMutex); + } + ~Mutex() { + umtx_unlock(fMutex); + } -private: - UMutex *fMutex; + Mutex(const Mutex &other) = delete; // forbid assigning of this class + Mutex &operator=(const Mutex &other) = delete; // forbid copying of this class + void *operator new(size_t s) = delete; // forbid heap allocation. Locals only. 
- Mutex(const Mutex &other); // forbid copying of this class - Mutex &operator=(const Mutex &other); // forbid copying of this class +private: + UMutex *fMutex; }; -inline Mutex::Mutex(UMutex *mutex) - : fMutex(mutex) -{ - umtx_lock(fMutex); -} - -inline Mutex::~Mutex() -{ - umtx_unlock(fMutex); -} U_NAMESPACE_END diff --git a/deps/icu-small/source/common/normalizer2impl.h b/deps/icu-small/source/common/normalizer2impl.h index 7ecdef6d9c..cf3015ea88 100644 --- a/deps/icu-small/source/common/normalizer2impl.h +++ b/deps/icu-small/source/common/normalizer2impl.h @@ -245,9 +245,7 @@ private: */ class U_COMMON_API Normalizer2Impl : public UObject { public: - Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { - fCanonIterDataInitOnce.reset(); - } + Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { } virtual ~Normalizer2Impl(); void init(const int32_t *inIndexes, const UCPTrie *inTrie, @@ -723,7 +721,7 @@ private: const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 - UInitOnce fCanonIterDataInitOnce; + UInitOnce fCanonIterDataInitOnce = U_INITONCE_INITIALIZER; CanonIterData *fCanonIterData; }; diff --git a/deps/icu-small/source/common/putil.cpp b/deps/icu-small/source/common/putil.cpp index e105befc3f..207350f8f2 100644 --- a/deps/icu-small/source/common/putil.cpp +++ b/deps/icu-small/source/common/putil.cpp @@ -249,7 +249,7 @@ static UDate getUTCtime_real() { } static UDate getUTCtime_fake() { - static UMutex fakeClockMutex = U_MUTEX_INTIALIZER; + static UMutex fakeClockMutex; umtx_lock(&fakeClockMutex); if(!fakeClock_set) { UDate real = getUTCtime_real(); @@ -1315,11 +1315,10 @@ uprv_pathIsAbsolute(const char *path) # endif #endif -#if U_PLATFORM_HAS_WINUWP_API != 0 +#if defined(ICU_DATA_DIR_WINDOWS) // Helper function to get the ICU Data Directory under the Windows directory location. 
static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength) { -#if defined(ICU_DATA_DIR_WINDOWS) wchar_t windowsPath[MAX_PATH]; char windowsPathUtf8[MAX_PATH]; @@ -1346,7 +1345,6 @@ static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryB } } } -#endif return FALSE; } @@ -1380,9 +1378,9 @@ static void U_CALLCONV dataDirectoryInitFn() { */ # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO /* First try to get the environment variable */ -# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv +# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv path=getenv("ICU_DATA"); -# endif +# endif # endif /* ICU_DATA_DIR may be set as a compile option. @@ -1411,7 +1409,7 @@ static void U_CALLCONV dataDirectoryInitFn() { } #endif -#if U_PLATFORM_HAS_WINUWP_API != 0 && defined(ICU_DATA_DIR_WINDOWS) +#if defined(ICU_DATA_DIR_WINDOWS) char datadir_path_buffer[MAX_PATH]; if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { path = datadir_path_buffer; @@ -1461,12 +1459,17 @@ static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { const char *dir = ""; -#if U_PLATFORM_HAS_WINUWP_API != 0 - // The UWP version does not support the environment variable setting, but can possibly pick them up from the Windows directory. +#if U_PLATFORM_HAS_WINUWP_API == 1 +// The UWP version does not support the environment variable setting. + +# if defined(ICU_DATA_DIR_WINDOWS) + // When using the Windows system data, we can possibly pick up time zone data from the Windows directory. 
char datadir_path_buffer[MAX_PATH]; if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { dir = datadir_path_buffer; } +# endif + #else dir = getenv("ICU_TIMEZONE_FILES_DIR"); #endif // U_PLATFORM_HAS_WINUWP_API @@ -1560,6 +1563,10 @@ static const char *uprv_getPOSIXIDForCategory(int category) { /* Nothing worked. Give it a nice POSIX default value. */ posixID = "en_US_POSIX"; + // Note: this test will not catch 'C.UTF-8', + // that will be handled in uprv_getDefaultLocaleID(). + // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage() + // caller which expects to see "en_US_POSIX" in many branches. } return posixID; } @@ -1631,8 +1638,8 @@ The leftmost codepage (.xxx) wins. } // Copy the ID into owned memory. - // Over-allocate in case we replace "@" with "__". - char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 1 + 1)); + // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination + char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1)); if (correctedPOSIXLocale == nullptr) { return nullptr; } @@ -1641,9 +1648,16 @@ The leftmost codepage (.xxx) wins. char *limit; if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) { *limit = 0; - if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { - *limit = 0; - } + } + if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { + *limit = 0; + } + + if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant + || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) { + // Raw input was C.* or POSIX.*, Give it a nice POSIX default value. + // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory()) + uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX"); } /* Note that we scan the *uncorrected* ID. */ @@ -1668,7 +1682,7 @@ The leftmost codepage (.xxx) wins. 
if ((q = uprv_strchr(p, '.')) != nullptr) { /* How big will the resulting string be? */ int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); - uprv_strncat(correctedPOSIXLocale, p, q-p); + uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset correctedPOSIXLocale[len] = 0; } else { @@ -2053,7 +2067,7 @@ int_getDefaultCodepage() static char codepage[64]; DWORD codepageNumber = 0; -#if U_PLATFORM_HAS_WINUWP_API > 0 +#if U_PLATFORM_HAS_WINUWP_API == 1 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather // have folks use Unicode than a "system" code page, however this is the same // codepage as the system default locale codepage. (FWIW, the system locale is diff --git a/deps/icu-small/source/common/putilimp.h b/deps/icu-small/source/common/putilimp.h index f9c13d8e1b..2e9fbcc483 100644 --- a/deps/icu-small/source/common/putilimp.h +++ b/deps/icu-small/source/common/putilimp.h @@ -179,76 +179,6 @@ typedef size_t uintptr_t; /** @} */ /*===========================================================================*/ -/** @{ GCC built in functions for atomic memory operations */ -/*===========================================================================*/ - -/** - * \def U_HAVE_GCC_ATOMICS - * @internal - */ -#ifdef U_HAVE_GCC_ATOMICS - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_MINGW - #define U_HAVE_GCC_ATOMICS 0 -#elif U_GCC_MAJOR_MINOR >= 404 || defined(__clang__) - /* TODO: Intel icc and IBM xlc on AIX also support gcc atomics. (Intel originated them.) - * Add them for these compilers. - * Note: Clang sets __GNUC__ defines for version 4.2, so misses the 4.4 test here. - */ -# define U_HAVE_GCC_ATOMICS 1 -#else -# define U_HAVE_GCC_ATOMICS 0 -#endif - -/** @} */ - -/** - * \def U_HAVE_STD_ATOMICS - * Defines whether to use the C++11 std::atomic functions. - * If false, ICU will fall back to compiler or platform specific alternatives. 
- * Note: support for these fall back options for atomics will be removed in a future version - * of ICU, and the use of C++ 11 atomics will be required. - * @internal - */ -#ifdef U_HAVE_STD_ATOMICS - /* Use the predefined value. */ -#else -# define U_HAVE_STD_ATOMICS 1 -#endif - -/** - * \def U_HAVE_CLANG_ATOMICS - * Defines whether Clang c11 style built-in atomics are available. - * These are used in preference to gcc atomics when both are available. - */ -#ifdef U_HAVE_CLANG_ATOMICS - /* Use the predefined value. */ -#elif __has_builtin(__c11_atomic_load) && \ - __has_builtin(__c11_atomic_store) && \ - __has_builtin(__c11_atomic_fetch_add) && \ - __has_builtin(__c11_atomic_fetch_sub) -# define U_HAVE_CLANG_ATOMICS 1 -#else -# define U_HAVE_CLANG_ATOMICS 0 -#endif - - -/** - * \def U_HAVE_STD_MUTEX - * Defines whether to use the C++11 std::mutex functions. - * If false, ICU will fall back to compiler or platform specific alternatives. - * std::mutex is preferred, and used by default unless this setting is overridden. - * Note: support for other options for mutexes will be removed in a future version - * of ICU, and the use of std::mutex will be required. - * @internal - */ -#ifdef U_HAVE_STD_MUTEX - /* Use the predefined value. */ -#else -# define U_HAVE_STD_MUTEX 1 -#endif - -/*===========================================================================*/ /** @{ Programs used by ICU code */ /*===========================================================================*/ diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp index 3b116ffaf6..01dae48de4 100644 --- a/deps/icu-small/source/common/rbbi.cpp +++ b/deps/icu-small/source/common/rbbi.cpp @@ -323,8 +323,8 @@ void RuleBasedBreakIterator::init(UErrorCode &status) { // Virtual function: does the right thing with subclasses. 
// //----------------------------------------------------------------------------- -BreakIterator* -RuleBasedBreakIterator::clone(void) const { +RuleBasedBreakIterator* +RuleBasedBreakIterator::clone() const { return new RuleBasedBreakIterator(*this); } @@ -352,7 +352,7 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const { // or have a different iteration position. // Note that fText's position is always the same as the break iterator's position. return FALSE; - }; + } if (!(fPosition == that2.fPosition && fRuleStatusIndex == that2.fRuleStatusIndex && @@ -1079,10 +1079,8 @@ const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) { } -BreakIterator * RuleBasedBreakIterator::createBufferClone(void * /*stackBuffer*/, - int32_t &bufferSize, - UErrorCode &status) -{ +RuleBasedBreakIterator *RuleBasedBreakIterator::createBufferClone( + void * /*stackBuffer*/, int32_t &bufferSize, UErrorCode &status) { if (U_FAILURE(status)){ return NULL; } diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp index 17ee232080..4f9e83360a 100644 --- a/deps/icu-small/source/common/rbbi_cache.cpp +++ b/deps/icu-small/source/common/rbbi_cache.cpp @@ -519,7 +519,7 @@ UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) // The initial handleNext() only advanced by a single code point. Go again. position = fBI->handleNext(); // Safe rules identify safe pairs. 
} - }; + } positionStatusIdx = fBI->fRuleStatusIndex; } } while (position >= fromPosition); diff --git a/deps/icu-small/source/common/resbund.cpp b/deps/icu-small/source/common/resbund.cpp index 00dbf6f8fe..5ec7541b4d 100644 --- a/deps/icu-small/source/common/resbund.cpp +++ b/deps/icu-small/source/common/resbund.cpp @@ -51,6 +51,7 @@ #include "unicode/utypes.h" #include "unicode/resbund.h" +#include "cmemory.h" #include "mutex.h" #include "uassert.h" #include "umutex.h" @@ -377,7 +378,7 @@ void ResourceBundle::getVersion(UVersionInfo versionInfo) const { } const Locale &ResourceBundle::getLocale(void) const { - static UMutex gLocaleLock = U_MUTEX_INITIALIZER; + static UMutex gLocaleLock; Mutex lock(&gLocaleLock); if (fLocale != NULL) { return *fLocale; diff --git a/deps/icu-small/source/common/resource.h b/deps/icu-small/source/common/resource.h index 3dbff785ef..5199b85888 100644 --- a/deps/icu-small/source/common/resource.h +++ b/deps/icu-small/source/common/resource.h @@ -28,6 +28,7 @@ #include "unicode/utypes.h" #include "unicode/unistr.h" #include "unicode/ures.h" +#include "restrace.h" struct ResourceData; @@ -47,8 +48,10 @@ public: ResourceArray() : items16(NULL), items32(NULL), length(0) {} /** Only for implementation use. @internal */ - ResourceArray(const uint16_t *i16, const uint32_t *i32, int32_t len) : - items16(i16), items32(i32), length(len) {} + ResourceArray(const uint16_t *i16, const uint32_t *i32, int32_t len, + const ResourceTracer& traceInfo) : + items16(i16), items32(i32), length(len), + fTraceInfo(traceInfo) {} /** * @return The number of items in the array resource. @@ -68,6 +71,7 @@ private: const uint16_t *items16; const uint32_t *items32; int32_t length; + ResourceTracer fTraceInfo; }; /** @@ -80,27 +84,37 @@ public: /** Only for implementation use. 
@internal */ ResourceTable(const uint16_t *k16, const int32_t *k32, - const uint16_t *i16, const uint32_t *i32, int32_t len) : - keys16(k16), keys32(k32), items16(i16), items32(i32), length(len) {} + const uint16_t *i16, const uint32_t *i32, int32_t len, + const ResourceTracer& traceInfo) : + keys16(k16), keys32(k32), items16(i16), items32(i32), length(len), + fTraceInfo(traceInfo) {} /** * @return The number of items in the array resource. */ int32_t getSize() const { return length; } /** - * @param i Array item index. + * @param i Table item index. * @param key Output-only, receives the key of the i'th item. * @param value Output-only, receives the value of the i'th item. * @return TRUE if i is non-negative and less than getSize(). */ UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const; + /** + * @param key Key string to find in the table. + * @param value Output-only, receives the value of the item with that key. + * @return TRUE if the table contains the key. + */ + UBool findValue(const char *key, ResourceValue &value) const; + private: const uint16_t *keys16; const int32_t *keys32; const uint16_t *items16; const uint32_t *items32; int32_t length; + ResourceTracer fTraceInfo; }; /** diff --git a/deps/icu-small/source/common/restrace.cpp b/deps/icu-small/source/common/restrace.cpp new file mode 100644 index 0000000000..5c6498850e --- /dev/null +++ b/deps/icu-small/source/common/restrace.cpp @@ -0,0 +1,130 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if U_ENABLE_TRACING + +#include "restrace.h" +#include "charstr.h" +#include "cstring.h" +#include "utracimp.h" +#include "uresimp.h" +#include "uassert.h" +#include "util.h" + +U_NAMESPACE_BEGIN + +ResourceTracer::~ResourceTracer() = default; + +void ResourceTracer::trace(const char* resType) const { + U_ASSERT(fResB || fParent); + UTRACE_ENTRY(UTRACE_UDATA_RESOURCE); + UErrorCode status = U_ZERO_ERROR; + + CharString filePath; + getFilePath(filePath, status); + + CharString resPath; + getResPath(resPath, status); + + // The longest type ("intvector") is 9 chars + const char kSpaces[] = "         "; + CharString format; + format.append(kSpaces, sizeof(kSpaces) - 1 - uprv_strlen(resType), status); + format.append("(%s) %s @ %s", status); + + UTRACE_DATA3(UTRACE_VERBOSE, + format.data(), + resType, + filePath.data(), + resPath.data()); + UTRACE_EXIT_STATUS(status); +} + +void ResourceTracer::traceOpen() const { + U_ASSERT(fResB); + UTRACE_ENTRY(UTRACE_UDATA_BUNDLE); + UErrorCode status = U_ZERO_ERROR; + + CharString filePath; + UTRACE_DATA1(UTRACE_VERBOSE, "%s", getFilePath(filePath, status).data()); + UTRACE_EXIT_STATUS(status); +} + +CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const { + if (fResB) { + output.append(fResB->fData->fPath, status); + output.append('/', status); + output.append(fResB->fData->fName, status); + output.append(".res", status); + } else { + fParent->getFilePath(output, status); + } + return output; +} + +CharString& ResourceTracer::getResPath(CharString& output, UErrorCode& status) const { + if (fResB) { + output.append('/', status); + output.append(fResB->fResPath, status); + // removing the trailing / + U_ASSERT(output[output.length()-1] == '/'); + output.truncate(output.length()-1); + } else { + fParent->getResPath(output, status); + } + if (fKey) { + output.append('/', status); + output.append(fKey, status); + 
} + if (fIndex != -1) { + output.append('[', status); + UnicodeString indexString; + ICU_Utility::appendNumber(indexString, fIndex); + output.appendInvariantChars(indexString, status); + output.append(']', status); + } + return output; +} + +void FileTracer::traceOpen(const char* path, const char* type, const char* name) { + if (uprv_strcmp(type, "res") == 0) { + traceOpenResFile(path, name); + } else { + traceOpenDataFile(path, type, name); + } +} + +void FileTracer::traceOpenDataFile(const char* path, const char* type, const char* name) { + UTRACE_ENTRY(UTRACE_UDATA_DATA_FILE); + UErrorCode status = U_ZERO_ERROR; + + CharString filePath; + filePath.append(path, status); + filePath.append('/', status); + filePath.append(name, status); + filePath.append('.', status); + filePath.append(type, status); + + UTRACE_DATA1(UTRACE_VERBOSE, "%s", filePath.data()); + UTRACE_EXIT_STATUS(status); +} + +void FileTracer::traceOpenResFile(const char* path, const char* name) { + UTRACE_ENTRY(UTRACE_UDATA_RES_FILE); + UErrorCode status = U_ZERO_ERROR; + + CharString filePath; + filePath.append(path, status); + filePath.append('/', status); + filePath.append(name, status); + filePath.append(".res", status); + + UTRACE_DATA1(UTRACE_VERBOSE, "%s", filePath.data()); + UTRACE_EXIT_STATUS(status); +} + +U_NAMESPACE_END + +#endif // U_ENABLE_TRACING diff --git a/deps/icu-small/source/common/restrace.h b/deps/icu-small/source/common/restrace.h new file mode 100644 index 0000000000..ef29eaed57 --- /dev/null +++ b/deps/icu-small/source/common/restrace.h @@ -0,0 +1,147 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __RESTRACE_H__ +#define __RESTRACE_H__ + +#include "unicode/utypes.h" + +#if U_ENABLE_TRACING + +struct UResourceBundle; + +U_NAMESPACE_BEGIN + +class CharString; + +/** + * Instances of this class store information used to trace reads from resource + * bundles when ICU is built with --enable-tracing. 
+ * + * All arguments of type const UResourceBundle*, const char*, and + * const ResourceTracer& are stored as pointers. The caller must retain + * ownership for the lifetime of this ResourceTracer. + * + * Exported as U_COMMON_API for Windows because it is a value field + * in other exported types. + */ +class U_COMMON_API ResourceTracer { +public: + ResourceTracer() : + fResB(nullptr), + fParent(nullptr), + fKey(nullptr), + fIndex(-1) {} + + ResourceTracer(const UResourceBundle* resB) : + fResB(resB), + fParent(nullptr), + fKey(nullptr), + fIndex(-1) {} + + ResourceTracer(const UResourceBundle* resB, const char* key) : + fResB(resB), + fParent(nullptr), + fKey(key), + fIndex(-1) {} + + ResourceTracer(const UResourceBundle* resB, int32_t index) : + fResB(resB), + fParent(nullptr), + fKey(nullptr), + fIndex(index) {} + + ResourceTracer(const ResourceTracer& parent, const char* key) : + fResB(nullptr), + fParent(&parent), + fKey(key), + fIndex(-1) {} + + ResourceTracer(const ResourceTracer& parent, int32_t index) : + fResB(nullptr), + fParent(&parent), + fKey(nullptr), + fIndex(index) {} + + ~ResourceTracer(); + + void trace(const char* type) const; + void traceOpen() const; + + /** + * Calls trace() if the resB or parent provided to the constructor was + * non-null; otherwise, does nothing. + */ + void maybeTrace(const char* type) const { + if (fResB || fParent) { + trace(type); + } + } + +private: + const UResourceBundle* fResB; + const ResourceTracer* fParent; + const char* fKey; + int32_t fIndex; + + CharString& getFilePath(CharString& output, UErrorCode& status) const; + + CharString& getResPath(CharString& output, UErrorCode& status) const; +}; + +/** + * This class provides methods to trace data file reads when ICU is built + * with --enable-tracing. 
+ */ +class FileTracer { +public: + static void traceOpen(const char* path, const char* type, const char* name); + +private: + static void traceOpenDataFile(const char* path, const char* type, const char* name); + static void traceOpenResFile(const char* path, const char* name); +}; + +U_NAMESPACE_END + +#else // U_ENABLE_TRACING + +U_NAMESPACE_BEGIN + +/** + * Default trivial implementation when --enable-tracing is not used. + */ +class U_COMMON_API ResourceTracer { +public: + ResourceTracer() {} + + ResourceTracer(const void*) {} + + ResourceTracer(const void*, const char*) {} + + ResourceTracer(const void*, int32_t) {} + + ResourceTracer(const ResourceTracer&, const char*) {} + + ResourceTracer(const ResourceTracer&, int32_t) {} + + void trace(const char*) const {} + + void traceOpen() const {} + + void maybeTrace(const char*) const {} +}; + +/** + * Default trivial implementation when --enable-tracing is not used. + */ +class FileTracer { +public: + static void traceOpen(const char*, const char*, const char*) {} +}; + +U_NAMESPACE_END + +#endif // U_ENABLE_TRACING + +#endif //__RESTRACE_H__ diff --git a/deps/icu-small/source/common/schriter.cpp b/deps/icu-small/source/common/schriter.cpp index f852800aaa..17b68aee9d 100644 --- a/deps/icu-small/source/common/schriter.cpp +++ b/deps/icu-small/source/common/schriter.cpp @@ -101,7 +101,7 @@ StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const && end == realThat.end; } -CharacterIterator* +StringCharacterIterator* StringCharacterIterator::clone() const { return new StringCharacterIterator(*this); } diff --git a/deps/icu-small/source/common/serv.cpp b/deps/icu-small/source/common/serv.cpp index 40940740d0..044864b859 100644 --- a/deps/icu-small/source/common/serv.cpp +++ b/deps/icu-small/source/common/serv.cpp @@ -333,10 +333,7 @@ U_CDECL_END ****************************************************************** */ -static UMutex *lock() { - static UMutex m = U_MUTEX_INITIALIZER; - return &m; 
-} +static UMutex lock; ICUService::ICUService() : name() @@ -361,7 +358,7 @@ ICUService::ICUService(const UnicodeString& newName) ICUService::~ICUService() { { - Mutex mutex(lock()); + Mutex mutex(&lock); clearCaches(); delete factories; factories = NULL; @@ -452,7 +449,7 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer // if factory is not null, we're calling from within the mutex, // and since some unix machines don't have reentrant mutexes we // need to make sure not to try to lock it again. - XMutex mutex(lock(), factory != NULL); + XMutex mutex(&lock, factory != NULL); if (serviceCache == NULL) { ncthis->serviceCache = new Hashtable(status); @@ -618,7 +615,7 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC } { - Mutex mutex(lock()); + Mutex mutex(&lock); const Hashtable* map = getVisibleIDMap(status); if (map != NULL) { ICUServiceKey* fallbackKey = createKey(matchID, status); @@ -695,7 +692,7 @@ ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const { { UErrorCode status = U_ZERO_ERROR; - Mutex mutex(lock()); + Mutex mutex(&lock); const Hashtable* map = getVisibleIDMap(status); if (map != NULL) { ICUServiceFactory* f = (ICUServiceFactory*)map->get(id); @@ -747,7 +744,7 @@ ICUService::getDisplayNames(UVector& result, result.setDeleter(userv_deleteStringPair); if (U_SUCCESS(status)) { ICUService* ncthis = (ICUService*)this; // cast away semantic const - Mutex mutex(lock()); + Mutex mutex(&lock); if (dnCache != NULL && dnCache->locale != locale) { delete dnCache; @@ -852,7 +849,7 @@ URegistryKey ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status) { if (U_SUCCESS(status) && factoryToAdopt != NULL) { - Mutex mutex(lock()); + Mutex mutex(&lock); if (factories == NULL) { factories = new UVector(deleteUObject, NULL, status); @@ -883,7 +880,7 @@ ICUService::unregister(URegistryKey rkey, UErrorCode& status) ICUServiceFactory *factory = 
(ICUServiceFactory*)rkey; UBool result = FALSE; if (factory != NULL && factories != NULL) { - Mutex mutex(lock()); + Mutex mutex(&lock); if (factories->removeElement(factory)) { clearCaches(); @@ -903,7 +900,7 @@ void ICUService::reset() { { - Mutex mutex(lock()); + Mutex mutex(&lock); reInitializeFactories(); clearCaches(); } diff --git a/deps/icu-small/source/common/servls.cpp b/deps/icu-small/source/common/servls.cpp index 0b1b1b947d..0c2a73d98e 100644 --- a/deps/icu-small/source/common/servls.cpp +++ b/deps/icu-small/source/common/servls.cpp @@ -263,7 +263,7 @@ ICULocaleService::validateFallbackLocale() const { const Locale& loc = Locale::getDefault(); ICULocaleService* ncThis = (ICULocaleService*)this; - static UMutex llock = U_MUTEX_INITIALIZER; + static UMutex llock; { Mutex mutex(&llock); if (loc != fallbackLocale) { diff --git a/deps/icu-small/source/common/servnotf.cpp b/deps/icu-small/source/common/servnotf.cpp index 9b5997bd17..435f36b0d0 100644 --- a/deps/icu-small/source/common/servnotf.cpp +++ b/deps/icu-small/source/common/servnotf.cpp @@ -21,10 +21,7 @@ U_NAMESPACE_BEGIN EventListener::~EventListener() {} UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EventListener) -static UMutex *notifyLock() { - static UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +static UMutex notifyLock; ICUNotifier::ICUNotifier(void) : listeners(NULL) @@ -33,7 +30,7 @@ ICUNotifier::ICUNotifier(void) ICUNotifier::~ICUNotifier(void) { { - Mutex lmx(notifyLock()); + Mutex lmx(&notifyLock); delete listeners; listeners = NULL; } @@ -50,7 +47,7 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status) } if (acceptsListener(*l)) { - Mutex lmx(notifyLock()); + Mutex lmx(&notifyLock); if (listeners == NULL) { listeners = new UVector(5, status); } else { @@ -83,7 +80,7 @@ ICUNotifier::removeListener(const EventListener *l, UErrorCode& status) } { - Mutex lmx(notifyLock()); + Mutex lmx(&notifyLock); if (listeners != NULL) { // identity equality check for (int i = 0, e = listeners->size(); i 
< e; ++i) { @@ -106,7 +103,7 @@ void ICUNotifier::notifyChanged(void) { if (listeners != NULL) { - Mutex lmx(notifyLock()); + Mutex lmx(&notifyLock); if (listeners != NULL) { for (int i = 0, e = listeners->size(); i < e; ++i) { EventListener* el = (EventListener*)listeners->elementAt(i); diff --git a/deps/icu-small/source/common/uarrsort.cpp b/deps/icu-small/source/common/uarrsort.cpp index 03c4d4e7fc..6090582164 100644 --- a/deps/icu-small/source/common/uarrsort.cpp +++ b/deps/icu-small/source/common/uarrsort.cpp @@ -34,6 +34,10 @@ enum { STACK_ITEM_SIZE=200 }; +static constexpr int32_t sizeInMaxAlignTs(int32_t sizeInBytes) { + return (sizeInBytes + sizeof(max_align_t) - 1) / sizeof(max_align_t); +} + /* UComparator convenience implementations ---------------------------------- */ U_CAPI int32_t U_EXPORT2 @@ -134,25 +138,15 @@ doInsertionSort(char *array, int32_t length, int32_t itemSize, static void insertionSort(char *array, int32_t length, int32_t itemSize, UComparator *cmp, const void *context, UErrorCode *pErrorCode) { - UAlignedMemory v[STACK_ITEM_SIZE/sizeof(UAlignedMemory)+1]; - void *pv; - /* allocate an intermediate item variable (v) */ - if(itemSize<=STACK_ITEM_SIZE) { - pv=v; - } else { - pv=uprv_malloc(itemSize); - if(pv==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } + icu::MaybeStackArray<max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE)> v; + if (sizeInMaxAlignTs(itemSize) > v.getCapacity() && + v.resize(sizeInMaxAlignTs(itemSize)) == nullptr) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return; } - doInsertionSort(array, length, itemSize, cmp, context, pv); - - if(pv!=v) { - uprv_free(pv); - } + doInsertionSort(array, length, itemSize, cmp, context, v.getAlias()); } /* QuickSort ---------------------------------------------------------------- */ @@ -238,26 +232,16 @@ subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize, static void quickSort(char *array, int32_t length, int32_t itemSize, UComparator *cmp, const void 
*context, UErrorCode *pErrorCode) { - UAlignedMemory xw[(2*STACK_ITEM_SIZE)/sizeof(UAlignedMemory)+1]; - void *p; - /* allocate two intermediate item variables (x and w) */ - if(itemSize<=STACK_ITEM_SIZE) { - p=xw; - } else { - p=uprv_malloc(2*itemSize); - if(p==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } + icu::MaybeStackArray<max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE) * 2> xw; + if(sizeInMaxAlignTs(itemSize)*2 > xw.getCapacity() && + xw.resize(sizeInMaxAlignTs(itemSize) * 2) == nullptr) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; } - subQuickSort(array, 0, length, itemSize, - cmp, context, p, (char *)p+itemSize); - - if(p!=xw) { - uprv_free(p); - } + subQuickSort(array, 0, length, itemSize, cmp, context, + xw.getAlias(), xw.getAlias() + sizeInMaxAlignTs(itemSize)); } /* uprv_sortArray() API ----------------------------------------------------- */ diff --git a/deps/icu-small/source/common/ubidiimp.h b/deps/icu-small/source/common/ubidiimp.h index a5d0727495..9746b2bc10 100644 --- a/deps/icu-small/source/common/ubidiimp.h +++ b/deps/icu-small/source/common/ubidiimp.h @@ -198,8 +198,8 @@ typedef struct Run { /* in a Run, logicalStart will get this bit set if the run level is odd */ #define INDEX_ODD_BIT (1UL<<31) -#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31)) -#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31)) +#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)((level)&1)<<31)) +#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)((level)&1)<<31)) #define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) #define GET_INDEX(x) ((x)&~INDEX_ODD_BIT) @@ -387,41 +387,49 @@ typedef union { } BidiMemoryForAllocation; /* Macros for initial checks at function entry */ -#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) \ - if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue -#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) \ - if(!IS_VALID_PARA(bidi)) { \ - 
errcode=U_INVALID_STATE_ERROR; \ - return retvalue; \ - } -#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) \ - if(!IS_VALID_PARA_OR_LINE(bidi)) { \ - errcode=U_INVALID_STATE_ERROR; \ - return retvalue; \ - } -#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) \ - if((arg)<(start) || (arg)>=(limit)) { \ - (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ - return retvalue; \ - } - -#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) \ - if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return -#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) \ - if(!IS_VALID_PARA(bidi)) { \ - errcode=U_INVALID_STATE_ERROR; \ - return; \ - } -#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) \ - if(!IS_VALID_PARA_OR_LINE(bidi)) { \ - errcode=U_INVALID_STATE_ERROR; \ - return; \ - } -#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) \ - if((arg)<(start) || (arg)>=(limit)) { \ - (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ - return; \ - } +#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \ + if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue; \ +} UPRV_BLOCK_MACRO_END +#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \ + if(!IS_VALID_PARA(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return retvalue; \ + } \ +} UPRV_BLOCK_MACRO_END +#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \ + if(!IS_VALID_PARA_OR_LINE(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return retvalue; \ + } \ +} UPRV_BLOCK_MACRO_END +#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \ + if((arg)<(start) || (arg)>=(limit)) { \ + (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ + return retvalue; \ + } \ +} UPRV_BLOCK_MACRO_END + +#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) UPRV_BLOCK_MACRO_BEGIN { \ + if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return; \ +} UPRV_BLOCK_MACRO_END +#define 
RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) UPRV_BLOCK_MACRO_BEGIN { \ + if(!IS_VALID_PARA(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return; \ + } \ +} UPRV_BLOCK_MACRO_END +#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) UPRV_BLOCK_MACRO_BEGIN { \ + if(!IS_VALID_PARA_OR_LINE(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return; \ + } \ +} UPRV_BLOCK_MACRO_END +#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) UPRV_BLOCK_MACRO_BEGIN { \ + if((arg)<(start) || (arg)>=(limit)) { \ + (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ + return; \ + } \ +} UPRV_BLOCK_MACRO_END /* helper function to (re)allocate memory if allowed */ U_CFUNC UBool diff --git a/deps/icu-small/source/common/ubiditransform.cpp b/deps/icu-small/source/common/ubiditransform.cpp index 394df6092d..bb3ce8cb93 100644 --- a/deps/icu-small/source/common/ubiditransform.cpp +++ b/deps/icu-small/source/common/ubiditransform.cpp @@ -31,11 +31,11 @@ #define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL #define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR -#define CHECK_LEN(STR, LEN, ERROR) { \ - if (LEN == 0) return 0; \ - if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ - if (LEN == -1) LEN = u_strlen(STR); \ - } +#define CHECK_LEN(STR, LEN, ERROR) UPRV_BLOCK_MACRO_BEGIN { \ + if (LEN == 0) return 0; \ + if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ + if (LEN == -1) LEN = u_strlen(STR); \ +} UPRV_BLOCK_MACRO_END #define MAX_ACTIONS 7 diff --git a/deps/icu-small/source/common/ucase.cpp b/deps/icu-small/source/common/ucase.cpp index 50c8d20c1f..57a4032790 100644 --- a/deps/icu-small/source/common/ucase.cpp +++ b/deps/icu-small/source/common/ucase.cpp @@ -116,7 +116,7 @@ static const uint8_t flagsOffset[256]={ * moved to the last uint16_t of the value, use +1 for beginning of next slot * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified */ -#define GET_SLOT_VALUE(excWord, idx, pExc16, value) \ +#define 
GET_SLOT_VALUE(excWord, idx, pExc16, value) UPRV_BLOCK_MACRO_BEGIN { \ if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \ (pExc16)+=SLOT_OFFSET(excWord, idx); \ (value)=*pExc16; \ @@ -124,7 +124,8 @@ static const uint8_t flagsOffset[256]={ (pExc16)+=2*SLOT_OFFSET(excWord, idx); \ (value)=*pExc16++; \ (value)=((value)<<16)|*pExc16; \ - } + } \ +} UPRV_BLOCK_MACRO_END /* simple case mappings ----------------------------------------------------- */ diff --git a/deps/icu-small/source/common/uchar.cpp b/deps/icu-small/source/common/uchar.cpp index 60fe75c78d..12365fd697 100644 --- a/deps/icu-small/source/common/uchar.cpp +++ b/deps/icu-small/source/common/uchar.cpp @@ -40,7 +40,7 @@ /* constants and macros for access to the data ------------------------------ */ /* getting a uint32_t properties word from the data */ -#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); +#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)) /* API functions ------------------------------------------------------------ */ diff --git a/deps/icu-small/source/common/uchriter.cpp b/deps/icu-small/source/common/uchriter.cpp index 822168f5c8..bedbabc74c 100644 --- a/deps/icu-small/source/common/uchriter.cpp +++ b/deps/icu-small/source/common/uchriter.cpp @@ -89,7 +89,7 @@ UCharCharacterIterator::hashCode() const { return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end; } -CharacterIterator* +UCharCharacterIterator* UCharCharacterIterator::clone() const { return new UCharCharacterIterator(*this); } diff --git a/deps/icu-small/source/common/ucln_cmn.cpp b/deps/icu-small/source/common/ucln_cmn.cpp index d78491df41..ab9d3adbd2 100644 --- a/deps/icu-small/source/common/ucln_cmn.cpp +++ b/deps/icu-small/source/common/ucln_cmn.cpp @@ -65,9 +65,20 @@ U_CFUNC void ucln_common_registerCleanup(ECleanupCommonType type, cleanupFunc *func) { + // Thread safety messiness: From ticket 10295, calls to registerCleanup() may occur + // concurrently. 
Although such cases should be storing the same value, they raise errors + // from the thread sanity checker. Doing the store within a mutex avoids those. + // BUT that can trigger a recursive entry into std::call_once() in umutex.cpp when this code, + // running from the call_once function, tries to grab the ICU global mutex, which + // re-enters the mutex init path. So, work-around by special casing UCLN_COMMON_MUTEX, not + // using the ICU global mutex for it. + // + // No other point in ICU uses std::call_once(). + U_ASSERT(UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT); - if (UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT) - { + if (type == UCLN_COMMON_MUTEX) { + gCommonCleanupFunctions[type] = func; + } else if (UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT) { icu::Mutex m; // See ticket 10295 for discussion. gCommonCleanupFunctions[type] = func; } diff --git a/deps/icu-small/source/common/ucln_cmn.h b/deps/icu-small/source/common/ucln_cmn.h index 0ca911b47d..b837fb9462 100644 --- a/deps/icu-small/source/common/ucln_cmn.h +++ b/deps/icu-small/source/common/ucln_cmn.h @@ -22,8 +22,6 @@ /* These are the cleanup functions for various APIs. */ /* @return true if cleanup complete successfully.*/ -U_CFUNC UBool umtx_cleanup(void); - U_CFUNC UBool utrace_cleanup(void); U_CFUNC UBool ucln_lib_cleanup(void); @@ -41,6 +39,8 @@ typedef enum ECleanupCommonType { UCLN_COMMON_LOCALE_KEY_TYPE, UCLN_COMMON_LOCALE, UCLN_COMMON_LOCALE_AVAILABLE, + UCLN_COMMON_LIKELY_SUBTAGS, + UCLN_COMMON_LOCALE_DISTANCE, UCLN_COMMON_ULOC, UCLN_COMMON_CURRENCY, UCLN_COMMON_LOADED_NORMALIZER2, @@ -62,6 +62,7 @@ typedef enum ECleanupCommonType { */ UCLN_COMMON_UNIFIED_CACHE, UCLN_COMMON_URES, + UCLN_COMMON_MUTEX, // Mutexes should be the last to be cleaned up. 
UCLN_COMMON_COUNT /* This must be last */ } ECleanupCommonType; diff --git a/deps/icu-small/source/common/ucnv.cpp b/deps/icu-small/source/common/ucnv.cpp index abf302eadd..e2e0c5b9f7 100644 --- a/deps/icu-small/source/common/ucnv.cpp +++ b/deps/icu-small/source/common/ucnv.cpp @@ -25,6 +25,8 @@ #if !UCONFIG_NO_CONVERSION +#include <memory> + #include "unicode/ustring.h" #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" @@ -158,7 +160,6 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U UConverter *localConverter, *allocatedConverter; int32_t stackBufferSize; int32_t bufferSizeNeeded; - char *stackBufferChars = (char *)stackBuffer; UErrorCode cbErr; UConverterToUnicodeArgs toUArgs = { sizeof(UConverterToUnicodeArgs), @@ -224,23 +225,22 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U } } - - /* Pointers on 64-bit platforms need to be aligned - * on a 64-bit boundary in memory. + /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter. + * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed. 
*/ - if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { - int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); - if(stackBufferSize > offsetUp) { - stackBufferSize -= offsetUp; - stackBufferChars += offsetUp; + if (stackBuffer) { + uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer); + uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1); + ptrdiff_t pointerAdjustment = aligned_p - p; + if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) { + stackBuffer = reinterpret_cast<void *>(aligned_p); + stackBufferSize -= pointerAdjustment; } else { /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ stackBufferSize = 1; } } - stackBuffer = (void *)stackBufferChars; - /* Now, see if we must allocate any memory */ if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) { @@ -475,7 +475,7 @@ ucnv_setSubstString(UConverter *cnv, const UChar *s, int32_t length, UErrorCode *err) { - UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; + alignas(UConverter) char cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE]; char chars[UCNV_ERROR_BUFFER_LENGTH]; UConverter *clone; diff --git a/deps/icu-small/source/common/ucnv2022.cpp b/deps/icu-small/source/common/ucnv2022.cpp index 4a35ff85e1..6cd9a3d12e 100644 --- a/deps/icu-small/source/common/ucnv2022.cpp +++ b/deps/icu-small/source/common/ucnv2022.cpp @@ -3571,20 +3571,11 @@ _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorC /* * Structure for cloning an ISO 2022 converter into a single memory block. - * ucnv_safeClone() of the converter will align the entire cloneStruct, - * and then ucnv_safeClone() of the sub-converter may additionally align - * currentConverter inside the cloneStruct, for which we need the deadSpace - * after currentConverter. - * This is because UAlignedMemory may be larger than the actually - * necessary alignment size for the platform. 
- * The other cloneStruct fields will not be moved around, - * and are aligned properly with cloneStruct's alignment. */ struct cloneStruct { UConverter cnv; UConverter currentConverter; - UAlignedMemory deadSpace; UConverterDataISO2022 mydata; }; @@ -3602,6 +3593,10 @@ _ISO_2022_SafeClone( UConverterDataISO2022 *cnvData; int32_t i, size; + if (U_FAILURE(*status)){ + return nullptr; + } + if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ *pBufferSize = (int32_t)sizeof(struct cloneStruct); return NULL; @@ -3619,7 +3614,7 @@ _ISO_2022_SafeClone( /* share the subconverters */ if(cnvData->currentConverter != NULL) { - size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ + size = (int32_t)sizeof(UConverter); localClone->mydata.currentConverter = ucnv_safeClone(cnvData->currentConverter, &localClone->currentConverter, diff --git a/deps/icu-small/source/common/ucnv_bld.cpp b/deps/icu-small/source/common/ucnv_bld.cpp index e6ef833f4e..1c2363ea89 100644 --- a/deps/icu-small/source/common/ucnv_bld.cpp +++ b/deps/icu-small/source/common/ucnv_bld.cpp @@ -194,10 +194,7 @@ static struct { /*initializes some global variables */ static UHashtable *SHARED_DATA_HASHTABLE = NULL; -static icu::UMutex *cnvCacheMutex() { /* Mutex for synchronizing cnv cache access. */ - static icu::UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +static icu::UMutex cnvCacheMutex; /* Note: the global mutex is used for */ /* reference count updates. 
*/ @@ -602,9 +599,9 @@ U_CFUNC void ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) { if(sharedData != NULL && sharedData->isReferenceCounted) { - umtx_lock(cnvCacheMutex()); + umtx_lock(&cnvCacheMutex); ucnv_unload(sharedData); - umtx_unlock(cnvCacheMutex()); + umtx_unlock(&cnvCacheMutex); } } @@ -612,9 +609,9 @@ U_CFUNC void ucnv_incrementRefCount(UConverterSharedData *sharedData) { if(sharedData != NULL && sharedData->isReferenceCounted) { - umtx_lock(cnvCacheMutex()); + umtx_lock(&cnvCacheMutex); sharedData->referenceCounter++; - umtx_unlock(cnvCacheMutex()); + umtx_unlock(&cnvCacheMutex); } } @@ -815,9 +812,9 @@ ucnv_loadSharedData(const char *converterName, pArgs->nestedLoads=1; pArgs->pkg=NULL; - umtx_lock(cnvCacheMutex()); + umtx_lock(&cnvCacheMutex); mySharedConverterData = ucnv_load(pArgs, err); - umtx_unlock(cnvCacheMutex()); + umtx_unlock(&cnvCacheMutex); if (U_FAILURE (*err) || (mySharedConverterData == NULL)) { return NULL; @@ -1064,7 +1061,7 @@ ucnv_flushCache () * because the sequence of looking up in the cache + incrementing * is protected by cnvCacheMutex. 
*/ - umtx_lock(cnvCacheMutex()); + umtx_lock(&cnvCacheMutex); /* * double loop: A delta/extension-only converter has a pointer to its base table's * shared data; the first iteration of the outer loop may see the delta converter @@ -1093,7 +1090,7 @@ ucnv_flushCache () } } } while(++i == 1 && remaining > 0); - umtx_unlock(cnvCacheMutex()); + umtx_unlock(&cnvCacheMutex); UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); @@ -1199,7 +1196,7 @@ internalSetName(const char *name, UErrorCode *status) { } algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); - umtx_lock(cnvCacheMutex()); + umtx_lock(&cnvCacheMutex); gDefaultAlgorithmicSharedData = algorithmicSharedData; gDefaultConverterContainsOption = containsOption; @@ -1215,7 +1212,7 @@ internalSetName(const char *name, UErrorCode *status) { ucnv_enableCleanup(); - umtx_unlock(cnvCacheMutex()); + umtx_unlock(&cnvCacheMutex); } #endif @@ -1240,7 +1237,7 @@ ucnv_getDefaultName() { but ucnv_setDefaultName is not thread safe. */ { - icu::Mutex lock(cnvCacheMutex()); + icu::Mutex lock(&cnvCacheMutex); name = gDefaultConverterName; } if(name==NULL) { diff --git a/deps/icu-small/source/common/ucnv_lmb.cpp b/deps/icu-small/source/common/ucnv_lmb.cpp index 6dd8e83428..5e7cfde353 100644 --- a/deps/icu-small/source/common/ucnv_lmb.cpp +++ b/deps/icu-small/source/common/ucnv_lmb.cpp @@ -1107,11 +1107,13 @@ GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode all input as required by ICU converter semantics. 
*/ -#define CHECK_SOURCE_LIMIT(index) \ - if (args->source+index > args->sourceLimit){\ - *err = U_TRUNCATED_CHAR_FOUND;\ - args->source = args->sourceLimit;\ - return 0xffff;} +#define CHECK_SOURCE_LIMIT(index) UPRV_BLOCK_MACRO_BEGIN { \ + if (args->source+index > args->sourceLimit) { \ + *err = U_TRUNCATED_CHAR_FOUND; \ + args->source = args->sourceLimit; \ + return 0xffff; \ + } \ +} UPRV_BLOCK_MACRO_END /* Return the Unicode representation for the current LMBCS character */ diff --git a/deps/icu-small/source/common/ucnvbocu.cpp b/deps/icu-small/source/common/ucnvbocu.cpp index 5b66c5059a..7c2aab5655 100644 --- a/deps/icu-small/source/common/ucnvbocu.cpp +++ b/deps/icu-small/source/common/ucnvbocu.cpp @@ -202,14 +202,14 @@ bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ * @param d Divisor. * @param m Output variable for the rest (modulo result). */ -#define NEGDIVMOD(n, d, m) { \ +#define NEGDIVMOD(n, d, m) UPRV_BLOCK_MACRO_BEGIN { \ (m)=(n)%(d); \ (n)/=(d); \ if((m)<0) { \ --(n); \ (m)+=(d); \ } \ -} +} UPRV_BLOCK_MACRO_END /* Faster versions of packDiff() for single-byte-encoded diff values. */ diff --git a/deps/icu-small/source/common/ucnvhz.cpp b/deps/icu-small/source/common/ucnvhz.cpp index 3159537469..b26cf78289 100644 --- a/deps/icu-small/source/common/ucnvhz.cpp +++ b/deps/icu-small/source/common/ucnvhz.cpp @@ -38,7 +38,7 @@ #define ESC_LEN 2 -#define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){ \ +#define CONCAT_ESCAPE_MACRO(args, targetIndex,targetLength,strToAppend, err, len,sourceIndex) UPRV_BLOCK_MACRO_BEGIN { \ while(len-->0){ \ if(targetIndex < targetLength){ \ args->target[targetIndex] = (unsigned char) *strToAppend; \ @@ -53,7 +53,7 @@ } \ strToAppend++; \ } \ -} +} UPRV_BLOCK_MACRO_END typedef struct{ @@ -518,19 +518,11 @@ _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *e /* * Structure for cloning an HZ converter into a single memory block. 
- * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, - * and then ucnv_safeClone() of the sub-converter may additionally align - * subCnv inside the cloneHZStruct, for which we need the deadSpace after - * subCnv. This is because UAlignedMemory may be larger than the actually - * necessary alignment size for the platform. - * The other cloneHZStruct fields will not be moved around, - * and are aligned properly with cloneHZStruct's alignment. */ struct cloneHZStruct { UConverter cnv; UConverter subCnv; - UAlignedMemory deadSpace; UConverterDataHZ mydata; }; @@ -545,12 +537,12 @@ _HZ_SafeClone(const UConverter *cnv, int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); if (U_FAILURE(*status)){ - return 0; + return nullptr; } if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ *pBufferSize = bufferSizeNeeded; - return 0; + return nullptr; } localClone = (struct cloneHZStruct *)stackBuffer; @@ -561,7 +553,7 @@ _HZ_SafeClone(const UConverter *cnv, localClone->cnv.isExtraLocal = TRUE; /* deep-clone the sub-converter */ - size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ + size = (int32_t)sizeof(UConverter); ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); diff --git a/deps/icu-small/source/common/ucnvisci.cpp b/deps/icu-small/source/common/ucnvisci.cpp index d0c07f2b27..c1ab06e137 100644 --- a/deps/icu-small/source/common/ucnvisci.cpp +++ b/deps/icu-small/source/common/ucnvisci.cpp @@ -831,7 +831,7 @@ static const uint16_t nuktaSpecialCases[][2]={ }; -#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \ +#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \ int32_t offset = (int32_t)(source - args->source-1); \ /* write the targetUniChar to 
target */ \ if(target < targetLimit){ \ @@ -884,7 +884,7 @@ static const uint16_t nuktaSpecialCases[][2]={ (uint8_t) (targetByteUnit); \ *err = U_BUFFER_OVERFLOW_ERROR; \ } \ -} +} UPRV_BLOCK_MACRO_END /* Rules: * Explicit Halant : @@ -1119,7 +1119,7 @@ static const uint16_t lookupTable[][2]={ { GURMUKHI, PNJ_MASK } }; -#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\ +#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \ /* add offset to current Indic Block */ \ if(targetUniChar>ASCII_END && \ targetUniChar != ZWJ && \ @@ -1140,9 +1140,9 @@ static const uint16_t lookupTable[][2]={ (UChar)targetUniChar; \ *err = U_BUFFER_OVERFLOW_ERROR; \ } \ -} +} UPRV_BLOCK_MACRO_END -#define GET_MAPPING(sourceChar,targetUniChar,data){ \ +#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN { \ targetUniChar = toUnicodeTable[(sourceChar)] ; \ /* is the code point valid in current script? */ \ if(sourceChar> ASCII_END && \ @@ -1153,7 +1153,7 @@ static const uint16_t lookupTable[][2]={ targetUniChar=missingCharMarker; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /*********** * Rules for ISCII to Unicode converter diff --git a/deps/icu-small/source/common/ucnvsel.cpp b/deps/icu-small/source/common/ucnvsel.cpp index 6ccee1ae61..07b55022c3 100644 --- a/deps/icu-small/source/common/ucnvsel.cpp +++ b/deps/icu-small/source/common/ucnvsel.cpp @@ -691,36 +691,36 @@ static int16_t countOnes(uint32_t* mask, int32_t len) { /* internal function! */ static UEnumeration *selectForMask(const UConverterSelector* sel, - uint32_t *mask, UErrorCode *status) { + uint32_t *theMask, UErrorCode *status) { + LocalMemory<uint32_t> mask(theMask); // this is the context we will use. Store a table of indices to which // encodings are legit. 
- struct Enumerator* result = (Enumerator*)uprv_malloc(sizeof(Enumerator)); - if (result == NULL) { - uprv_free(mask); + LocalMemory<Enumerator> result(static_cast<Enumerator *>(uprv_malloc(sizeof(Enumerator)))); + if (result.isNull()) { *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } - result->index = NULL; // this will be allocated later! + result->index = nullptr; // this will be allocated later! result->length = result->cur = 0; result->sel = sel; - UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); - if (en == NULL) { + LocalMemory<UEnumeration> en(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)))); + if (en.isNull()) { // TODO(markus): Combine Enumerator and UEnumeration into one struct. - uprv_free(mask); - uprv_free(result); *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } - memcpy(en, &defaultEncodings, sizeof(UEnumeration)); - en->context = result; + memcpy(en.getAlias(), &defaultEncodings, sizeof(UEnumeration)); int32_t columns = (sel->encodingsCount+31)/32; - int16_t numOnes = countOnes(mask, columns); + int16_t numOnes = countOnes(mask.getAlias(), columns); // now, we know the exact space we need for index if (numOnes > 0) { - result->index = (int16_t*) uprv_malloc(numOnes * sizeof(int16_t)); - + result->index = static_cast<int16_t*>(uprv_malloc(numOnes * sizeof(int16_t))); + if (result->index == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } int32_t i, j; int16_t k = 0; for (j = 0 ; j < columns; j++) { @@ -734,8 +734,8 @@ static UEnumeration *selectForMask(const UConverterSelector* sel, } } //otherwise, index will remain NULL (and will never be touched by //the enumerator code anyway) - uprv_free(mask); - return en; + en->context = result.orphan(); + return en.orphan(); } /* check a string against the selector - UTF16 version */ diff --git a/deps/icu-small/source/common/ucptrie.cpp b/deps/icu-small/source/common/ucptrie.cpp index 
b72e318387..0004160a23 100644 --- a/deps/icu-small/source/common/ucptrie.cpp +++ b/deps/icu-small/source/common/ucptrie.cpp @@ -280,7 +280,7 @@ UChar32 getRange(const void *t, UChar32 start, int32_t prevI3Block = -1; int32_t prevBlock = -1; UChar32 c = start; - uint32_t trieValue, value; + uint32_t trieValue, value = nullValue; bool haveValue = false; do { int32_t i3Block; diff --git a/deps/icu-small/source/common/ucurr.cpp b/deps/icu-small/source/common/ucurr.cpp index dba3247fef..d42c2f10b1 100644 --- a/deps/icu-small/source/common/ucurr.cpp +++ b/deps/icu-small/source/common/ucurr.cpp @@ -365,10 +365,7 @@ U_CDECL_END #if !UCONFIG_NO_SERVICE struct CReg; -static UMutex *gCRegLock() { - static UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +static UMutex gCRegLock; static CReg* gCRegHead = 0; struct CReg : public icu::UMemory { @@ -394,14 +391,14 @@ struct CReg : public icu::UMemory { if (status && U_SUCCESS(*status) && _iso && _id) { CReg* n = new CReg(_iso, _id); if (n) { - umtx_lock(gCRegLock()); + umtx_lock(&gCRegLock); if (!gCRegHead) { /* register for the first time */ ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); } n->next = gCRegHead; gCRegHead = n; - umtx_unlock(gCRegLock()); + umtx_unlock(&gCRegLock); return n; } *status = U_MEMORY_ALLOCATION_ERROR; @@ -411,7 +408,7 @@ struct CReg : public icu::UMemory { static UBool unreg(UCurrRegistryKey key) { UBool found = FALSE; - umtx_lock(gCRegLock()); + umtx_lock(&gCRegLock); CReg** p = &gCRegHead; while (*p) { @@ -424,13 +421,13 @@ struct CReg : public icu::UMemory { p = &((*p)->next); } - umtx_unlock(gCRegLock()); + umtx_unlock(&gCRegLock); return found; } static const UChar* get(const char* id) { const UChar* result = NULL; - umtx_lock(gCRegLock()); + umtx_lock(&gCRegLock); CReg* p = gCRegHead; /* register cleanup of the mutex */ @@ -442,7 +439,7 @@ struct CReg : public icu::UMemory { } p = p->next; } - umtx_unlock(gCRegLock()); + umtx_unlock(&gCRegLock); return result; } @@ -716,7 
+713,9 @@ ucurr_getName(const UChar* currency, // We no longer support choice format data in names. Data should not contain // choice patterns. - *isChoiceFormat = FALSE; + if (isChoiceFormat != NULL) { + *isChoiceFormat = FALSE; + } if (U_SUCCESS(ec2)) { U_ASSERT(s != NULL); return s; @@ -1356,10 +1355,7 @@ static CurrencyNameCacheEntry* currCache[CURRENCY_NAME_CACHE_NUM] = {NULL}; // It is a simple round-robin replacement strategy. static int8_t currentCacheEntryIndex = 0; -static UMutex *gCurrencyCacheMutex() { - static UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +static UMutex gCurrencyCacheMutex; // Cache deletion static void @@ -1408,7 +1404,7 @@ getCacheEntry(const char* locale, UErrorCode& ec) { CurrencyNameStruct* currencySymbols = NULL; CurrencyNameCacheEntry* cacheEntry = NULL; - umtx_lock(gCurrencyCacheMutex()); + umtx_lock(&gCurrencyCacheMutex); // in order to handle racing correctly, // not putting 'search' in a separate function. int8_t found = -1; @@ -1423,13 +1419,13 @@ getCacheEntry(const char* locale, UErrorCode& ec) { cacheEntry = currCache[found]; ++(cacheEntry->refCount); } - umtx_unlock(gCurrencyCacheMutex()); + umtx_unlock(&gCurrencyCacheMutex); if (found == -1) { collectCurrencyNames(locale, &currencyNames, &total_currency_name_count, &currencySymbols, &total_currency_symbol_count, ec); if (U_FAILURE(ec)) { return NULL; } - umtx_lock(gCurrencyCacheMutex()); + umtx_lock(&gCurrencyCacheMutex); // check again. 
for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) { if (currCache[i]!= NULL && @@ -1468,19 +1464,19 @@ getCacheEntry(const char* locale, UErrorCode& ec) { cacheEntry = currCache[found]; ++(cacheEntry->refCount); } - umtx_unlock(gCurrencyCacheMutex()); + umtx_unlock(&gCurrencyCacheMutex); } return cacheEntry; } static void releaseCacheEntry(CurrencyNameCacheEntry* cacheEntry) { - umtx_lock(gCurrencyCacheMutex()); + umtx_lock(&gCurrencyCacheMutex); --(cacheEntry->refCount); if (cacheEntry->refCount == 0) { // remove deleteCacheEntry(cacheEntry); } - umtx_unlock(gCurrencyCacheMutex()); + umtx_unlock(&gCurrencyCacheMutex); } U_CAPI void @@ -1601,10 +1597,9 @@ uprv_getStaticCurrencyName(const UChar* iso, const char* loc, { U_NAMESPACE_USE - UBool isChoiceFormat; int32_t len; const UChar* currname = ucurr_getName(iso, loc, UCURR_SYMBOL_NAME, - &isChoiceFormat, &len, &ec); + nullptr /* isChoiceFormat */, &len, &ec); if (U_SUCCESS(ec)) { result.setTo(currname, len); } diff --git a/deps/icu-small/source/common/udata.cpp b/deps/icu-small/source/common/udata.cpp index 99efbc97ee..f2faa82777 100644 --- a/deps/icu-small/source/common/udata.cpp +++ b/deps/icu-small/source/common/udata.cpp @@ -33,6 +33,7 @@ might have to #include some other header #include "cstring.h" #include "mutex.h" #include "putilimp.h" +#include "restrace.h" #include "uassert.h" #include "ucln_cmn.h" #include "ucmndata.h" @@ -110,11 +111,12 @@ static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER( static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */ static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER; -#if U_PLATFORM_HAS_WINUWP_API == 0 +#if !defined(ICU_DATA_DIR_WINDOWS) static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS; // Access not synchronized. // Modifying is documented as thread-unsafe. 
#else -static UDataFileAccess gDataFileAccess = UDATA_NO_FILES; // Windows UWP looks in one spot explicitly +// If we are using the Windows data directory, then look in one spot only. +static UDataFileAccess gDataFileAccess = UDATA_NO_FILES; #endif static UBool U_CALLCONV @@ -206,7 +208,7 @@ setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to ca return didUpdate; } -#if U_PLATFORM_HAS_WINUWP_API == 0 +#if !defined(ICU_DATA_DIR_WINDOWS) static UBool setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) { @@ -320,7 +322,7 @@ static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err) retVal = el->item; } #ifdef UDATA_DEBUG - fprintf(stderr, "Cache: [%s] -> %p\n", baseName, retVal); + fprintf(stderr, "Cache: [%s] -> %p\n", baseName, (void*) retVal); #endif return retVal; } @@ -383,7 +385,7 @@ static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UEr #ifdef UDATA_DEBUG fprintf(stderr, "Cache: [%s] <<< %p : %s. 
vFunc=%p\n", newElement->name, - newElement->item, u_errorName(subErr), newElement->item->vFuncs); + (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs); #endif if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) { @@ -477,7 +479,7 @@ UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg, nextPath = itemPath.data(); } #ifdef UDATA_DEBUG - fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, inSuffix); + fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix); #endif /** Suffix **/ @@ -492,12 +494,11 @@ UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg, /* pathBuffer will hold the output path strings returned by this iterator */ #ifdef UDATA_DEBUG - fprintf(stderr, "%p: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n", - iter, + fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n", item, path, basename, - suffix, + suffix.data(), itemPath.data(), nextPath, checkLastFour?"TRUE":"false"); @@ -553,7 +554,7 @@ const char *UDataPathIterator::next(UErrorCode *pErrorCode) fprintf(stderr, "rest of path (IDD) = %s\n", currentPath); fprintf(stderr, " "); { - uint32_t qqq; + int32_t qqq; for(qqq=0;qqq<pathLen;qqq++) { fprintf(stderr, " "); @@ -574,7 +575,7 @@ const char *UDataPathIterator::next(UErrorCode *pErrorCode) uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */ #ifdef UDATA_DEBUG - fprintf(stderr, "Have %s file on the path: %s\n", suffix, pathBuffer.data()); + fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data()); #endif /* do nothing */ } @@ -640,7 +641,8 @@ U_NAMESPACE_END * our common data. 
* * * *----------------------------------------------------------------------*/ -#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time +#if !defined(ICU_DATA_DIR_WINDOWS) +// When using the Windows system data, we expect only a single data file. extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT; #endif @@ -690,7 +692,8 @@ openCommonData(const char *path, /* Path from OpenChoice? */ if(gCommonICUDataArray[commonDataIndex] != NULL) { return gCommonICUDataArray[commonDataIndex]; } -#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time +#if !defined(ICU_DATA_DIR_WINDOWS) +// When using the Windows system data, we expect only a single data file. int32_t i; for(i = 0; i < commonDataIndex; ++i) { if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) { @@ -714,7 +717,8 @@ openCommonData(const char *path, /* Path from OpenChoice? */ setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode); } */ -#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time +#if !defined(ICU_DATA_DIR_WINDOWS) +// When using the Windows system data, we expect only a single data file. setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode); { Mutex lock; @@ -831,7 +835,7 @@ static UBool extendICUData(UErrorCode *pErr) * Use a specific mutex to avoid nested locks of the global mutex. 
*/ #if MAP_IMPLEMENTATION==MAP_STDIO - static UMutex extendICUDataMutex = U_MUTEX_INITIALIZER; + static UMutex extendICUDataMutex; umtx_lock(&extendICUDataMutex); #endif if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) { @@ -1070,13 +1074,13 @@ static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName /* look up the data piece in the common data */ pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode); #ifdef UDATA_DEBUG - fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, pHeader, u_errorName(*subErrorCode)); + fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode)); #endif if(pHeader!=NULL) { pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode); #ifdef UDATA_DEBUG - fprintf(stderr, "pEntryData=%p\n", pEntryData); + fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData); #endif if (U_FAILURE(*pErrorCode)) { return NULL; @@ -1168,6 +1172,9 @@ doOpenChoice(const char *path, const char *type, const char *name, UBool isICUData = FALSE; + FileTracer::traceOpen(path, type, name); + + /* Is this path ICU data? */ if(path == NULL || !strcmp(path, U_ICUDATA_ALIAS) || /* "ICUDATA" */ @@ -1276,12 +1283,12 @@ doOpenChoice(const char *path, const char *type, const char *name, fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data()); #endif -#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time +#if !defined(ICU_DATA_DIR_WINDOWS) if(path == NULL) { path = COMMON_DATA_NAME; /* "icudt26e" */ } #else - // Windows UWP expects only a single data file. + // When using the Windows system data, we expects only a single data file. 
path = COMMON_DATA_NAME; /* "icudt26e" */ #endif diff --git a/deps/icu-small/source/common/uhash.cpp b/deps/icu-small/source/common/uhash.cpp index 79241a2829..86311ceb0b 100644 --- a/deps/icu-small/source/common/uhash.cpp +++ b/deps/icu-small/source/common/uhash.cpp @@ -119,13 +119,14 @@ static const float RESIZE_POLICY_RATIO_TABLE[6] = { /* This macro expects a UHashTok.pointer as its keypointer and valuepointer parameters */ -#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \ - if (hash->keyDeleter != NULL && keypointer != NULL) { \ - (*hash->keyDeleter)(keypointer); \ - } \ - if (hash->valueDeleter != NULL && valuepointer != NULL) { \ - (*hash->valueDeleter)(valuepointer); \ - } +#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \ + if (hash->keyDeleter != NULL && keypointer != NULL) { \ + (*hash->keyDeleter)(keypointer); \ + } \ + if (hash->valueDeleter != NULL && valuepointer != NULL) { \ + (*hash->valueDeleter)(valuepointer); \ + } \ +} UPRV_BLOCK_MACRO_END /* * Constants for hinting whether a key or value is an integer diff --git a/deps/icu-small/source/common/uidna.cpp b/deps/icu-small/source/common/uidna.cpp index 6d56fcb8f5..09347efd6d 100644 --- a/deps/icu-small/source/common/uidna.cpp +++ b/deps/icu-small/source/common/uidna.cpp @@ -57,18 +57,16 @@ toASCIILower(UChar ch){ inline static UBool startsWithPrefix(const UChar* src , int32_t srcLength){ - UBool startsWithPrefix = TRUE; - if(srcLength < ACE_PREFIX_LENGTH){ return FALSE; } for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ if(toASCIILower(src[i]) != ACE_PREFIX[i]){ - startsWithPrefix = FALSE; + return FALSE; } } - return startsWithPrefix; + return TRUE; } @@ -441,6 +439,7 @@ _internal_toUnicode(const UChar* src, int32_t srcLength, for(int32_t j=0; j<srcLength; j++){ if(src[j]> 0x7f){ srcIsASCII = FALSE; + break; }/*else if(isLDHChar(src[j])==FALSE){ // here we do not assemble surrogates // since we know that LDH code points diff --git 
a/deps/icu-small/source/common/uinvchar.cpp b/deps/icu-small/source/common/uinvchar.cpp index 2e0f42d927..ac9716066f 100644 --- a/deps/icu-small/source/common/uinvchar.cpp +++ b/deps/icu-small/source/common/uinvchar.cpp @@ -207,7 +207,8 @@ u_UCharsToChars(const UChar *us, char *cs, int32_t length) { while(length>0) { u=*us++; if(!UCHAR_IS_INVARIANT(u)) { - UPRV_UNREACHABLE; /* Variant characters were used. These are not portable in ICU. */ + U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */ + u=0; } *cs++=(char)UCHAR_TO_CHAR(u); --length; @@ -445,6 +446,13 @@ uprv_copyEbcdic(const UDataSwapper *ds, return length; } +U_CFUNC UBool +uprv_isEbcdicAtSign(char c) { + static const uint8_t ebcdicAtSigns[] = { + 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; + return c != 0 && uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr; +} + /* compare invariant strings; variant characters compare less than others and unlike each other */ U_CFUNC int32_t uprv_compareInvAscii(const UDataSwapper *ds, @@ -562,6 +570,11 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { } U_CAPI char U_EXPORT2 +uprv_ebcdicToAscii(char c) { + return (char)asciiFromEbcdic[(uint8_t)c]; +} + +U_CAPI char U_EXPORT2 uprv_ebcdicToLowercaseAscii(char c) { return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; } diff --git a/deps/icu-small/source/common/uinvchar.h b/deps/icu-small/source/common/uinvchar.h index 56dddfa8fd..a43cfcd982 100644 --- a/deps/icu-small/source/common/uinvchar.h +++ b/deps/icu-small/source/common/uinvchar.h @@ -68,6 +68,75 @@ uprv_isInvariantUString(const UChar *s, int32_t length); # error Unknown charset family! #endif +#ifdef __cplusplus + +U_NAMESPACE_BEGIN + +/** + * Like U_UPPER_ORDINAL(x) but with validation. + * Returns 0..25 for A..Z else a value outside 0..25. 
+ */ +inline int32_t uprv_upperOrdinal(int32_t c) { +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + return c - 'A'; +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY + // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8). + // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout + if (c <= 'I') { return c - 'A'; } // A-I --> 0-8 + if (c < 'J') { return -1; } + if (c <= 'R') { return c - 'J' + 9; } // J-R --> 9..17 + if (c < 'S') { return -1; } + return c - 'S' + 18; // S-Z --> 18..25 +#else +# error Unknown charset family! +#endif +} + +// Like U_UPPER_ORDINAL(x) but for lowercase and with validation. +// Returns 0..25 for a..z else a value outside 0..25. +inline int32_t uprv_lowerOrdinal(int32_t c) { +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + return c - 'a'; +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY + // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8). + // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout + if (c <= 'i') { return c - 'a'; } // a-i --> 0-8 + if (c < 'j') { return -1; } + if (c <= 'r') { return c - 'j' + 9; } // j-r --> 9..17 + if (c < 's') { return -1; } + return c - 's' + 18; // s-z --> 18..25 +#else +# error Unknown charset family! +#endif +} + +U_NAMESPACE_END + +#endif + +/** + * Returns true if c == '@' is possible. + * The @ sign is variant, and the @ sign used on one + * EBCDIC machine won't be compiled the same way on other EBCDIC based machines. + * @internal + */ +U_CFUNC UBool +uprv_isEbcdicAtSign(char c); + +/** + * \def uprv_isAtSign + * Returns true if c == '@' is possible. + * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign(). + * @internal + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_isAtSign(c) ((c)=='@') +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_isAtSign(c) uprv_isEbcdicAtSign(c) +#else +# error Unknown charset family! +#endif + /** * Compare two EBCDIC invariant-character strings in ASCII order. 
* @internal @@ -89,6 +158,26 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2); #endif /** + * Converts an EBCDIC invariant character to ASCII. + * @internal + */ +U_INTERNAL char U_EXPORT2 +uprv_ebcdicToAscii(char c); + +/** + * \def uprv_invCharToAscii + * Converts an invariant character to ASCII. + * @internal + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_invCharToAscii(c) (c) +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c) +#else +# error Unknown charset family! +#endif + +/** * Converts an EBCDIC invariant character to lowercase ASCII. * @internal */ diff --git a/deps/icu-small/source/common/uloc.cpp b/deps/icu-small/source/common/uloc.cpp index 73b43204b8..6a9bfcfbff 100644 --- a/deps/icu-small/source/common/uloc.cpp +++ b/deps/icu-small/source/common/uloc.cpp @@ -148,7 +148,8 @@ static const char * const LANGUAGES[] = { "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga", "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", - "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj", + "ml", "mn", "mnc", "mni", "mo", + "moh", "mos", "mr", "mrj", "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv", "my", "mye", "myv", "mzn", "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne", @@ -264,7 +265,8 @@ static const char * const LANGUAGES_3[] = { "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga", "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", - "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj", + "mal", "mon", "mnc", "mni", "mol", + "moh", "mos", "mar", "mrj", "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv", "mya", "mye", "myv", "mzn", "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep", @@ -480,14 +482,15 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = { /* Test if the locale id has BCP47 u extension and does not have '@' */ #define 
_hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ -#define _ConvertBCP47(finalID, id, buffer, length,err) \ - if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \ - U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \ - finalID=id; \ - if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \ - } else { \ - finalID=buffer; \ - } +#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \ + if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \ + U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \ + finalID=id; \ + if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \ + } else { \ + finalID=buffer; \ + } \ +} UPRV_BLOCK_MACRO_END /* Gets the size of the shortest subtag in the given localeID. */ static int32_t getShortestSubtagLength(const char *localeID) { int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID)); @@ -1454,31 +1457,29 @@ static const UEnumeration gKeywordsEnum = { U_CAPI UEnumeration* U_EXPORT2 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) { - UKeywordsContext *myContext = NULL; - UEnumeration *result = NULL; + LocalMemory<UKeywordsContext> myContext; + LocalMemory<UEnumeration> result; - if(U_FAILURE(*status)) { - return NULL; + if (U_FAILURE(*status)) { + return nullptr; } - result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); - /* Null pointer test */ - if (result == NULL) { + myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)))); + result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)))); + if (myContext.isNull() || result.isNull()) { *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } - uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); 
- myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))); - if (myContext == NULL) { + uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration)); + myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1)); + if (myContext->keywords == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; - uprv_free(result); - return NULL; + return nullptr; } - myContext->keywords = (char *)uprv_malloc(keywordListSize+1); uprv_memcpy(myContext->keywords, keywordList, keywordListSize); myContext->keywords[keywordListSize] = 0; myContext->current = myContext->keywords; - result->context = myContext; - return result; + result->context = myContext.orphan(); + return result.orphan(); } U_CAPI UEnumeration* U_EXPORT2 diff --git a/deps/icu-small/source/common/uloc_tag.cpp b/deps/icu-small/source/common/uloc_tag.cpp index c732170cb6..8f673541a7 100644 --- a/deps/icu-small/source/common/uloc_tag.cpp +++ b/deps/icu-small/source/common/uloc_tag.cpp @@ -1558,10 +1558,8 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT return; } - if (!_addAttributeToList(&attrFirst, attr)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } + // duplicate attribute is ignored, causes no error. + _addAttributeToList(&attrFirst, attr); /* next tag */ pTag += len; diff --git a/deps/icu-small/source/common/umutex.cpp b/deps/icu-small/source/common/umutex.cpp index 20b03d6cd3..ccbee9960a 100644 --- a/deps/icu-small/source/common/umutex.cpp +++ b/deps/icu-small/source/common/umutex.cpp @@ -24,6 +24,7 @@ #include "unicode/utypes.h" #include "uassert.h" +#include "ucln_cmn.h" #include "cmemory.h" U_NAMESPACE_BEGIN @@ -35,60 +36,94 @@ U_NAMESPACE_BEGIN #error U_USER_MUTEX_CPP not supported #endif + /************************************************************************************************* * * ICU Mutex wrappers. 
* *************************************************************************************************/ -// The ICU global mutex. Used when ICU implementation code passes NULL for the mutex pointer. -static UMutex *globalMutex() { - static UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +namespace { +std::mutex *initMutex; +std::condition_variable *initCondition; -U_CAPI void U_EXPORT2 -umtx_lock(UMutex *mutex) { - if (mutex == nullptr) { - mutex = globalMutex(); - } - mutex->fMutex.lock(); -} +// The ICU global mutex. +// Used when ICU implementation code passes nullptr for the mutex pointer. +UMutex globalMutex; +std::once_flag initFlag; +std::once_flag *pInitFlag = &initFlag; -U_CAPI void U_EXPORT2 -umtx_unlock(UMutex* mutex) -{ - if (mutex == nullptr) { - mutex = globalMutex(); - } - mutex->fMutex.unlock(); -} +} // Anonymous namespace -UConditionVar::UConditionVar() : fCV() { +U_CDECL_BEGIN +static UBool U_CALLCONV umtx_cleanup() { + initMutex->~mutex(); + initCondition->~condition_variable(); + UMutex::cleanup(); + + // Reset the once_flag, by destructing it and creating a fresh one in its place. + // Do not use this trick anywhere else in ICU; use umtx_initOnce, not std::call_once(). 
+ pInitFlag->~once_flag(); + pInitFlag = new(&initFlag) std::once_flag(); + return true; } -UConditionVar::~UConditionVar() { +static void U_CALLCONV umtx_init() { + initMutex = STATIC_NEW(std::mutex); + initCondition = STATIC_NEW(std::condition_variable); + ucln_common_registerCleanup(UCLN_COMMON_MUTEX, umtx_cleanup); +} +U_CDECL_END + + +std::mutex *UMutex::getMutex() { + std::mutex *retPtr = fMutex.load(std::memory_order_acquire); + if (retPtr == nullptr) { + std::call_once(*pInitFlag, umtx_init); + std::lock_guard<std::mutex> guard(*initMutex); + retPtr = fMutex.load(std::memory_order_acquire); + if (retPtr == nullptr) { + fMutex = new(fStorage) std::mutex(); + retPtr = fMutex; + fListLink = gListHead; + gListHead = this; + } + } + U_ASSERT(retPtr != nullptr); + return retPtr; } -U_CAPI void U_EXPORT2 -umtx_condWait(UConditionVar *cond, UMutex *mutex) { - if (mutex == nullptr) { - mutex = globalMutex(); +UMutex *UMutex::gListHead = nullptr; + +void UMutex::cleanup() { + UMutex *next = nullptr; + for (UMutex *m = gListHead; m != nullptr; m = next) { + (*m->fMutex).~mutex(); + m->fMutex = nullptr; + next = m->fListLink; + m->fListLink = nullptr; } - cond->fCV.wait(mutex->fMutex); + gListHead = nullptr; } -U_CAPI void U_EXPORT2 -umtx_condBroadcast(UConditionVar *cond) { - cond->fCV.notify_all(); +U_CAPI void U_EXPORT2 +umtx_lock(UMutex *mutex) { + if (mutex == nullptr) { + mutex = &globalMutex; + } + mutex->lock(); } -U_CAPI void U_EXPORT2 -umtx_condSignal(UConditionVar *cond) { - cond->fCV.notify_one(); +U_CAPI void U_EXPORT2 +umtx_unlock(UMutex* mutex) +{ + if (mutex == nullptr) { + mutex = &globalMutex; + } + mutex->unlock(); } @@ -98,17 +133,6 @@ umtx_condSignal(UConditionVar *cond) { * *************************************************************************************************/ -static std::mutex &initMutex() { - static std::mutex m; - return m; -} - -static std::condition_variable &initCondition() { - static std::condition_variable cv; - return cv; -} - 
- // This function is called when a test of a UInitOnce::fState reveals that // initialization has not completed, that we either need to call the init // function on this thread, or wait for some other thread to complete. @@ -119,8 +143,8 @@ static std::condition_variable &initCondition() { // U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &uio) { - std::unique_lock<std::mutex> lock(initMutex()); - + std::call_once(*pInitFlag, umtx_init); + std::unique_lock<std::mutex> lock(*initMutex); if (umtx_loadAcquire(uio.fState) == 0) { umtx_storeRelease(uio.fState, 1); return true; // Caller will next call the init function. @@ -128,7 +152,7 @@ umtx_initImplPreInit(UInitOnce &uio) { while (umtx_loadAcquire(uio.fState) == 1) { // Another thread is currently running the initialization. // Wait until it completes. - initCondition().wait(lock); + initCondition->wait(lock); } U_ASSERT(uio.fState == 2); return false; @@ -145,10 +169,10 @@ umtx_initImplPreInit(UInitOnce &uio) { U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &uio) { { - std::unique_lock<std::mutex> lock(initMutex()); + std::unique_lock<std::mutex> lock(*initMutex); umtx_storeRelease(uio.fState, 2); } - initCondition().notify_all(); + initCondition->notify_all(); } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/umutex.h b/deps/icu-small/source/common/umutex.h index 1674d00bb2..7588bcc5d9 100755 --- a/deps/icu-small/source/common/umutex.h +++ b/deps/icu-small/source/common/umutex.h @@ -23,6 +23,7 @@ #include <atomic> #include <condition_variable> #include <mutex> +#include <type_traits> #include "unicode/utypes.h" #include "unicode/uclean.h" @@ -36,10 +37,11 @@ #error U_USER_ATOMICS and U_USER_MUTEX_H are not supported #endif - // Export an explicit template instantiation of std::atomic<int32_t>. // When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class. 
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. +// +// Similar story for std::atomic<std::mutex *>, and the exported UMutex class. #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN) #if defined(__clang__) || defined(_MSC_VER) #if defined(__clang__) @@ -48,12 +50,14 @@ #pragma clang diagnostic ignored "-Winstantiation-after-specialization" #endif template struct U_COMMON_API std::atomic<int32_t>; +template struct U_COMMON_API std::atomic<std::mutex *>; #if defined(__clang__) #pragma clang diagnostic pop #endif #elif defined(__GNUC__) // For GCC this class is already exported/visible, so no need for U_COMMON_API. template struct std::atomic<int32_t>; +template struct std::atomic<std::mutex *>; #endif #endif @@ -180,49 +184,78 @@ template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UE } } +// UMutex should be constexpr-constructible, so that no initialization code +// is run during startup. +// This works on all C++ libraries except MS VS before VS2019. +#if (defined(_CPPLIB_VER) && !defined(_MSVC_STL_VERSION)) || \ + (defined(_MSVC_STL_VERSION) && _MSVC_STL_VERSION < 142) + // (VS std lib older than VS2017) || (VS std lib version < VS2019) +# define UMUTEX_CONSTEXPR +#else +# define UMUTEX_CONSTEXPR constexpr +#endif -/************************************************************************************************* +/** + * UMutex - ICU Mutex class. * - * ICU Mutex wrappers. Originally wrapped operating system mutexes, giving the rest of ICU a - * platform independent set of mutex operations. Now vestigial, wrapping std::mutex only. - * For internal ICU use only. + * This is the preferred Mutex class for use within ICU implementation code. + * It is a thin wrapper over C++ std::mutex, with these additions: + * - Static instances are safe, not triggering static construction or destruction, + * and the associated order of construction or destruction issues. 
+ * - Plumbed into u_cleanup() for destructing the underlying std::mutex, + * which frees any OS level resources they may be holding. * - *************************************************************************************************/ + * Limitations: + * - Static or global instances only. Cannot be heap allocated. Cannot appear as a + * member of another class. + * - No condition variables or other advanced features. If needed, you will need to use + * std::mutex and std::condition_variable directly. For an example, see unifiedcache.cpp + * + * Typical Usage: + * static UMutex myMutex; + * + * { + * Mutex lock(myMutex); + * ... // Do stuff that is protected by myMutex; + * } // myMutex is released when lock goes out of scope. + */ -struct UMutex : public icu::UMemory { - UMutex() = default; +class U_COMMON_API UMutex { +public: + UMUTEX_CONSTEXPR UMutex() {} ~UMutex() = default; + UMutex(const UMutex &other) = delete; UMutex &operator =(const UMutex &other) = delete; + void *operator new(size_t) = delete; - std::mutex fMutex = {}; // Note: struct - pubic members - because most access is from - // // plain C style functions (umtx_lock(), etc.) 
-}; + // requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard + void lock() { + std::mutex *m = fMutex.load(std::memory_order_acquire); + if (m == nullptr) { m = getMutex(); } + m->lock(); + } + void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); } + static void cleanup(); -struct UConditionVar : public icu::UMemory { - U_COMMON_API UConditionVar(); - U_COMMON_API ~UConditionVar(); - UConditionVar(const UConditionVar &other) = delete; - UConditionVar &operator =(const UConditionVar &other) = delete; +private: + alignas(std::mutex) char fStorage[sizeof(std::mutex)] {}; + std::atomic<std::mutex *> fMutex { nullptr }; - std::condition_variable_any fCV; -}; + /** All initialized UMutexes are kept in a linked list, so that they can be found, + * and the underlying std::mutex destructed, by u_cleanup(). + */ + UMutex *fListLink { nullptr }; + static UMutex *gListHead; -#define U_MUTEX_INITIALIZER {} -#define U_CONDITION_INITIALIZER {} + /** Out-of-line function to lazily initialize a UMutex on first use. + * Initial fast check is inline, in lock(). The returned value may never + * be nullptr. + */ + std::mutex *getMutex(); +}; -// Implementation notes for UConditionVar: -// -// Use an out-of-line constructor to reduce problems with the ICU dependency checker. -// On Linux, the default constructor of std::condition_variable_any -// produces an in-line reference to global operator new(), which the -// dependency checker flags for any file that declares a UConditionVar. With -// an out-of-line constructor, the dependency is constrained to umutex.o -// -// Do not export (U_COMMON_API) the entire class, but only the constructor -// and destructor, to avoid Windows build problems with attempting to export the -// std::condition_variable_any. /* Lock a mutex. * @param mutex The given mutex to be locked. 
Pass NULL to specify @@ -237,30 +270,6 @@ U_INTERNAL void U_EXPORT2 umtx_lock(UMutex* mutex); */ U_INTERNAL void U_EXPORT2 umtx_unlock (UMutex* mutex); -/* - * Wait on a condition variable. - * The calling thread will unlock the mutex and wait on the condition variable. - * The mutex must be locked by the calling thread when invoking this function. - * - * @param cond the condition variable to wait on. - * @param mutex the associated mutex. - */ - -U_INTERNAL void U_EXPORT2 umtx_condWait(UConditionVar *cond, UMutex *mutex); - - -/* - * Broadcast wakeup of all threads waiting on a Condition. - * - * @param cond the condition variable. - */ -U_INTERNAL void U_EXPORT2 umtx_condBroadcast(UConditionVar *cond); - -/* - * Signal a condition variable, waking up one waiting thread. - */ -U_INTERNAL void U_EXPORT2 umtx_condSignal(UConditionVar *cond); - U_NAMESPACE_END diff --git a/deps/icu-small/source/common/unames.cpp b/deps/icu-small/source/common/unames.cpp index 038743004e..a28b6ee603 100644 --- a/deps/icu-small/source/common/unames.cpp +++ b/deps/icu-small/source/common/unames.cpp @@ -212,13 +212,13 @@ isDataLoaded(UErrorCode *pErrorCode) { return U_SUCCESS(*pErrorCode); } -#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \ +#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) UPRV_BLOCK_MACRO_BEGIN { \ if((bufferLength)>0) { \ *(buffer)++=c; \ --(bufferLength); \ } \ ++(bufferPos); \ -} +} UPRV_BLOCK_MACRO_END #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT diff --git a/deps/icu-small/source/common/unicode/appendable.h b/deps/icu-small/source/common/unicode/appendable.h index 8512c2f303..4beacaf658 100644 --- a/deps/icu-small/source/common/unicode/appendable.h +++ b/deps/icu-small/source/common/unicode/appendable.h @@ -23,6 +23,9 @@ */ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" U_NAMESPACE_BEGIN @@ -231,4 +234,6 @@ private: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __APPENDABLE_H__ diff --git 
a/deps/icu-small/source/common/unicode/brkiter.h b/deps/icu-small/source/common/unicode/brkiter.h index ac1bf1df29..b944497345 100644 --- a/deps/icu-small/source/common/unicode/brkiter.h +++ b/deps/icu-small/source/common/unicode/brkiter.h @@ -29,6 +29,10 @@ * \brief C++ API: Break Iterator. */ +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #if UCONFIG_NO_BREAK_ITERATION U_NAMESPACE_BEGIN @@ -135,7 +139,7 @@ public: * method which subclasses implement. * @stable ICU 2.0 */ - virtual BreakIterator* clone(void) const = 0; + virtual BreakIterator* clone() const = 0; /** * Return a polymorphic class ID for this object. Different subclasses @@ -493,6 +497,7 @@ public: static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, UnicodeString& name); +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * Deprecated functionality. Use clone() instead. * @@ -515,6 +520,7 @@ public: virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status) = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API @@ -658,5 +664,7 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // BRKITER_H //eof diff --git a/deps/icu-small/source/common/unicode/bytestream.h b/deps/icu-small/source/common/unicode/bytestream.h index 61d1e8aca6..2c71c248e3 100644 --- a/deps/icu-small/source/common/unicode/bytestream.h +++ b/deps/icu-small/source/common/unicode/bytestream.h @@ -38,6 +38,9 @@ */ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" #include "unicode/std_string.h" @@ -267,4 +270,6 @@ class StringByteSink : public ByteSink { U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __BYTESTREAM_H__ diff --git a/deps/icu-small/source/common/unicode/bytestrie.h b/deps/icu-small/source/common/unicode/bytestrie.h index c57b8ccfeb..51405f64a1 100644 --- a/deps/icu-small/source/common/unicode/bytestrie.h +++ 
b/deps/icu-small/source/common/unicode/bytestrie.h @@ -23,6 +23,9 @@ */ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/stringpiece.h" #include "unicode/uobject.h" #include "unicode/ustringtrie.h" @@ -94,6 +97,41 @@ public: return *this; } +#ifndef U_HIDE_DRAFT_API + /** + * Returns the state of this trie as a 64-bit integer. + * The state value is never 0. + * + * @return opaque state value + * @see resetToState64 + * @draft ICU 65 + */ + uint64_t getState64() const { + return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) | + (uint64_t)(pos_ - bytes_); + } + + /** + * Resets this trie to the saved state. + * Unlike resetToState(State), the 64-bit state value + * must be from getState64() from the same trie object or + * from one initialized the exact same way. + * Because of no validation, this method is faster. + * + * @param state The opaque trie state value from getState64(). + * @return *this + * @see getState64 + * @see resetToState + * @see reset + * @draft ICU 65 + */ + BytesTrie &resetToState64(uint64_t state) { + remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2; + pos_ = bytes_ + (state & kState64PosMask); + return *this; + } +#endif /* U_HIDE_DRAFT_API */ + /** * BytesTrie state object, for saving a trie's current state * and resetting the trie back to this state later. @@ -502,6 +540,13 @@ private: static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff + // For getState64(): + // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2 + // so we need at least 5 bits for that. + // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength. 
+ static constexpr int32_t kState64RemainingShift = 59; + static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1; + uint8_t *ownedArray_; // Fixed value referencing the BytesTrie bytes. @@ -517,4 +562,6 @@ private: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __BYTESTRIE_H__ diff --git a/deps/icu-small/source/common/unicode/bytestriebuilder.h b/deps/icu-small/source/common/unicode/bytestriebuilder.h index b164e3bbd6..e58f18755e 100644 --- a/deps/icu-small/source/common/unicode/bytestriebuilder.h +++ b/deps/icu-small/source/common/unicode/bytestriebuilder.h @@ -23,6 +23,9 @@ #define __BYTESTRIEBUILDER_H__ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/bytestrie.h" #include "unicode/stringpiece.h" #include "unicode/stringtriebuilder.h" @@ -179,4 +182,6 @@ private: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __BYTESTRIEBUILDER_H__ diff --git a/deps/icu-small/source/common/unicode/caniter.h b/deps/icu-small/source/common/unicode/caniter.h index b47e35da07..87c946c2b4 100644 --- a/deps/icu-small/source/common/unicode/caniter.h +++ b/deps/icu-small/source/common/unicode/caniter.h @@ -12,6 +12,8 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + #if !UCONFIG_NO_NORMALIZATION #include "unicode/uobject.h" @@ -207,4 +209,6 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_NORMALIZATION */ +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h index 477eb484d1..53af84fa74 100644 --- a/deps/icu-small/source/common/unicode/casemap.h +++ b/deps/icu-small/source/common/unicode/casemap.h @@ -8,6 +8,9 @@ #define __CASEMAP_H__ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/stringpiece.h" #include "unicode/uobject.h" @@ -489,4 +492,6 @@ private: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __CASEMAP_H__ diff --git 
a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h index a7c5f1a0c5..c8a9ae6c35 100644 --- a/deps/icu-small/source/common/unicode/char16ptr.h +++ b/deps/icu-small/source/common/unicode/char16ptr.h @@ -7,9 +7,12 @@ #ifndef __CHAR16PTR_H__ #define __CHAR16PTR_H__ -#include <cstddef> #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + +#include <cstddef> + /** * \file * \brief C++ API: char16_t pointer wrappers with @@ -305,4 +308,6 @@ inline OldUChar *toOldUCharPtr(char16_t *p) { U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __CHAR16PTR_H__ diff --git a/deps/icu-small/source/common/unicode/chariter.h b/deps/icu-small/source/common/unicode/chariter.h index 292794f6d6..7e4f446bb0 100644 --- a/deps/icu-small/source/common/unicode/chariter.h +++ b/deps/icu-small/source/common/unicode/chariter.h @@ -13,6 +13,9 @@ #define CHARITER_H #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" #include "unicode/unistr.h" /** @@ -377,7 +380,7 @@ public: * @return a pointer to a new CharacterIterator * @stable ICU 2.0 */ - virtual CharacterIterator* clone(void) const = 0; + virtual CharacterIterator* clone() const = 0; /** * Sets the iterator to refer to the first code unit in its @@ -725,4 +728,7 @@ CharacterIterator::getLength(void) const { } U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/dbbi.h b/deps/icu-small/source/common/unicode/dbbi.h index 62509c5227..9031c0b96b 100644 --- a/deps/icu-small/source/common/unicode/dbbi.h +++ b/deps/icu-small/source/common/unicode/dbbi.h @@ -13,6 +13,10 @@ #ifndef DBBI_H #define DBBI_H +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/rbbi.h" #if !UCONFIG_NO_BREAK_ITERATION @@ -39,4 +43,6 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git 
a/deps/icu-small/source/common/unicode/docmain.h b/deps/icu-small/source/common/unicode/docmain.h index 8990679987..2b38692997 100644 --- a/deps/icu-small/source/common/unicode/docmain.h +++ b/deps/icu-small/source/common/unicode/docmain.h @@ -99,7 +99,7 @@ * </tr> * <tr> * <td>Codepage Conversion</td> - * <td>ucnv.h, ucnvsel.hb</td> + * <td>ucnv.h, ucnvsel.h</td> * <td>C API</td> * </tr> * <tr> @@ -115,7 +115,7 @@ * <tr> * <td>Locales </td> * <td>uloc.h</a></td> - * <td>icu::Locale, icu::LocaleBuilder</td> + * <td>icu::Locale, icu::LocaleBuilder, icu::LocaleMatcher</td> * </tr> * <tr> * <td>Resource Bundles</td> diff --git a/deps/icu-small/source/common/unicode/dtintrv.h b/deps/icu-small/source/common/unicode/dtintrv.h index 625456f0c8..325faa3ccb 100644 --- a/deps/icu-small/source/common/unicode/dtintrv.h +++ b/deps/icu-small/source/common/unicode/dtintrv.h @@ -15,6 +15,9 @@ #define __DTINTRV_H__ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" /** @@ -22,7 +25,6 @@ * \brief C++ API: Date Interval data type */ - U_NAMESPACE_BEGIN @@ -157,4 +159,6 @@ DateInterval::operator!=(const DateInterval& other) const { U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h index 79e98b0cc2..c3ceaccb3b 100644 --- a/deps/icu-small/source/common/unicode/edits.h +++ b/deps/icu-small/source/common/unicode/edits.h @@ -8,6 +8,9 @@ #define __EDITS_H__ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" /** @@ -159,7 +162,7 @@ public: * @return TRUE if U_FAILURE(outErrorCode) * @stable ICU 59 */ - UBool copyErrorTo(UErrorCode &outErrorCode); + UBool copyErrorTo(UErrorCode &outErrorCode) const; /** * How much longer is the new text compared with the old text? 
@@ -523,4 +526,6 @@ private: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __EDITS_H__ diff --git a/deps/icu-small/source/common/unicode/errorcode.h b/deps/icu-small/source/common/unicode/errorcode.h index 1e5df8f03e..75cdbb6a98 100644 --- a/deps/icu-small/source/common/unicode/errorcode.h +++ b/deps/icu-small/source/common/unicode/errorcode.h @@ -26,6 +26,9 @@ */ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" U_NAMESPACE_BEGIN @@ -136,4 +139,6 @@ protected: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __ERRORCODE_H__ diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h index 2444114e9a..4293676325 100644 --- a/deps/icu-small/source/common/unicode/filteredbrk.h +++ b/deps/icu-small/source/common/unicode/filteredbrk.h @@ -11,6 +11,9 @@ #define FILTEREDBRK_H #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/brkiter.h" #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION @@ -101,6 +104,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { */ virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * This function has been deprecated in favor of wrapIteratorWithFilter() * The behavior is identical. @@ -111,6 +115,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * @see wrapBreakIteratorWithFilter() */ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API /** * Wrap (adopt) an existing break iterator in a new filtered instance. 
@@ -142,4 +147,6 @@ U_NAMESPACE_END #endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // #ifndef FILTEREDBRK_H diff --git a/deps/icu-small/source/common/unicode/idna.h b/deps/icu-small/source/common/unicode/idna.h index f08658e502..6dfcfe48db 100644 --- a/deps/icu-small/source/common/unicode/idna.h +++ b/deps/icu-small/source/common/unicode/idna.h @@ -24,6 +24,8 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + #if !UCONFIG_NO_IDNA #include "unicode/bytestream.h" @@ -322,4 +324,7 @@ private: U_NAMESPACE_END #endif // UCONFIG_NO_IDNA + +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __IDNA_H__ diff --git a/deps/icu-small/source/common/unicode/localebuilder.h b/deps/icu-small/source/common/unicode/localebuilder.h index 960e5980c0..19e10f1c07 100644 --- a/deps/icu-small/source/common/unicode/localebuilder.h +++ b/deps/icu-small/source/common/unicode/localebuilder.h @@ -3,11 +3,14 @@ #ifndef __LOCALEBUILDER_H__ #define __LOCALEBUILDER_H__ +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/locid.h" +#include "unicode/localematcher.h" #include "unicode/stringpiece.h" #include "unicode/uobject.h" -#include "unicode/utypes.h" - #ifndef U_HIDE_DRAFT_API /** @@ -276,7 +279,24 @@ public: */ Locale build(UErrorCode& status); +#ifndef U_HIDE_DRAFT_API + /** + * Sets the UErrorCode if an error occurred while recording sets. + * Preserves older error codes in the outErrorCode. + * @param outErrorCode Set to an error code that occurred while setting subtags. + * Unchanged if there is no such error or if outErrorCode + * already contained an error. 
+ * @return TRUE if U_FAILURE(outErrorCode) + * @draft ICU 65 + */ + UBool copyErrorTo(UErrorCode &outErrorCode) const; +#endif /* U_HIDE_DRAFT_API */ + private: + friend class LocaleMatcher::Result; + + void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode); + UErrorCode status_; char language_[9]; char script_[5]; @@ -289,4 +309,7 @@ private: U_NAMESPACE_END #endif // U_HIDE_DRAFT_API + +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __LOCALEBUILDER_H__ diff --git a/deps/icu-small/source/common/unicode/localematcher.h b/deps/icu-small/source/common/unicode/localematcher.h new file mode 100644 index 0000000000..701123f750 --- /dev/null +++ b/deps/icu-small/source/common/unicode/localematcher.h @@ -0,0 +1,605 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// localematcher.h +// created: 2019may08 Markus W. Scherer + +#ifndef __LOCALEMATCHER_H__ +#define __LOCALEMATCHER_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales. + */ + +#ifndef U_HIDE_DRAFT_API + +/** + * Builder option for whether the language subtag or the script subtag is most important. + * + * @see Builder#setFavorSubtag(FavorSubtag) + * @draft ICU 65 + */ +enum ULocMatchFavorSubtag { + /** + * Language differences are most important, then script differences, then region differences. + * (This is the default behavior.) + * + * @draft ICU 65 + */ + ULOCMATCH_FAVOR_LANGUAGE, + /** + * Makes script differences matter relatively more than language differences. 
+ * + * @draft ICU 65 + */ + ULOCMATCH_FAVOR_SCRIPT +}; +#ifndef U_IN_DOXYGEN +typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag; +#endif + +/** + * Builder option for whether all desired locales are treated equally or + * earlier ones are preferred. + * + * @see Builder#setDemotionPerDesiredLocale(Demotion) + * @draft ICU 65 + */ +enum ULocMatchDemotion { + /** + * All desired locales are treated equally. + * + * @draft ICU 65 + */ + ULOCMATCH_DEMOTION_NONE, + /** + * Earlier desired locales are preferred. + * + * <p>From each desired locale to the next, + * the distance to any supported locale is increased by an additional amount + * which is at least as large as most region mismatches. + * A later desired locale has to have a better match with some supported locale + * due to more than merely having the same region subtag. + * + * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code> + * yields <code>Result(en-GB, en)</code> because + * with the demotion of sv its perfect match is no better than + * the region distance between the earlier desired locale en-GB and en=en-US. + * + * <p>Notes: + * <ul> + * <li>In some cases, language and/or script differences can be as small as + * the typical region difference. (Example: sr-Latn vs. sr-Cyrl) + * <li>It is possible for certain region differences to be larger than usual, + * and larger than the demotion. + * (As of CLDR 35 there is no such case, but + * this is possible in future versions of the data.) + * </ul> + * + * @draft ICU 65 + */ + ULOCMATCH_DEMOTION_REGION +}; +#ifndef U_IN_DOXYGEN +typedef enum ULocMatchDemotion ULocMatchDemotion; +#endif + +struct UHashtable; + +U_NAMESPACE_BEGIN + +struct LSR; + +class LocaleDistance; +class LocaleLsrIterator; +class UVector; +class XLikelySubtags; + +/** + * Immutable class that picks the best match between a user's desired locales and + * an application's supported locales. + * Movable but not copyable. 
+ * + * <p>Example: + * <pre> + * UErrorCode errorCode = U_ZERO_ERROR; + * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode); + * Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode); // "en" + * </pre> + * + * <p>A matcher takes into account when languages are close to one another, + * such as Danish and Norwegian, + * and when regional variants are close, like en-GB and en-AU as opposed to en-US. + * + * <p>If there are multiple supported locales with the same (language, script, region) + * likely subtags, then the current implementation returns the first of those locales. + * It ignores variant subtags (except for pseudolocale variants) and extensions. + * This may change in future versions. + * + * <p>For example, the current implementation does not distinguish between + * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. + * + * <p>If you prefer one equivalent locale over another, then provide only the preferred one, + * or place it earlier in the list of supported locales. + * + * <p>Otherwise, the order of supported locales may have no effect on the best-match results. + * The current implementation compares each desired locale with supported locales + * in the following order: + * 1. Default locale, if supported; + * 2. CLDR "paradigm locales" like en-GB and es-419; + * 3. other supported locales. + * This may change in future versions. + * + * <p>Often a product will just need one matcher instance, built with the languages + * that it supports. However, it may want multiple instances with different + * default languages based on additional information, such as the domain. + * + * <p>This class is not intended for public subclassing. + * + * @draft ICU 65 + */ +class U_COMMON_API LocaleMatcher : public UMemory { +public: + /** + * Data for the best-matching pair of a desired and a supported locale. + * Movable but not copyable. 
+ * + * @draft ICU 65 + */ + class U_COMMON_API Result : public UMemory { + public: + /** + * Move constructor; might modify the source. + * This object will have the same contents that the source object had. + * + * @param src Result to move contents from. + * @draft ICU 65 + */ + Result(Result &&src) U_NOEXCEPT; + + /** + * Destructor. + * + * @draft ICU 65 + */ + ~Result(); + + /** + * Move assignment; might modify the source. + * This object will have the same contents that the source object had. + * + * @param src Result to move contents from. + * @draft ICU 65 + */ + Result &operator=(Result &&src) U_NOEXCEPT; + + /** + * Returns the best-matching desired locale. + * nullptr if the list of desired locales is empty or if none matched well enough. + * + * @return the best-matching desired locale, or nullptr. + * @draft ICU 65 + */ + inline const Locale *getDesiredLocale() const { return desiredLocale; } + + /** + * Returns the best-matching supported locale. + * If none matched well enough, this is the default locale. + * The default locale is nullptr if the list of supported locales is empty and + * no explicit default locale is set. + * + * @return the best-matching supported locale, or nullptr. + * @draft ICU 65 + */ + inline const Locale *getSupportedLocale() const { return supportedLocale; } + + /** + * Returns the index of the best-matching desired locale in the input Iterable order. + * -1 if the list of desired locales is empty or if none matched well enough. + * + * @return the index of the best-matching desired locale, or -1. + * @draft ICU 65 + */ + inline int32_t getDesiredIndex() const { return desiredIndex; } + + /** + * Returns the index of the best-matching supported locale in the + * constructor’s or builder’s input order (“set” Collection plus “added” locales). + * If the matcher was built from a locale list string, then the iteration order is that + * of a LocalePriorityList built from the same string. 
+ * -1 if the list of supported locales is empty or if none matched well enough. + * + * @return the index of the best-matching supported locale, or -1. + * @draft ICU 65 + */ + inline int32_t getSupportedIndex() const { return supportedIndex; } + + /** + * Takes the best-matching supported locale and adds relevant fields of the + * best-matching desired locale, such as the -t- and -u- extensions. + * May replace some fields of the supported locale. + * The result is the locale that should be used for date and number formatting, collation, etc. + * Returns the root locale if getSupportedLocale() returns nullptr. + * + * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn + * + * @return a locale combining the best-matching desired and supported locales. + * @draft ICU 65 + */ + Locale makeResolvedLocale(UErrorCode &errorCode) const; + + private: + Result(const Locale *desired, const Locale *supported, + int32_t desIndex, int32_t suppIndex, UBool owned) : + desiredLocale(desired), supportedLocale(supported), + desiredIndex(desIndex), supportedIndex(suppIndex), + desiredIsOwned(owned) {} + + Result(const Result &other) = delete; + Result &operator=(const Result &other) = delete; + + const Locale *desiredLocale; + const Locale *supportedLocale; + int32_t desiredIndex; + int32_t supportedIndex; + UBool desiredIsOwned; + + friend class LocaleMatcher; + }; + + /** + * LocaleMatcher builder. + * Movable but not copyable. + * + * @see LocaleMatcher#builder() + * @draft ICU 65 + */ + class U_COMMON_API Builder : public UMemory { + public: + /** + * Constructs a builder used in chaining parameters for building a LocaleMatcher. + * + * @return a new Builder object + * @draft ICU 65 + */ + Builder() {} + + /** + * Move constructor; might modify the source. + * This builder will have the same contents that the source builder had. + * + * @param src Builder to move contents from. 
+ * @draft ICU 65 + */ + Builder(Builder &&src) U_NOEXCEPT; + + /** + * Destructor. + * + * @draft ICU 65 + */ + ~Builder(); + + /** + * Move assignment; might modify the source. + * This builder will have the same contents that the source builder had. + * + * @param src Builder to move contents from. + * @draft ICU 65 + */ + Builder &operator=(Builder &&src) U_NOEXCEPT; + + /** + * Parses an Accept-Language string + * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), + * such as "af, en, fr;q=0.9", and sets the supported locales accordingly. + * Allows whitespace in more places but does not allow "*". + * Clears any previously set/added supported locales first. + * + * @param locales the Accept-Language string of locales to set + * @return this Builder object + * @draft ICU 65 + */ + Builder &setSupportedLocalesFromListString(StringPiece locales); + + /** + * Copies the supported locales, preserving iteration order. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * @param locales the list of locale + * @return this Builder object + * @draft ICU 65 + */ + Builder &setSupportedLocales(Locale::Iterator &locales); + + /** + * Copies the supported locales from the begin/end range, preserving iteration order. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * Each of the iterator parameter values must be an + * input iterator whose value is convertible to const Locale &. + * + * @param begin Start of range. + * @param end Exclusive end of range. 
+ * @return this Builder object + * @draft ICU 65 + */ + template<typename Iter> + Builder &setSupportedLocales(Iter begin, Iter end) { + if (U_FAILURE(errorCode_)) { return *this; } + clearSupportedLocales(); + while (begin != end) { + addSupportedLocale(*begin++); + } + return *this; + } + + /** + * Copies the supported locales from the begin/end range, preserving iteration order. + * Calls the converter to convert each *begin to a Locale or const Locale &. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * Each of the iterator parameter values must be an + * input iterator whose value is convertible to const Locale &. + * + * @param begin Start of range. + * @param end Exclusive end of range. + * @param converter Converter from *begin to const Locale & or compatible. + * @return this Builder object + * @draft ICU 65 + */ + template<typename Iter, typename Conv> + Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) { + if (U_FAILURE(errorCode_)) { return *this; } + clearSupportedLocales(); + while (begin != end) { + addSupportedLocale(converter(*begin++)); + } + return *this; + } + + /** + * Adds another supported locale. + * Duplicates are allowed, and are not removed. + * + * @param locale another locale + * @return this Builder object + * @draft ICU 65 + */ + Builder &addSupportedLocale(const Locale &locale); + + /** + * Sets the default locale; if nullptr, or if it is not set explicitly, + * then the first supported locale is used as the default locale. + * + * @param defaultLocale the default locale (will be copied) + * @return this Builder object + * @draft ICU 65 + */ + Builder &setDefaultLocale(const Locale *defaultLocale); + + /** + * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script + * differences. + * This is used in situations (such as maps) where + * it is better to fall back to the same script than a similar language. 
+ * + * @param subtag the subtag to favor + * @return this Builder object + * @draft ICU 65 + */ + Builder &setFavorSubtag(ULocMatchFavorSubtag subtag); + + /** + * Option for whether all desired locales are treated equally or + * earlier ones are preferred (this is the default). + * + * @param demotion the demotion per desired locale to set. + * @return this Builder object + * @draft ICU 65 + */ + Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion); + + /** + * Sets the UErrorCode if an error occurred while setting parameters. + * Preserves older error codes in the outErrorCode. + * + * @param outErrorCode Set to an error code if it does not contain one already + * and an error occurred while setting parameters. + * Otherwise unchanged. + * @return TRUE if U_FAILURE(outErrorCode) + * @draft ICU 65 + */ + UBool copyErrorTo(UErrorCode &outErrorCode) const; + + /** + * Builds and returns a new locale matcher. + * This builder can continue to be used. + * + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return new LocaleMatcher. + * @draft ICU 65 + */ + LocaleMatcher build(UErrorCode &errorCode) const; + + private: + friend class LocaleMatcher; + + Builder(const Builder &other) = delete; + Builder &operator=(const Builder &other) = delete; + + void clearSupportedLocales(); + bool ensureSupportedLocaleVector(); + + UErrorCode errorCode_ = U_ZERO_ERROR; + UVector *supportedLocales_ = nullptr; + int32_t thresholdDistance_ = -1; + ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION; + Locale *defaultLocale_ = nullptr; + ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE; + }; + + // FYI No public LocaleMatcher constructors in C++; use the Builder. + + /** + * Move constructor; might modify the source. 
+ * This matcher will have the same settings that the source matcher had. + * @param src source matcher + * @draft ICU 65 + */ + LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT; + + /** + * Destructor. + * @draft ICU 65 + */ + ~LocaleMatcher(); + + /** + * Move assignment operator; might modify the source. + * This matcher will have the same settings that the source matcher had. + * The behavior is undefined if *this and src are the same object. + * @param src source matcher + * @return *this + * @draft ICU 65 + */ + LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT; + + /** + * Returns the supported locale which best matches the desired locale. + * + * @param desiredLocale Typically a user's language. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching supported locale. + * @draft ICU 65 + */ + const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const; + + /** + * Returns the supported locale which best matches one of the desired locales. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching supported locale. + * @draft ICU 65 + */ + const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const; + + /** + * Parses an Accept-Language string + * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), + * such as "af, en, fr;q=0.9", + * and returns the supported locale which best matches one of the desired locales. + * Allows whitespace in more places but does not allow "*". 
+ * + * @param desiredLocaleList Typically a user's languages, as an Accept-Language string. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching supported locale. + * @draft ICU 65 + */ + const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const; + + /** + * Returns the best match between the desired locale and the supported locales. + * If the result's desired locale is not nullptr, then it is the address of the input locale. + * It has not been cloned. + * + * @param desiredLocale Typically a user's language. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching pair of the desired and a supported locale. + * @draft ICU 65 + */ + Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const; + + /** + * Returns the best match between the desired and supported locales. + * If the result's desired locale is not nullptr, then it is a clone of + * the best-matching desired locale. The Result object owns the clone. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching pair of a desired and a supported locale. 
+ * @draft ICU 65 + */ + Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const; + +#ifndef U_HIDE_INTERNAL_API + /** + * Returns a fraction between 0 and 1, where 1 means that the languages are a + * perfect match, and 0 means that they are completely different. + * + * <p>This is mostly an implementation detail, and the precise values may change over time. + * The implementation may use either the maximized forms or the other ones, or both. + * The implementation may or may not rely on the forms to be consistent with each other. + * + * <p>Callers should construct and use a matcher rather than match pairs of locales directly. + * + * @param desired Desired locale. + * @param supported Supported locale. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return value between 0 and 1, inclusive. + * @internal (has a known user) + */ + double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const; +#endif // U_HIDE_INTERNAL_API + +private: + LocaleMatcher(const Builder &builder, UErrorCode &errorCode); + LocaleMatcher(const LocaleMatcher &other) = delete; + LocaleMatcher &operator=(const LocaleMatcher &other) = delete; + + int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const; + + const XLikelySubtags &likelySubtags; + const LocaleDistance &localeDistance; + int32_t thresholdDistance; + int32_t demotionPerDesiredLocale; + ULocMatchFavorSubtag favorSubtag; + + // These are in input order. + const Locale ** supportedLocales; + LSR *lsrs; + int32_t supportedLocalesLength; + // These are in preference order: 1. Default locale 2. paradigm locales 3. others. 
+ UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found" + // Array versions of the supportedLsrToIndex keys and values. + // The distance lookup loops over the supportedLSRs and returns the index of the best match. + const LSR **supportedLSRs; + int32_t *supportedIndexes; + int32_t supportedLSRsLength; + Locale *ownedDefaultLocale; + const Locale *defaultLocale; + int32_t defaultLocaleIndex; +}; + +U_NAMESPACE_END + +#endif // U_HIDE_DRAFT_API +#endif // U_SHOW_CPLUSPLUS_API +#endif // __LOCALEMATCHER_H__ diff --git a/deps/icu-small/source/common/unicode/locdspnm.h b/deps/icu-small/source/common/unicode/locdspnm.h index f6e778356f..4f06f85704 100644 --- a/deps/icu-small/source/common/unicode/locdspnm.h +++ b/deps/icu-small/source/common/unicode/locdspnm.h @@ -12,6 +12,8 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + /** * \file * \brief C++ API: Provides display names of Locale and its components. @@ -204,4 +206,6 @@ U_NAMESPACE_END #endif +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h index 7350e381ff..6fb0897fc0 100644 --- a/deps/icu-small/source/common/unicode/locid.h +++ b/deps/icu-small/source/common/unicode/locid.h @@ -31,11 +31,14 @@ #ifndef LOCID_H #define LOCID_H +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/bytestream.h" #include "unicode/localpointer.h" #include "unicode/strenum.h" #include "unicode/stringpiece.h" -#include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/putil.h" #include "unicode/uloc.h" @@ -284,16 +287,14 @@ public: */ Locale(const Locale& other); -#ifndef U_HIDE_DRAFT_API /** * Move constructor; might leave source in bogus state. * This locale will have the same contents that the source locale had. * * @param other The Locale object being moved in. 
- * @draft ICU 63 + * @stable ICU 63 */ Locale(Locale&& other) U_NOEXCEPT; -#endif // U_HIDE_DRAFT_API /** * Destructor @@ -310,7 +311,6 @@ public: */ Locale& operator=(const Locale& other); -#ifndef U_HIDE_DRAFT_API /** * Move assignment operator; might leave source in bogus state. * This locale will have the same contents that the source locale had. @@ -318,10 +318,9 @@ public: * * @param other The Locale object being moved in. * @return *this - * @draft ICU 63 + * @stable ICU 63 */ Locale& operator=(Locale&& other) U_NOEXCEPT; -#endif // U_HIDE_DRAFT_API /** * Checks if two locale keys are the same. @@ -389,7 +388,6 @@ public: UErrorCode& success); #endif /* U_HIDE_SYSTEM_API */ -#ifndef U_HIDE_DRAFT_API /** * Returns a Locale for the specified BCP47 language tag string. * If the specified language tag contains any ill-formed subtags, @@ -405,7 +403,7 @@ public: * @param tag the input BCP47 language tag. * @param status error information if creating the Locale failed. * @return the Locale for the specified BCP47 language tag. - * @draft ICU 63 + * @stable ICU 63 */ static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status); @@ -420,7 +418,7 @@ public: * @param sink the output sink receiving the BCP47 language * tag for this Locale. * @param status error information if creating the language tag failed. - * @draft ICU 63 + * @stable ICU 63 */ void toLanguageTag(ByteSink& sink, UErrorCode& status) const; @@ -432,11 +430,10 @@ public: * * @param status error information if creating the language tag failed. * @return the BCP47 language tag for this Locale. 
- * @draft ICU 63 + * @stable ICU 63 */ template<typename StringClass> inline StringClass toLanguageTag(UErrorCode& status) const; -#endif // U_HIDE_DRAFT_API /** * Creates a locale which has had minimal canonicalization @@ -508,7 +505,6 @@ public: */ const char * getBaseName() const; -#ifndef U_HIDE_DRAFT_API /** * Add the likely subtags for this Locale, per the algorithm described * in the following CLDR technical report: @@ -536,7 +532,7 @@ public: * @param status error information if maximizing this Locale failed. * If this Locale is not well-formed, the error code is * U_ILLEGAL_ARGUMENT_ERROR. - * @draft ICU 63 + * @stable ICU 63 */ void addLikelySubtags(UErrorCode& status); @@ -567,10 +563,9 @@ public: * @param status error information if maximizing this Locale failed. * If this Locale is not well-formed, the error code is * U_ILLEGAL_ARGUMENT_ERROR. - * @draft ICU 63 + * @stable ICU 63 */ void minimizeSubtags(UErrorCode& status); -#endif // U_HIDE_DRAFT_API /** * Gets the list of keywords for the specified locale. @@ -583,8 +578,6 @@ public: */ StringEnumeration * createKeywords(UErrorCode &status) const; -#ifndef U_HIDE_DRAFT_API - /** * Gets the list of Unicode keywords for the specified locale. * @@ -592,7 +585,7 @@ public: * @return pointer to StringEnumeration class, or NULL if there are no keywords. * Client must dispose of it by calling delete. * @see getUnicodeKeywords - * @draft ICU 63 + * @stable ICU 63 */ StringEnumeration * createUnicodeKeywords(UErrorCode &status) const; @@ -605,7 +598,7 @@ public: * * @param iterator an STL style output iterator to write the keywords to. * @param status error information if creating set of keywords failed. - * @draft ICU 63 + * @stable ICU 63 */ template<typename StringClass, typename OutputIterator> inline void getKeywords(OutputIterator iterator, UErrorCode& status) const; @@ -619,13 +612,11 @@ public: * * @param iterator an STL style output iterator to write the keywords to. 
* @param status error information if creating set of keywords failed. - * @draft ICU 63 + * @stable ICU 63 */ template<typename StringClass, typename OutputIterator> inline void getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const; -#endif // U_HIDE_DRAFT_API - /** * Gets the value for a keyword. * @@ -644,7 +635,6 @@ public: */ int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const; -#ifndef U_HIDE_DRAFT_API /** * Gets the value for a keyword. * @@ -656,7 +646,7 @@ public: * @param keywordName name of the keyword for which we want the value. * @param sink the sink to receive the keyword value. * @param status error information if getting the value failed. - * @draft ICU 63 + * @stable ICU 63 */ void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const; @@ -671,7 +661,7 @@ public: * @param keywordName name of the keyword for which we want the value. * @param status error information if getting the value failed. * @return the keyword value. - * @draft ICU 63 + * @stable ICU 63 */ template<typename StringClass> inline StringClass getKeywordValue(StringPiece keywordName, UErrorCode& status) const; @@ -687,7 +677,7 @@ public: * @param keywordName name of the keyword for which we want the value. * @param sink the sink to receive the keyword value. * @param status error information if getting the value failed. - * @draft ICU 63 + * @stable ICU 63 */ void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const; @@ -702,11 +692,10 @@ public: * @param keywordName name of the keyword for which we want the value. * @param status error information if getting the value failed. * @return the keyword value. - * @draft ICU 63 + * @stable ICU 63 */ template<typename StringClass> inline StringClass getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const; -#endif // U_HIDE_DRAFT_API /** * Sets or removes the value for a keyword. 
@@ -729,7 +718,6 @@ public: */ void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status); -#ifndef U_HIDE_DRAFT_API /** * Sets or removes the value for a keyword. * @@ -746,7 +734,7 @@ public: * NULL, will result in the keyword being removed. No error is given if * that keyword does not exist. * @param status Returns any error information while performing this operation. - * @draft ICU 63 + * @stable ICU 63 */ void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status); @@ -766,10 +754,9 @@ public: * NULL, will result in the keyword being removed. No error is given if * that keyword does not exist. * @param status Returns any error information while performing this operation. - * @draft ICU 63 + * @stable ICU 63 */ void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status); -#endif // U_HIDE_DRAFT_API /** * returns the locale's three-letter language code, as specified @@ -1008,6 +995,104 @@ public: */ virtual UClassID getDynamicClassID() const; +#ifndef U_HIDE_DRAFT_API + /** + * A Locale iterator interface similar to a Java Iterator<Locale>. + * @draft ICU 65 + */ + class U_COMMON_API Iterator /* not : public UObject because this is an interface/mixin class */ { + public: + /** @draft ICU 65 */ + virtual ~Iterator(); + + /** + * @return TRUE if next() can be called again. + * @draft ICU 65 + */ + virtual UBool hasNext() const = 0; + + /** + * @return the next locale. + * @draft ICU 65 + */ + virtual const Locale &next() = 0; + }; + + /** + * A generic Locale iterator implementation over Locale input iterators. + * @draft ICU 65 + */ + template<typename Iter> + class RangeIterator : public Iterator, public UMemory { + public: + /** + * Constructs an iterator from a begin/end range. + * Each of the iterator parameter values must be an + * input iterator whose value is convertible to const Locale &. + * + * @param begin Start of range. 
+ * @param end Exclusive end of range. + * @draft ICU 65 + */ + RangeIterator(Iter begin, Iter end) : it_(begin), end_(end) {} + + /** + * @return TRUE if next() can be called again. + * @draft ICU 65 + */ + UBool hasNext() const override { return it_ != end_; } + + /** + * @return the next locale. + * @draft ICU 65 + */ + const Locale &next() override { return *it_++; } + + private: + Iter it_; + const Iter end_; + }; + + /** + * A generic Locale iterator implementation over Locale input iterators. + * Calls the converter to convert each *begin to a const Locale &. + * @draft ICU 65 + */ + template<typename Iter, typename Conv> + class ConvertingIterator : public Iterator, public UMemory { + public: + /** + * Constructs an iterator from a begin/end range. + * Each of the iterator parameter values must be an + * input iterator whose value the converter converts to const Locale &. + * + * @param begin Start of range. + * @param end Exclusive end of range. + * @param converter Converter from *begin to const Locale & or compatible. + * @draft ICU 65 + */ + ConvertingIterator(Iter begin, Iter end, Conv converter) : + it_(begin), end_(end), converter_(converter) {} + + /** + * @return TRUE if next() can be called again. + * @draft ICU 65 + */ + UBool hasNext() const override { return it_ != end_; } + + /** + * @return the next locale. + * @draft ICU 65 + */ + const Locale &next() override { return converter_(*it_++); } + + private: + Iter it_; + const Iter end_; + Conv converter_; + }; +#endif // U_HIDE_DRAFT_API + protected: /* only protected for testing purposes. DO NOT USE. 
*/ #ifndef U_HIDE_INTERNAL_API /** @@ -1074,7 +1159,6 @@ Locale::operator!=(const Locale& other) const return !operator==(other); } -#ifndef U_HIDE_DRAFT_API template<typename StringClass> inline StringClass Locale::toLanguageTag(UErrorCode& status) const { @@ -1083,7 +1167,6 @@ Locale::toLanguageTag(UErrorCode& status) const toLanguageTag(sink, status); return result; } -#endif // U_HIDE_DRAFT_API inline const char * Locale::getCountry() const @@ -1115,13 +1198,11 @@ Locale::getName() const return fullName; } -#ifndef U_HIDE_DRAFT_API - template<typename StringClass, typename OutputIterator> inline void Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const { LocalPointer<StringEnumeration> keys(createKeywords(status)); - if (U_FAILURE(status)) { + if (U_FAILURE(status) || keys.isNull()) { return; } for (;;) { @@ -1138,7 +1219,7 @@ template<typename StringClass, typename OutputIterator> inline void Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const { LocalPointer<StringEnumeration> keys(createUnicodeKeywords(status)); - if (U_FAILURE(status)) { + if (U_FAILURE(status) || keys.isNull()) { return; } for (;;) { @@ -1169,8 +1250,6 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) cons return result; } -#endif // U_HIDE_DRAFT_API - inline UBool Locale::isBogus(void) const { return fIsBogus; @@ -1178,4 +1257,6 @@ Locale::isBogus(void) const { U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/messagepattern.h b/deps/icu-small/source/common/unicode/messagepattern.h index 9f2a86551c..04f00a8757 100644 --- a/deps/icu-small/source/common/unicode/messagepattern.h +++ b/deps/icu-small/source/common/unicode/messagepattern.h @@ -24,6 +24,8 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + #if !UCONFIG_NO_FORMATTING #include "unicode/parseerr.h" @@ -942,4 +944,6 @@ U_NAMESPACE_END #endif // !UCONFIG_NO_FORMATTING +#endif /* 
U_SHOW_CPLUSPLUS_API */ + #endif // __MESSAGEPATTERN_H__ diff --git a/deps/icu-small/source/common/unicode/normalizer2.h b/deps/icu-small/source/common/unicode/normalizer2.h index 4caa0e3103..4aeb3bb3d8 100644 --- a/deps/icu-small/source/common/unicode/normalizer2.h +++ b/deps/icu-small/source/common/unicode/normalizer2.h @@ -26,6 +26,8 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + #if !UCONFIG_NO_NORMALIZATION #include "unicode/stringpiece.h" @@ -771,4 +773,7 @@ private: U_NAMESPACE_END #endif // !UCONFIG_NO_NORMALIZATION + +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __NORMALIZER2_H__ diff --git a/deps/icu-small/source/common/unicode/normlzr.h b/deps/icu-small/source/common/unicode/normlzr.h index 82335ae6d7..00dd820474 100644 --- a/deps/icu-small/source/common/unicode/normlzr.h +++ b/deps/icu-small/source/common/unicode/normlzr.h @@ -13,6 +13,8 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + /** * \file * \brief C++ API: Unicode Normalization @@ -183,6 +185,7 @@ public: Normalizer(const CharacterIterator& iter, UNormalizationMode mode); #endif /* U_HIDE_DEPRECATED_API */ +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * Copy constructor. * @param copy The object to be copied. @@ -195,7 +198,7 @@ public: * @deprecated ICU 56 Use Normalizer2 instead. */ virtual ~Normalizer(); - +#endif // U_FORCE_HIDE_DEPRECATED_API //------------------------------------------------------------------------- // Static utility methods @@ -599,7 +602,7 @@ public: * @return a pointer to a new Normalizer * @deprecated ICU 56 Use Normalizer2 instead. */ - Normalizer* clone(void) const; + Normalizer* clone() const; /** * Generates a hash code for this iterator. @@ -723,12 +726,14 @@ public: static UClassID U_EXPORT2 getStaticClassID(); #endif /* U_HIDE_DEPRECATED_API */ +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * ICU "poor man's RTTI", returns a UClassID for the actual class. * @return a UClassID for the actual class. * @deprecated ICU 56 Use Normalizer2 instead. 
*/ virtual UClassID getDynamicClassID() const; +#endif // U_FORCE_HIDE_DEPRECATED_API private: //------------------------------------------------------------------------- @@ -807,3 +812,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_NORMALIZATION */ #endif // NORMLZR_H + +#endif /* U_SHOW_CPLUSPLUS_API */ diff --git a/deps/icu-small/source/common/unicode/parsepos.h b/deps/icu-small/source/common/unicode/parsepos.h index c02c816956..ae5754b8d7 100644 --- a/deps/icu-small/source/common/unicode/parsepos.h +++ b/deps/icu-small/source/common/unicode/parsepos.h @@ -19,6 +19,9 @@ #define PARSEPOS_H #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" @@ -229,4 +232,6 @@ ParsePosition::setErrorIndex(int32_t ei) } U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h index ee0d8b7a00..74689d790c 100644 --- a/deps/icu-small/source/common/unicode/platform.h +++ b/deps/icu-small/source/common/unicode/platform.h @@ -135,6 +135,14 @@ /** Fuchsia is a POSIX-ish platform. @internal */ #define U_PF_FUCHSIA 4100 /* Maximum value for Linux-based platform is 4499 */ +/** + * Emscripten is a C++ transpiler for the Web that can target asm.js or + * WebAssembly. It provides some POSIX-compatible wrappers and stubs and + * some Linux-like functionality, but is not fully compatible with + * either. + * @internal + */ +#define U_PF_EMSCRIPTEN 5010 /** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ #define U_PF_OS390 9000 /** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. 
@internal */ @@ -192,6 +200,8 @@ # define U_PLATFORM U_PF_OS390 #elif defined(__OS400__) || defined(__TOS_OS400__) # define U_PLATFORM U_PF_OS400 +#elif defined(__EMSCRIPTEN__) +# define U_PLATFORM U_PF_EMSCRIPTEN #else # define U_PLATFORM U_PF_UNKNOWN #endif @@ -414,26 +424,40 @@ #endif /* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */ -#ifndef __has_attribute -# define __has_attribute(x) 0 +#ifdef __has_attribute +# define UPRV_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +# define UPRV_HAS_ATTRIBUTE(x) 0 #endif -#ifndef __has_cpp_attribute -# define __has_cpp_attribute(x) 0 +#ifdef __has_cpp_attribute +# define UPRV_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define UPRV_HAS_CPP_ATTRIBUTE(x) 0 #endif -#ifndef __has_declspec_attribute -# define __has_declspec_attribute(x) 0 +#ifdef __has_declspec_attribute +# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) __has_declspec_attribute(x) +#else +# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) 0 #endif -#ifndef __has_builtin -# define __has_builtin(x) 0 +#ifdef __has_builtin +# define UPRV_HAS_BUILTIN(x) __has_builtin(x) +#else +# define UPRV_HAS_BUILTIN(x) 0 #endif -#ifndef __has_feature -# define __has_feature(x) 0 +#ifdef __has_feature +# define UPRV_HAS_FEATURE(x) __has_feature(x) +#else +# define UPRV_HAS_FEATURE(x) 0 #endif -#ifndef __has_extension -# define __has_extension(x) 0 +#ifdef __has_extension +# define UPRV_HAS_EXTENSION(x) __has_extension(x) +#else +# define UPRV_HAS_EXTENSION(x) 0 #endif -#ifndef __has_warning -# define __has_warning(x) 0 +#ifdef __has_warning +# define UPRV_HAS_WARNING(x) __has_warning(x) +#else +# define UPRV_HAS_WARNING(x) 0 #endif /** @@ -452,7 +476,9 @@ * Attribute to specify the size of the allocated buffer for malloc-like functions * @internal */ -#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_attribute(alloc_size) +#if (defined(__GNUC__) && \ + (__GNUC__ > 4 || (__GNUC__ == 4 && 
__GNUC_MINOR__ >= 3))) || \ + UPRV_HAS_ATTRIBUTE(alloc_size) # define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X))) # define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y))) #else @@ -516,8 +542,9 @@ namespace std { #elif defined(__clang__) // Test for compiler vs. feature separately. // Other compilers might choke on the feature test. -# if __has_cpp_attribute(clang::fallthrough) || \ - (__has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")) +# if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \ + (UPRV_HAS_FEATURE(cxx_attributes) && \ + UPRV_HAS_WARNING("-Wimplicit-fallthrough")) # define U_FALLTHROUGH [[clang::fallthrough]] # endif #elif defined(__GNUC__) && (__GNUC__ >= 7) @@ -620,7 +647,8 @@ namespace std { */ #ifdef U_CHARSET_IS_UTF8 /* Use the predefined value. */ -#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED +#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \ + U_PLATFORM == U_PF_EMSCRIPTEN # define U_CHARSET_IS_UTF8 1 #else # define U_CHARSET_IS_UTF8 0 @@ -707,7 +735,7 @@ namespace std { * narrow-character strings are in EBCDIC. */ # define U_SIZEOF_WCHAR_T 2 -#else +# else /* * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified. * Wide-character strings are in 16-bit EBCDIC, @@ -786,7 +814,8 @@ namespace std { /* Use the predefined value. */ #elif defined(U_STATIC_IMPLEMENTATION) # define U_EXPORT -#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport)) +#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \ + UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport)) # define U_EXPORT __declspec(dllexport) #elif defined(__GNUC__) # define U_EXPORT __attribute__((visibility("default"))) @@ -810,7 +839,8 @@ namespace std { #ifdef U_IMPORT /* Use the predefined value. 
*/ -#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport)) +#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \ + UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport)) /* Windows needs to export/import data. */ # define U_IMPORT __declspec(dllimport) #else @@ -852,4 +882,4 @@ namespace std { #endif /* @} */ -#endif +#endif // _PLATFORM_H diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h index 365ae2d3d2..7825f603a5 100644 --- a/deps/icu-small/source/common/unicode/rbbi.h +++ b/deps/icu-small/source/common/unicode/rbbi.h @@ -18,6 +18,8 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + /** * \file * \brief C++ API: Rule Based Break Iterator @@ -272,7 +274,7 @@ public: * @return a newly-constructed RuleBasedBreakIterator * @stable ICU 2.0 */ - virtual BreakIterator* clone() const; + virtual RuleBasedBreakIterator* clone() const; /** * Compute a hash code for this BreakIterator @@ -536,6 +538,7 @@ public: */ static UClassID U_EXPORT2 getStaticClassID(void); +#ifndef U_FORCE_HIDE_DEPRECATED_API /** * Deprecated functionality. Use clone() instead. * @@ -562,10 +565,10 @@ public: * or if the stackBuffer was too small to hold the clone. * @deprecated ICU 52. Use clone() instead. 
*/ - virtual BreakIterator * createBufferClone(void *stackBuffer, - int32_t &BufferSize, - UErrorCode &status); - + virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer, + int32_t &BufferSize, + UErrorCode &status); +#endif // U_FORCE_HIDE_DEPRECATED_API /** * Return the binary form of compiled break rules, @@ -696,4 +699,6 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/rep.h b/deps/icu-small/source/common/unicode/rep.h index b1023a37a2..f66c2ac060 100644 --- a/deps/icu-small/source/common/unicode/rep.h +++ b/deps/icu-small/source/common/unicode/rep.h @@ -16,6 +16,10 @@ #ifndef REP_H #define REP_H +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" /** @@ -189,9 +193,6 @@ public: * Clones can be used concurrently in multiple threads. * If a subclass does not implement clone(), or if an error occurs, * then NULL is returned. - * The clone functions in all subclasses return a pointer to a Replaceable - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. * The caller must delete the clone. 
* * @return a clone of this object @@ -260,4 +261,6 @@ Replaceable::char32At(int32_t offset) const { U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/resbund.h b/deps/icu-small/source/common/unicode/resbund.h index ab0b60bbb2..708a3423d2 100644 --- a/deps/icu-small/source/common/unicode/resbund.h +++ b/deps/icu-small/source/common/unicode/resbund.h @@ -49,6 +49,9 @@ #define RESBUND_H #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" #include "unicode/ures.h" #include "unicode/unistr.h" @@ -489,4 +492,7 @@ private: }; U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/schriter.h b/deps/icu-small/source/common/unicode/schriter.h index 1a12769e8d..4925ecfe44 100644 --- a/deps/icu-small/source/common/unicode/schriter.h +++ b/deps/icu-small/source/common/unicode/schriter.h @@ -21,6 +21,9 @@ #define SCHRITER_H #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/chariter.h" #include "unicode/uchriter.h" @@ -130,7 +133,7 @@ public: * @return the newly cloned object. * @stable ICU 2.0 */ - virtual CharacterIterator* clone(void) const; + virtual StringCharacterIterator* clone() const; /** * Sets the iterator to iterate over the provided string. 
@@ -186,4 +189,7 @@ protected: }; U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/simpleformatter.h b/deps/icu-small/source/common/unicode/simpleformatter.h index 3f7d93dc09..9414bca308 100644 --- a/deps/icu-small/source/common/unicode/simpleformatter.h +++ b/deps/icu-small/source/common/unicode/simpleformatter.h @@ -17,6 +17,9 @@ */ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/unistr.h" U_NAMESPACE_BEGIN @@ -333,4 +336,6 @@ private: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __SIMPLEFORMATTER_H__ diff --git a/deps/icu-small/source/common/unicode/std_string.h b/deps/icu-small/source/common/unicode/std_string.h index 729c563995..bf87230167 100644 --- a/deps/icu-small/source/common/unicode/std_string.h +++ b/deps/icu-small/source/common/unicode/std_string.h @@ -27,6 +27,8 @@ #include "unicode/utypes.h" +#if U_SHOW_CPLUSPLUS_API + // Workaround for a libstdc++ bug before libstdc++4.6 (2011). // https://bugs.llvm.org/show_bug.cgi?id=13364 #if defined(__GLIBCXX__) @@ -34,4 +36,6 @@ namespace std { class type_info; } #endif #include <string> +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __STD_STRING_H__ diff --git a/deps/icu-small/source/common/unicode/strenum.h b/deps/icu-small/source/common/unicode/strenum.h index fa525d4f52..e813cd84b3 100644 --- a/deps/icu-small/source/common/unicode/strenum.h +++ b/deps/icu-small/source/common/unicode/strenum.h @@ -12,6 +12,10 @@ #ifndef STRENUM_H #define STRENUM_H +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" #include "unicode/unistr.h" @@ -67,9 +71,6 @@ public: * Clones can be used concurrently in multiple threads. * If a subclass does not implement clone(), or if an error occurs, * then NULL is returned. 
- * The clone functions in all subclasses return a base class pointer - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. * The caller must delete the clone. * * @return a clone of this object @@ -274,5 +275,7 @@ protected: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + /* STRENUM_H */ #endif diff --git a/deps/icu-small/source/common/unicode/stringpiece.h b/deps/icu-small/source/common/unicode/stringpiece.h index 640fbac5a8..15cebb0f20 100644 --- a/deps/icu-small/source/common/unicode/stringpiece.h +++ b/deps/icu-small/source/common/unicode/stringpiece.h @@ -28,6 +28,12 @@ */ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include <cstddef> +#include <type_traits> + #include "unicode/uobject.h" #include "unicode/std_string.h" @@ -74,6 +80,33 @@ class U_COMMON_API StringPiece : public UMemory { */ StringPiece(const std::string& str) : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } +#ifndef U_HIDE_DRAFT_API + /** + * Constructs from some other implementation of a string piece class, from any + * C++ record type that has these two methods: + * + * \code{.cpp} + * + * struct OtherStringPieceClass { + * const char* data(); + * size_t size(); + * }; + * + * \endcode + * + * The other string piece class will typically be std::string_view from C++17 + * or absl::string_view from Abseil. + * + * @param str the other string piece + * @draft ICU 65 + */ + template <typename T, + typename = typename std::enable_if< + std::is_same<decltype(T().data()), const char*>::value && + std::is_same<decltype(T().size()), size_t>::value>::type> + StringPiece(T str) + : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) {} +#endif // U_HIDE_DRAFT_API /** * Constructs from a const char * pointer and a specified length. 
* @param offset a const char * pointer (need not be terminated) @@ -221,4 +254,6 @@ inline UBool operator!=(const StringPiece& x, const StringPiece& y) { U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __STRINGPIECE_H__ diff --git a/deps/icu-small/source/common/unicode/stringtriebuilder.h b/deps/icu-small/source/common/unicode/stringtriebuilder.h index c27fbd6796..2860cbf551 100644 --- a/deps/icu-small/source/common/unicode/stringtriebuilder.h +++ b/deps/icu-small/source/common/unicode/stringtriebuilder.h @@ -18,6 +18,9 @@ #define __STRINGTRIEBUILDER_H__ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" /** @@ -418,4 +421,6 @@ protected: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __STRINGTRIEBUILDER_H__ diff --git a/deps/icu-small/source/common/unicode/symtable.h b/deps/icu-small/source/common/unicode/symtable.h index c2dc95a61b..f5a77b01ec 100644 --- a/deps/icu-small/source/common/unicode/symtable.h +++ b/deps/icu-small/source/common/unicode/symtable.h @@ -13,6 +13,9 @@ #define SYMTABLE_H #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" /** @@ -111,4 +114,6 @@ public: }; U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h index d70c964e38..1b7ee099be 100644 --- a/deps/icu-small/source/common/unicode/uchar.h +++ b/deps/icu-small/source/common/unicode/uchar.h @@ -2578,8 +2578,6 @@ typedef enum UVerticalOrientation { U_STABLE UBool U_EXPORT2 u_hasBinaryProperty(UChar32 c, UProperty which); -#ifndef U_HIDE_DRAFT_API - /** * Returns a frozen USet for a binary property. * The library retains ownership over the returned object. 
@@ -2593,13 +2591,11 @@ u_hasBinaryProperty(UChar32 c, UProperty which); * @see UProperty * @see u_hasBinaryProperty * @see Unicode::fromUSet - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI const USet * U_EXPORT2 u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode); -#endif // U_HIDE_DRAFT_API - /** * Check if a code point has the Alphabetic Unicode property. * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). @@ -2757,8 +2753,6 @@ u_getIntPropertyMinValue(UProperty which); U_STABLE int32_t U_EXPORT2 u_getIntPropertyMaxValue(UProperty which); -#ifndef U_HIDE_DRAFT_API - /** * Returns an immutable UCPMap for an enumerated/catalog/int-valued property. * The library retains ownership over the returned object. @@ -2772,13 +2766,11 @@ u_getIntPropertyMaxValue(UProperty which); * @return the property as a map * @see UProperty * @see u_getIntPropertyValue - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI const UCPMap * U_EXPORT2 u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode); -#endif // U_HIDE_DRAFT_API - /** * Get the numeric value for a Unicode code point as defined in the * Unicode Character Database. @@ -3197,15 +3189,14 @@ U_STABLE UBool U_EXPORT2 u_isprint(UChar32 c); /** - * Determines whether the specified code point is a base character. + * Non-standard: Determines whether the specified code point is a base character. * True for general categories "L" (letters), "N" (numbers), * "Mc" (spacing combining marks), and "Me" (enclosing marks). * - * Note that this is different from the Unicode definition in - * chapter 3.5, conformance clause D13, - * which defines base characters to be all characters (not Cn) - * that do not graphically combine with preceding characters (M) - * and that are neither control (Cc) or format (Cf) characters. 
+ * Note that this is different from the Unicode Standard definition in + * chapter 3.6, conformance clause D51 “Base character”, + * which defines base characters as the code points with general categories + * Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs). * * @param c the code point to be tested * @return TRUE if the code point is a base character according to this function diff --git a/deps/icu-small/source/common/unicode/ucharstrie.h b/deps/icu-small/source/common/unicode/ucharstrie.h index dfc93f6d0b..d5729d944e 100644 --- a/deps/icu-small/source/common/unicode/ucharstrie.h +++ b/deps/icu-small/source/common/unicode/ucharstrie.h @@ -24,6 +24,9 @@ */ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/unistr.h" #include "unicode/uobject.h" #include "unicode/ustringtrie.h" @@ -94,6 +97,41 @@ public: return *this; } +#ifndef U_HIDE_DRAFT_API + /** + * Returns the state of this trie as a 64-bit integer. + * The state value is never 0. + * + * @return opaque state value + * @see resetToState64 + * @draft ICU 65 + */ + uint64_t getState64() const { + return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) | + (uint64_t)(pos_ - uchars_); + } + + /** + * Resets this trie to the saved state. + * Unlike resetToState(State), the 64-bit state value + * must be from getState64() from the same trie object or + * from one initialized the exact same way. + * Because of no validation, this method is faster. + * + * @param state The opaque trie state value from getState64(). 
+ * @return *this + * @see getState64 + * @see resetToState + * @see reset + * @draft ICU 65 + */ + UCharsTrie &resetToState64(uint64_t state) { + remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2; + pos_ = uchars_ + (state & kState64PosMask); + return *this; + } +#endif /* U_HIDE_DRAFT_API */ + /** * UCharsTrie state object, for saving a trie's current state * and resetting the trie back to this state later. @@ -560,6 +598,13 @@ private: static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff + // For getState64(): + // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2 + // so we need at least 5 bits for that. + // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength. + static constexpr int32_t kState64RemainingShift = 59; + static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1; + char16_t *ownedArray_; // Fixed value referencing the UCharsTrie words. 
@@ -575,4 +620,6 @@ private: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __UCHARSTRIE_H__ diff --git a/deps/icu-small/source/common/unicode/ucharstriebuilder.h b/deps/icu-small/source/common/unicode/ucharstriebuilder.h index 2aa4757e52..540dcc047f 100644 --- a/deps/icu-small/source/common/unicode/ucharstriebuilder.h +++ b/deps/icu-small/source/common/unicode/ucharstriebuilder.h @@ -18,6 +18,9 @@ #define __UCHARSTRIEBUILDER_H__ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/stringtriebuilder.h" #include "unicode/ucharstrie.h" #include "unicode/unistr.h" @@ -184,4 +187,6 @@ private: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif // __UCHARSTRIEBUILDER_H__ diff --git a/deps/icu-small/source/common/unicode/uchriter.h b/deps/icu-small/source/common/unicode/uchriter.h index 38f67c5b45..bee842cc25 100644 --- a/deps/icu-small/source/common/unicode/uchriter.h +++ b/deps/icu-small/source/common/unicode/uchriter.h @@ -11,6 +11,9 @@ #define UCHRITER_H #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/chariter.h" /** @@ -132,7 +135,7 @@ public: * @return the CharacterIterator newly created * @stable ICU 2.0 */ - virtual CharacterIterator* clone(void) const; + virtual UCharCharacterIterator* clone() const; /** * Sets the iterator to refer to the first code unit in its @@ -384,4 +387,7 @@ protected: }; U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/uconfig.h b/deps/icu-small/source/common/unicode/uconfig.h index 9c82d82812..c0157efe6d 100644 --- a/deps/icu-small/source/common/unicode/uconfig.h +++ b/deps/icu-small/source/common/unicode/uconfig.h @@ -453,4 +453,4 @@ # define UCONFIG_NO_FILTERED_BREAK_ITERATION 0 #endif -#endif +#endif // __UCONFIG_H__ diff --git a/deps/icu-small/source/common/unicode/ucpmap.h b/deps/icu-small/source/common/unicode/ucpmap.h index f2c42b6b7f..31e1365cac 100644 --- 
a/deps/icu-small/source/common/unicode/ucpmap.h +++ b/deps/icu-small/source/common/unicode/ucpmap.h @@ -9,8 +9,6 @@ #include "unicode/utypes.h" -#ifndef U_HIDE_DRAFT_API - U_CDECL_BEGIN /** @@ -28,7 +26,7 @@ U_CDECL_BEGIN * * @see UCPTrie * @see UMutableCPTrie - * @draft ICU 63 + * @stable ICU 63 */ typedef struct UCPMap UCPMap; @@ -39,13 +37,13 @@ typedef struct UCPMap UCPMap; * @see ucpmap_getRange * @see ucptrie_getRange * @see umutablecptrie_getRange - * @draft ICU 63 + * @stable ICU 63 */ enum UCPMapRangeOption { /** * ucpmap_getRange() enumerates all same-value ranges as stored in the map. * Most users should use this option. - * @draft ICU 63 + * @stable ICU 63 */ UCPMAP_RANGE_NORMAL, /** @@ -61,7 +59,7 @@ enum UCPMapRangeOption { * special values optimized for UTF-16 string processing * or for special error behavior for unpaired surrogates, * but those values are not to be associated with the lead surrogate code *points*. - * @draft ICU 63 + * @stable ICU 63 */ UCPMAP_RANGE_FIXED_LEAD_SURROGATES, /** @@ -77,7 +75,7 @@ enum UCPMapRangeOption { * special values optimized for UTF-16 string processing * or for special error behavior for unpaired surrogates, * but those values are not to be associated with the lead surrogate code *points*. 
- * @draft ICU 63 + * @stable ICU 63 */ UCPMAP_RANGE_FIXED_ALL_SURROGATES }; @@ -93,7 +91,7 @@ typedef enum UCPMapRangeOption UCPMapRangeOption; * @param c the code point * @return the map value, * or an implementation-defined error value if the code point is not in the range 0..U+10FFFF - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI uint32_t U_EXPORT2 ucpmap_get(const UCPMap *map, UChar32 c); @@ -110,7 +108,7 @@ ucpmap_get(const UCPMap *map, UChar32 c); * @param context an opaque pointer, as passed into the getRange function * @param value a value from the map * @return the modified value - * @draft ICU 63 + * @stable ICU 63 */ typedef uint32_t U_CALLCONV UCPMapValueFilter(const void *context, uint32_t value); @@ -149,7 +147,7 @@ UCPMapValueFilter(const void *context, uint32_t value); * may have been modified by filter(context, map value) * if that function pointer is not NULL * @return the range end code point, or -1 if start is not a valid code point - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UChar32 U_EXPORT2 ucpmap_getRange(const UCPMap *map, UChar32 start, @@ -158,5 +156,4 @@ ucpmap_getRange(const UCPMap *map, UChar32 start, U_CDECL_END -#endif // U_HIDE_DRAFT_API #endif diff --git a/deps/icu-small/source/common/unicode/ucptrie.h b/deps/icu-small/source/common/unicode/ucptrie.h index 2718c984e4..be06a22792 100644 --- a/deps/icu-small/source/common/unicode/ucptrie.h +++ b/deps/icu-small/source/common/unicode/ucptrie.h @@ -9,8 +9,6 @@ #include "unicode/utypes.h" -#ifndef U_HIDE_DRAFT_API - #include "unicode/localpointer.h" #include "unicode/ucpmap.h" #include "unicode/utf8.h" @@ -55,7 +53,7 @@ typedef union UCPTrieData { * The macros will return bogus values, or may crash, if used on the wrong type or value width. 
* * @see UMutableCPTrie - * @draft ICU 63 + * @stable ICU 63 */ struct UCPTrie { #ifndef U_IN_DOXYGEN @@ -115,23 +113,23 @@ typedef struct UCPTrie UCPTrie; * @see umutablecptrie_buildImmutable * @see ucptrie_openFromBinary * @see ucptrie_getType - * @draft ICU 63 + * @stable ICU 63 */ enum UCPTrieType { /** * For ucptrie_openFromBinary() to accept any type. * ucptrie_getType() will return the actual type. - * @draft ICU 63 + * @stable ICU 63 */ UCPTRIE_TYPE_ANY = -1, /** * Fast/simple/larger BMP data structure. Use functions and "fast" macros. - * @draft ICU 63 + * @stable ICU 63 */ UCPTRIE_TYPE_FAST, /** * Small/slower BMP data structure. Use functions and "small" macros. - * @draft ICU 63 + * @stable ICU 63 */ UCPTRIE_TYPE_SMALL }; @@ -145,30 +143,30 @@ typedef enum UCPTrieType UCPTrieType; * @see umutablecptrie_buildImmutable * @see ucptrie_openFromBinary * @see ucptrie_getValueWidth - * @draft ICU 63 + * @stable ICU 63 */ enum UCPTrieValueWidth { /** * For ucptrie_openFromBinary() to accept any data value width. * ucptrie_getValueWidth() will return the actual data value width. - * @draft ICU 63 + * @stable ICU 63 */ UCPTRIE_VALUE_BITS_ANY = -1, /** * The trie stores 16 bits per data value. * It returns them as unsigned values 0..0xffff=65535. - * @draft ICU 63 + * @stable ICU 63 */ UCPTRIE_VALUE_BITS_16, /** * The trie stores 32 bits per data value. - * @draft ICU 63 + * @stable ICU 63 */ UCPTRIE_VALUE_BITS_32, /** * The trie stores 8 bits per data value. * It returns them as unsigned values 0..0xff=255. 
- * @draft ICU 63 + * @stable ICU 63 */ UCPTRIE_VALUE_BITS_8 }; @@ -200,7 +198,7 @@ typedef enum UCPTrieValueWidth UCPTrieValueWidth; * @see umutablecptrie_open * @see umutablecptrie_buildImmutable * @see ucptrie_toBinary - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UCPTrie * U_EXPORT2 ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, @@ -211,30 +209,11 @@ ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, * Closes a trie and releases associated memory. * * @param trie the trie - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI void U_EXPORT2 ucptrie_close(UCPTrie *trie); -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUCPTriePointer - * "Smart pointer" class, closes a UCPTrie via ucptrie_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @draft ICU 63 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close); - -U_NAMESPACE_END - -#endif - /** * Returns the trie type. 
* @@ -242,7 +221,7 @@ U_NAMESPACE_END * @return the trie type * @see ucptrie_openFromBinary * @see UCPTRIE_TYPE_ANY - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UCPTrieType U_EXPORT2 ucptrie_getType(const UCPTrie *trie); @@ -254,7 +233,7 @@ ucptrie_getType(const UCPTrie *trie); * @return the number of bits in a trie data value * @see ucptrie_openFromBinary * @see UCPTRIE_VALUE_BITS_ANY - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UCPTrieValueWidth U_EXPORT2 ucptrie_getValueWidth(const UCPTrie *trie); @@ -271,7 +250,7 @@ ucptrie_getValueWidth(const UCPTrie *trie); * @param c the code point * @return the trie value, * or the trie error value if the code point is not in the range 0..U+10FFFF - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI uint32_t U_EXPORT2 ucptrie_get(const UCPTrie *trie, UChar32 c); @@ -310,7 +289,7 @@ ucptrie_get(const UCPTrie *trie, UChar32 c); * may have been modified by filter(context, trie value) * if that function pointer is not NULL * @return the range end code point, or -1 if start is not a valid code point - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UChar32 U_EXPORT2 ucptrie_getRange(const UCPTrie *trie, UChar32 start, @@ -330,7 +309,7 @@ ucptrie_getRange(const UCPTrie *trie, UChar32 start, * @return the number of bytes written or (if buffer overflow) needed for the trie * * @see ucptrie_openFromBinary() - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI int32_t U_EXPORT2 ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode); @@ -341,7 +320,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * Do not use this macro in any other way. * * @see UCPTRIE_VALUE_BITS_16 - * @draft ICU 63 + * @stable ICU 63 */ #define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i]) @@ -351,7 +330,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * Do not use this macro in any other way. 
* * @see UCPTRIE_VALUE_BITS_32 - * @draft ICU 63 + * @stable ICU 63 */ #define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i]) @@ -361,7 +340,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * Do not use this macro in any other way. * * @see UCPTRIE_VALUE_BITS_8 - * @draft ICU 63 + * @stable ICU 63 */ #define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i]) @@ -373,7 +352,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width * @param c (UChar32, in) the input code point * @return The code point's trie value. - * @draft ICU 63 + * @stable ICU 63 */ #define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c)) @@ -385,7 +364,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width * @param c (UChar32, in) the input code point * @return The code point's trie value. 
- * @draft ICU 63 + * @stable ICU 63 */ #define UCPTRIE_SMALL_GET(trie, dataAccess, c) \ dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c)) @@ -401,9 +380,9 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated * @param c (UChar32, out) variable for the code point * @param result (out) variable for the trie lookup result - * @draft ICU 63 + * @stable ICU 63 */ -#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) { \ +#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \ (c) = *(src)++; \ int32_t __index; \ if (!U16_IS_SURROGATE(c)) { \ @@ -419,7 +398,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * } \ } \ (result) = dataAccess(trie, __index); \ -} +} UPRV_BLOCK_MACRO_END /** * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src, @@ -432,9 +411,9 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param src (const UChar *, in/out) the source text pointer * @param c (UChar32, out) variable for the code point * @param result (out) variable for the trie lookup result - * @draft ICU 63 + * @stable ICU 63 */ -#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) { \ +#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \ (c) = *--(src); \ int32_t __index; \ if (!U16_IS_SURROGATE(c)) { \ @@ -450,7 +429,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * } \ } \ (result) = dataAccess(trie, __index); \ -} +} UPRV_BLOCK_MACRO_END /** * UTF-8: Post-increments src and gets a value from the trie. 
@@ -466,9 +445,9 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param src (const char *, in/out) the source text pointer * @param limit (const char *, in) the limit pointer for the text (must not be NULL) * @param result (out) variable for the trie lookup result - * @draft ICU 63 + * @stable ICU 63 */ -#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) { \ +#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __lead = (uint8_t)*(src)++; \ if (!U8_IS_SINGLE(__lead)) { \ uint8_t __t1, __t2, __t3; \ @@ -496,7 +475,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * } \ } \ (result) = dataAccess(trie, __lead); \ -} +} UPRV_BLOCK_MACRO_END /** * UTF-8: Pre-decrements src and gets a value from the trie. @@ -512,9 +491,9 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param start (const char *, in) the start pointer for the text * @param src (const char *, in/out) the source text pointer * @param result (out) variable for the trie lookup result - * @draft ICU 63 + * @stable ICU 63 */ -#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) { \ +#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __index = (uint8_t)*--(src); \ if (!U8_IS_SINGLE(__index)) { \ __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \ @@ -523,7 +502,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * __index >>= 3; \ } \ (result) = dataAccess(trie, __index); \ -} +} UPRV_BLOCK_MACRO_END /** * Returns a trie value for an ASCII code point, without range checking. 
@@ -532,7 +511,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width * @param c (UChar32, in) the input code point; must be U+0000..U+007F * @return The ASCII code point's trie value. - * @draft ICU 63 + * @stable ICU 63 */ #define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c) @@ -545,7 +524,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF * @return The BMP code point's trie value. - * @draft ICU 63 + * @stable ICU 63 */ #define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c)) @@ -557,7 +536,7 @@ ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode * * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF * @return The supplementary code point's trie value. - * @draft ICU 63 + * @stable ICU 63 */ #define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c)) @@ -642,5 +621,24 @@ ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, U_CDECL_END #endif // U_IN_DOXYGEN -#endif // U_HIDE_DRAFT_API + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCPTriePointer + * "Smart pointer" class, closes a UCPTrie via ucptrie_close(). + * For most methods see the LocalPointerBase base class. 
+ * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 63 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close); + +U_NAMESPACE_END + +#endif // U_SHOW_CPLUSPLUS_API + #endif diff --git a/deps/icu-small/source/common/unicode/ucurr.h b/deps/icu-small/source/common/unicode/ucurr.h index a1c6de80b7..7149e7adf1 100644 --- a/deps/icu-small/source/common/unicode/ucurr.h +++ b/deps/icu-small/source/common/unicode/ucurr.h @@ -159,14 +159,14 @@ ucurr_unregister(UCurrRegistryKey key, UErrorCode* status); * @param currency null-terminated 3-letter ISO 4217 code * @param locale locale in which to display currency * @param nameStyle selector for which kind of name to return - * @param isChoiceFormat fill-in set to TRUE if the returned value - * is a ChoiceFormat pattern; otherwise it is a static string + * @param isChoiceFormat always set to FALSE, or can be NULL; + * display names are static strings; + * since ICU 4.4, ChoiceFormat patterns are no longer supported * @param len fill-in parameter to receive length of result * @param ec error code * @return pointer to display string of 'len' UChars. If the resource * data contains no entry for 'currency', then 'currency' itself is - * returned. If *isChoiceFormat is TRUE, then the result is a - * ChoiceFormat pattern. Otherwise it is a static string. + * returned. * @stable ICU 2.6 */ U_STABLE const UChar* U_EXPORT2 @@ -183,8 +183,9 @@ ucurr_getName(const UChar* currency, * currency object in the en_US locale is "US dollar" or "US dollars". 
* @param currency null-terminated 3-letter ISO 4217 code * @param locale locale in which to display currency - * @param isChoiceFormat fill-in set to TRUE if the returned value - * is a ChoiceFormat pattern; otherwise it is a static string + * @param isChoiceFormat always set to FALSE, or can be NULL; + * display names are static strings; + * since ICU 4.4, ChoiceFormat patterns are no longer supported * @param pluralCount plural count * @param len fill-in parameter to receive length of result * @param ec error code @@ -320,7 +321,7 @@ typedef enum UCurrCurrencyType { * Provides a UEnumeration object for listing ISO-4217 codes. * @param currType You can use one of several UCurrCurrencyType values for this * variable. You can also | (or) them together to get a specific list of - * currencies. Most people will want to use the (UCURR_CURRENCY|UCURR_NON_DEPRECATED) value to + * currencies. Most people will want to use the (UCURR_COMMON|UCURR_NON_DEPRECATED) value to * get a list of current currencies. * @param pErrorCode Error code * @stable ICU 3.2 diff --git a/deps/icu-small/source/common/unicode/udata.h b/deps/icu-small/source/common/unicode/udata.h index 6419c359f6..8236877b44 100644 --- a/deps/icu-small/source/common/unicode/udata.h +++ b/deps/icu-small/source/common/unicode/udata.h @@ -264,25 +264,6 @@ udata_openChoice(const char *path, const char *type, const char *name, U_STABLE void U_EXPORT2 udata_close(UDataMemory *pData); -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUDataMemoryPointer - * "Smart pointer" class, closes a UDataMemory via udata_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close); - -U_NAMESPACE_END - -#endif - /** * Get the pointer to the actual data inside the data memory. * The data is read-only. 
@@ -434,4 +415,23 @@ udata_setFileAccess(UDataFileAccess access, UErrorCode *status); U_CDECL_END +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUDataMemoryPointer + * "Smart pointer" class, closes a UDataMemory via udata_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close); + +U_NAMESPACE_END + +#endif // U_SHOW_CPLUSPLUS_API + #endif diff --git a/deps/icu-small/source/common/unicode/uloc.h b/deps/icu-small/source/common/unicode/uloc.h index 5531070841..882f79bedc 100644 --- a/deps/icu-small/source/common/unicode/uloc.h +++ b/deps/icu-small/source/common/unicode/uloc.h @@ -742,12 +742,18 @@ uloc_getDisplayName(const char* localeID, /** - * Gets the specified locale from a list of all available locales. - * The return value is a pointer to an item of - * a locale name array. Both this array and the pointers - * it contains are owned by ICU and should not be deleted or written through - * by the caller. The locale name is terminated by a null pointer. - * @param n the specific locale name index of the available locale list + * Gets the specified locale from a list of available locales. + * + * This method corresponds to uloc_openAvailableByType called with the + * ULOC_AVAILABLE_DEFAULT type argument. + * + * The return value is a pointer to an item of a locale name array. Both this + * array and the pointers it contains are owned by ICU and should not be + * deleted or written through by the caller. The locale name is terminated by + * a null pointer. + * + * @param n the specific locale name index of the available locale list; + * should not exceed the number returned by uloc_countAvailable. 
* @return a specified locale name of all available locales * @stable ICU 2.0 */ @@ -762,6 +768,72 @@ uloc_getAvailable(int32_t n); */ U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void); +#ifndef U_HIDE_DRAFT_API + +/** + * Types for uloc_openAvailableByType. + * + * @draft ICU 65 + */ +typedef enum ULocAvailableType { + /** + * Locales that return data when passed to ICU APIs, + * but not including legacy or alias locales. + * + * @draft ICU 65 + */ + ULOC_AVAILABLE_DEFAULT, + + /** + * Legacy or alias locales that return data when passed to ICU APIs. + * Examples of supported legacy or alias locales: + * + * - iw (alias to he) + * - mo (alias to ro) + * - zh_CN (alias to zh_Hans_CN) + * - sr_BA (alias to sr_Cyrl_BA) + * - ars (alias to ar_SA) + * + * The locales in this set are disjoint from the ones in + * ULOC_AVAILABLE_DEFAULT. To get both sets at the same time, use + * ULOC_AVAILABLE_WITH_LEGACY_ALIASES. + * + * @draft ICU 65 + */ + ULOC_AVAILABLE_ONLY_LEGACY_ALIASES, + + /** + * The union of the locales in ULOC_AVAILABLE_DEFAULT and + * ULOC_AVAILABLE_ONLY_LEGACY_ALIASES. + * + * @draft ICU 65 + */ + ULOC_AVAILABLE_WITH_LEGACY_ALIASES, + +#ifndef U_HIDE_INTERNAL_API + /** + * @internal + */ + ULOC_AVAILABLE_COUNT +#endif +} ULocAvailableType; + +/** + * Gets a list of available locales according to the type argument, allowing + * the user to access different sets of supported locales in ICU. + * + * The returned UEnumeration must be closed by the caller. + * + * @param type Type choice from ULocAvailableType. + * @param status Set if an error occurred. + * @return a UEnumeration owned by the caller, or nullptr on failure. 
+ * @draft ICU 65 + */ +U_DRAFT UEnumeration* U_EXPORT2 +uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status); + +#endif // U_HIDE_DRAFT_API + /** * * Gets a list of all available 2-letter language codes defined in ISO 639, diff --git a/deps/icu-small/source/common/unicode/umachine.h b/deps/icu-small/source/common/unicode/umachine.h index 6d932cfcfb..5cb95e58f3 100644 --- a/deps/icu-small/source/common/unicode/umachine.h +++ b/deps/icu-small/source/common/unicode/umachine.h @@ -140,6 +140,42 @@ #define U_FINAL final #endif +// Before ICU 65, function-like, multi-statement ICU macros were just defined as +// series of statements wrapped in { } blocks and the caller could choose to +// either treat them as if they were actual functions and end the invocation +// with a trailing ; creating an empty statement after the block or else omit +// this trailing ; using the knowledge that the macro would expand to { }. +// +// But doing so doesn't work well with macros that look like functions and +// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore +// switches to the standard solution of wrapping such macros in do { } while. +// +// This will however break existing code that depends on being able to invoke +// these macros without a trailing ; so to be able to remain compatible with +// such code the wrapper is itself defined as macros so that it's possible to +// build ICU 65 and later with the old macro behaviour, like this: +// +// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""' +// runConfigureICU ... +// + +/** + * \def UPRV_BLOCK_MACRO_BEGIN + * Defined as the "do" keyword by default. + * @internal + */ +#ifndef UPRV_BLOCK_MACRO_BEGIN +#define UPRV_BLOCK_MACRO_BEGIN do +#endif + +/** + * \def UPRV_BLOCK_MACRO_END + * Defined as "while (FALSE)" by default. 
+ * @internal + */ +#ifndef UPRV_BLOCK_MACRO_END +#define UPRV_BLOCK_MACRO_END while (FALSE) +#endif /*==========================================================================*/ /* limits for int32_t etc., like in POSIX inttypes.h */ diff --git a/deps/icu-small/source/common/unicode/umutablecptrie.h b/deps/icu-small/source/common/unicode/umutablecptrie.h index e75191a449..13e71ef25e 100644 --- a/deps/icu-small/source/common/unicode/umutablecptrie.h +++ b/deps/icu-small/source/common/unicode/umutablecptrie.h @@ -9,8 +9,6 @@ #include "unicode/utypes.h" -#ifndef U_HIDE_DRAFT_API - #include "unicode/localpointer.h" #include "unicode/ucpmap.h" #include "unicode/ucptrie.h" @@ -44,7 +42,7 @@ U_CDECL_BEGIN * * @see UCPTrie * @see umutablecptrie_buildImmutable - * @draft ICU 63 + * @stable ICU 63 */ typedef struct UMutableCPTrie UMutableCPTrie; @@ -59,7 +57,7 @@ typedef struct UMutableCPTrie UMutableCPTrie; * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16 * @param pErrorCode an in/out ICU UErrorCode * @return the trie - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UMutableCPTrie * U_EXPORT2 umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode); @@ -71,7 +69,7 @@ umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErr * @param other the trie to clone * @param pErrorCode an in/out ICU UErrorCode * @return the trie clone - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UMutableCPTrie * U_EXPORT2 umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode); @@ -80,7 +78,7 @@ umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode); * Closes a mutable trie and releases associated memory. 
* * @param trie the trie - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI void U_EXPORT2 umutablecptrie_close(UMutableCPTrie *trie); @@ -96,7 +94,7 @@ U_NAMESPACE_BEGIN * * @see LocalPointerBase * @see LocalPointer - * @draft ICU 63 + * @stable ICU 63 */ U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close); @@ -111,7 +109,7 @@ U_NAMESPACE_END * @param map the source map * @param pErrorCode an in/out ICU UErrorCode * @return the mutable trie - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UMutableCPTrie * U_EXPORT2 umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode); @@ -123,7 +121,7 @@ umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode); * @param trie the immutable trie * @param pErrorCode an in/out ICU UErrorCode * @return the mutable trie - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UMutableCPTrie * U_EXPORT2 umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode); @@ -134,7 +132,7 @@ umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode); * @param trie the trie * @param c the code point * @return the value - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI uint32_t U_EXPORT2 umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c); @@ -166,7 +164,7 @@ umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c); * may have been modified by filter(context, trie value) * if that function pointer is not NULL * @return the range end code point, or -1 if start is not a valid code point - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UChar32 U_EXPORT2 umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, @@ -180,7 +178,7 @@ umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, * @param c the code point * @param value the value * @param pErrorCode an in/out ICU UErrorCode - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI void U_EXPORT2 umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode); @@ -194,7 +192,7 @@ 
umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode * * @param end the last code point to get the value (inclusive) * @param value the value * @param pErrorCode an in/out ICU UErrorCode - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI void U_EXPORT2 umutablecptrie_setRange(UMutableCPTrie *trie, @@ -229,7 +227,7 @@ umutablecptrie_setRange(UMutableCPTrie *trie, * @param pErrorCode an in/out ICU UErrorCode * * @see umutablecptrie_fromUCPTrie - * @draft ICU 63 + * @stable ICU 63 */ U_CAPI UCPTrie * U_EXPORT2 umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, @@ -237,5 +235,4 @@ umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieVal U_CDECL_END -#endif // U_HIDE_DRAFT_API #endif diff --git a/deps/icu-small/source/common/unicode/unifilt.h b/deps/icu-small/source/common/unicode/unifilt.h index 99cce785b6..1a77089233 100644 --- a/deps/icu-small/source/common/unicode/unifilt.h +++ b/deps/icu-small/source/common/unicode/unifilt.h @@ -12,6 +12,10 @@ #ifndef UNIFILT_H #define UNIFILT_H +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/unifunct.h" #include "unicode/unimatch.h" @@ -68,6 +72,14 @@ public: virtual ~UnicodeFilter(); /** + * Clones this object polymorphically. + * The caller owns the result and should delete it when done. + * @return clone, or nullptr if an error occurred + * @stable ICU 2.4 + */ + virtual UnicodeFilter* clone() const = 0; + + /** * Returns <tt>true</tt> for characters that are in the selected * subset. 
In other words, if a character is <b>to be * filtered</b>, then <tt>contains()</tt> returns @@ -119,4 +131,6 @@ protected: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/unifunct.h b/deps/icu-small/source/common/unicode/unifunct.h index 66a02ce7cd..2b1b766ea7 100644 --- a/deps/icu-small/source/common/unicode/unifunct.h +++ b/deps/icu-small/source/common/unicode/unifunct.h @@ -13,6 +13,9 @@ #define UNIFUNCT_H #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" /** @@ -124,4 +127,6 @@ protected: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/unimatch.h b/deps/icu-small/source/common/unicode/unimatch.h index 8bf3995018..2d3c5210c3 100644 --- a/deps/icu-small/source/common/unicode/unimatch.h +++ b/deps/icu-small/source/common/unicode/unimatch.h @@ -17,6 +17,7 @@ * \brief C++ API: Unicode Matcher */ +#if U_SHOW_CPLUSPLUS_API U_NAMESPACE_BEGIN @@ -162,4 +163,6 @@ public: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h index e5e7726d60..18cc937644 100644 --- a/deps/icu-small/source/common/unicode/uniset.h +++ b/deps/icu-small/source/common/unicode/uniset.h @@ -13,6 +13,10 @@ #ifndef UNICODESET_H #define UNICODESET_H +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/ucpmap.h" #include "unicode/unifilt.h" #include "unicode/unistr.h" @@ -501,7 +505,7 @@ public: * @see cloneAsThawed * @stable ICU 2.0 */ - virtual UnicodeFunctor* clone() const; + virtual UnicodeSet* clone() const; /** * Returns the hash code value for this set. @@ -579,7 +583,7 @@ public: * @see cloneAsThawed * @stable ICU 3.8 */ - UnicodeFunctor *freeze(); + UnicodeSet *freeze(); /** * Clone the set and make the clone mutable. 
@@ -589,7 +593,7 @@ public: * @see isFrozen * @stable ICU 3.8 */ - UnicodeFunctor *cloneAsThawed() const; + UnicodeSet *cloneAsThawed() const; //---------------------------------------------------------------- // Public API @@ -1651,11 +1655,10 @@ private: const UnicodeSet* inclusions, UErrorCode &status); -#ifndef U_HIDE_DRAFT_API // Skipped: ucpmap.h is draft only. + // UCPMap is now stable ICU 63 void applyIntPropertyValue(const UCPMap *map, UCPMapValueFilter *filter, const void *context, UErrorCode &errorCode); -#endif /* U_HIDE_DRAFT_API */ /** * Set the new pattern to cache. @@ -1736,4 +1739,6 @@ inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetS U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h index 8fd144425e..da79053765 100644 --- a/deps/icu-small/source/common/unicode/unistr.h +++ b/deps/icu-small/source/common/unicode/unistr.h @@ -28,8 +28,11 @@ * \brief C++ API: Unicode String */ -#include <cstddef> #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include <cstddef> #include "unicode/char16ptr.h" #include "unicode/rep.h" #include "unicode/std_string.h" @@ -209,7 +212,9 @@ class UnicodeStringAppendable; // unicode/appendable.h * similar functionality as the Java String and StringBuffer/StringBuilder classes. * It is a concrete implementation of the abstract class Replaceable (for transliteration). * - * A UnicodeString may also "alias" an external array of characters + * The UnicodeString equivalent of std::string’s clear() is remove(). + * + * A UnicodeString may "alias" an external array of characters * (that is, point to it, rather than own the array) * whose lifetime must then at least match the lifetime of the aliasing object. 
* This aliasing may be preserved when returning a UnicodeString by value, @@ -2092,8 +2097,7 @@ public: * s.truncate(0); // set to an empty string (complete truncation), or * s=UnicodeString(); // assign an empty string, or * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or - * static const char16_t nul=0; - * s.setTo(&nul, 0); // set to an empty C Unicode string + * s.setTo(u"", 0); // set to an empty C Unicode string * } * \endcode * @@ -2534,11 +2538,14 @@ public: /* Remove operations */ /** - * Remove all characters from the UnicodeString object. + * Removes all characters from the UnicodeString object and clears the bogus flag. + * This is the UnicodeString equivalent of std::string’s clear(). + * * @return a reference to this + * @see setToBogus * @stable ICU 2.0 */ - inline UnicodeString& remove(void); + inline UnicodeString& remove(); /** * Remove the characters in the range @@ -3034,11 +3041,11 @@ public: * uint16_t * constructor. * Delegates to UnicodeString(const char16_t *, int32_t). * @param text UTF-16 string - * @param length string length + * @param textLength string length * @stable ICU 59 */ - UnicodeString(const uint16_t *text, int32_t length) : - UnicodeString(ConstChar16Ptr(text), length) {} + UnicodeString(const uint16_t *text, int32_t textLength) : + UnicodeString(ConstChar16Ptr(text), textLength) {} #endif #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) @@ -3047,21 +3054,21 @@ public: * (Only defined if U_SIZEOF_WCHAR_T==2.) * Delegates to UnicodeString(const char16_t *, int32_t). * @param text NUL-terminated UTF-16 string - * @param length string length + * @param textLength string length * @stable ICU 59 */ - UnicodeString(const wchar_t *text, int32_t length) : - UnicodeString(ConstChar16Ptr(text), length) {} + UnicodeString(const wchar_t *text, int32_t textLength) : + UnicodeString(ConstChar16Ptr(text), textLength) {} #endif /** * nullptr_t constructor. 
* Effectively the same as the default constructor, makes an empty string object. * @param text nullptr - * @param length ignored + * @param textLength ignored * @stable ICU 59 */ - inline UnicodeString(const std::nullptr_t text, int32_t length); + inline UnicodeString(const std::nullptr_t text, int32_t textLength); /** * Readonly-aliasing char16_t* constructor. @@ -3266,13 +3273,13 @@ public: * } * \endcode * @param src String using only invariant characters. - * @param length Length of src, or -1 if NUL-terminated. + * @param textLength Length of src, or -1 if NUL-terminated. * @param inv Signature-distinguishing parameter, use US_INV. * * @see US_INV * @stable ICU 3.2 */ - UnicodeString(const char *src, int32_t length, enum EInvariant inv); + UnicodeString(const char *src, int32_t textLength, enum EInvariant inv); /** @@ -3323,9 +3330,6 @@ public: * Clones can be used concurrently in multiple threads. * If a subclass does not implement clone(), or if an error occurs, * then NULL is returned. - * The clone functions in all subclasses return a pointer to a Replaceable - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. * The caller must delete the clone. * * @return a clone of this object @@ -3334,7 +3338,7 @@ public: * @see getDynamicClassID * @stable ICU 2.6 */ - virtual Replaceable *clone() const; + virtual UnicodeString *clone() const; /** Destructor. * @stable ICU 2.0 @@ -4748,4 +4752,6 @@ UnicodeString::reverse(int32_t start, U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/unorm.h b/deps/icu-small/source/common/unicode/unorm.h index 3839de1295..09dd366a96 100644 --- a/deps/icu-small/source/common/unicode/unorm.h +++ b/deps/icu-small/source/common/unicode/unorm.h @@ -131,6 +131,8 @@ // Do not conditionalize the following enum with #ifndef U_HIDE_DEPRECATED_API, // it is needed for layout of Normalizer object. 
+#ifndef U_FORCE_HIDE_DEPRECATED_API + /** * Constants for normalization modes. * @deprecated ICU 56 Use unorm2.h instead. @@ -155,6 +157,8 @@ typedef enum { UNORM_MODE_COUNT } UNormalizationMode; +#endif // U_FORCE_HIDE_DEPRECATED_API + #ifndef U_HIDE_DEPRECATED_API /** diff --git a/deps/icu-small/source/common/unicode/uobject.h b/deps/icu-small/source/common/unicode/uobject.h index 53b8eb005f..6a137af83c 100644 --- a/deps/icu-small/source/common/unicode/uobject.h +++ b/deps/icu-small/source/common/unicode/uobject.h @@ -20,6 +20,9 @@ #define __UOBJECT_H__ #include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + #include "unicode/platform.h" /** @@ -43,7 +46,7 @@ * @stable ICU 4.2. Since ICU 64, Use U_NOEXCEPT instead. See ICU-20422. */ #ifndef U_NO_THROW -#define U_NO_THROW throw() +#define U_NO_THROW U_NOEXCEPT #endif /*===========================================================================*/ @@ -212,11 +215,8 @@ public: * The clone() function is not available in UObject because it is not * implemented by all ICU classes. * Many ICU services provide a clone() function for their class trees, - * defined on the service's C++ base class, and all subclasses within that - * service class tree return a pointer to the service base class + * defined on the service's C++ base class * (which itself is a subclass of UObject). - * This is because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. 
* * @stable ICU 2.2 */ @@ -319,4 +319,6 @@ protected: U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index eaf56c9614..e302bf0976 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -193,10 +193,13 @@ #define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias) #define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem) #define res_getBinary U_ICU_ENTRY_POINT_RENAME(res_getBinary) +#define res_getBinaryNoTrace U_ICU_ENTRY_POINT_RENAME(res_getBinaryNoTrace) #define res_getIntVector U_ICU_ENTRY_POINT_RENAME(res_getIntVector) +#define res_getIntVectorNoTrace U_ICU_ENTRY_POINT_RENAME(res_getIntVectorNoTrace) #define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType) #define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource) #define res_getString U_ICU_ENTRY_POINT_RENAME(res_getString) +#define res_getStringNoTrace U_ICU_ENTRY_POINT_RENAME(res_getStringNoTrace) #define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex) #define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey) #define res_load U_ICU_ENTRY_POINT_RENAME(res_load) @@ -523,6 +526,7 @@ #define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone) #define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference) #define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange) +#define ucal_getHostTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getHostTimeZone) #define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale) #define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit) #define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType) @@ -575,7 +579,6 @@ #define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale) #define ucasemap_getOptions 
U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions) #define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle) -#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8) #define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open) #define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator) #define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale) @@ -930,16 +933,9 @@ #define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close) #define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next) #define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open) -#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit) -#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer) -#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io) -#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit) #define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch) #define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32) -#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou) #define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close) -#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode) -#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue) #define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex) #define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength) #define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate) @@ -951,11 +947,7 @@ #define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType) #define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars) #define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric) -#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit) #define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open) -#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou) -#define ufmt_uto64 
U_ICU_ENTRY_POINT_RENAME(ufmt_uto64) -#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop) #define ufmtval_getString U_ICU_ENTRY_POINT_RENAME(ufmtval_getString) #define ufmtval_nextPosition U_ICU_ENTRY_POINT_RENAME(ufmtval_nextPosition) #define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance) @@ -1103,6 +1095,7 @@ #define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant) #define uloc_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uloc_isRightToLeft) #define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags) +#define uloc_openAvailableByType U_ICU_ENTRY_POINT_RENAME(uloc_openAvailableByType) #define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList) #define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords) #define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault) @@ -1159,9 +1152,6 @@ #define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern) #define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat) #define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse) -#define umtx_condBroadcast U_ICU_ENTRY_POINT_RENAME(umtx_condBroadcast) -#define umtx_condSignal U_ICU_ENTRY_POINT_RENAME(umtx_condSignal) -#define umtx_condWait U_ICU_ENTRY_POINT_RENAME(umtx_condWait) #define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock) #define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock) #define umutablecptrie_buildImmutable U_ICU_ENTRY_POINT_RENAME(umutablecptrie_buildImmutable) @@ -1327,7 +1317,6 @@ #define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs) #define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd) #define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd) -#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass) #define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString) #define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare) #define uprv_decNumberCompareSignal 
U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal) @@ -1393,6 +1382,7 @@ #define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func) #define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy) #define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii) +#define uprv_ebcdicToAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToAscii) #define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii) #define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower) #define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs) @@ -1412,6 +1402,7 @@ #define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) #define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) #define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) +#define uprv_isEbcdicAtSign U_ICU_ENTRY_POINT_RENAME(uprv_isEbcdicAtSign) #define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) #define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString) #define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString) @@ -1579,6 +1570,7 @@ #define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String) #define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex) #define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey) +#define ures_getValueWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getValueWithFallback) #define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion) #define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey) #define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber) diff --git a/deps/icu-small/source/common/unicode/usetiter.h b/deps/icu-small/source/common/unicode/usetiter.h index 057adbc04f..f3f470f95a 100644 --- a/deps/icu-small/source/common/unicode/usetiter.h +++ b/deps/icu-small/source/common/unicode/usetiter.h @@ -10,6 +10,9 @@ #define USETITER_H #include "unicode/utypes.h" 
+ +#if U_SHOW_CPLUSPLUS_API + #include "unicode/uobject.h" #include "unicode/unistr.h" @@ -317,4 +320,6 @@ inline UChar32 UnicodeSetIterator::getCodepointEnd() const { U_NAMESPACE_END +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif diff --git a/deps/icu-small/source/common/unicode/utext.h b/deps/icu-small/source/common/unicode/utext.h index ff78784c61..6f1e3409d8 100644 --- a/deps/icu-small/source/common/unicode/utext.h +++ b/deps/icu-small/source/common/unicode/utext.h @@ -766,12 +766,14 @@ utext_extract(UText *ut, * * @stable ICU 3.8 */ -#define UTEXT_SETNATIVEINDEX(ut, ix) \ - { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ - if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ - (ut)->chunkOffset=(int32_t)__offset; \ - } else { \ - utext_setNativeIndex((ut), (ix)); } } +#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \ + int64_t __offset = (ix) - (ut)->chunkNativeStart; \ + if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ + (ut)->chunkOffset=(int32_t)__offset; \ + } else { \ + utext_setNativeIndex((ut), (ix)); \ + } \ +} UPRV_BLOCK_MACRO_END diff --git a/deps/icu-small/source/common/unicode/utf16.h b/deps/icu-small/source/common/unicode/utf16.h index 0908b4f00e..3315214ae6 100644 --- a/deps/icu-small/source/common/unicode/utf16.h +++ b/deps/icu-small/source/common/unicode/utf16.h @@ -163,7 +163,7 @@ * @see U16_GET * @stable ICU 2.4 */ -#define U16_GET_UNSAFE(s, i, c) { \ +#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(U16_IS_SURROGATE(c)) { \ if(U16_IS_SURROGATE_LEAD(c)) { \ @@ -172,7 +172,7 @@ (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a random-access offset, @@ -197,7 +197,7 @@ * @see U16_GET_UNSAFE * @stable ICU 2.4 */ -#define U16_GET(s, start, i, length, c) { \ +#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN 
{ \ (c)=(s)[i]; \ if(U16_IS_SURROGATE(c)) { \ uint16_t __c2; \ @@ -211,7 +211,7 @@ } \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a random-access offset, @@ -236,7 +236,7 @@ * @see U16_GET_UNSAFE * @stable ICU 60 */ -#define U16_GET_OR_FFFD(s, start, i, length, c) { \ +#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(U16_IS_SURROGATE(c)) { \ uint16_t __c2; \ @@ -254,7 +254,7 @@ } \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /* definitions with forward iteration --------------------------------------- */ @@ -277,12 +277,12 @@ * @see U16_NEXT * @stable ICU 2.4 */ -#define U16_NEXT_UNSAFE(s, i, c) { \ +#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(U16_IS_LEAD(c)) { \ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a code point boundary offset, @@ -305,7 +305,7 @@ * @see U16_NEXT_UNSAFE * @stable ICU 2.4 */ -#define U16_NEXT(s, i, length, c) { \ +#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(U16_IS_LEAD(c)) { \ uint16_t __c2; \ @@ -314,7 +314,7 @@ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a code point boundary offset, @@ -337,7 +337,7 @@ * @see U16_NEXT_UNSAFE * @stable ICU 60 */ -#define U16_NEXT_OR_FFFD(s, i, length, c) { \ +#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(U16_IS_SURROGATE(c)) { \ uint16_t __c2; \ @@ -348,7 +348,7 @@ (c)=0xfffd; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Append a code point to a string, overwriting 1 or 2 code units. 
@@ -363,14 +363,14 @@ * @see U16_APPEND * @stable ICU 2.4 */ -#define U16_APPEND_UNSAFE(s, i, c) { \ +#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else { \ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Append a code point to a string, overwriting 1 or 2 code units. @@ -389,7 +389,7 @@ * @see U16_APPEND_UNSAFE * @stable ICU 2.4 */ -#define U16_APPEND(s, i, capacity, c, isError) { \ +#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ @@ -398,7 +398,7 @@ } else /* c>0x10ffff or not enough space */ { \ (isError)=TRUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the next. @@ -410,11 +410,11 @@ * @see U16_FWD_1 * @stable ICU 2.4 */ -#define U16_FWD_1_UNSAFE(s, i) { \ +#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)++])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the next. 
@@ -429,11 +429,11 @@ * @see U16_FWD_1_UNSAFE * @stable ICU 2.4 */ -#define U16_FWD_1(s, i, length) { \ +#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the n-th next one, @@ -447,13 +447,13 @@ * @see U16_FWD_N * @stable ICU 2.4 */ -#define U16_FWD_N_UNSAFE(s, i, n) { \ +#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ U16_FWD_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the n-th next one, @@ -470,13 +470,13 @@ * @see U16_FWD_N_UNSAFE * @stable ICU 2.4 */ -#define U16_FWD_N(s, i, length, n) { \ +#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ U16_FWD_1(s, i, length); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary @@ -491,11 +491,11 @@ * @see U16_SET_CP_START * @stable ICU 2.4 */ -#define U16_SET_CP_START_UNSAFE(s, i) { \ +#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[i])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary @@ -511,11 +511,11 @@ * @see U16_SET_CP_START_UNSAFE * @stable ICU 2.4 */ -#define U16_SET_CP_START(s, start, i) { \ +#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /* definitions with backward iteration -------------------------------------- */ @@ -539,12 +539,12 @@ * @see U16_PREV * @stable ICU 2.4 */ -#define U16_PREV_UNSAFE(s, i, c) { \ +#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(U16_IS_TRAIL(c)) { \ 
(c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one @@ -566,7 +566,7 @@ * @see U16_PREV_UNSAFE * @stable ICU 2.4 */ -#define U16_PREV(s, start, i, c) { \ +#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(U16_IS_TRAIL(c)) { \ uint16_t __c2; \ @@ -575,7 +575,7 @@ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one @@ -597,7 +597,7 @@ * @see U16_PREV_UNSAFE * @stable ICU 60 */ -#define U16_PREV_OR_FFFD(s, start, i, c) { \ +#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(U16_IS_SURROGATE(c)) { \ uint16_t __c2; \ @@ -608,7 +608,7 @@ (c)=0xfffd; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one. @@ -621,11 +621,11 @@ * @see U16_BACK_1 * @stable ICU 2.4 */ -#define U16_BACK_1_UNSAFE(s, i) { \ +#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[--(i)])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one. 
@@ -639,11 +639,11 @@ * @see U16_BACK_1_UNSAFE * @stable ICU 2.4 */ -#define U16_BACK_1(s, start, i) { \ +#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the n-th one before it, @@ -658,13 +658,13 @@ * @see U16_BACK_N * @stable ICU 2.4 */ -#define U16_BACK_N_UNSAFE(s, i, n) { \ +#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ U16_BACK_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the n-th one before it, @@ -680,13 +680,13 @@ * @see U16_BACK_N_UNSAFE * @stable ICU 2.4 */ -#define U16_BACK_N(s, start, i, n) { \ +#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0 && (i)>(start)) { \ U16_BACK_1(s, start, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary after a code point. @@ -701,11 +701,11 @@ * @see U16_SET_CP_LIMIT * @stable ICU 2.4 */ -#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \ +#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)-1])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary after a code point. 
@@ -724,10 +724,10 @@ * @see U16_SET_CP_LIMIT_UNSAFE * @stable ICU 2.4 */ -#define U16_SET_CP_LIMIT(s, start, i, length) { \ +#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END #endif diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h index 41155f119b..bb00130374 100644 --- a/deps/icu-small/source/common/unicode/utf8.h +++ b/deps/icu-small/source/common/unicode/utf8.h @@ -229,11 +229,11 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_GET * @stable ICU 2.4 */ -#define U8_GET_UNSAFE(s, i, c) { \ +#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ int32_t _u8_get_unsafe_index=(int32_t)(i); \ U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a random-access offset, @@ -256,11 +256,11 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_GET_UNSAFE * @stable ICU 2.4 */ -#define U8_GET(s, start, i, length, c) { \ +#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ int32_t _u8_get_index=(i); \ U8_SET_CP_START(s, start, _u8_get_index); \ U8_NEXT(s, _u8_get_index, length, c); \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a random-access offset, @@ -287,11 +287,11 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_GET * @stable ICU 51 */ -#define U8_GET_OR_FFFD(s, start, i, length, c) { \ +#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ int32_t _u8_get_index=(i); \ U8_SET_CP_START(s, start, _u8_get_index); \ U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \ -} +} UPRV_BLOCK_MACRO_END /* definitions with forward iteration --------------------------------------- */ @@ -312,7 +312,7 @@ 
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_NEXT * @stable ICU 2.4 */ -#define U8_NEXT_UNSAFE(s, i, c) { \ +#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(uint8_t)(s)[(i)++]; \ if(!U8_IS_SINGLE(c)) { \ if((c)<0xe0) { \ @@ -326,7 +326,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); (i)+=3; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a code point boundary offset, @@ -377,7 +377,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); #define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) /** @internal */ -#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \ +#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(uint8_t)(s)[(i)++]; \ if(!U8_IS_SINGLE(c)) { \ uint8_t __t = 0; \ @@ -403,7 +403,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); (c)=(sub); /* ill-formed*/ \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Append a code point to a string, overwriting 1 to 4 bytes. @@ -418,7 +418,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_APPEND * @stable ICU 2.4 */ -#define U8_APPEND_UNSAFE(s, i, c) { \ +#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ uint32_t __uc=(c); \ if(__uc<=0x7f) { \ (s)[(i)++]=(uint8_t)__uc; \ @@ -436,7 +436,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); } \ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Append a code point to a string, overwriting 1 to 4 bytes. 
@@ -455,7 +455,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_APPEND_UNSAFE * @stable ICU 2.4 */ -#define U8_APPEND(s, i, capacity, c, isError) { \ +#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ uint32_t __uc=(c); \ if(__uc<=0x7f) { \ (s)[(i)++]=(uint8_t)__uc; \ @@ -474,7 +474,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); } else { \ (isError)=TRUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the next. @@ -486,9 +486,9 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_FWD_1 * @stable ICU 2.4 */ -#define U8_FWD_1_UNSAFE(s, i) { \ +#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the next. @@ -503,7 +503,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_FWD_1_UNSAFE * @stable ICU 2.4 */ -#define U8_FWD_1(s, i, length) { \ +#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ uint8_t __b=(s)[(i)++]; \ if(U8_IS_LEAD(__b) && (i)!=(length)) { \ uint8_t __t1=(s)[i]; \ @@ -524,7 +524,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); } \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the n-th next one, @@ -538,13 +538,13 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_FWD_N * @stable ICU 2.4 */ -#define U8_FWD_N_UNSAFE(s, i, n) { \ +#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ U8_FWD_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the n-th next one, @@ -561,13 +561,13 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_FWD_N_UNSAFE * @stable ICU 2.4 */ -#define U8_FWD_N(s, i, length, n) { \ 
+#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ U8_FWD_1(s, i, length); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary @@ -582,9 +582,9 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_SET_CP_START * @stable ICU 2.4 */ -#define U8_SET_CP_START_UNSAFE(s, i) { \ +#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ while(U8_IS_TRAIL((s)[i])) { --(i); } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary @@ -603,11 +603,11 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_TRUNCATE_IF_INCOMPLETE * @stable ICU 2.4 */ -#define U8_SET_CP_START(s, start, i) { \ +#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U8_IS_TRAIL((s)[(i)])) { \ (i)=utf8_back1SafeBody(s, start, (i)); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * If the string ends with a UTF-8 byte sequence that is valid so far @@ -635,7 +635,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_SET_CP_START * @stable ICU 61 */ -#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) \ +#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \ if((length)>(start)) { \ uint8_t __b1=s[(length)-1]; \ if(U8_IS_SINGLE(__b1)) { \ @@ -656,7 +656,8 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); } \ } \ } \ - } + } \ +} UPRV_BLOCK_MACRO_END /* definitions with backward iteration -------------------------------------- */ @@ -679,7 +680,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_PREV * @stable ICU 2.4 */ -#define U8_PREV_UNSAFE(s, i, c) { \ +#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(uint8_t)(s)[--(i)]; \ if(U8_IS_TRAIL(c)) { \ uint8_t __b, __count=1, __shift=6; \ @@ -699,7 +700,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); 
} \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one @@ -721,12 +722,12 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_PREV_UNSAFE * @stable ICU 2.4 */ -#define U8_PREV(s, start, i, c) { \ +#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(uint8_t)(s)[--(i)]; \ if(!U8_IS_SINGLE(c)) { \ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one @@ -752,12 +753,12 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_PREV * @stable ICU 51 */ -#define U8_PREV_OR_FFFD(s, start, i, c) { \ +#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(uint8_t)(s)[--(i)]; \ if(!U8_IS_SINGLE(c)) { \ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one. @@ -770,9 +771,9 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_BACK_1 * @stable ICU 2.4 */ -#define U8_BACK_1_UNSAFE(s, i) { \ +#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ while(U8_IS_TRAIL((s)[--(i)])) {} \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one. 
@@ -786,11 +787,11 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_BACK_1_UNSAFE * @stable ICU 2.4 */ -#define U8_BACK_1(s, start, i) { \ +#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U8_IS_TRAIL((s)[--(i)])) { \ (i)=utf8_back1SafeBody(s, start, (i)); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the n-th one before it, @@ -805,13 +806,13 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_BACK_N * @stable ICU 2.4 */ -#define U8_BACK_N_UNSAFE(s, i, n) { \ +#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ U8_BACK_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the n-th one before it, @@ -827,13 +828,13 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_BACK_N_UNSAFE * @stable ICU 2.4 */ -#define U8_BACK_N(s, start, i, n) { \ +#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0 && (i)>(start)) { \ U8_BACK_1(s, start, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary after a code point. @@ -848,10 +849,10 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_SET_CP_LIMIT * @stable ICU 2.4 */ -#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \ +#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ U8_BACK_1_UNSAFE(s, i); \ U8_FWD_1_UNSAFE(s, i); \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary after a code point. 
@@ -870,11 +871,11 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @see U8_SET_CP_LIMIT_UNSAFE * @stable ICU 2.4 */ -#define U8_SET_CP_LIMIT(s, start, i, length) { \ +#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ if((start)<(i) && ((i)<(length) || (length)<0)) { \ U8_BACK_1(s, start, i); \ U8_FWD_1(s, i, length); \ } \ -} +} UPRV_BLOCK_MACRO_END #endif diff --git a/deps/icu-small/source/common/unicode/utf_old.h b/deps/icu-small/source/common/unicode/utf_old.h index 55c17c01df..b2428e6b31 100644 --- a/deps/icu-small/source/common/unicode/utf_old.h +++ b/deps/icu-small/source/common/unicode/utf_old.h @@ -19,9 +19,6 @@ /** * \file * \brief C API: Deprecated macros for Unicode string handling - */ - -/** * * The macros in utf_old.h are all deprecated and their use discouraged. * Some of the design principles behind the set of UTF macros @@ -139,12 +136,16 @@ * * <hr> * - * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead. + * Deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead. */ #ifndef __UTF_OLD_H__ #define __UTF_OLD_H__ +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" + /** * \def U_HIDE_OBSOLETE_UTF_OLD_H * @@ -162,10 +163,6 @@ #if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" - /* Formerly utf.h, part 1 --------------------------------------------------- */ #ifdef U_USE_UTF_DEPRECATES @@ -365,21 +362,21 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I #define UTF8_ARRAY_SIZE(size) ((5*(size))/2) /** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. 
*/ -#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ +#define UTF8_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */ -#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ +#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ int32_t _utf8_get_char_safe_index=(int32_t)(i); \ UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */ -#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ +#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if((uint8_t)((c)-0xc0)<0x35) { \ uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ @@ -396,10 +393,10 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I break; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */ -#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ +#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0x7f) { \ (s)[(i)++]=(uint8_t)(c); \ } else { \ @@ -416,29 +413,29 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I } \ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */ -#define UTF8_FWD_1_UNSAFE(s, i) { \ +#define UTF8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. 
*/ -#define UTF8_FWD_N_UNSAFE(s, i, n) { \ +#define UTF8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ UTF8_FWD_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */ -#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ +#define UTF8_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */ -#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ +#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if((c)>=0x80) { \ if(UTF8_IS_LEAD(c)) { \ @@ -447,16 +444,16 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I (c)=UTF8_ERROR_VALUE_1; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */ -#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ +#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0x7f) { \ (s)[(i)++]=(uint8_t)(c); \ } else { \ (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */ #define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) @@ -468,7 +465,7 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) /** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */ -#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ +#define UTF8_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(UTF8_IS_TRAIL(c)) { \ uint8_t __b, __count=1, __shift=6; \ @@ -488,30 +485,30 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I } \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. 
Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */ -#define UTF8_BACK_1_UNSAFE(s, i) { \ +#define UTF8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ while(UTF8_IS_TRAIL((s)[--(i)])) {} \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */ -#define UTF8_BACK_N_UNSAFE(s, i, n) { \ +#define UTF8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ UTF8_BACK_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ -#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ +#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ UTF8_BACK_1_UNSAFE(s, i); \ UTF8_FWD_1_UNSAFE(s, i); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */ -#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ +#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if((c)>=0x80) { \ if((c)<=0xbf) { \ @@ -520,7 +517,7 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I (c)=UTF8_ERROR_VALUE_1; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */ #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) @@ -593,7 +590,7 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that. * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */ -#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ +#define UTF16_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(UTF_IS_SURROGATE(c)) { \ if(UTF_IS_SURROGATE_FIRST(c)) { \ @@ -602,10 +599,10 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. 
*/ -#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ +#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(UTF_IS_SURROGATE(c)) { \ uint16_t __c2; \ @@ -629,51 +626,51 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ (c)=UTF_ERROR_VALUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ -#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ +#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(UTF_IS_FIRST_SURROGATE(c)) { \ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ -#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ +#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else { \ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ -#define UTF16_FWD_1_UNSAFE(s, i) { \ +#define UTF16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ -#define UTF16_FWD_N_UNSAFE(s, i, n) { \ +#define UTF16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ UTF16_FWD_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ -#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ +#define UTF16_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. 
*/ -#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ +#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(UTF_IS_FIRST_SURROGATE(c)) { \ uint16_t __c2; \ @@ -689,10 +686,10 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I /* unmatched second surrogate or other non-character */ \ (c)=UTF_ERROR_VALUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ -#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ +#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else if((uint32_t)(c)<=0x10ffff) { \ @@ -705,7 +702,7 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I } else /* c>0x10ffff, write error value */ { \ (s)[(i)++]=UTF_ERROR_VALUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) @@ -717,38 +714,38 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) /** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ -#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ +#define UTF16_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(UTF_IS_SECOND_SURROGATE(c)) { \ (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ -#define UTF16_BACK_1_UNSAFE(s, i) { \ +#define UTF16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. 
*/ -#define UTF16_BACK_N_UNSAFE(s, i, n) { \ +#define UTF16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ UTF16_BACK_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ -#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ +#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ -#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ +#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(UTF_IS_SECOND_SURROGATE(c)) { \ uint16_t __c2; \ @@ -764,7 +761,7 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I /* unmatched first surrogate or other non-character */ \ (c)=UTF_ERROR_VALUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) @@ -830,122 +827,122 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I #define UTF32_ARRAY_SIZE(size) (size) /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ +#define UTF32_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ +#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(!UTF32_IS_SAFE(c, strict)) { \ (c)=UTF_ERROR_VALUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /* definitions with forward iteration --------------------------------------- */ /** @deprecated ICU 2.4. Obsolete, see utf_old.h. 
*/ -#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ +#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \ +#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (s)[(i)++]=(c); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_1_UNSAFE(s, i) { \ +#define UTF32_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ ++(i); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_N_UNSAFE(s, i, n) { \ +#define UTF32_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ (i)+=(n); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ -} +#define UTF32_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ +#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(!UTF32_IS_SAFE(c, strict)) { \ (c)=UTF_ERROR_VALUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ +#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0x10ffff) { \ (s)[(i)++]=(c); \ } else /* c>0x10ffff, write 0xfffd */ { \ (s)[(i)++]=0xfffd; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_1_SAFE(s, i, length) { \ +#define UTF32_FWD_1_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ ++(i); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. 
*/ -#define UTF32_FWD_N_SAFE(s, i, length, n) { \ +#define UTF32_FWD_N_SAFE(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ if(((i)+=(n))>(length)) { \ (i)=(length); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ -} +#define UTF32_SET_CHAR_START_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ +} UPRV_BLOCK_MACRO_END /* definitions with backward iteration -------------------------------------- */ /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ +#define UTF32_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_1_UNSAFE(s, i) { \ +#define UTF32_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ --(i); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_N_UNSAFE(s, i, n) { \ +#define UTF32_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ (i)-=(n); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ -} +#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ +#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(!UTF32_IS_SAFE(c, strict)) { \ (c)=UTF_ERROR_VALUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_1_SAFE(s, start, i) { \ +#define UTF32_BACK_1_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ --(i); \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. 
*/ -#define UTF32_BACK_N_SAFE(s, start, i, n) { \ +#define UTF32_BACK_N_SAFE(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ (i)-=(n); \ if((i)<(start)) { \ (i)=(start); \ } \ -} +} UPRV_BLOCK_MACRO_END /** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ -} +#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ +} UPRV_BLOCK_MACRO_END /* Formerly utf.h, part 2 --------------------------------------------------- */ diff --git a/deps/icu-small/source/common/unicode/utrace.h b/deps/icu-small/source/common/unicode/utrace.h index bf6fd036f0..0af050756f 100644 --- a/deps/icu-small/source/common/unicode/utrace.h +++ b/deps/icu-small/source/common/unicode/utrace.h @@ -66,6 +66,7 @@ typedef enum UTraceFunctionNumber { UTRACE_FUNCTION_START=0, UTRACE_U_INIT=UTRACE_FUNCTION_START, UTRACE_U_CLEANUP, + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal collation trace location. @@ -83,6 +84,7 @@ typedef enum UTraceFunctionNumber { UTRACE_UCNV_FLUSH_CACHE, UTRACE_UCNV_LOAD, UTRACE_UCNV_UNLOAD, + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal collation trace location. @@ -101,13 +103,80 @@ typedef enum UTraceFunctionNumber { UTRACE_UCOL_STRCOLLITER, UTRACE_UCOL_OPEN_FROM_SHORT_STRING, UTRACE_UCOL_STRCOLLUTF8, /**< @stable ICU 50 */ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal collation trace location. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UTRACE_COLLATION_LIMIT + UTRACE_COLLATION_LIMIT, #endif // U_HIDE_DEPRECATED_API + +#ifndef U_HIDE_DRAFT_API + + /** + * The lowest resource/data location. + * @draft ICU 65 + */ + UTRACE_UDATA_START=0x3000, + + /** + * Indicates that a value was read from a resource bundle. Provides three + * C-style strings to UTraceData: type, file name, and resource path. 
The + * possible types are: + * + * - "string" (a string value was accessed) + * - "binary" (a binary value was accessed) + * - "intvector" (a integer vector value was accessed) + * - "int" (a signed integer value was accessed) + * - "uint" (a unsigned integer value was accessed) + * - "get" (a path was loaded, but the value was not accessed) + * - "getalias" (a path was loaded, and an alias was resolved) + * + * @draft ICU 65 + */ + UTRACE_UDATA_RESOURCE=UTRACE_UDATA_START, + + /** + * Indicates that a resource bundle was opened. + * + * Provides one C-style string to UTraceData: file name. + * @draft ICU 65 + */ + UTRACE_UDATA_BUNDLE, + + /** + * Indicates that a data file was opened, but not *.res files. + * + * Provides one C-style string to UTraceData: file name. + * + * @draft ICU 65 + */ + UTRACE_UDATA_DATA_FILE, + + /** + * Indicates that a *.res file was opened. + * + * This differs from UTRACE_UDATA_BUNDLE because a res file is typically + * opened only once per application runtime, but the bundle corresponding + * to that res file may be opened many times. + * + * Provides one C-style string to UTraceData: file name. + * + * @draft ICU 65 + */ + UTRACE_UDATA_RES_FILE, + +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_INTERNAL_API + /** + * One more than the highest normal resource/data trace location. + * @internal The numeric value may change over time, see ICU ticket #12420. 
+ */ + UTRACE_RES_DATA_LIMIT, +#endif // U_HIDE_INTERNAL_API + } UTraceFunctionNumber; /** diff --git a/deps/icu-small/source/common/unicode/utypes.h b/deps/icu-small/source/common/unicode/utypes.h index 49eb12cd40..c98de9e6fc 100644 --- a/deps/icu-small/source/common/unicode/utypes.h +++ b/deps/icu-small/source/common/unicode/utypes.h @@ -385,17 +385,31 @@ typedef double UDate; /*===========================================================================*/ /** - * Error code to replace exception handling, so that the code is compatible with all C++ compilers, - * and to use the same mechanism for C and C++. + * Standard ICU4C error code type, a substitute for exceptions. + * + * Initialize the UErrorCode with U_ZERO_ERROR, and check for success or + * failure using U_SUCCESS() or U_FAILURE(): + * + * UErrorCode errorCode = U_ZERO_ERROR; + * // call ICU API that needs an error code parameter. + * if (U_FAILURE(errorCode)) { + * // An error occurred. Handle it here. + * } + * + * C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a + * suitable subclass. + * + * For more information, see: + * http://icu-project.org/userguide/conventions + * + * Note: By convention, ICU functions that take a reference (C++) or a pointer + * (C) to a UErrorCode first test: + * + * if (U_FAILURE(errorCode)) { return immediately; } * - * \par - * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode - * first test if(U_FAILURE(errorCode)) { return immediately; } * so that in a chain of such functions the first one that sets an error code * causes the following ones to not perform any operations. * - * \par - * Error codes should be tested using U_FAILURE() and U_SUCCESS(). 
* @stable ICU 2.0 */ typedef enum UErrorCode { diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index 7c114be2cc..0923c1d918 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -60,13 +60,13 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 64 +#define U_ICU_VERSION_MAJOR_NUM 65 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 2 +#define U_ICU_VERSION_MINOR_NUM 1 /** The current ICU patchlevel version as an integer. * This value will change in the subsequent releases of ICU @@ -86,7 +86,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _64 +#define U_ICU_VERSION_SUFFIX _65 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -139,7 +139,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "64.2" +#define U_ICU_VERSION "65.1" /** * The current ICU library major version number as a string, for library name suffixes. @@ -152,13 +152,13 @@ * * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "64" +#define U_ICU_VERSION_SHORT "65" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "64.2" +#define U_ICU_DATA_VERSION "65.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/deps/icu-small/source/common/unicode/uversion.h b/deps/icu-small/source/common/unicode/uversion.h index 3f0251d399..c8c7a374c8 100644 --- a/deps/icu-small/source/common/unicode/uversion.h +++ b/deps/icu-small/source/common/unicode/uversion.h @@ -62,26 +62,22 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; /* C++ namespace if supported. 
Versioned unless versioning is disabled. */ /*===========================================================================*/ +/* Define C++ namespace symbols. */ +#ifdef __cplusplus + /** * \def U_NAMESPACE_BEGIN - * This is used to begin a declaration of a public ICU C++ API. - * When not compiling for C++, it does nothing. - * When compiling for C++, it begins an extern "C++" linkage block (to protect - * against cases in which an external client includes ICU header files inside - * an extern "C" linkage block). + * This is used to begin a declaration of a public ICU C++ API within + * versioned-ICU-namespace block. * - * It also begins a versioned-ICU-namespace block. * @stable ICU 2.4 */ /** * \def U_NAMESPACE_END * This is used to end a declaration of a public ICU C++ API. - * When not compiling for C++, it does nothing. - * When compiling for C++, it ends the extern "C++" block begun by - * U_NAMESPACE_BEGIN. + * It ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN. * - * It also ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN. * @stable ICU 2.4 */ @@ -89,9 +85,6 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; * \def U_NAMESPACE_USE * This is used to specify that the rest of the code uses the * public ICU C++ API namespace. - * This is invoked by default; we recommend that you turn it off: - * See the "Recommended Build Options" section of the ICU4C readme - * (http://source.icu-project.org/repos/icu/icu/trunk/readme.html#RecBuild) * @stable ICU 2.4 */ @@ -105,8 +98,6 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; * @stable ICU 2.4 */ -/* Define C++ namespace symbols. 
*/ -#ifdef __cplusplus # if U_DISABLE_RENAMING # define U_ICU_NAMESPACE icu namespace U_ICU_NAMESPACE { } @@ -116,8 +107,8 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; namespace icu = U_ICU_NAMESPACE; # endif -# define U_NAMESPACE_BEGIN extern "C++" { namespace U_ICU_NAMESPACE { -# define U_NAMESPACE_END } } +# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE { +# define U_NAMESPACE_END } # define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE; # define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: @@ -133,12 +124,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; # if U_USING_ICU_NAMESPACE U_NAMESPACE_USE # endif -#else -# define U_NAMESPACE_BEGIN -# define U_NAMESPACE_END -# define U_NAMESPACE_USE -# define U_NAMESPACE_QUALIFIER -#endif +#endif /* __cplusplus */ /*===========================================================================*/ /* General version helper functions. Definitions in putil.c */ diff --git a/deps/icu-small/source/common/unifiedcache.cpp b/deps/icu-small/source/common/unifiedcache.cpp index 641f4ec659..f2dd916559 100644 --- a/deps/icu-small/source/common/unifiedcache.cpp +++ b/deps/icu-small/source/common/unifiedcache.cpp @@ -13,22 +13,15 @@ #include "unifiedcache.h" #include <algorithm> // For std::max() +#include <mutex> -#include "mutex.h" #include "uassert.h" #include "uhash.h" #include "ucln_cmn.h" -#include "umutex.h" static icu::UnifiedCache *gCache = NULL; -static icu::UMutex *gCacheMutex() { - static icu::UMutex m = U_MUTEX_INITIALIZER; - return &m; -} -static icu::UConditionVar *gInProgressValueAddedCond() { - static icu::UConditionVar cv = U_CONDITION_INITIALIZER; - return &cv; -} +static std::mutex *gCacheMutex = nullptr; +static std::condition_variable *gInProgressValueAddedCond; static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; static const int32_t MAX_EVICT_ITERATIONS = 10; @@ -39,10 +32,12 @@ static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100; U_CDECL_BEGIN static UBool U_CALLCONV 
unifiedcache_cleanup() { gCacheInitOnce.reset(); - if (gCache) { - delete gCache; - gCache = NULL; - } + delete gCache; + gCache = nullptr; + gCacheMutex->~mutex(); + gCacheMutex = nullptr; + gInProgressValueAddedCond->~condition_variable(); + gInProgressValueAddedCond = nullptr; return TRUE; } U_CDECL_END @@ -77,6 +72,8 @@ static void U_CALLCONV cacheInit(UErrorCode &status) { ucln_common_registerCleanup( UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup); + gCacheMutex = STATIC_NEW(std::mutex); + gInProgressValueAddedCond = STATIC_NEW(std::condition_variable); gCache = new UnifiedCache(status); if (gCache == NULL) { status = U_MEMORY_ALLOCATION_ERROR; @@ -138,28 +135,28 @@ void UnifiedCache::setEvictionPolicy( status = U_ILLEGAL_ARGUMENT_ERROR; return; } - Mutex lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); fMaxUnused = count; fMaxPercentageOfInUse = percentageOfInUseItems; } int32_t UnifiedCache::unusedCount() const { - Mutex lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); return uhash_count(fHashtable) - fNumValuesInUse; } int64_t UnifiedCache::autoEvictedCount() const { - Mutex lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); return fAutoEvictedCount; } int32_t UnifiedCache::keyCount() const { - Mutex lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); return uhash_count(fHashtable); } void UnifiedCache::flush() const { - Mutex lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); // Use a loop in case cache items that are flushed held hard references to // other cache items making those additional cache items eligible for @@ -168,7 +165,7 @@ void UnifiedCache::flush() const { } void UnifiedCache::handleUnreferencedObject() const { - Mutex lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); --fNumValuesInUse; _runEvictionSlice(); } @@ -187,7 +184,7 @@ void UnifiedCache::dump() { } void UnifiedCache::dumpContents() const { - Mutex 
lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); _dumpContents(); } @@ -227,7 +224,7 @@ UnifiedCache::~UnifiedCache() { // Now all that should be left in the cache are entries that refer to // each other and entries with hard references from outside the cache. // Nothing we can do about these so proceed to wipe out the cache. - Mutex lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); _flush(TRUE); } uhash_close(fHashtable); @@ -328,7 +325,7 @@ void UnifiedCache::_putIfAbsentAndGet( const CacheKeyBase &key, const SharedObject *&value, UErrorCode &status) const { - Mutex lock(gCacheMutex()); + std::lock_guard<std::mutex> lock(*gCacheMutex); const UHashElement *element = uhash_find(fHashtable, &key); if (element != NULL && !_inProgress(element)) { _fetch(element, value, status); @@ -353,15 +350,15 @@ UBool UnifiedCache::_poll( UErrorCode &status) const { U_ASSERT(value == NULL); U_ASSERT(status == U_ZERO_ERROR); - Mutex lock(gCacheMutex()); + std::unique_lock<std::mutex> lock(*gCacheMutex); const UHashElement *element = uhash_find(fHashtable, &key); // If the hash table contains an inProgress placeholder entry for this key, // this means that another thread is currently constructing the value object. // Loop, waiting for that construction to complete. while (element != NULL && _inProgress(element)) { - umtx_condWait(gInProgressValueAddedCond(), gCacheMutex()); - element = uhash_find(fHashtable, &key); + gInProgressValueAddedCond->wait(lock); + element = uhash_find(fHashtable, &key); } // If the hash table contains an entry for the key, @@ -433,7 +430,7 @@ void UnifiedCache::_put( // Tell waiting threads that we replace in-progress status with // an error. 
- umtx_condBroadcast(gInProgressValueAddedCond()); + gInProgressValueAddedCond->notify_all(); } void UnifiedCache::_fetch( diff --git a/deps/icu-small/source/common/uniset.cpp b/deps/icu-small/source/common/uniset.cpp index 1db382afe6..3807b83747 100644 --- a/deps/icu-small/source/common/uniset.cpp +++ b/deps/icu-small/source/common/uniset.cpp @@ -278,11 +278,11 @@ UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) { * to support cloning in order to allow classes using * UnicodeMatchers, such as Transliterator, to implement cloning. */ -UnicodeFunctor* UnicodeSet::clone() const { +UnicodeSet* UnicodeSet::clone() const { return new UnicodeSet(*this); } -UnicodeFunctor *UnicodeSet::cloneAsThawed() const { +UnicodeSet *UnicodeSet::cloneAsThawed() const { return new UnicodeSet(*this, TRUE); } @@ -2172,7 +2172,7 @@ void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) { // We can regenerate an equivalent pattern later when requested. } -UnicodeFunctor *UnicodeSet::freeze() { +UnicodeSet *UnicodeSet::freeze() { if(!isFrozen() && !isBogus()) { compact(); diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp index 6f7918a91a..45d3dab993 100644 --- a/deps/icu-small/source/common/uniset_props.cpp +++ b/deps/icu-small/source/common/uniset_props.cpp @@ -802,7 +802,10 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { // Property set API //---------------------------------------------------------------- -#define FAIL(ec) {ec=U_ILLEGAL_ARGUMENT_ERROR; return *this;} +#define FAIL(ec) UPRV_BLOCK_MACRO_BEGIN { \ + ec=U_ILLEGAL_ARGUMENT_ERROR; \ + return *this; \ +} UPRV_BLOCK_MACRO_END UnicodeSet& UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) { diff --git a/deps/icu-small/source/common/unisetspan.cpp b/deps/icu-small/source/common/unisetspan.cpp index 0a8893472f..68e44d91ee 100644 --- a/deps/icu-small/source/common/unisetspan.cpp 
+++ b/deps/icu-small/source/common/unisetspan.cpp @@ -400,7 +400,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStri if(otherStringSpan.pSpanNotSet==&otherStringSpan.spanSet) { pSpanNotSet=&spanSet; } else { - pSpanNotSet=(UnicodeSet *)otherStringSpan.pSpanNotSet->clone(); + pSpanNotSet=otherStringSpan.pSpanNotSet->clone(); } // Allocate a block of meta data. @@ -436,7 +436,7 @@ void UnicodeSetStringSpan::addToSpanNotSet(UChar32 c) { if(spanSet.contains(c)) { return; // Nothing to do. } - UnicodeSet *newSet=(UnicodeSet *)spanSet.cloneAsThawed(); + UnicodeSet *newSet=spanSet.cloneAsThawed(); if(newSet==NULL) { return; // Out of memory. } else { diff --git a/deps/icu-small/source/common/unistr.cpp b/deps/icu-small/source/common/unistr.cpp index 31b0ed84be..eeb0c3a679 100644 --- a/deps/icu-small/source/common/unistr.cpp +++ b/deps/icu-small/source/common/unistr.cpp @@ -332,7 +332,7 @@ Replaceable::clone() const { } // UnicodeString overrides clone() with a real implementation -Replaceable * +UnicodeString * UnicodeString::clone() const { return new UnicodeString(*this); } diff --git a/deps/icu-small/source/common/uresbund.cpp b/deps/icu-small/source/common/uresbund.cpp index c9f2c860da..3a9b4340bb 100644 --- a/deps/icu-small/source/common/uresbund.cpp +++ b/deps/icu-small/source/common/uresbund.cpp @@ -31,6 +31,7 @@ #include "ucln_cmn.h" #include "cmemory.h" #include "cstring.h" +#include "mutex.h" #include "uhash.h" #include "unicode/uenum.h" #include "uenumimp.h" @@ -38,6 +39,7 @@ #include "umutex.h" #include "putilimp.h" #include "uassert.h" +#include "uresdata.h" using namespace icu; @@ -47,12 +49,9 @@ TODO: This cache should probably be removed when the deprecated code is completely removed. 
*/ static UHashtable *cache = NULL; -static icu::UInitOnce gCacheInitOnce; +static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; -static UMutex *resbMutex() { - static UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +static UMutex resbMutex; /* INTERNAL: hashes an entry */ static int32_t U_CALLCONV hashEntry(const UHashTok parm) { @@ -96,13 +95,12 @@ static UBool chopLocale(char *name) { * Internal function */ static void entryIncrease(UResourceDataEntry *entry) { - umtx_lock(resbMutex()); + Mutex lock(&resbMutex); entry->fCountExisting++; while(entry->fParent != NULL) { entry = entry->fParent; entry->fCountExisting++; } - umtx_unlock(resbMutex()); } /** @@ -184,9 +182,8 @@ static int32_t ures_flushCache() /*if shared data hasn't even been lazy evaluated yet * return 0 */ - umtx_lock(resbMutex()); + Mutex lock(&resbMutex); if (cache == NULL) { - umtx_unlock(resbMutex()); return 0; } @@ -218,7 +215,6 @@ static int32_t ures_flushCache() * got decremented by free_entry(). */ } while(deletedMore); - umtx_unlock(resbMutex()); return rbDeletedNum; } @@ -232,9 +228,8 @@ U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void) { const UHashElement *e; UResourceDataEntry *resB; - umtx_lock(resbMutex()); + Mutex lock(&resbMutex); if (cache == NULL) { - umtx_unlock(resbMutex()); fprintf(stderr,"%s:%d: RB Cache is NULL.\n", __FILE__, __LINE__); return FALSE; } @@ -253,9 +248,6 @@ U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void) { } fprintf(stderr,"%s:%d: RB Cache still contains %d items.\n", __FILE__, __LINE__, uhash_count(cache)); - - umtx_unlock(resbMutex()); - return cacheNotEmpty; } @@ -401,7 +393,8 @@ static UResourceDataEntry *init_entry(const char *localeID, const char *path, UE /* We'll try to get alias string from the bundle */ aliasres = res_getResource(&(r->fData), "%%ALIAS"); if (aliasres != RES_BOGUS) { - const UChar *alias = res_getString(&(r->fData), aliasres, &aliasLen); + // No tracing: called during initial data loading + const UChar *alias = 
res_getStringNoTrace(&(r->fData), aliasres, &aliasLen); if(alias != NULL && aliasLen > 0) { /* if there is actual alias - unload and load new data */ u_UCharsToChars(alias, aliasName, aliasLen+1); r->fAlias = init_entry(aliasName, path, status); @@ -542,7 +535,8 @@ loadParentsExceptRoot(UResourceDataEntry *&t1, Resource parentRes = res_getResource(&t1->fData, "%%Parent"); if (parentRes != RES_BOGUS) { // An explicit parent was found. int32_t parentLocaleLen = 0; - const UChar *parentLocaleName = res_getString(&(t1->fData), parentRes, &parentLocaleLen); + // No tracing: called during initial data loading + const UChar *parentLocaleName = res_getStringNoTrace(&(t1->fData), parentRes, &parentLocaleLen); if(parentLocaleName != NULL && 0 < parentLocaleLen && parentLocaleLen < nameCapacity) { u_UCharsToChars(parentLocaleName, name, parentLocaleLen + 1); if (uprv_strcmp(name, kRootLocaleName) == 0) { @@ -666,107 +660,105 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID, } } - umtx_lock(resbMutex()); - { /* umtx_lock */ - /* We're going to skip all the locales that do not have any data */ - r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + Mutex lock(&resbMutex); // Lock resbMutex until the end of this function. + + /* We're going to skip all the locales that do not have any data */ + r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finish; + } + + if(r != NULL) { /* if there is one real locale, we can look for parents. 
*/ + t1 = r; + hasRealData = TRUE; + if ( usingUSRData ) { /* This code inserts user override data into the inheritance chain */ + UErrorCode usrStatus = U_ZERO_ERROR; + UResourceDataEntry *u1 = init_entry(t1->fName, usrDataPath, &usrStatus); + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finish; + } + if ( u1 != NULL ) { + if(u1->fBogus == U_ZERO_ERROR) { + u1->fParent = t1; + r = u1; + } else { + /* the USR override data wasn't found, set it to be deleted */ + u1->fCountExisting = 0; + } + } + } + if (hasChopped && !isRoot) { + if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { + goto finish; + } + } + } + /* we could have reached this point without having any real data */ + /* if that is the case, we need to chain in the default locale */ + if(r==NULL && openType == URES_OPEN_LOCALE_DEFAULT_ROOT && !isDefault && !isRoot) { + /* insert default locale */ + uprv_strcpy(name, uloc_getDefault()); + r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); // If we failed due to out-of-memory, report the failure and exit early. if (intStatus == U_MEMORY_ALLOCATION_ERROR) { *status = intStatus; - goto finishUnlock; + goto finish; } - - if(r != NULL) { /* if there is one real locale, we can look for parents. */ + intStatus = U_USING_DEFAULT_WARNING; + if(r != NULL) { /* the default locale exists */ t1 = r; hasRealData = TRUE; - if ( usingUSRData ) { /* This code inserts user override data into the inheritance chain */ - UErrorCode usrStatus = U_ZERO_ERROR; - UResourceDataEntry *u1 = init_entry(t1->fName, usrDataPath, &usrStatus); - // If we failed due to out-of-memory, report the failure and exit early. 
- if (intStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = intStatus; - goto finishUnlock; - } - if ( u1 != NULL ) { - if(u1->fBogus == U_ZERO_ERROR) { - u1->fParent = t1; - r = u1; - } else { - /* the USR override data wasn't found, set it to be deleted */ - u1->fCountExisting = 0; - } - } - } + isDefault = TRUE; + // TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path? if (hasChopped && !isRoot) { if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { - goto finishUnlock; + goto finish; } } } + } - /* we could have reached this point without having any real data */ - /* if that is the case, we need to chain in the default locale */ - if(r==NULL && openType == URES_OPEN_LOCALE_DEFAULT_ROOT && !isDefault && !isRoot) { - /* insert default locale */ - uprv_strcpy(name, uloc_getDefault()); - r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); - // If we failed due to out-of-memory, report the failure and exit early. - if (intStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = intStatus; - goto finishUnlock; - } + /* we could still have r == NULL at this point - maybe even default locale is not */ + /* present */ + if(r == NULL) { + uprv_strcpy(name, kRootLocaleName); + r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finish; + } + if(r != NULL) { + t1 = r; intStatus = U_USING_DEFAULT_WARNING; - if(r != NULL) { /* the default locale exists */ - t1 = r; - hasRealData = TRUE; - isDefault = TRUE; - // TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path? 
- if (hasChopped && !isRoot) { - if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { - goto finishUnlock; - } - } - } + hasRealData = TRUE; + } else { /* we don't even have the root locale */ + *status = U_MISSING_RESOURCE_ERROR; + goto finish; } - - /* we could still have r == NULL at this point - maybe even default locale is not */ - /* present */ - if(r == NULL) { - uprv_strcpy(name, kRootLocaleName); - r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); - // If we failed due to out-of-memory, report the failure and exit early. - if (intStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = intStatus; - goto finishUnlock; - } - if(r != NULL) { - t1 = r; - intStatus = U_USING_DEFAULT_WARNING; - hasRealData = TRUE; - } else { /* we don't even have the root locale */ - *status = U_MISSING_RESOURCE_ERROR; - goto finishUnlock; - } - } else if(!isRoot && uprv_strcmp(t1->fName, kRootLocaleName) != 0 && - t1->fParent == NULL && !r->fData.noFallback) { - if (!insertRootBundle(t1, status)) { - goto finishUnlock; - } - if(!hasRealData) { - r->fBogus = U_USING_DEFAULT_WARNING; - } + } else if(!isRoot && uprv_strcmp(t1->fName, kRootLocaleName) != 0 && + t1->fParent == NULL && !r->fData.noFallback) { + if (!insertRootBundle(t1, status)) { + goto finish; } - - // TODO: Does this ever loop? - while(r != NULL && !isRoot && t1->fParent != NULL) { - t1->fParent->fCountExisting++; - t1 = t1->fParent; + if(!hasRealData) { + r->fBogus = U_USING_DEFAULT_WARNING; } - } /* umtx_lock */ -finishUnlock: - umtx_unlock(resbMutex()); + } + // TODO: Does this ever loop? 
+ while(r != NULL && !isRoot && t1->fParent != NULL) { + t1->fParent->fCountExisting++; + t1 = t1->fParent; + } + +finish: if(U_SUCCESS(*status)) { if(intStatus != U_ZERO_ERROR) { *status = intStatus; @@ -790,7 +782,7 @@ entryOpenDirect(const char* path, const char* localeID, UErrorCode* status) { return NULL; } - umtx_lock(resbMutex()); + Mutex lock(&resbMutex); // findFirstExisting() without fallbacks. UResourceDataEntry *r = init_entry(localeID, path, status); if(U_SUCCESS(*status)) { @@ -828,7 +820,6 @@ entryOpenDirect(const char* path, const char* localeID, UErrorCode* status) { t1 = t1->fParent; } } - umtx_unlock(resbMutex()); return r; } @@ -871,9 +862,8 @@ static void entryCloseInt(UResourceDataEntry *resB) { */ static void entryClose(UResourceDataEntry *resB) { - umtx_lock(resbMutex()); + Mutex lock(&resbMutex); entryCloseInt(resB); - umtx_unlock(resbMutex()); } /* @@ -1177,6 +1167,7 @@ static UResourceBundle *init_resb_result(const ResourceData *rdata, Resource r, if(mainRes != result) { ures_close(mainRes); } + ResourceTracer(resB).maybeTrace("getalias"); return result; } } else { @@ -1256,6 +1247,7 @@ static UResourceBundle *init_resb_result(const ResourceData *rdata, Resource r, /*resB->fParent = parent->fRes;*/ uprv_memmove(&resB->fResData, rdata, sizeof(ResourceData)); resB->fSize = res_countArrayItems(&(resB->fResData), resB->fRes); + ResourceTracer(resB).trace("get"); return resB; } @@ -1304,7 +1296,7 @@ U_CAPI const UChar* U_EXPORT2 ures_getString(const UResourceBundle* resB, int32_ *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } - s = res_getString(&(resB->fResData), resB->fRes, len); + s = res_getString({resB}, &(resB->fResData), resB->fRes, len); if (s == NULL) { *status = U_RESOURCE_TYPE_MISMATCH; } @@ -1393,7 +1385,7 @@ U_CAPI const uint8_t* U_EXPORT2 ures_getBinary(const UResourceBundle* resB, int3 *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } - p = res_getBinary(&(resB->fResData), resB->fRes, len); + p = res_getBinary({resB}, 
&(resB->fResData), resB->fRes, len); if (p == NULL) { *status = U_RESOURCE_TYPE_MISMATCH; } @@ -1410,7 +1402,7 @@ U_CAPI const int32_t* U_EXPORT2 ures_getIntVector(const UResourceBundle* resB, i *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } - p = res_getIntVector(&(resB->fResData), resB->fRes, len); + p = res_getIntVector({resB}, &(resB->fResData), resB->fRes, len); if (p == NULL) { *status = U_RESOURCE_TYPE_MISMATCH; } @@ -1431,7 +1423,7 @@ U_CAPI int32_t U_EXPORT2 ures_getInt(const UResourceBundle* resB, UErrorCode *st *status = U_RESOURCE_TYPE_MISMATCH; return 0xffffffff; } - return RES_GET_INT(resB->fRes); + return res_getInt({resB}, resB->fRes); } U_CAPI uint32_t U_EXPORT2 ures_getUInt(const UResourceBundle* resB, UErrorCode *status) { @@ -1446,7 +1438,7 @@ U_CAPI uint32_t U_EXPORT2 ures_getUInt(const UResourceBundle* resB, UErrorCode * *status = U_RESOURCE_TYPE_MISMATCH; return 0xffffffff; } - return RES_GET_UINT(resB->fRes); + return res_getUInt({resB}, resB->fRes); } U_CAPI UResType U_EXPORT2 ures_getType(const UResourceBundle *resB) { @@ -1457,10 +1449,18 @@ U_CAPI UResType U_EXPORT2 ures_getType(const UResourceBundle *resB) { } U_CAPI const char * U_EXPORT2 ures_getKey(const UResourceBundle *resB) { + // + // TODO: Trace ures_getKey? I guess not usually. + // + // We usually get the key string to decide whether we want the value, or to + // make a key-value pair. Tracing the value should suffice. + // + // However, I believe we have some data (e.g., in res_index) where the key + // strings are the data. Tracing the enclosing table should suffice. 
+ // if(resB == NULL) { return NULL; } - return(resB->fKey); } @@ -1480,7 +1480,7 @@ static const UChar* ures_getStringWithAlias(const UResourceBundle *resB, Resourc ures_close(tempRes); return result; } else { - return res_getString(&(resB->fResData), r, len); + return res_getString({resB, sIndex}, &(resB->fResData), r, len); } } @@ -1516,7 +1516,7 @@ U_CAPI const UChar* U_EXPORT2 ures_getNextString(UResourceBundle *resB, int32_t* switch(RES_GET_TYPE(resB->fRes)) { case URES_STRING: case URES_STRING_V2: - return res_getString(&(resB->fResData), resB->fRes, len); + return res_getString({resB}, &(resB->fResData), resB->fRes, len); case URES_TABLE: case URES_TABLE16: case URES_TABLE32: @@ -1661,7 +1661,7 @@ U_CAPI const UChar* U_EXPORT2 ures_getStringByIndex(const UResourceBundle *resB, switch(RES_GET_TYPE(resB->fRes)) { case URES_STRING: case URES_STRING_V2: - return res_getString(&(resB->fResData), resB->fRes, len); + return res_getString({resB}, &(resB->fResData), resB->fRes, len); case URES_TABLE: case URES_TABLE16: case URES_TABLE32: @@ -1953,10 +1953,10 @@ void getAllItemsWithFallback( // When the sink sees the no-fallback/no-inheritance marker, // then it would remove the parent's item. // We would deserialize parent values even though they are overridden in a child bundle. - value.pResData = &bundle->fResData; + value.setData(&bundle->fResData); UResourceDataEntry *parentEntry = bundle->fData->fParent; UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus); - value.setResource(bundle->fRes); + value.setResource(bundle->fRes, ResourceTracer(bundle)); sink.put(bundle->fKey, value, !hasParent, errorCode); if (hasParent) { // We might try to query the sink whether @@ -2001,31 +2001,60 @@ void getAllItemsWithFallback( } // namespace +// Requires a ResourceDataValue fill-in, so that we need not cast from a ResourceValue. +// Unfortunately, the caller must know which subclass to make and pass in. 
+// Alternatively, we could make it as polymorphic as in Java by +// returning a ResourceValue pointer (possibly wrapped into a LocalPointer) +// that the caller then owns. +// +// Also requires a UResourceBundle fill-in, so that the value's ResourceTracer +// can point to a non-local bundle. +// Without tracing, the child bundle could be a function-local object. +U_CAPI void U_EXPORT2 +ures_getValueWithFallback(const UResourceBundle *bundle, const char *path, + UResourceBundle *tempFillIn, + ResourceDataValue &value, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + if (path == nullptr) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + const UResourceBundle *rb; + if (*path == 0) { + // empty path + rb = bundle; + } else { + rb = ures_getByKeyWithFallback(bundle, path, tempFillIn, &errorCode); + if (U_FAILURE(errorCode)) { + return; + } + } + value.setData(&rb->fResData); + value.setResource(rb->fRes, ResourceTracer(rb)); +} + U_CAPI void U_EXPORT2 ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path, icu::ResourceSink &sink, UErrorCode &errorCode) { if (U_FAILURE(errorCode)) { return; } - if (path == NULL) { + if (path == nullptr) { errorCode = U_ILLEGAL_ARGUMENT_ERROR; return; } - UResourceBundle stackBundle; - ures_initStackObject(&stackBundle); + StackUResourceBundle stackBundle; const UResourceBundle *rb; if (*path == 0) { // empty path rb = bundle; } else { - rb = ures_getByKeyWithFallback(bundle, path, &stackBundle, &errorCode); + rb = ures_getByKeyWithFallback(bundle, path, stackBundle.getAlias(), &errorCode); if (U_FAILURE(errorCode)) { - ures_close(&stackBundle); return; } } // Get all table items with fallback. 
ResourceDataValue value; getAllItemsWithFallback(rb, value, sink, errorCode); - ures_close(&stackBundle); } U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) { @@ -2108,7 +2137,7 @@ U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, c switch (RES_GET_TYPE(res)) { case URES_STRING: case URES_STRING_V2: - return res_getString(rd, res, len); + return res_getString({resB, key}, rd, res, len); case URES_ALIAS: { const UChar* result = 0; @@ -2130,7 +2159,7 @@ U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, c switch (RES_GET_TYPE(res)) { case URES_STRING: case URES_STRING_V2: - return res_getString(&(resB->fResData), res, len); + return res_getString({resB, key}, &(resB->fResData), res, len); case URES_ALIAS: { const UChar* result = 0; @@ -2151,6 +2180,7 @@ U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, c /* here should go a first attempt to locate the key using index table */ const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); if(U_SUCCESS(*status)) { + // TODO: Tracing return res_getString(rd, res, len); } else { *status = U_MISSING_RESOURCE_ERROR; @@ -2296,6 +2326,8 @@ ures_openWithType(UResourceBundle *r, const char* path, const char* localeID, r->fSize = res_countArrayItems(&(r->fResData), r->fRes); r->fIndex = -1; + ResourceTracer(r).traceOpen(); + return r; } diff --git a/deps/icu-small/source/common/uresdata.cpp b/deps/icu-small/source/common/uresdata.cpp index a0b8d3ba90..1bb938be62 100644 --- a/deps/icu-small/source/common/uresdata.cpp +++ b/deps/icu-small/source/common/uresdata.cpp @@ -33,6 +33,7 @@ #include "uinvchar.h" #include "uresdata.h" #include "uresimp.h" +#include "utracimp.h" /* * Resource access helpers @@ -307,7 +308,7 @@ res_getPublicType(Resource res) { } U_CAPI const UChar * U_EXPORT2 -res_getString(const ResourceData *pResData, Resource 
res, int32_t *pLength) { +res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) { const UChar *p; uint32_t offset=RES_GET_OFFSET(res); int32_t length; @@ -402,7 +403,8 @@ int32_t getStringArray(const ResourceData *pResData, const icu::ResourceArray &a } for(int32_t i = 0; i < length; ++i) { int32_t sLength; - const UChar *s = res_getString(pResData, array.internalGetResource(pResData, i), &sLength); + // No tracing: handled by the caller + const UChar *s = res_getStringNoTrace(pResData, array.internalGetResource(pResData, i), &sLength); if(s == NULL) { errorCode = U_RESOURCE_TYPE_MISMATCH; return 0; @@ -434,7 +436,7 @@ res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength) { } U_CAPI const uint8_t * U_EXPORT2 -res_getBinary(const ResourceData *pResData, Resource res, int32_t *pLength) { +res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) { const uint8_t *p; uint32_t offset=RES_GET_OFFSET(res); int32_t length; @@ -454,7 +456,7 @@ res_getBinary(const ResourceData *pResData, Resource res, int32_t *pLength) { U_CAPI const int32_t * U_EXPORT2 -res_getIntVector(const ResourceData *pResData, Resource res, int32_t *pLength) { +res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) { const int32_t *p; uint32_t offset=RES_GET_OFFSET(res); int32_t length; @@ -507,7 +509,7 @@ const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode if(U_FAILURE(errorCode)) { return NULL; } - const UChar *s = res_getString(pResData, res, &length); + const UChar *s = res_getString(fTraceInfo, &getData(), res, &length); if(s == NULL) { errorCode = U_RESOURCE_TYPE_MISMATCH; } @@ -518,7 +520,7 @@ const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &erro if(U_FAILURE(errorCode)) { return NULL; } - const UChar *s = res_getAlias(pResData, res, &length); + const UChar *s = res_getAlias(&getData(), res, &length); if(s == NULL) { errorCode = 
U_RESOURCE_TYPE_MISMATCH; } @@ -532,7 +534,7 @@ int32_t ResourceDataValue::getInt(UErrorCode &errorCode) const { if(RES_GET_TYPE(res) != URES_INT) { errorCode = U_RESOURCE_TYPE_MISMATCH; } - return RES_GET_INT(res); + return res_getInt(fTraceInfo, res); } uint32_t ResourceDataValue::getUInt(UErrorCode &errorCode) const { @@ -542,14 +544,14 @@ uint32_t ResourceDataValue::getUInt(UErrorCode &errorCode) const { if(RES_GET_TYPE(res) != URES_INT) { errorCode = U_RESOURCE_TYPE_MISMATCH; } - return RES_GET_UINT(res); + return res_getUInt(fTraceInfo, res); } const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode &errorCode) const { if(U_FAILURE(errorCode)) { return NULL; } - const int32_t *iv = res_getIntVector(pResData, res, &length); + const int32_t *iv = res_getIntVector(fTraceInfo, &getData(), res, &length); if(iv == NULL) { errorCode = U_RESOURCE_TYPE_MISMATCH; } @@ -560,7 +562,7 @@ const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCo if(U_FAILURE(errorCode)) { return NULL; } - const uint8_t *b = res_getBinary(pResData, res, &length); + const uint8_t *b = res_getBinary(fTraceInfo, &getData(), res, &length); if(b == NULL) { errorCode = U_RESOURCE_TYPE_MISMATCH; } @@ -578,19 +580,19 @@ ResourceArray ResourceDataValue::getArray(UErrorCode &errorCode) const { switch(RES_GET_TYPE(res)) { case URES_ARRAY: if (offset!=0) { // empty if offset==0 - items32 = (const Resource *)pResData->pRoot+offset; + items32 = (const Resource *)getData().pRoot+offset; length = *items32++; } break; case URES_ARRAY16: - items16 = pResData->p16BitUnits+offset; + items16 = getData().p16BitUnits+offset; length = *items16++; break; default: errorCode = U_RESOURCE_TYPE_MISMATCH; return ResourceArray(); } - return ResourceArray(items16, items32, length); + return ResourceArray(items16, items32, length, fTraceInfo); } ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const { @@ -606,19 +608,19 @@ ResourceTable 
ResourceDataValue::getTable(UErrorCode &errorCode) const { switch(RES_GET_TYPE(res)) { case URES_TABLE: if (offset != 0) { // empty if offset==0 - keys16 = (const uint16_t *)(pResData->pRoot+offset); + keys16 = (const uint16_t *)(getData().pRoot+offset); length = *keys16++; items32 = (const Resource *)(keys16+length+(~length&1)); } break; case URES_TABLE16: - keys16 = pResData->p16BitUnits+offset; + keys16 = getData().p16BitUnits+offset; length = *keys16++; items16 = keys16 + length; break; case URES_TABLE32: if (offset != 0) { // empty if offset==0 - keys32 = pResData->pRoot+offset; + keys32 = getData().pRoot+offset; length = *keys32++; items32 = (const Resource *)keys32 + length; } @@ -627,22 +629,22 @@ ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const { errorCode = U_RESOURCE_TYPE_MISMATCH; return ResourceTable(); } - return ResourceTable(keys16, keys32, items16, items32, length); + return ResourceTable(keys16, keys32, items16, items32, length, fTraceInfo); } UBool ResourceDataValue::isNoInheritanceMarker() const { - return ::isNoInheritanceMarker(pResData, res); + return ::isNoInheritanceMarker(&getData(), res); } int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity, UErrorCode &errorCode) const { - return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode); + return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode); } int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity, UErrorCode &errorCode) const { if(URES_IS_ARRAY(res)) { - return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode); + return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode); } if(U_FAILURE(errorCode)) { return 0; @@ -656,7 +658,7 @@ int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, in return 1; } int32_t sLength; - const UChar *s = res_getString(pResData, res, &sLength); + 
const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength); if(s != NULL) { dest[0].setTo(TRUE, s, sLength); return 1; @@ -671,7 +673,7 @@ UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode) return us; } int32_t sLength; - const UChar *s = res_getString(pResData, res, &sLength); + const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength); if(s != NULL) { us.setTo(TRUE, s, sLength); return us; @@ -681,7 +683,8 @@ UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode) return us; } if(array.getSize() > 0) { - s = res_getString(pResData, array.internalGetResource(pResData, 0), &sLength); + // Tracing is already performed above (unimportant for trace that this is an array) + s = res_getStringNoTrace(&getData(), array.internalGetResource(&getData(), 0), &sLength); if(s != NULL) { us.setTo(TRUE, s, sLength); return us; @@ -818,18 +821,45 @@ UBool icu::ResourceTable::getKeyAndValue(int32_t i, const char *&key, icu::ResourceValue &value) const { if(0 <= i && i < length) { icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value); - if (keys16 != NULL) { - key = RES_GET_KEY16(rdValue.pResData, keys16[i]); + if (keys16 != nullptr) { + key = RES_GET_KEY16(&rdValue.getData(), keys16[i]); } else { - key = RES_GET_KEY32(rdValue.pResData, keys32[i]); + key = RES_GET_KEY32(&rdValue.getData(), keys32[i]); } Resource res; - if (items16 != NULL) { - res = makeResourceFrom16(rdValue.pResData, items16[i]); + if (items16 != nullptr) { + res = makeResourceFrom16(&rdValue.getData(), items16[i]); } else { res = items32[i]; } - rdValue.setResource(res); + // Note: the ResourceTracer keeps a reference to the field of this + // ResourceTable. This is OK because the ResourceTable should remain + // alive for the duration that fields are being read from it + // (including nested fields). 
+ rdValue.setResource(res, ResourceTracer(fTraceInfo, key)); + return TRUE; + } + return FALSE; +} + +UBool icu::ResourceTable::findValue(const char *key, ResourceValue &value) const { + icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value); + const char *realKey = nullptr; + int32_t i; + if (keys16 != nullptr) { + i = _res_findTableItem(&rdValue.getData(), keys16, length, key, &realKey); + } else { + i = _res_findTable32Item(&rdValue.getData(), keys32, length, key, &realKey); + } + if (i >= 0) { + Resource res; + if (items16 != nullptr) { + res = makeResourceFrom16(&rdValue.getData(), items16[i]); + } else { + res = items32[i]; + } + // Same note about lifetime as in getKeyAndValue(). + rdValue.setResource(res, ResourceTracer(fTraceInfo, key)); return TRUE; } return FALSE; @@ -875,7 +905,13 @@ uint32_t icu::ResourceArray::internalGetResource(const ResourceData *pResData, i UBool icu::ResourceArray::getValue(int32_t i, icu::ResourceValue &value) const { if(0 <= i && i < length) { icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value); - rdValue.setResource(internalGetResource(rdValue.pResData, i)); + // Note: the ResourceTracer keeps a reference to the field of this + // ResourceArray. This is OK because the ResourceArray should remain + // alive for the duration that fields are being read from it + // (including nested fields). 
+ rdValue.setResource( + internalGetResource(&rdValue.getData(), i), + ResourceTracer(fTraceInfo, i)); return TRUE; } return FALSE; diff --git a/deps/icu-small/source/common/uresdata.h b/deps/icu-small/source/common/uresdata.h index 4e28ddccf6..d1b67babf2 100644 --- a/deps/icu-small/source/common/uresdata.h +++ b/deps/icu-small/source/common/uresdata.h @@ -69,14 +69,16 @@ typedef uint32_t Resource; #define RES_GET_OFFSET(res) ((res)&0x0fffffff) #define RES_GET_POINTER(pRoot, res) ((pRoot)+RES_GET_OFFSET(res)) -/* get signed and unsigned integer values directly from the Resource handle */ +/* get signed and unsigned integer values directly from the Resource handle + * NOTE: For proper logging, please use the res_getInt() constexpr + */ #if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC -# define RES_GET_INT(res) (((int32_t)((res)<<4L))>>4L) +# define RES_GET_INT_NO_TRACE(res) (((int32_t)((res)<<4L))>>4L) #else -# define RES_GET_INT(res) (int32_t)(((res)&0x08000000) ? (res)|0xf0000000 : (res)&0x07ffffff) +# define RES_GET_INT_NO_TRACE(res) (int32_t)(((res)&0x08000000) ? (res)|0xf0000000 : (res)&0x07ffffff) #endif -#define RES_GET_UINT(res) ((res)&0x0fffffff) +#define RES_GET_UINT_NO_TRACE(res) ((res)&0x0fffffff) #define URES_IS_ARRAY(type) ((int32_t)(type)==URES_ARRAY || (int32_t)(type)==URES_ARRAY16) #define URES_IS_TABLE(type) ((int32_t)(type)==URES_TABLE || (int32_t)(type)==URES_TABLE16 || (int32_t)(type)==URES_TABLE32) @@ -423,22 +425,26 @@ res_unload(ResourceData *pResData); U_INTERNAL UResType U_EXPORT2 res_getPublicType(Resource res); +/////////////////////////////////////////////////////////////////////////// +// To enable tracing, use the inline versions of the res_get* functions. // +/////////////////////////////////////////////////////////////////////////// + /* * Return a pointer to a zero-terminated, const UChar* string * and set its length in *pLength. * Returns NULL if not found. 
*/ U_INTERNAL const UChar * U_EXPORT2 -res_getString(const ResourceData *pResData, Resource res, int32_t *pLength); - -U_INTERNAL const UChar * U_EXPORT2 -res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength); +res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength); U_INTERNAL const uint8_t * U_EXPORT2 -res_getBinary(const ResourceData *pResData, Resource res, int32_t *pLength); +res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength); U_INTERNAL const int32_t * U_EXPORT2 -res_getIntVector(const ResourceData *pResData, Resource res, int32_t *pLength); +res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength); + +U_INTERNAL const UChar * U_EXPORT2 +res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength); U_INTERNAL Resource U_EXPORT2 res_getResource(const ResourceData *pResData, const char *key); @@ -470,17 +476,55 @@ U_CFUNC Resource res_findResource(const ResourceData *pResData, Resource r, #ifdef __cplusplus #include "resource.h" +#include "restrace.h" U_NAMESPACE_BEGIN +inline const UChar* res_getString(const ResourceTracer& traceInfo, + const ResourceData *pResData, Resource res, int32_t *pLength) { + traceInfo.trace("string"); + return res_getStringNoTrace(pResData, res, pLength); +} + +inline const uint8_t* res_getBinary(const ResourceTracer& traceInfo, + const ResourceData *pResData, Resource res, int32_t *pLength) { + traceInfo.trace("binary"); + return res_getBinaryNoTrace(pResData, res, pLength); +} + +inline const int32_t* res_getIntVector(const ResourceTracer& traceInfo, + const ResourceData *pResData, Resource res, int32_t *pLength) { + traceInfo.trace("intvector"); + return res_getIntVectorNoTrace(pResData, res, pLength); +} + +inline int32_t res_getInt(const ResourceTracer& traceInfo, Resource res) { + traceInfo.trace("int"); + return RES_GET_INT_NO_TRACE(res); +} + +inline uint32_t res_getUInt(const ResourceTracer& traceInfo, 
Resource res) { + traceInfo.trace("uint"); + return RES_GET_UINT_NO_TRACE(res); +} + class ResourceDataValue : public ResourceValue { public: - ResourceDataValue() : pResData(NULL), res(static_cast<Resource>(URES_NONE)) {} + ResourceDataValue() : + res(static_cast<Resource>(URES_NONE)), + fTraceInfo() {} virtual ~ResourceDataValue(); - void setData(const ResourceData *data) { pResData = data; } - void setResource(Resource r) { res = r; } + void setData(const ResourceData *data) { + resData = *data; + } + void setResource(Resource r, ResourceTracer&& traceInfo) { + res = r; + fTraceInfo = traceInfo; + } + + const ResourceData &getData() const { return resData; } virtual UResType getType() const; virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const; virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const; @@ -497,10 +541,12 @@ public: UErrorCode &errorCode) const; virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const; - const ResourceData *pResData; - private: + // TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer, + // then remove this value field again and just store a pResData pointer. + ResourceData resData; Resource res; + ResourceTracer fTraceInfo; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/uresimp.h b/deps/icu-small/source/common/uresimp.h index 16144012a5..2e477dfad3 100644 --- a/deps/icu-small/source/common/uresimp.h +++ b/deps/icu-small/source/common/uresimp.h @@ -67,6 +67,9 @@ struct UResourceBundle { char *fVersion; UResourceDataEntry *fTopLevelData; /* for getting the valid locale */ char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */ + // TODO(ICU-20769): Try to change the by-value fResData into a pointer, + // with the struct in only one place for each bundle. + // Also replace class ResourceDataValue.resData with a pResData pointer again. 
ResourceData fResData; char fResBuf[RES_BUFSIZE]; int32_t fResPathLen; @@ -282,6 +285,11 @@ ures_getStringByKeyWithFallback(const UResourceBundle *resB, #ifdef __cplusplus U_CAPI void U_EXPORT2 +ures_getValueWithFallback(const UResourceBundle *bundle, const char *path, + UResourceBundle *tempFillIn, + icu::ResourceDataValue &value, UErrorCode &errorCode); + +U_CAPI void U_EXPORT2 ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path, icu::ResourceSink &sink, UErrorCode &errorCode); diff --git a/deps/icu-small/source/common/usprep.cpp b/deps/icu-small/source/common/usprep.cpp index 9155ae077b..f1c075a391 100644 --- a/deps/icu-small/source/common/usprep.cpp +++ b/deps/icu-small/source/common/usprep.cpp @@ -45,13 +45,9 @@ U_CDECL_BEGIN Static cache for already opened StringPrep profiles */ static UHashtable *SHARED_DATA_HASHTABLE = NULL; -static icu::UInitOnce gSharedDataInitOnce; - -static UMutex *usprepMutex() { - static UMutex m = U_MUTEX_INITIALIZER; - return &m; -} +static icu::UInitOnce gSharedDataInitOnce = U_INITONCE_INITIALIZER; +static UMutex usprepMutex; /* format version of spp file */ //static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; @@ -151,9 +147,9 @@ usprep_internal_flushCache(UBool noRefCount){ * if shared data hasn't even been lazy evaluated yet * return 0 */ - umtx_lock(usprepMutex()); + umtx_lock(&usprepMutex); if (SHARED_DATA_HASHTABLE == NULL) { - umtx_unlock(usprepMutex()); + umtx_unlock(&usprepMutex); return 0; } @@ -184,7 +180,7 @@ usprep_internal_flushCache(UBool noRefCount){ } } - umtx_unlock(usprepMutex()); + umtx_unlock(&usprepMutex); return deletedNum; } @@ -262,7 +258,7 @@ loadData(UStringPrepProfile* profile, } /* in the mutex block, set the data for this process */ - umtx_lock(usprepMutex()); + umtx_lock(&usprepMutex); if(profile->sprepData==NULL) { profile->sprepData=dataMemory; dataMemory=NULL; @@ -271,7 +267,7 @@ loadData(UStringPrepProfile* profile, } else { p=(const int32_t 
*)udata_getMemory(profile->sprepData); } - umtx_unlock(usprepMutex()); + umtx_unlock(&usprepMutex); /* initialize some variables */ profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); @@ -328,12 +324,12 @@ usprep_getProfile(const char* path, stackKey.path = (char*) path; /* fetch the data from the cache */ - umtx_lock(usprepMutex()); + umtx_lock(&usprepMutex); profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); if(profile != NULL) { profile->refCount++; } - umtx_unlock(usprepMutex()); + umtx_unlock(&usprepMutex); if(profile == NULL) { /* else load the data and put the data in the cache */ @@ -365,7 +361,7 @@ usprep_getProfile(const char* path, return NULL; } - umtx_lock(usprepMutex()); + umtx_lock(&usprepMutex); // If another thread already inserted the same key/value, refcount and cleanup our thread data profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); if(profile != NULL) { @@ -386,7 +382,7 @@ usprep_getProfile(const char* path, profile->refCount = 1; uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); } - umtx_unlock(usprepMutex()); + umtx_unlock(&usprepMutex); } return profile; @@ -425,12 +421,12 @@ usprep_close(UStringPrepProfile* profile){ return; } - umtx_lock(usprepMutex()); + umtx_lock(&usprepMutex); /* decrement the ref count*/ if(profile->refCount > 0){ profile->refCount--; } - umtx_unlock(usprepMutex()); + umtx_unlock(&usprepMutex); } diff --git a/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp b/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp index 056b40eb41..457905eb60 100644 --- a/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp +++ b/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp @@ -45,7 +45,7 @@ public: WholeStringBreakIterator() : BreakIterator(), length(0) {} ~WholeStringBreakIterator() U_OVERRIDE; UBool operator==(const BreakIterator&) const U_OVERRIDE; - BreakIterator *clone() const 
U_OVERRIDE; + WholeStringBreakIterator *clone() const U_OVERRIDE; static UClassID U_EXPORT2 getStaticClassID(); UClassID getDynamicClassID() const U_OVERRIDE; CharacterIterator &getText() const U_OVERRIDE; @@ -62,9 +62,9 @@ public: int32_t preceding(int32_t offset) U_OVERRIDE; UBool isBoundary(int32_t offset) U_OVERRIDE; int32_t next(int32_t n) U_OVERRIDE; - BreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize, - UErrorCode &errorCode) U_OVERRIDE; - BreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE; + WholeStringBreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize, + UErrorCode &errorCode) U_OVERRIDE; + WholeStringBreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE; private: int32_t length; @@ -74,7 +74,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator) WholeStringBreakIterator::~WholeStringBreakIterator() {} UBool WholeStringBreakIterator::operator==(const BreakIterator&) const { return FALSE; } -BreakIterator *WholeStringBreakIterator::clone() const { return nullptr; } +WholeStringBreakIterator *WholeStringBreakIterator::clone() const { return nullptr; } CharacterIterator &WholeStringBreakIterator::getText() const { UPRV_UNREACHABLE; // really should not be called @@ -113,14 +113,14 @@ int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) { return 0; } UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) { return FALSE; } int32_t WholeStringBreakIterator::next(int32_t /*n*/) { return length; } -BreakIterator *WholeStringBreakIterator::createBufferClone( +WholeStringBreakIterator *WholeStringBreakIterator::createBufferClone( void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) { if (U_SUCCESS(errorCode)) { errorCode = U_UNSUPPORTED_ERROR; } return nullptr; } -BreakIterator &WholeStringBreakIterator::refreshInputText( +WholeStringBreakIterator &WholeStringBreakIterator::refreshInputText( UText * /*input*/, UErrorCode 
&errorCode) { if (U_SUCCESS(errorCode)) { errorCode = U_UNSUPPORTED_ERROR; diff --git a/deps/icu-small/source/common/ustring.cpp b/deps/icu-small/source/common/ustring.cpp index a1a51f4b1e..6886c145d9 100644 --- a/deps/icu-small/source/common/ustring.cpp +++ b/deps/icu-small/source/common/ustring.cpp @@ -1428,7 +1428,7 @@ u_unescape(const char *src, UChar *dest, int32_t destCapacity) { * NUL-terminate a string no matter what its type. * Set warning and error codes accordingly. */ -#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) \ +#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) UPRV_BLOCK_MACRO_BEGIN { \ if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \ /* not a public function, so no complete argument checking */ \ \ @@ -1448,7 +1448,8 @@ u_unescape(const char *src, UChar *dest, int32_t destCapacity) { /* even the string itself did not fit - set an error code */ \ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \ } \ - } + } \ +} UPRV_BLOCK_MACRO_END U_CAPI int32_t U_EXPORT2 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { @@ -1488,7 +1489,7 @@ u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCod the output range. [LIU] */ -#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \ +#define STRING_HASH(TYPE, STR, STRLEN, DEREF) UPRV_BLOCK_MACRO_BEGIN { \ uint32_t hash = 0; \ const TYPE *p = (const TYPE*) STR; \ if (p != NULL) { \ @@ -1500,7 +1501,8 @@ u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCod p += inc; \ } \ } \ - return static_cast<int32_t>(hash) + return static_cast<int32_t>(hash); \ +} UPRV_BLOCK_MACRO_END /* Used by UnicodeString to compute its hashcode - Not public API. 
*/ U_CAPI int32_t U_EXPORT2 diff --git a/deps/icu-small/source/common/utext.cpp b/deps/icu-small/source/common/utext.cpp index 5e3a005626..324341f1ba 100644 --- a/deps/icu-small/source/common/utext.cpp +++ b/deps/icu-small/source/common/utext.cpp @@ -567,7 +567,7 @@ enum { struct ExtendedUText { UText ut; - UAlignedMemory extension; + max_align_t extension; }; static const UText emptyText = UTEXT_INITIALIZER; @@ -582,7 +582,7 @@ utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) { // We need to heap-allocate storage for the new UText int32_t spaceRequired = sizeof(UText); if (extraSpace > 0) { - spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(UAlignedMemory); + spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(max_align_t); } ut = (UText *)uprv_malloc(spaceRequired); if (ut == NULL) { diff --git a/deps/icu-small/source/common/util.cpp b/deps/icu-small/source/common/util.cpp index 838a201a73..56dd4f1bfa 100644 --- a/deps/icu-small/source/common/util.cpp +++ b/deps/icu-small/source/common/util.cpp @@ -276,6 +276,16 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat, return -1; // text ended before end of pat } +int32_t ICU_Utility::parseAsciiInteger(const UnicodeString& str, int32_t& pos) { + int32_t result = 0; + UChar c; + while (pos < str.length() && (c = str.charAt(pos)) >= u'0' && c <= u'9') { + result = result * 10 + (c - u'0'); + pos++; + } + return result; +} + /** * Append a character to a rule that is being built up. To flush * the quoteBuf to rule, make one final call with isLiteral == TRUE. 
diff --git a/deps/icu-small/source/common/util.h b/deps/icu-small/source/common/util.h index 92cdc9ef69..a2be25056e 100644 --- a/deps/icu-small/source/common/util.h +++ b/deps/icu-small/source/common/util.h @@ -179,13 +179,22 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s * Parse an integer at pos, either of the form \d+ or of the form * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, * or octal format. - * @param pos INPUT-OUTPUT parameter. On input, the first - * character to parse. On output, the character after the last - * parsed character. + * @param pos INPUT-OUTPUT parameter. On input, the index of the first + * character to parse. On output, the index of the character after the + * last parsed character. */ static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit); /** + * Parse an integer at pos using only ASCII digits. + * Base 10 only. + * @param pos INPUT-OUTPUT parameter. On input, the index of the first + * character to parse. On output, the index of the character after the + * last parsed character. + */ + static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos); + + /** * Parse a Unicode identifier from the given string at the given * position. Return the identifier, or an empty string if there * is no identifier. 
diff --git a/deps/icu-small/source/common/utrace.cpp b/deps/icu-small/source/common/utrace.cpp index 01bdb38e90..04488d06a0 100644 --- a/deps/icu-small/source/common/utrace.cpp +++ b/deps/icu-small/source/common/utrace.cpp @@ -477,6 +477,16 @@ trCollNames[] = { }; +static const char* const +trResDataNames[] = { + "resc", + "bundle-open", + "file-open", + "res-open", + NULL +}; + + U_CAPI const char * U_EXPORT2 utrace_functionName(int32_t fnNumber) { if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) { @@ -485,6 +495,8 @@ utrace_functionName(int32_t fnNumber) { return trConvNames[fnNumber - UTRACE_CONVERSION_START]; } else if(UTRACE_COLLATION_START <= fnNumber && fnNumber < UTRACE_COLLATION_LIMIT){ return trCollNames[fnNumber - UTRACE_COLLATION_START]; + } else if(UTRACE_UDATA_START <= fnNumber && fnNumber < UTRACE_RES_DATA_LIMIT){ + return trResDataNames[fnNumber - UTRACE_UDATA_START]; } else { return "[BOGUS Trace Function Number]"; } diff --git a/deps/icu-small/source/common/utracimp.h b/deps/icu-small/source/common/utracimp.h index c2819830e1..84e7031da8 100644 --- a/deps/icu-small/source/common/utracimp.h +++ b/deps/icu-small/source/common/utracimp.h @@ -144,10 +144,12 @@ U_CDECL_END */ #define UTRACE_ENTRY(fnNumber) \ int32_t utraceFnNumber=(fnNumber); \ +UPRV_BLOCK_MACRO_BEGIN { \ if(utrace_getLevel()>=UTRACE_INFO) { \ utrace_entry(fnNumber); \ utraceFnNumber |= UTRACE_TRACED_ENTRY; \ - } + } \ +} UPRV_BLOCK_MACRO_END /** @@ -162,10 +164,12 @@ U_CDECL_END */ #define UTRACE_ENTRY_OC(fnNumber) \ int32_t utraceFnNumber=(fnNumber); \ +UPRV_BLOCK_MACRO_BEGIN { \ if(utrace_getLevel()>=UTRACE_OPEN_CLOSE) { \ utrace_entry(fnNumber); \ utraceFnNumber |= UTRACE_TRACED_ENTRY; \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement for each exit point of a function that has a UTRACE_ENTRY() @@ -179,10 +183,11 @@ U_CDECL_END * * @internal */ -#define UTRACE_EXIT() \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ +#define UTRACE_EXIT() 
UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_NONE); \ - }} + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement for each exit point of a function that has a UTRACE_ENTRY() @@ -192,25 +197,29 @@ U_CDECL_END * * @internal */ -#define UTRACE_EXIT_VALUE(val) \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ +#define UTRACE_EXIT_VALUE(val) UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_I32, val); \ - }} + } \ +} UPRV_BLOCK_MACRO_END -#define UTRACE_EXIT_STATUS(status) \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ +#define UTRACE_EXIT_STATUS(status) UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_STATUS, status); \ - }} + } \ +} UPRV_BLOCK_MACRO_END -#define UTRACE_EXIT_VALUE_STATUS(val, status) \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ +#define UTRACE_EXIT_VALUE_STATUS(val, status) UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS), val, status); \ - }} + } \ +} UPRV_BLOCK_MACRO_END -#define UTRACE_EXIT_PTR_STATUS(ptr, status) \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ +#define UTRACE_EXIT_PTR_STATUS(ptr, status) UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS), ptr, status); \ - }} + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -220,10 +229,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. 
* @internal */ -#define UTRACE_DATA0(level, fmt) \ +#define UTRACE_DATA0(level, fmt) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -233,10 +243,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. * @internal */ -#define UTRACE_DATA1(level, fmt, a) \ +#define UTRACE_DATA1(level, fmt, a) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -246,10 +257,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. * @internal */ -#define UTRACE_DATA2(level, fmt, a, b) \ +#define UTRACE_DATA2(level, fmt, a, b) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a), (b)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -259,10 +271,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. * @internal */ -#define UTRACE_DATA3(level, fmt, a, b, c) \ +#define UTRACE_DATA3(level, fmt, a, b, c) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -272,10 +285,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. 
* @internal */ -#define UTRACE_DATA4(level, fmt, a, b, c, d) \ +#define UTRACE_DATA4(level, fmt, a, b, c, d) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -285,10 +299,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. * @internal */ -#define UTRACE_DATA5(level, fmt, a, b, c, d, e) \ +#define UTRACE_DATA5(level, fmt, a, b, c, d, e) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -298,10 +313,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. * @internal */ -#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) \ +#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -311,10 +327,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. * @internal */ -#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) \ +#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -324,10 +341,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. 
* @internal */ -#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) \ +#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h)); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** * Trace statement used inside functions that have a UTRACE_ENTRY() statement. @@ -337,10 +355,11 @@ U_CDECL_END * Calls utrace_data() if the level is high enough. * @internal */ -#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) \ +#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) UPRV_BLOCK_MACRO_BEGIN { \ if(UTRACE_LEVEL(level)) { \ utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h), (i)); \ - } + } \ +} UPRV_BLOCK_MACRO_END #else diff --git a/deps/icu-small/source/common/utrie.h b/deps/icu-small/source/common/utrie.h index 3e2197eda6..532ba778eb 100644 --- a/deps/icu-small/source/common/utrie.h +++ b/deps/icu-small/source/common/utrie.h @@ -182,7 +182,7 @@ typedef struct UTrie UTrie; ] /** Internal trie getter from a pair of surrogates */ -#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) { \ +#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __offset; \ \ /* get data for lead surrogate */ \ @@ -195,18 +195,18 @@ typedef struct UTrie UTrie; } else { \ (result)=(resultType)((trie)->initialValue); \ } \ -} +} UPRV_BLOCK_MACRO_END /** Internal trie getter from a BMP code point, treating a lead surrogate as a normal code point */ #define _UTRIE_GET_FROM_BMP(trie, data, c16) \ - _UTRIE_GET_RAW(trie, data, 0xd800<=(c16) && (c16)<=0xdbff ? UTRIE_LEAD_INDEX_DISP : 0, c16); + _UTRIE_GET_RAW(trie, data, 0xd800<=(c16) && (c16)<=0xdbff ? UTRIE_LEAD_INDEX_DISP : 0, c16) /** * Internal trie getter from a code point. * Could be faster(?) 
but longer with * if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); } */ -#define _UTRIE_GET(trie, data, c32, result, resultType) \ +#define _UTRIE_GET(trie, data, c32, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c32)<=0xffff) { \ /* BMP code points */ \ (result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \ @@ -217,10 +217,11 @@ typedef struct UTrie UTrie; } else { \ /* out of range */ \ (result)=(resultType)((trie)->initialValue); \ - } + } \ +} UPRV_BLOCK_MACRO_END /** Internal next-post-increment: get the next code point (c, c2) and its data */ -#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) { \ +#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \ (c)=*(src)++; \ if(!U16_IS_LEAD(c)) { \ (c2)=0; \ @@ -233,10 +234,10 @@ typedef struct UTrie UTrie; (c2)=0; \ (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \ } \ -} +} UPRV_BLOCK_MACRO_END /** Internal previous: get the previous code point (c, c2) and its data */ -#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) { \ +#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \ (c)=*--(src); \ if(!U16_IS_SURROGATE(c)) { \ (c2)=0; \ @@ -257,7 +258,7 @@ typedef struct UTrie UTrie; (c2)=0; \ (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \ } \ -} +} UPRV_BLOCK_MACRO_END /* Public UTrie API ---------------------------------------------------------*/ diff --git a/deps/icu-small/source/common/utrie2.h b/deps/icu-small/source/common/utrie2.h index 75028ee23a..671f44e16a 100644 --- a/deps/icu-small/source/common/utrie2.h +++ b/deps/icu-small/source/common/utrie2.h @@ -871,7 +871,7 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)] /** Internal next-post-increment: get the next code point (c) and its data. 
*/ -#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) { \ +#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \ { \ uint16_t __c2; \ (c)=*(src)++; \ @@ -885,10 +885,10 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** Internal pre-decrement-previous: get the previous code point (c) and its data */ -#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) { \ +#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \ { \ uint16_t __c2; \ (c)=*--(src); \ @@ -900,10 +900,10 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** Internal UTF-8 next-post-increment: get the next code point's data. */ -#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) { \ +#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \ uint8_t __lead=(uint8_t)*(src)++; \ if(U8_IS_SINGLE(__lead)) { \ (result)=(trie)->ascii[__lead]; \ @@ -935,10 +935,10 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (result)=(trie)->data[__index>>3]; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** Internal UTF-8 pre-decrement-previous: get the previous code point's data. */ -#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) { \ +#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \ uint8_t __b=(uint8_t)*--(src); \ if(U8_IS_SINGLE(__b)) { \ (result)=(trie)->ascii[__b]; \ @@ -948,7 +948,7 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (src)-=__index&7; \ (result)=(trie)->data[__index>>3]; \ } \ -} +} UPRV_BLOCK_MACRO_END U_CDECL_END |