From 71e285b94c7edaa43aa8115965cf5a36b8e0f80a Mon Sep 17 00:00:00 2001 From: Florian Dold Date: Wed, 3 Apr 2019 15:43:32 +0200 Subject: Node.js v11.13.0 --- .../source/common/characterproperties.cpp | 336 +++++++++++++++++++++ 1 file changed, 336 insertions(+) create mode 100644 deps/node/deps/icu-small/source/common/characterproperties.cpp (limited to 'deps/node/deps/icu-small/source/common/characterproperties.cpp') diff --git a/deps/node/deps/icu-small/source/common/characterproperties.cpp b/deps/node/deps/icu-small/source/common/characterproperties.cpp new file mode 100644 index 00000000..3aff85b3 --- /dev/null +++ b/deps/node/deps/icu-small/source/common/characterproperties.cpp @@ -0,0 +1,336 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// characterproperties.cpp +// created: 2018sep03 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/uchar.h" +#include "unicode/ucpmap.h" +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uniset.h" +#include "unicode/uscript.h" +#include "unicode/uset.h" +#include "cmemory.h" +#include "mutex.h" +#include "normalizer2impl.h" +#include "uassert.h" +#include "ubidi_props.h" +#include "ucase.h" +#include "ucln_cmn.h" +#include "umutex.h" +#include "uprops.h" + +using icu::UInitOnce; +using icu::UnicodeSet; + +namespace { + +UBool U_CALLCONV characterproperties_cleanup(); + +struct Inclusion { + UnicodeSet *fSet; + UInitOnce fInitOnce; +}; +Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions() + +UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {}; + +UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {}; + +UMutex cpMutex = U_MUTEX_INITIALIZER; + +//---------------------------------------------------------------- +// Inclusions list +//---------------------------------------------------------------- + +// USetAdder implementation +// Does not use uset.h to reduce code dependencies +void U_CALLCONV +_set_add(USet *set, UChar32 c) { + ((UnicodeSet *)set)->add(c); +} + +void U_CALLCONV +_set_addRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet *)set)->add(start, end); +} + +void U_CALLCONV +_set_addString(USet *set, const UChar *str, int32_t length) { + ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length)); +} + +UBool U_CALLCONV characterproperties_cleanup() { + for (Inclusion &in: gInclusions) { + delete in.fSet; + in.fSet = nullptr; + in.fInitOnce.reset(); + } + for (int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) { + delete sets[i]; + sets[i] = nullptr; + } + for (int32_t i = 0; i < UPRV_LENGTHOF(maps); ++i) { + ucptrie_close(reinterpret_cast(maps[i])); + maps[i] = nullptr; + } + return TRUE; +} + +} // namespace + +U_NAMESPACE_BEGIN + +/* +Reduce excessive reallocation, and make it easier to detect initialization problems. +Usually you don't see smaller sets than this for Unicode 5.0. +*/ +constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072; + +void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + // This function is a friend of class UnicodeSet. + + U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT); + if (src == UPROPS_SRC_NONE) { + errorCode = U_INTERNAL_PROGRAM_ERROR; + return; + } + UnicodeSet * &incl = gInclusions[src].fSet; + U_ASSERT(incl == nullptr); + + incl = new UnicodeSet(); + if (incl == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + USetAdder sa = { + (USet *)incl, + _set_add, + _set_addRange, + _set_addString, + nullptr, // don't need remove() + nullptr // don't need removeRange() + }; + + incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode); + switch(src) { + case UPROPS_SRC_CHAR: + uchar_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_PROPSVEC: + upropsvec_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_CHAR_AND_PROPSVEC: + uchar_addPropertyStarts(&sa, &errorCode); + upropsvec_addPropertyStarts(&sa, &errorCode); + break; +#if !UCONFIG_NO_NORMALIZATION + case UPROPS_SRC_CASE_AND_NORM: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + ucase_addPropertyStarts(&sa, &errorCode); + break; + } + case UPROPS_SRC_NFC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFKC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFKC_CF: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFC_CANON_ITER: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addCanonIterPropertyStarts(&sa, errorCode); + } + break; + } +#endif + case UPROPS_SRC_CASE: + ucase_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_BIDI: + ubidi_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_INPC: + case UPROPS_SRC_INSC: + case UPROPS_SRC_VO: + uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode); + break; + default: + errorCode = U_INTERNAL_PROGRAM_ERROR; + break; + } + + if (U_FAILURE(errorCode)) { + delete incl; + incl = nullptr; + return; + } + // Compact for caching + incl->compact(); + ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); +} + +const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + if (src < 0 || UPROPS_SRC_COUNT <= src) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Inclusion &i = gInclusions[src]; + umtx_initOnce(i.fInitOnce, &CharacterProperties::initInclusion, src, errorCode); + return i.fSet; +} + +const UnicodeSet *CharacterProperties::getInclusionsForProperty( + UProperty prop, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + UPropertySource src = uprops_getSource(prop); + return getInclusionsForSource(src, errorCode); +} + +U_NAMESPACE_END + +namespace { + +UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + icu::LocalPointer set(new UnicodeSet()); + if (set.isNull()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + const UnicodeSet *inclusions = + icu::CharacterProperties::getInclusionsForProperty(property, errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t numRanges = inclusions->getRangeCount(); + UChar32 startHasProperty = -1; + + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = inclusions->getRangeEnd(i); + for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch. + if (u_hasBinaryProperty(c, property)) { + if (startHasProperty < 0) { + // Transition from false to true. + startHasProperty = c; + } + } else if (startHasProperty >= 0) { + // Transition from true to false. + set->add(startHasProperty, c - 1); + startHasProperty = -1; + } + } + } + if (startHasProperty >= 0) { + set->add(startHasProperty, 0x10FFFF); + } + set->freeze(); + return set.orphan(); +} + +UCPMap *makeMap(UProperty property, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0; + icu::LocalUMutableCPTriePointer mutableTrie( + umutablecptrie_open(nullValue, nullValue, &errorCode)); + const UnicodeSet *inclusions = + icu::CharacterProperties::getInclusionsForProperty(property, errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t numRanges = inclusions->getRangeCount(); + UChar32 start = 0; + uint32_t value = nullValue; + + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = inclusions->getRangeEnd(i); + for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. + uint32_t nextValue = u_getIntPropertyValue(c, property); + if (value != nextValue) { + if (value != nullValue) { + umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode); + } + start = c; + value = nextValue; + } + } + } + if (value != 0) { + umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode); + } + + UCPTrieType type; + if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) { + type = UCPTRIE_TYPE_FAST; + } else { + type = UCPTRIE_TYPE_SMALL; + } + UCPTrieValueWidth valueWidth; + // TODO: UCharacterProperty.IntProperty + int32_t max = u_getIntPropertyMaxValue(property); + if (max <= 0xff) { + valueWidth = UCPTRIE_VALUE_BITS_8; + } else if (max <= 0xffff) { + valueWidth = UCPTRIE_VALUE_BITS_16; + } else { + valueWidth = UCPTRIE_VALUE_BITS_32; + } + return reinterpret_cast( + umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode)); +} + +} // namespace + +U_NAMESPACE_USE + +U_CAPI const USet * U_EXPORT2 +u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return nullptr; } + if (property < 0 || UCHAR_BINARY_LIMIT <= property) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Mutex m(&cpMutex); + UnicodeSet *set = sets[property]; + if (set == nullptr) { + sets[property] = set = makeSet(property, *pErrorCode); + } + if (U_FAILURE(*pErrorCode)) { return nullptr; } + return set->toUSet(); +} + +U_CAPI const UCPMap * U_EXPORT2 +u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return nullptr; } + if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Mutex m(&cpMutex); + UCPMap *map = maps[property - UCHAR_INT_START]; + if (map == nullptr) { + maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode); + } + return map; +} -- cgit v1.2.3