diff options
Diffstat (limited to 'deps/icu-small/source/common/uniset_props.cpp')
-rw-r--r-- | deps/icu-small/source/common/uniset_props.cpp | 251 |
1 files changed, 73 insertions, 178 deletions
diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp index ef5d6a32b2..1312de2098 100644 --- a/deps/icu-small/source/common/uniset_props.cpp +++ b/deps/icu-small/source/common/uniset_props.cpp @@ -36,8 +36,6 @@ #include "uprops.h" #include "propname.h" #include "normalizer2impl.h" -#include "ucase.h" -#include "ubidi_props.h" #include "uinvchar.h" #include "uprops.h" #include "charstr.h" @@ -98,47 +96,13 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:] U_CDECL_BEGIN static UBool U_CALLCONV uset_cleanup(); -struct Inclusion { - UnicodeSet *fSet; - UInitOnce fInitOnce; -}; -static Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions() - static UnicodeSet *uni32Singleton; static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER; -//---------------------------------------------------------------- -// Inclusions list -//---------------------------------------------------------------- - -// USetAdder implementation -// Does not use uset.h to reduce code dependencies -static void U_CALLCONV -_set_add(USet *set, UChar32 c) { - ((UnicodeSet *)set)->add(c); -} - -static void U_CALLCONV -_set_addRange(USet *set, UChar32 start, UChar32 end) { - ((UnicodeSet *)set)->add(start, end); -} - -static void U_CALLCONV -_set_addString(USet *set, const UChar *str, int32_t length) { - ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); -} - /** * Cleanup function for UnicodeSet */ static UBool U_CALLCONV uset_cleanup(void) { - for(int32_t i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) { - Inclusion &in = gInclusions[i]; - delete in.fSet; - in.fSet = NULL; - in.fInitOnce.reset(); - } - delete uni32Singleton; uni32Singleton = NULL; uni32InitOnce.reset(); @@ -149,114 +113,6 @@ U_CDECL_END U_NAMESPACE_BEGIN -/* -Reduce excessive reallocation, and make it easier to detect initialization problems. -Usually you don't see smaller sets than this for Unicode 5.0. -*/ -#define DEFAULT_INCLUSION_CAPACITY 3072 - -void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) { - // This function is invoked only via umtx_initOnce(). - // This function is a friend of class UnicodeSet. - - U_ASSERT(src >=0 && src<UPROPS_SRC_COUNT); - UnicodeSet * &incl = gInclusions[src].fSet; - U_ASSERT(incl == NULL); - - incl = new UnicodeSet(); - if (incl == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - USetAdder sa = { - (USet *)incl, - _set_add, - _set_addRange, - _set_addString, - NULL, // don't need remove() - NULL // don't need removeRange() - }; - - incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, status); - switch(src) { - case UPROPS_SRC_CHAR: - uchar_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_PROPSVEC: - upropsvec_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_CHAR_AND_PROPSVEC: - uchar_addPropertyStarts(&sa, &status); - upropsvec_addPropertyStarts(&sa, &status); - break; -#if !UCONFIG_NO_NORMALIZATION - case UPROPS_SRC_CASE_AND_NORM: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); - if(U_SUCCESS(status)) { - impl->addPropertyStarts(&sa, status); - } - ucase_addPropertyStarts(&sa, &status); - break; - } - case UPROPS_SRC_NFC: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); - if(U_SUCCESS(status)) { - impl->addPropertyStarts(&sa, status); - } - break; - } - case UPROPS_SRC_NFKC: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(status); - if(U_SUCCESS(status)) { - impl->addPropertyStarts(&sa, status); - } - break; - } - case UPROPS_SRC_NFKC_CF: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(status); - if(U_SUCCESS(status)) { - impl->addPropertyStarts(&sa, status); - } - break; - } - case UPROPS_SRC_NFC_CANON_ITER: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); - if(U_SUCCESS(status)) { - impl->addCanonIterPropertyStarts(&sa, status); - } - break; - } -#endif - case UPROPS_SRC_CASE: - ucase_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_BIDI: - ubidi_addPropertyStarts(&sa, &status); - break; - default: - status = U_INTERNAL_PROGRAM_ERROR; - break; - } - - if (U_FAILURE(status)) { - delete incl; - incl = NULL; - return; - } - // Compact for caching - incl->compact(); - ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); -} - - - -const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { - U_ASSERT(src >=0 && src<UPROPS_SRC_COUNT); - Inclusion &i = gInclusions[src]; - umtx_initOnce(i.fInitOnce, &UnicodeSet_initInclusion, src, status); - return i.fSet; -} - namespace { // Cache some sets for other services -------------------------------------- *** @@ -857,11 +713,6 @@ static UBool numericValueFilter(UChar32 ch, void* context) { return u_getNumericValue(ch) == *(double*)context; } -static UBool generalCategoryMaskFilter(UChar32 ch, void* context) { - int32_t value = *(int32_t*)context; - return (U_GET_GC_MASK((UChar32) ch) & value) != 0; -} - static UBool versionFilter(UChar32 ch, void* context) { static const UVersionInfo none = { 0, 0, 0, 0 }; UVersionInfo v; @@ -870,16 +721,6 @@ static UBool versionFilter(UChar32 ch, void* context) { return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0; } -typedef struct { - UProperty prop; - int32_t value; -} IntPropertyContext; - -static UBool intPropertyFilter(UChar32 ch, void* context) { - IntPropertyContext* c = (IntPropertyContext*)context; - return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value; -} - static UBool scriptExtensionsFilter(UChar32 ch, void* context) { return uscript_hasScript(ch, *(UScriptCode*)context); } @@ -891,7 +732,7 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) { */ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, void* context, - int32_t src, + const UnicodeSet* inclusions, UErrorCode &status) { if (U_FAILURE(status)) return; @@ -902,12 +743,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, // To improve performance, use an inclusions set which // encodes information about character ranges that are known // to have identical properties. - // getInclusions(src) contains exactly the first characters of - // same-value ranges for the given properties "source". - const UnicodeSet* inclusions = getInclusions(src, status); - if (U_FAILURE(status)) { - return; - } + // inclusions contains the first characters of + // same-value ranges for the given property. clear(); @@ -944,6 +781,43 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, namespace { +/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */ +uint32_t U_CALLCONV generalCategoryMaskFilter(const void *context, uint32_t value) { + uint32_t mask = *(const uint32_t *)context; + value = U_MASK(value) & mask; + if (value != 0) { value = 1; } + return value; +} + +/** Maps one map value to 1, all others to 0. */ +uint32_t U_CALLCONV intValueFilter(const void *context, uint32_t value) { + uint32_t v = *(const uint32_t *)context; + return value == v ? 1 : 0; +} + +} // namespace + +void UnicodeSet::applyIntPropertyValue(const UCPMap *map, + UCPMapValueFilter *filter, const void *context, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + clear(); + UChar32 start = 0, end; + uint32_t value; + while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, + filter, context, &value)) >= 0) { + if (value != 0) { + add(start, end); + } + start = end + 1; + } + if (isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } +} + +namespace { + static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { /* Note: we use ' ' in compiler code page */ int32_t j = 0; @@ -971,16 +845,35 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { UnicodeSet& UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) { - if (U_FAILURE(ec) || isFrozen()) return *this; - + if (U_FAILURE(ec)) { return *this; } + // All of the following check isFrozen() before modifying this set. if (prop == UCHAR_GENERAL_CATEGORY_MASK) { - applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec); + const UCPMap *map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &ec); + applyIntPropertyValue(map, generalCategoryMaskFilter, &value, ec); } else if (prop == UCHAR_SCRIPT_EXTENSIONS) { + const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); UScriptCode script = (UScriptCode)value; - applyFilter(scriptExtensionsFilter, &script, UPROPS_SRC_PROPSVEC, ec); + applyFilter(scriptExtensionsFilter, &script, inclusions, ec); + } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) { + if (value == 0 || value == 1) { + const USet *set = u_getBinaryPropertySet(prop, &ec); + if (U_FAILURE(ec)) { return *this; } + copyFrom(*UnicodeSet::fromUSet(set), TRUE); + if (value == 0) { + complement(); + } + } else { + clear(); + } + } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) { + const UCPMap *map = u_getIntPropertyMap(prop, &ec); + applyIntPropertyValue(map, intValueFilter, &value, ec); } else { - IntPropertyContext c = {prop, value}; - applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec); + // This code used to always call getInclusions(property source) + // which sets an error for an unsupported property. + ec = U_ILLEGAL_ARGUMENT_ERROR; + // Otherwise we would just clear() this set because + // getIntPropertyValue(c, prop) returns 0 for all code points. } return *this; } @@ -1030,13 +923,13 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS || p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) { char* end; - double value = uprv_strtod(vname.data(), &end); + double val = uprv_strtod(vname.data(), &end); // Anything between 0 and 255 is valid even if unused. // Cast double->int only after range check. // We catch NaN here because comparing it with both 0 and 255 will be false // (as are all comparisons with NaN). - if (*end != 0 || !(0 <= value && value <= 255) || - (v = (int32_t)value) != value) { + if (*end != 0 || !(0 <= val && val <= 255) || + (v = (int32_t)val) != val) { // non-integral value or outside 0..255, or trailing junk FAIL(ec); } @@ -1052,11 +945,12 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, case UCHAR_NUMERIC_VALUE: { char* end; - double value = uprv_strtod(vname.data(), &end); + double val = uprv_strtod(vname.data(), &end); if (*end != 0) { FAIL(ec); } - applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec); + applyFilter(numericValueFilter, &val, + CharacterProperties::getInclusionsForProperty(p, ec), ec); return *this; } case UCHAR_NAME: @@ -1085,7 +979,8 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); UVersionInfo version; u_versionFromString(version, buf); - applyFilter(versionFilter, &version, UPROPS_SRC_PROPSVEC, ec); + applyFilter(versionFilter, &version, + CharacterProperties::getInclusionsForProperty(p, ec), ec); return *this; } case UCHAR_SCRIPT_EXTENSIONS: |