1 files changed, 73 insertions, 178 deletions
diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp
index ef5d6a32b2..1312de2098 100644
--- a/deps/icu-small/source/common/uniset_props.cpp
+++ b/deps/icu-small/source/common/uniset_props.cpp
@@ -36,8 +36,6 @@
 #include "uprops.h"
 #include "propname.h"
 #include "normalizer2impl.h"
-#include "ucase.h"
-#include "ubidi_props.h"
 #include "uinvchar.h"
 #include "uprops.h"
 #include "charstr.h"
@@ -98,47 +96,13 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
 U_CDECL_BEGIN
 static UBool U_CALLCONV uset_cleanup();
 
-struct Inclusion {
-    UnicodeSet  *fSet;
-    UInitOnce    fInitOnce;
-};
-static Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
-
 static UnicodeSet *uni32Singleton;
 static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER;
 
-//----------------------------------------------------------------
-// Inclusions list
-//----------------------------------------------------------------
-
-// USetAdder implementation
-// Does not use uset.h to reduce code dependencies
-static void U_CALLCONV
-_set_add(USet *set, UChar32 c) {
-    ((UnicodeSet *)set)->add(c);
-}
-
-static void U_CALLCONV
-_set_addRange(USet *set, UChar32 start, UChar32 end) {
-    ((UnicodeSet *)set)->add(start, end);
-}
-
-static void U_CALLCONV
-_set_addString(USet *set, const UChar *str, int32_t length) {
-    ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
-}
-
 /**
  * Cleanup function for UnicodeSet
  */
 static UBool U_CALLCONV uset_cleanup(void) {
-    for(int32_t i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
-        Inclusion &in = gInclusions[i];
-        delete in.fSet;
-        in.fSet = NULL;
-        in.fInitOnce.reset();
-    }
-
     delete uni32Singleton;
     uni32Singleton = NULL;
     uni32InitOnce.reset();
@@ -149,114 +113,6 @@ U_CDECL_END
 
 U_NAMESPACE_BEGIN
 
-/*
-Reduce excessive reallocation, and make it easier to detect initialization problems.
-Usually you don't see smaller sets than this for Unicode 5.0.
-*/
-#define DEFAULT_INCLUSION_CAPACITY 3072
-
-void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
-    // This function is invoked only via umtx_initOnce().
-    // This function is a friend of class UnicodeSet.
-
-    U_ASSERT(src >=0 && src<UPROPS_SRC_COUNT);
-    UnicodeSet * &incl = gInclusions[src].fSet;
-    U_ASSERT(incl == NULL);
-
-    incl = new UnicodeSet();
-    if (incl == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-    }
-    USetAdder sa = {
-        (USet *)incl,
-        _set_add,
-        _set_addRange,
-        _set_addString,
-        NULL, // don't need remove()
-        NULL // don't need removeRange()
-    };
-
-    incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, status);
-    switch(src) {
-    case UPROPS_SRC_CHAR:
-        uchar_addPropertyStarts(&sa, &status);
-        break;
-    case UPROPS_SRC_PROPSVEC:
-        upropsvec_addPropertyStarts(&sa, &status);
-        break;
-    case UPROPS_SRC_CHAR_AND_PROPSVEC:
-        uchar_addPropertyStarts(&sa, &status);
-        upropsvec_addPropertyStarts(&sa, &status);
-        break;
-#if !UCONFIG_NO_NORMALIZATION
-    case UPROPS_SRC_CASE_AND_NORM: {
-        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
-        if(U_SUCCESS(status)) {
-            impl->addPropertyStarts(&sa, status);
-        }
-        ucase_addPropertyStarts(&sa, &status);
-        break;
-    }
-    case UPROPS_SRC_NFC: {
-        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
-        if(U_SUCCESS(status)) {
-            impl->addPropertyStarts(&sa, status);
-        }
-        break;
-    }
-    case UPROPS_SRC_NFKC: {
-        const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(status);
-        if(U_SUCCESS(status)) {
-            impl->addPropertyStarts(&sa, status);
-        }
-        break;
-    }
-    case UPROPS_SRC_NFKC_CF: {
-        const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(status);
-        if(U_SUCCESS(status)) {
-            impl->addPropertyStarts(&sa, status);
-        }
-        break;
-    }
-    case UPROPS_SRC_NFC_CANON_ITER: {
-        const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
-        if(U_SUCCESS(status)) {
-            impl->addCanonIterPropertyStarts(&sa, status);
-        }
-        break;
-    }
-#endif
-    case UPROPS_SRC_CASE:
-        ucase_addPropertyStarts(&sa, &status);
-        break;
-    case UPROPS_SRC_BIDI:
-        ubidi_addPropertyStarts(&sa, &status);
-        break;
-    default:
-        status = U_INTERNAL_PROGRAM_ERROR;
-        break;
-    }
-
-    if (U_FAILURE(status)) {
-        delete incl;
-        incl = NULL;
-        return;
-    }
-    // Compact for caching
-    incl->compact();
-    ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
-}
-
-
-
-const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
-    U_ASSERT(src >=0 && src<UPROPS_SRC_COUNT);
-    Inclusion &i = gInclusions[src];
-    umtx_initOnce(i.fInitOnce, &UnicodeSet_initInclusion, src, status);
-    return i.fSet;
-}
-
 namespace {
 
 // Cache some sets for other services -------------------------------------- ***
@@ -857,11 +713,6 @@ static UBool numericValueFilter(UChar32 ch, void* context) {
     return u_getNumericValue(ch) == *(double*)context;
 }
 
-static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
-    int32_t value = *(int32_t*)context;
-    return (U_GET_GC_MASK((UChar32) ch) & value) != 0;
-}
-
 static UBool versionFilter(UChar32 ch, void* context) {
     static const UVersionInfo none = { 0, 0, 0, 0 };
     UVersionInfo v;
@@ -870,16 +721,6 @@ static UBool versionFilter(UChar32 ch, void* context) {
     return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0;
 }
 
-typedef struct {
-    UProperty prop;
-    int32_t value;
-} IntPropertyContext;
-
-static UBool intPropertyFilter(UChar32 ch, void* context) {
-    IntPropertyContext* c = (IntPropertyContext*)context;
-    return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
-}
-
 static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
     return uscript_hasScript(ch, *(UScriptCode*)context);
 }
@@ -891,7 +732,7 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
  */
 void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
                              void* context,
-                             int32_t src,
+                             const UnicodeSet* inclusions,
                              UErrorCode &status) {
     if (U_FAILURE(status)) return;
 
@@ -902,12 +743,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
     // To improve performance, use an inclusions set which
     // encodes information about character ranges that are known
     // to have identical properties.
-    // getInclusions(src) contains exactly the first characters of
-    // same-value ranges for the given properties "source".
-    const UnicodeSet* inclusions = getInclusions(src, status);
-    if (U_FAILURE(status)) {
-        return;
-    }
+    // inclusions contains the first characters of
+    // same-value ranges for the given property.
 
     clear();
 
@@ -944,6 +781,43 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
 
 namespace {
 
+/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */
+uint32_t U_CALLCONV generalCategoryMaskFilter(const void *context, uint32_t value) {
+    uint32_t mask = *(const uint32_t *)context;
+    value = U_MASK(value) & mask;
+    if (value != 0) { value = 1; }
+    return value;
+}
+
+/** Maps one map value to 1, all others to 0. */
+uint32_t U_CALLCONV intValueFilter(const void *context, uint32_t value) {
+    uint32_t v = *(const uint32_t *)context;
+    return value == v ? 1 : 0;
+}
+
+}  // namespace
+
+void UnicodeSet::applyIntPropertyValue(const UCPMap *map,
+                                       UCPMapValueFilter *filter, const void *context,
+                                       UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return; }
+    clear();
+    UChar32 start = 0, end;
+    uint32_t value;
+    while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
+                                  filter, context, &value)) >= 0) {
+        if (value != 0) {
+            add(start, end);
+        }
+        start = end + 1;
+    }
+    if (isBogus()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+namespace {
+
 static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
     /* Note: we use ' ' in compiler code page */
     int32_t j = 0;
@@ -971,16 +845,35 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
 
 UnicodeSet&
 UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
-    if (U_FAILURE(ec) || isFrozen()) return *this;
-
+    if (U_FAILURE(ec)) { return *this; }
+    // All of the following check isFrozen() before modifying this set.
     if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
-        applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec);
+        const UCPMap *map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &ec);
+        applyIntPropertyValue(map, generalCategoryMaskFilter, &value, ec);
     } else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
+        const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
         UScriptCode script = (UScriptCode)value;
-        applyFilter(scriptExtensionsFilter, &script, UPROPS_SRC_PROPSVEC, ec);
+        applyFilter(scriptExtensionsFilter, &script, inclusions, ec);
+    } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) {
+        if (value == 0 || value == 1) {
+            const USet *set = u_getBinaryPropertySet(prop, &ec);
+            if (U_FAILURE(ec)) { return *this; }
+            copyFrom(*UnicodeSet::fromUSet(set), TRUE);
+            if (value == 0) {
+                complement();
+            }
+        } else {
+            clear();
+        }
+    } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
+        const UCPMap *map = u_getIntPropertyMap(prop, &ec);
+        applyIntPropertyValue(map, intValueFilter, &value, ec);
     } else {
-        IntPropertyContext c = {prop, value};
-        applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec);
+        // This code used to always call getInclusions(property source)
+        // which sets an error for an unsupported property.
+        ec = U_ILLEGAL_ARGUMENT_ERROR;
+        // Otherwise we would just clear() this set because
+        // getIntPropertyValue(c, prop) returns 0 for all code points.
     }
     return *this;
 }
@@ -1030,13 +923,13 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                     p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
                     p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
                     char* end;
-                    double value = uprv_strtod(vname.data(), &end);
+                    double val = uprv_strtod(vname.data(), &end);
                     // Anything between 0 and 255 is valid even if unused.
                     // Cast double->int only after range check.
                     // We catch NaN here because comparing it with both 0 and 255 will be false
                     // (as are all comparisons with NaN).
-                    if (*end != 0 || !(0 <= value && value <= 255) ||
-                            (v = (int32_t)value) != value) {
+                    if (*end != 0 || !(0 <= val && val <= 255) ||
+                            (v = (int32_t)val) != val) {
                         // non-integral value or outside 0..255, or trailing junk
                         FAIL(ec);
                     }
@@ -1052,11 +945,12 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
             case UCHAR_NUMERIC_VALUE:
                 {
                     char* end;
-                    double value = uprv_strtod(vname.data(), &end);
+                    double val = uprv_strtod(vname.data(), &end);
                     if (*end != 0) {
                         FAIL(ec);
                     }
-                    applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec);
+                    applyFilter(numericValueFilter, &val,
+                                CharacterProperties::getInclusionsForProperty(p, ec), ec);
                     return *this;
                 }
             case UCHAR_NAME:
@@ -1085,7 +979,8 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                     if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
                     UVersionInfo version;
                     u_versionFromString(version, buf);
-                    applyFilter(versionFilter, &version, UPROPS_SRC_PROPSVEC, ec);
+                    applyFilter(versionFilter, &version,
+                                CharacterProperties::getInclusionsForProperty(p, ec), ec);
                     return *this;
                 }
             case UCHAR_SCRIPT_EXTENSIONS: