1 files changed, 181 insertions, 180 deletions
diff --git a/deps/icu-small/source/i18n/regexcmp.cpp b/deps/icu-small/source/i18n/regexcmp.cpp
index 410ff9513b..0c5fca6f67 100644
--- a/deps/icu-small/source/i18n/regexcmp.cpp
+++ b/deps/icu-small/source/i18n/regexcmp.cpp
@@ -28,6 +28,7 @@
 #include "patternprops.h"
 #include "putilimp.h"
 #include "cmemory.h"
+#include "cstr.h"
 #include "cstring.h"
 #include "uvectr32.h"
 #include "uvectr64.h"
@@ -3892,7 +3893,7 @@ void RegexCompile::stripNOPs() {
 //
 //------------------------------------------------------------------------------
 void RegexCompile::error(UErrorCode e) {
-    if (U_SUCCESS(*fStatus)) {
+    if (U_SUCCESS(*fStatus) || e == U_MEMORY_ALLOCATION_ERROR) {
         *fStatus = e;
         // Hmm. fParseErr (UParseError) line & offset fields are int32_t in public
         // API (see common/unicode/parseerr.h), while fLineNum and fCharNum are
@@ -4370,209 +4371,209 @@ static inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) {
 //     Includes trying the Java "properties" that aren't supported as
 //     normal ICU UnicodeSet properties
 //
-static const UChar posSetPrefix[] = {0x5b, 0x5c, 0x70, 0x7b, 0}; // "[\p{"
-static const UChar negSetPrefix[] = {0x5b, 0x5c, 0x50, 0x7b, 0}; // "[\P{"
 UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UBool negated) {
-    UnicodeString   setExpr;
-    UnicodeSet      *set;
-    uint32_t        usetFlags = 0;
 
     if (U_FAILURE(*fStatus)) {
-        return NULL;
+        return nullptr;
     }
+    LocalPointer<UnicodeSet> set;
+    UErrorCode status = U_ZERO_ERROR;
 
-    //
-    //  First try the property as we received it
-    //
-    if (negated) {
-        setExpr.append(negSetPrefix, -1);
-    } else {
-        setExpr.append(posSetPrefix, -1);
-    }
-    setExpr.append(propName);
-    setExpr.append(chRBrace);
-    setExpr.append(chRBracket);
-    if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
-        usetFlags |= USET_CASE_INSENSITIVE;
-    }
-    set = new UnicodeSet(setExpr, usetFlags, NULL, *fStatus);
-    if (U_SUCCESS(*fStatus)) {
-       return set;
-    }
-    delete set;
-    set = NULL;
-
-    //
-    //  The property as it was didn't work.
-
-    //  Do [:word:]. It is not recognized as a property by UnicodeSet.  "word" not standard POSIX
-    //     or standard Java, but many other regular expression packages do recognize it.
-
-    if (propName.caseCompare(UNICODE_STRING_SIMPLE("word"), 0) == 0) {
-        *fStatus = U_ZERO_ERROR;
-        set = new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET]));
-        if (set == NULL) {
-            *fStatus = U_MEMORY_ALLOCATION_ERROR;
-            return set;
+    do {      // non-loop, exists to allow breaks from the block.
+        //
+        //  First try the property as we received it
+        //
+        UnicodeString   setExpr;
+        uint32_t        usetFlags = 0;
+        setExpr.append(u"[\\p{", -1);
+        setExpr.append(propName);
+        setExpr.append(u"}]", -1);
+        if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
+            usetFlags |= USET_CASE_INSENSITIVE;
         }
-        if (negated) {
-            set->complement();
+        set.adoptInsteadAndCheckErrorCode(new UnicodeSet(setExpr, usetFlags, nullptr, status), status);
+        if (U_SUCCESS(status) || status == U_MEMORY_ALLOCATION_ERROR) {
+            break;
         }
-        return set;
-    }
 
+        //
+        //  The incoming property wasn't directly recognized by ICU.
 
-    //    Do Java fixes -
-    //       InGreek -> InGreek or Coptic, that being the official Unicode name for that block.
-    //       InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols.
-    //
-    //       Note on Spaces:  either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols"
-    //                        is accepted by Java.  The property part of the name is compared
-    //                        case-insenstively.  The spaces must be exactly as shown, either
-    //                        all there, or all omitted, with exactly one at each position
-    //                        if they are present.  From checking against JDK 1.6
-    //
-    //       This code should be removed when ICU properties support the Java  compatibility names
-    //          (ICU 4.0?)
-    //
-    UnicodeString mPropName = propName;
-    if (mPropName.caseCompare(UNICODE_STRING_SIMPLE("InGreek"), 0) == 0) {
-        mPropName = UNICODE_STRING_SIMPLE("InGreek and Coptic");
-    }
-    if (mPropName.caseCompare(UNICODE_STRING_SIMPLE("InCombining Marks for Symbols"), 0) == 0 ||
-        mPropName.caseCompare(UNICODE_STRING_SIMPLE("InCombiningMarksforSymbols"), 0) == 0) {
-        mPropName = UNICODE_STRING_SIMPLE("InCombining Diacritical Marks for Symbols");
-    }
-    else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
-        mPropName = UNICODE_STRING_SIMPLE("javaValidCodePoint");
-    }
+        //  Check [:word:] and [:all:]. These are not recognized as properties by ICU UnicodeSet.
+        //     Java accepts 'word' with mixed case.
+        //     Java accepts 'all' only in all lower case.
 
-    //    See if the property looks like a Java "InBlockName", which
-    //    we will recast as "Block=BlockName"
-    //
-    if (mPropName.startsWith(u"In", 2) && propName.length()>=3) {
-        setExpr.truncate(4);   // Leaves "[\p{", or "[\P{"
-        setExpr.append(u"Block=", -1);
-        setExpr.append(UnicodeString(mPropName, 2));  // Property with the leading "In" removed.
-        setExpr.append(chRBrace);
-        setExpr.append(chRBracket);
-        *fStatus = U_ZERO_ERROR;
-        set = new UnicodeSet(setExpr, usetFlags, NULL, *fStatus);
-        if (U_SUCCESS(*fStatus)) {
-            return set;
+        status = U_ZERO_ERROR;
+        if (propName.caseCompare(u"word", -1, 0) == 0) {
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET])), status);
+            break;
+        }
+        if (propName.compare(u"all", -1) == 0) {
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(0, 0x10ffff), status);
+            break;
         }
-        delete set;
-        set = NULL;
-    }
 
-    if (propName.startsWith(UNICODE_STRING_SIMPLE("java")) ||
-        propName.compare(UNICODE_STRING_SIMPLE("all")) == 0)
-    {
-        UErrorCode localStatus = U_ZERO_ERROR;
-        //setExpr.remove();
-        set = new UnicodeSet();
-        //
-        //  Try the various Java specific properties.
-        //   These all begin with "java"
+
+        //    Do Java InBlock expressions
         //
-        if (mPropName.compare(UNICODE_STRING_SIMPLE("javaDefined")) == 0) {
-            addCategory(set, U_GC_CN_MASK, localStatus);
-            set->complement();
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaDigit")) == 0) {
-            addCategory(set, U_GC_ND_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaIdentifierIgnorable")) == 0) {
-            addIdentifierIgnorable(set, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaISOControl")) == 0) {
-            set->add(0, 0x1F).add(0x7F, 0x9F);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaJavaIdentifierPart")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_SC_MASK, localStatus);
-            addCategory(set, U_GC_PC_MASK, localStatus);
-            addCategory(set, U_GC_ND_MASK, localStatus);
-            addCategory(set, U_GC_NL_MASK, localStatus);
-            addCategory(set, U_GC_MC_MASK, localStatus);
-            addCategory(set, U_GC_MN_MASK, localStatus);
-            addIdentifierIgnorable(set, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaJavaIdentifierStart")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_NL_MASK, localStatus);
-            addCategory(set, U_GC_SC_MASK, localStatus);
-            addCategory(set, U_GC_PC_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLetter")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLetterOrDigit")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_ND_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLowerCase")) == 0) {
-            addCategory(set, U_GC_LL_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaMirrored")) == 0) {
-            set->applyIntPropertyValue(UCHAR_BIDI_MIRRORED, 1, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaSpaceChar")) == 0) {
-            addCategory(set, U_GC_Z_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaSupplementaryCodePoint")) == 0) {
-            set->add(0x10000, UnicodeSet::MAX_VALUE);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaTitleCase")) == 0) {
-            addCategory(set, U_GC_LT_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUnicodeIdentifierStart")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_NL_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUnicodeIdentifierPart")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_PC_MASK, localStatus);
-            addCategory(set, U_GC_ND_MASK, localStatus);
-            addCategory(set, U_GC_NL_MASK, localStatus);
-            addCategory(set, U_GC_MC_MASK, localStatus);
-            addCategory(set, U_GC_MN_MASK, localStatus);
-            addIdentifierIgnorable(set, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUpperCase")) == 0) {
-            addCategory(set, U_GC_LU_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaValidCodePoint")) == 0) {
-            set->add(0, UnicodeSet::MAX_VALUE);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaWhitespace")) == 0) {
-            addCategory(set, U_GC_Z_MASK, localStatus);
-            set->removeAll(UnicodeSet().add(0xa0).add(0x2007).add(0x202f));
-            set->add(9, 0x0d).add(0x1c, 0x1f);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
-            set->add(0, UnicodeSet::MAX_VALUE);
+        UnicodeString mPropName = propName;
+        if (mPropName.startsWith(u"In", 2) && mPropName.length() >= 3) {
+            status = U_ZERO_ERROR;
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(), status);
+            if (U_FAILURE(status)) {
+                break;
+            }
+            UnicodeString blockName(mPropName, 2);  // Property with the leading "In" removed.
+            set->applyPropertyAlias(UnicodeString(u"Block"), blockName, status);
+            break;
         }
 
-        if (U_SUCCESS(localStatus) && !set->isEmpty()) {
-            *fStatus = U_ZERO_ERROR;
-            if (usetFlags & USET_CASE_INSENSITIVE) {
+        //  Check for the Java form "IsBooleanPropertyValue", which we will recast
+        //  as "BooleanPropertyValue". The property value can be either
+        //  a General Category or a Script Name.
+
+        if (propName.startsWith(u"Is", 2) && propName.length()>=3) {
+            mPropName.remove(0, 2);      // Strip the "Is"
+            if (mPropName.indexOf(u'=') >= 0) {
+                // Reject any "Is..." property expression containing an '=', that is,
+                // any non-binary property expression.
+                status = U_REGEX_PROPERTY_SYNTAX;
+                break;
+            }
+
+            if (mPropName.caseCompare(u"assigned", -1, 0) == 0) {
+                mPropName.setTo(u"unassigned", -1);
+                negated = !negated;
+            } else if (mPropName.caseCompare(u"TitleCase", -1, 0) == 0) {
+                mPropName.setTo(u"Titlecase_Letter", -1);
+            }
+            // Use the local status (not *fStatus) so a bad name is mapped and reported via error() below.
+            mPropName.insert(0, u"[\\p{", -1);
+            mPropName.append(u"}]", -1);
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(mPropName, status), status);
+
+            if (U_SUCCESS(status) && !set->isEmpty() && (usetFlags & USET_CASE_INSENSITIVE)) {
                 set->closeOver(USET_CASE_INSENSITIVE);
             }
-            if (negated) {
+            break;
+
+        }
+
+        if (propName.startsWith(u"java", -1)) {
+            status = U_ZERO_ERROR;
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(), status);
+            if (U_FAILURE(status)) {
+                break;
+            }
+            //
+            //  Try the various Java specific properties.
+            //   These all begin with "java"
+            //
+            if (propName.compare(u"javaDefined", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_CN_MASK, status);
                 set->complement();
             }
-            return set;
+            else if (propName.compare(u"javaDigit", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_ND_MASK, status);
+            }
+            else if (propName.compare(u"javaIdentifierIgnorable", -1) == 0) {
+                addIdentifierIgnorable(set.getAlias(), status);
+            }
+            else if (propName.compare(u"javaISOControl", -1) == 0) {
+                set->add(0, 0x1F).add(0x7F, 0x9F);
+            }
+            else if (propName.compare(u"javaJavaIdentifierPart", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_SC_MASK, status);
+                addCategory(set.getAlias(), U_GC_PC_MASK, status);
+                addCategory(set.getAlias(), U_GC_ND_MASK, status);
+                addCategory(set.getAlias(), U_GC_NL_MASK, status);
+                addCategory(set.getAlias(), U_GC_MC_MASK, status);
+                addCategory(set.getAlias(), U_GC_MN_MASK, status);
+                addIdentifierIgnorable(set.getAlias(), status);
+            }
+            else if (propName.compare(u"javaJavaIdentifierStart", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_NL_MASK, status);
+                addCategory(set.getAlias(), U_GC_SC_MASK, status);
+                addCategory(set.getAlias(), U_GC_PC_MASK, status);
+            }
+            else if (propName.compare(u"javaLetter", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+            }
+            else if (propName.compare(u"javaLetterOrDigit", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_ND_MASK, status);
+            }
+            else if (propName.compare(u"javaLowerCase", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_LL_MASK, status);
+            }
+            else if (propName.compare(u"javaMirrored", -1) == 0) {
+                set->applyIntPropertyValue(UCHAR_BIDI_MIRRORED, 1, status);
+            }
+            else if (propName.compare(u"javaSpaceChar", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_Z_MASK, status);
+            }
+            else if (propName.compare(u"javaSupplementaryCodePoint", -1) == 0) {
+                set->add(0x10000, UnicodeSet::MAX_VALUE);
+            }
+            else if (propName.compare(u"javaTitleCase", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_LT_MASK, status);
+            }
+            else if (propName.compare(u"javaUnicodeIdentifierStart", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_NL_MASK, status);
+            }
+            else if (propName.compare(u"javaUnicodeIdentifierPart", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_PC_MASK, status);
+                addCategory(set.getAlias(), U_GC_ND_MASK, status);
+                addCategory(set.getAlias(), U_GC_NL_MASK, status);
+                addCategory(set.getAlias(), U_GC_MC_MASK, status);
+                addCategory(set.getAlias(), U_GC_MN_MASK, status);
+                addIdentifierIgnorable(set.getAlias(), status);
+            }
+            else if (propName.compare(u"javaUpperCase", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_LU_MASK, status);
+            }
+            else if (propName.compare(u"javaValidCodePoint", -1) == 0) {
+                set->add(0, UnicodeSet::MAX_VALUE);
+            }
+            else if (propName.compare(u"javaWhitespace", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_Z_MASK, status);
+                set->removeAll(UnicodeSet().add(0xa0).add(0x2007).add(0x202f));
+                set->add(9, 0x0d).add(0x1c, 0x1f);
+            } else {
+                status = U_REGEX_PROPERTY_SYNTAX;
+            }
+
+            if (U_SUCCESS(status) && !set->isEmpty() && (usetFlags & USET_CASE_INSENSITIVE)) {
+                set->closeOver(USET_CASE_INSENSITIVE);
+            }
+            break;
+        }
+
+        // Unrecognized property. ICU didn't like it as it was, and none of the Java compatibility
+        // extensions matched it.
+        status = U_REGEX_PROPERTY_SYNTAX;
+    } while (false);   // End of do loop block. Code above breaks out of the block on success or hard failure.
+
+    if (U_SUCCESS(status)) {
+        U_ASSERT(set.isValid());
+        if (negated) {
+            set->complement();
         }
-        delete set;
-        set = NULL;
+        return set.orphan();
+    } else {
+        if (status == U_ILLEGAL_ARGUMENT_ERROR) {
+            status = U_REGEX_PROPERTY_SYNTAX;
+        }
+        error(status);
+        return nullptr;
     }
-    error(*fStatus);
-    return NULL;
 }
 
 
-
 //
 //  SetEval   Part of the evaluation of [set expressions].
 //            Perform any pending (stacked) operations with precedence