diff options
Diffstat (limited to 'deps/v8/src/regexp/regexp-compiler.cc')
-rw-r--r-- | deps/v8/src/regexp/regexp-compiler.cc | 21 |
1 files changed, 17 insertions, 4 deletions
diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc index 85da69f308..d141f3c490 100644 --- a/deps/v8/src/regexp/regexp-compiler.cc +++ b/deps/v8/src/regexp/regexp-compiler.cc @@ -725,6 +725,11 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, unibrow::uchar* letters, int letter_length) { #ifdef V8_INTL_SUPPORT + // Special case for U+017F which has upper case in ASCII range. + if (character == 0x017f) { + letters[0] = character; + return 1; + } icu::UnicodeSet set; set.add(character); set = set.closeOver(USET_CASE_INSENSITIVE); @@ -734,10 +739,18 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, UChar32 start = set.getRangeStart(i); UChar32 end = set.getRangeEnd(i); CHECK(end - start + items <= letter_length); - while (start <= end) { - if (one_byte_subject && start > String::kMaxOneByteCharCode) break; - letters[items++] = (unibrow::uchar)(start); - start++; + // Only add to the output if character is not in ASCII range + // or the case equivalent character is in ASCII range. + // #sec-runtime-semantics-canonicalize-ch + // 3.g If the numeric value of ch ≥ 128 and the numeric value of cu < 128, + // return ch. + if (!((start >= 128) && (character < 128))) { + // No range have start and end span across code point 128. + DCHECK((start >= 128) == (end >= 128)); + for (UChar32 cu = start; cu <= end; cu++) { + if (one_byte_subject && cu > String::kMaxOneByteCharCode) break; + letters[items++] = (unibrow::uchar)(cu); + } } } return items; |