summaryrefslogtreecommitdiff
path: root/deps/v8/src/regexp/regexp-compiler.cc
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/regexp/regexp-compiler.cc')
-rw-r--r--deps/v8/src/regexp/regexp-compiler.cc21
1 files changed, 17 insertions, 4 deletions
diff --git a/deps/v8/src/regexp/regexp-compiler.cc b/deps/v8/src/regexp/regexp-compiler.cc
index 85da69f308..d141f3c490 100644
--- a/deps/v8/src/regexp/regexp-compiler.cc
+++ b/deps/v8/src/regexp/regexp-compiler.cc
@@ -725,6 +725,11 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
unibrow::uchar* letters,
int letter_length) {
#ifdef V8_INTL_SUPPORT
+ // Special case for U+017F which has upper case in ASCII range.
+ if (character == 0x017f) {
+ letters[0] = character;
+ return 1;
+ }
icu::UnicodeSet set;
set.add(character);
set = set.closeOver(USET_CASE_INSENSITIVE);
@@ -734,10 +739,18 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
UChar32 start = set.getRangeStart(i);
UChar32 end = set.getRangeEnd(i);
CHECK(end - start + items <= letter_length);
- while (start <= end) {
- if (one_byte_subject && start > String::kMaxOneByteCharCode) break;
- letters[items++] = (unibrow::uchar)(start);
- start++;
+ // Only add to the output if the character is not in the ASCII range
+ // or the case-equivalent character is in the ASCII range.
+ // #sec-runtime-semantics-canonicalize-ch
+ // 3.g If the numeric value of ch ≥ 128 and the numeric value of cu < 128,
+ // return ch.
+ if (!((start >= 128) && (character < 128))) {
+ // No range has a start and end that span across code point 128.
+ DCHECK((start >= 128) == (end >= 128));
+ for (UChar32 cu = start; cu <= end; cu++) {
+ if (one_byte_subject && cu > String::kMaxOneByteCharCode) break;
+ letters[items++] = (unibrow::uchar)(cu);
+ }
}
}
return items;