aboutsummaryrefslogtreecommitdiff
path: root/deps/v8/src/regexp
diff options
context:
space:
mode:
authorAli Ijaz Sheikh <ofrobots@google.com>2016-03-01 08:58:05 -0800
committerAli Sheikh <ofrobots@lemonhope.roam.corp.google.com>2016-03-03 20:35:20 -0800
commit069e02ab47656b3efd1b6829c65856b2e1c2d1db (patch)
treeeb643e0a2e88fd64bb9fc927423458d2ae96c2db /deps/v8/src/regexp
parent8938355398c79f583a468284b768652d12ba9bc9 (diff)
downloadandroid-node-v8-069e02ab47656b3efd1b6829c65856b2e1c2d1db.tar.gz
android-node-v8-069e02ab47656b3efd1b6829c65856b2e1c2d1db.tar.bz2
android-node-v8-069e02ab47656b3efd1b6829c65856b2e1c2d1db.zip
deps: upgrade to V8 4.9.385.18
Pick up the current branch head for V8 4.9 https://github.com/v8/v8/commit/1ecba0f PR-URL: https://github.com/nodejs/node/pull/4722 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Michaƫl Zasso <mic.besace@gmail.com>
Diffstat (limited to 'deps/v8/src/regexp')
-rw-r--r--deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc145
-rw-r--r--deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h10
-rw-r--r--deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc129
-rw-r--r--deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h8
-rw-r--r--deps/v8/src/regexp/bytecodes-irregexp.h20
-rw-r--r--deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc135
-rw-r--r--deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h10
-rw-r--r--deps/v8/src/regexp/interpreter-irregexp.cc77
-rw-r--r--deps/v8/src/regexp/jsregexp.cc269
-rw-r--r--deps/v8/src/regexp/jsregexp.h192
-rw-r--r--deps/v8/src/regexp/mips/OWNERS1
-rw-r--r--deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc137
-rw-r--r--deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h10
-rw-r--r--deps/v8/src/regexp/mips64/OWNERS1
-rw-r--r--deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc127
-rw-r--r--deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h14
-rw-r--r--deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc130
-rw-r--r--deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h10
-rw-r--r--deps/v8/src/regexp/regexp-ast.cc337
-rw-r--r--deps/v8/src/regexp/regexp-ast.h496
-rw-r--r--deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h2
-rw-r--r--deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc18
-rw-r--r--deps/v8/src/regexp/regexp-macro-assembler-irregexp.h6
-rw-r--r--deps/v8/src/regexp/regexp-macro-assembler-tracer.cc42
-rw-r--r--deps/v8/src/regexp/regexp-macro-assembler-tracer.h7
-rw-r--r--deps/v8/src/regexp/regexp-macro-assembler.cc14
-rw-r--r--deps/v8/src/regexp/regexp-macro-assembler.h18
-rw-r--r--deps/v8/src/regexp/regexp-parser.cc1180
-rw-r--r--deps/v8/src/regexp/regexp-parser.h277
-rw-r--r--deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc137
-rw-r--r--deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h10
-rw-r--r--deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc135
-rw-r--r--deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h10
33 files changed, 3312 insertions, 802 deletions
diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
index d296d90e7d..6fafdfb4ad 100644
--- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
+++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc
@@ -58,7 +58,7 @@ namespace internal {
* - fp[-16] void* input_string (location of a handle containing the string).
* - fp[-20] success counter (only for global regexps to count matches).
* - fp[-24] Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a
+ * string start - 1). Used to initialize capture registers to a
* non-position.
* - fp[-28] At start (if 1, we are starting at the start of the
* string, otherwise 0)
@@ -98,7 +98,8 @@ RegExpMacroAssemblerARM::RegExpMacroAssemblerARM(Isolate* isolate, Zone* zone,
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)),
+ masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -176,29 +177,18 @@ void RegExpMacroAssemblerARM::CheckCharacterGT(uc16 limit, Label* on_greater) {
void RegExpMacroAssemblerARM::CheckAtStart(Label* on_at_start) {
- Label not_at_start;
- // Did we start the match at the start of the string at all?
- __ ldr(r0, MemOperand(frame_pointer(), kStartIndex));
- __ cmp(r0, Operand::Zero());
- BranchOrBacktrack(ne, &not_at_start);
-
- // If we did, are we still at the start of the input?
- __ ldr(r1, MemOperand(frame_pointer(), kInputStart));
- __ add(r0, end_of_input_address(), Operand(current_input_offset()));
+ __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ add(r0, current_input_offset(), Operand(-char_size()));
__ cmp(r0, r1);
BranchOrBacktrack(eq, on_at_start);
- __ bind(&not_at_start);
}
-void RegExpMacroAssemblerARM::CheckNotAtStart(Label* on_not_at_start) {
- // Did we start the match at the start of the string at all?
- __ ldr(r0, MemOperand(frame_pointer(), kStartIndex));
- __ cmp(r0, Operand::Zero());
- BranchOrBacktrack(ne, on_not_at_start);
- // If we did, are we still at the start of the input?
- __ ldr(r1, MemOperand(frame_pointer(), kInputStart));
- __ add(r0, end_of_input_address(), Operand(current_input_offset()));
+void RegExpMacroAssemblerARM::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ add(r0, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
__ cmp(r0, r1);
BranchOrBacktrack(ne, on_not_at_start);
}
@@ -220,20 +210,27 @@ void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) {
void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_no_match) {
+ int start_reg, bool read_backward, Label* on_no_match) {
Label fallthrough;
__ ldr(r0, register_location(start_reg)); // Index of start of capture
__ ldr(r1, register_location(start_reg + 1)); // Index of end of capture
__ sub(r1, r1, r0, SetCC); // Length of capture.
- // If length is zero, either the capture is empty or it is not participating.
- // In either case succeed immediately.
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ b(eq, &fallthrough);
// Check that there are enough characters left in the input.
- __ cmn(r1, Operand(current_input_offset()));
- BranchOrBacktrack(gt, on_no_match);
+ if (read_backward) {
+ __ ldr(r3, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ add(r3, r3, r1);
+ __ cmp(current_input_offset(), r3);
+ BranchOrBacktrack(le, on_no_match);
+ } else {
+ __ cmn(r1, Operand(current_input_offset()));
+ BranchOrBacktrack(gt, on_no_match);
+ }
if (mode_ == LATIN1) {
Label success;
@@ -242,9 +239,12 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
// r0 - offset of start of capture
// r1 - length of capture
- __ add(r0, r0, Operand(end_of_input_address()));
- __ add(r2, end_of_input_address(), Operand(current_input_offset()));
- __ add(r1, r0, Operand(r1));
+ __ add(r0, r0, end_of_input_address());
+ __ add(r2, end_of_input_address(), current_input_offset());
+ if (read_backward) {
+ __ sub(r2, r2, r1); // Offset by length when matching backwards.
+ }
+ __ add(r1, r0, r1);
// r0 - Address of start of capture.
// r1 - Address of end of capture
@@ -283,6 +283,12 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
__ bind(&success);
// Compute new value of character position after the matched part.
__ sub(current_input_offset(), r2, end_of_input_address());
+ if (read_backward) {
+ __ ldr(r0, register_location(start_reg)); // Index of start of capture
+ __ ldr(r1, register_location(start_reg + 1)); // Index of end of capture
+ __ add(current_input_offset(), current_input_offset(), r0);
+ __ sub(current_input_offset(), current_input_offset(), r1);
+ }
} else {
DCHECK(mode_ == UC16);
int argument_count = 4;
@@ -305,7 +311,10 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
// Save length in callee-save register for use on return.
__ mov(r4, Operand(r1));
// Address of current input position.
- __ add(r1, current_input_offset(), Operand(end_of_input_address()));
+ __ add(r1, current_input_offset(), end_of_input_address());
+ if (read_backward) {
+ __ sub(r1, r1, r4);
+ }
// Isolate.
__ mov(r3, Operand(ExternalReference::isolate_address(isolate())));
@@ -319,17 +328,22 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
// Check if function returned non-zero for success or zero for failure.
__ cmp(r0, Operand::Zero());
BranchOrBacktrack(eq, on_no_match);
- // On success, increment position by length of capture.
- __ add(current_input_offset(), current_input_offset(), Operand(r4));
+
+ // On success, advance position by length of capture.
+ if (read_backward) {
+ __ sub(current_input_offset(), current_input_offset(), r4);
+ } else {
+ __ add(current_input_offset(), current_input_offset(), r4);
+ }
}
__ bind(&fallthrough);
}
-void RegExpMacroAssemblerARM::CheckNotBackReference(
- int start_reg,
- Label* on_no_match) {
+void RegExpMacroAssemblerARM::CheckNotBackReference(int start_reg,
+ bool read_backward,
+ Label* on_no_match) {
Label fallthrough;
Label success;
@@ -337,17 +351,31 @@ void RegExpMacroAssemblerARM::CheckNotBackReference(
__ ldr(r0, register_location(start_reg));
__ ldr(r1, register_location(start_reg + 1));
__ sub(r1, r1, r0, SetCC); // Length to check.
- // Succeed on empty capture (including no capture).
+
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ b(eq, &fallthrough);
// Check that there are enough characters left in the input.
- __ cmn(r1, Operand(current_input_offset()));
- BranchOrBacktrack(gt, on_no_match);
+ if (read_backward) {
+ __ ldr(r3, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ add(r3, r3, r1);
+ __ cmp(current_input_offset(), r3);
+ BranchOrBacktrack(lt, on_no_match);
+ } else {
+ __ cmn(r1, Operand(current_input_offset()));
+ BranchOrBacktrack(gt, on_no_match);
+ }
- // Compute pointers to match string and capture string
- __ add(r0, r0, Operand(end_of_input_address()));
- __ add(r2, end_of_input_address(), Operand(current_input_offset()));
- __ add(r1, r1, Operand(r0));
+ // r0 - offset of start of capture
+ // r1 - length of capture
+ __ add(r0, r0, end_of_input_address());
+ __ add(r2, end_of_input_address(), current_input_offset());
+ if (read_backward) {
+ __ sub(r2, r2, r1); // Offset by length when matching backwards.
+ }
+ __ add(r1, r0, r1);
Label loop;
__ bind(&loop);
@@ -366,6 +394,13 @@ void RegExpMacroAssemblerARM::CheckNotBackReference(
// Move current character position to position after match.
__ sub(current_input_offset(), r2, end_of_input_address());
+ if (read_backward) {
+ __ ldr(r0, register_location(start_reg)); // Index of start of capture
+ __ ldr(r1, register_location(start_reg + 1)); // Index of end of capture
+ __ add(current_input_offset(), current_input_offset(), r0);
+ __ sub(current_input_offset(), current_input_offset(), r1);
+ }
+
__ bind(&fallthrough);
}
@@ -603,7 +638,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
__ add(frame_pointer(), sp, Operand(4 * kPointerSize));
__ mov(r0, Operand::Zero());
__ push(r0); // Make room for success counter and initialize it to 0.
- __ push(r0); // Make room for "position - 1" constant (value is irrelevant).
+ __ push(r0); // Make room for "string start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
Label stack_ok;
@@ -647,7 +682,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
__ sub(r0, r0, Operand(r1, LSL, (mode_ == UC16) ? 1 : 0));
// Store this value in a local variable, for use when clearing
// position registers.
- __ str(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ str(r0, MemOperand(frame_pointer(), kStringStartMinusOne));
// Initialize code pointer register
__ mov(code_pointer(), Operand(masm_->CodeObject()));
@@ -751,7 +786,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
__ str(r2, MemOperand(frame_pointer(), kRegisterOutput));
// Prepare r0 to initialize registers with its value in the next run.
- __ ldr(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ ldr(r0, MemOperand(frame_pointer(), kStringStartMinusOne));
if (global_with_zero_length_check()) {
// Special case for zero-length matches.
@@ -892,10 +927,13 @@ void RegExpMacroAssemblerARM::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds,
int characters) {
- DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -983,7 +1021,7 @@ void RegExpMacroAssemblerARM::WriteCurrentPositionToRegister(int reg,
void RegExpMacroAssemblerARM::ClearRegisters(int reg_from, int reg_to) {
DCHECK(reg_from <= reg_to);
- __ ldr(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ ldr(r0, MemOperand(frame_pointer(), kStringStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
__ str(r0, register_location(reg));
}
@@ -1069,8 +1107,15 @@ MemOperand RegExpMacroAssemblerARM::register_location(int register_index) {
void RegExpMacroAssemblerARM::CheckPosition(int cp_offset,
Label* on_outside_input) {
- __ cmp(current_input_offset(), Operand(-cp_offset * char_size()));
- BranchOrBacktrack(ge, on_outside_input);
+ if (cp_offset >= 0) {
+ __ cmp(current_input_offset(), Operand(-cp_offset * char_size()));
+ BranchOrBacktrack(ge, on_outside_input);
+ } else {
+ __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ add(r0, current_input_offset(), Operand(cp_offset * char_size()));
+ __ cmp(r0, r1);
+ BranchOrBacktrack(le, on_outside_input);
+ }
}
diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h
index c6cff65635..233a98f761 100644
--- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h
+++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h
@@ -34,9 +34,11 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
@@ -119,9 +121,9 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
- static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
+ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
+ static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
index d440879e26..9948597ca0 100644
--- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
+++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc
@@ -113,7 +113,8 @@ RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
Zone* zone, Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)),
+ masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -210,23 +211,17 @@ void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit,
void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) {
- Label not_at_start;
- // Did we start the match at the start of the input string?
- CompareAndBranchOrBacktrack(start_offset(), 0, ne, &not_at_start);
- // If we did, are we still at the start of the input string?
- __ Add(x10, input_end(), Operand(current_input_offset(), SXTW));
- __ Cmp(x10, input_start());
+ __ Add(w10, current_input_offset(), Operand(-char_size()));
+ __ Cmp(w10, string_start_minus_one());
BranchOrBacktrack(eq, on_at_start);
- __ Bind(&not_at_start);
}
-void RegExpMacroAssemblerARM64::CheckNotAtStart(Label* on_not_at_start) {
- // Did we start the match at the start of the input string?
- CompareAndBranchOrBacktrack(start_offset(), 0, ne, on_not_at_start);
- // If we did, are we still at the start of the input string?
- __ Add(x10, input_end(), Operand(current_input_offset(), SXTW));
- __ Cmp(x10, input_start());
+void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ __ Add(w10, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
+ __ Cmp(w10, string_start_minus_one());
BranchOrBacktrack(ne, on_not_at_start);
}
@@ -277,9 +272,9 @@ void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
BranchOrBacktrack(eq, on_equal);
}
+
void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_no_match) {
+ int start_reg, bool read_backward, Label* on_no_match) {
Label fallthrough;
Register capture_start_offset = w10;
@@ -297,12 +292,21 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
__ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
}
__ Sub(capture_length, w11, capture_start_offset); // Length to check.
- // Succeed on empty capture (including no capture).
- __ Cbz(capture_length, &fallthrough);
+
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
+ __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
// Check that there are enough characters left in the input.
- __ Cmn(capture_length, current_input_offset());
- BranchOrBacktrack(gt, on_no_match);
+ if (read_backward) {
+ __ Add(w12, string_start_minus_one(), capture_length);
+ __ Cmp(current_input_offset(), w12);
+ BranchOrBacktrack(le, on_no_match);
+ } else {
+ __ Cmn(capture_length, current_input_offset());
+ BranchOrBacktrack(gt, on_no_match);
+ }
if (mode_ == LATIN1) {
Label success;
@@ -322,6 +326,11 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
__ Add(current_position_address,
input_end(),
Operand(current_input_offset(), SXTW));
+ if (read_backward) {
+ // Offset by length when matching backwards.
+ __ Sub(current_position_address, current_position_address,
+ Operand(capture_length, SXTW));
+ }
Label loop;
__ Bind(&loop);
@@ -355,6 +364,10 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
__ Bind(&success);
// Compute new value of character position after the matched part.
__ Sub(current_input_offset().X(), current_position_address, input_end());
+ if (read_backward) {
+ __ Sub(current_input_offset().X(), current_input_offset().X(),
+ Operand(capture_length, SXTW));
+ }
if (masm_->emit_debug_code()) {
__ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
__ Ccmp(current_input_offset(), 0, NoFlag, eq);
@@ -383,6 +396,9 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
__ Mov(w2, capture_length);
// Address of current input position.
__ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
+ if (read_backward) {
+ __ Sub(x1, x1, Operand(capture_length, SXTW));
+ }
// Isolate.
__ Mov(x3, ExternalReference::isolate_address(isolate()));
@@ -400,16 +416,20 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
__ PopCPURegList(cached_registers);
BranchOrBacktrack(eq, on_no_match);
- // On success, increment position by length of capture.
- __ Add(current_input_offset(), current_input_offset(), capture_length);
+ // On success, advance position by length of capture.
+ if (read_backward) {
+ __ Sub(current_input_offset(), current_input_offset(), capture_length);
+ } else {
+ __ Add(current_input_offset(), current_input_offset(), capture_length);
+ }
}
__ Bind(&fallthrough);
}
-void RegExpMacroAssemblerARM64::CheckNotBackReference(
- int start_reg,
- Label* on_no_match) {
+void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
+ bool read_backward,
+ Label* on_no_match) {
Label fallthrough;
Register capture_start_address = x12;
@@ -426,12 +446,21 @@ void RegExpMacroAssemblerARM64::CheckNotBackReference(
__ Ldp(w11, w10, capture_location(start_reg, x10));
}
__ Sub(capture_length, w11, w10); // Length to check.
- // Succeed on empty capture (including no capture).
- __ Cbz(capture_length, &fallthrough);
+
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
+ __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
// Check that there are enough characters left in the input.
- __ Cmn(capture_length, current_input_offset());
- BranchOrBacktrack(gt, on_no_match);
+ if (read_backward) {
+ __ Add(w12, string_start_minus_one(), capture_length);
+ __ Cmp(current_input_offset(), w12);
+ BranchOrBacktrack(le, on_no_match);
+ } else {
+ __ Cmn(capture_length, current_input_offset());
+ BranchOrBacktrack(gt, on_no_match);
+ }
// Compute pointers to match string and capture string
__ Add(capture_start_address, input_end(), Operand(w10, SXTW));
@@ -441,6 +470,11 @@ void RegExpMacroAssemblerARM64::CheckNotBackReference(
__ Add(current_position_address,
input_end(),
Operand(current_input_offset(), SXTW));
+ if (read_backward) {
+ // Offset by length when matching backwards.
+ __ Sub(current_position_address, current_position_address,
+ Operand(capture_length, SXTW));
+ }
Label loop;
__ Bind(&loop);
@@ -459,6 +493,11 @@ void RegExpMacroAssemblerARM64::CheckNotBackReference(
// Move current character position to position after match.
__ Sub(current_input_offset().X(), current_position_address, input_end());
+ if (read_backward) {
+ __ Sub(current_input_offset().X(), current_input_offset().X(),
+ Operand(capture_length, SXTW));
+ }
+
if (masm_->emit_debug_code()) {
__ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
__ Ccmp(current_input_offset(), 0, NoFlag, eq);
@@ -758,14 +797,13 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
// The non-position value is used as a clearing value for the
// capture registers, it corresponds to the position of the first character
// minus one.
- __ Sub(non_position_value(), current_input_offset(), char_size());
- __ Sub(non_position_value(), non_position_value(),
+ __ Sub(string_start_minus_one(), current_input_offset(), char_size());
+ __ Sub(string_start_minus_one(), string_start_minus_one(),
Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
// We can store this value twice in an X register for initializing
// on-stack registers later.
- __ Orr(twice_non_position_value(),
- non_position_value().X(),
- Operand(non_position_value().X(), LSL, kWRegSizeInBits));
+ __ Orr(twice_non_position_value(), string_start_minus_one().X(),
+ Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
// Initialize code pointer register.
__ Mov(code_pointer(), Operand(masm_->CodeObject()));
@@ -1081,11 +1119,14 @@ void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset,
int characters) {
// TODO(pielan): Make sure long strings are caught before this, and not
// just asserted in debug mode.
- DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
// Be sane! (And ensure that an int32_t can be used to index the string)
DCHECK(cp_offset < (1<<30));
if (check_bounds) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -1210,7 +1251,7 @@ void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
// If the first capture register is cached in a hardware register but not
// aligned on a 64-bit one, we need to clear the first one specifically.
if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
- StoreRegister(reg_from, non_position_value());
+ StoreRegister(reg_from, string_start_minus_one());
num_registers--;
reg_from++;
}
@@ -1224,7 +1265,7 @@ void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
}
if ((num_registers % 2) == 1) {
- StoreRegister(reg_from, non_position_value());
+ StoreRegister(reg_from, string_start_minus_one());
num_registers--;
reg_from++;
}
@@ -1301,10 +1342,14 @@ int RegExpMacroAssemblerARM64::CheckStackGuardState(
void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
Label* on_outside_input) {
- CompareAndBranchOrBacktrack(current_input_offset(),
- -cp_offset * char_size(),
- ge,
- on_outside_input);
+ if (cp_offset >= 0) {
+ CompareAndBranchOrBacktrack(current_input_offset(),
+ -cp_offset * char_size(), ge, on_outside_input);
+ } else {
+ __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
+ __ Cmp(w12, string_start_minus_one());
+ BranchOrBacktrack(le, on_outside_input);
+ }
}
diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
index 0dc519580d..d71f063d00 100644
--- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
+++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h
@@ -39,9 +39,11 @@ class RegExpMacroAssemblerARM64: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
@@ -190,7 +192,7 @@ class RegExpMacroAssemblerARM64: public NativeRegExpMacroAssembler {
Register code_pointer() { return x20; }
// Register holding the value used for clearing capture registers.
- Register non_position_value() { return w24; }
+ Register string_start_minus_one() { return w24; }
// The top 32 bit of this register is used to store this value
// twice. This is used for clearing more than one register at a time.
Register twice_non_position_value() { return x24; }
diff --git a/deps/v8/src/regexp/bytecodes-irregexp.h b/deps/v8/src/regexp/bytecodes-irregexp.h
index d6110a3cb5..2dbfbc0b82 100644
--- a/deps/v8/src/regexp/bytecodes-irregexp.h
+++ b/deps/v8/src/regexp/bytecodes-irregexp.h
@@ -57,15 +57,17 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
-V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \
-V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \
-V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */
+V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
+V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
+V(CHECK_NOT_AT_START, 46, 8) /* bc8 offset24 addr32 */ \
+V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
+V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
+V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
index 9e50a10574..6ef0f5fff6 100644
--- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
+++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc
@@ -53,7 +53,8 @@ namespace internal {
* - backup of caller ebx
* - success counter (only for global regexps to count matches).
* - Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a non-position.
+ * string start - 1). Used to initialize capture registers to a
+ * non-position.
* - register 0 ebp[-4] (only positions must be stored in the first
* - register 1 ebp[-8] num_saved_registers_ registers)
* - ...
@@ -80,7 +81,8 @@ RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(Isolate* isolate, Zone* zone,
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)),
+ masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -156,25 +158,16 @@ void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) {
- Label not_at_start;
- // Did we start the match at the start of the string at all?
- __ cmp(Operand(ebp, kStartIndex), Immediate(0));
- BranchOrBacktrack(not_equal, &not_at_start);
- // If we did, are we still at the start of the input?
- __ lea(eax, Operand(esi, edi, times_1, 0));
- __ cmp(eax, Operand(ebp, kInputStart));
+ __ lea(eax, Operand(edi, -char_size()));
+ __ cmp(eax, Operand(ebp, kStringStartMinusOne));
BranchOrBacktrack(equal, on_at_start);
- __ bind(&not_at_start);
}
-void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) {
- // Did we start the match at the start of the string at all?
- __ cmp(Operand(ebp, kStartIndex), Immediate(0));
- BranchOrBacktrack(not_equal, on_not_at_start);
- // If we did, are we still at the start of the input?
- __ lea(eax, Operand(esi, edi, times_1, 0));
- __ cmp(eax, Operand(ebp, kInputStart));
+void RegExpMacroAssemblerIA32::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size()));
+ __ cmp(eax, Operand(ebp, kStringStartMinusOne));
BranchOrBacktrack(not_equal, on_not_at_start);
}
@@ -196,26 +189,28 @@ void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_no_match) {
+ int start_reg, bool read_backward, Label* on_no_match) {
Label fallthrough;
__ mov(edx, register_location(start_reg)); // Index of start of capture
__ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
__ sub(ebx, edx); // Length of capture.
- // The length of a capture should not be negative. This can only happen
- // if the end of the capture is unrecorded, or at a point earlier than
- // the start of the capture.
- BranchOrBacktrack(less, on_no_match);
-
- // If length is zero, either the capture is empty or it is completely
- // uncaptured. In either case succeed immediately.
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ j(equal, &fallthrough);
// Check that there are sufficient characters left in the input.
- __ mov(eax, edi);
- __ add(eax, ebx);
- BranchOrBacktrack(greater, on_no_match);
+ if (read_backward) {
+ __ mov(eax, Operand(ebp, kStringStartMinusOne));
+ __ add(eax, ebx);
+ __ cmp(edi, eax);
+ BranchOrBacktrack(less_equal, on_no_match);
+ } else {
+ __ mov(eax, edi);
+ __ add(eax, ebx);
+ BranchOrBacktrack(greater, on_no_match);
+ }
if (mode_ == LATIN1) {
Label success;
@@ -228,6 +223,9 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
__ add(edx, esi); // Start of capture
__ add(edi, esi); // Start of text to match against capture.
+ if (read_backward) {
+ __ sub(edi, ebx); // Offset by length when matching backwards.
+ }
__ add(ebx, edi); // End of text to match against capture.
Label loop;
@@ -278,6 +276,11 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
__ add(esp, Immediate(kPointerSize));
// Compute new value of character position after the matched part.
__ sub(edi, esi);
+ if (read_backward) {
+ // Subtract match length if we matched backward.
+ __ add(edi, register_location(start_reg));
+ __ sub(edi, register_location(start_reg + 1));
+ }
} else {
DCHECK(mode_ == UC16);
// Save registers before calling C function.
@@ -304,6 +307,9 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
// Found by adding negative string-end offset of current position (edi)
// to end of string.
__ add(edi, esi);
+ if (read_backward) {
+ __ sub(edi, ebx); // Offset by length when matching backwards.
+ }
__ mov(Operand(esp, 1 * kPointerSize), edi);
// Set byte_offset1.
// Start of capture, where edx already holds string-end negative offset.
@@ -325,16 +331,20 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
// Check if function returned non-zero for success or zero for failure.
__ or_(eax, eax);
BranchOrBacktrack(zero, on_no_match);
- // On success, increment position by length of capture.
- __ add(edi, ebx);
+ // On success, advance position by length of capture.
+ if (read_backward) {
+ __ sub(edi, ebx);
+ } else {
+ __ add(edi, ebx);
+ }
}
__ bind(&fallthrough);
}
-void RegExpMacroAssemblerIA32::CheckNotBackReference(
- int start_reg,
- Label* on_no_match) {
+void RegExpMacroAssemblerIA32::CheckNotBackReference(int start_reg,
+ bool read_backward,
+ Label* on_no_match) {
Label fallthrough;
Label success;
Label fail;
@@ -343,22 +353,33 @@ void RegExpMacroAssemblerIA32::CheckNotBackReference(
__ mov(edx, register_location(start_reg));
__ mov(eax, register_location(start_reg + 1));
__ sub(eax, edx); // Length to check.
- // Fail on partial or illegal capture (start of capture after end of capture).
- BranchOrBacktrack(less, on_no_match);
- // Succeed on empty capture (including no capture)
+
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ j(equal, &fallthrough);
// Check that there are sufficient characters left in the input.
- __ mov(ebx, edi);
- __ add(ebx, eax);
- BranchOrBacktrack(greater, on_no_match);
+ if (read_backward) {
+ __ mov(ebx, Operand(ebp, kStringStartMinusOne));
+ __ add(ebx, eax);
+ __ cmp(edi, ebx);
+ BranchOrBacktrack(less_equal, on_no_match);
+ } else {
+ __ mov(ebx, edi);
+ __ add(ebx, eax);
+ BranchOrBacktrack(greater, on_no_match);
+ }
// Save register to make it available below.
__ push(backtrack_stackpointer());
// Compute pointers to match string and capture string
- __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match.
__ add(edx, esi); // Start of capture.
+ __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match.
+ if (read_backward) {
+ __ sub(ebx, eax); // Offset by length when matching backwards.
+ }
__ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match
Label loop;
@@ -389,6 +410,11 @@ void RegExpMacroAssemblerIA32::CheckNotBackReference(
// Move current character position to position after match.
__ mov(edi, ecx);
__ sub(edi, esi);
+ if (read_backward) {
+ // Subtract match length if we matched backward.
+ __ add(edi, register_location(start_reg));
+ __ sub(edi, register_location(start_reg + 1));
+ }
// Restore backtrack stackpointer.
__ pop(backtrack_stackpointer());
@@ -634,7 +660,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
__ push(edi);
__ push(ebx); // Callee-save on MacOS.
__ push(Immediate(0)); // Number of successful matches in a global regexp.
- __ push(Immediate(0)); // Make room for "input start - 1" constant.
+ __ push(Immediate(0)); // Make room for "string start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
@@ -684,7 +710,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
}
// Store this value in a local variable, for use when clearing
// position registers.
- __ mov(Operand(ebp, kInputStartMinusOne), eax);
+ __ mov(Operand(ebp, kStringStartMinusOne), eax);
#if V8_OS_WIN
// Ensure that we write to each stack page, in order. Skipping a page
@@ -767,7 +793,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
}
if (global()) {
- // Restart matching if the regular expression is flagged as global.
+ // Restart matching if the regular expression is flagged as global.
// Increment success counter.
__ inc(Operand(ebp, kSuccessfulCaptures));
// Capture results have been stored, so the number of remaining global
@@ -784,7 +810,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
Immediate(num_saved_registers_ * kPointerSize));
// Prepare eax to initialize registers with its value in the next run.
- __ mov(eax, Operand(ebp, kInputStartMinusOne));
+ __ mov(eax, Operand(ebp, kStringStartMinusOne));
if (global_with_zero_length_check()) {
// Special case for zero-length matches.
@@ -944,10 +970,13 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds,
int characters) {
- DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -1031,7 +1060,7 @@ void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) {
DCHECK(reg_from <= reg_to);
- __ mov(eax, Operand(ebp, kInputStartMinusOne));
+ __ mov(eax, Operand(ebp, kStringStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
__ mov(register_location(reg), eax);
}
@@ -1100,8 +1129,14 @@ Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
void RegExpMacroAssemblerIA32::CheckPosition(int cp_offset,
Label* on_outside_input) {
- __ cmp(edi, -cp_offset * char_size());
- BranchOrBacktrack(greater_equal, on_outside_input);
+ if (cp_offset >= 0) {
+ __ cmp(edi, -cp_offset * char_size());
+ BranchOrBacktrack(greater_equal, on_outside_input);
+ } else {
+ __ lea(eax, Operand(edi, cp_offset * char_size()));
+ __ cmp(eax, Operand(ebp, kStringStartMinusOne));
+ BranchOrBacktrack(less_equal, on_outside_input);
+ }
}
diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
index 06b9699d01..1ef87eef38 100644
--- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
+++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h
@@ -33,9 +33,11 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
@@ -116,9 +118,9 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
static const int kBackup_edi = kBackup_esi - kPointerSize;
static const int kBackup_ebx = kBackup_edi - kPointerSize;
static const int kSuccessfulCaptures = kBackup_ebx - kPointerSize;
- static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
+ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
+ static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
diff --git a/deps/v8/src/regexp/interpreter-irregexp.cc b/deps/v8/src/regexp/interpreter-irregexp.cc
index afc31a3d57..ea748e4e55 100644
--- a/deps/v8/src/regexp/interpreter-irregexp.cc
+++ b/deps/v8/src/regexp/interpreter-irregexp.cc
@@ -6,7 +6,7 @@
#include "src/regexp/interpreter-irregexp.h"
-#include "src/ast.h"
+#include "src/ast/ast.h"
#include "src/regexp/bytecodes-irregexp.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-macro-assembler.h"
@@ -270,7 +270,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
break;
BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos >= subject.length()) {
+ if (pos >= subject.length() || pos < 0) {
pc = code_base + Load32Aligned(pc + 4);
} else {
current_char = subject[pos];
@@ -286,7 +286,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 2 > subject.length()) {
+ if (pos + 2 > subject.length() || pos < 0) {
pc = code_base + Load32Aligned(pc + 4);
} else {
Char next = subject[pos + 1];
@@ -306,7 +306,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
BYTECODE(LOAD_4_CURRENT_CHARS) {
DCHECK(sizeof(Char) == 1);
int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 4 > subject.length()) {
+ if (pos + 4 > subject.length() || pos < 0) {
pc = code_base + Load32Aligned(pc + 4);
} else {
Char next1 = subject[pos + 1];
@@ -497,46 +497,59 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
BYTECODE(CHECK_NOT_BACK_REF) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from < 0 || len <= 0) {
- pc += BC_CHECK_NOT_BACK_REF_LENGTH;
- break;
- }
- if (current + len > subject.length()) {
- pc = code_base + Load32Aligned(pc + 4);
- break;
- } else {
- int i;
- for (i = 0; i < len; i++) {
- if (subject[from + i] != subject[current + i]) {
- pc = code_base + Load32Aligned(pc + 4);
- break;
- }
+ if (from >= 0 && len > 0) {
+ if (current + len > subject.length() ||
+ CompareChars(&subject[from], &subject[current], len) != 0) {
+ pc = code_base + Load32Aligned(pc + 4);
+ break;
}
- if (i < len) break;
current += len;
}
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current - len < 0 ||
+ CompareChars(&subject[from], &subject[current - len], len) != 0) {
+ pc = code_base + Load32Aligned(pc + 4);
+ break;
+ }
+ current -= len;
+ }
+ pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
+ break;
+ }
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from < 0 || len <= 0) {
- pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
- break;
+ if (from >= 0 && len > 0) {
+ if (current + len > subject.length() ||
+ !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
+ from, current, len, subject)) {
+ pc = code_base + Load32Aligned(pc + 4);
+ break;
+ }
+ current += len;
}
- if (current + len > subject.length()) {
- pc = code_base + Load32Aligned(pc + 4);
- break;
- } else {
- if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
- from, current, len, subject)) {
- current += len;
- pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
- } else {
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
+ break;
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from >= 0 && len > 0) {
+ if (current - len < 0 ||
+ !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
+ from, current - len, len, subject)) {
pc = code_base + Load32Aligned(pc + 4);
+ break;
}
+ current -= len;
}
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
break;
}
BYTECODE(CHECK_AT_START)
@@ -547,7 +560,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
}
break;
BYTECODE(CHECK_NOT_AT_START)
- if (current == 0) {
+ if (current + (insn >> BYTECODE_SHIFT) == 0) {
pc += BC_CHECK_NOT_AT_START_LENGTH;
} else {
pc = code_base + Load32Aligned(pc + 4);
diff --git a/deps/v8/src/regexp/jsregexp.cc b/deps/v8/src/regexp/jsregexp.cc
index 225ad73c4e..34d20fe781 100644
--- a/deps/v8/src/regexp/jsregexp.cc
+++ b/deps/v8/src/regexp/jsregexp.cc
@@ -4,7 +4,7 @@
#include "src/regexp/jsregexp.h"
-#include "src/ast.h"
+#include "src/ast/ast.h"
#include "src/base/platform/platform.h"
#include "src/compilation-cache.h"
#include "src/compiler.h"
@@ -13,12 +13,12 @@
#include "src/isolate-inl.h"
#include "src/messages.h"
#include "src/ostreams.h"
-#include "src/parser.h"
#include "src/regexp/interpreter-irregexp.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-macro-assembler-irregexp.h"
#include "src/regexp/regexp-macro-assembler-tracer.h"
+#include "src/regexp/regexp-parser.h"
#include "src/regexp/regexp-stack.h"
#include "src/runtime/runtime.h"
#include "src/splay-tree-inl.h"
@@ -51,16 +51,6 @@
namespace v8 {
namespace internal {
-MaybeHandle<Object> RegExpImpl::CreateRegExpLiteral(
- Handle<JSFunction> constructor,
- Handle<String> pattern,
- Handle<String> flags) {
- // Call the construct code with 2 arguments.
- Handle<Object> argv[] = { pattern, flags };
- return Execution::New(constructor, arraysize(argv), argv);
-}
-
-
MUST_USE_RESULT
static inline MaybeHandle<Object> ThrowRegExpException(
Handle<JSRegExp> re, Handle<String> pattern, Handle<String> error_text) {
@@ -156,25 +146,21 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
RegExpCompileData parse_result;
FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader,
- flags.is_multiline(), flags.is_unicode(),
- &parse_result)) {
+ flags & JSRegExp::kMultiline,
+ flags & JSRegExp::kUnicode, &parse_result)) {
// Throw an exception if we fail to parse the pattern.
return ThrowRegExpException(re, pattern, parse_result.error);
}
bool has_been_compiled = false;
- if (parse_result.simple &&
- !flags.is_ignore_case() &&
- !flags.is_sticky() &&
- !HasFewDifferentCharacters(pattern)) {
+ if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&
+ !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern);
has_been_compiled = true;
- } else if (parse_result.tree->IsAtom() &&
- !flags.is_ignore_case() &&
- !flags.is_sticky() &&
- parse_result.capture_count == 0) {
+ } else if (parse_result.tree->IsAtom() && !(flags & JSRegExp::kIgnoreCase) &&
+ !(flags & JSRegExp::kSticky) && parse_result.capture_count == 0) {
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
Handle<String> atom_string;
@@ -385,17 +371,18 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
pattern = String::Flatten(pattern);
RegExpCompileData compile_data;
FlatStringReader reader(isolate, pattern);
- if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags.is_multiline(),
- flags.is_unicode(), &compile_data)) {
+ if (!RegExpParser::ParseRegExp(isolate, &zone, &reader,
+ flags & JSRegExp::kMultiline,
+ flags & JSRegExp::kUnicode, &compile_data)) {
// Throw an exception if we fail to parse the pattern.
// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
USE(ThrowRegExpException(re, pattern, compile_data.error));
return false;
}
RegExpEngine::CompilationResult result = RegExpEngine::Compile(
- isolate, &zone, &compile_data, flags.is_ignore_case(), flags.is_global(),
- flags.is_multiline(), flags.is_sticky(), pattern, sample_subject,
- is_one_byte);
+ isolate, &zone, &compile_data, flags & JSRegExp::kIgnoreCase,
+ flags & JSRegExp::kGlobal, flags & JSRegExp::kMultiline,
+ flags & JSRegExp::kSticky, pattern, sample_subject, is_one_byte);
if (result.error_message != NULL) {
// Unable to compile regexp.
Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
@@ -1002,6 +989,8 @@ class RegExpCompiler {
inline void set_limiting_recursion(bool value) {
limiting_recursion_ = value;
}
+ bool read_backward() { return read_backward_; }
+ void set_read_backward(bool value) { read_backward_ = value; }
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
int current_expansion_factor() { return current_expansion_factor_; }
@@ -1025,6 +1014,7 @@ class RegExpCompiler {
bool reg_exp_too_big_;
bool limiting_recursion_;
bool optimize_;
+ bool read_backward_;
int current_expansion_factor_;
FrequencyCollator frequency_collator_;
Isolate* isolate_;
@@ -1060,6 +1050,7 @@ RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
reg_exp_too_big_(false),
limiting_recursion_(false),
optimize_(FLAG_regexp_optimization),
+ read_backward_(false),
current_expansion_factor_(1),
frequency_collator_(),
isolate_(isolate),
@@ -1224,7 +1215,8 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
int value = 0;
bool absolute = false;
bool clear = false;
- int store_position = -1;
+ static const int kNoStore = kMinInt;
+ int store_position = kNoStore;
// This is a little tricky because we are scanning the actions in reverse
// historical order (newest first).
for (DeferredAction* action = actions_;
@@ -1245,7 +1237,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
// we can set undo_action to IGNORE if we know there is no value to
// restore.
undo_action = RESTORE;
- DCHECK_EQ(store_position, -1);
+ DCHECK_EQ(store_position, kNoStore);
DCHECK(!clear);
break;
}
@@ -1253,14 +1245,14 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
if (!absolute) {
value++;
}
- DCHECK_EQ(store_position, -1);
+ DCHECK_EQ(store_position, kNoStore);
DCHECK(!clear);
undo_action = RESTORE;
break;
case ActionNode::STORE_POSITION: {
Trace::DeferredCapture* pc =
static_cast<Trace::DeferredCapture*>(action);
- if (!clear && store_position == -1) {
+ if (!clear && store_position == kNoStore) {
store_position = pc->cp_offset();
}
@@ -1284,7 +1276,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
// Since we're scanning in reverse order, if we've already
// set the position we have to ignore historically earlier
// clearing operations.
- if (store_position == -1) {
+ if (store_position == kNoStore) {
clear = true;
}
undo_action = RESTORE;
@@ -1315,7 +1307,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
}
// Perform the chronologically last action (or accumulated increment)
// for the register.
- if (store_position != -1) {
+ if (store_position != kNoStore) {
assembler->WriteCurrentPositionToRegister(reg, store_position);
} else if (clear) {
assembler->ClearRegisters(reg, reg);
@@ -2313,6 +2305,7 @@ void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
int BackReferenceNode::EatsAtLeast(int still_to_find,
int budget,
bool not_at_start) {
+ if (read_backward()) return 0;
if (budget <= 0) return 0;
return on_success()->EatsAtLeast(still_to_find,
budget - 1,
@@ -2323,6 +2316,7 @@ int BackReferenceNode::EatsAtLeast(int still_to_find,
int TextNode::EatsAtLeast(int still_to_find,
int budget,
bool not_at_start) {
+ if (read_backward()) return 0;
int answer = Length();
if (answer >= still_to_find) return answer;
if (budget <= 0) return answer;
@@ -2333,9 +2327,8 @@ int TextNode::EatsAtLeast(int still_to_find,
}
-int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,
- int budget,
- bool not_at_start) {
+int NegativeLookaroundChoiceNode::EatsAtLeast(int still_to_find, int budget,
+ bool not_at_start) {
if (budget <= 0) return 0;
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
@@ -2344,10 +2337,8 @@ int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,
}
-void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
- QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int filled_in,
+void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
+ QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
bool not_at_start) {
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
@@ -2517,6 +2508,9 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start) {
+ // Do not collect any quick check details if the text node reads backward,
+ // since it reads in the opposite direction than we use for quick checks.
+ if (read_backward()) return;
Isolate* isolate = compiler->macro_assembler()->isolate();
DCHECK(characters_filled_in < details->characters());
int characters = details->characters();
@@ -2526,8 +2520,8 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
} else {
char_mask = String::kMaxUtf16CodeUnit;
}
- for (int k = 0; k < elms_->length(); k++) {
- TextElement elm = elms_->at(k);
+ for (int k = 0; k < elements()->length(); k++) {
+ TextElement elm = elements()->at(k);
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int i = 0; i < characters && i < quarks.length(); i++) {
@@ -2678,11 +2672,13 @@ void QuickCheckDetails::Clear() {
void QuickCheckDetails::Advance(int by, bool one_byte) {
- DCHECK(by >= 0);
- if (by >= characters_) {
+ if (by >= characters_ || by < 0) {
+ DCHECK_IMPLIES(by < 0, characters_ == 0);
Clear();
return;
}
+ DCHECK_LE(characters_ - by, 4);
+ DCHECK_LE(characters_, 4);
for (int i = 0; i < characters_ - by; i++) {
positions_[i] = positions_[by + i];
}
@@ -2780,9 +2776,9 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
if (depth < 0) return this;
DCHECK(!info()->visited);
VisitMarker marker(info());
- int element_count = elms_->length();
+ int element_count = elements()->length();
for (int i = 0; i < element_count; i++) {
- TextElement elm = elms_->at(i);
+ TextElement elm = elements()->at(i);
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) {
@@ -2898,8 +2894,8 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
}
-RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth,
- bool ignore_case) {
+RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
+ bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -3146,9 +3142,9 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
return;
}
if (trace->at_start() == Trace::UNKNOWN) {
- assembler->CheckNotAtStart(trace->backtrack());
+ assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
Trace at_start_trace = *trace;
- at_start_trace.set_at_start(true);
+ at_start_trace.set_at_start(Trace::TRUE_VALUE);
on_success()->Emit(compiler, &at_start_trace);
return;
}
@@ -3221,10 +3217,11 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
bool one_byte = compiler->one_byte();
Label* backtrack = trace->backtrack();
QuickCheckDetails* quick_check = trace->quick_check_performed();
- int element_count = elms_->length();
+ int element_count = elements()->length();
+ int backward_offset = read_backward() ? -Length() : 0;
for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
- TextElement elm = elms_->at(i);
- int cp_offset = trace->cp_offset() + elm.cp_offset();
+ TextElement elm = elements()->at(i);
+ int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
@@ -3252,13 +3249,10 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
break;
}
if (emit_function != NULL) {
- bool bound_checked = emit_function(isolate,
- compiler,
- quarks[j],
- backtrack,
- cp_offset + j,
- *checked_up_to < cp_offset + j,
- preloaded);
+ bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
+ bool bound_checked =
+ emit_function(isolate, compiler, quarks[j], backtrack,
+ cp_offset + j, bounds_check, preloaded);
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
}
}
@@ -3268,8 +3262,9 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
if (first_element_checked && i == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
RegExpCharacterClass* cc = elm.char_class();
+ bool bounds_check = *checked_up_to < cp_offset || read_backward();
EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
- *checked_up_to < cp_offset, preloaded, zone());
+ bounds_check, preloaded, zone());
UpdateBoundsCheck(cp_offset, checked_up_to);
}
}
@@ -3278,7 +3273,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
int TextNode::Length() {
- TextElement elm = elms_->last();
+ TextElement elm = elements()->last();
DCHECK(elm.cp_offset() >= 0);
return elm.cp_offset() + elm.length();
}
@@ -3347,8 +3342,11 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
}
Trace successor_trace(*trace);
- successor_trace.set_at_start(false);
- successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
+ // If we advance backward, we may end up at the start.
+ successor_trace.AdvanceCurrentPositionInTrace(
+ read_backward() ? -Length() : Length(), compiler);
+ successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
+ : Trace::FALSE_VALUE);
RecursionCheck rc(compiler);
on_success()->Emit(compiler, &successor_trace);
}
@@ -3360,7 +3358,6 @@ void Trace::InvalidateCurrentCharacter() {
void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
- DCHECK(by > 0);
// We don't have an instruction for shifting the current character register
// down or for using a shifted value for anything so lets just forget that
// we preloaded any characters into it.
@@ -3379,9 +3376,9 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
- int element_count = elms_->length();
+ int element_count = elements()->length();
for (int i = 0; i < element_count; i++) {
- TextElement elm = elms_->at(i);
+ TextElement elm = elements()->at(i);
if (elm.text_type() == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.char_class();
// None of the standard character classes is different in the case
@@ -3397,16 +3394,14 @@ void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
}
-int TextNode::GreedyLoopTextLength() {
- TextElement elm = elms_->at(elms_->length() - 1);
- return elm.cp_offset() + elm.length();
-}
+int TextNode::GreedyLoopTextLength() { return Length(); }
RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) {
- if (elms_->length() != 1) return NULL;
- TextElement elm = elms_->at(0);
+ if (read_backward()) return NULL;
+ if (elements()->length() != 1) return NULL;
+ TextElement elm = elements()->at(0);
if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
RegExpCharacterClass* node = elm.char_class();
ZoneList<CharacterRange>* ranges = node->ranges(zone());
@@ -3450,7 +3445,7 @@ int ChoiceNode::GreedyLoopTextLengthForAlternative(
SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
node = seq_node->on_success();
}
- return length;
+ return read_backward() ? -length : length;
}
@@ -3881,7 +3876,7 @@ void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
GreedyLoopState::GreedyLoopState(bool not_at_start) {
counter_backtrack_trace_.set_backtrack(&label_);
- if (not_at_start) counter_backtrack_trace_.set_at_start(false);
+ if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
}
@@ -4008,7 +4003,7 @@ Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
macro_assembler->PushCurrentPosition();
Label greedy_match_failed;
Trace greedy_match_trace;
- if (not_at_start()) greedy_match_trace.set_at_start(false);
+ if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
greedy_match_trace.set_backtrack(&greedy_match_failed);
Label loop_label;
macro_assembler->Bind(&loop_label);
@@ -4354,11 +4349,14 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
DCHECK_EQ(start_reg_ + 1, end_reg_);
if (compiler->ignore_case()) {
- assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
+ assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
trace->backtrack());
} else {
- assembler->CheckNotBackReference(start_reg_, trace->backtrack());
+ assembler->CheckNotBackReference(start_reg_, read_backward(),
+ trace->backtrack());
}
+ // We are going to advance backward, so we may end up at the start.
+ if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
on_success()->Emit(compiler, trace);
}
@@ -4719,13 +4717,15 @@ RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
ZoneList<TextElement>* elms =
new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
elms->Add(TextElement::Atom(this), compiler->zone());
- return new(compiler->zone()) TextNode(elms, on_success);
+ return new (compiler->zone())
+ TextNode(elms, compiler->read_backward(), on_success);
}
RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- return new(compiler->zone()) TextNode(elements(), on_success);
+ return new (compiler->zone())
+ TextNode(elements(), compiler->read_backward(), on_success);
}
@@ -4822,7 +4822,8 @@ bool RegExpCharacterClass::is_standard(Zone* zone) {
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- return new(compiler->zone()) TextNode(this, on_success);
+ return new (compiler->zone())
+ TextNode(this, compiler->read_backward(), on_success);
}
@@ -5204,7 +5205,9 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
GuardedAlternative(body->ToNode(compiler, answer)));
}
answer = alternation;
- if (not_at_start) alternation->set_not_at_start();
+ if (not_at_start && !compiler->read_backward()) {
+ alternation->set_not_at_start();
+ }
}
return answer;
}
@@ -5216,9 +5219,9 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
int reg_ctr = needs_counter
? compiler->AllocateRegister()
: RegExpCompiler::kNoRegister;
- LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0,
- zone);
- if (not_at_start) center->set_not_at_start();
+ LoopChoiceNode* center = new (zone)
+ LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone);
+ if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
: static_cast<RegExpNode*>(center);
@@ -5294,14 +5297,13 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
ZoneList<CharacterRange>* newline_ranges =
new(zone) ZoneList<CharacterRange>(3, zone);
CharacterRange::AddClassEscape('n', newline_ranges, zone);
- RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n');
- TextNode* newline_matcher = new(zone) TextNode(
- newline_atom,
- ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
- position_register,
- 0, // No captures inside.
- -1, // Ignored if no captures.
- on_success));
+ RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n');
+ TextNode* newline_matcher = new (zone) TextNode(
+ newline_atom, false, ActionNode::PositiveSubmatchSuccess(
+ stack_pointer_register, position_register,
+ 0, // No captures inside.
+ -1, // Ignored if no captures.
+ on_success));
// Create an end-of-input matcher.
RegExpNode* end_of_line = ActionNode::BeginSubmatch(
stack_pointer_register,
@@ -5323,10 +5325,10 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- return new(compiler->zone())
+ return new (compiler->zone())
BackReferenceNode(RegExpCapture::StartRegister(index()),
RegExpCapture::EndRegister(index()),
- on_success);
+ compiler->read_backward(), on_success);
}
@@ -5336,8 +5338,8 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
}
-RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
+RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success) {
int stack_pointer_register = compiler->AllocateRegister();
int position_register = compiler->AllocateRegister();
@@ -5347,19 +5349,16 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
int register_start =
register_of_first_capture + capture_from_ * registers_per_capture;
- RegExpNode* success;
+ RegExpNode* result;
+ bool was_reading_backward = compiler->read_backward();
+ compiler->set_read_backward(type() == LOOKBEHIND);
if (is_positive()) {
- RegExpNode* node = ActionNode::BeginSubmatch(
- stack_pointer_register,
- position_register,
- body()->ToNode(
- compiler,
- ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
- position_register,
- register_count,
- register_start,
- on_success)));
- return node;
+ result = ActionNode::BeginSubmatch(
+ stack_pointer_register, position_register,
+ body()->ToNode(compiler,
+ ActionNode::PositiveSubmatchSuccess(
+ stack_pointer_register, position_register,
+ register_count, register_start, on_success)));
} else {
// We use a ChoiceNode for a negative lookahead because it has most of
// the characteristics we need. It has the body of the lookahead as its
@@ -5374,21 +5373,16 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
Zone* zone = compiler->zone();
GuardedAlternative body_alt(
- body()->ToNode(
- compiler,
- success = new(zone) NegativeSubmatchSuccess(stack_pointer_register,
- position_register,
- register_count,
- register_start,
- zone)));
- ChoiceNode* choice_node =
- new(zone) NegativeLookaheadChoiceNode(body_alt,
- GuardedAlternative(on_success),
- zone);
- return ActionNode::BeginSubmatch(stack_pointer_register,
- position_register,
- choice_node);
- }
+ body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess(
+ stack_pointer_register, position_register,
+ register_count, register_start, zone)));
+ ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
+ body_alt, GuardedAlternative(on_success), zone);
+ result = ActionNode::BeginSubmatch(stack_pointer_register,
+ position_register, choice_node);
+ }
+ compiler->set_read_backward(was_reading_backward);
+ return result;
}
@@ -5402,8 +5396,10 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
int index,
RegExpCompiler* compiler,
RegExpNode* on_success) {
+ DCHECK_NOT_NULL(body);
int start_reg = RegExpCapture::StartRegister(index);
int end_reg = RegExpCapture::EndRegister(index);
+ if (compiler->read_backward()) std::swap(start_reg, end_reg);
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
RegExpNode* body_node = body->ToNode(compiler, store_end);
return ActionNode::StorePosition(start_reg, true, body_node);
@@ -5414,8 +5410,14 @@ RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
ZoneList<RegExpTree*>* children = nodes();
RegExpNode* current = on_success;
- for (int i = children->length() - 1; i >= 0; i--) {
- current = children->at(i)->ToNode(compiler, current);
+ if (compiler->read_backward()) {
+ for (int i = 0; i < children->length(); i++) {
+ current = children->at(i)->ToNode(compiler, current);
+ }
+ } else {
+ for (int i = children->length() - 1; i >= 0; i--) {
+ current = children->at(i)->ToNode(compiler, current);
+ }
}
return current;
}
@@ -6291,22 +6293,17 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
if (!is_start_anchored && !is_sticky) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning or sticky.
- RegExpNode* loop_node =
- RegExpQuantifier::ToNode(0,
- RegExpTree::kInfinity,
- false,
- new(zone) RegExpCharacterClass('*'),
- &compiler,
- captured_body,
- data->contains_anchor);
+ RegExpNode* loop_node = RegExpQuantifier::ToNode(
+ 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
+ &compiler, captured_body, data->contains_anchor);
if (data->contains_anchor) {
// Unroll loop once, to take care of the case that might start
// at the start of input.
ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
- first_step_node->AddAlternative(GuardedAlternative(
- new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));
+ first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
+ new (zone) RegExpCharacterClass('*'), false, loop_node)));
node = first_step_node;
} else {
node = loop_node;
diff --git a/deps/v8/src/regexp/jsregexp.h b/deps/v8/src/regexp/jsregexp.h
index 537bdff8e2..0ad4b79c87 100644
--- a/deps/v8/src/regexp/jsregexp.h
+++ b/deps/v8/src/regexp/jsregexp.h
@@ -7,6 +7,7 @@
#include "src/allocation.h"
#include "src/assembler.h"
+#include "src/regexp/regexp-ast.h"
namespace v8 {
namespace internal {
@@ -29,13 +30,6 @@ class RegExpImpl {
#endif
}
- // Creates a regular expression literal in the old space.
- // This function calls the garbage collector if necessary.
- MUST_USE_RESULT static MaybeHandle<Object> CreateRegExpLiteral(
- Handle<JSFunction> constructor,
- Handle<String> pattern,
- Handle<String> flags);
-
// Returns a string representation of a regular expression.
// Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
// This function calls the garbage collector if necessary.
@@ -233,63 +227,6 @@ enum ElementInSetsRelation {
};
-// Represents code units in the range from from_ to to_, both ends are
-// inclusive.
-class CharacterRange {
- public:
- CharacterRange() : from_(0), to_(0) { }
- // For compatibility with the CHECK_OK macro
- CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
- CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
- static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
- Zone* zone);
- static Vector<const int> GetWordBounds();
- static inline CharacterRange Singleton(uc16 value) {
- return CharacterRange(value, value);
- }
- static inline CharacterRange Range(uc16 from, uc16 to) {
- DCHECK(from <= to);
- return CharacterRange(from, to);
- }
- static inline CharacterRange Everything() {
- return CharacterRange(0, 0xFFFF);
- }
- bool Contains(uc16 i) { return from_ <= i && i <= to_; }
- uc16 from() const { return from_; }
- void set_from(uc16 value) { from_ = value; }
- uc16 to() const { return to_; }
- void set_to(uc16 value) { to_ = value; }
- bool is_valid() { return from_ <= to_; }
- bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
- bool IsSingleton() { return (from_ == to_); }
- void AddCaseEquivalents(Isolate* isolate, Zone* zone,
- ZoneList<CharacterRange>* ranges, bool is_one_byte);
- static void Split(ZoneList<CharacterRange>* base,
- Vector<const int> overlay,
- ZoneList<CharacterRange>** included,
- ZoneList<CharacterRange>** excluded,
- Zone* zone);
- // Whether a range list is in canonical form: Ranges ordered by from value,
- // and ranges non-overlapping and non-adjacent.
- static bool IsCanonical(ZoneList<CharacterRange>* ranges);
- // Convert range list to canonical form. The characters covered by the ranges
- // will still be the same, but no character is in more than one range, and
- // adjacent ranges are merged. The resulting list may be shorter than the
- // original, but cannot be longer.
- static void Canonicalize(ZoneList<CharacterRange>* ranges);
- // Negate the contents of a character range in canonical form.
- static void Negate(ZoneList<CharacterRange>* src,
- ZoneList<CharacterRange>* dst,
- Zone* zone);
- static const int kStartMarker = (1 << 24);
- static const int kPayloadMask = (1 << 24) - 1;
-
- private:
- uc16 from_;
- uc16 to_;
-};
-
-
// A set of unsigned integers that behaves especially well on small
// integers (< 32). May do zone-allocation.
class OutSet: public ZoneObject {
@@ -387,63 +324,6 @@ class DispatchTable : public ZoneObject {
VISIT(Text)
-#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
- VISIT(Disjunction) \
- VISIT(Alternative) \
- VISIT(Assertion) \
- VISIT(CharacterClass) \
- VISIT(Atom) \
- VISIT(Quantifier) \
- VISIT(Capture) \
- VISIT(Lookahead) \
- VISIT(BackReference) \
- VISIT(Empty) \
- VISIT(Text)
-
-
-#define FORWARD_DECLARE(Name) class RegExp##Name;
-FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
-#undef FORWARD_DECLARE
-
-
-class TextElement final BASE_EMBEDDED {
- public:
- enum TextType {
- ATOM,
- CHAR_CLASS
- };
-
- static TextElement Atom(RegExpAtom* atom);
- static TextElement CharClass(RegExpCharacterClass* char_class);
-
- int cp_offset() const { return cp_offset_; }
- void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
- int length() const;
-
- TextType text_type() const { return text_type_; }
-
- RegExpTree* tree() const { return tree_; }
-
- RegExpAtom* atom() const {
- DCHECK(text_type() == ATOM);
- return reinterpret_cast<RegExpAtom*>(tree());
- }
-
- RegExpCharacterClass* char_class() const {
- DCHECK(text_type() == CHAR_CLASS);
- return reinterpret_cast<RegExpCharacterClass*>(tree());
- }
-
- private:
- TextElement(TextType text_type, RegExpTree* tree)
- : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
-
- int cp_offset_;
- TextType text_type_;
- RegExpTree* tree_;
-};
-
-
class Trace;
struct PreloadState;
class GreedyLoopState;
@@ -603,7 +483,7 @@ class RegExpNode: public ZoneObject {
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start) = 0;
- static const int kNodeIsTooComplexForGreedyLoops = -1;
+ static const int kNodeIsTooComplexForGreedyLoops = kMinInt;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
// Only returns the successor for a text node of length 1 that matches any
// character and that has no guards on it.
@@ -695,33 +575,6 @@ class RegExpNode: public ZoneObject {
};
-// A simple closed interval.
-class Interval {
- public:
- Interval() : from_(kNone), to_(kNone) { }
- Interval(int from, int to) : from_(from), to_(to) { }
- Interval Union(Interval that) {
- if (that.from_ == kNone)
- return *this;
- else if (from_ == kNone)
- return that;
- else
- return Interval(Min(from_, that.from_), Max(to_, that.to_));
- }
- bool Contains(int value) {
- return (from_ <= value) && (value <= to_);
- }
- bool is_empty() { return from_ == kNone; }
- int from() const { return from_; }
- int to() const { return to_; }
- static Interval Empty() { return Interval(); }
- static const int kNone = -1;
- private:
- int from_;
- int to_;
-};
-
-
class SeqRegExpNode: public RegExpNode {
public:
explicit SeqRegExpNode(RegExpNode* on_success)
@@ -827,14 +680,14 @@ class ActionNode: public SeqRegExpNode {
class TextNode: public SeqRegExpNode {
public:
- TextNode(ZoneList<TextElement>* elms,
+ TextNode(ZoneList<TextElement>* elms, bool read_backward,
RegExpNode* on_success)
- : SeqRegExpNode(on_success),
- elms_(elms) { }
- TextNode(RegExpCharacterClass* that,
+ : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
+ TextNode(RegExpCharacterClass* that, bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
- elms_(new(zone()) ZoneList<TextElement>(1, zone())) {
+ elms_(new (zone()) ZoneList<TextElement>(1, zone())),
+ read_backward_(read_backward) {
elms_->Add(TextElement::CharClass(that), zone());
}
virtual void Accept(NodeVisitor* visitor);
@@ -845,6 +698,7 @@ class TextNode: public SeqRegExpNode {
int characters_filled_in,
bool not_at_start);
ZoneList<TextElement>* elements() { return elms_; }
+ bool read_backward() { return read_backward_; }
void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
@@ -873,6 +727,7 @@ class TextNode: public SeqRegExpNode {
int* checked_up_to);
int Length();
ZoneList<TextElement>* elms_;
+ bool read_backward_;
};
@@ -925,15 +780,16 @@ class AssertionNode: public SeqRegExpNode {
class BackReferenceNode: public SeqRegExpNode {
public:
- BackReferenceNode(int start_reg,
- int end_reg,
+ BackReferenceNode(int start_reg, int end_reg, bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
- end_reg_(end_reg) { }
+ end_reg_(end_reg),
+ read_backward_(read_backward) {}
virtual void Accept(NodeVisitor* visitor);
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
+ bool read_backward() { return read_backward_; }
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
@@ -950,6 +806,7 @@ class BackReferenceNode: public SeqRegExpNode {
private:
int start_reg_;
int end_reg_;
+ bool read_backward_;
};
@@ -1074,6 +931,7 @@ class ChoiceNode: public RegExpNode {
return true;
}
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
+ virtual bool read_backward() { return false; }
protected:
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
@@ -1116,11 +974,11 @@ class ChoiceNode: public RegExpNode {
};
-class NegativeLookaheadChoiceNode: public ChoiceNode {
+class NegativeLookaroundChoiceNode : public ChoiceNode {
public:
- explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail,
- GuardedAlternative then_do_this,
- Zone* zone)
+ explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,
+ GuardedAlternative then_do_this,
+ Zone* zone)
: ChoiceNode(2, zone) {
AddAlternative(this_must_fail);
AddAlternative(then_do_this);
@@ -1150,12 +1008,12 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
class LoopChoiceNode: public ChoiceNode {
public:
- explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone)
+ LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, Zone* zone)
: ChoiceNode(2, zone),
loop_node_(NULL),
continue_node_(NULL),
- body_can_be_zero_length_(body_can_be_zero_length)
- { }
+ body_can_be_zero_length_(body_can_be_zero_length),
+ read_backward_(read_backward) {}
void AddLoopAlternative(GuardedAlternative alt);
void AddContinueAlternative(GuardedAlternative alt);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
@@ -1169,6 +1027,7 @@ class LoopChoiceNode: public ChoiceNode {
RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
+ virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
@@ -1183,6 +1042,7 @@ class LoopChoiceNode: public ChoiceNode {
RegExpNode* loop_node_;
RegExpNode* continue_node_;
bool body_can_be_zero_length_;
+ bool read_backward_;
};
@@ -1438,9 +1298,7 @@ class Trace {
at_start_ == UNKNOWN;
}
TriBool at_start() { return at_start_; }
- void set_at_start(bool at_start) {
- at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
- }
+ void set_at_start(TriBool at_start) { at_start_ = at_start; }
Label* backtrack() { return backtrack_; }
Label* loop_label() { return loop_label_; }
RegExpNode* stop_node() { return stop_node_; }
diff --git a/deps/v8/src/regexp/mips/OWNERS b/deps/v8/src/regexp/mips/OWNERS
index 5508ba626f..89455a4fbd 100644
--- a/deps/v8/src/regexp/mips/OWNERS
+++ b/deps/v8/src/regexp/mips/OWNERS
@@ -3,3 +3,4 @@ gergely.kis@imgtec.com
akos.palfi@imgtec.com
balazs.kilvady@imgtec.com
dusan.milosavljevic@imgtec.com
+ivica.bogosavljevic@imgtec.com
diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
index 77f09917c0..9c59328ed1 100644
--- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
+++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc
@@ -97,7 +97,8 @@ RegExpMacroAssemblerMIPS::RegExpMacroAssemblerMIPS(Isolate* isolate, Zone* zone,
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)),
+ masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -181,26 +182,17 @@ void RegExpMacroAssemblerMIPS::CheckCharacterGT(uc16 limit, Label* on_greater) {
void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) {
- Label not_at_start;
- // Did we start the match at the start of the string at all?
- __ lw(a0, MemOperand(frame_pointer(), kStartIndex));
- BranchOrBacktrack(&not_at_start, ne, a0, Operand(zero_reg));
-
- // If we did, are we still at the start of the input?
- __ lw(a1, MemOperand(frame_pointer(), kInputStart));
- __ Addu(a0, end_of_input_address(), Operand(current_input_offset()));
+ __ lw(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Addu(a0, current_input_offset(), Operand(-char_size()));
BranchOrBacktrack(on_at_start, eq, a0, Operand(a1));
- __ bind(&not_at_start);
}
-void RegExpMacroAssemblerMIPS::CheckNotAtStart(Label* on_not_at_start) {
- // Did we start the match at the start of the string at all?
- __ lw(a0, MemOperand(frame_pointer(), kStartIndex));
- BranchOrBacktrack(on_not_at_start, ne, a0, Operand(zero_reg));
- // If we did, are we still at the start of the input?
- __ lw(a1, MemOperand(frame_pointer(), kInputStart));
- __ Addu(a0, end_of_input_address(), Operand(current_input_offset()));
+void RegExpMacroAssemblerMIPS::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ __ lw(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Addu(a0, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1));
}
@@ -223,20 +215,26 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_no_match) {
+ int start_reg, bool read_backward, Label* on_no_match) {
Label fallthrough;
__ lw(a0, register_location(start_reg)); // Index of start of capture.
__ lw(a1, register_location(start_reg + 1)); // Index of end of capture.
__ Subu(a1, a1, a0); // Length of capture.
- // If length is zero, either the capture is empty or it is not participating.
- // In either case succeed immediately.
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ Branch(&fallthrough, eq, a1, Operand(zero_reg));
- __ Addu(t5, a1, current_input_offset());
- // Check that there are enough characters left in the input.
- BranchOrBacktrack(on_no_match, gt, t5, Operand(zero_reg));
+ if (read_backward) {
+ __ lw(t0, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Addu(t0, t0, a1);
+ BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t0));
+ } else {
+ __ Addu(t5, a1, current_input_offset());
+ // Check that there are enough characters left in the input.
+ BranchOrBacktrack(on_no_match, gt, t5, Operand(zero_reg));
+ }
if (mode_ == LATIN1) {
Label success;
@@ -247,6 +245,9 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
// a1 - length of capture.
__ Addu(a0, a0, Operand(end_of_input_address()));
__ Addu(a2, end_of_input_address(), Operand(current_input_offset()));
+ if (read_backward) {
+ __ Subu(a2, a2, Operand(a1));
+ }
__ Addu(a1, a0, Operand(a1));
// a0 - Address of start of capture.
@@ -285,6 +286,12 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
__ bind(&success);
// Compute new value of character position after the matched part.
__ Subu(current_input_offset(), a2, end_of_input_address());
+ if (read_backward) {
+ __ lw(t0, register_location(start_reg)); // Index of start of capture.
+ __ lw(t5, register_location(start_reg + 1)); // Index of end of capture.
+ __ Addu(current_input_offset(), current_input_offset(), Operand(t0));
+ __ Subu(current_input_offset(), current_input_offset(), Operand(t5));
+ }
} else {
DCHECK(mode_ == UC16);
// Put regexp engine registers on stack.
@@ -313,6 +320,9 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
__ mov(s3, a1);
// Address of current input position.
__ Addu(a1, current_input_offset(), Operand(end_of_input_address()));
+ if (read_backward) {
+ __ Subu(a1, a1, Operand(s3));
+ }
// Isolate.
__ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
@@ -330,17 +340,21 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
// Check if function returned non-zero for success or zero for failure.
BranchOrBacktrack(on_no_match, eq, v0, Operand(zero_reg));
- // On success, increment position by length of capture.
- __ Addu(current_input_offset(), current_input_offset(), Operand(s3));
+ // On success, advance position by length of capture.
+ if (read_backward) {
+ __ Subu(current_input_offset(), current_input_offset(), Operand(s3));
+ } else {
+ __ Addu(current_input_offset(), current_input_offset(), Operand(s3));
+ }
}
__ bind(&fallthrough);
}
-void RegExpMacroAssemblerMIPS::CheckNotBackReference(
- int start_reg,
- Label* on_no_match) {
+void RegExpMacroAssemblerMIPS::CheckNotBackReference(int start_reg,
+ bool read_backward,
+ Label* on_no_match) {
Label fallthrough;
Label success;
@@ -348,17 +362,35 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReference(
__ lw(a0, register_location(start_reg));
__ lw(a1, register_location(start_reg + 1));
__ Subu(a1, a1, a0); // Length to check.
- // Succeed on empty capture (including no capture).
- __ Branch(&fallthrough, eq, a1, Operand(zero_reg));
- __ Addu(t5, a1, current_input_offset());
- // Check that there are enough characters left in the input.
- BranchOrBacktrack(on_no_match, gt, t5, Operand(zero_reg));
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
+ __ Branch(&fallthrough, le, a1, Operand(zero_reg));
+
+ if (read_backward) {
+ __ lw(t0, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Addu(t0, t0, a1);
+ BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t0));
+ } else {
+ __ Addu(t5, a1, current_input_offset());
+ // Check that there are enough characters left in the input.
+ BranchOrBacktrack(on_no_match, gt, t5, Operand(zero_reg));
+ }
- // Compute pointers to match string and capture string.
+ // a0 - offset of start of capture.
+ // a1 - length of capture.
__ Addu(a0, a0, Operand(end_of_input_address()));
__ Addu(a2, end_of_input_address(), Operand(current_input_offset()));
- __ Addu(a1, a1, Operand(a0));
+ if (read_backward) {
+ __ Subu(a2, a2, Operand(a1));
+ }
+ __ Addu(a1, a0, Operand(a1));
+
+ // a0 - Address of start of capture.
+ // a1 - Address of end of capture.
+ // a2 - Address of current input position.
+
Label loop;
__ bind(&loop);
@@ -379,6 +411,12 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReference(
// Move current character position to position after match.
__ Subu(current_input_offset(), a2, end_of_input_address());
+ if (read_backward) {
+ __ lw(t0, register_location(start_reg)); // Index of start of capture.
+ __ lw(t5, register_location(start_reg + 1)); // Index of end of capture.
+ __ Addu(current_input_offset(), current_input_offset(), Operand(t0));
+ __ Subu(current_input_offset(), current_input_offset(), Operand(t5));
+ }
__ bind(&fallthrough);
}
@@ -599,7 +637,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ Addu(frame_pointer(), sp, Operand(4 * kPointerSize));
__ mov(a0, zero_reg);
__ push(a0); // Make room for success counter and initialize it to 0.
- __ push(a0); // Make room for "position - 1" constant (value irrelevant).
+ __ push(a0); // Make room for "string start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
@@ -642,7 +680,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ Subu(a0, a0, t5);
// Store this value in a local variable, for use when clearing
// position registers.
- __ sw(a0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ sw(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
// Initialize code pointer register
__ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
@@ -751,7 +789,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ sw(a2, MemOperand(frame_pointer(), kRegisterOutput));
// Prepare a0 to initialize registers with its value in the next run.
- __ lw(a0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ lw(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
if (global_with_zero_length_check()) {
// Special case for zero-length matches.
@@ -905,10 +943,13 @@ void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds,
int characters) {
- DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works).
if (check_bounds) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -1016,7 +1057,7 @@ void RegExpMacroAssemblerMIPS::WriteCurrentPositionToRegister(int reg,
void RegExpMacroAssemblerMIPS::ClearRegisters(int reg_from, int reg_to) {
DCHECK(reg_from <= reg_to);
- __ lw(a0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ lw(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
__ sw(a0, register_location(reg));
}
@@ -1129,10 +1170,14 @@ MemOperand RegExpMacroAssemblerMIPS::register_location(int register_index) {
void RegExpMacroAssemblerMIPS::CheckPosition(int cp_offset,
Label* on_outside_input) {
- BranchOrBacktrack(on_outside_input,
- ge,
- current_input_offset(),
- Operand(-cp_offset * char_size()));
+ if (cp_offset >= 0) {
+ BranchOrBacktrack(on_outside_input, ge, current_input_offset(),
+ Operand(-cp_offset * char_size()));
+ } else {
+ __ lw(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Addu(a0, current_input_offset(), Operand(cp_offset * char_size()));
+ BranchOrBacktrack(on_outside_input, le, a0, Operand(a1));
+ }
}
diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h
index da59546a79..902e2208fe 100644
--- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h
+++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h
@@ -33,9 +33,11 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
@@ -120,9 +122,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
- static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
+ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
+ static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
diff --git a/deps/v8/src/regexp/mips64/OWNERS b/deps/v8/src/regexp/mips64/OWNERS
index 5508ba626f..89455a4fbd 100644
--- a/deps/v8/src/regexp/mips64/OWNERS
+++ b/deps/v8/src/regexp/mips64/OWNERS
@@ -3,3 +3,4 @@ gergely.kis@imgtec.com
akos.palfi@imgtec.com
balazs.kilvady@imgtec.com
dusan.milosavljevic@imgtec.com
+ivica.bogosavljevic@imgtec.com
diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
index 869cbc4f2e..5153bd018b 100644
--- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
+++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc
@@ -61,7 +61,7 @@ namespace internal {
* - fp[-16] void* input_string (location of a handle containing the string).
* - fp[-20] success counter (only for global regexps to count matches).
* - fp[-24] Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a
+ * string start - 1). Used to initialize capture registers to a
* non-position.
* - fp[-28] At start (if 1, we are starting at the start of the
* string, otherwise 0)
@@ -91,7 +91,7 @@ namespace internal {
* - fp[-56] start index (character index of start). kStartIndex
* - fp[-64] void* input_string (location of a handle containing the string). kInputString
* - fp[-72] success counter (only for global regexps to count matches). kSuccessfulCaptures
- * - fp[-80] Offset of location before start of input (effectively character kInputStartMinusOne
+ * - fp[-80] Offset of location before start of input (effectively character kStringStartMinusOne
* position -1). Used to initialize capture registers to a
* non-position.
* --------- The following output registers are 32-bit values. ---------
@@ -133,7 +133,8 @@ RegExpMacroAssemblerMIPS::RegExpMacroAssemblerMIPS(Isolate* isolate, Zone* zone,
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)),
+ masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -217,26 +218,17 @@ void RegExpMacroAssemblerMIPS::CheckCharacterGT(uc16 limit, Label* on_greater) {
void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) {
- Label not_at_start;
- // Did we start the match at the start of the string at all?
- __ ld(a0, MemOperand(frame_pointer(), kStartIndex));
- BranchOrBacktrack(&not_at_start, ne, a0, Operand(zero_reg));
-
- // If we did, are we still at the start of the input?
- __ ld(a1, MemOperand(frame_pointer(), kInputStart));
- __ Daddu(a0, end_of_input_address(), Operand(current_input_offset()));
+ __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Daddu(a0, current_input_offset(), Operand(-char_size()));
BranchOrBacktrack(on_at_start, eq, a0, Operand(a1));
- __ bind(&not_at_start);
}
-void RegExpMacroAssemblerMIPS::CheckNotAtStart(Label* on_not_at_start) {
- // Did we start the match at the start of the string at all?
- __ ld(a0, MemOperand(frame_pointer(), kStartIndex));
- BranchOrBacktrack(on_not_at_start, ne, a0, Operand(zero_reg));
- // If we did, are we still at the start of the input?
- __ ld(a1, MemOperand(frame_pointer(), kInputStart));
- __ Daddu(a0, end_of_input_address(), Operand(current_input_offset()));
+void RegExpMacroAssemblerMIPS::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Daddu(a0, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1));
}
@@ -259,20 +251,26 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_no_match) {
+ int start_reg, bool read_backward, Label* on_no_match) {
Label fallthrough;
__ ld(a0, register_location(start_reg)); // Index of start of capture.
__ ld(a1, register_location(start_reg + 1)); // Index of end of capture.
__ Dsubu(a1, a1, a0); // Length of capture.
- // If length is zero, either the capture is empty or it is not participating.
- // In either case succeed immediately.
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ Branch(&fallthrough, eq, a1, Operand(zero_reg));
- __ Daddu(t1, a1, current_input_offset());
- // Check that there are enough characters left in the input.
- BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
+ if (read_backward) {
+ __ ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Daddu(t1, t1, a1);
+ BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
+ } else {
+ __ Daddu(t1, a1, current_input_offset());
+ // Check that there are enough characters left in the input.
+ BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
+ }
if (mode_ == LATIN1) {
Label success;
@@ -283,6 +281,9 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
// a1 - length of capture.
__ Daddu(a0, a0, Operand(end_of_input_address()));
__ Daddu(a2, end_of_input_address(), Operand(current_input_offset()));
+ if (read_backward) {
+ __ Dsubu(a2, a2, Operand(a1));
+ }
__ Daddu(a1, a0, Operand(a1));
// a0 - Address of start of capture.
@@ -321,6 +322,12 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
__ bind(&success);
// Compute new value of character position after the matched part.
__ Dsubu(current_input_offset(), a2, end_of_input_address());
+ if (read_backward) {
+ __ ld(t1, register_location(start_reg)); // Index of start of capture.
+ __ ld(a2, register_location(start_reg + 1)); // Index of end of capture.
+ __ Daddu(current_input_offset(), current_input_offset(), Operand(t1));
+ __ Dsubu(current_input_offset(), current_input_offset(), Operand(a2));
+ }
} else {
DCHECK(mode_ == UC16);
// Put regexp engine registers on stack.
@@ -349,6 +356,9 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
__ mov(s3, a1);
// Address of current input position.
__ Daddu(a1, current_input_offset(), Operand(end_of_input_address()));
+ if (read_backward) {
+ __ Dsubu(a1, a1, Operand(s3));
+ }
// Isolate.
__ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
@@ -367,16 +377,20 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
// Check if function returned non-zero for success or zero for failure.
BranchOrBacktrack(on_no_match, eq, v0, Operand(zero_reg));
// On success, increment position by length of capture.
- __ Daddu(current_input_offset(), current_input_offset(), Operand(s3));
+ if (read_backward) {
+ __ Dsubu(current_input_offset(), current_input_offset(), Operand(s3));
+ } else {
+ __ Daddu(current_input_offset(), current_input_offset(), Operand(s3));
+ }
}
__ bind(&fallthrough);
}
-void RegExpMacroAssemblerMIPS::CheckNotBackReference(
- int start_reg,
- Label* on_no_match) {
+void RegExpMacroAssemblerMIPS::CheckNotBackReference(int start_reg,
+ bool read_backward,
+ Label* on_no_match) {
Label fallthrough;
Label success;
@@ -384,16 +398,28 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReference(
__ ld(a0, register_location(start_reg));
__ ld(a1, register_location(start_reg + 1));
__ Dsubu(a1, a1, a0); // Length to check.
- // Succeed on empty capture (including no capture).
+
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ Branch(&fallthrough, eq, a1, Operand(zero_reg));
- __ Daddu(t1, a1, current_input_offset());
- // Check that there are enough characters left in the input.
- BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
+ if (read_backward) {
+ __ ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Daddu(t1, t1, a1);
+ BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
+ } else {
+ __ Daddu(t1, a1, current_input_offset());
+ // Check that there are enough characters left in the input.
+ BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
+ }
// Compute pointers to match string and capture string.
__ Daddu(a0, a0, Operand(end_of_input_address()));
__ Daddu(a2, end_of_input_address(), Operand(current_input_offset()));
+ if (read_backward) {
+ __ Dsubu(a2, a2, Operand(a1));
+ }
__ Daddu(a1, a1, Operand(a0));
Label loop;
@@ -415,6 +441,12 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReference(
// Move current character position to position after match.
__ Dsubu(current_input_offset(), a2, end_of_input_address());
+ if (read_backward) {
+ __ ld(t1, register_location(start_reg)); // Index of start of capture.
+ __ ld(a2, register_location(start_reg + 1)); // Index of end of capture.
+ __ Daddu(current_input_offset(), current_input_offset(), Operand(t1));
+ __ Dsubu(current_input_offset(), current_input_offset(), Operand(a2));
+ }
__ bind(&fallthrough);
}
@@ -644,7 +676,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ Daddu(frame_pointer(), sp, Operand(8 * kPointerSize));
__ mov(a0, zero_reg);
__ push(a0); // Make room for success counter and initialize it to 0.
- __ push(a0); // Make room for "position - 1" constant (value irrelevant).
+ __ push(a0); // Make room for "string start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
@@ -687,7 +719,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ Dsubu(a0, a0, t1);
// Store this value in a local variable, for use when clearing
// position registers.
- __ sd(a0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ sd(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
// Initialize code pointer register
__ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
@@ -797,7 +829,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ sd(a2, MemOperand(frame_pointer(), kRegisterOutput));
// Prepare a0 to initialize registers with its value in the next run.
- __ ld(a0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
if (global_with_zero_length_check()) {
// Special case for zero-length matches.
@@ -951,10 +983,13 @@ void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds,
int characters) {
- DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works).
if (check_bounds) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -1062,7 +1097,7 @@ void RegExpMacroAssemblerMIPS::WriteCurrentPositionToRegister(int reg,
void RegExpMacroAssemblerMIPS::ClearRegisters(int reg_from, int reg_to) {
DCHECK(reg_from <= reg_to);
- __ ld(a0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
__ sd(a0, register_location(reg));
}
@@ -1175,10 +1210,14 @@ MemOperand RegExpMacroAssemblerMIPS::register_location(int register_index) {
void RegExpMacroAssemblerMIPS::CheckPosition(int cp_offset,
Label* on_outside_input) {
- BranchOrBacktrack(on_outside_input,
- ge,
- current_input_offset(),
- Operand(-cp_offset * char_size()));
+ if (cp_offset >= 0) {
+ BranchOrBacktrack(on_outside_input, ge, current_input_offset(),
+ Operand(-cp_offset * char_size()));
+ } else {
+ __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Daddu(a0, current_input_offset(), Operand(cp_offset * char_size()));
+ BranchOrBacktrack(on_outside_input, le, a0, Operand(a1));
+ }
}
diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
index 265bf773eb..9a8ca179d5 100644
--- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
+++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h
@@ -33,9 +33,11 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
@@ -125,9 +127,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
- static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
+ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
+ static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
#elif defined(MIPS_ABI_O32)
// Offsets from frame_pointer() of function parameters and stored registers.
@@ -158,9 +160,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
- static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
+ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
+ static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
#else
# error "undefined MIPS ABI"
diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
index 03f9741147..f3ddf7bf98 100644
--- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
+++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc
@@ -60,7 +60,7 @@ namespace internal {
* - fp[-32] void* input_string (location of a handle containing the string).
* - fp[-36] success counter (only for global regexps to count matches).
* - fp[-40] Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a
+ * string start - 1). Used to initialize capture registers to a
* non-position.
* - fp[-44] At start (if 1, we are starting at the start of the
* string, otherwise 0)
@@ -100,7 +100,8 @@ RegExpMacroAssemblerPPC::RegExpMacroAssemblerPPC(Isolate* isolate, Zone* zone,
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)),
+ masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -189,30 +190,18 @@ void RegExpMacroAssemblerPPC::CheckCharacterGT(uc16 limit, Label* on_greater) {
void RegExpMacroAssemblerPPC::CheckAtStart(Label* on_at_start) {
- Label not_at_start;
- // Did we start the match at the start of the string at all?
- __ LoadP(r3, MemOperand(frame_pointer(), kStartIndex));
- __ cmpi(r3, Operand::Zero());
- BranchOrBacktrack(ne, &not_at_start);
-
- // If we did, are we still at the start of the input?
- __ LoadP(r4, MemOperand(frame_pointer(), kInputStart));
- __ mr(r0, current_input_offset());
- __ add(r3, end_of_input_address(), r0);
- __ cmp(r4, r3);
+ __ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ addi(r3, current_input_offset(), Operand(-char_size()));
+ __ cmp(r3, r4);
BranchOrBacktrack(eq, on_at_start);
- __ bind(&not_at_start);
}
-void RegExpMacroAssemblerPPC::CheckNotAtStart(Label* on_not_at_start) {
- // Did we start the match at the start of the string at all?
- __ LoadP(r3, MemOperand(frame_pointer(), kStartIndex));
- __ cmpi(r3, Operand::Zero());
- BranchOrBacktrack(ne, on_not_at_start);
- // If we did, are we still at the start of the input?
- __ LoadP(r4, MemOperand(frame_pointer(), kInputStart));
- __ add(r3, end_of_input_address(), current_input_offset());
+void RegExpMacroAssemblerPPC::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ __ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ addi(r3, current_input_offset(),
+ Operand(-char_size() + cp_offset * char_size()));
__ cmp(r3, r4);
BranchOrBacktrack(ne, on_not_at_start);
}
@@ -238,20 +227,27 @@ void RegExpMacroAssemblerPPC::CheckGreedyLoop(Label* on_equal) {
void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
- int start_reg, Label* on_no_match) {
+ int start_reg, bool read_backward, Label* on_no_match) {
Label fallthrough;
__ LoadP(r3, register_location(start_reg), r0); // Index of start of capture
__ LoadP(r4, register_location(start_reg + 1), r0); // Index of end
__ sub(r4, r4, r3, LeaveOE, SetRC); // Length of capture.
- // If length is zero, either the capture is empty or it is not participating.
- // In either case succeed immediately.
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ beq(&fallthrough, cr0);
// Check that there are enough characters left in the input.
- __ add(r0, r4, current_input_offset(), LeaveOE, SetRC);
- // __ cmn(r1, Operand(current_input_offset()));
- BranchOrBacktrack(gt, on_no_match, cr0);
+ if (read_backward) {
+ __ LoadP(r6, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ add(r6, r6, r4);
+ __ cmp(current_input_offset(), r6);
+ BranchOrBacktrack(le, on_no_match);
+ } else {
+ __ add(r0, r4, current_input_offset(), LeaveOE, SetRC);
+ BranchOrBacktrack(gt, on_no_match, cr0);
+ }
if (mode_ == LATIN1) {
Label success;
@@ -262,6 +258,9 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
// r4 - length of capture
__ add(r3, r3, end_of_input_address());
__ add(r5, end_of_input_address(), current_input_offset());
+ if (read_backward) {
+ __ sub(r5, r5, r4); // Offset by length when matching backwards.
+ }
__ add(r4, r3, r4);
// r3 - Address of start of capture.
@@ -303,6 +302,13 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
__ bind(&success);
// Compute new value of character position after the matched part.
__ sub(current_input_offset(), r5, end_of_input_address());
+ if (read_backward) {
+ __ LoadP(r3, register_location(start_reg)); // Index of start of capture
+ __ LoadP(r4,
+ register_location(start_reg + 1)); // Index of end of capture
+ __ add(current_input_offset(), current_input_offset(), r3);
+ __ sub(current_input_offset(), current_input_offset(), r4);
+ }
} else {
DCHECK(mode_ == UC16);
int argument_count = 4;
@@ -326,6 +332,9 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
__ mr(r25, r4);
// Address of current input position.
__ add(r4, current_input_offset(), end_of_input_address());
+ if (read_backward) {
+ __ sub(r4, r4, r25);
+ }
// Isolate.
__ mov(r6, Operand(ExternalReference::isolate_address(isolate())));
@@ -339,8 +348,13 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
// Check if function returned non-zero for success or zero for failure.
__ cmpi(r3, Operand::Zero());
BranchOrBacktrack(eq, on_no_match);
- // On success, increment position by length of capture.
- __ add(current_input_offset(), current_input_offset(), r25);
+
+ // On success, advance position by length of capture.
+ if (read_backward) {
+ __ sub(current_input_offset(), current_input_offset(), r25);
+ } else {
+ __ add(current_input_offset(), current_input_offset(), r25);
+ }
}
__ bind(&fallthrough);
@@ -348,6 +362,7 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase(
void RegExpMacroAssemblerPPC::CheckNotBackReference(int start_reg,
+ bool read_backward,
Label* on_no_match) {
Label fallthrough;
Label success;
@@ -356,16 +371,30 @@ void RegExpMacroAssemblerPPC::CheckNotBackReference(int start_reg,
__ LoadP(r3, register_location(start_reg), r0);
__ LoadP(r4, register_location(start_reg + 1), r0);
__ sub(r4, r4, r3, LeaveOE, SetRC); // Length to check.
- // Succeed on empty capture (including no capture).
+
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ beq(&fallthrough, cr0);
// Check that there are enough characters left in the input.
- __ add(r0, r4, current_input_offset(), LeaveOE, SetRC);
- BranchOrBacktrack(gt, on_no_match, cr0);
+ if (read_backward) {
+ __ LoadP(r6, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ add(r6, r6, r4);
+ __ cmp(current_input_offset(), r6);
+ BranchOrBacktrack(lt, on_no_match);
+ } else {
+ __ add(r0, r4, current_input_offset(), LeaveOE, SetRC);
+ BranchOrBacktrack(gt, on_no_match, cr0);
+ }
- // Compute pointers to match string and capture string
+ // r3 - offset of start of capture
+ // r4 - length of capture
__ add(r3, r3, end_of_input_address());
__ add(r5, end_of_input_address(), current_input_offset());
+ if (read_backward) {
+ __ sub(r5, r5, r4); // Offset by length when matching backwards.
+ }
__ add(r4, r4, r3);
Label loop;
@@ -389,6 +418,13 @@ void RegExpMacroAssemblerPPC::CheckNotBackReference(int start_reg,
// Move current character position to position after match.
__ sub(current_input_offset(), r5, end_of_input_address());
+ if (read_backward) {
+ __ LoadP(r3, register_location(start_reg)); // Index of start of capture
+ __ LoadP(r4, register_location(start_reg + 1)); // Index of end of capture
+ __ add(current_input_offset(), current_input_offset(), r3);
+ __ sub(current_input_offset(), current_input_offset(), r4);
+ }
+
__ bind(&fallthrough);
}
@@ -639,7 +675,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) {
__ addi(frame_pointer(), sp, Operand(8 * kPointerSize));
__ li(r3, Operand::Zero());
__ push(r3); // Make room for success counter and initialize it to 0.
- __ push(r3); // Make room for "position - 1" constant (value is irrelevant)
+ __ push(r3); // Make room for "string start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
Label stack_ok;
@@ -688,7 +724,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) {
}
// Store this value in a local variable, for use when clearing
// position registers.
- __ StoreP(r3, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ StoreP(r3, MemOperand(frame_pointer(), kStringStartMinusOne));
// Initialize code pointer register
__ mov(code_pointer(), Operand(masm_->CodeObject()));
@@ -797,7 +833,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) {
__ StoreP(r5, MemOperand(frame_pointer(), kRegisterOutput));
// Prepare r3 to initialize registers with its value in the next run.
- __ LoadP(r3, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ LoadP(r3, MemOperand(frame_pointer(), kStringStartMinusOne));
if (global_with_zero_length_check()) {
// Special case for zero-length matches.
@@ -936,10 +972,13 @@ void RegExpMacroAssemblerPPC::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds,
int characters) {
- DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
DCHECK(cp_offset < (1 << 30)); // Be sane! (And ensure negation works)
if (check_bounds) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -1028,7 +1067,7 @@ void RegExpMacroAssemblerPPC::WriteCurrentPositionToRegister(int reg,
void RegExpMacroAssemblerPPC::ClearRegisters(int reg_from, int reg_to) {
DCHECK(reg_from <= reg_to);
- __ LoadP(r3, MemOperand(frame_pointer(), kInputStartMinusOne));
+ __ LoadP(r3, MemOperand(frame_pointer(), kStringStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
__ StoreP(r3, register_location(reg), r0);
}
@@ -1132,8 +1171,15 @@ MemOperand RegExpMacroAssemblerPPC::register_location(int register_index) {
void RegExpMacroAssemblerPPC::CheckPosition(int cp_offset,
Label* on_outside_input) {
- __ Cmpi(current_input_offset(), Operand(-cp_offset * char_size()), r0);
- BranchOrBacktrack(ge, on_outside_input);
+ if (cp_offset >= 0) {
+ __ Cmpi(current_input_offset(), Operand(-cp_offset * char_size()), r0);
+ BranchOrBacktrack(ge, on_outside_input);
+ } else {
+ __ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ addi(r3, current_input_offset(), Operand(cp_offset * char_size()));
+ __ cmp(r3, r4);
+ BranchOrBacktrack(le, on_outside_input);
+ }
}
diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
index 04a0e5e416..4d1836fc71 100644
--- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
+++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h
@@ -34,9 +34,11 @@ class RegExpMacroAssemblerPPC : public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
@@ -112,9 +114,9 @@ class RegExpMacroAssemblerPPC : public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
- static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
+ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
+ static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
diff --git a/deps/v8/src/regexp/regexp-ast.cc b/deps/v8/src/regexp/regexp-ast.cc
new file mode 100644
index 0000000000..31c93b114f
--- /dev/null
+++ b/deps/v8/src/regexp/regexp-ast.cc
@@ -0,0 +1,337 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/ostreams.h"
+#include "src/regexp/regexp-ast.h"
+
+namespace v8 {
+namespace internal {
+
+#define MAKE_ACCEPT(Name) \
+ void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
+ return visitor->Visit##Name(this, data); \
+ }
+FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
+#undef MAKE_ACCEPT
+
+#define MAKE_TYPE_CASE(Name) \
+ RegExp##Name* RegExpTree::As##Name() { return NULL; } \
+ bool RegExpTree::Is##Name() { return false; }
+FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
+#undef MAKE_TYPE_CASE
+
+#define MAKE_TYPE_CASE(Name) \
+ RegExp##Name* RegExp##Name::As##Name() { return this; } \
+ bool RegExp##Name::Is##Name() { return true; }
+FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
+#undef MAKE_TYPE_CASE
+
+
+static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
+ Interval result = Interval::Empty();
+ for (int i = 0; i < children->length(); i++)
+ result = result.Union(children->at(i)->CaptureRegisters());
+ return result;
+}
+
+
+Interval RegExpAlternative::CaptureRegisters() {
+ return ListCaptureRegisters(nodes());
+}
+
+
+Interval RegExpDisjunction::CaptureRegisters() {
+ return ListCaptureRegisters(alternatives());
+}
+
+
+Interval RegExpLookaround::CaptureRegisters() {
+ return body()->CaptureRegisters();
+}
+
+
+Interval RegExpCapture::CaptureRegisters() {
+ Interval self(StartRegister(index()), EndRegister(index()));
+ return self.Union(body()->CaptureRegisters());
+}
+
+
+Interval RegExpQuantifier::CaptureRegisters() {
+ return body()->CaptureRegisters();
+}
+
+
+bool RegExpAssertion::IsAnchoredAtStart() {
+ return assertion_type() == RegExpAssertion::START_OF_INPUT;
+}
+
+
+bool RegExpAssertion::IsAnchoredAtEnd() {
+ return assertion_type() == RegExpAssertion::END_OF_INPUT;
+}
+
+
+bool RegExpAlternative::IsAnchoredAtStart() {
+ ZoneList<RegExpTree*>* nodes = this->nodes();
+ for (int i = 0; i < nodes->length(); i++) {
+ RegExpTree* node = nodes->at(i);
+ if (node->IsAnchoredAtStart()) {
+ return true;
+ }
+ if (node->max_match() > 0) {
+ return false;
+ }
+ }
+ return false;
+}
+
+
+bool RegExpAlternative::IsAnchoredAtEnd() {
+ ZoneList<RegExpTree*>* nodes = this->nodes();
+ for (int i = nodes->length() - 1; i >= 0; i--) {
+ RegExpTree* node = nodes->at(i);
+ if (node->IsAnchoredAtEnd()) {
+ return true;
+ }
+ if (node->max_match() > 0) {
+ return false;
+ }
+ }
+ return false;
+}
+
+
+bool RegExpDisjunction::IsAnchoredAtStart() {
+ ZoneList<RegExpTree*>* alternatives = this->alternatives();
+ for (int i = 0; i < alternatives->length(); i++) {
+ if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
+ }
+ return true;
+}
+
+
+bool RegExpDisjunction::IsAnchoredAtEnd() {
+ ZoneList<RegExpTree*>* alternatives = this->alternatives();
+ for (int i = 0; i < alternatives->length(); i++) {
+ if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
+ }
+ return true;
+}
+
+
+bool RegExpLookaround::IsAnchoredAtStart() {
+ return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
+}
+
+
+bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
+
+
+bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
+
+
+// Convert regular expression trees to a simple sexp representation.
+// This representation should be different from the input grammar
+// in as many cases as possible, to make it more difficult for incorrect
+// parses to look as correct ones which is likely if the input and
+// output formats are alike.
+class RegExpUnparser final : public RegExpVisitor {
+ public:
+ RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
+ void VisitCharacterRange(CharacterRange that);
+#define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
+ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
+#undef MAKE_CASE
+ private:
+ std::ostream& os_;
+ Zone* zone_;
+};
+
+
+void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
+ os_ << "(|";
+ for (int i = 0; i < that->alternatives()->length(); i++) {
+ os_ << " ";
+ that->alternatives()->at(i)->Accept(this, data);
+ }
+ os_ << ")";
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
+ os_ << "(:";
+ for (int i = 0; i < that->nodes()->length(); i++) {
+ os_ << " ";
+ that->nodes()->at(i)->Accept(this, data);
+ }
+ os_ << ")";
+ return NULL;
+}
+
+
+void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
+ os_ << AsUC16(that.from());
+ if (!that.IsSingleton()) {
+ os_ << "-" << AsUC16(that.to());
+ }
+}
+
+
+void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
+ void* data) {
+ if (that->is_negated()) os_ << "^";
+ os_ << "[";
+ for (int i = 0; i < that->ranges(zone_)->length(); i++) {
+ if (i > 0) os_ << " ";
+ VisitCharacterRange(that->ranges(zone_)->at(i));
+ }
+ os_ << "]";
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
+ switch (that->assertion_type()) {
+ case RegExpAssertion::START_OF_INPUT:
+ os_ << "@^i";
+ break;
+ case RegExpAssertion::END_OF_INPUT:
+ os_ << "@$i";
+ break;
+ case RegExpAssertion::START_OF_LINE:
+ os_ << "@^l";
+ break;
+ case RegExpAssertion::END_OF_LINE:
+ os_ << "@$l";
+ break;
+ case RegExpAssertion::BOUNDARY:
+ os_ << "@b";
+ break;
+ case RegExpAssertion::NON_BOUNDARY:
+ os_ << "@B";
+ break;
+ }
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
+ os_ << "'";
+ Vector<const uc16> chardata = that->data();
+ for (int i = 0; i < chardata.length(); i++) {
+ os_ << AsUC16(chardata[i]);
+ }
+ os_ << "'";
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
+ if (that->elements()->length() == 1) {
+ that->elements()->at(0).tree()->Accept(this, data);
+ } else {
+ os_ << "(!";
+ for (int i = 0; i < that->elements()->length(); i++) {
+ os_ << " ";
+ that->elements()->at(i).tree()->Accept(this, data);
+ }
+ os_ << ")";
+ }
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
+ os_ << "(# " << that->min() << " ";
+ if (that->max() == RegExpTree::kInfinity) {
+ os_ << "- ";
+ } else {
+ os_ << that->max() << " ";
+ }
+ os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
+ that->body()->Accept(this, data);
+ os_ << ")";
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
+ os_ << "(^ ";
+ that->body()->Accept(this, data);
+ os_ << ")";
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
+ os_ << "(";
+ os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
+ os_ << (that->is_positive() ? " + " : " - ");
+ that->body()->Accept(this, data);
+ os_ << ")";
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
+ void* data) {
+ os_ << "(<- " << that->index() << ")";
+ return NULL;
+}
+
+
+void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
+ os_ << '%';
+ return NULL;
+}
+
+
+std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
+ RegExpUnparser unparser(os, zone);
+ Accept(&unparser, NULL);
+ return os;
+}
+
+
+RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
+ : alternatives_(alternatives) {
+ DCHECK(alternatives->length() > 1);
+ RegExpTree* first_alternative = alternatives->at(0);
+ min_match_ = first_alternative->min_match();
+ max_match_ = first_alternative->max_match();
+ for (int i = 1; i < alternatives->length(); i++) {
+ RegExpTree* alternative = alternatives->at(i);
+ min_match_ = Min(min_match_, alternative->min_match());
+ max_match_ = Max(max_match_, alternative->max_match());
+ }
+}
+
+
+static int IncreaseBy(int previous, int increase) {
+ if (RegExpTree::kInfinity - previous < increase) {
+ return RegExpTree::kInfinity;
+ } else {
+ return previous + increase;
+ }
+}
+
+
+RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
+ : nodes_(nodes) {
+ DCHECK(nodes->length() > 1);
+ min_match_ = 0;
+ max_match_ = 0;
+ for (int i = 0; i < nodes->length(); i++) {
+ RegExpTree* node = nodes->at(i);
+ int node_min_match = node->min_match();
+ min_match_ = IncreaseBy(min_match_, node_min_match);
+ int node_max_match = node->max_match();
+ max_match_ = IncreaseBy(max_match_, node_max_match);
+ }
+}
+
+
+} // namespace internal
+} // namespace v8
diff --git a/deps/v8/src/regexp/regexp-ast.h b/deps/v8/src/regexp/regexp-ast.h
new file mode 100644
index 0000000000..f87778596a
--- /dev/null
+++ b/deps/v8/src/regexp/regexp-ast.h
@@ -0,0 +1,496 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_REGEXP_REGEXP_AST_H_
+#define V8_REGEXP_REGEXP_AST_H_
+
+#include "src/utils.h"
+#include "src/zone.h"
+
+namespace v8 {
+namespace internal {
+
+#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
+ VISIT(Disjunction) \
+ VISIT(Alternative) \
+ VISIT(Assertion) \
+ VISIT(CharacterClass) \
+ VISIT(Atom) \
+ VISIT(Quantifier) \
+ VISIT(Capture) \
+ VISIT(Lookaround) \
+ VISIT(BackReference) \
+ VISIT(Empty) \
+ VISIT(Text)
+
+
+#define FORWARD_DECLARE(Name) class RegExp##Name;
+FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
+#undef FORWARD_DECLARE
+
+class RegExpCompiler;
+class RegExpNode;
+class RegExpTree;
+
+
+class RegExpVisitor BASE_EMBEDDED {
+ public:
+ virtual ~RegExpVisitor() {}
+#define MAKE_CASE(Name) \
+ virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
+ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
+#undef MAKE_CASE
+};
+
+
+// A simple closed interval.
+class Interval {
+ public:
+ Interval() : from_(kNone), to_(kNone) {}
+ Interval(int from, int to) : from_(from), to_(to) {}
+ Interval Union(Interval that) {
+ if (that.from_ == kNone)
+ return *this;
+ else if (from_ == kNone)
+ return that;
+ else
+ return Interval(Min(from_, that.from_), Max(to_, that.to_));
+ }
+ bool Contains(int value) { return (from_ <= value) && (value <= to_); }
+ bool is_empty() { return from_ == kNone; }
+ int from() const { return from_; }
+ int to() const { return to_; }
+ static Interval Empty() { return Interval(); }
+ static const int kNone = -1;
+
+ private:
+ int from_;
+ int to_;
+};
+
+
+// Represents code units in the range from from_ to to_, both ends are
+// inclusive.
+class CharacterRange {
+ public:
+ CharacterRange() : from_(0), to_(0) {}
+ // For compatibility with the CHECK_OK macro
+ CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
+ CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) {}
+ static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
+ Zone* zone);
+ static Vector<const int> GetWordBounds();
+ static inline CharacterRange Singleton(uc16 value) {
+ return CharacterRange(value, value);
+ }
+ static inline CharacterRange Range(uc16 from, uc16 to) {
+ DCHECK(from <= to);
+ return CharacterRange(from, to);
+ }
+ static inline CharacterRange Everything() {
+ return CharacterRange(0, 0xFFFF);
+ }
+ bool Contains(uc16 i) { return from_ <= i && i <= to_; }
+ uc16 from() const { return from_; }
+ void set_from(uc16 value) { from_ = value; }
+ uc16 to() const { return to_; }
+ void set_to(uc16 value) { to_ = value; }
+ bool is_valid() { return from_ <= to_; }
+ bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
+ bool IsSingleton() { return (from_ == to_); }
+ void AddCaseEquivalents(Isolate* isolate, Zone* zone,
+ ZoneList<CharacterRange>* ranges, bool is_one_byte);
+ static void Split(ZoneList<CharacterRange>* base, Vector<const int> overlay,
+ ZoneList<CharacterRange>** included,
+ ZoneList<CharacterRange>** excluded, Zone* zone);
+ // Whether a range list is in canonical form: Ranges ordered by from value,
+ // and ranges non-overlapping and non-adjacent.
+ static bool IsCanonical(ZoneList<CharacterRange>* ranges);
+ // Convert range list to canonical form. The characters covered by the ranges
+ // will still be the same, but no character is in more than one range, and
+ // adjacent ranges are merged. The resulting list may be shorter than the
+ // original, but cannot be longer.
+ static void Canonicalize(ZoneList<CharacterRange>* ranges);
+ // Negate the contents of a character range in canonical form.
+ static void Negate(ZoneList<CharacterRange>* src,
+ ZoneList<CharacterRange>* dst, Zone* zone);
+ static const int kStartMarker = (1 << 24);
+ static const int kPayloadMask = (1 << 24) - 1;
+
+ private:
+ uc16 from_;
+ uc16 to_;
+};
+
+
+class CharacterSet final BASE_EMBEDDED {
+ public:
+ explicit CharacterSet(uc16 standard_set_type)
+ : ranges_(NULL), standard_set_type_(standard_set_type) {}
+ explicit CharacterSet(ZoneList<CharacterRange>* ranges)
+ : ranges_(ranges), standard_set_type_(0) {}
+ ZoneList<CharacterRange>* ranges(Zone* zone);
+ uc16 standard_set_type() { return standard_set_type_; }
+ void set_standard_set_type(uc16 special_set_type) {
+ standard_set_type_ = special_set_type;
+ }
+ bool is_standard() { return standard_set_type_ != 0; }
+ void Canonicalize();
+
+ private:
+ ZoneList<CharacterRange>* ranges_;
+ // If non-zero, the value represents a standard set (e.g., all whitespace
+ // characters) without having to expand the ranges.
+ uc16 standard_set_type_;
+};
+
+
+class TextElement final BASE_EMBEDDED {
+ public:
+ enum TextType { ATOM, CHAR_CLASS };
+
+ static TextElement Atom(RegExpAtom* atom);
+ static TextElement CharClass(RegExpCharacterClass* char_class);
+
+ int cp_offset() const { return cp_offset_; }
+ void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
+ int length() const;
+
+ TextType text_type() const { return text_type_; }
+
+ RegExpTree* tree() const { return tree_; }
+
+ RegExpAtom* atom() const {
+ DCHECK(text_type() == ATOM);
+ return reinterpret_cast<RegExpAtom*>(tree());
+ }
+
+ RegExpCharacterClass* char_class() const {
+ DCHECK(text_type() == CHAR_CLASS);
+ return reinterpret_cast<RegExpCharacterClass*>(tree());
+ }
+
+ private:
+ TextElement(TextType text_type, RegExpTree* tree)
+ : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
+
+ int cp_offset_;
+ TextType text_type_;
+ RegExpTree* tree_;
+};
+
+
+class RegExpTree : public ZoneObject {
+ public:
+ static const int kInfinity = kMaxInt;
+ virtual ~RegExpTree() {}
+ virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success) = 0;
+ virtual bool IsTextElement() { return false; }
+ virtual bool IsAnchoredAtStart() { return false; }
+ virtual bool IsAnchoredAtEnd() { return false; }
+ virtual int min_match() = 0;
+ virtual int max_match() = 0;
+ // Returns the interval of registers used for captures within this
+ // expression.
+ virtual Interval CaptureRegisters() { return Interval::Empty(); }
+ virtual void AppendToText(RegExpText* text, Zone* zone);
+ std::ostream& Print(std::ostream& os, Zone* zone); // NOLINT
+#define MAKE_ASTYPE(Name) \
+ virtual RegExp##Name* As##Name(); \
+ virtual bool Is##Name();
+ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
+#undef MAKE_ASTYPE
+};
+
+
+class RegExpDisjunction final : public RegExpTree {
+ public:
+ explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpDisjunction* AsDisjunction() override;
+ Interval CaptureRegisters() override;
+ bool IsDisjunction() override;
+ bool IsAnchoredAtStart() override;
+ bool IsAnchoredAtEnd() override;
+ int min_match() override { return min_match_; }
+ int max_match() override { return max_match_; }
+ ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
+
+ private:
+ bool SortConsecutiveAtoms(RegExpCompiler* compiler);
+ void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
+ void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
+ ZoneList<RegExpTree*>* alternatives_;
+ int min_match_;
+ int max_match_;
+};
+
+
+class RegExpAlternative final : public RegExpTree {
+ public:
+ explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpAlternative* AsAlternative() override;
+ Interval CaptureRegisters() override;
+ bool IsAlternative() override;
+ bool IsAnchoredAtStart() override;
+ bool IsAnchoredAtEnd() override;
+ int min_match() override { return min_match_; }
+ int max_match() override { return max_match_; }
+ ZoneList<RegExpTree*>* nodes() { return nodes_; }
+
+ private:
+ ZoneList<RegExpTree*>* nodes_;
+ int min_match_;
+ int max_match_;
+};
+
+
+class RegExpAssertion final : public RegExpTree {
+ public:
+ enum AssertionType {
+ START_OF_LINE,
+ START_OF_INPUT,
+ END_OF_LINE,
+ END_OF_INPUT,
+ BOUNDARY,
+ NON_BOUNDARY
+ };
+ explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {}
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpAssertion* AsAssertion() override;
+ bool IsAssertion() override;
+ bool IsAnchoredAtStart() override;
+ bool IsAnchoredAtEnd() override;
+ int min_match() override { return 0; }
+ int max_match() override { return 0; }
+ AssertionType assertion_type() { return assertion_type_; }
+
+ private:
+ AssertionType assertion_type_;
+};
+
+
+class RegExpCharacterClass final : public RegExpTree {
+ public:
+ RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
+ : set_(ranges), is_negated_(is_negated) {}
+ explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {}
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpCharacterClass* AsCharacterClass() override;
+ bool IsCharacterClass() override;
+ bool IsTextElement() override { return true; }
+ int min_match() override { return 1; }
+ int max_match() override { return 1; }
+ void AppendToText(RegExpText* text, Zone* zone) override;
+ CharacterSet character_set() { return set_; }
+ // TODO(lrn): Remove need for complex version if is_standard that
+ // recognizes a mangled standard set and just do { return set_.is_special(); }
+ bool is_standard(Zone* zone);
+ // Returns a value representing the standard character set if is_standard()
+ // returns true.
+ // Currently used values are:
+ // s : unicode whitespace
+ // S : unicode non-whitespace
+ // w : ASCII word character (digit, letter, underscore)
+ // W : non-ASCII word character
+ // d : ASCII digit
+ // D : non-ASCII digit
+ // . : non-unicode non-newline
+ // * : All characters
+ uc16 standard_type() { return set_.standard_set_type(); }
+ ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
+ bool is_negated() { return is_negated_; }
+
+ private:
+ CharacterSet set_;
+ bool is_negated_;
+};
+
+
+class RegExpAtom final : public RegExpTree {
+ public:
+ explicit RegExpAtom(Vector<const uc16> data) : data_(data) {}
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpAtom* AsAtom() override;
+ bool IsAtom() override;
+ bool IsTextElement() override { return true; }
+ int min_match() override { return data_.length(); }
+ int max_match() override { return data_.length(); }
+ void AppendToText(RegExpText* text, Zone* zone) override;
+ Vector<const uc16> data() { return data_; }
+ int length() { return data_.length(); }
+
+ private:
+ Vector<const uc16> data_;
+};
+
+
+class RegExpText final : public RegExpTree {
+ public:
+ explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {}
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpText* AsText() override;
+ bool IsText() override;
+ bool IsTextElement() override { return true; }
+ int min_match() override { return length_; }
+ int max_match() override { return length_; }
+ void AppendToText(RegExpText* text, Zone* zone) override;
+ void AddElement(TextElement elm, Zone* zone) {
+ elements_.Add(elm, zone);
+ length_ += elm.length();
+ }
+ ZoneList<TextElement>* elements() { return &elements_; }
+
+ private:
+ ZoneList<TextElement> elements_;
+ int length_;
+};
+
+
+class RegExpQuantifier final : public RegExpTree {
+ public:
+ enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
+ RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
+ : body_(body),
+ min_(min),
+ max_(max),
+ min_match_(min * body->min_match()),
+ quantifier_type_(type) {
+ if (max > 0 && body->max_match() > kInfinity / max) {
+ max_match_ = kInfinity;
+ } else {
+ max_match_ = max * body->max_match();
+ }
+ }
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body,
+ RegExpCompiler* compiler, RegExpNode* on_success,
+ bool not_at_start = false);
+ RegExpQuantifier* AsQuantifier() override;
+ Interval CaptureRegisters() override;
+ bool IsQuantifier() override;
+ int min_match() override { return min_match_; }
+ int max_match() override { return max_match_; }
+ int min() { return min_; }
+ int max() { return max_; }
+ bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
+ bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
+ bool is_greedy() { return quantifier_type_ == GREEDY; }
+ RegExpTree* body() { return body_; }
+
+ private:
+ RegExpTree* body_;
+ int min_;
+ int max_;
+ int min_match_;
+ int max_match_;
+ QuantifierType quantifier_type_;
+};
+
+
+class RegExpCapture final : public RegExpTree {
+ public:
+ explicit RegExpCapture(int index) : body_(NULL), index_(index) {}
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ static RegExpNode* ToNode(RegExpTree* body, int index,
+ RegExpCompiler* compiler, RegExpNode* on_success);
+ RegExpCapture* AsCapture() override;
+ bool IsAnchoredAtStart() override;
+ bool IsAnchoredAtEnd() override;
+ Interval CaptureRegisters() override;
+ bool IsCapture() override;
+ int min_match() override { return body_->min_match(); }
+ int max_match() override { return body_->max_match(); }
+ RegExpTree* body() { return body_; }
+ void set_body(RegExpTree* body) { body_ = body; }
+ int index() { return index_; }
+ static int StartRegister(int index) { return index * 2; }
+ static int EndRegister(int index) { return index * 2 + 1; }
+
+ private:
+ RegExpTree* body_;
+ int index_;
+};
+
+
+class RegExpLookaround final : public RegExpTree {
+ public:
+ enum Type { LOOKAHEAD, LOOKBEHIND };
+
+ RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
+ int capture_from, Type type)
+ : body_(body),
+ is_positive_(is_positive),
+ capture_count_(capture_count),
+ capture_from_(capture_from),
+ type_(type) {}
+
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpLookaround* AsLookaround() override;
+ Interval CaptureRegisters() override;
+ bool IsLookaround() override;
+ bool IsAnchoredAtStart() override;
+ int min_match() override { return 0; }
+ int max_match() override { return 0; }
+ RegExpTree* body() { return body_; }
+ bool is_positive() { return is_positive_; }
+ int capture_count() { return capture_count_; }
+ int capture_from() { return capture_from_; }
+ Type type() { return type_; }
+
+ private:
+ RegExpTree* body_;
+ bool is_positive_;
+ int capture_count_;
+ int capture_from_;
+ Type type_;
+};
+
+
+class RegExpBackReference final : public RegExpTree {
+ public:
+ explicit RegExpBackReference(RegExpCapture* capture) : capture_(capture) {}
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpBackReference* AsBackReference() override;
+ bool IsBackReference() override;
+ int min_match() override { return 0; }
+ // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
+ // recursion, we give up. Ignorance is bliss.
+ int max_match() override { return kInfinity; }
+ int index() { return capture_->index(); }
+ RegExpCapture* capture() { return capture_; }
+
+ private:
+ RegExpCapture* capture_;
+};
+
+
+class RegExpEmpty final : public RegExpTree {
+ public:
+ RegExpEmpty() {}
+ void* Accept(RegExpVisitor* visitor, void* data) override;
+ RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
+ RegExpEmpty* AsEmpty() override;
+ bool IsEmpty() override;
+ int min_match() override { return 0; }
+ int max_match() override { return 0; }
+};
+
+} // namespace internal
+} // namespace v8
+
+#endif // V8_REGEXP_REGEXP_AST_H_
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h b/deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h
index 6f176cd12c..4d0b1bc0a7 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h
+++ b/deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h
@@ -5,7 +5,7 @@
#ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_IRREGEXP_INL_H_
#define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_IRREGEXP_INL_H_
-#include "src/ast.h"
+#include "src/ast/ast.h"
#include "src/regexp/bytecodes-irregexp.h"
namespace v8 {
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc
index ca567c9bda..751ee441c8 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc
+++ b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc
@@ -4,7 +4,7 @@
#include "src/regexp/regexp-macro-assembler-irregexp.h"
-#include "src/ast.h"
+#include "src/ast/ast.h"
#include "src/regexp/bytecodes-irregexp.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-macro-assembler-irregexp-inl.h"
@@ -273,8 +273,9 @@ void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) {
}
-void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) {
- Emit(BC_CHECK_NOT_AT_START, 0);
+void RegExpMacroAssemblerIrregexp::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ Emit(BC_CHECK_NOT_AT_START, cp_offset);
EmitOrLink(on_not_at_start);
}
@@ -370,20 +371,23 @@ void RegExpMacroAssemblerIrregexp::CheckBitInTable(
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
+ bool read_backward,
Label* on_not_equal) {
DCHECK(start_reg >= 0);
DCHECK(start_reg <= kMaxRegister);
- Emit(BC_CHECK_NOT_BACK_REF, start_reg);
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
+ start_reg);
EmitOrLink(on_not_equal);
}
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_not_equal) {
+ int start_reg, bool read_backward, Label* on_not_equal) {
DCHECK(start_reg >= 0);
DCHECK(start_reg <= kMaxRegister);
- Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
+ : BC_CHECK_NOT_BACK_REF_NO_CASE,
+ start_reg);
EmitOrLink(on_not_equal);
}
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.h b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.h
index bbfe5203d9..f1ace63a74 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.h
+++ b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.h
@@ -66,7 +66,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckAtStart(Label* on_at_start);
- virtual void CheckNotAtStart(Label* on_not_at_start);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
unsigned mask,
@@ -82,8 +82,10 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
uc16 to,
Label* on_not_in_range);
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
index 2abe55588e..5301ead69b 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
+++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc
@@ -4,7 +4,7 @@
#include "src/regexp/regexp-macro-assembler-tracer.h"
-#include "src/ast.h"
+#include "src/ast/ast.h"
namespace v8 {
namespace internal {
@@ -13,9 +13,9 @@ RegExpMacroAssemblerTracer::RegExpMacroAssemblerTracer(
Isolate* isolate, RegExpMacroAssembler* assembler)
: RegExpMacroAssembler(isolate, assembler->zone()), assembler_(assembler) {
unsigned int type = assembler->Implementation();
- DCHECK(type < 6);
- const char* impl_names[] = {"IA32", "ARM", "ARM64",
- "MIPS", "X64", "X87", "Bytecode"};
+ DCHECK(type < 8);
+ const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS",
+ "PPC", "X64", "X87", "Bytecode"};
PrintF("RegExpMacroAssembler%s();\n", impl_names[type]);
}
@@ -241,9 +241,11 @@ void RegExpMacroAssemblerTracer::CheckAtStart(Label* on_at_start) {
}
-void RegExpMacroAssemblerTracer::CheckNotAtStart(Label* on_not_at_start) {
- PrintF(" CheckNotAtStart(label[%08x]);\n", LabelToInt(on_not_at_start));
- assembler_->CheckNotAtStart(on_not_at_start);
+void RegExpMacroAssemblerTracer::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ PrintF(" CheckNotAtStart(cp_offset=%d, label[%08x]);\n", cp_offset,
+ LabelToInt(on_not_at_start));
+ assembler_->CheckNotAtStart(cp_offset, on_not_at_start);
}
@@ -349,19 +351,29 @@ void RegExpMacroAssemblerTracer::CheckBitInTable(
void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg,
+ bool read_backward,
Label* on_no_match) {
- PrintF(" CheckNotBackReference(register=%d, label[%08x]);\n", start_reg,
- LabelToInt(on_no_match));
- assembler_->CheckNotBackReference(start_reg, on_no_match);
+ PrintF(" CheckNotBackReference(register=%d, %s, label[%08x]);\n", start_reg,
+ read_backward ? "backward" : "forward", LabelToInt(on_no_match));
+ assembler_->CheckNotBackReference(start_reg, read_backward, on_no_match);
}
void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_no_match) {
- PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, label[%08x]);\n",
- start_reg, LabelToInt(on_no_match));
- assembler_->CheckNotBackReferenceIgnoreCase(start_reg, on_no_match);
+ int start_reg, bool read_backward, Label* on_no_match) {
+ PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s, label[%08x]);\n",
+ start_reg, read_backward ? "backward" : "forward",
+ LabelToInt(on_no_match));
+ assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward,
+ on_no_match);
+}
+
+
+void RegExpMacroAssemblerTracer::CheckPosition(int cp_offset,
+ Label* on_outside_input) {
+ PrintF(" CheckPosition(cp_offset=%d, label[%08x]);\n", cp_offset,
+ LabelToInt(on_outside_input));
+ assembler_->CheckPosition(cp_offset, on_outside_input);
}
diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h
index f9364195fa..77377aac31 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h
+++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h
@@ -30,9 +30,11 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
@@ -49,6 +51,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
uc16 to,
Label* on_not_in_range);
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
+ virtual void CheckPosition(int cp_offset, Label* on_outside_input);
virtual bool CheckSpecialCharacterClass(uc16 type,
Label* on_no_match);
virtual void Fail();
diff --git a/deps/v8/src/regexp/regexp-macro-assembler.cc b/deps/v8/src/regexp/regexp-macro-assembler.cc
index 9916d5f32f..caf8b51fe5 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler.cc
+++ b/deps/v8/src/regexp/regexp-macro-assembler.cc
@@ -5,7 +5,6 @@
#include "src/regexp/regexp-macro-assembler.h"
#include "src/assembler.h"
-#include "src/ast.h"
#include "src/isolate-inl.h"
#include "src/regexp/regexp-stack.h"
#include "src/simulator.h"
@@ -189,16 +188,9 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
Address stack_base = stack_scope.stack()->stack_base();
int direct_call = 0;
- int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
- input,
- start_offset,
- input_start,
- input_end,
- output,
- output_size,
- stack_base,
- direct_call,
- isolate);
+ int result = CALL_GENERATED_REGEXP_CODE(
+ isolate, code->entry(), input, start_offset, input_start, input_end,
+ output, output_size, stack_base, direct_call, isolate);
DCHECK(result >= RETRY);
if (result == EXCEPTION && !isolate->has_pending_exception()) {
diff --git a/deps/v8/src/regexp/regexp-macro-assembler.h b/deps/v8/src/regexp/regexp-macro-assembler.h
index ea97d5b29b..20599334cd 100644
--- a/deps/v8/src/regexp/regexp-macro-assembler.h
+++ b/deps/v8/src/regexp/regexp-macro-assembler.h
@@ -5,7 +5,8 @@
#ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
#define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
-#include "src/ast.h"
+#include "src/assembler.h"
+#include "src/regexp/regexp-ast.h"
namespace v8 {
namespace internal {
@@ -71,9 +72,11 @@ class RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
- virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0;
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
@@ -102,17 +105,12 @@ class RegExpMacroAssembler {
// Checks whether the given offset from the current position is before
// the end of the string. May overwrite the current character.
- virtual void CheckPosition(int cp_offset, Label* on_outside_input) {
- LoadCurrentCharacter(cp_offset, on_outside_input, true);
- }
+ virtual void CheckPosition(int cp_offset, Label* on_outside_input) = 0;
// Check whether a standard/default character class matches the current
// character. Returns false if the type of special character class does
// not have custom support.
// May clobber the current loaded character.
- virtual bool CheckSpecialCharacterClass(uc16 type,
- Label* on_no_match) {
- return false;
- }
+ virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match) = 0;
virtual void Fail() = 0;
virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0;
diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc
new file mode 100644
index 0000000000..fa8900342c
--- /dev/null
+++ b/deps/v8/src/regexp/regexp-parser.cc
@@ -0,0 +1,1180 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/regexp/regexp-parser.h"
+
+#include "src/char-predicates-inl.h"
+#include "src/factory.h"
+#include "src/isolate.h"
+#include "src/objects-inl.h"
+#include "src/regexp/jsregexp.h"
+#include "src/utils.h"
+
+namespace v8 {
+namespace internal {
+
+RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
+ bool multiline, bool unicode, Isolate* isolate,
+ Zone* zone)
+ : isolate_(isolate),
+ zone_(zone),
+ error_(error),
+ captures_(NULL),
+ in_(in),
+ current_(kEndMarker),
+ next_pos_(0),
+ captures_started_(0),
+ capture_count_(0),
+ has_more_(true),
+ multiline_(multiline),
+ unicode_(unicode),
+ simple_(false),
+ contains_anchor_(false),
+ is_scanned_for_captures_(false),
+ failed_(false) {
+ Advance();
+}
+
+
+uc32 RegExpParser::Next() {
+ if (has_next()) {
+ return in()->Get(next_pos_);
+ } else {
+ return kEndMarker;
+ }
+}
+
+
+void RegExpParser::Advance() {
+ if (next_pos_ < in()->length()) {
+ StackLimitCheck check(isolate());
+ if (check.HasOverflowed()) {
+ ReportError(CStrVector(Isolate::kStackOverflowMessage));
+ } else if (zone()->excess_allocation()) {
+ ReportError(CStrVector("Regular expression too large"));
+ } else {
+ current_ = in()->Get(next_pos_);
+ next_pos_++;
+ // Read the whole surrogate pair in case of unicode flag, if possible.
+ if (unicode_ && next_pos_ < in()->length() &&
+ unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {
+ uc16 trail = in()->Get(next_pos_);
+ if (unibrow::Utf16::IsTrailSurrogate(trail)) {
+ current_ = unibrow::Utf16::CombineSurrogatePair(
+ static_cast<uc16>(current_), trail);
+ next_pos_++;
+ }
+ }
+ }
+ } else {
+ current_ = kEndMarker;
+ // Advance so that position() points to 1-after-the-last-character. This is
+ // important so that Reset() to this position works correctly.
+ next_pos_ = in()->length() + 1;
+ has_more_ = false;
+ }
+}
+
+
+void RegExpParser::Reset(int pos) {
+ next_pos_ = pos;
+ has_more_ = (pos < in()->length());
+ Advance();
+}
+
+
+void RegExpParser::Advance(int dist) {
+ next_pos_ += dist - 1;
+ Advance();
+}
+
+
+bool RegExpParser::simple() { return simple_; }
+
+
+bool RegExpParser::IsSyntaxCharacter(uc32 c) {
+ return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' ||
+ c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' ||
+ c == '{' || c == '}' || c == '|';
+}
+
+
+RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
+ failed_ = true;
+ *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();
+ // Zip to the end to make sure the no more input is read.
+ current_ = kEndMarker;
+ next_pos_ = in()->length();
+ return NULL;
+}
+
+
+#define CHECK_FAILED /**/); \
+ if (failed_) return NULL; \
+ ((void)0
+
+
+// Pattern ::
+// Disjunction
+RegExpTree* RegExpParser::ParsePattern() {
+ RegExpTree* result = ParseDisjunction(CHECK_FAILED);
+ DCHECK(!has_more());
+ // If the result of parsing is a literal string atom, and it has the
+ // same length as the input, then the atom is identical to the input.
+ if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {
+ simple_ = true;
+ }
+ return result;
+}
+
+
+// Disjunction ::
+// Alternative
+// Alternative | Disjunction
+// Alternative ::
+// [empty]
+// Term Alternative
+// Term ::
+// Assertion
+// Atom
+// Atom Quantifier
+RegExpTree* RegExpParser::ParseDisjunction() {
+ // Used to store current state while parsing subexpressions.
+ RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
+ zone());
+ RegExpParserState* state = &initial_state;
+ // Cache the builder in a local variable for quick access.
+ RegExpBuilder* builder = initial_state.builder();
+ while (true) {
+ switch (current()) {
+ case kEndMarker:
+ if (state->IsSubexpression()) {
+ // Inside a parenthesized group when hitting end of input.
+ ReportError(CStrVector("Unterminated group") CHECK_FAILED);
+ }
+ DCHECK_EQ(INITIAL, state->group_type());
+ // Parsing completed successfully.
+ return builder->ToRegExp();
+ case ')': {
+ if (!state->IsSubexpression()) {
+ ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
+ }
+ DCHECK_NE(INITIAL, state->group_type());
+
+ Advance();
+ // End disjunction parsing and convert builder content to new single
+ // regexp atom.
+ RegExpTree* body = builder->ToRegExp();
+
+ int end_capture_index = captures_started();
+
+ int capture_index = state->capture_index();
+ SubexpressionType group_type = state->group_type();
+
+ // Build result of subexpression.
+ if (group_type == CAPTURE) {
+ RegExpCapture* capture = GetCapture(capture_index);
+ capture->set_body(body);
+ body = capture;
+ } else if (group_type != GROUPING) {
+ DCHECK(group_type == POSITIVE_LOOKAROUND ||
+ group_type == NEGATIVE_LOOKAROUND);
+ bool is_positive = (group_type == POSITIVE_LOOKAROUND);
+ body = new (zone()) RegExpLookaround(
+ body, is_positive, end_capture_index - capture_index,
+ capture_index, state->lookaround_type());
+ }
+
+ // Restore previous state.
+ state = state->previous_state();
+ builder = state->builder();
+
+ builder->AddAtom(body);
+ // For compatability with JSC and ES3, we allow quantifiers after
+ // lookaheads, and break in all cases.
+ break;
+ }
+ case '|': {
+ Advance();
+ builder->NewAlternative();
+ continue;
+ }
+ case '*':
+ case '+':
+ case '?':
+ return ReportError(CStrVector("Nothing to repeat"));
+ case '^': {
+ Advance();
+ if (multiline_) {
+ builder->AddAssertion(
+ new (zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE));
+ } else {
+ builder->AddAssertion(
+ new (zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT));
+ set_contains_anchor();
+ }
+ continue;
+ }
+ case '$': {
+ Advance();
+ RegExpAssertion::AssertionType assertion_type =
+ multiline_ ? RegExpAssertion::END_OF_LINE
+ : RegExpAssertion::END_OF_INPUT;
+ builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type));
+ continue;
+ }
+ case '.': {
+ Advance();
+ // everything except \x0a, \x0d, \u2028 and \u2029
+ ZoneList<CharacterRange>* ranges =
+ new (zone()) ZoneList<CharacterRange>(2, zone());
+ CharacterRange::AddClassEscape('.', ranges, zone());
+ RegExpTree* atom = new (zone()) RegExpCharacterClass(ranges, false);
+ builder->AddAtom(atom);
+ break;
+ }
+ case '(': {
+ SubexpressionType subexpr_type = CAPTURE;
+ RegExpLookaround::Type lookaround_type = state->lookaround_type();
+ Advance();
+ if (current() == '?') {
+ switch (Next()) {
+ case ':':
+ subexpr_type = GROUPING;
+ break;
+ case '=':
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = POSITIVE_LOOKAROUND;
+ break;
+ case '!':
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = NEGATIVE_LOOKAROUND;
+ break;
+ case '<':
+ if (FLAG_harmony_regexp_lookbehind) {
+ Advance();
+ lookaround_type = RegExpLookaround::LOOKBEHIND;
+ if (Next() == '=') {
+ subexpr_type = POSITIVE_LOOKAROUND;
+ break;
+ } else if (Next() == '!') {
+ subexpr_type = NEGATIVE_LOOKAROUND;
+ break;
+ }
+ }
+ // Fall through.
+ default:
+ ReportError(CStrVector("Invalid group") CHECK_FAILED);
+ break;
+ }
+ Advance(2);
+ } else {
+ if (captures_started_ >= kMaxCaptures) {
+ ReportError(CStrVector("Too many captures") CHECK_FAILED);
+ }
+ captures_started_++;
+ }
+ // Store current state and begin new disjunction parsing.
+ state = new (zone()) RegExpParserState(
+ state, subexpr_type, lookaround_type, captures_started_, zone());
+ builder = state->builder();
+ continue;
+ }
+ case '[': {
+ RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);
+ builder->AddAtom(atom);
+ break;
+ }
+ // Atom ::
+ // \ AtomEscape
+ case '\\':
+ switch (Next()) {
+ case kEndMarker:
+ return ReportError(CStrVector("\\ at end of pattern"));
+ case 'b':
+ Advance(2);
+ builder->AddAssertion(
+ new (zone()) RegExpAssertion(RegExpAssertion::BOUNDARY));
+ continue;
+ case 'B':
+ Advance(2);
+ builder->AddAssertion(
+ new (zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY));
+ continue;
+ // AtomEscape ::
+ // CharacterClassEscape
+ //
+ // CharacterClassEscape :: one of
+ // d D s S w W
+ case 'd':
+ case 'D':
+ case 's':
+ case 'S':
+ case 'w':
+ case 'W': {
+ uc32 c = Next();
+ Advance(2);
+ ZoneList<CharacterRange>* ranges =
+ new (zone()) ZoneList<CharacterRange>(2, zone());
+ CharacterRange::AddClassEscape(c, ranges, zone());
+ RegExpTree* atom = new (zone()) RegExpCharacterClass(ranges, false);
+ builder->AddAtom(atom);
+ break;
+ }
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ int index = 0;
+ if (ParseBackReferenceIndex(&index)) {
+ if (state->IsInsideCaptureGroup(index)) {
+ // The back reference is inside the capture group it refers to.
+ // Nothing can possibly have been captured yet, so we use empty
+ // instead. This ensures that, when checking a back reference,
+ // the capture registers of the referenced capture are either
+ // both set or both cleared.
+ builder->AddEmpty();
+ } else {
+ RegExpCapture* capture = GetCapture(index);
+ RegExpTree* atom = new (zone()) RegExpBackReference(capture);
+ builder->AddAtom(atom);
+ }
+ break;
+ }
+ uc32 first_digit = Next();
+ if (first_digit == '8' || first_digit == '9') {
+ // If the 'u' flag is present, only syntax characters can be
+ // escaped,
+ // no other identity escapes are allowed. If the 'u' flag is not
+ // present, all identity escapes are allowed.
+ if (!unicode_) {
+ builder->AddCharacter(first_digit);
+ Advance(2);
+ } else {
+ return ReportError(CStrVector("Invalid escape"));
+ }
+ break;
+ }
+ }
+ // FALLTHROUGH
+ case '0': {
+ Advance();
+ uc32 octal = ParseOctalLiteral();
+ builder->AddCharacter(octal);
+ break;
+ }
+ // ControlEscape :: one of
+ // f n r t v
+ case 'f':
+ Advance(2);
+ builder->AddCharacter('\f');
+ break;
+ case 'n':
+ Advance(2);
+ builder->AddCharacter('\n');
+ break;
+ case 'r':
+ Advance(2);
+ builder->AddCharacter('\r');
+ break;
+ case 't':
+ Advance(2);
+ builder->AddCharacter('\t');
+ break;
+ case 'v':
+ Advance(2);
+ builder->AddCharacter('\v');
+ break;
+ case 'c': {
+ Advance();
+ uc32 controlLetter = Next();
+ // Special case if it is an ASCII letter.
+ // Convert lower case letters to uppercase.
+ uc32 letter = controlLetter & ~('a' ^ 'A');
+ if (letter < 'A' || 'Z' < letter) {
+ // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
+ // This is outside the specification. We match JSC in
+ // reading the backslash as a literal character instead
+ // of as starting an escape.
+ builder->AddCharacter('\\');
+ } else {
+ Advance(2);
+ builder->AddCharacter(controlLetter & 0x1f);
+ }
+ break;
+ }
+ case 'x': {
+ Advance(2);
+ uc32 value;
+ if (ParseHexEscape(2, &value)) {
+ builder->AddCharacter(value);
+ } else if (!unicode_) {
+ builder->AddCharacter('x');
+ } else {
+ // If the 'u' flag is present, invalid escapes are not treated as
+ // identity escapes.
+ return ReportError(CStrVector("Invalid escape"));
+ }
+ break;
+ }
+ case 'u': {
+ Advance(2);
+ uc32 value;
+ if (ParseUnicodeEscape(&value)) {
+ builder->AddUnicodeCharacter(value);
+ } else if (!unicode_) {
+ builder->AddCharacter('u');
+ } else {
+ // If the 'u' flag is present, invalid escapes are not treated as
+ // identity escapes.
+ return ReportError(CStrVector("Invalid unicode escape"));
+ }
+ break;
+ }
+ default:
+ Advance();
+ // If the 'u' flag is present, only syntax characters can be
+ // escaped, no
+ // other identity escapes are allowed. If the 'u' flag is not
+ // present,
+ // all identity escapes are allowed.
+ if (!unicode_ || IsSyntaxCharacter(current())) {
+ builder->AddCharacter(current());
+ Advance();
+ } else {
+ return ReportError(CStrVector("Invalid escape"));
+ }
+ break;
+ }
+ break;
+ case '{': {
+ int dummy;
+ if (ParseIntervalQuantifier(&dummy, &dummy)) {
+ ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
+ }
+ // fallthrough
+ }
+ default:
+ builder->AddUnicodeCharacter(current());
+ Advance();
+ break;
+ } // end switch(current())
+
+ int min;
+ int max;
+ switch (current()) {
+ // QuantifierPrefix ::
+ // *
+ // +
+ // ?
+ // {
+ case '*':
+ min = 0;
+ max = RegExpTree::kInfinity;
+ Advance();
+ break;
+ case '+':
+ min = 1;
+ max = RegExpTree::kInfinity;
+ Advance();
+ break;
+ case '?':
+ min = 0;
+ max = 1;
+ Advance();
+ break;
+ case '{':
+ if (ParseIntervalQuantifier(&min, &max)) {
+ if (max < min) {
+ ReportError(CStrVector("numbers out of order in {} quantifier.")
+ CHECK_FAILED);
+ }
+ break;
+ } else {
+ continue;
+ }
+ default:
+ continue;
+ }
+ RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;
+ if (current() == '?') {
+ quantifier_type = RegExpQuantifier::NON_GREEDY;
+ Advance();
+ } else if (FLAG_regexp_possessive_quantifier && current() == '+') {
+ // FLAG_regexp_possessive_quantifier is a debug-only flag.
+ quantifier_type = RegExpQuantifier::POSSESSIVE;
+ Advance();
+ }
+ builder->AddQuantifierToAtom(min, max, quantifier_type);
+ }
+}
+
+
+#ifdef DEBUG
+// Currently only used in an DCHECK.
+static bool IsSpecialClassEscape(uc32 c) {
+ switch (c) {
+ case 'd':
+ case 'D':
+ case 's':
+ case 'S':
+ case 'w':
+ case 'W':
+ return true;
+ default:
+ return false;
+ }
+}
+#endif
+
+
+// In order to know whether an escape is a backreference or not we have to scan
+// the entire regexp and find the number of capturing parentheses. However we
+// don't want to scan the regexp twice unless it is necessary. This mini-parser
+// is called when needed. It can see the difference between capturing and
+// noncapturing parentheses and can skip character classes and backslash-escaped
+// characters.
+void RegExpParser::ScanForCaptures() {
+ // Start with captures started previous to current position
+ int capture_count = captures_started();
+ // Add count of captures after this position.
+ int n;
+ while ((n = current()) != kEndMarker) {
+ Advance();
+ switch (n) {
+ case '\\':
+ Advance();
+ break;
+ case '[': {
+ int c;
+ while ((c = current()) != kEndMarker) {
+ Advance();
+ if (c == '\\') {
+ Advance();
+ } else {
+ if (c == ']') break;
+ }
+ }
+ break;
+ }
+ case '(':
+ if (current() != '?') capture_count++;
+ break;
+ }
+ }
+ capture_count_ = capture_count;
+ is_scanned_for_captures_ = true;
+}
+
+
+bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
+ DCHECK_EQ('\\', current());
+ DCHECK('1' <= Next() && Next() <= '9');
+ // Try to parse a decimal literal that is no greater than the total number
+ // of left capturing parentheses in the input.
+ int start = position();
+ int value = Next() - '0';
+ Advance(2);
+ while (true) {
+ uc32 c = current();
+ if (IsDecimalDigit(c)) {
+ value = 10 * value + (c - '0');
+ if (value > kMaxCaptures) {
+ Reset(start);
+ return false;
+ }
+ Advance();
+ } else {
+ break;
+ }
+ }
+ if (value > captures_started()) {
+ if (!is_scanned_for_captures_) {
+ int saved_position = position();
+ ScanForCaptures();
+ Reset(saved_position);
+ }
+ if (value > capture_count_) {
+ Reset(start);
+ return false;
+ }
+ }
+ *index_out = value;
+ return true;
+}
+
+
+RegExpCapture* RegExpParser::GetCapture(int index) {
+ // The index for the capture groups are one-based. Its index in the list is
+ // zero-based.
+ int know_captures =
+ is_scanned_for_captures_ ? capture_count_ : captures_started_;
+ DCHECK(index <= know_captures);
+ if (captures_ == NULL) {
+ captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
+ }
+ while (captures_->length() < know_captures) {
+ captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
+ }
+ return captures_->at(index - 1);
+}
+
+
+bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
+ for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
+ if (s->group_type() != CAPTURE) continue;
+ // Return true if we found the matching capture index.
+ if (index == s->capture_index()) return true;
+ // Abort if index is larger than what has been parsed up till this state.
+ if (index > s->capture_index()) return false;
+ }
+ return false;
+}
+
+
+// QuantifierPrefix ::
+// { DecimalDigits }
+// { DecimalDigits , }
+// { DecimalDigits , DecimalDigits }
+//
+// Returns true if parsing succeeds, and set the min_out and max_out
+// values. Values are truncated to RegExpTree::kInfinity if they overflow.
+bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
+ DCHECK_EQ(current(), '{');
+ int start = position();
+ Advance();
+ int min = 0;
+ if (!IsDecimalDigit(current())) {
+ Reset(start);
+ return false;
+ }
+ while (IsDecimalDigit(current())) {
+ int next = current() - '0';
+ if (min > (RegExpTree::kInfinity - next) / 10) {
+ // Overflow. Skip past remaining decimal digits and return -1.
+ do {
+ Advance();
+ } while (IsDecimalDigit(current()));
+ min = RegExpTree::kInfinity;
+ break;
+ }
+ min = 10 * min + next;
+ Advance();
+ }
+ int max = 0;
+ if (current() == '}') {
+ max = min;
+ Advance();
+ } else if (current() == ',') {
+ Advance();
+ if (current() == '}') {
+ max = RegExpTree::kInfinity;
+ Advance();
+ } else {
+ while (IsDecimalDigit(current())) {
+ int next = current() - '0';
+ if (max > (RegExpTree::kInfinity - next) / 10) {
+ do {
+ Advance();
+ } while (IsDecimalDigit(current()));
+ max = RegExpTree::kInfinity;
+ break;
+ }
+ max = 10 * max + next;
+ Advance();
+ }
+ if (current() != '}') {
+ Reset(start);
+ return false;
+ }
+ Advance();
+ }
+ } else {
+ Reset(start);
+ return false;
+ }
+ *min_out = min;
+ *max_out = max;
+ return true;
+}
+
+
+uc32 RegExpParser::ParseOctalLiteral() {
+ DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker);
+ // For compatibility with some other browsers (not all), we parse
+ // up to three octal digits with a value below 256.
+ uc32 value = current() - '0';
+ Advance();
+ if ('0' <= current() && current() <= '7') {
+ value = value * 8 + current() - '0';
+ Advance();
+ if (value < 32 && '0' <= current() && current() <= '7') {
+ value = value * 8 + current() - '0';
+ Advance();
+ }
+ }
+ return value;
+}
+
+
+bool RegExpParser::ParseHexEscape(int length, uc32* value) {
+ int start = position();
+ uc32 val = 0;
+ for (int i = 0; i < length; ++i) {
+ uc32 c = current();
+ int d = HexValue(c);
+ if (d < 0) {
+ Reset(start);
+ return false;
+ }
+ val = val * 16 + d;
+ Advance();
+ }
+ *value = val;
+ return true;
+}
+
+
+bool RegExpParser::ParseUnicodeEscape(uc32* value) {
+ // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
+ // allowed). In the latter case, the number of hex digits between { } is
+ // arbitrary. \ and u have already been read.
+ if (current() == '{' && unicode_) {
+ int start = position();
+ Advance();
+ if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) {
+ if (current() == '}') {
+ Advance();
+ return true;
+ }
+ }
+ Reset(start);
+ return false;
+ }
+ // \u but no {, or \u{...} escapes not allowed.
+ return ParseHexEscape(4, value);
+}
+
+
+bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
+ uc32 x = 0;
+ int d = HexValue(current());
+ if (d < 0) {
+ return false;
+ }
+ while (d >= 0) {
+ x = x * 16 + d;
+ if (x > max_value) {
+ return false;
+ }
+ Advance();
+ d = HexValue(current());
+ }
+ *value = x;
+ return true;
+}
+
+
+uc32 RegExpParser::ParseClassCharacterEscape() {
+ DCHECK(current() == '\\');
+ DCHECK(has_next() && !IsSpecialClassEscape(Next()));
+ Advance();
+ switch (current()) {
+ case 'b':
+ Advance();
+ return '\b';
+ // ControlEscape :: one of
+ // f n r t v
+ case 'f':
+ Advance();
+ return '\f';
+ case 'n':
+ Advance();
+ return '\n';
+ case 'r':
+ Advance();
+ return '\r';
+ case 't':
+ Advance();
+ return '\t';
+ case 'v':
+ Advance();
+ return '\v';
+ case 'c': {
+ uc32 controlLetter = Next();
+ uc32 letter = controlLetter & ~('A' ^ 'a');
+ // For compatibility with JSC, inside a character class
+ // we also accept digits and underscore as control characters.
+ if ((controlLetter >= '0' && controlLetter <= '9') ||
+ controlLetter == '_' || (letter >= 'A' && letter <= 'Z')) {
+ Advance(2);
+ // Control letters mapped to ASCII control characters in the range
+ // 0x00-0x1f.
+ return controlLetter & 0x1f;
+ }
+ // We match JSC in reading the backslash as a literal
+ // character instead of as starting an escape.
+ return '\\';
+ }
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ // For compatibility, we interpret a decimal escape that isn't
+ // a back reference (and therefore either \0 or not valid according
+ // to the specification) as a 1..3 digit octal character code.
+ return ParseOctalLiteral();
+ case 'x': {
+ Advance();
+ uc32 value;
+ if (ParseHexEscape(2, &value)) {
+ return value;
+ }
+ if (!unicode_) {
+ // If \x is not followed by a two-digit hexadecimal, treat it
+ // as an identity escape.
+ return 'x';
+ }
+ // If the 'u' flag is present, invalid escapes are not treated as
+ // identity escapes.
+ ReportError(CStrVector("Invalid escape"));
+ return 0;
+ }
+ case 'u': {
+ Advance();
+ uc32 value;
+ if (ParseUnicodeEscape(&value)) {
+ return value;
+ }
+ if (!unicode_) {
+ return 'u';
+ }
+ // If the 'u' flag is present, invalid escapes are not treated as
+ // identity escapes.
+ ReportError(CStrVector("Invalid unicode escape"));
+ return 0;
+ }
+ default: {
+ uc32 result = current();
+ // If the 'u' flag is present, only syntax characters can be escaped, no
+ // other identity escapes are allowed. If the 'u' flag is not present, all
+ // identity escapes are allowed.
+ if (!unicode_ || IsSyntaxCharacter(result)) {
+ Advance();
+ return result;
+ }
+ ReportError(CStrVector("Invalid escape"));
+ return 0;
+ }
+ }
+ return 0;
+}
+
+
+CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
+ DCHECK_EQ(0, *char_class);
+ uc32 first = current();
+ if (first == '\\') {
+ switch (Next()) {
+ case 'w':
+ case 'W':
+ case 'd':
+ case 'D':
+ case 's':
+ case 'S': {
+ *char_class = Next();
+ Advance(2);
+ return CharacterRange::Singleton(0); // Return dummy value.
+ }
+ case kEndMarker:
+ return ReportError(CStrVector("\\ at end of pattern"));
+ default:
+ uc32 c = ParseClassCharacterEscape(CHECK_FAILED);
+ return CharacterRange::Singleton(c);
+ }
+ } else {
+ Advance();
+ return CharacterRange::Singleton(first);
+ }
+}
+
+
+static const uc16 kNoCharClass = 0;
+
+// Adds range or pre-defined character class to character ranges.
+// If char_class is not kInvalidClass, it's interpreted as a class
+// escape (i.e., 's' means whitespace, from '\s').
+static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
+ uc16 char_class, CharacterRange range,
+ Zone* zone) {
+ if (char_class != kNoCharClass) {
+ CharacterRange::AddClassEscape(char_class, ranges, zone);
+ } else {
+ ranges->Add(range, zone);
+ }
+}
+
+
+RegExpTree* RegExpParser::ParseCharacterClass() {
+ static const char* kUnterminated = "Unterminated character class";
+ static const char* kRangeOutOfOrder = "Range out of order in character class";
+
+ DCHECK_EQ(current(), '[');
+ Advance();
+ bool is_negated = false;
+ if (current() == '^') {
+ is_negated = true;
+ Advance();
+ }
+ ZoneList<CharacterRange>* ranges =
+ new (zone()) ZoneList<CharacterRange>(2, zone());
+ while (has_more() && current() != ']') {
+ uc16 char_class = kNoCharClass;
+ CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
+ if (current() == '-') {
+ Advance();
+ if (current() == kEndMarker) {
+ // If we reach the end we break out of the loop and let the
+ // following code report an error.
+ break;
+ } else if (current() == ']') {
+ AddRangeOrEscape(ranges, char_class, first, zone());
+ ranges->Add(CharacterRange::Singleton('-'), zone());
+ break;
+ }
+ uc16 char_class_2 = kNoCharClass;
+ CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
+ if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
+ // Either end is an escaped character class. Treat the '-' verbatim.
+ AddRangeOrEscape(ranges, char_class, first, zone());
+ ranges->Add(CharacterRange::Singleton('-'), zone());
+ AddRangeOrEscape(ranges, char_class_2, next, zone());
+ continue;
+ }
+ if (first.from() > next.to()) {
+ return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);
+ }
+ ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());
+ } else {
+ AddRangeOrEscape(ranges, char_class, first, zone());
+ }
+ }
+ if (!has_more()) {
+ return ReportError(CStrVector(kUnterminated) CHECK_FAILED);
+ }
+ Advance();
+ if (ranges->length() == 0) {
+ ranges->Add(CharacterRange::Everything(), zone());
+ is_negated = !is_negated;
+ }
+ return new (zone()) RegExpCharacterClass(ranges, is_negated);
+}
+
+
+#undef CHECK_FAILED
+
+
+bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
+ FlatStringReader* input, bool multiline,
+ bool unicode, RegExpCompileData* result) {
+ DCHECK(result != NULL);
+ RegExpParser parser(input, &result->error, multiline, unicode, isolate, zone);
+ RegExpTree* tree = parser.ParsePattern();
+ if (parser.failed()) {
+ DCHECK(tree == NULL);
+ DCHECK(!result->error.is_null());
+ } else {
+ DCHECK(tree != NULL);
+ DCHECK(result->error.is_null());
+ if (FLAG_trace_regexp_parser) {
+ OFStream os(stdout);
+ tree->Print(os, zone);
+ os << "\n";
+ }
+ result->tree = tree;
+ int capture_count = parser.captures_started();
+ result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
+ result->contains_anchor = parser.contains_anchor();
+ result->capture_count = capture_count;
+ }
+ return !parser.failed();
+}
+
+
+RegExpBuilder::RegExpBuilder(Zone* zone)
+ : zone_(zone),
+ pending_empty_(false),
+ characters_(NULL),
+ terms_(),
+ alternatives_()
+#ifdef DEBUG
+ ,
+ last_added_(ADD_NONE)
+#endif
+{
+}
+
+
+void RegExpBuilder::FlushCharacters() {
+ pending_empty_ = false;
+ if (characters_ != NULL) {
+ RegExpTree* atom = new (zone()) RegExpAtom(characters_->ToConstVector());
+ characters_ = NULL;
+ text_.Add(atom, zone());
+ LAST(ADD_ATOM);
+ }
+}
+
+
+void RegExpBuilder::FlushText() {
+ FlushCharacters();
+ int num_text = text_.length();
+ if (num_text == 0) {
+ return;
+ } else if (num_text == 1) {
+ terms_.Add(text_.last(), zone());
+ } else {
+ RegExpText* text = new (zone()) RegExpText(zone());
+ for (int i = 0; i < num_text; i++) text_.Get(i)->AppendToText(text, zone());
+ terms_.Add(text, zone());
+ }
+ text_.Clear();
+}
+
+
+void RegExpBuilder::AddCharacter(uc16 c) {
+ pending_empty_ = false;
+ if (characters_ == NULL) {
+ characters_ = new (zone()) ZoneList<uc16>(4, zone());
+ }
+ characters_->Add(c, zone());
+ LAST(ADD_CHAR);
+}
+
+
+void RegExpBuilder::AddUnicodeCharacter(uc32 c) {
+ if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ ZoneList<uc16> surrogate_pair(2, zone());
+ surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone());
+ surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone());
+ RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
+ AddAtom(atom);
+ } else {
+ AddCharacter(static_cast<uc16>(c));
+ }
+}
+
+
+void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
+
+
+void RegExpBuilder::AddAtom(RegExpTree* term) {
+ if (term->IsEmpty()) {
+ AddEmpty();
+ return;
+ }
+ if (term->IsTextElement()) {
+ FlushCharacters();
+ text_.Add(term, zone());
+ } else {
+ FlushText();
+ terms_.Add(term, zone());
+ }
+ LAST(ADD_ATOM);
+}
+
+
+void RegExpBuilder::AddAssertion(RegExpTree* assert) {
+ FlushText();
+ terms_.Add(assert, zone());
+ LAST(ADD_ASSERT);
+}
+
+
+void RegExpBuilder::NewAlternative() { FlushTerms(); }
+
+
+void RegExpBuilder::FlushTerms() {
+ FlushText();
+ int num_terms = terms_.length();
+ RegExpTree* alternative;
+ if (num_terms == 0) {
+ alternative = new (zone()) RegExpEmpty();
+ } else if (num_terms == 1) {
+ alternative = terms_.last();
+ } else {
+ alternative = new (zone()) RegExpAlternative(terms_.GetList(zone()));
+ }
+ alternatives_.Add(alternative, zone());
+ terms_.Clear();
+ LAST(ADD_NONE);
+}
+
+
+RegExpTree* RegExpBuilder::ToRegExp() {
+ FlushTerms();
+ int num_alternatives = alternatives_.length();
+ if (num_alternatives == 0) return new (zone()) RegExpEmpty();
+ if (num_alternatives == 1) return alternatives_.last();
+ return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));
+}
+
+
+void RegExpBuilder::AddQuantifierToAtom(
+ int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {
+ if (pending_empty_) {
+ pending_empty_ = false;
+ return;
+ }
+ RegExpTree* atom;
+ if (characters_ != NULL) {
+ DCHECK(last_added_ == ADD_CHAR);
+ // Last atom was character.
+ Vector<const uc16> char_vector = characters_->ToConstVector();
+ int num_chars = char_vector.length();
+ if (num_chars > 1) {
+ Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
+ text_.Add(new (zone()) RegExpAtom(prefix), zone());
+ char_vector = char_vector.SubVector(num_chars - 1, num_chars);
+ }
+ characters_ = NULL;
+ atom = new (zone()) RegExpAtom(char_vector);
+ FlushText();
+ } else if (text_.length() > 0) {
+ DCHECK(last_added_ == ADD_ATOM);
+ atom = text_.RemoveLast();
+ FlushText();
+ } else if (terms_.length() > 0) {
+ DCHECK(last_added_ == ADD_ATOM);
+ atom = terms_.RemoveLast();
+ if (atom->max_match() == 0) {
+ // Guaranteed to only match an empty string.
+ LAST(ADD_TERM);
+ if (min == 0) {
+ return;
+ }
+ terms_.Add(atom, zone());
+ return;
+ }
+ } else {
+ // Only call immediately after adding an atom or character!
+ UNREACHABLE();
+ return;
+ }
+ terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
+ zone());
+ LAST(ADD_TERM);
+}
+
+} // namespace internal
+} // namespace v8
diff --git a/deps/v8/src/regexp/regexp-parser.h b/deps/v8/src/regexp/regexp-parser.h
new file mode 100644
index 0000000000..af9b765fba
--- /dev/null
+++ b/deps/v8/src/regexp/regexp-parser.h
@@ -0,0 +1,277 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_REGEXP_REGEXP_PARSER_H_
+#define V8_REGEXP_REGEXP_PARSER_H_
+
+#include "src/objects.h"
+#include "src/regexp/regexp-ast.h"
+#include "src/zone.h"
+
+namespace v8 {
+namespace internal {
+
+struct RegExpCompileData;
+
+
+// A BufferedZoneList is an automatically growing list, just like (and backed
+// by) a ZoneList, that is optimized for the case of adding and removing
+// a single element. The last element added is stored outside the backing list,
+// and if no more than one element is ever added, the ZoneList isn't even
+// allocated.
+// Elements must not be NULL pointers.
+template <typename T, int initial_size>
+class BufferedZoneList {
+ public:
+ BufferedZoneList() : list_(NULL), last_(NULL) {}
+
+ // Adds element at end of list. This element is buffered and can
+ // be read using last() or removed using RemoveLast until a new Add or until
+ // RemoveLast or GetList has been called.
+ void Add(T* value, Zone* zone) {
+ if (last_ != NULL) {
+ if (list_ == NULL) {
+ list_ = new (zone) ZoneList<T*>(initial_size, zone);
+ }
+ list_->Add(last_, zone);
+ }
+ last_ = value;
+ }
+
+ T* last() {
+ DCHECK(last_ != NULL);
+ return last_;
+ }
+
+ T* RemoveLast() {
+ DCHECK(last_ != NULL);
+ T* result = last_;
+ if ((list_ != NULL) && (list_->length() > 0))
+ last_ = list_->RemoveLast();
+ else
+ last_ = NULL;
+ return result;
+ }
+
+ T* Get(int i) {
+ DCHECK((0 <= i) && (i < length()));
+ if (list_ == NULL) {
+ DCHECK_EQ(0, i);
+ return last_;
+ } else {
+ if (i == list_->length()) {
+ DCHECK(last_ != NULL);
+ return last_;
+ } else {
+ return list_->at(i);
+ }
+ }
+ }
+
+ void Clear() {
+ list_ = NULL;
+ last_ = NULL;
+ }
+
+ int length() {
+ int length = (list_ == NULL) ? 0 : list_->length();
+ return length + ((last_ == NULL) ? 0 : 1);
+ }
+
+ ZoneList<T*>* GetList(Zone* zone) {
+ if (list_ == NULL) {
+ list_ = new (zone) ZoneList<T*>(initial_size, zone);
+ }
+ if (last_ != NULL) {
+ list_->Add(last_, zone);
+ last_ = NULL;
+ }
+ return list_;
+ }
+
+ private:
+ ZoneList<T*>* list_;
+ T* last_;
+};
+
+
+// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
+class RegExpBuilder : public ZoneObject {
+ public:
+ explicit RegExpBuilder(Zone* zone);
+ void AddCharacter(uc16 character);
+ void AddUnicodeCharacter(uc32 character);
+ // "Adds" an empty expression. Does nothing except consume a
+ // following quantifier
+ void AddEmpty();
+ void AddAtom(RegExpTree* tree);
+ void AddAssertion(RegExpTree* tree);
+ void NewAlternative(); // '|'
+ void AddQuantifierToAtom(int min, int max,
+ RegExpQuantifier::QuantifierType type);
+ RegExpTree* ToRegExp();
+
+ private:
+ void FlushCharacters();
+ void FlushText();
+ void FlushTerms();
+ Zone* zone() const { return zone_; }
+
+ Zone* zone_;
+ bool pending_empty_;
+ ZoneList<uc16>* characters_;
+ BufferedZoneList<RegExpTree, 2> terms_;
+ BufferedZoneList<RegExpTree, 2> text_;
+ BufferedZoneList<RegExpTree, 2> alternatives_;
+#ifdef DEBUG
+ enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_;
+#define LAST(x) last_added_ = x;
+#else
+#define LAST(x)
+#endif
+};
+
+
+class RegExpParser BASE_EMBEDDED {
+ public:
+ RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode,
+ bool unicode, Isolate* isolate, Zone* zone);
+
+ static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
+ bool multiline, bool unicode,
+ RegExpCompileData* result);
+
+ RegExpTree* ParsePattern();
+ RegExpTree* ParseDisjunction();
+ RegExpTree* ParseGroup();
+ RegExpTree* ParseCharacterClass();
+
+ // Parses a {...,...} quantifier and stores the range in the given
+ // out parameters.
+ bool ParseIntervalQuantifier(int* min_out, int* max_out);
+
+ // Parses and returns a single escaped character. The character
+ // must not be 'b' or 'B' since they are usually handle specially.
+ uc32 ParseClassCharacterEscape();
+
+ // Checks whether the following is a length-digit hexadecimal number,
+ // and sets the value if it is.
+ bool ParseHexEscape(int length, uc32* value);
+ bool ParseUnicodeEscape(uc32* value);
+ bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
+
+ uc32 ParseOctalLiteral();
+
+ // Tries to parse the input as a back reference. If successful it
+ // stores the result in the output parameter and returns true. If
+ // it fails it will push back the characters read so the same characters
+ // can be reparsed.
+ bool ParseBackReferenceIndex(int* index_out);
+
+ CharacterRange ParseClassAtom(uc16* char_class);
+ RegExpTree* ReportError(Vector<const char> message);
+ void Advance();
+ void Advance(int dist);
+ void Reset(int pos);
+
+ // Reports whether the pattern might be used as a literal search string.
+ // Only use if the result of the parse is a single atom node.
+ bool simple();
+ bool contains_anchor() { return contains_anchor_; }
+ void set_contains_anchor() { contains_anchor_ = true; }
+ int captures_started() { return captures_started_; }
+ int position() { return next_pos_ - 1; }
+ bool failed() { return failed_; }
+
+ static bool IsSyntaxCharacter(uc32 c);
+
+ static const int kMaxCaptures = 1 << 16;
+ static const uc32 kEndMarker = (1 << 21);
+
+ private:
+ enum SubexpressionType {
+ INITIAL,
+ CAPTURE, // All positive values represent captures.
+ POSITIVE_LOOKAROUND,
+ NEGATIVE_LOOKAROUND,
+ GROUPING
+ };
+
+ class RegExpParserState : public ZoneObject {
+ public:
+ RegExpParserState(RegExpParserState* previous_state,
+ SubexpressionType group_type,
+ RegExpLookaround::Type lookaround_type,
+ int disjunction_capture_index, Zone* zone)
+ : previous_state_(previous_state),
+ builder_(new (zone) RegExpBuilder(zone)),
+ group_type_(group_type),
+ lookaround_type_(lookaround_type),
+ disjunction_capture_index_(disjunction_capture_index) {}
+ // Parser state of containing expression, if any.
+ RegExpParserState* previous_state() { return previous_state_; }
+ bool IsSubexpression() { return previous_state_ != NULL; }
+ // RegExpBuilder building this regexp's AST.
+ RegExpBuilder* builder() { return builder_; }
+ // Type of regexp being parsed (parenthesized group or entire regexp).
+ SubexpressionType group_type() { return group_type_; }
+ // Lookahead or Lookbehind.
+ RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
+ // Index in captures array of first capture in this sub-expression, if any.
+ // Also the capture index of this sub-expression itself, if group_type
+ // is CAPTURE.
+ int capture_index() { return disjunction_capture_index_; }
+
+ // Check whether the parser is inside a capture group with the given index.
+ bool IsInsideCaptureGroup(int index);
+
+ private:
+ // Linked list implementation of stack of states.
+ RegExpParserState* previous_state_;
+ // Builder for the stored disjunction.
+ RegExpBuilder* builder_;
+ // Stored disjunction type (capture, look-ahead or grouping), if any.
+ SubexpressionType group_type_;
+ // Stored read direction.
+ RegExpLookaround::Type lookaround_type_;
+ // Stored disjunction's capture index (if any).
+ int disjunction_capture_index_;
+ };
+
+ // Return the 1-indexed RegExpCapture object, allocate if necessary.
+ RegExpCapture* GetCapture(int index);
+
+ Isolate* isolate() { return isolate_; }
+ Zone* zone() const { return zone_; }
+
+ uc32 current() { return current_; }
+ bool has_more() { return has_more_; }
+ bool has_next() { return next_pos_ < in()->length(); }
+ uc32 Next();
+ FlatStringReader* in() { return in_; }
+ void ScanForCaptures();
+
+ Isolate* isolate_;
+ Zone* zone_;
+ Handle<String>* error_;
+ ZoneList<RegExpCapture*>* captures_;
+ FlatStringReader* in_;
+ uc32 current_;
+ int next_pos_;
+ int captures_started_;
+ // The capture count is only valid after we have scanned for captures.
+ int capture_count_;
+ bool has_more_;
+ bool multiline_;
+ bool unicode_;
+ bool simple_;
+ bool contains_anchor_;
+ bool is_scanned_for_captures_;
+ bool failed_;
+};
+
+} // namespace internal
+} // namespace v8
+
+#endif // V8_REGEXP_REGEXP_PARSER_H_
diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
index 969edc1b3b..286f159cc8 100644
--- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc
@@ -64,7 +64,8 @@ namespace internal {
* - backup of callee save registers (rbx, possibly rsi and rdi).
* - success counter (only useful for global regexp to count matches)
* - Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a non-position.
+ * string start - 1). Used to initialize capture registers to a
+ * non-position.
* - At start of string (if 1, we are starting at the start of the
* string, otherwise 0)
* - register 0 rbp[-n] (Only positions must be stored in the first
@@ -94,7 +95,7 @@ RegExpMacroAssemblerX64::RegExpMacroAssemblerX64(Isolate* isolate, Zone* zone,
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(isolate, NULL, kRegExpCodeSize),
+ masm_(isolate, NULL, kRegExpCodeSize, CodeObjectRequired::kYes),
no_root_array_scope_(&masm_),
code_relative_fixup_positions_(4, zone),
mode_(mode),
@@ -171,25 +172,16 @@ void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) {
void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) {
- Label not_at_start;
- // Did we start the match at the start of the string at all?
- __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
- BranchOrBacktrack(not_equal, &not_at_start);
- // If we did, are we still at the start of the input?
- __ leap(rax, Operand(rsi, rdi, times_1, 0));
- __ cmpp(rax, Operand(rbp, kInputStart));
+ __ leap(rax, Operand(rdi, -char_size()));
+ __ cmpp(rax, Operand(rbp, kStringStartMinusOne));
BranchOrBacktrack(equal, on_at_start);
- __ bind(&not_at_start);
}
-void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) {
- // Did we start the match at the start of the string at all?
- __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
- BranchOrBacktrack(not_equal, on_not_at_start);
- // If we did, are we still at the start of the input?
- __ leap(rax, Operand(rsi, rdi, times_1, 0));
- __ cmpp(rax, Operand(rbp, kInputStart));
+void RegExpMacroAssemblerX64::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ __ leap(rax, Operand(rdi, -char_size() + cp_offset * char_size()));
+ __ cmpp(rax, Operand(rbp, kStringStartMinusOne));
BranchOrBacktrack(not_equal, on_not_at_start);
}
@@ -211,8 +203,7 @@ void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_no_match) {
+ int start_reg, bool read_backward, Label* on_no_match) {
Label fallthrough;
ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
ReadPositionFromRegister(rbx, start_reg + 1); // Offset of end of capture
@@ -222,23 +213,25 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
// rdx = Start offset of capture.
// rbx = Length of capture
- // If length is negative, this code will fail (it's a symptom of a partial or
- // illegal capture where start of capture after end of capture).
- // This must not happen (no back-reference can reference a capture that wasn't
- // closed before in the reg-exp, and we must not generate code that can cause
- // this condition).
-
- // If length is zero, either the capture is empty or it is nonparticipating.
- // In either case succeed immediately.
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ j(equal, &fallthrough);
// -----------------------
// rdx - Start of capture
// rbx - length of capture
// Check that there are sufficient characters left in the input.
- __ movl(rax, rdi);
- __ addl(rax, rbx);
- BranchOrBacktrack(greater, on_no_match);
+ if (read_backward) {
+ __ movl(rax, Operand(rbp, kStringStartMinusOne));
+ __ addl(rax, rbx);
+ __ cmpl(rdi, rax);
+ BranchOrBacktrack(less_equal, on_no_match);
+ } else {
+ __ movl(rax, rdi);
+ __ addl(rax, rbx);
+ BranchOrBacktrack(greater, on_no_match);
+ }
if (mode_ == LATIN1) {
Label loop_increment;
@@ -248,6 +241,9 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
__ leap(r9, Operand(rsi, rdx, times_1, 0));
__ leap(r11, Operand(rsi, rdi, times_1, 0));
+ if (read_backward) {
+ __ subp(r11, rbx); // Offset by length when matching backwards.
+ }
__ addp(rbx, r9); // End of capture
// ---------------------
// r11 - current input character address
@@ -290,6 +286,11 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
// Compute new value of character position after the matched part.
__ movp(rdi, r11);
__ subq(rdi, rsi);
+ if (read_backward) {
+ // Subtract match length if we matched backward.
+ __ addq(rdi, register_location(start_reg));
+ __ subq(rdi, register_location(start_reg + 1));
+ }
} else {
DCHECK(mode_ == UC16);
// Save important/volatile registers before calling C function.
@@ -313,6 +314,9 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
__ leap(rcx, Operand(rsi, rdx, times_1, 0));
// Set byte_offset2.
__ leap(rdx, Operand(rsi, rdi, times_1, 0));
+ if (read_backward) {
+ __ subq(rdx, rbx);
+ }
// Set byte_length.
__ movp(r8, rbx);
// Isolate.
@@ -324,6 +328,9 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
__ leap(rdi, Operand(rsi, rdx, times_1, 0));
// Set byte_offset2.
__ movp(rsi, rax);
+ if (read_backward) {
+ __ subq(rsi, rbx);
+ }
// Set byte_length.
__ movp(rdx, rbx);
// Isolate.
@@ -349,17 +356,21 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
// Check if function returned non-zero for success or zero for failure.
__ testp(rax, rax);
BranchOrBacktrack(zero, on_no_match);
- // On success, increment position by length of capture.
+ // On success, advance position by length of capture.
// Requires that rbx is callee save (true for both Win64 and AMD64 ABIs).
- __ addq(rdi, rbx);
+ if (read_backward) {
+ __ subq(rdi, rbx);
+ } else {
+ __ addq(rdi, rbx);
+ }
}
__ bind(&fallthrough);
}
-void RegExpMacroAssemblerX64::CheckNotBackReference(
- int start_reg,
- Label* on_no_match) {
+void RegExpMacroAssemblerX64::CheckNotBackReference(int start_reg,
+ bool read_backward,
+ Label* on_no_match) {
Label fallthrough;
// Find length of back-referenced capture.
@@ -367,25 +378,31 @@ void RegExpMacroAssemblerX64::CheckNotBackReference(
ReadPositionFromRegister(rax, start_reg + 1); // Offset of end of capture
__ subp(rax, rdx); // Length to check.
- // Fail on partial or illegal capture (start of capture after end of capture).
- // This must not happen (no back-reference can reference a capture that wasn't
- // closed before in the reg-exp).
- __ Check(greater_equal, kInvalidCaptureReferenced);
-
- // Succeed on empty capture (including non-participating capture)
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ j(equal, &fallthrough);
// -----------------------
// rdx - Start of capture
// rax - length of capture
-
// Check that there are sufficient characters left in the input.
- __ movl(rbx, rdi);
- __ addl(rbx, rax);
- BranchOrBacktrack(greater, on_no_match);
+ if (read_backward) {
+ __ movl(rbx, Operand(rbp, kStringStartMinusOne));
+ __ addl(rbx, rax);
+ __ cmpl(rdi, rbx);
+ BranchOrBacktrack(less_equal, on_no_match);
+ } else {
+ __ movl(rbx, rdi);
+ __ addl(rbx, rax);
+ BranchOrBacktrack(greater, on_no_match);
+ }
// Compute pointers to match string and capture string
__ leap(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match.
+ if (read_backward) {
+ __ subq(rbx, rax); // Offset by length when matching backwards.
+ }
__ addp(rdx, rsi); // Start of capture.
__ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture
@@ -416,6 +433,11 @@ void RegExpMacroAssemblerX64::CheckNotBackReference(
// Set current character position to position after match.
__ movp(rdi, rbx);
__ subq(rdi, rsi);
+ if (read_backward) {
+ // Subtract match length if we matched backward.
+ __ addq(rdi, register_location(start_reg));
+ __ subq(rdi, register_location(start_reg + 1));
+ }
__ bind(&fallthrough);
}
@@ -682,7 +704,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
#endif
__ Push(Immediate(0)); // Number of successful matches in a global regexp.
- __ Push(Immediate(0)); // Make room for "input start - 1" constant.
+ __ Push(Immediate(0)); // Make room for "string start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
@@ -732,7 +754,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
}
// Store this value in a local variable, for use when clearing
// position registers.
- __ movp(Operand(rbp, kInputStartMinusOne), rax);
+ __ movp(Operand(rbp, kStringStartMinusOne), rax);
#if V8_OS_WIN
// Ensure that we have written to each stack page, in order. Skipping a page
@@ -835,7 +857,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
Immediate(num_saved_registers_ * kIntSize));
// Prepare rax to initialize registers with its value in the next run.
- __ movp(rax, Operand(rbp, kInputStartMinusOne));
+ __ movp(rax, Operand(rbp, kStringStartMinusOne));
if (global_with_zero_length_check()) {
// Special case for zero-length matches.
@@ -1018,10 +1040,13 @@ void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds,
int characters) {
- DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -1124,7 +1149,7 @@ void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
DCHECK(reg_from <= reg_to);
- __ movp(rax, Operand(rbp, kInputStartMinusOne));
+ __ movp(rax, Operand(rbp, kStringStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
__ movp(register_location(reg), rax);
}
@@ -1205,8 +1230,14 @@ Operand RegExpMacroAssemblerX64::register_location(int register_index) {
void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
Label* on_outside_input) {
- __ cmpl(rdi, Immediate(-cp_offset * char_size()));
- BranchOrBacktrack(greater_equal, on_outside_input);
+ if (cp_offset >= 0) {
+ __ cmpl(rdi, Immediate(-cp_offset * char_size()));
+ BranchOrBacktrack(greater_equal, on_outside_input);
+ } else {
+ __ leap(rax, Operand(rdi, cp_offset * char_size()));
+ __ cmpp(rax, Operand(rbp, kStringStartMinusOne));
+ BranchOrBacktrack(less_equal, on_outside_input);
+ }
}
diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h
index dbee9e86b5..257804739f 100644
--- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h
+++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h
@@ -34,9 +34,11 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
@@ -171,10 +173,10 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
static const int kSuccessfulCaptures = kLastCalleeSaveRegister - kPointerSize;
// When adding local variables remember to push space for them in
// the frame in GetCode.
- static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
+ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
+ static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
diff --git a/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc b/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc
index c6968dc197..01d0b249b6 100644
--- a/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc
+++ b/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc
@@ -53,7 +53,8 @@ namespace internal {
* - backup of caller ebx
* - success counter (only for global regexps to count matches).
* - Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a non-position.
+ * string start - 1). Used to initialize capture registers to a
+ * non-position.
* - register 0 ebp[-4] (only positions must be stored in the first
* - register 1 ebp[-8] num_saved_registers_ registers)
* - ...
@@ -80,7 +81,8 @@ RegExpMacroAssemblerX87::RegExpMacroAssemblerX87(Isolate* isolate, Zone* zone,
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)),
+ masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -156,25 +158,16 @@ void RegExpMacroAssemblerX87::CheckCharacterGT(uc16 limit, Label* on_greater) {
void RegExpMacroAssemblerX87::CheckAtStart(Label* on_at_start) {
- Label not_at_start;
- // Did we start the match at the start of the string at all?
- __ cmp(Operand(ebp, kStartIndex), Immediate(0));
- BranchOrBacktrack(not_equal, &not_at_start);
- // If we did, are we still at the start of the input?
- __ lea(eax, Operand(esi, edi, times_1, 0));
- __ cmp(eax, Operand(ebp, kInputStart));
+ __ lea(eax, Operand(edi, -char_size()));
+ __ cmp(eax, Operand(ebp, kStringStartMinusOne));
BranchOrBacktrack(equal, on_at_start);
- __ bind(&not_at_start);
}
-void RegExpMacroAssemblerX87::CheckNotAtStart(Label* on_not_at_start) {
- // Did we start the match at the start of the string at all?
- __ cmp(Operand(ebp, kStartIndex), Immediate(0));
- BranchOrBacktrack(not_equal, on_not_at_start);
- // If we did, are we still at the start of the input?
- __ lea(eax, Operand(esi, edi, times_1, 0));
- __ cmp(eax, Operand(ebp, kInputStart));
+void RegExpMacroAssemblerX87::CheckNotAtStart(int cp_offset,
+ Label* on_not_at_start) {
+ __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size()));
+ __ cmp(eax, Operand(ebp, kStringStartMinusOne));
BranchOrBacktrack(not_equal, on_not_at_start);
}
@@ -196,26 +189,28 @@ void RegExpMacroAssemblerX87::CheckGreedyLoop(Label* on_equal) {
void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase(
- int start_reg,
- Label* on_no_match) {
+ int start_reg, bool read_backward, Label* on_no_match) {
Label fallthrough;
__ mov(edx, register_location(start_reg)); // Index of start of capture
__ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
__ sub(ebx, edx); // Length of capture.
- // The length of a capture should not be negative. This can only happen
- // if the end of the capture is unrecorded, or at a point earlier than
- // the start of the capture.
- BranchOrBacktrack(less, on_no_match);
-
- // If length is zero, either the capture is empty or it is completely
- // uncaptured. In either case succeed immediately.
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ j(equal, &fallthrough);
// Check that there are sufficient characters left in the input.
- __ mov(eax, edi);
- __ add(eax, ebx);
- BranchOrBacktrack(greater, on_no_match);
+ if (read_backward) {
+ __ mov(eax, Operand(ebp, kStringStartMinusOne));
+ __ add(eax, ebx);
+ __ cmp(edi, eax);
+ BranchOrBacktrack(less_equal, on_no_match);
+ } else {
+ __ mov(eax, edi);
+ __ add(eax, ebx);
+ BranchOrBacktrack(greater, on_no_match);
+ }
if (mode_ == LATIN1) {
Label success;
@@ -228,6 +223,9 @@ void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase(
__ add(edx, esi); // Start of capture
__ add(edi, esi); // Start of text to match against capture.
+ if (read_backward) {
+ __ sub(edi, ebx); // Offset by length when matching backwards.
+ }
__ add(ebx, edi); // End of text to match against capture.
Label loop;
@@ -278,6 +276,11 @@ void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase(
__ add(esp, Immediate(kPointerSize));
// Compute new value of character position after the matched part.
__ sub(edi, esi);
+ if (read_backward) {
+ // Subtract match length if we matched backward.
+ __ add(edi, register_location(start_reg));
+ __ sub(edi, register_location(start_reg + 1));
+ }
} else {
DCHECK(mode_ == UC16);
// Save registers before calling C function.
@@ -304,6 +307,9 @@ void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase(
// Found by adding negative string-end offset of current position (edi)
// to end of string.
__ add(edi, esi);
+ if (read_backward) {
+ __ sub(edi, ebx); // Offset by length when matching backwards.
+ }
__ mov(Operand(esp, 1 * kPointerSize), edi);
// Set byte_offset1.
// Start of capture, where edx already holds string-end negative offset.
@@ -325,16 +331,20 @@ void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase(
// Check if function returned non-zero for success or zero for failure.
__ or_(eax, eax);
BranchOrBacktrack(zero, on_no_match);
- // On success, increment position by length of capture.
- __ add(edi, ebx);
+ // On success, advance position by length of capture.
+ if (read_backward) {
+ __ sub(edi, ebx);
+ } else {
+ __ add(edi, ebx);
+ }
}
__ bind(&fallthrough);
}
-void RegExpMacroAssemblerX87::CheckNotBackReference(
- int start_reg,
- Label* on_no_match) {
+void RegExpMacroAssemblerX87::CheckNotBackReference(int start_reg,
+ bool read_backward,
+ Label* on_no_match) {
Label fallthrough;
Label success;
Label fail;
@@ -343,22 +353,33 @@ void RegExpMacroAssemblerX87::CheckNotBackReference(
__ mov(edx, register_location(start_reg));
__ mov(eax, register_location(start_reg + 1));
__ sub(eax, edx); // Length to check.
- // Fail on partial or illegal capture (start of capture after end of capture).
- BranchOrBacktrack(less, on_no_match);
- // Succeed on empty capture (including no capture)
+
+ // At this point, the capture registers are either both set or both cleared.
+ // If the capture length is zero, then the capture is either empty or cleared.
+ // Fall through in both cases.
__ j(equal, &fallthrough);
// Check that there are sufficient characters left in the input.
- __ mov(ebx, edi);
- __ add(ebx, eax);
- BranchOrBacktrack(greater, on_no_match);
+ if (read_backward) {
+ __ mov(ebx, Operand(ebp, kStringStartMinusOne));
+ __ add(ebx, eax);
+ __ cmp(edi, ebx);
+ BranchOrBacktrack(less_equal, on_no_match);
+ } else {
+ __ mov(ebx, edi);
+ __ add(ebx, eax);
+ BranchOrBacktrack(greater, on_no_match);
+ }
// Save register to make it available below.
__ push(backtrack_stackpointer());
// Compute pointers to match string and capture string
- __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match.
__ add(edx, esi); // Start of capture.
+ __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match.
+ if (read_backward) {
+ __ sub(ebx, eax); // Offset by length when matching backwards.
+ }
__ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match
Label loop;
@@ -389,6 +410,11 @@ void RegExpMacroAssemblerX87::CheckNotBackReference(
// Move current character position to position after match.
__ mov(edi, ecx);
__ sub(edi, esi);
+ if (read_backward) {
+ // Subtract match length if we matched backward.
+ __ add(edi, register_location(start_reg));
+ __ sub(edi, register_location(start_reg + 1));
+ }
// Restore backtrack stackpointer.
__ pop(backtrack_stackpointer());
@@ -634,7 +660,7 @@ Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) {
__ push(edi);
__ push(ebx); // Callee-save on MacOS.
__ push(Immediate(0)); // Number of successful matches in a global regexp.
- __ push(Immediate(0)); // Make room for "input start - 1" constant.
+ __ push(Immediate(0)); // Make room for "string start - 1" constant.
// Check if we have space on the stack for registers.
Label stack_limit_hit;
@@ -684,7 +710,7 @@ Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) {
}
// Store this value in a local variable, for use when clearing
// position registers.
- __ mov(Operand(ebp, kInputStartMinusOne), eax);
+ __ mov(Operand(ebp, kStringStartMinusOne), eax);
#if V8_OS_WIN
// Ensure that we write to each stack page, in order. Skipping a page
@@ -767,7 +793,7 @@ Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) {
}
if (global()) {
- // Restart matching if the regular expression is flagged as global.
+ // Restart matching if the regular expression is flagged as global.
// Increment success counter.
__ inc(Operand(ebp, kSuccessfulCaptures));
// Capture results have been stored, so the number of remaining global
@@ -784,7 +810,7 @@ Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) {
Immediate(num_saved_registers_ * kPointerSize));
// Prepare eax to initialize registers with its value in the next run.
- __ mov(eax, Operand(ebp, kInputStartMinusOne));
+ __ mov(eax, Operand(ebp, kStringStartMinusOne));
if (global_with_zero_length_check()) {
// Special case for zero-length matches.
@@ -944,10 +970,13 @@ void RegExpMacroAssemblerX87::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
bool check_bounds,
int characters) {
- DCHECK(cp_offset >= -1); // ^ and \b can look behind one character.
DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
}
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -1031,7 +1060,7 @@ void RegExpMacroAssemblerX87::WriteCurrentPositionToRegister(int reg,
void RegExpMacroAssemblerX87::ClearRegisters(int reg_from, int reg_to) {
DCHECK(reg_from <= reg_to);
- __ mov(eax, Operand(ebp, kInputStartMinusOne));
+ __ mov(eax, Operand(ebp, kStringStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
__ mov(register_location(reg), eax);
}
@@ -1100,8 +1129,14 @@ Operand RegExpMacroAssemblerX87::register_location(int register_index) {
void RegExpMacroAssemblerX87::CheckPosition(int cp_offset,
Label* on_outside_input) {
- __ cmp(edi, -cp_offset * char_size());
- BranchOrBacktrack(greater_equal, on_outside_input);
+ if (cp_offset >= 0) {
+ __ cmp(edi, -cp_offset * char_size());
+ BranchOrBacktrack(greater_equal, on_outside_input);
+ } else {
+ __ lea(eax, Operand(edi, cp_offset * char_size()));
+ __ cmp(eax, Operand(ebp, kStringStartMinusOne));
+ BranchOrBacktrack(less_equal, on_outside_input);
+ }
}
diff --git a/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h b/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h
index 0deea50357..c95541224f 100644
--- a/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h
+++ b/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h
@@ -33,9 +33,11 @@ class RegExpMacroAssemblerX87: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
@@ -116,9 +118,9 @@ class RegExpMacroAssemblerX87: public NativeRegExpMacroAssembler {
static const int kBackup_edi = kBackup_esi - kPointerSize;
static const int kBackup_ebx = kBackup_edi - kPointerSize;
static const int kSuccessfulCaptures = kBackup_ebx - kPointerSize;
- static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
+ static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
+ static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;