diff options
author | Ali Ijaz Sheikh <ofrobots@google.com> | 2016-03-01 08:58:05 -0800 |
---|---|---|
committer | Ali Sheikh <ofrobots@lemonhope.roam.corp.google.com> | 2016-03-03 20:35:20 -0800 |
commit | 069e02ab47656b3efd1b6829c65856b2e1c2d1db (patch) | |
tree | eb643e0a2e88fd64bb9fc927423458d2ae96c2db /deps/v8/src/regexp | |
parent | 8938355398c79f583a468284b768652d12ba9bc9 (diff) | |
download | android-node-v8-069e02ab47656b3efd1b6829c65856b2e1c2d1db.tar.gz android-node-v8-069e02ab47656b3efd1b6829c65856b2e1c2d1db.tar.bz2 android-node-v8-069e02ab47656b3efd1b6829c65856b2e1c2d1db.zip |
deps: upgrade to V8 4.9.385.18
Pick up the current branch head for V8 4.9
https://github.com/v8/v8/commit/1ecba0f
PR-URL: https://github.com/nodejs/node/pull/4722
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Michaƫl Zasso <mic.besace@gmail.com>
Diffstat (limited to 'deps/v8/src/regexp')
33 files changed, 3312 insertions, 802 deletions
diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc index d296d90e7d..6fafdfb4ad 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.cc @@ -58,7 +58,7 @@ namespace internal { * - fp[-16] void* input_string (location of a handle containing the string). * - fp[-20] success counter (only for global regexps to count matches). * - fp[-24] Offset of location before start of input (effectively character - * position -1). Used to initialize capture registers to a + * string start - 1). Used to initialize capture registers to a * non-position. * - fp[-28] At start (if 1, we are starting at the start of the * string, otherwise 0) @@ -98,7 +98,8 @@ RegExpMacroAssemblerARM::RegExpMacroAssemblerARM(Isolate* isolate, Zone* zone, Mode mode, int registers_to_save) : NativeRegExpMacroAssembler(isolate, zone), - masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)), + masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, + CodeObjectRequired::kYes)), mode_(mode), num_registers_(registers_to_save), num_saved_registers_(registers_to_save), @@ -176,29 +177,18 @@ void RegExpMacroAssemblerARM::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerARM::CheckAtStart(Label* on_at_start) { - Label not_at_start; - // Did we start the match at the start of the string at all? - __ ldr(r0, MemOperand(frame_pointer(), kStartIndex)); - __ cmp(r0, Operand::Zero()); - BranchOrBacktrack(ne, ¬_at_start); - - // If we did, are we still at the start of the input? - __ ldr(r1, MemOperand(frame_pointer(), kInputStart)); - __ add(r0, end_of_input_address(), Operand(current_input_offset())); + __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ add(r0, current_input_offset(), Operand(-char_size())); __ cmp(r0, r1); BranchOrBacktrack(eq, on_at_start); - __ bind(¬_at_start); } -void RegExpMacroAssemblerARM::CheckNotAtStart(Label* on_not_at_start) { - // Did we start the match at the start of the string at all? - __ ldr(r0, MemOperand(frame_pointer(), kStartIndex)); - __ cmp(r0, Operand::Zero()); - BranchOrBacktrack(ne, on_not_at_start); - // If we did, are we still at the start of the input? - __ ldr(r1, MemOperand(frame_pointer(), kInputStart)); - __ add(r0, end_of_input_address(), Operand(current_input_offset())); +void RegExpMacroAssemblerARM::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ add(r0, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); __ cmp(r0, r1); BranchOrBacktrack(ne, on_not_at_start); } @@ -220,20 +210,27 @@ void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) { void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_no_match) { + int start_reg, bool read_backward, Label* on_no_match) { Label fallthrough; __ ldr(r0, register_location(start_reg)); // Index of start of capture __ ldr(r1, register_location(start_reg + 1)); // Index of end of capture __ sub(r1, r1, r0, SetCC); // Length of capture. - // If length is zero, either the capture is empty or it is not participating. - // In either case succeed immediately. + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ b(eq, &fallthrough); // Check that there are enough characters left in the input. - __ cmn(r1, Operand(current_input_offset())); - BranchOrBacktrack(gt, on_no_match); + if (read_backward) { + __ ldr(r3, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ add(r3, r3, r1); + __ cmp(current_input_offset(), r3); + BranchOrBacktrack(le, on_no_match); + } else { + __ cmn(r1, Operand(current_input_offset())); + BranchOrBacktrack(gt, on_no_match); + } if (mode_ == LATIN1) { Label success; @@ -242,9 +239,12 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase( // r0 - offset of start of capture // r1 - length of capture - __ add(r0, r0, Operand(end_of_input_address())); - __ add(r2, end_of_input_address(), Operand(current_input_offset())); - __ add(r1, r0, Operand(r1)); + __ add(r0, r0, end_of_input_address()); + __ add(r2, end_of_input_address(), current_input_offset()); + if (read_backward) { + __ sub(r2, r2, r1); // Offset by length when matching backwards. + } + __ add(r1, r0, r1); // r0 - Address of start of capture. // r1 - Address of end of capture @@ -283,6 +283,12 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase( __ bind(&success); // Compute new value of character position after the matched part. __ sub(current_input_offset(), r2, end_of_input_address()); + if (read_backward) { + __ ldr(r0, register_location(start_reg)); // Index of start of capture + __ ldr(r1, register_location(start_reg + 1)); // Index of end of capture + __ add(current_input_offset(), current_input_offset(), r0); + __ sub(current_input_offset(), current_input_offset(), r1); + } } else { DCHECK(mode_ == UC16); int argument_count = 4; @@ -305,7 +311,10 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase( // Save length in callee-save register for use on return. __ mov(r4, Operand(r1)); // Address of current input position. - __ add(r1, current_input_offset(), Operand(end_of_input_address())); + __ add(r1, current_input_offset(), end_of_input_address()); + if (read_backward) { + __ sub(r1, r1, r4); + } // Isolate. __ mov(r3, Operand(ExternalReference::isolate_address(isolate()))); @@ -319,17 +328,22 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase( // Check if function returned non-zero for success or zero for failure. __ cmp(r0, Operand::Zero()); BranchOrBacktrack(eq, on_no_match); - // On success, increment position by length of capture. - __ add(current_input_offset(), current_input_offset(), Operand(r4)); + + // On success, advance position by length of capture. + if (read_backward) { + __ sub(current_input_offset(), current_input_offset(), r4); + } else { + __ add(current_input_offset(), current_input_offset(), r4); + } } __ bind(&fallthrough); } -void RegExpMacroAssemblerARM::CheckNotBackReference( - int start_reg, - Label* on_no_match) { +void RegExpMacroAssemblerARM::CheckNotBackReference(int start_reg, + bool read_backward, + Label* on_no_match) { Label fallthrough; Label success; @@ -337,17 +351,31 @@ void RegExpMacroAssemblerARM::CheckNotBackReference( __ ldr(r0, register_location(start_reg)); __ ldr(r1, register_location(start_reg + 1)); __ sub(r1, r1, r0, SetCC); // Length to check. - // Succeed on empty capture (including no capture). + + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ b(eq, &fallthrough); // Check that there are enough characters left in the input. - __ cmn(r1, Operand(current_input_offset())); - BranchOrBacktrack(gt, on_no_match); + if (read_backward) { + __ ldr(r3, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ add(r3, r3, r1); + __ cmp(current_input_offset(), r3); + BranchOrBacktrack(lt, on_no_match); + } else { + __ cmn(r1, Operand(current_input_offset())); + BranchOrBacktrack(gt, on_no_match); + } - // Compute pointers to match string and capture string - __ add(r0, r0, Operand(end_of_input_address())); - __ add(r2, end_of_input_address(), Operand(current_input_offset())); - __ add(r1, r1, Operand(r0)); + // r0 - offset of start of capture + // r1 - length of capture + __ add(r0, r0, end_of_input_address()); + __ add(r2, end_of_input_address(), current_input_offset()); + if (read_backward) { + __ sub(r2, r2, r1); // Offset by length when matching backwards. + } + __ add(r1, r0, r1); Label loop; __ bind(&loop); @@ -366,6 +394,13 @@ void RegExpMacroAssemblerARM::CheckNotBackReference( // Move current character position to position after match. __ sub(current_input_offset(), r2, end_of_input_address()); + if (read_backward) { + __ ldr(r0, register_location(start_reg)); // Index of start of capture + __ ldr(r1, register_location(start_reg + 1)); // Index of end of capture + __ add(current_input_offset(), current_input_offset(), r0); + __ sub(current_input_offset(), current_input_offset(), r1); + } + __ bind(&fallthrough); } @@ -603,7 +638,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { __ add(frame_pointer(), sp, Operand(4 * kPointerSize)); __ mov(r0, Operand::Zero()); __ push(r0); // Make room for success counter and initialize it to 0. - __ push(r0); // Make room for "position - 1" constant (value is irrelevant). + __ push(r0); // Make room for "string start - 1" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; Label stack_ok; @@ -647,7 +682,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { __ sub(r0, r0, Operand(r1, LSL, (mode_ == UC16) ? 1 : 0)); // Store this value in a local variable, for use when clearing // position registers. - __ str(r0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ str(r0, MemOperand(frame_pointer(), kStringStartMinusOne)); // Initialize code pointer register __ mov(code_pointer(), Operand(masm_->CodeObject())); @@ -751,7 +786,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { __ str(r2, MemOperand(frame_pointer(), kRegisterOutput)); // Prepare r0 to initialize registers with its value in the next run. - __ ldr(r0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ ldr(r0, MemOperand(frame_pointer(), kStringStartMinusOne)); if (global_with_zero_length_check()) { // Special case for zero-length matches. @@ -892,10 +927,13 @@ void RegExpMacroAssemblerARM::LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, bool check_bounds, int characters) { - DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works) if (check_bounds) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -983,7 +1021,7 @@ void RegExpMacroAssemblerARM::WriteCurrentPositionToRegister(int reg, void RegExpMacroAssemblerARM::ClearRegisters(int reg_from, int reg_to) { DCHECK(reg_from <= reg_to); - __ ldr(r0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ ldr(r0, MemOperand(frame_pointer(), kStringStartMinusOne)); for (int reg = reg_from; reg <= reg_to; reg++) { __ str(r0, register_location(reg)); } @@ -1069,8 +1107,15 @@ MemOperand RegExpMacroAssemblerARM::register_location(int register_index) { void RegExpMacroAssemblerARM::CheckPosition(int cp_offset, Label* on_outside_input) { - __ cmp(current_input_offset(), Operand(-cp_offset * char_size())); - BranchOrBacktrack(ge, on_outside_input); + if (cp_offset >= 0) { + __ cmp(current_input_offset(), Operand(-cp_offset * char_size())); + BranchOrBacktrack(ge, on_outside_input); + } else { + __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ add(r0, current_input_offset(), Operand(cp_offset * char_size())); + __ cmp(r0, r1); + BranchOrBacktrack(le, on_outside_input); + } } diff --git a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h index c6cff65635..233a98f761 100644 --- a/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h +++ b/deps/v8/src/regexp/arm/regexp-macro-assembler-arm.h @@ -34,9 +34,11 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler { // A "greedy loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(unsigned c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(unsigned c, @@ -119,9 +121,9 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler { // When adding local variables remember to push space for them in // the frame in GetCode. static const int kSuccessfulCaptures = kInputString - kPointerSize; - static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize; + static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kInputStartMinusOne - kPointerSize; + static const int kRegisterZero = kStringStartMinusOne - kPointerSize; // Initial size of code buffer. static const size_t kRegExpCodeSize = 1024; diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc index d440879e26..9948597ca0 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.cc @@ -113,7 +113,8 @@ RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate, Zone* zone, Mode mode, int registers_to_save) : NativeRegExpMacroAssembler(isolate, zone), - masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)), + masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, + CodeObjectRequired::kYes)), mode_(mode), num_registers_(registers_to_save), num_saved_registers_(registers_to_save), @@ -210,23 +211,17 @@ void RegExpMacroAssemblerARM64::CheckCharacterGT(uc16 limit, void RegExpMacroAssemblerARM64::CheckAtStart(Label* on_at_start) { - Label not_at_start; - // Did we start the match at the start of the input string? - CompareAndBranchOrBacktrack(start_offset(), 0, ne, ¬_at_start); - // If we did, are we still at the start of the input string? - __ Add(x10, input_end(), Operand(current_input_offset(), SXTW)); - __ Cmp(x10, input_start()); + __ Add(w10, current_input_offset(), Operand(-char_size())); + __ Cmp(w10, string_start_minus_one()); BranchOrBacktrack(eq, on_at_start); - __ Bind(¬_at_start); } -void RegExpMacroAssemblerARM64::CheckNotAtStart(Label* on_not_at_start) { - // Did we start the match at the start of the input string? - CompareAndBranchOrBacktrack(start_offset(), 0, ne, on_not_at_start); - // If we did, are we still at the start of the input string? - __ Add(x10, input_end(), Operand(current_input_offset(), SXTW)); - __ Cmp(x10, input_start()); +void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + __ Add(w10, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); + __ Cmp(w10, string_start_minus_one()); BranchOrBacktrack(ne, on_not_at_start); } @@ -277,9 +272,9 @@ void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) { BranchOrBacktrack(eq, on_equal); } + void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_no_match) { + int start_reg, bool read_backward, Label* on_no_match) { Label fallthrough; Register capture_start_offset = w10; @@ -297,12 +292,21 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10)); } __ Sub(capture_length, w11, capture_start_offset); // Length to check. - // Succeed on empty capture (including no capture). - __ Cbz(capture_length, &fallthrough); + + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. + __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough); // Check that there are enough characters left in the input. - __ Cmn(capture_length, current_input_offset()); - BranchOrBacktrack(gt, on_no_match); + if (read_backward) { + __ Add(w12, string_start_minus_one(), capture_length); + __ Cmp(current_input_offset(), w12); + BranchOrBacktrack(le, on_no_match); + } else { + __ Cmn(capture_length, current_input_offset()); + BranchOrBacktrack(gt, on_no_match); + } if (mode_ == LATIN1) { Label success; @@ -322,6 +326,11 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( __ Add(current_position_address, input_end(), Operand(current_input_offset(), SXTW)); + if (read_backward) { + // Offset by length when matching backwards. + __ Sub(current_position_address, current_position_address, + Operand(capture_length, SXTW)); + } Label loop; __ Bind(&loop); @@ -355,6 +364,10 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( __ Bind(&success); // Compute new value of character position after the matched part. __ Sub(current_input_offset().X(), current_position_address, input_end()); + if (read_backward) { + __ Sub(current_input_offset().X(), current_input_offset().X(), + Operand(capture_length, SXTW)); + } if (masm_->emit_debug_code()) { __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); __ Ccmp(current_input_offset(), 0, NoFlag, eq); @@ -383,6 +396,9 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( __ Mov(w2, capture_length); // Address of current input position. __ Add(x1, input_end(), Operand(current_input_offset(), SXTW)); + if (read_backward) { + __ Sub(x1, x1, Operand(capture_length, SXTW)); + } // Isolate. __ Mov(x3, ExternalReference::isolate_address(isolate())); @@ -400,16 +416,20 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase( __ PopCPURegList(cached_registers); BranchOrBacktrack(eq, on_no_match); - // On success, increment position by length of capture. - __ Add(current_input_offset(), current_input_offset(), capture_length); + // On success, advance position by length of capture. + if (read_backward) { + __ Sub(current_input_offset(), current_input_offset(), capture_length); + } else { + __ Add(current_input_offset(), current_input_offset(), capture_length); + } } __ Bind(&fallthrough); } -void RegExpMacroAssemblerARM64::CheckNotBackReference( - int start_reg, - Label* on_no_match) { +void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg, + bool read_backward, + Label* on_no_match) { Label fallthrough; Register capture_start_address = x12; @@ -426,12 +446,21 @@ void RegExpMacroAssemblerARM64::CheckNotBackReference( __ Ldp(w11, w10, capture_location(start_reg, x10)); } __ Sub(capture_length, w11, w10); // Length to check. - // Succeed on empty capture (including no capture). - __ Cbz(capture_length, &fallthrough); + + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. + __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough); // Check that there are enough characters left in the input. - __ Cmn(capture_length, current_input_offset()); - BranchOrBacktrack(gt, on_no_match); + if (read_backward) { + __ Add(w12, string_start_minus_one(), capture_length); + __ Cmp(current_input_offset(), w12); + BranchOrBacktrack(le, on_no_match); + } else { + __ Cmn(capture_length, current_input_offset()); + BranchOrBacktrack(gt, on_no_match); + } // Compute pointers to match string and capture string __ Add(capture_start_address, input_end(), Operand(w10, SXTW)); @@ -441,6 +470,11 @@ void RegExpMacroAssemblerARM64::CheckNotBackReference( __ Add(current_position_address, input_end(), Operand(current_input_offset(), SXTW)); + if (read_backward) { + // Offset by length when matching backwards. + __ Sub(current_position_address, current_position_address, + Operand(capture_length, SXTW)); + } Label loop; __ Bind(&loop); @@ -459,6 +493,11 @@ void RegExpMacroAssemblerARM64::CheckNotBackReference( // Move current character position to position after match. __ Sub(current_input_offset().X(), current_position_address, input_end()); + if (read_backward) { + __ Sub(current_input_offset().X(), current_input_offset().X(), + Operand(capture_length, SXTW)); + } + if (masm_->emit_debug_code()) { __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW)); __ Ccmp(current_input_offset(), 0, NoFlag, eq); @@ -758,14 +797,13 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) { // The non-position value is used as a clearing value for the // capture registers, it corresponds to the position of the first character // minus one. - __ Sub(non_position_value(), current_input_offset(), char_size()); - __ Sub(non_position_value(), non_position_value(), + __ Sub(string_start_minus_one(), current_input_offset(), char_size()); + __ Sub(string_start_minus_one(), string_start_minus_one(), Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0)); // We can store this value twice in an X register for initializing // on-stack registers later. - __ Orr(twice_non_position_value(), - non_position_value().X(), - Operand(non_position_value().X(), LSL, kWRegSizeInBits)); + __ Orr(twice_non_position_value(), string_start_minus_one().X(), + Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits)); // Initialize code pointer register. __ Mov(code_pointer(), Operand(masm_->CodeObject())); @@ -1081,11 +1119,14 @@ void RegExpMacroAssemblerARM64::LoadCurrentCharacter(int cp_offset, int characters) { // TODO(pielan): Make sure long strings are caught before this, and not // just asserted in debug mode. - DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. // Be sane! (And ensure that an int32_t can be used to index the string) DCHECK(cp_offset < (1<<30)); if (check_bounds) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -1210,7 +1251,7 @@ void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) { // If the first capture register is cached in a hardware register but not // aligned on a 64-bit one, we need to clear the first one specifically. if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) { - StoreRegister(reg_from, non_position_value()); + StoreRegister(reg_from, string_start_minus_one()); num_registers--; reg_from++; } @@ -1224,7 +1265,7 @@ void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) { } if ((num_registers % 2) == 1) { - StoreRegister(reg_from, non_position_value()); + StoreRegister(reg_from, string_start_minus_one()); num_registers--; reg_from++; } @@ -1301,10 +1342,14 @@ int RegExpMacroAssemblerARM64::CheckStackGuardState( void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset, Label* on_outside_input) { - CompareAndBranchOrBacktrack(current_input_offset(), - -cp_offset * char_size(), - ge, - on_outside_input); + if (cp_offset >= 0) { + CompareAndBranchOrBacktrack(current_input_offset(), + -cp_offset * char_size(), ge, on_outside_input); + } else { + __ Add(w12, current_input_offset(), Operand(cp_offset * char_size())); + __ Cmp(w12, string_start_minus_one()); + BranchOrBacktrack(le, on_outside_input); + } } diff --git a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h index 0dc519580d..d71f063d00 100644 --- a/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h +++ b/deps/v8/src/regexp/arm64/regexp-macro-assembler-arm64.h @@ -39,9 +39,11 @@ class RegExpMacroAssemblerARM64: public NativeRegExpMacroAssembler { // A "greedy loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(unsigned c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(unsigned c, @@ -190,7 +192,7 @@ class RegExpMacroAssemblerARM64: public NativeRegExpMacroAssembler { Register code_pointer() { return x20; } // Register holding the value used for clearing capture registers. - Register non_position_value() { return w24; } + Register string_start_minus_one() { return w24; } // The top 32 bit of this register is used to store this value // twice. This is used for clearing more than one register at a time. Register twice_non_position_value() { return x24; } diff --git a/deps/v8/src/regexp/bytecodes-irregexp.h b/deps/v8/src/regexp/bytecodes-irregexp.h index d6110a3cb5..2dbfbc0b82 100644 --- a/deps/v8/src/regexp/bytecodes-irregexp.h +++ b/deps/v8/src/regexp/bytecodes-irregexp.h @@ -57,15 +57,17 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \ V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \ V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \ V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ -V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \ -V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \ -V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \ -V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \ -V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \ -V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \ -V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ +V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ +V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \ +V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \ +V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \ +V(CHECK_NOT_AT_START, 46, 8) /* bc8 offset24 addr32 */ \ +V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \ +V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \ +V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */ #define DECLARE_BYTECODES(name, code, length) \ static const int BC_##name = code; diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc index 9e50a10574..6ef0f5fff6 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.cc @@ -53,7 +53,8 @@ namespace internal { * - backup of caller ebx * - success counter (only for global regexps to count matches). * - Offset of location before start of input (effectively character - * position -1). Used to initialize capture registers to a non-position. + * string start - 1). Used to initialize capture registers to a + * non-position. * - register 0 ebp[-4] (only positions must be stored in the first * - register 1 ebp[-8] num_saved_registers_ registers) * - ... @@ -80,7 +81,8 @@ RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(Isolate* isolate, Zone* zone, Mode mode, int registers_to_save) : NativeRegExpMacroAssembler(isolate, zone), - masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)), + masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, + CodeObjectRequired::kYes)), mode_(mode), num_registers_(registers_to_save), num_saved_registers_(registers_to_save), @@ -156,25 +158,16 @@ void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) { - Label not_at_start; - // Did we start the match at the start of the string at all? - __ cmp(Operand(ebp, kStartIndex), Immediate(0)); - BranchOrBacktrack(not_equal, ¬_at_start); - // If we did, are we still at the start of the input? - __ lea(eax, Operand(esi, edi, times_1, 0)); - __ cmp(eax, Operand(ebp, kInputStart)); + __ lea(eax, Operand(edi, -char_size())); + __ cmp(eax, Operand(ebp, kStringStartMinusOne)); BranchOrBacktrack(equal, on_at_start); - __ bind(¬_at_start); } -void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) { - // Did we start the match at the start of the string at all? - __ cmp(Operand(ebp, kStartIndex), Immediate(0)); - BranchOrBacktrack(not_equal, on_not_at_start); - // If we did, are we still at the start of the input? - __ lea(eax, Operand(esi, edi, times_1, 0)); - __ cmp(eax, Operand(ebp, kInputStart)); +void RegExpMacroAssemblerIA32::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size())); + __ cmp(eax, Operand(ebp, kStringStartMinusOne)); BranchOrBacktrack(not_equal, on_not_at_start); } @@ -196,26 +189,28 @@ void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) { void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_no_match) { + int start_reg, bool read_backward, Label* on_no_match) { Label fallthrough; __ mov(edx, register_location(start_reg)); // Index of start of capture __ mov(ebx, register_location(start_reg + 1)); // Index of end of capture __ sub(ebx, edx); // Length of capture. - // The length of a capture should not be negative. This can only happen - // if the end of the capture is unrecorded, or at a point earlier than - // the start of the capture. - BranchOrBacktrack(less, on_no_match); - - // If length is zero, either the capture is empty or it is completely - // uncaptured. In either case succeed immediately. + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ j(equal, &fallthrough); // Check that there are sufficient characters left in the input. - __ mov(eax, edi); - __ add(eax, ebx); - BranchOrBacktrack(greater, on_no_match); + if (read_backward) { + __ mov(eax, Operand(ebp, kStringStartMinusOne)); + __ add(eax, ebx); + __ cmp(edi, eax); + BranchOrBacktrack(less_equal, on_no_match); + } else { + __ mov(eax, edi); + __ add(eax, ebx); + BranchOrBacktrack(greater, on_no_match); + } if (mode_ == LATIN1) { Label success; @@ -228,6 +223,9 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase( __ add(edx, esi); // Start of capture __ add(edi, esi); // Start of text to match against capture. + if (read_backward) { + __ sub(edi, ebx); // Offset by length when matching backwards. + } __ add(ebx, edi); // End of text to match against capture. Label loop; @@ -278,6 +276,11 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase( __ add(esp, Immediate(kPointerSize)); // Compute new value of character position after the matched part. __ sub(edi, esi); + if (read_backward) { + // Subtract match length if we matched backward. + __ add(edi, register_location(start_reg)); + __ sub(edi, register_location(start_reg + 1)); + } } else { DCHECK(mode_ == UC16); // Save registers before calling C function. @@ -304,6 +307,9 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase( // Found by adding negative string-end offset of current position (edi) // to end of string. __ add(edi, esi); + if (read_backward) { + __ sub(edi, ebx); // Offset by length when matching backwards. + } __ mov(Operand(esp, 1 * kPointerSize), edi); // Set byte_offset1. // Start of capture, where edx already holds string-end negative offset. @@ -325,16 +331,20 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase( // Check if function returned non-zero for success or zero for failure. __ or_(eax, eax); BranchOrBacktrack(zero, on_no_match); - // On success, increment position by length of capture. - __ add(edi, ebx); + // On success, advance position by length of capture. + if (read_backward) { + __ sub(edi, ebx); + } else { + __ add(edi, ebx); + } } __ bind(&fallthrough); } -void RegExpMacroAssemblerIA32::CheckNotBackReference( - int start_reg, - Label* on_no_match) { +void RegExpMacroAssemblerIA32::CheckNotBackReference(int start_reg, + bool read_backward, + Label* on_no_match) { Label fallthrough; Label success; Label fail; @@ -343,22 +353,33 @@ void RegExpMacroAssemblerIA32::CheckNotBackReference( __ mov(edx, register_location(start_reg)); __ mov(eax, register_location(start_reg + 1)); __ sub(eax, edx); // Length to check. - // Fail on partial or illegal capture (start of capture after end of capture). - BranchOrBacktrack(less, on_no_match); - // Succeed on empty capture (including no capture) + + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ j(equal, &fallthrough); // Check that there are sufficient characters left in the input. - __ mov(ebx, edi); - __ add(ebx, eax); - BranchOrBacktrack(greater, on_no_match); + if (read_backward) { + __ mov(ebx, Operand(ebp, kStringStartMinusOne)); + __ add(ebx, eax); + __ cmp(edi, ebx); + BranchOrBacktrack(less_equal, on_no_match); + } else { + __ mov(ebx, edi); + __ add(ebx, eax); + BranchOrBacktrack(greater, on_no_match); + } // Save register to make it available below. __ push(backtrack_stackpointer()); // Compute pointers to match string and capture string - __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match. __ add(edx, esi); // Start of capture. + __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match. + if (read_backward) { + __ sub(ebx, eax); // Offset by length when matching backwards. + } __ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match Label loop; @@ -389,6 +410,11 @@ void RegExpMacroAssemblerIA32::CheckNotBackReference( // Move current character position to position after match. __ mov(edi, ecx); __ sub(edi, esi); + if (read_backward) { + // Subtract match length if we matched backward. + __ add(edi, register_location(start_reg)); + __ sub(edi, register_location(start_reg + 1)); + } // Restore backtrack stackpointer. __ pop(backtrack_stackpointer()); @@ -634,7 +660,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { __ push(edi); __ push(ebx); // Callee-save on MacOS. __ push(Immediate(0)); // Number of successful matches in a global regexp. - __ push(Immediate(0)); // Make room for "input start - 1" constant. + __ push(Immediate(0)); // Make room for "string start - 1" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; @@ -684,7 +710,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { } // Store this value in a local variable, for use when clearing // position registers. - __ mov(Operand(ebp, kInputStartMinusOne), eax); + __ mov(Operand(ebp, kStringStartMinusOne), eax); #if V8_OS_WIN // Ensure that we write to each stack page, in order. Skipping a page @@ -767,7 +793,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { } if (global()) { - // Restart matching if the regular expression is flagged as global. + // Restart matching if the regular expression is flagged as global. // Increment success counter. __ inc(Operand(ebp, kSuccessfulCaptures)); // Capture results have been stored, so the number of remaining global @@ -784,7 +810,7 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { Immediate(num_saved_registers_ * kPointerSize)); // Prepare eax to initialize registers with its value in the next run. - __ mov(eax, Operand(ebp, kInputStartMinusOne)); + __ mov(eax, Operand(ebp, kStringStartMinusOne)); if (global_with_zero_length_check()) { // Special case for zero-length matches. @@ -944,10 +970,13 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, bool check_bounds, int characters) { - DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works) if (check_bounds) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -1031,7 +1060,7 @@ void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg, void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) { DCHECK(reg_from <= reg_to); - __ mov(eax, Operand(ebp, kInputStartMinusOne)); + __ mov(eax, Operand(ebp, kStringStartMinusOne)); for (int reg = reg_from; reg <= reg_to; reg++) { __ mov(register_location(reg), eax); } @@ -1100,8 +1129,14 @@ Operand RegExpMacroAssemblerIA32::register_location(int register_index) { void RegExpMacroAssemblerIA32::CheckPosition(int cp_offset, Label* on_outside_input) { - __ cmp(edi, -cp_offset * char_size()); - BranchOrBacktrack(greater_equal, on_outside_input); + if (cp_offset >= 0) { + __ cmp(edi, -cp_offset * char_size()); + BranchOrBacktrack(greater_equal, on_outside_input); + } else { + __ lea(eax, Operand(edi, cp_offset * char_size())); + __ cmp(eax, Operand(ebp, kStringStartMinusOne)); + BranchOrBacktrack(less_equal, on_outside_input); + } } diff --git a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h index 06b9699d01..1ef87eef38 100644 --- a/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h +++ b/deps/v8/src/regexp/ia32/regexp-macro-assembler-ia32.h @@ -33,9 +33,11 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler { // A "greedy loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(uint32_t c, @@ -116,9 +118,9 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler { static const int kBackup_edi = kBackup_esi - kPointerSize; static const int kBackup_ebx = kBackup_edi - kPointerSize; static const int kSuccessfulCaptures = kBackup_ebx - kPointerSize; - static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize; + static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kInputStartMinusOne - kPointerSize; + static const int kRegisterZero = kStringStartMinusOne - kPointerSize; // Initial size of code buffer. static const size_t kRegExpCodeSize = 1024; diff --git a/deps/v8/src/regexp/interpreter-irregexp.cc b/deps/v8/src/regexp/interpreter-irregexp.cc index afc31a3d57..ea748e4e55 100644 --- a/deps/v8/src/regexp/interpreter-irregexp.cc +++ b/deps/v8/src/regexp/interpreter-irregexp.cc @@ -6,7 +6,7 @@ #include "src/regexp/interpreter-irregexp.h" -#include "src/ast.h" +#include "src/ast/ast.h" #include "src/regexp/bytecodes-irregexp.h" #include "src/regexp/jsregexp.h" #include "src/regexp/regexp-macro-assembler.h" @@ -270,7 +270,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate, break; BYTECODE(LOAD_CURRENT_CHAR) { int pos = current + (insn >> BYTECODE_SHIFT); - if (pos >= subject.length()) { + if (pos >= subject.length() || pos < 0) { pc = code_base + Load32Aligned(pc + 4); } else { current_char = subject[pos]; @@ -286,7 +286,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate, } BYTECODE(LOAD_2_CURRENT_CHARS) { int pos = current + (insn >> BYTECODE_SHIFT); - if (pos + 2 > subject.length()) { + if (pos + 2 > subject.length() || pos < 0) { pc = code_base + Load32Aligned(pc + 4); } else { Char next = subject[pos + 1]; @@ -306,7 +306,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate, BYTECODE(LOAD_4_CURRENT_CHARS) { DCHECK(sizeof(Char) == 1); int pos = current + (insn >> BYTECODE_SHIFT); - if (pos + 4 > subject.length()) { + if (pos + 4 > subject.length() || pos < 0) { pc = code_base + Load32Aligned(pc + 4); } else { Char next1 = subject[pos + 1]; @@ -497,46 +497,59 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate, BYTECODE(CHECK_NOT_BACK_REF) { int from = registers[insn >> BYTECODE_SHIFT]; int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { - pc += BC_CHECK_NOT_BACK_REF_LENGTH; - break; - } - if (current + len > subject.length()) { - pc = code_base + Load32Aligned(pc + 4); - break; - } else { - int i; - for (i = 0; i < len; i++) { - if (subject[from + i] != subject[current + i]) { - pc = code_base + Load32Aligned(pc + 4); - break; - } + if (from >= 0 && len > 0) { + if (current + len > subject.length() || + CompareChars(&subject[from], &subject[current], len) != 0) { + pc = code_base + Load32Aligned(pc + 4); + break; } - if (i < len) break; current += len; } pc += BC_CHECK_NOT_BACK_REF_LENGTH; break; } + BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from >= 0 && len > 0) { + if (current - len < 0 || + CompareChars(&subject[from], &subject[current - len], len) != 0) { + pc = code_base + Load32Aligned(pc + 4); + break; + } + current -= len; + } + pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; + break; + } BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { int from = registers[insn >> BYTECODE_SHIFT]; int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; - break; + if (from >= 0 && len > 0) { + if (current + len > subject.length() || + !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(), + from, current, len, subject)) { + pc = code_base + Load32Aligned(pc + 4); + break; + } + current += len; } - if (current + len > subject.length()) { - pc = code_base + Load32Aligned(pc + 4); - break; - } else { - if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(), - from, current, len, subject)) { - current += len; - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; - } else { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; + break; + } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from >= 0 && len > 0) { + if (current - len < 0 || + !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(), + from, current - len, len, subject)) { pc = code_base + Load32Aligned(pc + 4); + break; } + current -= len; } + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; break; } BYTECODE(CHECK_AT_START) @@ -547,7 +560,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate, } break; BYTECODE(CHECK_NOT_AT_START) - if (current == 0) { + if (current + (insn >> BYTECODE_SHIFT) == 0) { pc += BC_CHECK_NOT_AT_START_LENGTH; } else { pc = code_base + Load32Aligned(pc + 4); diff --git a/deps/v8/src/regexp/jsregexp.cc b/deps/v8/src/regexp/jsregexp.cc index 225ad73c4e..34d20fe781 100644 --- a/deps/v8/src/regexp/jsregexp.cc +++ b/deps/v8/src/regexp/jsregexp.cc @@ -4,7 +4,7 @@ #include "src/regexp/jsregexp.h" -#include "src/ast.h" +#include "src/ast/ast.h" #include "src/base/platform/platform.h" #include "src/compilation-cache.h" #include "src/compiler.h" @@ -13,12 +13,12 @@ #include "src/isolate-inl.h" #include "src/messages.h" #include "src/ostreams.h" -#include "src/parser.h" #include "src/regexp/interpreter-irregexp.h" #include "src/regexp/jsregexp-inl.h" #include "src/regexp/regexp-macro-assembler.h" #include "src/regexp/regexp-macro-assembler-irregexp.h" #include "src/regexp/regexp-macro-assembler-tracer.h" +#include "src/regexp/regexp-parser.h" #include "src/regexp/regexp-stack.h" #include "src/runtime/runtime.h" #include "src/splay-tree-inl.h" @@ -51,16 +51,6 @@ namespace v8 { namespace internal { -MaybeHandle<Object> RegExpImpl::CreateRegExpLiteral( - Handle<JSFunction> constructor, - Handle<String> pattern, - Handle<String> flags) { - // Call the construct code with 2 arguments. - Handle<Object> argv[] = { pattern, flags }; - return Execution::New(constructor, arraysize(argv), argv); -} - - MUST_USE_RESULT static inline MaybeHandle<Object> ThrowRegExpException( Handle<JSRegExp> re, Handle<String> pattern, Handle<String> error_text) { @@ -156,25 +146,21 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, RegExpCompileData parse_result; FlatStringReader reader(isolate, pattern); if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader, - flags.is_multiline(), flags.is_unicode(), - &parse_result)) { + flags & JSRegExp::kMultiline, + flags & JSRegExp::kUnicode, &parse_result)) { // Throw an exception if we fail to parse the pattern. return ThrowRegExpException(re, pattern, parse_result.error); } bool has_been_compiled = false; - if (parse_result.simple && - !flags.is_ignore_case() && - !flags.is_sticky() && - !HasFewDifferentCharacters(pattern)) { + if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) && + !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) { // Parse-tree is a single atom that is equal to the pattern. AtomCompile(re, pattern, flags, pattern); has_been_compiled = true; - } else if (parse_result.tree->IsAtom() && - !flags.is_ignore_case() && - !flags.is_sticky() && - parse_result.capture_count == 0) { + } else if (parse_result.tree->IsAtom() && !(flags & JSRegExp::kIgnoreCase) && + !(flags & JSRegExp::kSticky) && parse_result.capture_count == 0) { RegExpAtom* atom = parse_result.tree->AsAtom(); Vector<const uc16> atom_pattern = atom->data(); Handle<String> atom_string; @@ -385,17 +371,18 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, pattern = String::Flatten(pattern); RegExpCompileData compile_data; FlatStringReader reader(isolate, pattern); - if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags.is_multiline(), - flags.is_unicode(), &compile_data)) { + if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, + flags & JSRegExp::kMultiline, + flags & JSRegExp::kUnicode, &compile_data)) { // Throw an exception if we fail to parse the pattern. // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. USE(ThrowRegExpException(re, pattern, compile_data.error)); return false; } RegExpEngine::CompilationResult result = RegExpEngine::Compile( - isolate, &zone, &compile_data, flags.is_ignore_case(), flags.is_global(), - flags.is_multiline(), flags.is_sticky(), pattern, sample_subject, - is_one_byte); + isolate, &zone, &compile_data, flags & JSRegExp::kIgnoreCase, + flags & JSRegExp::kGlobal, flags & JSRegExp::kMultiline, + flags & JSRegExp::kSticky, pattern, sample_subject, is_one_byte); if (result.error_message != NULL) { // Unable to compile regexp. Handle<String> error_message = isolate->factory()->NewStringFromUtf8( @@ -1002,6 +989,8 @@ class RegExpCompiler { inline void set_limiting_recursion(bool value) { limiting_recursion_ = value; } + bool read_backward() { return read_backward_; } + void set_read_backward(bool value) { read_backward_ = value; } FrequencyCollator* frequency_collator() { return &frequency_collator_; } int current_expansion_factor() { return current_expansion_factor_; } @@ -1025,6 +1014,7 @@ class RegExpCompiler { bool reg_exp_too_big_; bool limiting_recursion_; bool optimize_; + bool read_backward_; int current_expansion_factor_; FrequencyCollator frequency_collator_; Isolate* isolate_; @@ -1060,6 +1050,7 @@ RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, reg_exp_too_big_(false), limiting_recursion_(false), optimize_(FLAG_regexp_optimization), + read_backward_(false), current_expansion_factor_(1), frequency_collator_(), isolate_(isolate), @@ -1224,7 +1215,8 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, int value = 0; bool absolute = false; bool clear = false; - int store_position = -1; + static const int kNoStore = kMinInt; + int store_position = kNoStore; // This is a little tricky because we are scanning the actions in reverse // historical order (newest first). for (DeferredAction* action = actions_; @@ -1245,7 +1237,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, // we can set undo_action to IGNORE if we know there is no value to // restore. undo_action = RESTORE; - DCHECK_EQ(store_position, -1); + DCHECK_EQ(store_position, kNoStore); DCHECK(!clear); break; } @@ -1253,14 +1245,14 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, if (!absolute) { value++; } - DCHECK_EQ(store_position, -1); + DCHECK_EQ(store_position, kNoStore); DCHECK(!clear); undo_action = RESTORE; break; case ActionNode::STORE_POSITION: { Trace::DeferredCapture* pc = static_cast<Trace::DeferredCapture*>(action); - if (!clear && store_position == -1) { + if (!clear && store_position == kNoStore) { store_position = pc->cp_offset(); } @@ -1284,7 +1276,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, // Since we're scanning in reverse order, if we've already // set the position we have to ignore historically earlier // clearing operations. - if (store_position == -1) { + if (store_position == kNoStore) { clear = true; } undo_action = RESTORE; @@ -1315,7 +1307,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, } // Perform the chronologically last action (or accumulated increment) // for the register. - if (store_position != -1) { + if (store_position != kNoStore) { assembler->WriteCurrentPositionToRegister(reg, store_position); } else if (clear) { assembler->ClearRegisters(reg, reg); @@ -2313,6 +2305,7 @@ void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, int BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { + if (read_backward()) return 0; if (budget <= 0) return 0; return on_success()->EatsAtLeast(still_to_find, budget - 1, @@ -2323,6 +2316,7 @@ int BackReferenceNode::EatsAtLeast(int still_to_find, int TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { + if (read_backward()) return 0; int answer = Length(); if (answer >= still_to_find) return answer; if (budget <= 0) return answer; @@ -2333,9 +2327,8 @@ int TextNode::EatsAtLeast(int still_to_find, } -int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, - int budget, - bool not_at_start) { +int NegativeLookaroundChoiceNode::EatsAtLeast(int still_to_find, int budget, + bool not_at_start) { if (budget <= 0) return 0; // Alternative 0 is the negative lookahead, alternative 1 is what comes // afterwards. @@ -2344,10 +2337,8 @@ int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, } -void NegativeLookaheadChoiceNode::GetQuickCheckDetails( - QuickCheckDetails* details, - RegExpCompiler* compiler, - int filled_in, +void NegativeLookaroundChoiceNode::GetQuickCheckDetails( + QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in, bool not_at_start) { // Alternative 0 is the negative lookahead, alternative 1 is what comes // afterwards. @@ -2517,6 +2508,9 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) { + // Do not collect any quick check details if the text node reads backward, + // since it reads in the opposite direction than we use for quick checks. + if (read_backward()) return; Isolate* isolate = compiler->macro_assembler()->isolate(); DCHECK(characters_filled_in < details->characters()); int characters = details->characters(); @@ -2526,8 +2520,8 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, } else { char_mask = String::kMaxUtf16CodeUnit; } - for (int k = 0; k < elms_->length(); k++) { - TextElement elm = elms_->at(k); + for (int k = 0; k < elements()->length(); k++) { + TextElement elm = elements()->at(k); if (elm.text_type() == TextElement::ATOM) { Vector<const uc16> quarks = elm.atom()->data(); for (int i = 0; i < characters && i < quarks.length(); i++) { @@ -2678,11 +2672,13 @@ void QuickCheckDetails::Clear() { void QuickCheckDetails::Advance(int by, bool one_byte) { - DCHECK(by >= 0); - if (by >= characters_) { + if (by >= characters_ || by < 0) { + DCHECK_IMPLIES(by < 0, characters_ == 0); Clear(); return; } + DCHECK_LE(characters_ - by, 4); + DCHECK_LE(characters_, 4); for (int i = 0; i < characters_ - by; i++) { positions_[i] = positions_[by + i]; } @@ -2780,9 +2776,9 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { if (depth < 0) return this; DCHECK(!info()->visited); VisitMarker marker(info()); - int element_count = elms_->length(); + int element_count = elements()->length(); for (int i = 0; i < element_count; i++) { - TextElement elm = elms_->at(i); + TextElement elm = elements()->at(i); if (elm.text_type() == TextElement::ATOM) { Vector<const uc16> quarks = elm.atom()->data(); for (int j = 0; j < quarks.length(); j++) { @@ -2898,8 +2894,8 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { } -RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth, - bool ignore_case) { +RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth, + bool ignore_case) { if (info()->replacement_calculated) return replacement(); if (depth < 0) return this; if (info()->visited) return this; @@ -3146,9 +3142,9 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { return; } if (trace->at_start() == Trace::UNKNOWN) { - assembler->CheckNotAtStart(trace->backtrack()); + assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); Trace at_start_trace = *trace; - at_start_trace.set_at_start(true); + at_start_trace.set_at_start(Trace::TRUE_VALUE); on_success()->Emit(compiler, &at_start_trace); return; } @@ -3221,10 +3217,11 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, bool one_byte = compiler->one_byte(); Label* backtrack = trace->backtrack(); QuickCheckDetails* quick_check = trace->quick_check_performed(); - int element_count = elms_->length(); + int element_count = elements()->length(); + int backward_offset = read_backward() ? -Length() : 0; for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { - TextElement elm = elms_->at(i); - int cp_offset = trace->cp_offset() + elm.cp_offset(); + TextElement elm = elements()->at(i); + int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; if (elm.text_type() == TextElement::ATOM) { Vector<const uc16> quarks = elm.atom()->data(); for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { @@ -3252,13 +3249,10 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, break; } if (emit_function != NULL) { - bool bound_checked = emit_function(isolate, - compiler, - quarks[j], - backtrack, - cp_offset + j, - *checked_up_to < cp_offset + j, - preloaded); + bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); + bool bound_checked = + emit_function(isolate, compiler, quarks[j], backtrack, + cp_offset + j, bounds_check, preloaded); if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); } } @@ -3268,8 +3262,9 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, if (first_element_checked && i == 0) continue; if (DeterminedAlready(quick_check, elm.cp_offset())) continue; RegExpCharacterClass* cc = elm.char_class(); + bool bounds_check = *checked_up_to < cp_offset || read_backward(); EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, - *checked_up_to < cp_offset, preloaded, zone()); + bounds_check, preloaded, zone()); UpdateBoundsCheck(cp_offset, checked_up_to); } } @@ -3278,7 +3273,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, int TextNode::Length() { - TextElement elm = elms_->last(); + TextElement elm = elements()->last(); DCHECK(elm.cp_offset() >= 0); return elm.cp_offset() + elm.length(); } @@ -3347,8 +3342,11 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { } Trace successor_trace(*trace); - successor_trace.set_at_start(false); - successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); + // If we advance backward, we may end up at the start. + successor_trace.AdvanceCurrentPositionInTrace( + read_backward() ? -Length() : Length(), compiler); + successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN + : Trace::FALSE_VALUE); RecursionCheck rc(compiler); on_success()->Emit(compiler, &successor_trace); } @@ -3360,7 +3358,6 @@ void Trace::InvalidateCurrentCharacter() { void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { - DCHECK(by > 0); // We don't have an instruction for shifting the current character register // down or for using a shifted value for anything so lets just forget that // we preloaded any characters into it. @@ -3379,9 +3376,9 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { - int element_count = elms_->length(); + int element_count = elements()->length(); for (int i = 0; i < element_count; i++) { - TextElement elm = elms_->at(i); + TextElement elm = elements()->at(i); if (elm.text_type() == TextElement::CHAR_CLASS) { RegExpCharacterClass* cc = elm.char_class(); // None of the standard character classes is different in the case @@ -3397,16 +3394,14 @@ void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { } -int TextNode::GreedyLoopTextLength() { - TextElement elm = elms_->at(elms_->length() - 1); - return elm.cp_offset() + elm.length(); -} +int TextNode::GreedyLoopTextLength() { return Length(); } RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( RegExpCompiler* compiler) { - if (elms_->length() != 1) return NULL; - TextElement elm = elms_->at(0); + if (read_backward()) return NULL; + if (elements()->length() != 1) return NULL; + TextElement elm = elements()->at(0); if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; RegExpCharacterClass* node = elm.char_class(); ZoneList<CharacterRange>* ranges = node->ranges(zone()); @@ -3450,7 +3445,7 @@ int ChoiceNode::GreedyLoopTextLengthForAlternative( SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); node = seq_node->on_success(); } - return length; + return read_backward() ? -length : length; } @@ -3881,7 +3876,7 @@ void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { GreedyLoopState::GreedyLoopState(bool not_at_start) { counter_backtrack_trace_.set_backtrack(&label_); - if (not_at_start) counter_backtrack_trace_.set_at_start(false); + if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); } @@ -4008,7 +4003,7 @@ Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, macro_assembler->PushCurrentPosition(); Label greedy_match_failed; Trace greedy_match_trace; - if (not_at_start()) greedy_match_trace.set_at_start(false); + if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); greedy_match_trace.set_backtrack(&greedy_match_failed); Label loop_label; macro_assembler->Bind(&loop_label); @@ -4354,11 +4349,14 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { DCHECK_EQ(start_reg_ + 1, end_reg_); if (compiler->ignore_case()) { - assembler->CheckNotBackReferenceIgnoreCase(start_reg_, + assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(), trace->backtrack()); } else { - assembler->CheckNotBackReference(start_reg_, trace->backtrack()); + assembler->CheckNotBackReference(start_reg_, read_backward(), + trace->backtrack()); } + // We are going to advance backward, so we may end up at the start. + if (read_backward()) trace->set_at_start(Trace::UNKNOWN); on_success()->Emit(compiler, trace); } @@ -4719,13 +4717,15 @@ RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, ZoneList<TextElement>* elms = new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); elms->Add(TextElement::Atom(this), compiler->zone()); - return new(compiler->zone()) TextNode(elms, on_success); + return new (compiler->zone()) + TextNode(elms, compiler->read_backward(), on_success); } RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return new(compiler->zone()) TextNode(elements(), on_success); + return new (compiler->zone()) + TextNode(elements(), compiler->read_backward(), on_success); } @@ -4822,7 +4822,8 @@ bool RegExpCharacterClass::is_standard(Zone* zone) { RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return new(compiler->zone()) TextNode(this, on_success); + return new (compiler->zone()) + TextNode(this, compiler->read_backward(), on_success); } @@ -5204,7 +5205,9 @@ RegExpNode* RegExpQuantifier::ToNode(int min, GuardedAlternative(body->ToNode(compiler, answer))); } answer = alternation; - if (not_at_start) alternation->set_not_at_start(); + if (not_at_start && !compiler->read_backward()) { + alternation->set_not_at_start(); + } } return answer; } @@ -5216,9 +5219,9 @@ RegExpNode* RegExpQuantifier::ToNode(int min, int reg_ctr = needs_counter ? compiler->AllocateRegister() : RegExpCompiler::kNoRegister; - LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0, - zone); - if (not_at_start) center->set_not_at_start(); + LoopChoiceNode* center = new (zone) + LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone); + if (not_at_start && !compiler->read_backward()) center->set_not_at_start(); RegExpNode* loop_return = needs_counter ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) : static_cast<RegExpNode*>(center); @@ -5294,14 +5297,13 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, ZoneList<CharacterRange>* newline_ranges = new(zone) ZoneList<CharacterRange>(3, zone); CharacterRange::AddClassEscape('n', newline_ranges, zone); - RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n'); - TextNode* newline_matcher = new(zone) TextNode( - newline_atom, - ActionNode::PositiveSubmatchSuccess(stack_pointer_register, - position_register, - 0, // No captures inside. - -1, // Ignored if no captures. - on_success)); + RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n'); + TextNode* newline_matcher = new (zone) TextNode( + newline_atom, false, ActionNode::PositiveSubmatchSuccess( + stack_pointer_register, position_register, + 0, // No captures inside. + -1, // Ignored if no captures. + on_success)); // Create an end-of-input matcher. RegExpNode* end_of_line = ActionNode::BeginSubmatch( stack_pointer_register, @@ -5323,10 +5325,10 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return new(compiler->zone()) + return new (compiler->zone()) BackReferenceNode(RegExpCapture::StartRegister(index()), RegExpCapture::EndRegister(index()), - on_success); + compiler->read_backward(), on_success); } @@ -5336,8 +5338,8 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, } -RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, - RegExpNode* on_success) { +RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler, + RegExpNode* on_success) { int stack_pointer_register = compiler->AllocateRegister(); int position_register = compiler->AllocateRegister(); @@ -5347,19 +5349,16 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, int register_start = register_of_first_capture + capture_from_ * registers_per_capture; - RegExpNode* success; + RegExpNode* result; + bool was_reading_backward = compiler->read_backward(); + compiler->set_read_backward(type() == LOOKBEHIND); if (is_positive()) { - RegExpNode* node = ActionNode::BeginSubmatch( - stack_pointer_register, - position_register, - body()->ToNode( - compiler, - ActionNode::PositiveSubmatchSuccess(stack_pointer_register, - position_register, - register_count, - register_start, - on_success))); - return node; + result = ActionNode::BeginSubmatch( + stack_pointer_register, position_register, + body()->ToNode(compiler, + ActionNode::PositiveSubmatchSuccess( + stack_pointer_register, position_register, + register_count, register_start, on_success))); } else { // We use a ChoiceNode for a negative lookahead because it has most of // the characteristics we need. It has the body of the lookahead as its @@ -5374,21 +5373,16 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, Zone* zone = compiler->zone(); GuardedAlternative body_alt( - body()->ToNode( - compiler, - success = new(zone) NegativeSubmatchSuccess(stack_pointer_register, - position_register, - register_count, - register_start, - zone))); - ChoiceNode* choice_node = - new(zone) NegativeLookaheadChoiceNode(body_alt, - GuardedAlternative(on_success), - zone); - return ActionNode::BeginSubmatch(stack_pointer_register, - position_register, - choice_node); - } + body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess( + stack_pointer_register, position_register, + register_count, register_start, zone))); + ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode( + body_alt, GuardedAlternative(on_success), zone); + result = ActionNode::BeginSubmatch(stack_pointer_register, + position_register, choice_node); + } + compiler->set_read_backward(was_reading_backward); + return result; } @@ -5402,8 +5396,10 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body, int index, RegExpCompiler* compiler, RegExpNode* on_success) { + DCHECK_NOT_NULL(body); int start_reg = RegExpCapture::StartRegister(index); int end_reg = RegExpCapture::EndRegister(index); + if (compiler->read_backward()) std::swap(start_reg, end_reg); RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); RegExpNode* body_node = body->ToNode(compiler, store_end); return ActionNode::StorePosition(start_reg, true, body_node); @@ -5414,8 +5410,14 @@ RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { ZoneList<RegExpTree*>* children = nodes(); RegExpNode* current = on_success; - for (int i = children->length() - 1; i >= 0; i--) { - current = children->at(i)->ToNode(compiler, current); + if (compiler->read_backward()) { + for (int i = 0; i < children->length(); i++) { + current = children->at(i)->ToNode(compiler, current); + } + } else { + for (int i = children->length() - 1; i >= 0; i--) { + current = children->at(i)->ToNode(compiler, current); + } } return current; } @@ -6291,22 +6293,17 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( if (!is_start_anchored && !is_sticky) { // Add a .*? at the beginning, outside the body capture, unless // this expression is anchored at the beginning or sticky. - RegExpNode* loop_node = - RegExpQuantifier::ToNode(0, - RegExpTree::kInfinity, - false, - new(zone) RegExpCharacterClass('*'), - &compiler, - captured_body, - data->contains_anchor); + RegExpNode* loop_node = RegExpQuantifier::ToNode( + 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), + &compiler, captured_body, data->contains_anchor); if (data->contains_anchor) { // Unroll loop once, to take care of the case that might start // at the start of input. ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); first_step_node->AddAlternative(GuardedAlternative(captured_body)); - first_step_node->AddAlternative(GuardedAlternative( - new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); + first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( + new (zone) RegExpCharacterClass('*'), false, loop_node))); node = first_step_node; } else { node = loop_node; diff --git a/deps/v8/src/regexp/jsregexp.h b/deps/v8/src/regexp/jsregexp.h index 537bdff8e2..0ad4b79c87 100644 --- a/deps/v8/src/regexp/jsregexp.h +++ b/deps/v8/src/regexp/jsregexp.h @@ -7,6 +7,7 @@ #include "src/allocation.h" #include "src/assembler.h" +#include "src/regexp/regexp-ast.h" namespace v8 { namespace internal { @@ -29,13 +30,6 @@ class RegExpImpl { #endif } - // Creates a regular expression literal in the old space. - // This function calls the garbage collector if necessary. - MUST_USE_RESULT static MaybeHandle<Object> CreateRegExpLiteral( - Handle<JSFunction> constructor, - Handle<String> pattern, - Handle<String> flags); - // Returns a string representation of a regular expression. // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4. // This function calls the garbage collector if necessary. @@ -233,63 +227,6 @@ enum ElementInSetsRelation { }; -// Represents code units in the range from from_ to to_, both ends are -// inclusive. -class CharacterRange { - public: - CharacterRange() : from_(0), to_(0) { } - // For compatibility with the CHECK_OK macro - CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT - CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } - static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, - Zone* zone); - static Vector<const int> GetWordBounds(); - static inline CharacterRange Singleton(uc16 value) { - return CharacterRange(value, value); - } - static inline CharacterRange Range(uc16 from, uc16 to) { - DCHECK(from <= to); - return CharacterRange(from, to); - } - static inline CharacterRange Everything() { - return CharacterRange(0, 0xFFFF); - } - bool Contains(uc16 i) { return from_ <= i && i <= to_; } - uc16 from() const { return from_; } - void set_from(uc16 value) { from_ = value; } - uc16 to() const { return to_; } - void set_to(uc16 value) { to_ = value; } - bool is_valid() { return from_ <= to_; } - bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } - bool IsSingleton() { return (from_ == to_); } - void AddCaseEquivalents(Isolate* isolate, Zone* zone, - ZoneList<CharacterRange>* ranges, bool is_one_byte); - static void Split(ZoneList<CharacterRange>* base, - Vector<const int> overlay, - ZoneList<CharacterRange>** included, - ZoneList<CharacterRange>** excluded, - Zone* zone); - // Whether a range list is in canonical form: Ranges ordered by from value, - // and ranges non-overlapping and non-adjacent. - static bool IsCanonical(ZoneList<CharacterRange>* ranges); - // Convert range list to canonical form. The characters covered by the ranges - // will still be the same, but no character is in more than one range, and - // adjacent ranges are merged. The resulting list may be shorter than the - // original, but cannot be longer. - static void Canonicalize(ZoneList<CharacterRange>* ranges); - // Negate the contents of a character range in canonical form. - static void Negate(ZoneList<CharacterRange>* src, - ZoneList<CharacterRange>* dst, - Zone* zone); - static const int kStartMarker = (1 << 24); - static const int kPayloadMask = (1 << 24) - 1; - - private: - uc16 from_; - uc16 to_; -}; - - // A set of unsigned integers that behaves especially well on small // integers (< 32). May do zone-allocation. class OutSet: public ZoneObject { @@ -387,63 +324,6 @@ class DispatchTable : public ZoneObject { VISIT(Text) -#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ - VISIT(Disjunction) \ - VISIT(Alternative) \ - VISIT(Assertion) \ - VISIT(CharacterClass) \ - VISIT(Atom) \ - VISIT(Quantifier) \ - VISIT(Capture) \ - VISIT(Lookahead) \ - VISIT(BackReference) \ - VISIT(Empty) \ - VISIT(Text) - - -#define FORWARD_DECLARE(Name) class RegExp##Name; -FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) -#undef FORWARD_DECLARE - - -class TextElement final BASE_EMBEDDED { - public: - enum TextType { - ATOM, - CHAR_CLASS - }; - - static TextElement Atom(RegExpAtom* atom); - static TextElement CharClass(RegExpCharacterClass* char_class); - - int cp_offset() const { return cp_offset_; } - void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } - int length() const; - - TextType text_type() const { return text_type_; } - - RegExpTree* tree() const { return tree_; } - - RegExpAtom* atom() const { - DCHECK(text_type() == ATOM); - return reinterpret_cast<RegExpAtom*>(tree()); - } - - RegExpCharacterClass* char_class() const { - DCHECK(text_type() == CHAR_CLASS); - return reinterpret_cast<RegExpCharacterClass*>(tree()); - } - - private: - TextElement(TextType text_type, RegExpTree* tree) - : cp_offset_(-1), text_type_(text_type), tree_(tree) {} - - int cp_offset_; - TextType text_type_; - RegExpTree* tree_; -}; - - class Trace; struct PreloadState; class GreedyLoopState; @@ -603,7 +483,7 @@ class RegExpNode: public ZoneObject { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start) = 0; - static const int kNodeIsTooComplexForGreedyLoops = -1; + static const int kNodeIsTooComplexForGreedyLoops = kMinInt; virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } // Only returns the successor for a text node of length 1 that matches any // character and that has no guards on it. @@ -695,33 +575,6 @@ class RegExpNode: public ZoneObject { }; -// A simple closed interval. -class Interval { - public: - Interval() : from_(kNone), to_(kNone) { } - Interval(int from, int to) : from_(from), to_(to) { } - Interval Union(Interval that) { - if (that.from_ == kNone) - return *this; - else if (from_ == kNone) - return that; - else - return Interval(Min(from_, that.from_), Max(to_, that.to_)); - } - bool Contains(int value) { - return (from_ <= value) && (value <= to_); - } - bool is_empty() { return from_ == kNone; } - int from() const { return from_; } - int to() const { return to_; } - static Interval Empty() { return Interval(); } - static const int kNone = -1; - private: - int from_; - int to_; -}; - - class SeqRegExpNode: public RegExpNode { public: explicit SeqRegExpNode(RegExpNode* on_success) @@ -827,14 +680,14 @@ class ActionNode: public SeqRegExpNode { class TextNode: public SeqRegExpNode { public: - TextNode(ZoneList<TextElement>* elms, + TextNode(ZoneList<TextElement>* elms, bool read_backward, RegExpNode* on_success) - : SeqRegExpNode(on_success), - elms_(elms) { } - TextNode(RegExpCharacterClass* that, + : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} + TextNode(RegExpCharacterClass* that, bool read_backward, RegExpNode* on_success) : SeqRegExpNode(on_success), - elms_(new(zone()) ZoneList<TextElement>(1, zone())) { + elms_(new (zone()) ZoneList<TextElement>(1, zone())), + read_backward_(read_backward) { elms_->Add(TextElement::CharClass(that), zone()); } virtual void Accept(NodeVisitor* visitor); @@ -845,6 +698,7 @@ class TextNode: public SeqRegExpNode { int characters_filled_in, bool not_at_start); ZoneList<TextElement>* elements() { return elms_; } + bool read_backward() { return read_backward_; } void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); virtual int GreedyLoopTextLength(); virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( @@ -873,6 +727,7 @@ class TextNode: public SeqRegExpNode { int* checked_up_to); int Length(); ZoneList<TextElement>* elms_; + bool read_backward_; }; @@ -925,15 +780,16 @@ class AssertionNode: public SeqRegExpNode { class BackReferenceNode: public SeqRegExpNode { public: - BackReferenceNode(int start_reg, - int end_reg, + BackReferenceNode(int start_reg, int end_reg, bool read_backward, RegExpNode* on_success) : SeqRegExpNode(on_success), start_reg_(start_reg), - end_reg_(end_reg) { } + end_reg_(end_reg), + read_backward_(read_backward) {} virtual void Accept(NodeVisitor* visitor); int start_register() { return start_reg_; } int end_register() { return end_reg_; } + bool read_backward() { return read_backward_; } virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual int EatsAtLeast(int still_to_find, int recursion_depth, @@ -950,6 +806,7 @@ class BackReferenceNode: public SeqRegExpNode { private: int start_reg_; int end_reg_; + bool read_backward_; }; @@ -1074,6 +931,7 @@ class ChoiceNode: public RegExpNode { return true; } virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); + virtual bool read_backward() { return false; } protected: int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); @@ -1116,11 +974,11 @@ class ChoiceNode: public RegExpNode { }; -class NegativeLookaheadChoiceNode: public ChoiceNode { +class NegativeLookaroundChoiceNode : public ChoiceNode { public: - explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, - GuardedAlternative then_do_this, - Zone* zone) + explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail, + GuardedAlternative then_do_this, + Zone* zone) : ChoiceNode(2, zone) { AddAlternative(this_must_fail); AddAlternative(then_do_this); @@ -1150,12 +1008,12 @@ class NegativeLookaheadChoiceNode: public ChoiceNode { class LoopChoiceNode: public ChoiceNode { public: - explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) + LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, Zone* zone) : ChoiceNode(2, zone), loop_node_(NULL), continue_node_(NULL), - body_can_be_zero_length_(body_can_be_zero_length) - { } + body_can_be_zero_length_(body_can_be_zero_length), + read_backward_(read_backward) {} void AddLoopAlternative(GuardedAlternative alt); void AddContinueAlternative(GuardedAlternative alt); virtual void Emit(RegExpCompiler* compiler, Trace* trace); @@ -1169,6 +1027,7 @@ class LoopChoiceNode: public ChoiceNode { RegExpNode* loop_node() { return loop_node_; } RegExpNode* continue_node() { return continue_node_; } bool body_can_be_zero_length() { return body_can_be_zero_length_; } + virtual bool read_backward() { return read_backward_; } virtual void Accept(NodeVisitor* visitor); virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); @@ -1183,6 +1042,7 @@ class LoopChoiceNode: public ChoiceNode { RegExpNode* loop_node_; RegExpNode* continue_node_; bool body_can_be_zero_length_; + bool read_backward_; }; @@ -1438,9 +1298,7 @@ class Trace { at_start_ == UNKNOWN; } TriBool at_start() { return at_start_; } - void set_at_start(bool at_start) { - at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; - } + void set_at_start(TriBool at_start) { at_start_ = at_start; } Label* backtrack() { return backtrack_; } Label* loop_label() { return loop_label_; } RegExpNode* stop_node() { return stop_node_; } diff --git a/deps/v8/src/regexp/mips/OWNERS b/deps/v8/src/regexp/mips/OWNERS index 5508ba626f..89455a4fbd 100644 --- a/deps/v8/src/regexp/mips/OWNERS +++ b/deps/v8/src/regexp/mips/OWNERS @@ -3,3 +3,4 @@ gergely.kis@imgtec.com akos.palfi@imgtec.com balazs.kilvady@imgtec.com dusan.milosavljevic@imgtec.com +ivica.bogosavljevic@imgtec.com diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc index 77f09917c0..9c59328ed1 100644 --- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc +++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.cc @@ -97,7 +97,8 @@ RegExpMacroAssemblerMIPS::RegExpMacroAssemblerMIPS(Isolate* isolate, Zone* zone, Mode mode, int registers_to_save) : NativeRegExpMacroAssembler(isolate, zone), - masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)), + masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, + CodeObjectRequired::kYes)), mode_(mode), num_registers_(registers_to_save), num_saved_registers_(registers_to_save), @@ -181,26 +182,17 @@ void RegExpMacroAssemblerMIPS::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) { - Label not_at_start; - // Did we start the match at the start of the string at all? - __ lw(a0, MemOperand(frame_pointer(), kStartIndex)); - BranchOrBacktrack(¬_at_start, ne, a0, Operand(zero_reg)); - - // If we did, are we still at the start of the input? - __ lw(a1, MemOperand(frame_pointer(), kInputStart)); - __ Addu(a0, end_of_input_address(), Operand(current_input_offset())); + __ lw(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Addu(a0, current_input_offset(), Operand(-char_size())); BranchOrBacktrack(on_at_start, eq, a0, Operand(a1)); - __ bind(¬_at_start); } -void RegExpMacroAssemblerMIPS::CheckNotAtStart(Label* on_not_at_start) { - // Did we start the match at the start of the string at all? - __ lw(a0, MemOperand(frame_pointer(), kStartIndex)); - BranchOrBacktrack(on_not_at_start, ne, a0, Operand(zero_reg)); - // If we did, are we still at the start of the input? - __ lw(a1, MemOperand(frame_pointer(), kInputStart)); - __ Addu(a0, end_of_input_address(), Operand(current_input_offset())); +void RegExpMacroAssemblerMIPS::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + __ lw(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Addu(a0, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1)); } @@ -223,20 +215,26 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) { void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_no_match) { + int start_reg, bool read_backward, Label* on_no_match) { Label fallthrough; __ lw(a0, register_location(start_reg)); // Index of start of capture. __ lw(a1, register_location(start_reg + 1)); // Index of end of capture. __ Subu(a1, a1, a0); // Length of capture. - // If length is zero, either the capture is empty or it is not participating. - // In either case succeed immediately. + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ Branch(&fallthrough, eq, a1, Operand(zero_reg)); - __ Addu(t5, a1, current_input_offset()); - // Check that there are enough characters left in the input. - BranchOrBacktrack(on_no_match, gt, t5, Operand(zero_reg)); + if (read_backward) { + __ lw(t0, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Addu(t0, t0, a1); + BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t0)); + } else { + __ Addu(t5, a1, current_input_offset()); + // Check that there are enough characters left in the input. + BranchOrBacktrack(on_no_match, gt, t5, Operand(zero_reg)); + } if (mode_ == LATIN1) { Label success; @@ -247,6 +245,9 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( // a1 - length of capture. __ Addu(a0, a0, Operand(end_of_input_address())); __ Addu(a2, end_of_input_address(), Operand(current_input_offset())); + if (read_backward) { + __ Subu(a2, a2, Operand(a1)); + } __ Addu(a1, a0, Operand(a1)); // a0 - Address of start of capture. @@ -285,6 +286,12 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( __ bind(&success); // Compute new value of character position after the matched part. __ Subu(current_input_offset(), a2, end_of_input_address()); + if (read_backward) { + __ lw(t0, register_location(start_reg)); // Index of start of capture. + __ lw(t5, register_location(start_reg + 1)); // Index of end of capture. + __ Addu(current_input_offset(), current_input_offset(), Operand(t0)); + __ Subu(current_input_offset(), current_input_offset(), Operand(t5)); + } } else { DCHECK(mode_ == UC16); // Put regexp engine registers on stack. @@ -313,6 +320,9 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( __ mov(s3, a1); // Address of current input position. __ Addu(a1, current_input_offset(), Operand(end_of_input_address())); + if (read_backward) { + __ Subu(a1, a1, Operand(s3)); + } // Isolate. __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate()))); @@ -330,17 +340,21 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( // Check if function returned non-zero for success or zero for failure. BranchOrBacktrack(on_no_match, eq, v0, Operand(zero_reg)); - // On success, increment position by length of capture. - __ Addu(current_input_offset(), current_input_offset(), Operand(s3)); + // On success, advance position by length of capture. + if (read_backward) { + __ Subu(current_input_offset(), current_input_offset(), Operand(s3)); + } else { + __ Addu(current_input_offset(), current_input_offset(), Operand(s3)); + } } __ bind(&fallthrough); } -void RegExpMacroAssemblerMIPS::CheckNotBackReference( - int start_reg, - Label* on_no_match) { +void RegExpMacroAssemblerMIPS::CheckNotBackReference(int start_reg, + bool read_backward, + Label* on_no_match) { Label fallthrough; Label success; @@ -348,17 +362,35 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReference( __ lw(a0, register_location(start_reg)); __ lw(a1, register_location(start_reg + 1)); __ Subu(a1, a1, a0); // Length to check. - // Succeed on empty capture (including no capture). - __ Branch(&fallthrough, eq, a1, Operand(zero_reg)); - __ Addu(t5, a1, current_input_offset()); - // Check that there are enough characters left in the input. - BranchOrBacktrack(on_no_match, gt, t5, Operand(zero_reg)); + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. + __ Branch(&fallthrough, le, a1, Operand(zero_reg)); + + if (read_backward) { + __ lw(t0, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Addu(t0, t0, a1); + BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t0)); + } else { + __ Addu(t5, a1, current_input_offset()); + // Check that there are enough characters left in the input. + BranchOrBacktrack(on_no_match, gt, t5, Operand(zero_reg)); + } - // Compute pointers to match string and capture string. + // a0 - offset of start of capture. + // a1 - length of capture. __ Addu(a0, a0, Operand(end_of_input_address())); __ Addu(a2, end_of_input_address(), Operand(current_input_offset())); - __ Addu(a1, a1, Operand(a0)); + if (read_backward) { + __ Subu(a2, a2, Operand(a1)); + } + __ Addu(a1, a0, Operand(a1)); + + // a0 - Address of start of capture. + // a1 - Address of end of capture. + // a2 - Address of current input position. + Label loop; __ bind(&loop); @@ -379,6 +411,12 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReference( // Move current character position to position after match. __ Subu(current_input_offset(), a2, end_of_input_address()); + if (read_backward) { + __ lw(t0, register_location(start_reg)); // Index of start of capture. + __ lw(t5, register_location(start_reg + 1)); // Index of end of capture. + __ Addu(current_input_offset(), current_input_offset(), Operand(t0)); + __ Subu(current_input_offset(), current_input_offset(), Operand(t5)); + } __ bind(&fallthrough); } @@ -599,7 +637,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { __ Addu(frame_pointer(), sp, Operand(4 * kPointerSize)); __ mov(a0, zero_reg); __ push(a0); // Make room for success counter and initialize it to 0. - __ push(a0); // Make room for "position - 1" constant (value irrelevant). + __ push(a0); // Make room for "string start - 1" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; @@ -642,7 +680,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { __ Subu(a0, a0, t5); // Store this value in a local variable, for use when clearing // position registers. - __ sw(a0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ sw(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); // Initialize code pointer register __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); @@ -751,7 +789,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { __ sw(a2, MemOperand(frame_pointer(), kRegisterOutput)); // Prepare a0 to initialize registers with its value in the next run. - __ lw(a0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ lw(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); if (global_with_zero_length_check()) { // Special case for zero-length matches. @@ -905,10 +943,13 @@ void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, bool check_bounds, int characters) { - DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works). if (check_bounds) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -1016,7 +1057,7 @@ void RegExpMacroAssemblerMIPS::WriteCurrentPositionToRegister(int reg, void RegExpMacroAssemblerMIPS::ClearRegisters(int reg_from, int reg_to) { DCHECK(reg_from <= reg_to); - __ lw(a0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ lw(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); for (int reg = reg_from; reg <= reg_to; reg++) { __ sw(a0, register_location(reg)); } @@ -1129,10 +1170,14 @@ MemOperand RegExpMacroAssemblerMIPS::register_location(int register_index) { void RegExpMacroAssemblerMIPS::CheckPosition(int cp_offset, Label* on_outside_input) { - BranchOrBacktrack(on_outside_input, - ge, - current_input_offset(), - Operand(-cp_offset * char_size())); + if (cp_offset >= 0) { + BranchOrBacktrack(on_outside_input, ge, current_input_offset(), + Operand(-cp_offset * char_size())); + } else { + __ lw(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Addu(a0, current_input_offset(), Operand(cp_offset * char_size())); + BranchOrBacktrack(on_outside_input, le, a0, Operand(a1)); + } } diff --git a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h index da59546a79..902e2208fe 100644 --- a/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h +++ b/deps/v8/src/regexp/mips/regexp-macro-assembler-mips.h @@ -33,9 +33,11 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler { // A "greedy loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(uint32_t c, @@ -120,9 +122,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler { // When adding local variables remember to push space for them in // the frame in GetCode. static const int kSuccessfulCaptures = kInputString - kPointerSize; - static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize; + static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kInputStartMinusOne - kPointerSize; + static const int kRegisterZero = kStringStartMinusOne - kPointerSize; // Initial size of code buffer. static const size_t kRegExpCodeSize = 1024; diff --git a/deps/v8/src/regexp/mips64/OWNERS b/deps/v8/src/regexp/mips64/OWNERS index 5508ba626f..89455a4fbd 100644 --- a/deps/v8/src/regexp/mips64/OWNERS +++ b/deps/v8/src/regexp/mips64/OWNERS @@ -3,3 +3,4 @@ gergely.kis@imgtec.com akos.palfi@imgtec.com balazs.kilvady@imgtec.com dusan.milosavljevic@imgtec.com +ivica.bogosavljevic@imgtec.com diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc index 869cbc4f2e..5153bd018b 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.cc @@ -61,7 +61,7 @@ namespace internal { * - fp[-16] void* input_string (location of a handle containing the string). * - fp[-20] success counter (only for global regexps to count matches). * - fp[-24] Offset of location before start of input (effectively character - * position -1). Used to initialize capture registers to a + * string start - 1). Used to initialize capture registers to a * non-position. * - fp[-28] At start (if 1, we are starting at the start of the * string, otherwise 0) @@ -91,7 +91,7 @@ namespace internal { * - fp[-56] start index (character index of start). kStartIndex * - fp[-64] void* input_string (location of a handle containing the string). kInputString * - fp[-72] success counter (only for global regexps to count matches). kSuccessfulCaptures - * - fp[-80] Offset of location before start of input (effectively character kInputStartMinusOne + * - fp[-80] Offset of location before start of input (effectively character kStringStartMinusOne * position -1). Used to initialize capture registers to a * non-position. * --------- The following output registers are 32-bit values. --------- @@ -133,7 +133,8 @@ RegExpMacroAssemblerMIPS::RegExpMacroAssemblerMIPS(Isolate* isolate, Zone* zone, Mode mode, int registers_to_save) : NativeRegExpMacroAssembler(isolate, zone), - masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)), + masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, + CodeObjectRequired::kYes)), mode_(mode), num_registers_(registers_to_save), num_saved_registers_(registers_to_save), @@ -217,26 +218,17 @@ void RegExpMacroAssemblerMIPS::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) { - Label not_at_start; - // Did we start the match at the start of the string at all? - __ ld(a0, MemOperand(frame_pointer(), kStartIndex)); - BranchOrBacktrack(¬_at_start, ne, a0, Operand(zero_reg)); - - // If we did, are we still at the start of the input? - __ ld(a1, MemOperand(frame_pointer(), kInputStart)); - __ Daddu(a0, end_of_input_address(), Operand(current_input_offset())); + __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Daddu(a0, current_input_offset(), Operand(-char_size())); BranchOrBacktrack(on_at_start, eq, a0, Operand(a1)); - __ bind(¬_at_start); } -void RegExpMacroAssemblerMIPS::CheckNotAtStart(Label* on_not_at_start) { - // Did we start the match at the start of the string at all? - __ ld(a0, MemOperand(frame_pointer(), kStartIndex)); - BranchOrBacktrack(on_not_at_start, ne, a0, Operand(zero_reg)); - // If we did, are we still at the start of the input? - __ ld(a1, MemOperand(frame_pointer(), kInputStart)); - __ Daddu(a0, end_of_input_address(), Operand(current_input_offset())); +void RegExpMacroAssemblerMIPS::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Daddu(a0, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1)); } @@ -259,20 +251,26 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) { void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_no_match) { + int start_reg, bool read_backward, Label* on_no_match) { Label fallthrough; __ ld(a0, register_location(start_reg)); // Index of start of capture. __ ld(a1, register_location(start_reg + 1)); // Index of end of capture. __ Dsubu(a1, a1, a0); // Length of capture. - // If length is zero, either the capture is empty or it is not participating. - // In either case succeed immediately. + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ Branch(&fallthrough, eq, a1, Operand(zero_reg)); - __ Daddu(t1, a1, current_input_offset()); - // Check that there are enough characters left in the input. - BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg)); + if (read_backward) { + __ ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Daddu(t1, t1, a1); + BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1)); + } else { + __ Daddu(t1, a1, current_input_offset()); + // Check that there are enough characters left in the input. + BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg)); + } if (mode_ == LATIN1) { Label success; @@ -283,6 +281,9 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( // a1 - length of capture. __ Daddu(a0, a0, Operand(end_of_input_address())); __ Daddu(a2, end_of_input_address(), Operand(current_input_offset())); + if (read_backward) { + __ Dsubu(a2, a2, Operand(a1)); + } __ Daddu(a1, a0, Operand(a1)); // a0 - Address of start of capture. @@ -321,6 +322,12 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( __ bind(&success); // Compute new value of character position after the matched part. __ Dsubu(current_input_offset(), a2, end_of_input_address()); + if (read_backward) { + __ ld(t1, register_location(start_reg)); // Index of start of capture. + __ ld(a2, register_location(start_reg + 1)); // Index of end of capture. + __ Daddu(current_input_offset(), current_input_offset(), Operand(t1)); + __ Dsubu(current_input_offset(), current_input_offset(), Operand(a2)); + } } else { DCHECK(mode_ == UC16); // Put regexp engine registers on stack. @@ -349,6 +356,9 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( __ mov(s3, a1); // Address of current input position. __ Daddu(a1, current_input_offset(), Operand(end_of_input_address())); + if (read_backward) { + __ Dsubu(a1, a1, Operand(s3)); + } // Isolate. __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate()))); @@ -367,16 +377,20 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase( // Check if function returned non-zero for success or zero for failure. BranchOrBacktrack(on_no_match, eq, v0, Operand(zero_reg)); // On success, increment position by length of capture. - __ Daddu(current_input_offset(), current_input_offset(), Operand(s3)); + if (read_backward) { + __ Dsubu(current_input_offset(), current_input_offset(), Operand(s3)); + } else { + __ Daddu(current_input_offset(), current_input_offset(), Operand(s3)); + } } __ bind(&fallthrough); } -void RegExpMacroAssemblerMIPS::CheckNotBackReference( - int start_reg, - Label* on_no_match) { +void RegExpMacroAssemblerMIPS::CheckNotBackReference(int start_reg, + bool read_backward, + Label* on_no_match) { Label fallthrough; Label success; @@ -384,16 +398,28 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReference( __ ld(a0, register_location(start_reg)); __ ld(a1, register_location(start_reg + 1)); __ Dsubu(a1, a1, a0); // Length to check. - // Succeed on empty capture (including no capture). + + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ Branch(&fallthrough, eq, a1, Operand(zero_reg)); - __ Daddu(t1, a1, current_input_offset()); - // Check that there are enough characters left in the input. - BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg)); + if (read_backward) { + __ ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Daddu(t1, t1, a1); + BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1)); + } else { + __ Daddu(t1, a1, current_input_offset()); + // Check that there are enough characters left in the input. + BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg)); + } // Compute pointers to match string and capture string. __ Daddu(a0, a0, Operand(end_of_input_address())); __ Daddu(a2, end_of_input_address(), Operand(current_input_offset())); + if (read_backward) { + __ Dsubu(a2, a2, Operand(a1)); + } __ Daddu(a1, a1, Operand(a0)); Label loop; @@ -415,6 +441,12 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReference( // Move current character position to position after match. __ Dsubu(current_input_offset(), a2, end_of_input_address()); + if (read_backward) { + __ ld(t1, register_location(start_reg)); // Index of start of capture. + __ ld(a2, register_location(start_reg + 1)); // Index of end of capture. + __ Daddu(current_input_offset(), current_input_offset(), Operand(t1)); + __ Dsubu(current_input_offset(), current_input_offset(), Operand(a2)); + } __ bind(&fallthrough); } @@ -644,7 +676,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { __ Daddu(frame_pointer(), sp, Operand(8 * kPointerSize)); __ mov(a0, zero_reg); __ push(a0); // Make room for success counter and initialize it to 0. - __ push(a0); // Make room for "position - 1" constant (value irrelevant). + __ push(a0); // Make room for "string start - 1" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; @@ -687,7 +719,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { __ Dsubu(a0, a0, t1); // Store this value in a local variable, for use when clearing // position registers. - __ sd(a0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ sd(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); // Initialize code pointer register __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE); @@ -797,7 +829,7 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) { __ sd(a2, MemOperand(frame_pointer(), kRegisterOutput)); // Prepare a0 to initialize registers with its value in the next run. - __ ld(a0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); if (global_with_zero_length_check()) { // Special case for zero-length matches. @@ -951,10 +983,13 @@ void RegExpMacroAssemblerMIPS::LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, bool check_bounds, int characters) { - DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works). if (check_bounds) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -1062,7 +1097,7 @@ void RegExpMacroAssemblerMIPS::WriteCurrentPositionToRegister(int reg, void RegExpMacroAssemblerMIPS::ClearRegisters(int reg_from, int reg_to) { DCHECK(reg_from <= reg_to); - __ ld(a0, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne)); for (int reg = reg_from; reg <= reg_to; reg++) { __ sd(a0, register_location(reg)); } @@ -1175,10 +1210,14 @@ MemOperand RegExpMacroAssemblerMIPS::register_location(int register_index) { void RegExpMacroAssemblerMIPS::CheckPosition(int cp_offset, Label* on_outside_input) { - BranchOrBacktrack(on_outside_input, - ge, - current_input_offset(), - Operand(-cp_offset * char_size())); + if (cp_offset >= 0) { + BranchOrBacktrack(on_outside_input, ge, current_input_offset(), + Operand(-cp_offset * char_size())); + } else { + __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ Daddu(a0, current_input_offset(), Operand(cp_offset * char_size())); + BranchOrBacktrack(on_outside_input, le, a0, Operand(a1)); + } } diff --git a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h index 265bf773eb..9a8ca179d5 100644 --- a/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h +++ b/deps/v8/src/regexp/mips64/regexp-macro-assembler-mips64.h @@ -33,9 +33,11 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler { // A "greedy loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(uint32_t c, @@ -125,9 +127,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler { // When adding local variables remember to push space for them in // the frame in GetCode. static const int kSuccessfulCaptures = kInputString - kPointerSize; - static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize; + static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kInputStartMinusOne - kPointerSize; + static const int kRegisterZero = kStringStartMinusOne - kPointerSize; #elif defined(MIPS_ABI_O32) // Offsets from frame_pointer() of function parameters and stored registers. @@ -158,9 +160,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler { // When adding local variables remember to push space for them in // the frame in GetCode. static const int kSuccessfulCaptures = kInputString - kPointerSize; - static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize; + static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kInputStartMinusOne - kPointerSize; + static const int kRegisterZero = kStringStartMinusOne - kPointerSize; #else # error "undefined MIPS ABI" diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc index 03f9741147..f3ddf7bf98 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.cc @@ -60,7 +60,7 @@ namespace internal { * - fp[-32] void* input_string (location of a handle containing the string). * - fp[-36] success counter (only for global regexps to count matches). * - fp[-40] Offset of location before start of input (effectively character - * position -1). Used to initialize capture registers to a + * string start - 1). Used to initialize capture registers to a * non-position. * - fp[-44] At start (if 1, we are starting at the start of the * string, otherwise 0) @@ -100,7 +100,8 @@ RegExpMacroAssemblerPPC::RegExpMacroAssemblerPPC(Isolate* isolate, Zone* zone, Mode mode, int registers_to_save) : NativeRegExpMacroAssembler(isolate, zone), - masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)), + masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, + CodeObjectRequired::kYes)), mode_(mode), num_registers_(registers_to_save), num_saved_registers_(registers_to_save), @@ -189,30 +190,18 @@ void RegExpMacroAssemblerPPC::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerPPC::CheckAtStart(Label* on_at_start) { - Label not_at_start; - // Did we start the match at the start of the string at all? - __ LoadP(r3, MemOperand(frame_pointer(), kStartIndex)); - __ cmpi(r3, Operand::Zero()); - BranchOrBacktrack(ne, ¬_at_start); - - // If we did, are we still at the start of the input? - __ LoadP(r4, MemOperand(frame_pointer(), kInputStart)); - __ mr(r0, current_input_offset()); - __ add(r3, end_of_input_address(), r0); - __ cmp(r4, r3); + __ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ addi(r3, current_input_offset(), Operand(-char_size())); + __ cmp(r3, r4); BranchOrBacktrack(eq, on_at_start); - __ bind(¬_at_start); } -void RegExpMacroAssemblerPPC::CheckNotAtStart(Label* on_not_at_start) { - // Did we start the match at the start of the string at all? - __ LoadP(r3, MemOperand(frame_pointer(), kStartIndex)); - __ cmpi(r3, Operand::Zero()); - BranchOrBacktrack(ne, on_not_at_start); - // If we did, are we still at the start of the input? - __ LoadP(r4, MemOperand(frame_pointer(), kInputStart)); - __ add(r3, end_of_input_address(), current_input_offset()); +void RegExpMacroAssemblerPPC::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + __ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ addi(r3, current_input_offset(), + Operand(-char_size() + cp_offset * char_size())); __ cmp(r3, r4); BranchOrBacktrack(ne, on_not_at_start); } @@ -238,20 +227,27 @@ void RegExpMacroAssemblerPPC::CheckGreedyLoop(Label* on_equal) { void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase( - int start_reg, Label* on_no_match) { + int start_reg, bool read_backward, Label* on_no_match) { Label fallthrough; __ LoadP(r3, register_location(start_reg), r0); // Index of start of capture __ LoadP(r4, register_location(start_reg + 1), r0); // Index of end __ sub(r4, r4, r3, LeaveOE, SetRC); // Length of capture. - // If length is zero, either the capture is empty or it is not participating. - // In either case succeed immediately. + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ beq(&fallthrough, cr0); // Check that there are enough characters left in the input. - __ add(r0, r4, current_input_offset(), LeaveOE, SetRC); - // __ cmn(r1, Operand(current_input_offset())); - BranchOrBacktrack(gt, on_no_match, cr0); + if (read_backward) { + __ LoadP(r6, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ add(r6, r6, r4); + __ cmp(current_input_offset(), r6); + BranchOrBacktrack(le, on_no_match); + } else { + __ add(r0, r4, current_input_offset(), LeaveOE, SetRC); + BranchOrBacktrack(gt, on_no_match, cr0); + } if (mode_ == LATIN1) { Label success; @@ -262,6 +258,9 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase( // r4 - length of capture __ add(r3, r3, end_of_input_address()); __ add(r5, end_of_input_address(), current_input_offset()); + if (read_backward) { + __ sub(r5, r5, r4); // Offset by length when matching backwards. + } __ add(r4, r3, r4); // r3 - Address of start of capture. @@ -303,6 +302,13 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase( __ bind(&success); // Compute new value of character position after the matched part. __ sub(current_input_offset(), r5, end_of_input_address()); + if (read_backward) { + __ LoadP(r3, register_location(start_reg)); // Index of start of capture + __ LoadP(r4, + register_location(start_reg + 1)); // Index of end of capture + __ add(current_input_offset(), current_input_offset(), r3); + __ sub(current_input_offset(), current_input_offset(), r4); + } } else { DCHECK(mode_ == UC16); int argument_count = 4; @@ -326,6 +332,9 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase( __ mr(r25, r4); // Address of current input position. __ add(r4, current_input_offset(), end_of_input_address()); + if (read_backward) { + __ sub(r4, r4, r25); + } // Isolate. __ mov(r6, Operand(ExternalReference::isolate_address(isolate()))); @@ -339,8 +348,13 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase( // Check if function returned non-zero for success or zero for failure. __ cmpi(r3, Operand::Zero()); BranchOrBacktrack(eq, on_no_match); - // On success, increment position by length of capture. - __ add(current_input_offset(), current_input_offset(), r25); + + // On success, advance position by length of capture. + if (read_backward) { + __ sub(current_input_offset(), current_input_offset(), r25); + } else { + __ add(current_input_offset(), current_input_offset(), r25); + } } __ bind(&fallthrough); @@ -348,6 +362,7 @@ void RegExpMacroAssemblerPPC::CheckNotBackReferenceIgnoreCase( void RegExpMacroAssemblerPPC::CheckNotBackReference(int start_reg, + bool read_backward, Label* on_no_match) { Label fallthrough; Label success; @@ -356,16 +371,30 @@ void RegExpMacroAssemblerPPC::CheckNotBackReference(int start_reg, __ LoadP(r3, register_location(start_reg), r0); __ LoadP(r4, register_location(start_reg + 1), r0); __ sub(r4, r4, r3, LeaveOE, SetRC); // Length to check. - // Succeed on empty capture (including no capture). + + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ beq(&fallthrough, cr0); // Check that there are enough characters left in the input. - __ add(r0, r4, current_input_offset(), LeaveOE, SetRC); - BranchOrBacktrack(gt, on_no_match, cr0); + if (read_backward) { + __ LoadP(r6, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ add(r6, r6, r4); + __ cmp(current_input_offset(), r6); + BranchOrBacktrack(lt, on_no_match); + } else { + __ add(r0, r4, current_input_offset(), LeaveOE, SetRC); + BranchOrBacktrack(gt, on_no_match, cr0); + } - // Compute pointers to match string and capture string + // r3 - offset of start of capture + // r4 - length of capture __ add(r3, r3, end_of_input_address()); __ add(r5, end_of_input_address(), current_input_offset()); + if (read_backward) { + __ sub(r5, r5, r4); // Offset by length when matching backwards. + } __ add(r4, r4, r3); Label loop; @@ -389,6 +418,13 @@ void RegExpMacroAssemblerPPC::CheckNotBackReference(int start_reg, // Move current character position to position after match. __ sub(current_input_offset(), r5, end_of_input_address()); + if (read_backward) { + __ LoadP(r3, register_location(start_reg)); // Index of start of capture + __ LoadP(r4, register_location(start_reg + 1)); // Index of end of capture + __ add(current_input_offset(), current_input_offset(), r3); + __ sub(current_input_offset(), current_input_offset(), r4); + } + __ bind(&fallthrough); } @@ -639,7 +675,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) { __ addi(frame_pointer(), sp, Operand(8 * kPointerSize)); __ li(r3, Operand::Zero()); __ push(r3); // Make room for success counter and initialize it to 0. - __ push(r3); // Make room for "position - 1" constant (value is irrelevant) + __ push(r3); // Make room for "string start - 1" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; Label stack_ok; @@ -688,7 +724,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) { } // Store this value in a local variable, for use when clearing // position registers. - __ StoreP(r3, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ StoreP(r3, MemOperand(frame_pointer(), kStringStartMinusOne)); // Initialize code pointer register __ mov(code_pointer(), Operand(masm_->CodeObject())); @@ -797,7 +833,7 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) { __ StoreP(r5, MemOperand(frame_pointer(), kRegisterOutput)); // Prepare r3 to initialize registers with its value in the next run. - __ LoadP(r3, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ LoadP(r3, MemOperand(frame_pointer(), kStringStartMinusOne)); if (global_with_zero_length_check()) { // Special case for zero-length matches. @@ -936,10 +972,13 @@ void RegExpMacroAssemblerPPC::LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, bool check_bounds, int characters) { - DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. DCHECK(cp_offset < (1 << 30)); // Be sane! (And ensure negation works) if (check_bounds) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -1028,7 +1067,7 @@ void RegExpMacroAssemblerPPC::WriteCurrentPositionToRegister(int reg, void RegExpMacroAssemblerPPC::ClearRegisters(int reg_from, int reg_to) { DCHECK(reg_from <= reg_to); - __ LoadP(r3, MemOperand(frame_pointer(), kInputStartMinusOne)); + __ LoadP(r3, MemOperand(frame_pointer(), kStringStartMinusOne)); for (int reg = reg_from; reg <= reg_to; reg++) { __ StoreP(r3, register_location(reg), r0); } @@ -1132,8 +1171,15 @@ MemOperand RegExpMacroAssemblerPPC::register_location(int register_index) { void RegExpMacroAssemblerPPC::CheckPosition(int cp_offset, Label* on_outside_input) { - __ Cmpi(current_input_offset(), Operand(-cp_offset * char_size()), r0); - BranchOrBacktrack(ge, on_outside_input); + if (cp_offset >= 0) { + __ Cmpi(current_input_offset(), Operand(-cp_offset * char_size()), r0); + BranchOrBacktrack(ge, on_outside_input); + } else { + __ LoadP(r4, MemOperand(frame_pointer(), kStringStartMinusOne)); + __ addi(r3, current_input_offset(), Operand(cp_offset * char_size())); + __ cmp(r3, r4); + BranchOrBacktrack(le, on_outside_input); + } } diff --git a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h index 04a0e5e416..4d1836fc71 100644 --- a/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h +++ b/deps/v8/src/regexp/ppc/regexp-macro-assembler-ppc.h @@ -34,9 +34,11 @@ class RegExpMacroAssemblerPPC : public NativeRegExpMacroAssembler { // A "greedy loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(unsigned c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask, @@ -112,9 +114,9 @@ class RegExpMacroAssemblerPPC : public NativeRegExpMacroAssembler { // When adding local variables remember to push space for them in // the frame in GetCode. static const int kSuccessfulCaptures = kInputString - kPointerSize; - static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize; + static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kInputStartMinusOne - kPointerSize; + static const int kRegisterZero = kStringStartMinusOne - kPointerSize; // Initial size of code buffer. static const size_t kRegExpCodeSize = 1024; diff --git a/deps/v8/src/regexp/regexp-ast.cc b/deps/v8/src/regexp/regexp-ast.cc new file mode 100644 index 0000000000..31c93b114f --- /dev/null +++ b/deps/v8/src/regexp/regexp-ast.cc @@ -0,0 +1,337 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/ostreams.h" +#include "src/regexp/regexp-ast.h" + +namespace v8 { +namespace internal { + +#define MAKE_ACCEPT(Name) \ + void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ + return visitor->Visit##Name(this, data); \ + } +FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) +#undef MAKE_ACCEPT + +#define MAKE_TYPE_CASE(Name) \ + RegExp##Name* RegExpTree::As##Name() { return NULL; } \ + bool RegExpTree::Is##Name() { return false; } +FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) +#undef MAKE_TYPE_CASE + +#define MAKE_TYPE_CASE(Name) \ + RegExp##Name* RegExp##Name::As##Name() { return this; } \ + bool RegExp##Name::Is##Name() { return true; } +FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) +#undef MAKE_TYPE_CASE + + +static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) { + Interval result = Interval::Empty(); + for (int i = 0; i < children->length(); i++) + result = result.Union(children->at(i)->CaptureRegisters()); + return result; +} + + +Interval RegExpAlternative::CaptureRegisters() { + return ListCaptureRegisters(nodes()); +} + + +Interval RegExpDisjunction::CaptureRegisters() { + return ListCaptureRegisters(alternatives()); +} + + +Interval RegExpLookaround::CaptureRegisters() { + return body()->CaptureRegisters(); +} + + +Interval RegExpCapture::CaptureRegisters() { + Interval self(StartRegister(index()), EndRegister(index())); + return self.Union(body()->CaptureRegisters()); +} + + +Interval RegExpQuantifier::CaptureRegisters() { + return body()->CaptureRegisters(); +} + + +bool RegExpAssertion::IsAnchoredAtStart() { + return assertion_type() == RegExpAssertion::START_OF_INPUT; +} + + +bool RegExpAssertion::IsAnchoredAtEnd() { + return assertion_type() == RegExpAssertion::END_OF_INPUT; +} + + +bool RegExpAlternative::IsAnchoredAtStart() { + ZoneList<RegExpTree*>* nodes = this->nodes(); + for (int i = 0; i < nodes->length(); i++) { + RegExpTree* node = nodes->at(i); + if (node->IsAnchoredAtStart()) { + return true; + } + if (node->max_match() > 0) { + return false; + } + } + return false; +} + + +bool RegExpAlternative::IsAnchoredAtEnd() { + ZoneList<RegExpTree*>* nodes = this->nodes(); + for (int i = nodes->length() - 1; i >= 0; i--) { + RegExpTree* node = nodes->at(i); + if (node->IsAnchoredAtEnd()) { + return true; + } + if (node->max_match() > 0) { + return false; + } + } + return false; +} + + +bool RegExpDisjunction::IsAnchoredAtStart() { + ZoneList<RegExpTree*>* alternatives = this->alternatives(); + for (int i = 0; i < alternatives->length(); i++) { + if (!alternatives->at(i)->IsAnchoredAtStart()) return false; + } + return true; +} + + +bool RegExpDisjunction::IsAnchoredAtEnd() { + ZoneList<RegExpTree*>* alternatives = this->alternatives(); + for (int i = 0; i < alternatives->length(); i++) { + if (!alternatives->at(i)->IsAnchoredAtEnd()) return false; + } + return true; +} + + +bool RegExpLookaround::IsAnchoredAtStart() { + return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart(); +} + + +bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); } + + +bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); } + + +// Convert regular expression trees to a simple sexp representation. +// This representation should be different from the input grammar +// in as many cases as possible, to make it more difficult for incorrect +// parses to look as correct ones which is likely if the input and +// output formats are alike. +class RegExpUnparser final : public RegExpVisitor { + public: + RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {} + void VisitCharacterRange(CharacterRange that); +#define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override; + FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) +#undef MAKE_CASE + private: + std::ostream& os_; + Zone* zone_; +}; + + +void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { + os_ << "(|"; + for (int i = 0; i < that->alternatives()->length(); i++) { + os_ << " "; + that->alternatives()->at(i)->Accept(this, data); + } + os_ << ")"; + return NULL; +} + + +void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { + os_ << "(:"; + for (int i = 0; i < that->nodes()->length(); i++) { + os_ << " "; + that->nodes()->at(i)->Accept(this, data); + } + os_ << ")"; + return NULL; +} + + +void RegExpUnparser::VisitCharacterRange(CharacterRange that) { + os_ << AsUC16(that.from()); + if (!that.IsSingleton()) { + os_ << "-" << AsUC16(that.to()); + } +} + + +void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, + void* data) { + if (that->is_negated()) os_ << "^"; + os_ << "["; + for (int i = 0; i < that->ranges(zone_)->length(); i++) { + if (i > 0) os_ << " "; + VisitCharacterRange(that->ranges(zone_)->at(i)); + } + os_ << "]"; + return NULL; +} + + +void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { + switch (that->assertion_type()) { + case RegExpAssertion::START_OF_INPUT: + os_ << "@^i"; + break; + case RegExpAssertion::END_OF_INPUT: + os_ << "@$i"; + break; + case RegExpAssertion::START_OF_LINE: + os_ << "@^l"; + break; + case RegExpAssertion::END_OF_LINE: + os_ << "@$l"; + break; + case RegExpAssertion::BOUNDARY: + os_ << "@b"; + break; + case RegExpAssertion::NON_BOUNDARY: + os_ << "@B"; + break; + } + return NULL; +} + + +void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { + os_ << "'"; + Vector<const uc16> chardata = that->data(); + for (int i = 0; i < chardata.length(); i++) { + os_ << AsUC16(chardata[i]); + } + os_ << "'"; + return NULL; +} + + +void* RegExpUnparser::VisitText(RegExpText* that, void* data) { + if (that->elements()->length() == 1) { + that->elements()->at(0).tree()->Accept(this, data); + } else { + os_ << "(!"; + for (int i = 0; i < that->elements()->length(); i++) { + os_ << " "; + that->elements()->at(i).tree()->Accept(this, data); + } + os_ << ")"; + } + return NULL; +} + + +void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { + os_ << "(# " << that->min() << " "; + if (that->max() == RegExpTree::kInfinity) { + os_ << "- "; + } else { + os_ << that->max() << " "; + } + os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); + that->body()->Accept(this, data); + os_ << ")"; + return NULL; +} + + +void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { + os_ << "(^ "; + that->body()->Accept(this, data); + os_ << ")"; + return NULL; +} + + +void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) { + os_ << "("; + os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-"); + os_ << (that->is_positive() ? " + " : " - "); + that->body()->Accept(this, data); + os_ << ")"; + return NULL; +} + + +void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, + void* data) { + os_ << "(<- " << that->index() << ")"; + return NULL; +} + + +void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) { + os_ << '%'; + return NULL; +} + + +std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT + RegExpUnparser unparser(os, zone); + Accept(&unparser, NULL); + return os; +} + + +RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) + : alternatives_(alternatives) { + DCHECK(alternatives->length() > 1); + RegExpTree* first_alternative = alternatives->at(0); + min_match_ = first_alternative->min_match(); + max_match_ = first_alternative->max_match(); + for (int i = 1; i < alternatives->length(); i++) { + RegExpTree* alternative = alternatives->at(i); + min_match_ = Min(min_match_, alternative->min_match()); + max_match_ = Max(max_match_, alternative->max_match()); + } +} + + +static int IncreaseBy(int previous, int increase) { + if (RegExpTree::kInfinity - previous < increase) { + return RegExpTree::kInfinity; + } else { + return previous + increase; + } +} + + +RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) + : nodes_(nodes) { + DCHECK(nodes->length() > 1); + min_match_ = 0; + max_match_ = 0; + for (int i = 0; i < nodes->length(); i++) { + RegExpTree* node = nodes->at(i); + int node_min_match = node->min_match(); + min_match_ = IncreaseBy(min_match_, node_min_match); + int node_max_match = node->max_match(); + max_match_ = IncreaseBy(max_match_, node_max_match); + } +} + + +} // namespace internal +} // namespace v8 diff --git a/deps/v8/src/regexp/regexp-ast.h b/deps/v8/src/regexp/regexp-ast.h new file mode 100644 index 0000000000..f87778596a --- /dev/null +++ b/deps/v8/src/regexp/regexp-ast.h @@ -0,0 +1,496 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_REGEXP_REGEXP_AST_H_ +#define V8_REGEXP_REGEXP_AST_H_ + +#include "src/utils.h" +#include "src/zone.h" + +namespace v8 { +namespace internal { + +#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ + VISIT(Disjunction) \ + VISIT(Alternative) \ + VISIT(Assertion) \ + VISIT(CharacterClass) \ + VISIT(Atom) \ + VISIT(Quantifier) \ + VISIT(Capture) \ + VISIT(Lookaround) \ + VISIT(BackReference) \ + VISIT(Empty) \ + VISIT(Text) + + +#define FORWARD_DECLARE(Name) class RegExp##Name; +FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) +#undef FORWARD_DECLARE + +class RegExpCompiler; +class RegExpNode; +class RegExpTree; + + +class RegExpVisitor BASE_EMBEDDED { + public: + virtual ~RegExpVisitor() {} +#define MAKE_CASE(Name) \ + virtual void* Visit##Name(RegExp##Name*, void* data) = 0; + FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) +#undef MAKE_CASE +}; + + +// A simple closed interval. +class Interval { + public: + Interval() : from_(kNone), to_(kNone) {} + Interval(int from, int to) : from_(from), to_(to) {} + Interval Union(Interval that) { + if (that.from_ == kNone) + return *this; + else if (from_ == kNone) + return that; + else + return Interval(Min(from_, that.from_), Max(to_, that.to_)); + } + bool Contains(int value) { return (from_ <= value) && (value <= to_); } + bool is_empty() { return from_ == kNone; } + int from() const { return from_; } + int to() const { return to_; } + static Interval Empty() { return Interval(); } + static const int kNone = -1; + + private: + int from_; + int to_; +}; + + +// Represents code units in the range from from_ to to_, both ends are +// inclusive. +class CharacterRange { + public: + CharacterRange() : from_(0), to_(0) {} + // For compatibility with the CHECK_OK macro + CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT + CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) {} + static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, + Zone* zone); + static Vector<const int> GetWordBounds(); + static inline CharacterRange Singleton(uc16 value) { + return CharacterRange(value, value); + } + static inline CharacterRange Range(uc16 from, uc16 to) { + DCHECK(from <= to); + return CharacterRange(from, to); + } + static inline CharacterRange Everything() { + return CharacterRange(0, 0xFFFF); + } + bool Contains(uc16 i) { return from_ <= i && i <= to_; } + uc16 from() const { return from_; } + void set_from(uc16 value) { from_ = value; } + uc16 to() const { return to_; } + void set_to(uc16 value) { to_ = value; } + bool is_valid() { return from_ <= to_; } + bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } + bool IsSingleton() { return (from_ == to_); } + void AddCaseEquivalents(Isolate* isolate, Zone* zone, + ZoneList<CharacterRange>* ranges, bool is_one_byte); + static void Split(ZoneList<CharacterRange>* base, Vector<const int> overlay, + ZoneList<CharacterRange>** included, + ZoneList<CharacterRange>** excluded, Zone* zone); + // Whether a range list is in canonical form: Ranges ordered by from value, + // and ranges non-overlapping and non-adjacent. + static bool IsCanonical(ZoneList<CharacterRange>* ranges); + // Convert range list to canonical form. The characters covered by the ranges + // will still be the same, but no character is in more than one range, and + // adjacent ranges are merged. The resulting list may be shorter than the + // original, but cannot be longer. + static void Canonicalize(ZoneList<CharacterRange>* ranges); + // Negate the contents of a character range in canonical form. + static void Negate(ZoneList<CharacterRange>* src, + ZoneList<CharacterRange>* dst, Zone* zone); + static const int kStartMarker = (1 << 24); + static const int kPayloadMask = (1 << 24) - 1; + + private: + uc16 from_; + uc16 to_; +}; + + +class CharacterSet final BASE_EMBEDDED { + public: + explicit CharacterSet(uc16 standard_set_type) + : ranges_(NULL), standard_set_type_(standard_set_type) {} + explicit CharacterSet(ZoneList<CharacterRange>* ranges) + : ranges_(ranges), standard_set_type_(0) {} + ZoneList<CharacterRange>* ranges(Zone* zone); + uc16 standard_set_type() { return standard_set_type_; } + void set_standard_set_type(uc16 special_set_type) { + standard_set_type_ = special_set_type; + } + bool is_standard() { return standard_set_type_ != 0; } + void Canonicalize(); + + private: + ZoneList<CharacterRange>* ranges_; + // If non-zero, the value represents a standard set (e.g., all whitespace + // characters) without having to expand the ranges. + uc16 standard_set_type_; +}; + + +class TextElement final BASE_EMBEDDED { + public: + enum TextType { ATOM, CHAR_CLASS }; + + static TextElement Atom(RegExpAtom* atom); + static TextElement CharClass(RegExpCharacterClass* char_class); + + int cp_offset() const { return cp_offset_; } + void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } + int length() const; + + TextType text_type() const { return text_type_; } + + RegExpTree* tree() const { return tree_; } + + RegExpAtom* atom() const { + DCHECK(text_type() == ATOM); + return reinterpret_cast<RegExpAtom*>(tree()); + } + + RegExpCharacterClass* char_class() const { + DCHECK(text_type() == CHAR_CLASS); + return reinterpret_cast<RegExpCharacterClass*>(tree()); + } + + private: + TextElement(TextType text_type, RegExpTree* tree) + : cp_offset_(-1), text_type_(text_type), tree_(tree) {} + + int cp_offset_; + TextType text_type_; + RegExpTree* tree_; +}; + + +class RegExpTree : public ZoneObject { + public: + static const int kInfinity = kMaxInt; + virtual ~RegExpTree() {} + virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; + virtual RegExpNode* ToNode(RegExpCompiler* compiler, + RegExpNode* on_success) = 0; + virtual bool IsTextElement() { return false; } + virtual bool IsAnchoredAtStart() { return false; } + virtual bool IsAnchoredAtEnd() { return false; } + virtual int min_match() = 0; + virtual int max_match() = 0; + // Returns the interval of registers used for captures within this + // expression. + virtual Interval CaptureRegisters() { return Interval::Empty(); } + virtual void AppendToText(RegExpText* text, Zone* zone); + std::ostream& Print(std::ostream& os, Zone* zone); // NOLINT +#define MAKE_ASTYPE(Name) \ + virtual RegExp##Name* As##Name(); \ + virtual bool Is##Name(); + FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) +#undef MAKE_ASTYPE +}; + + +class RegExpDisjunction final : public RegExpTree { + public: + explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives); + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpDisjunction* AsDisjunction() override; + Interval CaptureRegisters() override; + bool IsDisjunction() override; + bool IsAnchoredAtStart() override; + bool IsAnchoredAtEnd() override; + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + ZoneList<RegExpTree*>* alternatives() { return alternatives_; } + + private: + bool SortConsecutiveAtoms(RegExpCompiler* compiler); + void RationalizeConsecutiveAtoms(RegExpCompiler* compiler); + void FixSingleCharacterDisjunctions(RegExpCompiler* compiler); + ZoneList<RegExpTree*>* alternatives_; + int min_match_; + int max_match_; +}; + + +class RegExpAlternative final : public RegExpTree { + public: + explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes); + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpAlternative* AsAlternative() override; + Interval CaptureRegisters() override; + bool IsAlternative() override; + bool IsAnchoredAtStart() override; + bool IsAnchoredAtEnd() override; + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + ZoneList<RegExpTree*>* nodes() { return nodes_; } + + private: + ZoneList<RegExpTree*>* nodes_; + int min_match_; + int max_match_; +}; + + +class RegExpAssertion final : public RegExpTree { + public: + enum AssertionType { + START_OF_LINE, + START_OF_INPUT, + END_OF_LINE, + END_OF_INPUT, + BOUNDARY, + NON_BOUNDARY + }; + explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {} + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpAssertion* AsAssertion() override; + bool IsAssertion() override; + bool IsAnchoredAtStart() override; + bool IsAnchoredAtEnd() override; + int min_match() override { return 0; } + int max_match() override { return 0; } + AssertionType assertion_type() { return assertion_type_; } + + private: + AssertionType assertion_type_; +}; + + +class RegExpCharacterClass final : public RegExpTree { + public: + RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) + : set_(ranges), is_negated_(is_negated) {} + explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpCharacterClass* AsCharacterClass() override; + bool IsCharacterClass() override; + bool IsTextElement() override { return true; } + int min_match() override { return 1; } + int max_match() override { return 1; } + void AppendToText(RegExpText* text, Zone* zone) override; + CharacterSet character_set() { return set_; } + // TODO(lrn): Remove need for complex version if is_standard that + // recognizes a mangled standard set and just do { return set_.is_special(); } + bool is_standard(Zone* zone); + // Returns a value representing the standard character set if is_standard() + // returns true. + // Currently used values are: + // s : unicode whitespace + // S : unicode non-whitespace + // w : ASCII word character (digit, letter, underscore) + // W : non-ASCII word character + // d : ASCII digit + // D : non-ASCII digit + // . : non-unicode non-newline + // * : All characters + uc16 standard_type() { return set_.standard_set_type(); } + ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } + bool is_negated() { return is_negated_; } + + private: + CharacterSet set_; + bool is_negated_; +}; + + +class RegExpAtom final : public RegExpTree { + public: + explicit RegExpAtom(Vector<const uc16> data) : data_(data) {} + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpAtom* AsAtom() override; + bool IsAtom() override; + bool IsTextElement() override { return true; } + int min_match() override { return data_.length(); } + int max_match() override { return data_.length(); } + void AppendToText(RegExpText* text, Zone* zone) override; + Vector<const uc16> data() { return data_; } + int length() { return data_.length(); } + + private: + Vector<const uc16> data_; +}; + + +class RegExpText final : public RegExpTree { + public: + explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {} + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpText* AsText() override; + bool IsText() override; + bool IsTextElement() override { return true; } + int min_match() override { return length_; } + int max_match() override { return length_; } + void AppendToText(RegExpText* text, Zone* zone) override; + void AddElement(TextElement elm, Zone* zone) { + elements_.Add(elm, zone); + length_ += elm.length(); + } + ZoneList<TextElement>* elements() { return &elements_; } + + private: + ZoneList<TextElement> elements_; + int length_; +}; + + +class RegExpQuantifier final : public RegExpTree { + public: + enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; + RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) + : body_(body), + min_(min), + max_(max), + min_match_(min * body->min_match()), + quantifier_type_(type) { + if (max > 0 && body->max_match() > kInfinity / max) { + max_match_ = kInfinity; + } else { + max_match_ = max * body->max_match(); + } + } + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body, + RegExpCompiler* compiler, RegExpNode* on_success, + bool not_at_start = false); + RegExpQuantifier* AsQuantifier() override; + Interval CaptureRegisters() override; + bool IsQuantifier() override; + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + int min() { return min_; } + int max() { return max_; } + bool is_possessive() { return quantifier_type_ == POSSESSIVE; } + bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } + bool is_greedy() { return quantifier_type_ == GREEDY; } + RegExpTree* body() { return body_; } + + private: + RegExpTree* body_; + int min_; + int max_; + int min_match_; + int max_match_; + QuantifierType quantifier_type_; +}; + + +class RegExpCapture final : public RegExpTree { + public: + explicit RegExpCapture(int index) : body_(NULL), index_(index) {} + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + static RegExpNode* ToNode(RegExpTree* body, int index, + RegExpCompiler* compiler, RegExpNode* on_success); + RegExpCapture* AsCapture() override; + bool IsAnchoredAtStart() override; + bool IsAnchoredAtEnd() override; + Interval CaptureRegisters() override; + bool IsCapture() override; + int min_match() override { return body_->min_match(); } + int max_match() override { return body_->max_match(); } + RegExpTree* body() { return body_; } + void set_body(RegExpTree* body) { body_ = body; } + int index() { return index_; } + static int StartRegister(int index) { return index * 2; } + static int EndRegister(int index) { return index * 2 + 1; } + + private: + RegExpTree* body_; + int index_; +}; + + +class RegExpLookaround final : public RegExpTree { + public: + enum Type { LOOKAHEAD, LOOKBEHIND }; + + RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count, + int capture_from, Type type) + : body_(body), + is_positive_(is_positive), + capture_count_(capture_count), + capture_from_(capture_from), + type_(type) {} + + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpLookaround* AsLookaround() override; + Interval CaptureRegisters() override; + bool IsLookaround() override; + bool IsAnchoredAtStart() override; + int min_match() override { return 0; } + int max_match() override { return 0; } + RegExpTree* body() { return body_; } + bool is_positive() { return is_positive_; } + int capture_count() { return capture_count_; } + int capture_from() { return capture_from_; } + Type type() { return type_; } + + private: + RegExpTree* body_; + bool is_positive_; + int capture_count_; + int capture_from_; + Type type_; +}; + + +class RegExpBackReference final : public RegExpTree { + public: + explicit RegExpBackReference(RegExpCapture* capture) : capture_(capture) {} + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpBackReference* AsBackReference() override; + bool IsBackReference() override; + int min_match() override { return 0; } + // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite + // recursion, we give up. Ignorance is bliss. + int max_match() override { return kInfinity; } + int index() { return capture_->index(); } + RegExpCapture* capture() { return capture_; } + + private: + RegExpCapture* capture_; +}; + + +class RegExpEmpty final : public RegExpTree { + public: + RegExpEmpty() {} + void* Accept(RegExpVisitor* visitor, void* data) override; + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; + RegExpEmpty* AsEmpty() override; + bool IsEmpty() override; + int min_match() override { return 0; } + int max_match() override { return 0; } +}; + +} // namespace internal +} // namespace v8 + +#endif // V8_REGEXP_REGEXP_AST_H_ diff --git a/deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h b/deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h index 6f176cd12c..4d0b1bc0a7 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h +++ b/deps/v8/src/regexp/regexp-macro-assembler-irregexp-inl.h @@ -5,7 +5,7 @@ #ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_IRREGEXP_INL_H_ #define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_IRREGEXP_INL_H_ -#include "src/ast.h" +#include "src/ast/ast.h" #include "src/regexp/bytecodes-irregexp.h" namespace v8 { diff --git a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc index ca567c9bda..751ee441c8 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.cc @@ -4,7 +4,7 @@ #include "src/regexp/regexp-macro-assembler-irregexp.h" -#include "src/ast.h" +#include "src/ast/ast.h" #include "src/regexp/bytecodes-irregexp.h" #include "src/regexp/regexp-macro-assembler.h" #include "src/regexp/regexp-macro-assembler-irregexp-inl.h" @@ -273,8 +273,9 @@ void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) { } -void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) { - Emit(BC_CHECK_NOT_AT_START, 0); +void RegExpMacroAssemblerIrregexp::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + Emit(BC_CHECK_NOT_AT_START, cp_offset); EmitOrLink(on_not_at_start); } @@ -370,20 +371,23 @@ void RegExpMacroAssemblerIrregexp::CheckBitInTable( void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, + bool read_backward, Label* on_not_equal) { DCHECK(start_reg >= 0); DCHECK(start_reg <= kMaxRegister); - Emit(BC_CHECK_NOT_BACK_REF, start_reg); + Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF, + start_reg); EmitOrLink(on_not_equal); } void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_not_equal) { + int start_reg, bool read_backward, Label* on_not_equal) { DCHECK(start_reg >= 0); DCHECK(start_reg <= kMaxRegister); - Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); + Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD + : BC_CHECK_NOT_BACK_REF_NO_CASE, + start_reg); EmitOrLink(on_not_equal); } diff --git a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.h b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.h index bbfe5203d9..f1ace63a74 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-irregexp.h +++ b/deps/v8/src/regexp/regexp-macro-assembler-irregexp.h @@ -66,7 +66,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { virtual void CheckCharacterLT(uc16 limit, Label* on_less); virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); virtual void CheckAtStart(Label* on_at_start); - virtual void CheckNotAtStart(Label* on_not_at_start); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); virtual void CheckNotCharacter(unsigned c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask, @@ -82,8 +82,10 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { uc16 to, Label* on_not_in_range); virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt); virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge); diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc index 2abe55588e..5301ead69b 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.cc @@ -4,7 +4,7 @@ #include "src/regexp/regexp-macro-assembler-tracer.h" -#include "src/ast.h" +#include "src/ast/ast.h" namespace v8 { namespace internal { @@ -13,9 +13,9 @@ RegExpMacroAssemblerTracer::RegExpMacroAssemblerTracer( Isolate* isolate, RegExpMacroAssembler* assembler) : RegExpMacroAssembler(isolate, assembler->zone()), assembler_(assembler) { unsigned int type = assembler->Implementation(); - DCHECK(type < 6); - const char* impl_names[] = {"IA32", "ARM", "ARM64", - "MIPS", "X64", "X87", "Bytecode"}; + DCHECK(type < 8); + const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS", + "PPC", "X64", "X87", "Bytecode"}; PrintF("RegExpMacroAssembler%s();\n", impl_names[type]); } @@ -241,9 +241,11 @@ void RegExpMacroAssemblerTracer::CheckAtStart(Label* on_at_start) { } -void RegExpMacroAssemblerTracer::CheckNotAtStart(Label* on_not_at_start) { - PrintF(" CheckNotAtStart(label[%08x]);\n", LabelToInt(on_not_at_start)); - assembler_->CheckNotAtStart(on_not_at_start); +void RegExpMacroAssemblerTracer::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + PrintF(" CheckNotAtStart(cp_offset=%d, label[%08x]);\n", cp_offset, + LabelToInt(on_not_at_start)); + assembler_->CheckNotAtStart(cp_offset, on_not_at_start); } @@ -349,19 +351,29 @@ void RegExpMacroAssemblerTracer::CheckBitInTable( void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg, + bool read_backward, Label* on_no_match) { - PrintF(" CheckNotBackReference(register=%d, label[%08x]);\n", start_reg, - LabelToInt(on_no_match)); - assembler_->CheckNotBackReference(start_reg, on_no_match); + PrintF(" CheckNotBackReference(register=%d, %s, label[%08x]);\n", start_reg, + read_backward ? "backward" : "forward", LabelToInt(on_no_match)); + assembler_->CheckNotBackReference(start_reg, read_backward, on_no_match); } void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_no_match) { - PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, label[%08x]);\n", - start_reg, LabelToInt(on_no_match)); - assembler_->CheckNotBackReferenceIgnoreCase(start_reg, on_no_match); + int start_reg, bool read_backward, Label* on_no_match) { + PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s, label[%08x]);\n", + start_reg, read_backward ? "backward" : "forward", + LabelToInt(on_no_match)); + assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward, + on_no_match); +} + + +void RegExpMacroAssemblerTracer::CheckPosition(int cp_offset, + Label* on_outside_input) { + PrintF(" CheckPosition(cp_offset=%d, label[%08x]);\n", cp_offset, + LabelToInt(on_outside_input)); + assembler_->CheckPosition(cp_offset, on_outside_input); } diff --git a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h index f9364195fa..77377aac31 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler-tracer.h +++ b/deps/v8/src/regexp/regexp-macro-assembler-tracer.h @@ -30,9 +30,11 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { virtual void CheckCharacterGT(uc16 limit, Label* on_greater); virtual void CheckCharacterLT(uc16 limit, Label* on_less); virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(unsigned c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(unsigned c, @@ -49,6 +51,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { uc16 to, Label* on_not_in_range); virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set); + virtual void CheckPosition(int cp_offset, Label* on_outside_input); virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match); virtual void Fail(); diff --git a/deps/v8/src/regexp/regexp-macro-assembler.cc b/deps/v8/src/regexp/regexp-macro-assembler.cc index 9916d5f32f..caf8b51fe5 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.cc +++ b/deps/v8/src/regexp/regexp-macro-assembler.cc @@ -5,7 +5,6 @@ #include "src/regexp/regexp-macro-assembler.h" #include "src/assembler.h" -#include "src/ast.h" #include "src/isolate-inl.h" #include "src/regexp/regexp-stack.h" #include "src/simulator.h" @@ -189,16 +188,9 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute( Address stack_base = stack_scope.stack()->stack_base(); int direct_call = 0; - int result = CALL_GENERATED_REGEXP_CODE(code->entry(), - input, - start_offset, - input_start, - input_end, - output, - output_size, - stack_base, - direct_call, - isolate); + int result = CALL_GENERATED_REGEXP_CODE( + isolate, code->entry(), input, start_offset, input_start, input_end, + output, output_size, stack_base, direct_call, isolate); DCHECK(result >= RETRY); if (result == EXCEPTION && !isolate->has_pending_exception()) { diff --git a/deps/v8/src/regexp/regexp-macro-assembler.h b/deps/v8/src/regexp/regexp-macro-assembler.h index ea97d5b29b..20599334cd 100644 --- a/deps/v8/src/regexp/regexp-macro-assembler.h +++ b/deps/v8/src/regexp/regexp-macro-assembler.h @@ -5,7 +5,8 @@ #ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_ #define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_ -#include "src/ast.h" +#include "src/assembler.h" +#include "src/regexp/regexp-ast.h" namespace v8 { namespace internal { @@ -71,9 +72,11 @@ class RegExpMacroAssembler { virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0; virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0; virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0; - virtual void CheckNotAtStart(Label* on_not_at_start) = 0; - virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0; + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0; + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match) = 0; virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match) = 0; // Check the current character for a match with a literal character. If we // fail to match then goto the on_failure label. End of input always @@ -102,17 +105,12 @@ class RegExpMacroAssembler { // Checks whether the given offset from the current position is before // the end of the string. May overwrite the current character. - virtual void CheckPosition(int cp_offset, Label* on_outside_input) { - LoadCurrentCharacter(cp_offset, on_outside_input, true); - } + virtual void CheckPosition(int cp_offset, Label* on_outside_input) = 0; // Check whether a standard/default character class matches the current // character. Returns false if the type of special character class does // not have custom support. // May clobber the current loaded character. - virtual bool CheckSpecialCharacterClass(uc16 type, - Label* on_no_match) { - return false; - } + virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match) = 0; virtual void Fail() = 0; virtual Handle<HeapObject> GetCode(Handle<String> source) = 0; virtual void GoTo(Label* label) = 0; diff --git a/deps/v8/src/regexp/regexp-parser.cc b/deps/v8/src/regexp/regexp-parser.cc new file mode 100644 index 0000000000..fa8900342c --- /dev/null +++ b/deps/v8/src/regexp/regexp-parser.cc @@ -0,0 +1,1180 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "src/regexp/regexp-parser.h" + +#include "src/char-predicates-inl.h" +#include "src/factory.h" +#include "src/isolate.h" +#include "src/objects-inl.h" +#include "src/regexp/jsregexp.h" +#include "src/utils.h" + +namespace v8 { +namespace internal { + +RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, + bool multiline, bool unicode, Isolate* isolate, + Zone* zone) + : isolate_(isolate), + zone_(zone), + error_(error), + captures_(NULL), + in_(in), + current_(kEndMarker), + next_pos_(0), + captures_started_(0), + capture_count_(0), + has_more_(true), + multiline_(multiline), + unicode_(unicode), + simple_(false), + contains_anchor_(false), + is_scanned_for_captures_(false), + failed_(false) { + Advance(); +} + + +uc32 RegExpParser::Next() { + if (has_next()) { + return in()->Get(next_pos_); + } else { + return kEndMarker; + } +} + + +void RegExpParser::Advance() { + if (next_pos_ < in()->length()) { + StackLimitCheck check(isolate()); + if (check.HasOverflowed()) { + ReportError(CStrVector(Isolate::kStackOverflowMessage)); + } else if (zone()->excess_allocation()) { + ReportError(CStrVector("Regular expression too large")); + } else { + current_ = in()->Get(next_pos_); + next_pos_++; + // Read the whole surrogate pair in case of unicode flag, if possible. + if (unicode_ && next_pos_ < in()->length() && + unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) { + uc16 trail = in()->Get(next_pos_); + if (unibrow::Utf16::IsTrailSurrogate(trail)) { + current_ = unibrow::Utf16::CombineSurrogatePair( + static_cast<uc16>(current_), trail); + next_pos_++; + } + } + } + } else { + current_ = kEndMarker; + // Advance so that position() points to 1-after-the-last-character. This is + // important so that Reset() to this position works correctly. + next_pos_ = in()->length() + 1; + has_more_ = false; + } +} + + +void RegExpParser::Reset(int pos) { + next_pos_ = pos; + has_more_ = (pos < in()->length()); + Advance(); +} + + +void RegExpParser::Advance(int dist) { + next_pos_ += dist - 1; + Advance(); +} + + +bool RegExpParser::simple() { return simple_; } + + +bool RegExpParser::IsSyntaxCharacter(uc32 c) { + return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' || + c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' || + c == '{' || c == '}' || c == '|'; +} + + +RegExpTree* RegExpParser::ReportError(Vector<const char> message) { + failed_ = true; + *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); + // Zip to the end to make sure the no more input is read. + current_ = kEndMarker; + next_pos_ = in()->length(); + return NULL; +} + + +#define CHECK_FAILED /**/); \ + if (failed_) return NULL; \ + ((void)0 + + +// Pattern :: +// Disjunction +RegExpTree* RegExpParser::ParsePattern() { + RegExpTree* result = ParseDisjunction(CHECK_FAILED); + DCHECK(!has_more()); + // If the result of parsing is a literal string atom, and it has the + // same length as the input, then the atom is identical to the input. + if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { + simple_ = true; + } + return result; +} + + +// Disjunction :: +// Alternative +// Alternative | Disjunction +// Alternative :: +// [empty] +// Term Alternative +// Term :: +// Assertion +// Atom +// Atom Quantifier +RegExpTree* RegExpParser::ParseDisjunction() { + // Used to store current state while parsing subexpressions. + RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, + zone()); + RegExpParserState* state = &initial_state; + // Cache the builder in a local variable for quick access. + RegExpBuilder* builder = initial_state.builder(); + while (true) { + switch (current()) { + case kEndMarker: + if (state->IsSubexpression()) { + // Inside a parenthesized group when hitting end of input. + ReportError(CStrVector("Unterminated group") CHECK_FAILED); + } + DCHECK_EQ(INITIAL, state->group_type()); + // Parsing completed successfully. + return builder->ToRegExp(); + case ')': { + if (!state->IsSubexpression()) { + ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); + } + DCHECK_NE(INITIAL, state->group_type()); + + Advance(); + // End disjunction parsing and convert builder content to new single + // regexp atom. + RegExpTree* body = builder->ToRegExp(); + + int end_capture_index = captures_started(); + + int capture_index = state->capture_index(); + SubexpressionType group_type = state->group_type(); + + // Build result of subexpression. + if (group_type == CAPTURE) { + RegExpCapture* capture = GetCapture(capture_index); + capture->set_body(body); + body = capture; + } else if (group_type != GROUPING) { + DCHECK(group_type == POSITIVE_LOOKAROUND || + group_type == NEGATIVE_LOOKAROUND); + bool is_positive = (group_type == POSITIVE_LOOKAROUND); + body = new (zone()) RegExpLookaround( + body, is_positive, end_capture_index - capture_index, + capture_index, state->lookaround_type()); + } + + // Restore previous state. + state = state->previous_state(); + builder = state->builder(); + + builder->AddAtom(body); + // For compatability with JSC and ES3, we allow quantifiers after + // lookaheads, and break in all cases. + break; + } + case '|': { + Advance(); + builder->NewAlternative(); + continue; + } + case '*': + case '+': + case '?': + return ReportError(CStrVector("Nothing to repeat")); + case '^': { + Advance(); + if (multiline_) { + builder->AddAssertion( + new (zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE)); + } else { + builder->AddAssertion( + new (zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT)); + set_contains_anchor(); + } + continue; + } + case '$': { + Advance(); + RegExpAssertion::AssertionType assertion_type = + multiline_ ? RegExpAssertion::END_OF_LINE + : RegExpAssertion::END_OF_INPUT; + builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type)); + continue; + } + case '.': { + Advance(); + // everything except \x0a, \x0d, \u2028 and \u2029 + ZoneList<CharacterRange>* ranges = + new (zone()) ZoneList<CharacterRange>(2, zone()); + CharacterRange::AddClassEscape('.', ranges, zone()); + RegExpTree* atom = new (zone()) RegExpCharacterClass(ranges, false); + builder->AddAtom(atom); + break; + } + case '(': { + SubexpressionType subexpr_type = CAPTURE; + RegExpLookaround::Type lookaround_type = state->lookaround_type(); + Advance(); + if (current() == '?') { + switch (Next()) { + case ':': + subexpr_type = GROUPING; + break; + case '=': + lookaround_type = RegExpLookaround::LOOKAHEAD; + subexpr_type = POSITIVE_LOOKAROUND; + break; + case '!': + lookaround_type = RegExpLookaround::LOOKAHEAD; + subexpr_type = NEGATIVE_LOOKAROUND; + break; + case '<': + if (FLAG_harmony_regexp_lookbehind) { + Advance(); + lookaround_type = RegExpLookaround::LOOKBEHIND; + if (Next() == '=') { + subexpr_type = POSITIVE_LOOKAROUND; + break; + } else if (Next() == '!') { + subexpr_type = NEGATIVE_LOOKAROUND; + break; + } + } + // Fall through. + default: + ReportError(CStrVector("Invalid group") CHECK_FAILED); + break; + } + Advance(2); + } else { + if (captures_started_ >= kMaxCaptures) { + ReportError(CStrVector("Too many captures") CHECK_FAILED); + } + captures_started_++; + } + // Store current state and begin new disjunction parsing. + state = new (zone()) RegExpParserState( + state, subexpr_type, lookaround_type, captures_started_, zone()); + builder = state->builder(); + continue; + } + case '[': { + RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); + builder->AddAtom(atom); + break; + } + // Atom :: + // \ AtomEscape + case '\\': + switch (Next()) { + case kEndMarker: + return ReportError(CStrVector("\\ at end of pattern")); + case 'b': + Advance(2); + builder->AddAssertion( + new (zone()) RegExpAssertion(RegExpAssertion::BOUNDARY)); + continue; + case 'B': + Advance(2); + builder->AddAssertion( + new (zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); + continue; + // AtomEscape :: + // CharacterClassEscape + // + // CharacterClassEscape :: one of + // d D s S w W + case 'd': + case 'D': + case 's': + case 'S': + case 'w': + case 'W': { + uc32 c = Next(); + Advance(2); + ZoneList<CharacterRange>* ranges = + new (zone()) ZoneList<CharacterRange>(2, zone()); + CharacterRange::AddClassEscape(c, ranges, zone()); + RegExpTree* atom = new (zone()) RegExpCharacterClass(ranges, false); + builder->AddAtom(atom); + break; + } + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + int index = 0; + if (ParseBackReferenceIndex(&index)) { + if (state->IsInsideCaptureGroup(index)) { + // The back reference is inside the capture group it refers to. + // Nothing can possibly have been captured yet, so we use empty + // instead. This ensures that, when checking a back reference, + // the capture registers of the referenced capture are either + // both set or both cleared. + builder->AddEmpty(); + } else { + RegExpCapture* capture = GetCapture(index); + RegExpTree* atom = new (zone()) RegExpBackReference(capture); + builder->AddAtom(atom); + } + break; + } + uc32 first_digit = Next(); + if (first_digit == '8' || first_digit == '9') { + // If the 'u' flag is present, only syntax characters can be + // escaped, + // no other identity escapes are allowed. If the 'u' flag is not + // present, all identity escapes are allowed. + if (!unicode_) { + builder->AddCharacter(first_digit); + Advance(2); + } else { + return ReportError(CStrVector("Invalid escape")); + } + break; + } + } + // FALLTHROUGH + case '0': { + Advance(); + uc32 octal = ParseOctalLiteral(); + builder->AddCharacter(octal); + break; + } + // ControlEscape :: one of + // f n r t v + case 'f': + Advance(2); + builder->AddCharacter('\f'); + break; + case 'n': + Advance(2); + builder->AddCharacter('\n'); + break; + case 'r': + Advance(2); + builder->AddCharacter('\r'); + break; + case 't': + Advance(2); + builder->AddCharacter('\t'); + break; + case 'v': + Advance(2); + builder->AddCharacter('\v'); + break; + case 'c': { + Advance(); + uc32 controlLetter = Next(); + // Special case if it is an ASCII letter. + // Convert lower case letters to uppercase. + uc32 letter = controlLetter & ~('a' ^ 'A'); + if (letter < 'A' || 'Z' < letter) { + // controlLetter is not in range 'A'-'Z' or 'a'-'z'. + // This is outside the specification. We match JSC in + // reading the backslash as a literal character instead + // of as starting an escape. + builder->AddCharacter('\\'); + } else { + Advance(2); + builder->AddCharacter(controlLetter & 0x1f); + } + break; + } + case 'x': { + Advance(2); + uc32 value; + if (ParseHexEscape(2, &value)) { + builder->AddCharacter(value); + } else if (!unicode_) { + builder->AddCharacter('x'); + } else { + // If the 'u' flag is present, invalid escapes are not treated as + // identity escapes. + return ReportError(CStrVector("Invalid escape")); + } + break; + } + case 'u': { + Advance(2); + uc32 value; + if (ParseUnicodeEscape(&value)) { + builder->AddUnicodeCharacter(value); + } else if (!unicode_) { + builder->AddCharacter('u'); + } else { + // If the 'u' flag is present, invalid escapes are not treated as + // identity escapes. + return ReportError(CStrVector("Invalid unicode escape")); + } + break; + } + default: + Advance(); + // If the 'u' flag is present, only syntax characters can be + // escaped, no + // other identity escapes are allowed. If the 'u' flag is not + // present, + // all identity escapes are allowed. + if (!unicode_ || IsSyntaxCharacter(current())) { + builder->AddCharacter(current()); + Advance(); + } else { + return ReportError(CStrVector("Invalid escape")); + } + break; + } + break; + case '{': { + int dummy; + if (ParseIntervalQuantifier(&dummy, &dummy)) { + ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); + } + // fallthrough + } + default: + builder->AddUnicodeCharacter(current()); + Advance(); + break; + } // end switch(current()) + + int min; + int max; + switch (current()) { + // QuantifierPrefix :: + // * + // + + // ? + // { + case '*': + min = 0; + max = RegExpTree::kInfinity; + Advance(); + break; + case '+': + min = 1; + max = RegExpTree::kInfinity; + Advance(); + break; + case '?': + min = 0; + max = 1; + Advance(); + break; + case '{': + if (ParseIntervalQuantifier(&min, &max)) { + if (max < min) { + ReportError(CStrVector("numbers out of order in {} quantifier.") + CHECK_FAILED); + } + break; + } else { + continue; + } + default: + continue; + } + RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY; + if (current() == '?') { + quantifier_type = RegExpQuantifier::NON_GREEDY; + Advance(); + } else if (FLAG_regexp_possessive_quantifier && current() == '+') { + // FLAG_regexp_possessive_quantifier is a debug-only flag. + quantifier_type = RegExpQuantifier::POSSESSIVE; + Advance(); + } + builder->AddQuantifierToAtom(min, max, quantifier_type); + } +} + + +#ifdef DEBUG +// Currently only used in an DCHECK. +static bool IsSpecialClassEscape(uc32 c) { + switch (c) { + case 'd': + case 'D': + case 's': + case 'S': + case 'w': + case 'W': + return true; + default: + return false; + } +} +#endif + + +// In order to know whether an escape is a backreference or not we have to scan +// the entire regexp and find the number of capturing parentheses. However we +// don't want to scan the regexp twice unless it is necessary. This mini-parser +// is called when needed. It can see the difference between capturing and +// noncapturing parentheses and can skip character classes and backslash-escaped +// characters. +void RegExpParser::ScanForCaptures() { + // Start with captures started previous to current position + int capture_count = captures_started(); + // Add count of captures after this position. + int n; + while ((n = current()) != kEndMarker) { + Advance(); + switch (n) { + case '\\': + Advance(); + break; + case '[': { + int c; + while ((c = current()) != kEndMarker) { + Advance(); + if (c == '\\') { + Advance(); + } else { + if (c == ']') break; + } + } + break; + } + case '(': + if (current() != '?') capture_count++; + break; + } + } + capture_count_ = capture_count; + is_scanned_for_captures_ = true; +} + + +bool RegExpParser::ParseBackReferenceIndex(int* index_out) { + DCHECK_EQ('\\', current()); + DCHECK('1' <= Next() && Next() <= '9'); + // Try to parse a decimal literal that is no greater than the total number + // of left capturing parentheses in the input. + int start = position(); + int value = Next() - '0'; + Advance(2); + while (true) { + uc32 c = current(); + if (IsDecimalDigit(c)) { + value = 10 * value + (c - '0'); + if (value > kMaxCaptures) { + Reset(start); + return false; + } + Advance(); + } else { + break; + } + } + if (value > captures_started()) { + if (!is_scanned_for_captures_) { + int saved_position = position(); + ScanForCaptures(); + Reset(saved_position); + } + if (value > capture_count_) { + Reset(start); + return false; + } + } + *index_out = value; + return true; +} + + +RegExpCapture* RegExpParser::GetCapture(int index) { + // The index for the capture groups are one-based. Its index in the list is + // zero-based. + int know_captures = + is_scanned_for_captures_ ? capture_count_ : captures_started_; + DCHECK(index <= know_captures); + if (captures_ == NULL) { + captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); + } + while (captures_->length() < know_captures) { + captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); + } + return captures_->at(index - 1); +} + + +bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { + for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { + if (s->group_type() != CAPTURE) continue; + // Return true if we found the matching capture index. + if (index == s->capture_index()) return true; + // Abort if index is larger than what has been parsed up till this state. + if (index > s->capture_index()) return false; + } + return false; +} + + +// QuantifierPrefix :: +// { DecimalDigits } +// { DecimalDigits , } +// { DecimalDigits , DecimalDigits } +// +// Returns true if parsing succeeds, and set the min_out and max_out +// values. Values are truncated to RegExpTree::kInfinity if they overflow. +bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { + DCHECK_EQ(current(), '{'); + int start = position(); + Advance(); + int min = 0; + if (!IsDecimalDigit(current())) { + Reset(start); + return false; + } + while (IsDecimalDigit(current())) { + int next = current() - '0'; + if (min > (RegExpTree::kInfinity - next) / 10) { + // Overflow. Skip past remaining decimal digits and return -1. + do { + Advance(); + } while (IsDecimalDigit(current())); + min = RegExpTree::kInfinity; + break; + } + min = 10 * min + next; + Advance(); + } + int max = 0; + if (current() == '}') { + max = min; + Advance(); + } else if (current() == ',') { + Advance(); + if (current() == '}') { + max = RegExpTree::kInfinity; + Advance(); + } else { + while (IsDecimalDigit(current())) { + int next = current() - '0'; + if (max > (RegExpTree::kInfinity - next) / 10) { + do { + Advance(); + } while (IsDecimalDigit(current())); + max = RegExpTree::kInfinity; + break; + } + max = 10 * max + next; + Advance(); + } + if (current() != '}') { + Reset(start); + return false; + } + Advance(); + } + } else { + Reset(start); + return false; + } + *min_out = min; + *max_out = max; + return true; +} + + +uc32 RegExpParser::ParseOctalLiteral() { + DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); + // For compatibility with some other browsers (not all), we parse + // up to three octal digits with a value below 256. + uc32 value = current() - '0'; + Advance(); + if ('0' <= current() && current() <= '7') { + value = value * 8 + current() - '0'; + Advance(); + if (value < 32 && '0' <= current() && current() <= '7') { + value = value * 8 + current() - '0'; + Advance(); + } + } + return value; +} + + +bool RegExpParser::ParseHexEscape(int length, uc32* value) { + int start = position(); + uc32 val = 0; + for (int i = 0; i < length; ++i) { + uc32 c = current(); + int d = HexValue(c); + if (d < 0) { + Reset(start); + return false; + } + val = val * 16 + d; + Advance(); + } + *value = val; + return true; +} + + +bool RegExpParser::ParseUnicodeEscape(uc32* value) { + // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are + // allowed). In the latter case, the number of hex digits between { } is + // arbitrary. \ and u have already been read. + if (current() == '{' && unicode_) { + int start = position(); + Advance(); + if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) { + if (current() == '}') { + Advance(); + return true; + } + } + Reset(start); + return false; + } + // \u but no {, or \u{...} escapes not allowed. + return ParseHexEscape(4, value); +} + + +bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { + uc32 x = 0; + int d = HexValue(current()); + if (d < 0) { + return false; + } + while (d >= 0) { + x = x * 16 + d; + if (x > max_value) { + return false; + } + Advance(); + d = HexValue(current()); + } + *value = x; + return true; +} + + +uc32 RegExpParser::ParseClassCharacterEscape() { + DCHECK(current() == '\\'); + DCHECK(has_next() && !IsSpecialClassEscape(Next())); + Advance(); + switch (current()) { + case 'b': + Advance(); + return '\b'; + // ControlEscape :: one of + // f n r t v + case 'f': + Advance(); + return '\f'; + case 'n': + Advance(); + return '\n'; + case 'r': + Advance(); + return '\r'; + case 't': + Advance(); + return '\t'; + case 'v': + Advance(); + return '\v'; + case 'c': { + uc32 controlLetter = Next(); + uc32 letter = controlLetter & ~('A' ^ 'a'); + // For compatibility with JSC, inside a character class + // we also accept digits and underscore as control characters. + if ((controlLetter >= '0' && controlLetter <= '9') || + controlLetter == '_' || (letter >= 'A' && letter <= 'Z')) { + Advance(2); + // Control letters mapped to ASCII control characters in the range + // 0x00-0x1f. + return controlLetter & 0x1f; + } + // We match JSC in reading the backslash as a literal + // character instead of as starting an escape. + return '\\'; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + // For compatibility, we interpret a decimal escape that isn't + // a back reference (and therefore either \0 or not valid according + // to the specification) as a 1..3 digit octal character code. + return ParseOctalLiteral(); + case 'x': { + Advance(); + uc32 value; + if (ParseHexEscape(2, &value)) { + return value; + } + if (!unicode_) { + // If \x is not followed by a two-digit hexadecimal, treat it + // as an identity escape. + return 'x'; + } + // If the 'u' flag is present, invalid escapes are not treated as + // identity escapes. + ReportError(CStrVector("Invalid escape")); + return 0; + } + case 'u': { + Advance(); + uc32 value; + if (ParseUnicodeEscape(&value)) { + return value; + } + if (!unicode_) { + return 'u'; + } + // If the 'u' flag is present, invalid escapes are not treated as + // identity escapes. + ReportError(CStrVector("Invalid unicode escape")); + return 0; + } + default: { + uc32 result = current(); + // If the 'u' flag is present, only syntax characters can be escaped, no + // other identity escapes are allowed. If the 'u' flag is not present, all + // identity escapes are allowed. + if (!unicode_ || IsSyntaxCharacter(result)) { + Advance(); + return result; + } + ReportError(CStrVector("Invalid escape")); + return 0; + } + } + return 0; +} + + +CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { + DCHECK_EQ(0, *char_class); + uc32 first = current(); + if (first == '\\') { + switch (Next()) { + case 'w': + case 'W': + case 'd': + case 'D': + case 's': + case 'S': { + *char_class = Next(); + Advance(2); + return CharacterRange::Singleton(0); // Return dummy value. + } + case kEndMarker: + return ReportError(CStrVector("\\ at end of pattern")); + default: + uc32 c = ParseClassCharacterEscape(CHECK_FAILED); + return CharacterRange::Singleton(c); + } + } else { + Advance(); + return CharacterRange::Singleton(first); + } +} + + +static const uc16 kNoCharClass = 0; + +// Adds range or pre-defined character class to character ranges. +// If char_class is not kInvalidClass, it's interpreted as a class +// escape (i.e., 's' means whitespace, from '\s'). +static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, + uc16 char_class, CharacterRange range, + Zone* zone) { + if (char_class != kNoCharClass) { + CharacterRange::AddClassEscape(char_class, ranges, zone); + } else { + ranges->Add(range, zone); + } +} + + +RegExpTree* RegExpParser::ParseCharacterClass() { + static const char* kUnterminated = "Unterminated character class"; + static const char* kRangeOutOfOrder = "Range out of order in character class"; + + DCHECK_EQ(current(), '['); + Advance(); + bool is_negated = false; + if (current() == '^') { + is_negated = true; + Advance(); + } + ZoneList<CharacterRange>* ranges = + new (zone()) ZoneList<CharacterRange>(2, zone()); + while (has_more() && current() != ']') { + uc16 char_class = kNoCharClass; + CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); + if (current() == '-') { + Advance(); + if (current() == kEndMarker) { + // If we reach the end we break out of the loop and let the + // following code report an error. + break; + } else if (current() == ']') { + AddRangeOrEscape(ranges, char_class, first, zone()); + ranges->Add(CharacterRange::Singleton('-'), zone()); + break; + } + uc16 char_class_2 = kNoCharClass; + CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); + if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { + // Either end is an escaped character class. Treat the '-' verbatim. + AddRangeOrEscape(ranges, char_class, first, zone()); + ranges->Add(CharacterRange::Singleton('-'), zone()); + AddRangeOrEscape(ranges, char_class_2, next, zone()); + continue; + } + if (first.from() > next.to()) { + return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); + } + ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); + } else { + AddRangeOrEscape(ranges, char_class, first, zone()); + } + } + if (!has_more()) { + return ReportError(CStrVector(kUnterminated) CHECK_FAILED); + } + Advance(); + if (ranges->length() == 0) { + ranges->Add(CharacterRange::Everything(), zone()); + is_negated = !is_negated; + } + return new (zone()) RegExpCharacterClass(ranges, is_negated); +} + + +#undef CHECK_FAILED + + +bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, + FlatStringReader* input, bool multiline, + bool unicode, RegExpCompileData* result) { + DCHECK(result != NULL); + RegExpParser parser(input, &result->error, multiline, unicode, isolate, zone); + RegExpTree* tree = parser.ParsePattern(); + if (parser.failed()) { + DCHECK(tree == NULL); + DCHECK(!result->error.is_null()); + } else { + DCHECK(tree != NULL); + DCHECK(result->error.is_null()); + if (FLAG_trace_regexp_parser) { + OFStream os(stdout); + tree->Print(os, zone); + os << "\n"; + } + result->tree = tree; + int capture_count = parser.captures_started(); + result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; + result->contains_anchor = parser.contains_anchor(); + result->capture_count = capture_count; + } + return !parser.failed(); +} + + +RegExpBuilder::RegExpBuilder(Zone* zone) + : zone_(zone), + pending_empty_(false), + characters_(NULL), + terms_(), + alternatives_() +#ifdef DEBUG + , + last_added_(ADD_NONE) +#endif +{ +} + + +void RegExpBuilder::FlushCharacters() { + pending_empty_ = false; + if (characters_ != NULL) { + RegExpTree* atom = new (zone()) RegExpAtom(characters_->ToConstVector()); + characters_ = NULL; + text_.Add(atom, zone()); + LAST(ADD_ATOM); + } +} + + +void RegExpBuilder::FlushText() { + FlushCharacters(); + int num_text = text_.length(); + if (num_text == 0) { + return; + } else if (num_text == 1) { + terms_.Add(text_.last(), zone()); + } else { + RegExpText* text = new (zone()) RegExpText(zone()); + for (int i = 0; i < num_text; i++) text_.Get(i)->AppendToText(text, zone()); + terms_.Add(text, zone()); + } + text_.Clear(); +} + + +void RegExpBuilder::AddCharacter(uc16 c) { + pending_empty_ = false; + if (characters_ == NULL) { + characters_ = new (zone()) ZoneList<uc16>(4, zone()); + } + characters_->Add(c, zone()); + LAST(ADD_CHAR); +} + + +void RegExpBuilder::AddUnicodeCharacter(uc32 c) { + if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) { + ZoneList<uc16> surrogate_pair(2, zone()); + surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone()); + surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone()); + RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector()); + AddAtom(atom); + } else { + AddCharacter(static_cast<uc16>(c)); + } +} + + +void RegExpBuilder::AddEmpty() { pending_empty_ = true; } + + +void RegExpBuilder::AddAtom(RegExpTree* term) { + if (term->IsEmpty()) { + AddEmpty(); + return; + } + if (term->IsTextElement()) { + FlushCharacters(); + text_.Add(term, zone()); + } else { + FlushText(); + terms_.Add(term, zone()); + } + LAST(ADD_ATOM); +} + + +void RegExpBuilder::AddAssertion(RegExpTree* assert) { + FlushText(); + terms_.Add(assert, zone()); + LAST(ADD_ASSERT); +} + + +void RegExpBuilder::NewAlternative() { FlushTerms(); } + + +void RegExpBuilder::FlushTerms() { + FlushText(); + int num_terms = terms_.length(); + RegExpTree* alternative; + if (num_terms == 0) { + alternative = new (zone()) RegExpEmpty(); + } else if (num_terms == 1) { + alternative = terms_.last(); + } else { + alternative = new (zone()) RegExpAlternative(terms_.GetList(zone())); + } + alternatives_.Add(alternative, zone()); + terms_.Clear(); + LAST(ADD_NONE); +} + + +RegExpTree* RegExpBuilder::ToRegExp() { + FlushTerms(); + int num_alternatives = alternatives_.length(); + if (num_alternatives == 0) return new (zone()) RegExpEmpty(); + if (num_alternatives == 1) return alternatives_.last(); + return new (zone()) RegExpDisjunction(alternatives_.GetList(zone())); +} + + +void RegExpBuilder::AddQuantifierToAtom( + int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { + if (pending_empty_) { + pending_empty_ = false; + return; + } + RegExpTree* atom; + if (characters_ != NULL) { + DCHECK(last_added_ == ADD_CHAR); + // Last atom was character. + Vector<const uc16> char_vector = characters_->ToConstVector(); + int num_chars = char_vector.length(); + if (num_chars > 1) { + Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); + text_.Add(new (zone()) RegExpAtom(prefix), zone()); + char_vector = char_vector.SubVector(num_chars - 1, num_chars); + } + characters_ = NULL; + atom = new (zone()) RegExpAtom(char_vector); + FlushText(); + } else if (text_.length() > 0) { + DCHECK(last_added_ == ADD_ATOM); + atom = text_.RemoveLast(); + FlushText(); + } else if (terms_.length() > 0) { + DCHECK(last_added_ == ADD_ATOM); + atom = terms_.RemoveLast(); + if (atom->max_match() == 0) { + // Guaranteed to only match an empty string. + LAST(ADD_TERM); + if (min == 0) { + return; + } + terms_.Add(atom, zone()); + return; + } + } else { + // Only call immediately after adding an atom or character! + UNREACHABLE(); + return; + } + terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), + zone()); + LAST(ADD_TERM); +} + +} // namespace internal +} // namespace v8 diff --git a/deps/v8/src/regexp/regexp-parser.h b/deps/v8/src/regexp/regexp-parser.h new file mode 100644 index 0000000000..af9b765fba --- /dev/null +++ b/deps/v8/src/regexp/regexp-parser.h @@ -0,0 +1,277 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_REGEXP_REGEXP_PARSER_H_ +#define V8_REGEXP_REGEXP_PARSER_H_ + +#include "src/objects.h" +#include "src/regexp/regexp-ast.h" +#include "src/zone.h" + +namespace v8 { +namespace internal { + +struct RegExpCompileData; + + +// A BufferedZoneList is an automatically growing list, just like (and backed +// by) a ZoneList, that is optimized for the case of adding and removing +// a single element. The last element added is stored outside the backing list, +// and if no more than one element is ever added, the ZoneList isn't even +// allocated. +// Elements must not be NULL pointers. +template <typename T, int initial_size> +class BufferedZoneList { + public: + BufferedZoneList() : list_(NULL), last_(NULL) {} + + // Adds element at end of list. This element is buffered and can + // be read using last() or removed using RemoveLast until a new Add or until + // RemoveLast or GetList has been called. + void Add(T* value, Zone* zone) { + if (last_ != NULL) { + if (list_ == NULL) { + list_ = new (zone) ZoneList<T*>(initial_size, zone); + } + list_->Add(last_, zone); + } + last_ = value; + } + + T* last() { + DCHECK(last_ != NULL); + return last_; + } + + T* RemoveLast() { + DCHECK(last_ != NULL); + T* result = last_; + if ((list_ != NULL) && (list_->length() > 0)) + last_ = list_->RemoveLast(); + else + last_ = NULL; + return result; + } + + T* Get(int i) { + DCHECK((0 <= i) && (i < length())); + if (list_ == NULL) { + DCHECK_EQ(0, i); + return last_; + } else { + if (i == list_->length()) { + DCHECK(last_ != NULL); + return last_; + } else { + return list_->at(i); + } + } + } + + void Clear() { + list_ = NULL; + last_ = NULL; + } + + int length() { + int length = (list_ == NULL) ? 0 : list_->length(); + return length + ((last_ == NULL) ? 0 : 1); + } + + ZoneList<T*>* GetList(Zone* zone) { + if (list_ == NULL) { + list_ = new (zone) ZoneList<T*>(initial_size, zone); + } + if (last_ != NULL) { + list_->Add(last_, zone); + last_ = NULL; + } + return list_; + } + + private: + ZoneList<T*>* list_; + T* last_; +}; + + +// Accumulates RegExp atoms and assertions into lists of terms and alternatives. +class RegExpBuilder : public ZoneObject { + public: + explicit RegExpBuilder(Zone* zone); + void AddCharacter(uc16 character); + void AddUnicodeCharacter(uc32 character); + // "Adds" an empty expression. Does nothing except consume a + // following quantifier + void AddEmpty(); + void AddAtom(RegExpTree* tree); + void AddAssertion(RegExpTree* tree); + void NewAlternative(); // '|' + void AddQuantifierToAtom(int min, int max, + RegExpQuantifier::QuantifierType type); + RegExpTree* ToRegExp(); + + private: + void FlushCharacters(); + void FlushText(); + void FlushTerms(); + Zone* zone() const { return zone_; } + + Zone* zone_; + bool pending_empty_; + ZoneList<uc16>* characters_; + BufferedZoneList<RegExpTree, 2> terms_; + BufferedZoneList<RegExpTree, 2> text_; + BufferedZoneList<RegExpTree, 2> alternatives_; +#ifdef DEBUG + enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; +#define LAST(x) last_added_ = x; +#else +#define LAST(x) +#endif +}; + + +class RegExpParser BASE_EMBEDDED { + public: + RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, + bool unicode, Isolate* isolate, Zone* zone); + + static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, + bool multiline, bool unicode, + RegExpCompileData* result); + + RegExpTree* ParsePattern(); + RegExpTree* ParseDisjunction(); + RegExpTree* ParseGroup(); + RegExpTree* ParseCharacterClass(); + + // Parses a {...,...} quantifier and stores the range in the given + // out parameters. + bool ParseIntervalQuantifier(int* min_out, int* max_out); + + // Parses and returns a single escaped character. The character + // must not be 'b' or 'B' since they are usually handle specially. + uc32 ParseClassCharacterEscape(); + + // Checks whether the following is a length-digit hexadecimal number, + // and sets the value if it is. + bool ParseHexEscape(int length, uc32* value); + bool ParseUnicodeEscape(uc32* value); + bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value); + + uc32 ParseOctalLiteral(); + + // Tries to parse the input as a back reference. If successful it + // stores the result in the output parameter and returns true. If + // it fails it will push back the characters read so the same characters + // can be reparsed. + bool ParseBackReferenceIndex(int* index_out); + + CharacterRange ParseClassAtom(uc16* char_class); + RegExpTree* ReportError(Vector<const char> message); + void Advance(); + void Advance(int dist); + void Reset(int pos); + + // Reports whether the pattern might be used as a literal search string. + // Only use if the result of the parse is a single atom node. + bool simple(); + bool contains_anchor() { return contains_anchor_; } + void set_contains_anchor() { contains_anchor_ = true; } + int captures_started() { return captures_started_; } + int position() { return next_pos_ - 1; } + bool failed() { return failed_; } + + static bool IsSyntaxCharacter(uc32 c); + + static const int kMaxCaptures = 1 << 16; + static const uc32 kEndMarker = (1 << 21); + + private: + enum SubexpressionType { + INITIAL, + CAPTURE, // All positive values represent captures. + POSITIVE_LOOKAROUND, + NEGATIVE_LOOKAROUND, + GROUPING + }; + + class RegExpParserState : public ZoneObject { + public: + RegExpParserState(RegExpParserState* previous_state, + SubexpressionType group_type, + RegExpLookaround::Type lookaround_type, + int disjunction_capture_index, Zone* zone) + : previous_state_(previous_state), + builder_(new (zone) RegExpBuilder(zone)), + group_type_(group_type), + lookaround_type_(lookaround_type), + disjunction_capture_index_(disjunction_capture_index) {} + // Parser state of containing expression, if any. + RegExpParserState* previous_state() { return previous_state_; } + bool IsSubexpression() { return previous_state_ != NULL; } + // RegExpBuilder building this regexp's AST. + RegExpBuilder* builder() { return builder_; } + // Type of regexp being parsed (parenthesized group or entire regexp). + SubexpressionType group_type() { return group_type_; } + // Lookahead or Lookbehind. + RegExpLookaround::Type lookaround_type() { return lookaround_type_; } + // Index in captures array of first capture in this sub-expression, if any. + // Also the capture index of this sub-expression itself, if group_type + // is CAPTURE. + int capture_index() { return disjunction_capture_index_; } + + // Check whether the parser is inside a capture group with the given index. + bool IsInsideCaptureGroup(int index); + + private: + // Linked list implementation of stack of states. + RegExpParserState* previous_state_; + // Builder for the stored disjunction. + RegExpBuilder* builder_; + // Stored disjunction type (capture, look-ahead or grouping), if any. + SubexpressionType group_type_; + // Stored read direction. + RegExpLookaround::Type lookaround_type_; + // Stored disjunction's capture index (if any). + int disjunction_capture_index_; + }; + + // Return the 1-indexed RegExpCapture object, allocate if necessary. + RegExpCapture* GetCapture(int index); + + Isolate* isolate() { return isolate_; } + Zone* zone() const { return zone_; } + + uc32 current() { return current_; } + bool has_more() { return has_more_; } + bool has_next() { return next_pos_ < in()->length(); } + uc32 Next(); + FlatStringReader* in() { return in_; } + void ScanForCaptures(); + + Isolate* isolate_; + Zone* zone_; + Handle<String>* error_; + ZoneList<RegExpCapture*>* captures_; + FlatStringReader* in_; + uc32 current_; + int next_pos_; + int captures_started_; + // The capture count is only valid after we have scanned for captures. + int capture_count_; + bool has_more_; + bool multiline_; + bool unicode_; + bool simple_; + bool contains_anchor_; + bool is_scanned_for_captures_; + bool failed_; +}; + +} // namespace internal +} // namespace v8 + +#endif // V8_REGEXP_REGEXP_PARSER_H_ diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc index 969edc1b3b..286f159cc8 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.cc @@ -64,7 +64,8 @@ namespace internal { * - backup of callee save registers (rbx, possibly rsi and rdi). * - success counter (only useful for global regexp to count matches) * - Offset of location before start of input (effectively character - * position -1). Used to initialize capture registers to a non-position. + * string start - 1). Used to initialize capture registers to a + * non-position. * - At start of string (if 1, we are starting at the start of the * string, otherwise 0) * - register 0 rbp[-n] (Only positions must be stored in the first @@ -94,7 +95,7 @@ RegExpMacroAssemblerX64::RegExpMacroAssemblerX64(Isolate* isolate, Zone* zone, Mode mode, int registers_to_save) : NativeRegExpMacroAssembler(isolate, zone), - masm_(isolate, NULL, kRegExpCodeSize), + masm_(isolate, NULL, kRegExpCodeSize, CodeObjectRequired::kYes), no_root_array_scope_(&masm_), code_relative_fixup_positions_(4, zone), mode_(mode), @@ -171,25 +172,16 @@ void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { - Label not_at_start; - // Did we start the match at the start of the string at all? - __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); - BranchOrBacktrack(not_equal, ¬_at_start); - // If we did, are we still at the start of the input? - __ leap(rax, Operand(rsi, rdi, times_1, 0)); - __ cmpp(rax, Operand(rbp, kInputStart)); + __ leap(rax, Operand(rdi, -char_size())); + __ cmpp(rax, Operand(rbp, kStringStartMinusOne)); BranchOrBacktrack(equal, on_at_start); - __ bind(¬_at_start); } -void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) { - // Did we start the match at the start of the string at all? - __ cmpl(Operand(rbp, kStartIndex), Immediate(0)); - BranchOrBacktrack(not_equal, on_not_at_start); - // If we did, are we still at the start of the input? - __ leap(rax, Operand(rsi, rdi, times_1, 0)); - __ cmpp(rax, Operand(rbp, kInputStart)); +void RegExpMacroAssemblerX64::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + __ leap(rax, Operand(rdi, -char_size() + cp_offset * char_size())); + __ cmpp(rax, Operand(rbp, kStringStartMinusOne)); BranchOrBacktrack(not_equal, on_not_at_start); } @@ -211,8 +203,7 @@ void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) { void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_no_match) { + int start_reg, bool read_backward, Label* on_no_match) { Label fallthrough; ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture ReadPositionFromRegister(rbx, start_reg + 1); // Offset of end of capture @@ -222,23 +213,25 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( // rdx = Start offset of capture. // rbx = Length of capture - // If length is negative, this code will fail (it's a symptom of a partial or - // illegal capture where start of capture after end of capture). - // This must not happen (no back-reference can reference a capture that wasn't - // closed before in the reg-exp, and we must not generate code that can cause - // this condition). - - // If length is zero, either the capture is empty or it is nonparticipating. - // In either case succeed immediately. + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ j(equal, &fallthrough); // ----------------------- // rdx - Start of capture // rbx - length of capture // Check that there are sufficient characters left in the input. - __ movl(rax, rdi); - __ addl(rax, rbx); - BranchOrBacktrack(greater, on_no_match); + if (read_backward) { + __ movl(rax, Operand(rbp, kStringStartMinusOne)); + __ addl(rax, rbx); + __ cmpl(rdi, rax); + BranchOrBacktrack(less_equal, on_no_match); + } else { + __ movl(rax, rdi); + __ addl(rax, rbx); + BranchOrBacktrack(greater, on_no_match); + } if (mode_ == LATIN1) { Label loop_increment; @@ -248,6 +241,9 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( __ leap(r9, Operand(rsi, rdx, times_1, 0)); __ leap(r11, Operand(rsi, rdi, times_1, 0)); + if (read_backward) { + __ subp(r11, rbx); // Offset by length when matching backwards. + } __ addp(rbx, r9); // End of capture // --------------------- // r11 - current input character address @@ -290,6 +286,11 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( // Compute new value of character position after the matched part. __ movp(rdi, r11); __ subq(rdi, rsi); + if (read_backward) { + // Subtract match length if we matched backward. + __ addq(rdi, register_location(start_reg)); + __ subq(rdi, register_location(start_reg + 1)); + } } else { DCHECK(mode_ == UC16); // Save important/volatile registers before calling C function. @@ -313,6 +314,9 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( __ leap(rcx, Operand(rsi, rdx, times_1, 0)); // Set byte_offset2. __ leap(rdx, Operand(rsi, rdi, times_1, 0)); + if (read_backward) { + __ subq(rdx, rbx); + } // Set byte_length. __ movp(r8, rbx); // Isolate. @@ -324,6 +328,9 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( __ leap(rdi, Operand(rsi, rdx, times_1, 0)); // Set byte_offset2. __ movp(rsi, rax); + if (read_backward) { + __ subq(rsi, rbx); + } // Set byte_length. __ movp(rdx, rbx); // Isolate. @@ -349,17 +356,21 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( // Check if function returned non-zero for success or zero for failure. __ testp(rax, rax); BranchOrBacktrack(zero, on_no_match); - // On success, increment position by length of capture. + // On success, advance position by length of capture. // Requires that rbx is callee save (true for both Win64 and AMD64 ABIs). - __ addq(rdi, rbx); + if (read_backward) { + __ subq(rdi, rbx); + } else { + __ addq(rdi, rbx); + } } __ bind(&fallthrough); } -void RegExpMacroAssemblerX64::CheckNotBackReference( - int start_reg, - Label* on_no_match) { +void RegExpMacroAssemblerX64::CheckNotBackReference(int start_reg, + bool read_backward, + Label* on_no_match) { Label fallthrough; // Find length of back-referenced capture. @@ -367,25 +378,31 @@ void RegExpMacroAssemblerX64::CheckNotBackReference( ReadPositionFromRegister(rax, start_reg + 1); // Offset of end of capture __ subp(rax, rdx); // Length to check. - // Fail on partial or illegal capture (start of capture after end of capture). - // This must not happen (no back-reference can reference a capture that wasn't - // closed before in the reg-exp). - __ Check(greater_equal, kInvalidCaptureReferenced); - - // Succeed on empty capture (including non-participating capture) + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ j(equal, &fallthrough); // ----------------------- // rdx - Start of capture // rax - length of capture - // Check that there are sufficient characters left in the input. - __ movl(rbx, rdi); - __ addl(rbx, rax); - BranchOrBacktrack(greater, on_no_match); + if (read_backward) { + __ movl(rbx, Operand(rbp, kStringStartMinusOne)); + __ addl(rbx, rax); + __ cmpl(rdi, rbx); + BranchOrBacktrack(less_equal, on_no_match); + } else { + __ movl(rbx, rdi); + __ addl(rbx, rax); + BranchOrBacktrack(greater, on_no_match); + } // Compute pointers to match string and capture string __ leap(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match. + if (read_backward) { + __ subq(rbx, rax); // Offset by length when matching backwards. + } __ addp(rdx, rsi); // Start of capture. __ leap(r9, Operand(rdx, rax, times_1, 0)); // End of capture @@ -416,6 +433,11 @@ void RegExpMacroAssemblerX64::CheckNotBackReference( // Set current character position to position after match. __ movp(rdi, rbx); __ subq(rdi, rsi); + if (read_backward) { + // Subtract match length if we matched backward. + __ addq(rdi, register_location(start_reg)); + __ subq(rdi, register_location(start_reg + 1)); + } __ bind(&fallthrough); } @@ -682,7 +704,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { #endif __ Push(Immediate(0)); // Number of successful matches in a global regexp. - __ Push(Immediate(0)); // Make room for "input start - 1" constant. + __ Push(Immediate(0)); // Make room for "string start - 1" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; @@ -732,7 +754,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { } // Store this value in a local variable, for use when clearing // position registers. - __ movp(Operand(rbp, kInputStartMinusOne), rax); + __ movp(Operand(rbp, kStringStartMinusOne), rax); #if V8_OS_WIN // Ensure that we have written to each stack page, in order. Skipping a page @@ -835,7 +857,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { Immediate(num_saved_registers_ * kIntSize)); // Prepare rax to initialize registers with its value in the next run. - __ movp(rax, Operand(rbp, kInputStartMinusOne)); + __ movp(rax, Operand(rbp, kStringStartMinusOne)); if (global_with_zero_length_check()) { // Special case for zero-length matches. @@ -1018,10 +1040,13 @@ void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, bool check_bounds, int characters) { - DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works) if (check_bounds) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -1124,7 +1149,7 @@ void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg, void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) { DCHECK(reg_from <= reg_to); - __ movp(rax, Operand(rbp, kInputStartMinusOne)); + __ movp(rax, Operand(rbp, kStringStartMinusOne)); for (int reg = reg_from; reg <= reg_to; reg++) { __ movp(register_location(reg), rax); } @@ -1205,8 +1230,14 @@ Operand RegExpMacroAssemblerX64::register_location(int register_index) { void RegExpMacroAssemblerX64::CheckPosition(int cp_offset, Label* on_outside_input) { - __ cmpl(rdi, Immediate(-cp_offset * char_size())); - BranchOrBacktrack(greater_equal, on_outside_input); + if (cp_offset >= 0) { + __ cmpl(rdi, Immediate(-cp_offset * char_size())); + BranchOrBacktrack(greater_equal, on_outside_input); + } else { + __ leap(rax, Operand(rdi, cp_offset * char_size())); + __ cmpp(rax, Operand(rbp, kStringStartMinusOne)); + BranchOrBacktrack(less_equal, on_outside_input); + } } diff --git a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h index dbee9e86b5..257804739f 100644 --- a/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h +++ b/deps/v8/src/regexp/x64/regexp-macro-assembler-x64.h @@ -34,9 +34,11 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler { // A "greedy loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(uint32_t c, @@ -171,10 +173,10 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler { static const int kSuccessfulCaptures = kLastCalleeSaveRegister - kPointerSize; // When adding local variables remember to push space for them in // the frame in GetCode. - static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize; + static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kInputStartMinusOne - kPointerSize; + static const int kRegisterZero = kStringStartMinusOne - kPointerSize; // Initial size of code buffer. static const size_t kRegExpCodeSize = 1024; diff --git a/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc b/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc index c6968dc197..01d0b249b6 100644 --- a/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc +++ b/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.cc @@ -53,7 +53,8 @@ namespace internal { * - backup of caller ebx * - success counter (only for global regexps to count matches). * - Offset of location before start of input (effectively character - * position -1). Used to initialize capture registers to a non-position. + * string start - 1). Used to initialize capture registers to a + * non-position. * - register 0 ebp[-4] (only positions must be stored in the first * - register 1 ebp[-8] num_saved_registers_ registers) * - ... @@ -80,7 +81,8 @@ RegExpMacroAssemblerX87::RegExpMacroAssemblerX87(Isolate* isolate, Zone* zone, Mode mode, int registers_to_save) : NativeRegExpMacroAssembler(isolate, zone), - masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize)), + masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, + CodeObjectRequired::kYes)), mode_(mode), num_registers_(registers_to_save), num_saved_registers_(registers_to_save), @@ -156,25 +158,16 @@ void RegExpMacroAssemblerX87::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerX87::CheckAtStart(Label* on_at_start) { - Label not_at_start; - // Did we start the match at the start of the string at all? - __ cmp(Operand(ebp, kStartIndex), Immediate(0)); - BranchOrBacktrack(not_equal, ¬_at_start); - // If we did, are we still at the start of the input? - __ lea(eax, Operand(esi, edi, times_1, 0)); - __ cmp(eax, Operand(ebp, kInputStart)); + __ lea(eax, Operand(edi, -char_size())); + __ cmp(eax, Operand(ebp, kStringStartMinusOne)); BranchOrBacktrack(equal, on_at_start); - __ bind(¬_at_start); } -void RegExpMacroAssemblerX87::CheckNotAtStart(Label* on_not_at_start) { - // Did we start the match at the start of the string at all? - __ cmp(Operand(ebp, kStartIndex), Immediate(0)); - BranchOrBacktrack(not_equal, on_not_at_start); - // If we did, are we still at the start of the input? - __ lea(eax, Operand(esi, edi, times_1, 0)); - __ cmp(eax, Operand(ebp, kInputStart)); +void RegExpMacroAssemblerX87::CheckNotAtStart(int cp_offset, + Label* on_not_at_start) { + __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size())); + __ cmp(eax, Operand(ebp, kStringStartMinusOne)); BranchOrBacktrack(not_equal, on_not_at_start); } @@ -196,26 +189,28 @@ void RegExpMacroAssemblerX87::CheckGreedyLoop(Label* on_equal) { void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase( - int start_reg, - Label* on_no_match) { + int start_reg, bool read_backward, Label* on_no_match) { Label fallthrough; __ mov(edx, register_location(start_reg)); // Index of start of capture __ mov(ebx, register_location(start_reg + 1)); // Index of end of capture __ sub(ebx, edx); // Length of capture. - // The length of a capture should not be negative. This can only happen - // if the end of the capture is unrecorded, or at a point earlier than - // the start of the capture. - BranchOrBacktrack(less, on_no_match); - - // If length is zero, either the capture is empty or it is completely - // uncaptured. In either case succeed immediately. + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ j(equal, &fallthrough); // Check that there are sufficient characters left in the input. - __ mov(eax, edi); - __ add(eax, ebx); - BranchOrBacktrack(greater, on_no_match); + if (read_backward) { + __ mov(eax, Operand(ebp, kStringStartMinusOne)); + __ add(eax, ebx); + __ cmp(edi, eax); + BranchOrBacktrack(less_equal, on_no_match); + } else { + __ mov(eax, edi); + __ add(eax, ebx); + BranchOrBacktrack(greater, on_no_match); + } if (mode_ == LATIN1) { Label success; @@ -228,6 +223,9 @@ void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase( __ add(edx, esi); // Start of capture __ add(edi, esi); // Start of text to match against capture. + if (read_backward) { + __ sub(edi, ebx); // Offset by length when matching backwards. + } __ add(ebx, edi); // End of text to match against capture. Label loop; @@ -278,6 +276,11 @@ void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase( __ add(esp, Immediate(kPointerSize)); // Compute new value of character position after the matched part. __ sub(edi, esi); + if (read_backward) { + // Subtract match length if we matched backward. + __ add(edi, register_location(start_reg)); + __ sub(edi, register_location(start_reg + 1)); + } } else { DCHECK(mode_ == UC16); // Save registers before calling C function. @@ -304,6 +307,9 @@ void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase( // Found by adding negative string-end offset of current position (edi) // to end of string. __ add(edi, esi); + if (read_backward) { + __ sub(edi, ebx); // Offset by length when matching backwards. + } __ mov(Operand(esp, 1 * kPointerSize), edi); // Set byte_offset1. // Start of capture, where edx already holds string-end negative offset. @@ -325,16 +331,20 @@ void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase( // Check if function returned non-zero for success or zero for failure. __ or_(eax, eax); BranchOrBacktrack(zero, on_no_match); - // On success, increment position by length of capture. - __ add(edi, ebx); + // On success, advance position by length of capture. + if (read_backward) { + __ sub(edi, ebx); + } else { + __ add(edi, ebx); + } } __ bind(&fallthrough); } -void RegExpMacroAssemblerX87::CheckNotBackReference( - int start_reg, - Label* on_no_match) { +void RegExpMacroAssemblerX87::CheckNotBackReference(int start_reg, + bool read_backward, + Label* on_no_match) { Label fallthrough; Label success; Label fail; @@ -343,22 +353,33 @@ void RegExpMacroAssemblerX87::CheckNotBackReference( __ mov(edx, register_location(start_reg)); __ mov(eax, register_location(start_reg + 1)); __ sub(eax, edx); // Length to check. - // Fail on partial or illegal capture (start of capture after end of capture). - BranchOrBacktrack(less, on_no_match); - // Succeed on empty capture (including no capture) + + // At this point, the capture registers are either both set or both cleared. + // If the capture length is zero, then the capture is either empty or cleared. + // Fall through in both cases. __ j(equal, &fallthrough); // Check that there are sufficient characters left in the input. - __ mov(ebx, edi); - __ add(ebx, eax); - BranchOrBacktrack(greater, on_no_match); + if (read_backward) { + __ mov(ebx, Operand(ebp, kStringStartMinusOne)); + __ add(ebx, eax); + __ cmp(edi, ebx); + BranchOrBacktrack(less_equal, on_no_match); + } else { + __ mov(ebx, edi); + __ add(ebx, eax); + BranchOrBacktrack(greater, on_no_match); + } // Save register to make it available below. __ push(backtrack_stackpointer()); // Compute pointers to match string and capture string - __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match. __ add(edx, esi); // Start of capture. + __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match. + if (read_backward) { + __ sub(ebx, eax); // Offset by length when matching backwards. + } __ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match Label loop; @@ -389,6 +410,11 @@ void RegExpMacroAssemblerX87::CheckNotBackReference( // Move current character position to position after match. __ mov(edi, ecx); __ sub(edi, esi); + if (read_backward) { + // Subtract match length if we matched backward. + __ add(edi, register_location(start_reg)); + __ sub(edi, register_location(start_reg + 1)); + } // Restore backtrack stackpointer. __ pop(backtrack_stackpointer()); @@ -634,7 +660,7 @@ Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) { __ push(edi); __ push(ebx); // Callee-save on MacOS. __ push(Immediate(0)); // Number of successful matches in a global regexp. - __ push(Immediate(0)); // Make room for "input start - 1" constant. + __ push(Immediate(0)); // Make room for "string start - 1" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; @@ -684,7 +710,7 @@ Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) { } // Store this value in a local variable, for use when clearing // position registers. - __ mov(Operand(ebp, kInputStartMinusOne), eax); + __ mov(Operand(ebp, kStringStartMinusOne), eax); #if V8_OS_WIN // Ensure that we write to each stack page, in order. Skipping a page @@ -767,7 +793,7 @@ Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) { } if (global()) { - // Restart matching if the regular expression is flagged as global. + // Restart matching if the regular expression is flagged as global. // Increment success counter. __ inc(Operand(ebp, kSuccessfulCaptures)); // Capture results have been stored, so the number of remaining global @@ -784,7 +810,7 @@ Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) { Immediate(num_saved_registers_ * kPointerSize)); // Prepare eax to initialize registers with its value in the next run. - __ mov(eax, Operand(ebp, kInputStartMinusOne)); + __ mov(eax, Operand(ebp, kStringStartMinusOne)); if (global_with_zero_length_check()) { // Special case for zero-length matches. @@ -944,10 +970,13 @@ void RegExpMacroAssemblerX87::LoadCurrentCharacter(int cp_offset, Label* on_end_of_input, bool check_bounds, int characters) { - DCHECK(cp_offset >= -1); // ^ and \b can look behind one character. DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works) if (check_bounds) { - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -1031,7 +1060,7 @@ void RegExpMacroAssemblerX87::WriteCurrentPositionToRegister(int reg, void RegExpMacroAssemblerX87::ClearRegisters(int reg_from, int reg_to) { DCHECK(reg_from <= reg_to); - __ mov(eax, Operand(ebp, kInputStartMinusOne)); + __ mov(eax, Operand(ebp, kStringStartMinusOne)); for (int reg = reg_from; reg <= reg_to; reg++) { __ mov(register_location(reg), eax); } @@ -1100,8 +1129,14 @@ Operand RegExpMacroAssemblerX87::register_location(int register_index) { void RegExpMacroAssemblerX87::CheckPosition(int cp_offset, Label* on_outside_input) { - __ cmp(edi, -cp_offset * char_size()); - BranchOrBacktrack(greater_equal, on_outside_input); + if (cp_offset >= 0) { + __ cmp(edi, -cp_offset * char_size()); + BranchOrBacktrack(greater_equal, on_outside_input); + } else { + __ lea(eax, Operand(edi, cp_offset * char_size())); + __ cmp(eax, Operand(ebp, kStringStartMinusOne)); + BranchOrBacktrack(less_equal, on_outside_input); + } } diff --git a/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h b/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h index 0deea50357..c95541224f 100644 --- a/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h +++ b/deps/v8/src/regexp/x87/regexp-macro-assembler-x87.h @@ -33,9 +33,11 @@ class RegExpMacroAssemblerX87: public NativeRegExpMacroAssembler { // A "greedy loop" is a loop that is both greedy and with a simple // body. It has a particularly simple implementation. virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); - virtual void CheckNotAtStart(Label* on_not_at_start); - virtual void CheckNotBackReference(int start_reg, Label* on_no_match); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, Label* on_no_match); virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); virtual void CheckNotCharacterAfterAnd(uint32_t c, @@ -116,9 +118,9 @@ class RegExpMacroAssemblerX87: public NativeRegExpMacroAssembler { static const int kBackup_edi = kBackup_esi - kPointerSize; static const int kBackup_ebx = kBackup_edi - kPointerSize; static const int kSuccessfulCaptures = kBackup_ebx - kPointerSize; - static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize; + static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kInputStartMinusOne - kPointerSize; + static const int kRegisterZero = kStringStartMinusOne - kPointerSize; // Initial size of code buffer. static const size_t kRegExpCodeSize = 1024; |