1 files changed, 210 insertions, 55 deletions
diff --git a/deps/v8/src/regexp/regexp-interpreter.cc b/deps/v8/src/regexp/regexp-interpreter.cc
index cf2fb55e4a..df72951afb 100644
--- a/deps/v8/src/regexp/regexp-interpreter.cc
+++ b/deps/v8/src/regexp/regexp-interpreter.cc
@@ -12,6 +12,7 @@
 #include "src/objects/objects-inl.h"
 #include "src/regexp/regexp-bytecodes.h"
 #include "src/regexp/regexp-macro-assembler.h"
+#include "src/regexp/regexp-stack.h"  // For kMaximumStackSize.
 #include "src/regexp/regexp.h"
 #include "src/strings/unicode.h"
 #include "src/utils/utils.h"
@@ -63,23 +64,6 @@ bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
   return true;
 }
 
-void DisassembleSingleBytecode(const byte* code_base, const byte* pc) {
-  PrintF("%s", RegExpBytecodeName(*pc));
-
-  // Args and the bytecode as hex.
-  for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
-    PrintF(", %02x", pc[i]);
-  }
-  PrintF(" ");
-
-  // Args as ascii.
-  for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
-    unsigned char b = pc[i];
-    PrintF("%c", std::isprint(b) ? b : '.');
-  }
-  PrintF("\n");
-}
-
 #ifdef DEBUG
 void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
                            int stack_depth, int current_position,
@@ -94,7 +78,7 @@ void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
     PrintF(format, pc - code_base, stack_depth, current_position, current_char,
            printable ? current_char : '.');
 
-    DisassembleSingleBytecode(code_base, pc);
+    RegExpBytecodeDisassembleSingle(code_base, pc);
   }
 }
 #endif  // DEBUG
@@ -118,7 +102,10 @@ class BacktrackStack {
  public:
   BacktrackStack() = default;
 
-  void push(int v) { data_.emplace_back(v); }
+  V8_WARN_UNUSED_RESULT bool push(int v) {
+    data_.emplace_back(v);
+    return (static_cast<int>(data_.size()) <= kMaxSize);
+  }
   int peek() const {
     DCHECK(!data_.empty());
     return data_.back();
@@ -141,13 +128,17 @@ class BacktrackStack {
   // static stack-allocated backing store, but small enough not to waste space.
   static constexpr int kStaticCapacity = 64;
 
-  base::SmallVector<int, kStaticCapacity> data_;
+  using ValueT = int;
+  base::SmallVector<ValueT, kStaticCapacity> data_;
+
+  static constexpr int kMaxSize =
+      RegExpStack::kMaximumStackSize / sizeof(ValueT);
 
   DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
 };
 
-IrregexpInterpreter::Result StackOverflow(Isolate* isolate,
-                                          RegExp::CallOrigin call_origin) {
+IrregexpInterpreter::Result ThrowStackOverflow(Isolate* isolate,
+                                               RegExp::CallOrigin call_origin) {
   CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
   // We abort interpreter execution after the stack overflow is thrown, and thus
   // allow allocation here despite the outer DisallowHeapAllocationScope.
@@ -156,6 +147,17 @@ IrregexpInterpreter::Result StackOverflow(Isolate* isolate,
   return IrregexpInterpreter::EXCEPTION;
 }
 
+// Only throws if called from the runtime, otherwise just returns the EXCEPTION
+// status code.
+IrregexpInterpreter::Result MaybeThrowStackOverflow(
+    Isolate* isolate, RegExp::CallOrigin call_origin) {
+  if (call_origin == RegExp::CallOrigin::kFromRuntime) {
+    return ThrowStackOverflow(isolate, call_origin);
+  } else {
+    return IrregexpInterpreter::EXCEPTION;
+  }
+}
+
 template <typename Char>
 void UpdateCodeAndSubjectReferences(
     Isolate* isolate, Handle<ByteArray> code_array,
@@ -208,7 +210,7 @@ IrregexpInterpreter::Result HandleInterrupts(
     Handle<String> subject_handle(*subject_string_out, isolate);
 
     if (js_has_overflowed) {
-      return StackOverflow(isolate, call_origin);
+      return ThrowStackOverflow(isolate, call_origin);
     } else if (check.InterruptRequested()) {
       const bool was_one_byte =
           String::IsOneByteRepresentationUnderneath(*subject_string_out);
@@ -238,6 +240,13 @@ IrregexpInterpreter::Result HandleInterrupts(
   return IrregexpInterpreter::SUCCESS;
 }
 
+bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
+  int mask = RegExpMacroAssembler::kTableMask;
+  int b = table[(current_char & mask) >> kBitsPerByteLog2];
+  int bit = (current_char & (kBitsPerByte - 1));
+  return (b & (1 << bit)) != 0;
+}
+
 // If computed gotos are supported by the compiler, we can get addresses to
 // labels directly in C/C++. Every bytecode handler has its own label and we
 // store the addresses in a dispatch table indexed by bytecode. To execute the
@@ -262,7 +271,7 @@ IrregexpInterpreter::Result HandleInterrupts(
 #define DISPATCH()  \
   pc = next_pc;     \
   insn = next_insn; \
-  break
+  goto switch_dispatch_continuation
 #endif  // V8_USE_COMPUTED_GOTO
 
 // ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
@@ -297,11 +306,52 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
   DisallowHeapAllocation no_gc;
 
 #if V8_USE_COMPUTED_GOTO
-#define DECLARE_DISPATCH_TABLE_ENTRY(name, code, length) &&BC_##name,
-  static const void* const dispatch_table[] = {
-      BYTECODE_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)};
+
+// We have to make sure that no OOB access to the dispatch table is possible and
+// all values are valid label addresses.
+// Otherwise jumps to arbitrary addresses could potentially happen.
+// This is ensured as follows:
+// Every index to the dispatch table gets masked using BYTECODE_MASK in
+// DECODE(). This way we can only get values between 0 (only the least
+// significant byte of an integer is used) and kRegExpPaddedBytecodeCount - 1
+// (BYTECODE_MASK is defined to be exactly this value).
+// All entries from kRegExpBytecodeCount to kRegExpPaddedBytecodeCount have to
+// be filled with BREAKs (invalid operation).
+
+// Fill dispatch table from last defined bytecode up to the next power of two
+// with BREAK (invalid operation).
+// TODO(pthier): Find a way to fill up automatically (at compile time)
+// 59 real bytecodes -> 5 fillers
+#define BYTECODE_FILLER_ITERATOR(V) \
+  V(BREAK) /* 1 */                  \
+  V(BREAK) /* 2 */                  \
+  V(BREAK) /* 3 */                  \
+  V(BREAK) /* 4 */                  \
+  V(BREAK) /* 5 */
+
+#define COUNT(...) +1
+  static constexpr int kRegExpBytecodeFillerCount =
+      BYTECODE_FILLER_ITERATOR(COUNT);
+#undef COUNT
+
+  // Make sure kRegExpPaddedBytecodeCount is actually the closest possible power
+  // of two.
+  DCHECK_EQ(kRegExpPaddedBytecodeCount,
+            base::bits::RoundUpToPowerOfTwo32(kRegExpBytecodeCount));
+
+  // Make sure every bytecode we get by using BYTECODE_MASK is well defined.
+  STATIC_ASSERT(kRegExpBytecodeCount <= kRegExpPaddedBytecodeCount);
+  STATIC_ASSERT(kRegExpBytecodeCount + kRegExpBytecodeFillerCount ==
+                kRegExpPaddedBytecodeCount);
+
+#define DECLARE_DISPATCH_TABLE_ENTRY(name, ...) &&BC_##name,
+  static const void* const dispatch_table[kRegExpPaddedBytecodeCount] = {
+      BYTECODE_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)
+          BYTECODE_FILLER_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)};
 #undef DECLARE_DISPATCH_TABLE_ENTRY
-#endif
+#undef BYTECODE_FILLER_ITERATOR
+
+#endif  // V8_USE_COMPUTED_GOTO
 
   const byte* pc = code_array.GetDataStartAddress();
   const byte* code_base = pc;
@@ -329,17 +379,23 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
     BYTECODE(BREAK) { UNREACHABLE(); }
     BYTECODE(PUSH_CP) {
       ADVANCE(PUSH_CP);
-      backtrack_stack.push(current);
+      if (!backtrack_stack.push(current)) {
+        return MaybeThrowStackOverflow(isolate, call_origin);
+      }
       DISPATCH();
     }
     BYTECODE(PUSH_BT) {
       ADVANCE(PUSH_BT);
-      backtrack_stack.push(Load32Aligned(pc + 4));
+      if (!backtrack_stack.push(Load32Aligned(pc + 4))) {
+        return MaybeThrowStackOverflow(isolate, call_origin);
+      }
       DISPATCH();
     }
     BYTECODE(PUSH_REGISTER) {
       ADVANCE(PUSH_REGISTER);
-      backtrack_stack.push(registers[insn >> BYTECODE_SHIFT]);
+      if (!backtrack_stack.push(registers[insn >> BYTECODE_SHIFT])) {
+        return MaybeThrowStackOverflow(isolate, call_origin);
+      }
       DISPATCH();
     }
     BYTECODE(SET_REGISTER) {
@@ -580,10 +636,7 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
       DISPATCH();
     }
     BYTECODE(CHECK_BIT_IN_TABLE) {
-      int mask = RegExpMacroAssembler::kTableMask;
-      byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
-      int bit = (current_char & (kBitsPerByte - 1));
-      if ((b & (1 << bit)) != 0) {
+      if (CheckBitInTable(current_char, pc + 8)) {
         SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
       } else {
         ADVANCE(CHECK_BIT_IN_TABLE);
@@ -762,6 +815,118 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
       }
       DISPATCH();
     }
+    BYTECODE(SKIP_UNTIL_CHAR) {
+      int load_offset = (insn >> BYTECODE_SHIFT);
+      uint32_t advance = Load16Aligned(pc + 4);
+      uint32_t c = Load16Aligned(pc + 6);
+      while (static_cast<uintptr_t>(current + load_offset) <
+             static_cast<uintptr_t>(subject.length())) {
+        current_char = subject[current + load_offset];
+        if (c == current_char) {
+          SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
+          DISPATCH();
+        }
+        current += advance;
+      }
+      SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
+      DISPATCH();
+    }
+    BYTECODE(SKIP_UNTIL_CHAR_AND) {
+      int load_offset = (insn >> BYTECODE_SHIFT);
+      uint16_t advance = Load16Aligned(pc + 4);
+      uint16_t c = Load16Aligned(pc + 6);
+      uint32_t mask = Load32Aligned(pc + 8);
+      int32_t maximum_offset = Load32Aligned(pc + 12);
+      while (static_cast<uintptr_t>(current + maximum_offset) <=
+             static_cast<uintptr_t>(subject.length())) {
+        current_char = subject[current + load_offset];
+        if (c == (current_char & mask)) {
+          SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
+          DISPATCH();
+        }
+        current += advance;
+      }
+      SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
+      DISPATCH();
+    }
+    BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
+      int load_offset = (insn >> BYTECODE_SHIFT);
+      uint16_t advance = Load16Aligned(pc + 4);
+      uint16_t c = Load16Aligned(pc + 6);
+      int32_t maximum_offset = Load32Aligned(pc + 8);
+      while (static_cast<uintptr_t>(current + maximum_offset) <=
+             static_cast<uintptr_t>(subject.length())) {
+        current_char = subject[current + load_offset];
+        if (c == current_char) {
+          SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
+          DISPATCH();
+        }
+        current += advance;
+      }
+      SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
+      DISPATCH();
+    }
+    BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
+      int load_offset = (insn >> BYTECODE_SHIFT);
+      uint32_t advance = Load16Aligned(pc + 4);
+      const byte* table = pc + 8;
+      while (static_cast<uintptr_t>(current + load_offset) <
+             static_cast<uintptr_t>(subject.length())) {
+        current_char = subject[current + load_offset];
+        if (CheckBitInTable(current_char, table)) {
+          SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
+          DISPATCH();
+        }
+        current += advance;
+      }
+      SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
+      DISPATCH();
+    }
+    BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
+      int load_offset = (insn >> BYTECODE_SHIFT);
+      uint16_t advance = Load16Aligned(pc + 4);
+      uint16_t limit = Load16Aligned(pc + 6);
+      const byte* table = pc + 8;
+      while (static_cast<uintptr_t>(current + load_offset) <
+             static_cast<uintptr_t>(subject.length())) {
+        current_char = subject[current + load_offset];
+        if (current_char > limit) {
+          SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
+          DISPATCH();
+        }
+        if (!CheckBitInTable(current_char, table)) {
+          SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
+          DISPATCH();
+        }
+        current += advance;
+      }
+      SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
+      DISPATCH();
+    }
+    BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
+      int load_offset = (insn >> BYTECODE_SHIFT);
+      uint32_t advance = Load32Aligned(pc + 4);
+      uint16_t c = Load16Aligned(pc + 8);
+      uint16_t c2 = Load16Aligned(pc + 10);
+      while (static_cast<uintptr_t>(current + load_offset) <
+             static_cast<uintptr_t>(subject.length())) {
+        current_char = subject[current + load_offset];
+        // The two if-statements below are split up intentionally, as combining
+        // them seems to result in register allocation behaving quite
+        // differently and slowing down the resulting code.
+        if (c == current_char) {
+          SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
+          DISPATCH();
+        }
+        if (c2 == current_char) {
+          SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
+          DISPATCH();
+        }
+        current += advance;
+      }
+      SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
+      DISPATCH();
+    }
 #if V8_USE_COMPUTED_GOTO
 // Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
 // V8_USE_COMPUTED_GOTO here.
@@ -769,6 +934,9 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
       default:
         UNREACHABLE();
     }
+  // Label we jump to in DISPATCH(). There must be no instructions between the
+  // end of the switch, this label and the end of the loop.
+  switch_dispatch_continuation : {}
 #endif  // V8_USE_COMPUTED_GOTO
   }
 }
@@ -784,30 +952,11 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
 }  // namespace
 
 // static
-void IrregexpInterpreter::Disassemble(ByteArray byte_array,
-                                      const std::string& pattern) {
-  DisallowHeapAllocation no_gc;
-
-  PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str());
-
-  const byte* const code_base = byte_array.GetDataStartAddress();
-  const int byte_array_length = byte_array.length();
-  ptrdiff_t offset = 0;
-
-  while (offset < byte_array_length) {
-    const byte* const pc = code_base + offset;
-    PrintF("%p  %4" V8PRIxPTRDIFF "  ", pc, offset);
-    DisassembleSingleBytecode(code_base, pc);
-    offset += RegExpBytecodeLength(*pc);
-  }
-}
-
-// static
 IrregexpInterpreter::Result IrregexpInterpreter::Match(
     Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
     int registers_length, int start_position, RegExp::CallOrigin call_origin) {
   if (FLAG_regexp_tier_up) {
-    regexp.MarkTierUpForNextExec();
+    regexp.TierUpTick();
   }
 
   bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
@@ -869,6 +1018,12 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
   String subject_string = String::cast(Object(subject));
   JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
 
+  if (regexp_obj.MarkedForTierUp()) {
+    // Returning RETRY will re-enter through runtime, where actual recompilation
+    // for tier-up takes place.
+    return IrregexpInterpreter::RETRY;
+  }
+
   return Match(isolate, regexp_obj, subject_string, registers, registers_length,
                start_position, call_origin);
 }