Diffstat (limited to 'deps/v8/src/compiler/backend/x64')
4 files changed, 630 insertions, 308 deletions
diff --git a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc index a108edeff0..a4f82b153b 100644 --- a/deps/v8/src/compiler/backend/x64/code-generator-x64.cc +++ b/deps/v8/src/compiler/backend/x64/code-generator-x64.cc @@ -155,10 +155,18 @@ class X64OperandConverter : public InstructionOperandConverter { namespace { +bool HasAddressingMode(Instruction* instr) { + return instr->addressing_mode() != kMode_None; +} + bool HasImmediateInput(Instruction* instr, size_t index) { return instr->InputAt(index)->IsImmediate(); } +bool HasRegisterInput(Instruction* instr, size_t index) { + return instr->InputAt(index)->IsRegister(); +} + class OutOfLineLoadFloat32NaN final : public OutOfLineCode { public: OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result) @@ -210,6 +218,10 @@ class OutOfLineTruncateDoubleToI final : public OutOfLineCode { // Just encode the stub index. This will be patched when the code // is added to the native module and copied into wasm code space. __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL); + } else if (tasm()->options().inline_offheap_trampolines) { + // With embedded builtins we do not need the isolate here. This allows + // the call to be generated asynchronously. + __ CallBuiltin(Builtins::kDoubleToI); } else { __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET); } @@ -380,60 +392,60 @@ void EmitWordLoadPoisoningIfNeeded( } \ } while (false) -#define ASSEMBLE_BINOP(asm_instr) \ - do { \ - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ - size_t index = 1; \ - Operand right = i.MemoryOperand(&index); \ - __ asm_instr(i.InputRegister(0), right); \ - } else { \ - if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ - } else { \ - __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ - } \ - } else { \ - if (instr->InputAt(1)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ - } else { \ - __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ - } \ - } \ - } \ } while (false) +#define ASSEMBLE_BINOP(asm_instr) \ + do { \ + if (HasAddressingMode(instr)) { \ + size_t index = 1; \ + Operand right = i.MemoryOperand(&index); \ + __ asm_instr(i.InputRegister(0), right); \ + } else { \ + if (HasImmediateInput(instr, 1)) { \ + if (HasRegisterInput(instr, 0)) { \ + __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ + } else { \ + __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ + } \ + } else { \ + if (HasRegisterInput(instr, 1)) { \ + __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ + } else { \ + __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ + } \ + } \ + } \ } while (false) -#define ASSEMBLE_COMPARE(asm_instr) \ - do { \ - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ - size_t index = 0; \ - Operand left = i.MemoryOperand(&index); \ - if (HasImmediateInput(instr, index)) { \ - __ asm_instr(left, i.InputImmediate(index)); \ - } else { \ - __ asm_instr(left, i.InputRegister(index)); \ - } \ - } else { \ - if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ - } else { \ - __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ - } \ - } else { \ - if (instr->InputAt(1)->IsRegister()) { \ - __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ - } else { \ - __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ - } \ -
} \ - } \ +#define ASSEMBLE_COMPARE(asm_instr) \ + do { \ + if (HasAddressingMode(instr)) { \ + size_t index = 0; \ + Operand left = i.MemoryOperand(&index); \ + if (HasImmediateInput(instr, index)) { \ + __ asm_instr(left, i.InputImmediate(index)); \ + } else { \ + __ asm_instr(left, i.InputRegister(index)); \ + } \ + } else { \ + if (HasImmediateInput(instr, 1)) { \ + if (HasRegisterInput(instr, 0)) { \ + __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ + } else { \ + __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ + } \ + } else { \ + if (HasRegisterInput(instr, 1)) { \ + __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ + } else { \ + __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ + } \ + } \ + } \ } while (false) #define ASSEMBLE_MULT(asm_instr) \ do { \ if (HasImmediateInput(instr, 1)) { \ - if (instr->InputAt(0)->IsRegister()) { \ + if (HasRegisterInput(instr, 0)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(0), \ i.InputImmediate(1)); \ } else { \ @@ -441,7 +453,7 @@ void EmitWordLoadPoisoningIfNeeded( i.InputImmediate(1)); \ } \ } else { \ - if (instr->InputAt(1)->IsRegister()) { \ + if (HasRegisterInput(instr, 1)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \ } else { \ __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \ @@ -468,9 +480,9 @@ void EmitWordLoadPoisoningIfNeeded( #define ASSEMBLE_MOVX(asm_instr) \ do { \ - if (instr->addressing_mode() != kMode_None) { \ + if (HasAddressingMode(instr)) { \ __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \ - } else if (instr->InputAt(0)->IsRegister()) { \ + } else if (HasRegisterInput(instr, 0)) { \ __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \ } else { \ __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \ @@ -576,17 +588,18 @@ void EmitWordLoadPoisoningIfNeeded( __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \ } while (false) -#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ - do { \ - CpuFeatureScope sse_scope(tasm(), SSE4_1); \ - Register dst = i.OutputRegister(); \ - Register tmp = i.TempRegister(0); \ - __ movq(tmp, Immediate(1)); \ - __ xorq(dst, dst); \ - __ pxor(kScratchDoubleReg, kScratchDoubleReg); \ - __ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \ - __ ptest(kScratchDoubleReg, kScratchDoubleReg); \ - __ cmovq(zero, dst, tmp); \ +#define ASSEMBLE_SIMD_ALL_TRUE(opcode) \ + do { \ + CpuFeatureScope sse_scope(tasm(), SSE4_1); \ + Register dst = i.OutputRegister(); \ + Register tmp1 = i.TempRegister(0); \ + XMMRegister tmp2 = i.TempSimd128Register(1); \ + __ movq(tmp1, Immediate(1)); \ + __ xorq(dst, dst); \ + __ pxor(tmp2, tmp2); \ + __ opcode(tmp2, i.InputSimd128Register(0)); \ + __ ptest(tmp2, tmp2); \ + __ cmovq(zero, dst, tmp1); \ } while (false) void CodeGenerator::AssembleDeconstructFrame() { @@ -989,10 +1002,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // don't emit code for nops. 
break; case kArchDeoptimize: { - int deopt_state_id = + DeoptimizationExit* exit = BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore()); - CodeGenResult result = - AssembleDeoptimizerCall(deopt_state_id, current_source_position_); + CodeGenResult result = AssembleDeoptimizerCall(exit); if (result != kSuccess) return result; unwinding_info_writer_.MarkBlockWillExit(); break; @@ -1000,9 +1012,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kArchRet: AssembleReturn(instr->InputAt(0)); break; - case kArchStackPointer: - __ movq(i.OutputRegister(), rsp); - break; case kArchFramePointer: __ movq(i.OutputRegister(), rbp); break; @@ -1013,6 +1022,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ movq(i.OutputRegister(), rbp); } break; + case kArchStackPointerGreaterThan: { + constexpr size_t kValueIndex = 0; + if (HasAddressingMode(instr)) { + __ cmpq(rsp, i.MemoryOperand(kValueIndex)); + } else { + __ cmpq(rsp, i.InputRegister(kValueIndex)); + } + break; + } case kArchTruncateDoubleToI: { auto result = i.OutputRegister(); auto input = i.InputDoubleRegister(0); @@ -1176,14 +1194,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_MULT(imulq); break; case kX64ImulHigh32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ imull(i.InputRegister(1)); } else { __ imull(i.InputOperand(1)); } break; case kX64UmulHigh32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ mull(i.InputRegister(1)); } else { __ mull(i.InputOperand(1)); @@ -1254,42 +1272,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_SHIFT(rorq, 6); break; case kX64Lzcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Lzcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Lzcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Lzcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Lzcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Lzcntl(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Tzcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Tzcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Tzcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Tzcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Tzcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Tzcntl(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Popcnt: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Popcntq(i.OutputRegister(), i.InputRegister(0)); } else { __ Popcntq(i.OutputRegister(), i.InputOperand(0)); } break; case kX64Popcnt32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Popcntl(i.OutputRegister(), i.InputRegister(0)); } else { __ Popcntl(i.OutputRegister(), i.InputOperand(0)); @@ -1321,16 +1339,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; case kSSEFloat32Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. 
- __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psrlq(kScratchDoubleReg, 33); - __ Andps(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psrlq(tmp, 33); + __ Andps(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat32Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psllq(kScratchDoubleReg, 31); - __ Xorps(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psllq(tmp, 31); + __ Xorps(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat32Sqrt: @@ -1532,17 +1552,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64F64x2Abs: case kSSEFloat64Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psrlq(kScratchDoubleReg, 1); - __ Andpd(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psrlq(tmp, 1); + __ Andpd(i.OutputDoubleRegister(), tmp); break; } case kX64F64x2Neg: case kSSEFloat64Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. - __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ Psllq(kScratchDoubleReg, 63); - __ Xorpd(i.OutputDoubleRegister(), kScratchDoubleReg); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ Pcmpeqd(tmp, tmp); + __ Psllq(tmp, 63); + __ Xorpd(i.OutputDoubleRegister(), tmp); break; } case kSSEFloat64Sqrt: @@ -1659,56 +1681,56 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kSSEInt32ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt32ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt64ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEInt64ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint64ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint64ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint32ToFloat64: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kSSEUint32ToFloat32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ 
Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -1729,21 +1751,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kSSEFloat64InsertLowWord32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0); } else { __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0); } break; case kSSEFloat64InsertHighWord32: - if (instr->InputAt(1)->IsRegister()) { + if (HasRegisterInput(instr, 1)) { __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1); } else { __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1); } break; case kSSEFloat64LoadLowWord32: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movd(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -1800,56 +1822,52 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kAVXFloat32Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsrlq(tmp, tmp, 33); if (instr->InputAt(0)->IsFPRegister()) { - __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat32Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsllq(tmp, tmp, 31); if (instr->InputAt(0)->IsFPRegister()) { - __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat64Abs: { // TODO(bmeurer): Use RIP relative 128-bit constants. CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsrlq(tmp, tmp, 1); if (instr->InputAt(0)->IsFPRegister()) { - __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } case kAVXFloat64Neg: { // TODO(bmeurer): Use RIP relative 128-bit constants. 
CpuFeatureScope avx_scope(tasm(), AVX); - __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); - __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63); + XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0)); + __ vpcmpeqd(tmp, tmp, tmp); + __ vpsllq(tmp, tmp, 63); if (instr->InputAt(0)->IsFPRegister()) { - __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputDoubleRegister(0)); + __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0)); } else { - __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, - i.InputOperand(0)); + __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0)); } break; } @@ -1929,14 +1947,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64Movl: EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); if (instr->HasOutput()) { - if (instr->addressing_mode() == kMode_None) { - if (instr->InputAt(0)->IsRegister()) { + if (HasAddressingMode(instr)) { + __ movl(i.OutputRegister(), i.MemoryOperand()); + } else { + if (HasRegisterInput(instr, 0)) { __ movl(i.OutputRegister(), i.InputRegister(0)); } else { __ movl(i.OutputRegister(), i.InputOperand(0)); } - } else { - __ movl(i.OutputRegister(), i.MemoryOperand()); } __ AssertZeroExtended(i.OutputRegister()); } else { @@ -2002,12 +2020,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i); break; } - case kX64CompressSigned: // Fall through. - case kX64CompressPointer: // Fall through. - case kX64CompressAny: { - ASSEMBLE_MOVX(movl); - break; - } case kX64Movq: EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset()); if (instr->HasOutput()) { @@ -2082,14 +2094,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } break; case kX64BitcastIF: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movd(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movss(i.OutputDoubleRegister(), i.InputOperand(0)); } break; case kX64BitcastLD: - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ Movq(i.OutputDoubleRegister(), i.InputRegister(0)); } else { __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0)); @@ -2177,7 +2189,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ incl(i.OutputRegister()); break; case kX64Push: - if (AddressingModeField::decode(instr->opcode()) != kMode_None) { + if (HasAddressingMode(instr)) { size_t index = 0; Operand operand = i.MemoryOperand(&index); __ pushq(operand); @@ -2189,7 +2201,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( frame_access_state()->IncreaseSPDelta(1); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), kSystemPointerSize); - } else if (instr->InputAt(0)->IsRegister()) { + } else if (HasRegisterInput(instr, 0)) { __ pushq(i.InputRegister(0)); frame_access_state()->IncreaseSPDelta(1); unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(), @@ -2256,11 +2268,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64F64x2Splat: { + CpuFeatureScope sse_scope(tasm(), SSE3); XMMRegister dst = i.OutputSimd128Register(); if (instr->InputAt(0)->IsFPRegister()) { - __ pshufd(dst, i.InputDoubleRegister(0), 0x44); + __ movddup(dst, i.InputDoubleRegister(0)); } else { - __ pshufd(dst, i.InputOperand(0), 0x44); + __ movddup(dst, i.InputOperand(0)); } break; } @@ -2280,6 +2293,61 @@ CodeGenerator::CodeGenResult 
CodeGenerator::AssembleArchInstruction( __ movq(i.OutputDoubleRegister(), kScratchRegister); break; } + case kX64F64x2Add: { + ASSEMBLE_SSE_BINOP(addpd); + break; + } + case kX64F64x2Sub: { + ASSEMBLE_SSE_BINOP(subpd); + break; + } + case kX64F64x2Mul: { + ASSEMBLE_SSE_BINOP(mulpd); + break; + } + case kX64F64x2Div: { + ASSEMBLE_SSE_BINOP(divpd); + break; + } + case kX64F64x2Min: { + XMMRegister src1 = i.InputSimd128Register(1), + dst = i.OutputSimd128Register(); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // The minpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform minpd in both orders, merge the results, and adjust. + __ movapd(kScratchDoubleReg, src1); + __ minpd(kScratchDoubleReg, dst); + __ minpd(dst, src1); + // Propagate -0's and NaNs, which may be non-canonical. + __ orpd(kScratchDoubleReg, dst); + // Canonicalize NaNs by quieting and clearing the payload. + __ cmppd(dst, kScratchDoubleReg, 3); + __ orpd(kScratchDoubleReg, dst); + __ psrlq(dst, 13); + __ andnpd(dst, kScratchDoubleReg); + break; + } + case kX64F64x2Max: { + XMMRegister src1 = i.InputSimd128Register(1), + dst = i.OutputSimd128Register(); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // The maxpd instruction doesn't propagate NaNs and +0's in its first + // operand. Perform maxpd in both orders, merge the results, and adjust. + __ movapd(kScratchDoubleReg, src1); + __ maxpd(kScratchDoubleReg, dst); + __ maxpd(dst, src1); + // Find discrepancies. + __ xorpd(dst, kScratchDoubleReg); + // Propagate NaNs, which may be non-canonical. + __ orpd(kScratchDoubleReg, dst); + // Propagate sign discrepancy and (subtle) quiet NaNs. + __ subpd(kScratchDoubleReg, dst); + // Canonicalize NaNs by clearing the payload. Sign is non-deterministic. + __ cmppd(dst, kScratchDoubleReg, 3); + __ psrlq(dst, 13); + __ andnpd(dst, kScratchDoubleReg); + break; + } case kX64F64x2Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1)); @@ -2406,6 +2474,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1)); break; } + case kX64F32x4Div: { + DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); + __ divps(i.OutputSimd128Register(), i.InputSimd128Register(1)); + break; + } case kX64F32x4Min: { XMMRegister src1 = i.InputSimd128Register(1), dst = i.OutputSimd128Register(); @@ -2466,13 +2539,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2Splat: { + CpuFeatureScope sse_scope(tasm(), SSE3); XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movq(dst, i.InputRegister(0)); } else { __ movq(dst, i.InputOperand(0)); } - __ pshufd(dst, dst, 0x44); + __ movddup(dst, dst); break; } case kX64I64x2ExtractLane: { @@ -2482,7 +2556,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I64x2ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrq(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2502,7 +2576,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2Shl: { - __ psllq(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psllq(i.OutputSimd128Register(), tmp); break; }
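The kX64I64x2Shl hunk just above shows the new pattern this patch uses for all variable SIMD shifts: the shift count arrives in a general-purpose register and is moved into a temporary XMM register, because the SSE shift instructions accept a variable count only from the low 64 bits of an XMM operand. A minimal standalone sketch of that pattern with SSE2 intrinsics follows; it is illustrative only, not code from the patch, and note that psllq produces zero for counts of 64 or more, so a caller wanting wasm's mod-64 shift semantics must mask the count first.

#include <emmintrin.h>  // SSE2 intrinsics
#include <cstdint>

// Shift both 64-bit lanes of v left by a count held in a scalar register,
// mirroring the emitted "movq tmp, reg / psllq dst, tmp" sequence above.
__m128i I64x2Shl(__m128i v, int64_t count) {
  __m128i tmp = _mm_cvtsi64_si128(count);  // movq tmp, reg
  return _mm_sll_epi64(v, tmp);            // psllq dst, tmp
}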
case kX64I64x2ShrS: { @@ -2511,16 +2587,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( // ShrS on each quadword one at a time XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(0); + Register tmp = i.ToRegister(instr->TempAt(0)); // lower quadword - __ pextrq(kScratchRegister, src, 0x0); - __ sarq(kScratchRegister, Immediate(i.InputInt8(1))); - __ pinsrq(dst, kScratchRegister, 0x0); + __ pextrq(tmp, src, 0x0); + __ sarq_cl(tmp); + __ pinsrq(dst, tmp, 0x0); // upper quadword - __ pextrq(kScratchRegister, src, 0x1); - __ sarq(kScratchRegister, Immediate(i.InputInt8(1))); - __ pinsrq(dst, kScratchRegister, 0x1); + __ pextrq(tmp, src, 0x1); + __ sarq_cl(tmp); + __ pinsrq(dst, tmp, 0x1); break; } case kX64I64x2Add: { @@ -2538,8 +2615,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister left = i.InputSimd128Register(0); XMMRegister right = i.InputSimd128Register(1); - XMMRegister tmp1 = i.ToSimd128Register(instr->TempAt(0)); - XMMRegister tmp2 = i.ToSimd128Register(instr->TempAt(1)); + XMMRegister tmp1 = i.TempSimd128Register(0); + XMMRegister tmp2 = i.TempSimd128Register(1); __ movaps(tmp1, left); __ movaps(tmp2, right); @@ -2559,6 +2636,66 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ paddq(left, tmp2); // left == dst break; } + case kX64I64x2MinS: { + if (CpuFeatures::IsSupported(SSE4_2)) { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(tmp, src); + __ pcmpgtq(src, dst); + __ blendvpd(tmp, dst); // implicit use of xmm0 as mask + __ movaps(dst, tmp); + } else { + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + Register tmp1 = i.TempRegister(1); + Register tmp2 = i.TempRegister(2); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // backup src since we cannot change it + __ movaps(tmp, src); + + // compare the lower quadwords + __ movq(tmp1, dst); + __ movq(tmp2, tmp); + __ cmpq(tmp1, tmp2); + // tmp2 now has the min of lower quadwords + __ cmovq(less_equal, tmp2, tmp1); + // tmp1 now has the higher quadword + // must do this before movq, movq clears top quadword + __ pextrq(tmp1, dst, 1); + // save tmp2 into dst + __ movq(dst, tmp2); + // tmp2 now has the higher quadword + __ pextrq(tmp2, tmp, 1); + // compare higher quadwords + __ cmpq(tmp1, tmp2); + // tmp2 now has the min of higher quadwords + __ cmovq(less_equal, tmp2, tmp1); + __ movq(tmp, tmp2); + // dst = [tmp[0], dst[0]] + __ punpcklqdq(dst, tmp); + } + break; + } + case kX64I64x2MaxS: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(tmp, src); + __ pcmpgtq(src, dst); + __ blendvpd(dst, tmp); // implicit use of xmm0 as mask + break; + } case kX64I64x2Eq: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); @@ -2568,9 +2705,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I64x2Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqq(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqq(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I64x2GtS: { @@ -2584,7 +2722,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ movaps(tmp, src); __ pcmpgtq(tmp, dst); @@ -2593,7 +2731,56 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I64x2ShrU: { - __ psrlq(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrlq(i.OutputSimd128Register(), tmp); + break; + } + case kX64I64x2MinU: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister src_tmp = i.TempSimd128Register(0); + XMMRegister dst_tmp = i.TempSimd128Register(1); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(src_tmp, src); + __ movaps(dst_tmp, dst); + + __ pcmpeqd(src, src); + __ psllq(src, 63); + + __ pxor(dst_tmp, src); + __ pxor(src, src_tmp); + + __ pcmpgtq(src, dst_tmp); + __ blendvpd(src_tmp, dst); // implicit use of xmm0 as mask + __ movaps(dst, src_tmp); + break; + } + case kX64I64x2MaxU: { + CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2); + CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1); + XMMRegister dst = i.OutputSimd128Register(); + XMMRegister src = i.InputSimd128Register(1); + XMMRegister src_tmp = i.TempSimd128Register(0); + XMMRegister dst_tmp = i.TempSimd128Register(1); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + DCHECK_EQ(src, xmm0); + + __ movaps(src_tmp, src); + __ movaps(dst_tmp, dst); + + __ pcmpeqd(src, src); + __ psllq(src, 63); + + __ pxor(dst_tmp, src); + __ pxor(src, src_tmp); + + __ pcmpgtq(src, dst_tmp); + __ blendvpd(dst, src_tmp); // implicit use of xmm0 as mask break; } case kX64I64x2GtU: { @@ -2601,7 +2788,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 63); @@ -2617,7 +2804,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_2); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); __ psllq(kScratchDoubleReg, 63); @@ -2632,7 +2819,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I32x4Splat: { XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ 
movd(dst, i.InputOperand(0)); @@ -2647,7 +2834,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I32x4ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2658,19 +2845,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I32x4SConvertF32x4: { DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); XMMRegister dst = i.OutputSimd128Register(); + XMMRegister tmp = i.TempSimd128Register(0); // NAN->0 - __ movaps(kScratchDoubleReg, dst); - __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg); - __ pand(dst, kScratchDoubleReg); + __ movaps(tmp, dst); + __ cmpeqps(tmp, tmp); + __ pand(dst, tmp); // Set top bit if >= 0 (but not -0.0!) - __ pxor(kScratchDoubleReg, dst); + __ pxor(tmp, dst); // Convert __ cvttps2dq(dst, dst); // Set top bit if >=0 is now < 0 - __ pand(kScratchDoubleReg, dst); - __ psrad(kScratchDoubleReg, 31); + __ pand(tmp, dst); + __ psrad(tmp, 31); // Set positive overflow lanes to 0x7FFFFFFF - __ pxor(dst, kScratchDoubleReg); + __ pxor(dst, tmp); break; } case kX64I32x4SConvertI16x8Low: { @@ -2699,11 +2887,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4Shl: { - __ pslld(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ pslld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4ShrS: { - __ psrad(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrad(i.OutputSimd128Register(), tmp); break; } case kX64I32x4Add: { @@ -2739,9 +2931,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqd(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I32x4GtS: { @@ -2760,24 +2953,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); + XMMRegister tmp2 = i.TempSimd128Register(1); // NAN->0, negative->0 - __ pxor(kScratchDoubleReg, kScratchDoubleReg); - __ maxps(dst, kScratchDoubleReg); + __ pxor(tmp2, tmp2); + __ maxps(dst, tmp2); // scratch: float representation of max_signed - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ psrld(kScratchDoubleReg, 1); // 0x7fffffff - __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000 + __ pcmpeqd(tmp2, tmp2); + __ psrld(tmp2, 1); // 0x7fffffff + __ cvtdq2ps(tmp2, tmp2); // 0x4f000000 // tmp: convert (src-max_signed). 
// Positive overflow lanes -> 0x7FFFFFFF // Negative lanes -> 0 __ movaps(tmp, dst); - __ subps(tmp, kScratchDoubleReg); - __ cmpleps(kScratchDoubleReg, tmp); + __ subps(tmp, tmp2); + __ cmpleps(tmp2, tmp); __ cvttps2dq(tmp, tmp); - __ pxor(tmp, kScratchDoubleReg); - __ pxor(kScratchDoubleReg, kScratchDoubleReg); - __ pmaxsd(tmp, kScratchDoubleReg); + __ pxor(tmp, tmp2); + __ pxor(tmp2, tmp2); + __ pmaxsd(tmp, tmp2); // convert. Overflow lanes above max_signed will be 0x80000000 __ cvttps2dq(dst, dst); // Add (src-max_signed) for overflow lanes. @@ -2797,7 +2991,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I32x4ShrU: { - __ psrld(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrld(i.OutputSimd128Register(), tmp); break; } case kX64I32x4MinU: { @@ -2814,10 +3010,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxud(dst, src); __ pcmpeqd(dst, src); - __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqd(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I32x4GeU: { @@ -2835,7 +3032,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I16x8Splat: { XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ movd(dst, i.InputOperand(0)); @@ -2853,7 +3050,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I16x8ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -2887,11 +3084,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8Shl: { - __ psllw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psllw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8ShrS: { - __ psraw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psraw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8SConvertI32x4: { @@ -2940,9 +3141,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqw(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I16x8GtS: { @@ -2970,7 +3172,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I16x8ShrU: { - __ psrlw(i.OutputSimd128Register(), i.InputInt8(1)); + XMMRegister tmp = i.TempSimd128Register(0); + __ movq(tmp, i.InputRegister(1)); + __ psrlw(i.OutputSimd128Register(), tmp); break; } case kX64I16x8UConvertI32x4: { @@ -3007,10 +3211,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = 
i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxuw(dst, src); __ pcmpeqw(dst, src); - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqw(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I16x8GeU: { @@ -3024,7 +3229,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I8x16Splat: { CpuFeatureScope sse_scope(tasm(), SSSE3); XMMRegister dst = i.OutputSimd128Register(); - if (instr->InputAt(0)->IsRegister()) { + if (HasRegisterInput(instr, 0)) { __ movd(dst, i.InputRegister(0)); } else { __ movd(dst, i.InputOperand(0)); @@ -3042,7 +3247,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I8x16ReplaceLane: { CpuFeatureScope sse_scope(tasm(), SSE4_1); - if (instr->InputAt(2)->IsRegister()) { + if (HasRegisterInput(instr, 2)) { __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2), i.InputInt8(1)); } else { @@ -3071,31 +3276,36 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( case kX64I8x16Shl: { XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); - int8_t shift = i.InputInt8(1) & 0x7; - if (shift < 4) { - // For small shifts, doubling is faster. - for (int i = 0; i < shift; ++i) { - __ paddb(dst, dst); - } - } else { - // Mask off the unwanted bits before word-shifting. - __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); - __ psrlw(kScratchDoubleReg, 8 + shift); - __ packuswb(kScratchDoubleReg, kScratchDoubleReg); - __ pand(dst, kScratchDoubleReg); - __ psllw(dst, shift); - } + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + // Mask off the unwanted bits before word-shifting. + __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg); + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ packuswb(kScratchDoubleReg, kScratchDoubleReg); + __ pand(dst, kScratchDoubleReg); + __ movq(tmp_simd, i.InputRegister(1)); + __ psllw(dst, tmp_simd); break; } case kX64I8x16ShrS: { XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); // Unpack the bytes into words, do arithmetic shifts, and repack. - __ punpckhbw(kScratchDoubleReg, src); - __ punpcklbw(dst, src); - __ psraw(kScratchDoubleReg, 8 + shift); - __ psraw(dst, 8 + shift); + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + // Prepare shift value + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psraw(kScratchDoubleReg, tmp_simd); + __ psraw(dst, tmp_simd); __ packsswb(dst, kScratchDoubleReg); break; } @@ -3119,7 +3329,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( XMMRegister dst = i.OutputSimd128Register(); DCHECK_EQ(dst, i.InputSimd128Register(0)); XMMRegister right = i.InputSimd128Register(1); - XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0)); + XMMRegister tmp = i.TempSimd128Register(0); // I16x8 view of I8x16 // left = AAaa AAaa ... AAaa AAaa // right= BBbb BBbb ...
BBbb BBbb @@ -3163,9 +3373,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( break; } case kX64I8x16Ne: { + XMMRegister tmp = i.TempSimd128Register(0); __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1)); - __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(i.OutputSimd128Register(), kScratchDoubleReg); + __ pcmpeqb(tmp, tmp); + __ pxor(i.OutputSimd128Register(), tmp); break; } case kX64I8x16GtS: { @@ -3194,13 +3405,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( } case kX64I8x16ShrU: { XMMRegister dst = i.OutputSimd128Register(); - XMMRegister src = i.InputSimd128Register(0); - int8_t shift = i.InputInt8(1) & 0x7; // Unpack the bytes into words, do logical shifts, and repack. - __ punpckhbw(kScratchDoubleReg, src); - __ punpcklbw(dst, src); - __ psrlw(kScratchDoubleReg, 8 + shift); - __ psrlw(dst, 8 + shift); + DCHECK_EQ(dst, i.InputSimd128Register(0)); + // Temp registers for shift mask and additional moves to XMM registers. + Register tmp = i.ToRegister(instr->TempAt(0)); + XMMRegister tmp_simd = i.TempSimd128Register(1); + __ punpckhbw(kScratchDoubleReg, dst); + __ punpcklbw(dst, dst); + // Prepare shift value + __ movq(tmp, i.InputRegister(1)); + __ addq(tmp, Immediate(8)); + __ movq(tmp_simd, tmp); + __ psrlw(kScratchDoubleReg, tmp_simd); + __ psrlw(dst, tmp_simd); __ packuswb(dst, kScratchDoubleReg); break; } @@ -3226,10 +3443,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( CpuFeatureScope sse_scope(tasm(), SSE4_1); XMMRegister dst = i.OutputSimd128Register(); XMMRegister src = i.InputSimd128Register(1); + XMMRegister tmp = i.TempSimd128Register(0); __ pmaxub(dst, src); __ pcmpeqb(dst, src); - __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg); - __ pxor(dst, kScratchDoubleReg); + __ pcmpeqb(tmp, tmp); + __ pxor(dst, tmp); break; } case kX64I8x16GeU: { @@ -3561,9 +3779,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ASSEMBLE_SIMD_ALL_TRUE(pcmpeqb); break; } - case kX64StackCheck: - __ CompareRoot(rsp, RootIndex::kStackLimit); - break; case kWord32AtomicExchangeInt8: { __ xchgb(i.InputRegister(0), i.MemoryOperand(1)); __ movsxbl(i.InputRegister(0), i.InputRegister(0)); @@ -4167,6 +4382,8 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) { void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); } +void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {} + void CodeGenerator::AssembleMove(InstructionOperand* source, InstructionOperand* destination) { X64OperandConverter g(this, nullptr); diff --git a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h index d6ac3f43df..8a0a45a916 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h +++ b/deps/v8/src/compiler/backend/x64/instruction-codes-x64.h @@ -140,9 +140,6 @@ namespace compiler { V(X64DecompressSigned) \ V(X64DecompressPointer) \ V(X64DecompressAny) \ - V(X64CompressSigned) \ - V(X64CompressPointer) \ - V(X64CompressAny) \ V(X64Movq) \ V(X64Movsd) \ V(X64Movss) \ @@ -158,12 +155,17 @@ namespace compiler { V(X64Push) \ V(X64Poke) \ V(X64Peek) \ - V(X64StackCheck) \ V(X64F64x2Splat) \ V(X64F64x2ExtractLane) \ V(X64F64x2ReplaceLane) \ V(X64F64x2Abs) \ V(X64F64x2Neg) \ + V(X64F64x2Add) \ + V(X64F64x2Sub) \ + V(X64F64x2Mul) \ + V(X64F64x2Div) \ + V(X64F64x2Min) \ + V(X64F64x2Max) \ V(X64F64x2Eq) \ V(X64F64x2Ne) \ V(X64F64x2Lt) \ @@ -181,6 +183,7 @@ namespace compiler { V(X64F32x4AddHoriz) \ V(X64F32x4Sub)
\ V(X64F32x4Mul) \ + V(X64F32x4Div) \ V(X64F32x4Min) \ V(X64F32x4Max) \ V(X64F32x4Eq) \ @@ -196,11 +199,15 @@ namespace compiler { V(X64I64x2Add) \ V(X64I64x2Sub) \ V(X64I64x2Mul) \ + V(X64I64x2MinS) \ + V(X64I64x2MaxS) \ V(X64I64x2Eq) \ V(X64I64x2Ne) \ V(X64I64x2GtS) \ V(X64I64x2GeS) \ V(X64I64x2ShrU) \ + V(X64I64x2MinU) \ + V(X64I64x2MaxU) \ V(X64I64x2GtU) \ V(X64I64x2GeU) \ V(X64I32x4Splat) \ diff --git a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc index 6389ef2e50..e9fa450c38 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc +++ b/deps/v8/src/compiler/backend/x64/instruction-scheduler-x64.cc @@ -129,6 +129,12 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F64x2ReplaceLane: case kX64F64x2Abs: case kX64F64x2Neg: + case kX64F64x2Add: + case kX64F64x2Sub: + case kX64F64x2Mul: + case kX64F64x2Div: + case kX64F64x2Min: + case kX64F64x2Max: case kX64F64x2Eq: case kX64F64x2Ne: case kX64F64x2Lt: @@ -146,6 +152,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64F32x4AddHoriz: case kX64F32x4Sub: case kX64F32x4Mul: + case kX64F32x4Div: case kX64F32x4Min: case kX64F32x4Max: case kX64F32x4Eq: @@ -161,11 +168,15 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64I64x2Add: case kX64I64x2Sub: case kX64I64x2Mul: + case kX64I64x2MinS: + case kX64I64x2MaxS: case kX64I64x2Eq: case kX64I64x2Ne: case kX64I64x2GtS: case kX64I64x2GeS: case kX64I64x2ShrU: + case kX64I64x2MinU: + case kX64I64x2MaxU: case kX64I64x2GtU: case kX64I64x2GeU: case kX64I32x4Splat: @@ -295,9 +306,6 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64DecompressSigned: case kX64DecompressPointer: case kX64DecompressAny: - case kX64CompressSigned: - case kX64CompressPointer: - case kX64CompressAny: return (instr->addressing_mode() == kMode_None) ? kNoOpcodeFlags : kIsLoadOperation | kHasSideEffect; @@ -346,7 +354,6 @@ int InstructionScheduler::GetTargetInstructionFlags( case kX64Movdqu: return instr->HasOutput() ? 
kIsLoadOperation : kHasSideEffect; - case kX64StackCheck: case kX64Peek: return kIsLoadOperation; diff --git a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc index a4908fb846..5379074bac 100644 --- a/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc +++ b/deps/v8/src/compiler/backend/x64/instruction-selector-x64.cc @@ -170,9 +170,10 @@ class X64OperandGenerator final : public OperandGenerator { AddressingMode GetEffectiveAddressMemoryOperand(Node* operand, InstructionOperand inputs[], size_t* input_count) { - if (selector()->CanAddressRelativeToRootsRegister()) { + { LoadMatcher<ExternalReferenceMatcher> m(operand); - if (m.index().HasValue() && m.object().HasValue()) { + if (m.index().HasValue() && m.object().HasValue() && + selector()->CanAddressRelativeToRootsRegister(m.object().Value())) { ptrdiff_t const delta = m.index().Value() + TurboAssemblerBase::RootRegisterOffsetForExternalReference( @@ -350,7 +351,8 @@ void InstructionSelector::VisitStore(Node* node) { StoreRepresentation store_rep = StoreRepresentationOf(node->op()); WriteBarrierKind write_barrier_kind = store_rep.write_barrier_kind(); - if (write_barrier_kind != kNoWriteBarrier) { + if (write_barrier_kind != kNoWriteBarrier && + V8_LIKELY(!FLAG_disable_write_barriers)) { DCHECK(CanBeTaggedOrCompressedPointer(store_rep.representation())); AddressingMode addressing_mode; InstructionOperand inputs[] = { @@ -528,6 +530,35 @@ void InstructionSelector::VisitWord64Xor(Node* node) { } } +void InstructionSelector::VisitStackPointerGreaterThan( + Node* node, FlagsContinuation* cont) { + Node* const value = node->InputAt(0); + InstructionCode opcode = kArchStackPointerGreaterThan; + + DCHECK(cont->IsBranch()); + const int effect_level = + GetEffectLevel(cont->true_block()->PredecessorAt(0)->control_input()); + + X64OperandGenerator g(this); + if (g.CanBeMemoryOperand(kX64Cmp, node, value, effect_level)) { + DCHECK_EQ(IrOpcode::kLoad, value->opcode()); + + // GetEffectiveAddressMemoryOperand can create at most 3 inputs. + static constexpr int kMaxInputCount = 3; + + size_t input_count = 0; + InstructionOperand inputs[kMaxInputCount]; + AddressingMode addressing_mode = + g.GetEffectiveAddressMemoryOperand(value, inputs, &input_count); + opcode |= AddressingModeField::encode(addressing_mode); + DCHECK_LE(input_count, kMaxInputCount); + + EmitWithContinuation(opcode, 0, nullptr, input_count, inputs, cont); + } else { + EmitWithContinuation(opcode, g.UseRegister(value), cont); + } +} + namespace { bool TryMergeTruncateInt64ToInt32IntoLoad(InstructionSelector* selector, @@ -1238,23 +1269,23 @@ void InstructionSelector::VisitChangeUint32ToUint64(Node* node) { } void InstructionSelector::VisitChangeTaggedToCompressed(Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressAny, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + return EmitIdentity(node); } void InstructionSelector::VisitChangeTaggedPointerToCompressedPointer( Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressPointer, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. 
+ return EmitIdentity(node); } void InstructionSelector::VisitChangeTaggedSignedToCompressedSigned( Node* node) { - X64OperandGenerator g(this); - Node* value = node->InputAt(0); - Emit(kX64CompressSigned, g.DefineAsRegister(node), g.Use(value)); + // The top 32 bits in the 64-bit register will be undefined, and + // must not be used by a dependent node. + return EmitIdentity(node); } void InstructionSelector::VisitChangeCompressedToTagged(Node* node) { @@ -1338,10 +1369,13 @@ void VisitFloatBinop(InstructionSelector* selector, Node* node, void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input, ArchOpcode avx_opcode, ArchOpcode sse_opcode) { X64OperandGenerator g(selector); + InstructionOperand temps[] = {g.TempDoubleRegister()}; if (selector->IsSupported(AVX)) { - selector->Emit(avx_opcode, g.DefineAsRegister(node), g.Use(input)); + selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input), + arraysize(temps), temps); } else { - selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input)); + selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input), + arraysize(temps), temps); } } @@ -1838,30 +1872,6 @@ void VisitWord64Compare(InstructionSelector* selector, Node* node, g.UseRegister(m.right().node()), cont); } } - if (selector->isolate() != nullptr) { - StackCheckMatcher<Int64BinopMatcher, IrOpcode::kUint64LessThan> m( - selector->isolate(), node); - if (m.Matched()) { - // Compare(Load(js_stack_limit), LoadStackPointer) - if (!node->op()->HasProperty(Operator::kCommutative)) cont->Commute(); - InstructionCode opcode = cont->Encode(kX64StackCheck); - CHECK(cont->IsBranch()); - selector->EmitWithContinuation(opcode, cont); - return; - } - } - WasmStackCheckMatcher<Int64BinopMatcher, IrOpcode::kUint64LessThan> wasm_m( - node); - if (wasm_m.Matched()) { - // This is a wasm stack check. By structure, we know that we can use the - // stack pointer directly, as wasm code does not modify the stack at points - // where stack checks are performed. 
- Node* left = node->InputAt(0); - LocationOperand rsp(InstructionOperand::EXPLICIT, LocationOperand::REGISTER, - InstructionSequence::DefaultRepresentation(), - RegisterCode::kRegCode_rsp); - return VisitCompareWithMemoryOperand(selector, kX64Cmp, left, rsp, cont); - } VisitWordCompare(selector, node, kX64Cmp, cont); } @@ -2157,6 +2167,9 @@ void InstructionSelector::VisitWordCompareZero(Node* user, Node* value, return VisitWordCompare(this, value, kX64Cmp32, cont); case IrOpcode::kWord32And: return VisitWordCompare(this, value, kX64Test32, cont); + case IrOpcode::kStackPointerGreaterThan: + cont->OverwriteAndNegateIfEqual(kStackPointerGreaterThanCondition); + return VisitStackPointerGreaterThan(value, cont); default: break; } @@ -2586,6 +2599,12 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16) #define SIMD_BINOP_LIST(V) \ + V(F64x2Add) \ + V(F64x2Sub) \ + V(F64x2Mul) \ + V(F64x2Div) \ + V(F64x2Min) \ + V(F64x2Max) \ V(F64x2Eq) \ V(F64x2Ne) \ V(F64x2Lt) \ @@ -2594,6 +2613,7 @@ VISIT_ATOMIC_BINOP(Xor) V(F32x4AddHoriz) \ V(F32x4Sub) \ V(F32x4Mul) \ + V(F32x4Div) \ V(F32x4Min) \ V(F32x4Max) \ V(F32x4Eq) \ @@ -2603,7 +2623,6 @@ VISIT_ATOMIC_BINOP(Xor) V(I64x2Add) \ V(I64x2Sub) \ V(I64x2Eq) \ - V(I64x2Ne) \ V(I64x2GtS) \ V(I32x4Add) \ V(I32x4AddHoriz) \ @@ -2612,12 +2631,10 @@ VISIT_ATOMIC_BINOP(Xor) V(I32x4MinS) \ V(I32x4MaxS) \ V(I32x4Eq) \ - V(I32x4Ne) \ V(I32x4GtS) \ V(I32x4GeS) \ V(I32x4MinU) \ V(I32x4MaxU) \ - V(I32x4GtU) \ V(I32x4GeU) \ V(I16x8SConvertI32x4) \ V(I16x8Add) \ @@ -2629,14 +2646,12 @@ VISIT_ATOMIC_BINOP(Xor) V(I16x8MinS) \ V(I16x8MaxS) \ V(I16x8Eq) \ - V(I16x8Ne) \ V(I16x8GtS) \ V(I16x8GeS) \ V(I16x8AddSaturateU) \ V(I16x8SubSaturateU) \ V(I16x8MinU) \ V(I16x8MaxU) \ - V(I16x8GtU) \ V(I16x8GeU) \ V(I8x16SConvertI16x8) \ V(I8x16Add) \ @@ -2646,23 +2661,28 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16MinS) \ V(I8x16MaxS) \ V(I8x16Eq) \ - V(I8x16Ne) \ V(I8x16GtS) \ V(I8x16GeS) \ V(I8x16AddSaturateU) \ V(I8x16SubSaturateU) \ V(I8x16MinU) \ V(I8x16MaxU) \ - V(I8x16GtU) \ V(I8x16GeU) \ V(S128And) \ V(S128Or) \ V(S128Xor) #define SIMD_BINOP_ONE_TEMP_LIST(V) \ + V(I64x2Ne) \ V(I64x2GeS) \ V(I64x2GtU) \ - V(I64x2GeU) + V(I64x2GeU) \ + V(I32x4Ne) \ + V(I32x4GtU) \ + V(I16x8Ne) \ + V(I16x8GtU) \ + V(I8x16Ne) \ + V(I8x16GtU) #define SIMD_UNOP_LIST(V) \ V(F32x4SConvertI32x4) \ @@ -2686,16 +2706,17 @@ VISIT_ATOMIC_BINOP(Xor) #define SIMD_SHIFT_OPCODES(V) \ V(I64x2Shl) \ - V(I64x2ShrS) \ V(I64x2ShrU) \ V(I32x4Shl) \ V(I32x4ShrS) \ V(I32x4ShrU) \ V(I16x8Shl) \ V(I16x8ShrS) \ - V(I16x8ShrU) \ - V(I8x16Shl) \ - V(I8x16ShrS) \ + V(I16x8ShrU) + +#define SIMD_NARROW_SHIFT_OPCODES(V) \ + V(I8x16Shl) \ + V(I8x16ShrS) \ V(I8x16ShrU) #define SIMD_ANYTRUE_LIST(V) \ @@ -2745,17 +2766,30 @@ SIMD_TYPES(VISIT_SIMD_EXTRACT_LANE) SIMD_TYPES(VISIT_SIMD_REPLACE_LANE) #undef VISIT_SIMD_REPLACE_LANE -#define VISIT_SIMD_SHIFT(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - X64OperandGenerator g(this); \ - int32_t value = OpParameter<int32_t>(node->op()); \ - Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ - g.UseRegister(node->InputAt(0)), g.UseImmediate(value)); \ +#define VISIT_SIMD_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ + g.UseUniqueRegister(node->InputAt(0)), \ + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ } SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT) #undef VISIT_SIMD_SHIFT #undef SIMD_SHIFT_OPCODES +#define 
VISIT_SIMD_NARROW_SHIFT(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ + g.UseUniqueRegister(node->InputAt(0)), \ + g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ + } +SIMD_NARROW_SHIFT_OPCODES(VISIT_SIMD_NARROW_SHIFT) +#undef VISIT_SIMD_NARROW_SHIFT +#undef SIMD_NARROW_SHIFT_OPCODES + #define VISIT_SIMD_UNOP(Opcode) \ void InstructionSelector::Visit##Opcode(Node* node) { \ X64OperandGenerator g(this); \ @@ -2799,12 +2833,12 @@ SIMD_ANYTRUE_LIST(VISIT_SIMD_ANYTRUE) #undef VISIT_SIMD_ANYTRUE #undef SIMD_ANYTRUE_LIST -#define VISIT_SIMD_ALLTRUE(Opcode) \ - void InstructionSelector::Visit##Opcode(Node* node) { \ - X64OperandGenerator g(this); \ - InstructionOperand temps[] = {g.TempRegister()}; \ - Emit(kX64##Opcode, g.DefineAsRegister(node), \ - g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ +#define VISIT_SIMD_ALLTRUE(Opcode) \ + void InstructionSelector::Visit##Opcode(Node* node) { \ + X64OperandGenerator g(this); \ + InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ + Emit(kX64##Opcode, g.DefineAsRegister(node), \ + g.UseUniqueRegister(node->InputAt(0)), arraysize(temps), temps); \ } SIMD_ALLTRUE_LIST(VISIT_SIMD_ALLTRUE) #undef VISIT_SIMD_ALLTRUE @@ -2820,14 +2854,16 @@ void InstructionSelector::VisitS128Select(Node* node) { void InstructionSelector::VisitF64x2Abs(Node* node) { X64OperandGenerator g(this); - Emit(kX64F64x2Abs, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + InstructionOperand temps[] = {g.TempDoubleRegister()}; + Emit(kX64F64x2Abs, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), + arraysize(temps), temps); } void InstructionSelector::VisitF64x2Neg(Node* node) { X64OperandGenerator g(this); - Emit(kX64F64x2Neg, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + InstructionOperand temps[] = {g.TempDoubleRegister()}; + Emit(kX64F64x2Neg, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), + arraysize(temps), temps); } void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) { @@ -2836,6 +2872,15 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) { g.UseRegister(node->InputAt(0))); } +void InstructionSelector::VisitI64x2ShrS(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempRegister()}; + // Use fixed to rcx, to use sarq_cl in codegen. 
+ Emit(kX64I64x2ShrS, g.DefineSameAsFirst(node), + g.UseUniqueRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), rcx), + arraysize(temps), temps); +} + void InstructionSelector::VisitI64x2Mul(Node* node) { X64OperandGenerator g(this); InstructionOperand temps[] = {g.TempSimd128Register(), @@ -2845,15 +2890,59 @@ void InstructionSelector::VisitI64x2Mul(Node* node) { g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); } +void InstructionSelector::VisitI64x2MinS(Node* node) { + X64OperandGenerator g(this); + if (this->IsSupported(SSE4_2)) { + InstructionOperand temps[] = {g.TempSimd128Register()}; + Emit(kX64I64x2MinS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); + } else { + InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister(), + g.TempRegister()}; + Emit(kX64I64x2MinS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + arraysize(temps), temps); + } +} + +void InstructionSelector::VisitI64x2MaxS(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; + Emit(kX64I64x2MaxS, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + +void InstructionSelector::VisitI64x2MinU(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; + Emit(kX64I64x2MinU, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + +void InstructionSelector::VisitI64x2MaxU(Node* node) { + X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; + Emit(kX64I64x2MaxU, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseFixed(node->InputAt(1), xmm0), + arraysize(temps), temps); +} + void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) { X64OperandGenerator g(this); + InstructionOperand temps[] = {g.TempSimd128Register()}; Emit(kX64I32x4SConvertF32x4, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0))); + g.UseRegister(node->InputAt(0)), arraysize(temps), temps); } void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) { X64OperandGenerator g(this); - InstructionOperand temps[] = {g.TempSimd128Register()}; + InstructionOperand temps[] = {g.TempSimd128Register(), + g.TempSimd128Register()}; Emit(kX64I32x4UConvertF32x4, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), arraysize(temps), temps); } @@ -2997,12 +3086,12 @@ static const ShuffleEntry arch_shuffles[] = { true}, {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kX64S8x8Reverse, - false, - false}, + true, + true}, {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kX64S8x4Reverse, - false, - false}, + true, + true}, {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kX64S8x2Reverse, true, @@ -3060,6 +3149,8 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { SwapShuffleInputs(node); is_swizzle = false; // It's simpler to just handle the general case. no_same_as_first = false; // SSE requires same-as-first. + // TODO(v8:9608): also see v8:9083 + src1_needs_reg = true; opcode = kX64S8x16Alignr; // palignr takes a single imm8 offset. imms[imm_count++] = offset;
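For reference, the NaN- and minus-zero-aware minimum emitted by the new kX64F64x2Min case in the code-generator hunk earlier in this diff can be written with SSE2 intrinsics as below. This is an illustrative sketch of the emitted instruction sequence, not code from the patch: minpd is performed in both operand orders because it returns its second operand for NaN inputs and for min(+0, -0), the two results are or-merged to propagate -0.0 and NaNs, and the final unordered compare, shift, and andnot canonicalize any NaN lane by clearing its low 51 payload bits.

#include <emmintrin.h>  // SSE2 intrinsics

// Per-lane f64x2 minimum with wasm semantics, mirroring kX64F64x2Min.
__m128d F64x2Min(__m128d dst, __m128d src1) {
  __m128d scratch = src1;               // movapd kScratchDoubleReg, src1
  scratch = _mm_min_pd(scratch, dst);   // minpd (src1-first order)
  dst = _mm_min_pd(dst, src1);          // minpd (dst-first order)
  scratch = _mm_or_pd(scratch, dst);    // orpd: propagate -0.0 and NaNs
  dst = _mm_cmpunord_pd(dst, scratch);  // cmppd ..., 3: all-ones in NaN lanes
  scratch = _mm_or_pd(scratch, dst);    // orpd: force NaN lanes to all-ones
  // psrlq 13 / andnpd: keep only sign, exponent and quiet bit in NaN lanes,
  // producing a canonical quiet NaN; non-NaN lanes pass through unchanged.
  __m128i mask = _mm_srli_epi64(_mm_castpd_si128(dst), 13);
  return _mm_andnot_pd(_mm_castsi128_pd(mask), scratch);
}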