// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/baseline/liftoff-assembler.h"

#include <sstream>

#include "src/assembler-inl.h"
#include "src/compiler/linkage.h"
#include "src/compiler/wasm-compiler.h"
#include "src/macro-assembler-inl.h"
#include "src/wasm/function-body-decoder-impl.h"
#include "src/wasm/wasm-opcodes.h"

namespace v8 {
namespace internal {
namespace wasm {

using VarState = LiftoffAssembler::VarState;

namespace {

#define __ asm_->

#define TRACE(...)                                            \
  do {                                                        \
    if (FLAG_trace_liftoff) PrintF("[liftoff] " __VA_ARGS__); \
  } while (false)

class StackTransferRecipe {
  struct RegisterMove {
    LiftoffRegister dst;
    LiftoffRegister src;
    ValueType type;
    constexpr RegisterMove(LiftoffRegister dst, LiftoffRegister src,
                           ValueType type)
        : dst(dst), src(src), type(type) {}
  };

  struct RegisterLoad {
    enum LoadKind : uint8_t {
      kConstant,  // load a constant value into a register.
      kStack,     // fill a register from a stack slot.
      kHalfStack  // fill one half of a register pair from half a stack slot.
    };

    LiftoffRegister dst;
    LoadKind kind;
    ValueType type;
    int32_t value;  // i32 constant value or stack index, depending on kind.

    // Named constructors.
    static RegisterLoad Const(LiftoffRegister dst, WasmValue constant) {
      if (constant.type() == kWasmI32) {
        return {dst, kConstant, kWasmI32, constant.to_i32()};
      }
      DCHECK_EQ(kWasmI64, constant.type());
      DCHECK_EQ(constant.to_i32_unchecked(), constant.to_i64_unchecked());
      return {dst, kConstant, kWasmI64, constant.to_i32_unchecked()};
    }
    static RegisterLoad Stack(LiftoffRegister dst, int32_t stack_index,
                              ValueType type) {
      return {dst, kStack, type, stack_index};
    }
    static RegisterLoad HalfStack(LiftoffRegister dst,
                                  int32_t half_stack_index) {
      return {dst, kHalfStack, kWasmI32, half_stack_index};
    }

   private:
    RegisterLoad(LiftoffRegister dst, LoadKind kind, ValueType type,
                 int32_t value)
        : dst(dst), kind(kind), type(type), value(value) {}
  };

 public:
  explicit StackTransferRecipe(LiftoffAssembler* wasm_asm) : asm_(wasm_asm) {}
  ~StackTransferRecipe() { Execute(); }

  void Execute() {
    // First, execute register moves. Then load constants and stack values
    // into registers.

    if ((move_dst_regs_ & move_src_regs_).is_empty()) {
      // No overlap in src and dst registers. Just execute the moves in any
      // order.
      for (RegisterMove& rm : register_moves_) {
        asm_->Move(rm.dst, rm.src, rm.type);
      }
      register_moves_.clear();
    } else {
      // Keep use counters of src registers.
      uint32_t src_reg_use_count[kAfterMaxLiftoffRegCode] = {0};
      for (RegisterMove& rm : register_moves_) {
        ++src_reg_use_count[rm.src.liftoff_code()];
      }
      // Now repeatedly iterate the list of register moves, and execute those
      // whose dst register does not appear as src any more. The remaining
      // moves are compacted during this iteration.
      // If no more moves can be executed (because of a cycle), spill one
      // register to the stack, add a RegisterLoad to reload it later, and
      // continue.
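      // Illustrative example (not part of the original source): for the
      // pending moves {r1 <- r0, r0 <- r1}, neither move can execute first,
      // since each destination is still needed as a source. The loop below
      // then spills one source (here r1) to a fresh stack slot, schedules a
      // RegisterLoad to refill r0 from that slot after all moves are done,
      // and retires the remaining move r1 <- r0 directly.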
      uint32_t next_spill_slot = asm_->cache_state()->stack_height();
      while (!register_moves_.empty()) {
        int executed_moves = 0;
        for (auto& rm : register_moves_) {
          if (src_reg_use_count[rm.dst.liftoff_code()] == 0) {
            asm_->Move(rm.dst, rm.src, rm.type);
            ++executed_moves;
            DCHECK_LT(0, src_reg_use_count[rm.src.liftoff_code()]);
            --src_reg_use_count[rm.src.liftoff_code()];
          } else if (executed_moves) {
            // Compaction: Move not-executed moves to the beginning of the
            // list.
            (&rm)[-executed_moves] = rm;
          }
        }
        if (executed_moves == 0) {
          // There is a cycle. Spill one register, then continue.
          // TODO(clemensh): Use an unused register if available.
          RegisterMove& rm = register_moves_.back();
          LiftoffRegister spill_reg = rm.src;
          asm_->Spill(next_spill_slot, spill_reg, rm.type);
          // Remember to reload into the destination register later.
          LoadStackSlot(register_moves_.back().dst, next_spill_slot, rm.type);
          DCHECK_EQ(1, src_reg_use_count[spill_reg.liftoff_code()]);
          src_reg_use_count[spill_reg.liftoff_code()] = 0;
          ++next_spill_slot;
          executed_moves = 1;
        }
        register_moves_.erase(register_moves_.end() - executed_moves,
                              register_moves_.end());
      }
    }

    for (RegisterLoad& rl : register_loads_) {
      switch (rl.kind) {
        case RegisterLoad::kConstant:
          asm_->LoadConstant(rl.dst, rl.type == kWasmI64
                                         ? WasmValue(int64_t{rl.value})
                                         : WasmValue(int32_t{rl.value}));
          break;
        case RegisterLoad::kStack:
          asm_->Fill(rl.dst, rl.value, rl.type);
          break;
        case RegisterLoad::kHalfStack:
          // As half of a register pair, {rl.dst} must be a gp register.
          asm_->FillI64Half(rl.dst.gp(), rl.value);
          break;
      }
    }
    register_loads_.clear();
  }

  void TransferStackSlot(const LiftoffAssembler::CacheState& dst_state,
                         uint32_t dst_index, uint32_t src_index) {
    const VarState& dst = dst_state.stack_state[dst_index];
    const VarState& src = __ cache_state()->stack_state[src_index];
    DCHECK_EQ(dst.type(), src.type());
    switch (dst.loc()) {
      case VarState::kStack:
        switch (src.loc()) {
          case VarState::kStack:
            if (src_index == dst_index) break;
            asm_->MoveStackValue(dst_index, src_index, src.type());
            break;
          case VarState::kRegister:
            asm_->Spill(dst_index, src.reg(), src.type());
            break;
          case VarState::KIntConst:
            asm_->Spill(dst_index, src.constant());
            break;
        }
        break;
      case VarState::kRegister:
        LoadIntoRegister(dst.reg(), src, src_index);
        break;
      case VarState::KIntConst:
        DCHECK_EQ(dst, src);
        break;
    }
  }

  void LoadIntoRegister(LiftoffRegister dst,
                        const LiftoffAssembler::VarState& src,
                        uint32_t src_index) {
    switch (src.loc()) {
      case VarState::kStack:
        LoadStackSlot(dst, src_index, src.type());
        break;
      case VarState::kRegister:
        DCHECK_EQ(dst.reg_class(), src.reg_class());
        if (dst != src.reg()) MoveRegister(dst, src.reg(), src.type());
        break;
      case VarState::KIntConst:
        LoadConstant(dst, src.constant());
        break;
    }
  }
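  // Illustrative note (not part of the original source): on 32-bit targets
  // an i64 value lives in a pair of gp registers, so each half is
  // transferred separately. For example, the i64 constant -1 materializes
  // as low word -1 and high word -1 (the arithmetic shift below turns the
  // low word's sign bit into the high word), while the constant 5 becomes
  // low word 5 and high word 0.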
  void LoadI64HalfIntoRegister(LiftoffRegister dst,
                               const LiftoffAssembler::VarState& src,
                               uint32_t index, RegPairHalf half) {
    // Use CHECK such that the remaining code is statically dead if
    // {kNeedI64RegPair} is false.
    CHECK(kNeedI64RegPair);
    DCHECK_EQ(kWasmI64, src.type());
    switch (src.loc()) {
      case VarState::kStack:
        LoadI64HalfStackSlot(dst, 2 * index - (half == kLowWord ? 0 : 1));
        break;
      case VarState::kRegister: {
        LiftoffRegister src_half =
            half == kLowWord ? src.reg().low() : src.reg().high();
        if (dst != src_half) MoveRegister(dst, src_half, kWasmI32);
        break;
      }
      case VarState::KIntConst:
        int32_t value = src.i32_const();
        // The high word is the sign extension of the low word.
        if (half == kHighWord) value = value >> 31;
        LoadConstant(dst, WasmValue(value));
        break;
    }
  }

  void MoveRegister(LiftoffRegister dst, LiftoffRegister src, ValueType type) {
    DCHECK_NE(dst, src);
    DCHECK_EQ(dst.reg_class(), src.reg_class());
    DCHECK_EQ(reg_class_for(type), src.reg_class());
    if (src.is_pair()) {
      DCHECK_EQ(kWasmI64, type);
      if (dst.low() != src.low()) MoveRegister(dst.low(), src.low(), kWasmI32);
      if (dst.high() != src.high())
        MoveRegister(dst.high(), src.high(), kWasmI32);
      return;
    }
    DCHECK(!move_dst_regs_.has(dst));
    move_dst_regs_.set(dst);
    move_src_regs_.set(src);
    register_moves_.emplace_back(dst, src, type);
  }

  void LoadConstant(LiftoffRegister dst, WasmValue value) {
    register_loads_.push_back(RegisterLoad::Const(dst, value));
  }

  void LoadStackSlot(LiftoffRegister dst, uint32_t stack_index,
                     ValueType type) {
    register_loads_.push_back(RegisterLoad::Stack(dst, stack_index, type));
  }

  void LoadI64HalfStackSlot(LiftoffRegister dst, uint32_t half_stack_index) {
    register_loads_.push_back(RegisterLoad::HalfStack(dst, half_stack_index));
  }

 private:
  // TODO(clemensh): Avoid unconditionally allocating on the heap.
  std::vector<RegisterMove> register_moves_;
  std::vector<RegisterLoad> register_loads_;
  LiftoffRegList move_dst_regs_;
  LiftoffRegList move_src_regs_;
  LiftoffAssembler* const asm_;
};

}  // namespace

// TODO(clemensh): Don't copy the full parent state (this makes us N^2).
void LiftoffAssembler::CacheState::InitMerge(const CacheState& source,
                                             uint32_t num_locals,
                                             uint32_t arity) {
  DCHECK(stack_state.empty());
  DCHECK_GE(source.stack_height(), stack_base);
  stack_state.resize(stack_base + arity, VarState(kWasmStmt));

  // |------locals------|--(in between)--|--(discarded)--|----merge----|
  //  <-- num_locals -->                 ^stack_base      <-- arity -->

  // First, initialize merge slots and locals. Keep them in the registers
  // which are being used in {source}, but avoid using a register multiple
  // times. Use unused registers where necessary and possible.
  for (int range = 0; range < 2; ++range) {
    auto src_idx = range ? 0 : source.stack_state.size() - arity;
    auto src_end = range ? num_locals : source.stack_state.size();
    auto dst_idx = range ? 0 : stack_state.size() - arity;
    for (; src_idx < src_end; ++src_idx, ++dst_idx) {
      auto& dst = stack_state[dst_idx];
      auto& src = source.stack_state[src_idx];
      // Just initialize to any register; will be overwritten before use.
      LiftoffRegister reg(Register::from_code<0>());
      RegClass rc = src.is_reg() ? src.reg_class() : reg_class_for(src.type());
      if (src.is_reg() && is_free(src.reg())) {
        reg = src.reg();
      } else if (has_unused_register(rc)) {
        reg = unused_register(rc);
      } else {
        // Make this a stack slot.
        dst = VarState(src.type());
        continue;
      }
      dst = VarState(src.type(), reg);
      inc_used(reg);
    }
  }
  // Last, initialize the section in between. Here, constants are allowed,
  // but registers which are already used for the merge region or locals must
  // be spilled.
  for (uint32_t i = num_locals; i < stack_base; ++i) {
    auto& dst = stack_state[i];
    auto& src = source.stack_state[i];
    if (src.is_reg()) {
      if (is_used(src.reg())) {
        // Make this a stack slot.
        dst = VarState(src.type());
      } else {
        dst = VarState(src.type(), src.reg());
        inc_used(src.reg());
      }
    } else if (src.is_const()) {
      dst = src;
    } else {
      DCHECK(src.is_stack());
      // Make this a stack slot.
      dst = VarState(src.type());
    }
  }
  last_spilled_regs = source.last_spilled_regs;
}
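// Illustrative example (not part of the original source): with
// num_locals = 2, stack_base = 3 and arity = 1, the merge state above gets
// the layout [local0, local1, aux0, merge0]. local0, local1 and merge0 are
// initialized first and keep their {source} registers when those are still
// free; aux0 (the "in between" slot) may then keep its register or constant
// only if the register was not already claimed, otherwise it becomes a
// stack slot.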
void LiftoffAssembler::CacheState::Steal(CacheState& source) {
  // Just use the move assignment operator.
  *this = std::move(source);
}

void LiftoffAssembler::CacheState::Split(const CacheState& source) {
  // Call the private copy assignment operator.
  *this = source;
}

namespace {

constexpr AssemblerOptions DefaultLiftoffOptions() {
  return AssemblerOptions{};
}

}  // namespace

// TODO(clemensh): Provide a reasonably sized buffer, based on wasm function
// size.
LiftoffAssembler::LiftoffAssembler()
    : TurboAssembler(nullptr, DefaultLiftoffOptions(), nullptr, 0,
                     CodeObjectRequired::kNo) {
  set_trap_on_abort(true);  // Avoid calls to Abort.
}

LiftoffAssembler::~LiftoffAssembler() {
  if (num_locals_ > kInlineLocalTypes) {
    free(more_local_types_);
  }
}

LiftoffRegister LiftoffAssembler::PopToRegister(LiftoffRegList pinned) {
  DCHECK(!cache_state_.stack_state.empty());
  VarState slot = cache_state_.stack_state.back();
  cache_state_.stack_state.pop_back();
  switch (slot.loc()) {
    case VarState::kStack: {
      LiftoffRegister reg =
          GetUnusedRegister(reg_class_for(slot.type()), pinned);
      Fill(reg, cache_state_.stack_height(), slot.type());
      return reg;
    }
    case VarState::kRegister:
      cache_state_.dec_used(slot.reg());
      return slot.reg();
    case VarState::KIntConst: {
      RegClass rc =
          kNeedI64RegPair && slot.type() == kWasmI64 ? kGpRegPair : kGpReg;
      LiftoffRegister reg = GetUnusedRegister(rc, pinned);
      LoadConstant(reg, slot.constant());
      return reg;
    }
  }
  UNREACHABLE();
}

void LiftoffAssembler::MergeFullStackWith(CacheState& target) {
  DCHECK_EQ(cache_state_.stack_height(), target.stack_height());
  // TODO(clemensh): Reuse the same StackTransferRecipe object to save some
  // allocations.
  StackTransferRecipe transfers(this);
  for (uint32_t i = 0, e = cache_state_.stack_height(); i < e; ++i) {
    transfers.TransferStackSlot(target, i, i);
  }
}

void LiftoffAssembler::MergeStackWith(CacheState& target, uint32_t arity) {
  // Before: ----------------|------ pop_count -----|--- arity ---|
  //                         ^target_stack_height   ^stack_base   ^stack_height
  // After:  ----|-- arity --|
  //             ^           ^target_stack_height
  //             ^target_stack_base
  uint32_t stack_height = cache_state_.stack_height();
  uint32_t target_stack_height = target.stack_height();
  DCHECK_LE(target_stack_height, stack_height);
  DCHECK_LE(arity, target_stack_height);
  uint32_t stack_base = stack_height - arity;
  uint32_t target_stack_base = target_stack_height - arity;
  StackTransferRecipe transfers(this);
  for (uint32_t i = 0; i < target_stack_base; ++i) {
    transfers.TransferStackSlot(target, i, i);
  }
  for (uint32_t i = 0; i < arity; ++i) {
    transfers.TransferStackSlot(target, target_stack_base + i,
                                stack_base + i);
  }
}
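// Illustrative example (not part of the original source): merging a stack of
// height 5 into a target of height 3 with arity 2 gives stack_base = 3 and
// target_stack_base = 1. Slot 0 is transferred in place, the two top values
// move from slots 3 and 4 to slots 1 and 2, and the two slots in between are
// dropped.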
void LiftoffAssembler::Spill(uint32_t index) {
  auto& slot = cache_state_.stack_state[index];
  switch (slot.loc()) {
    case VarState::kStack:
      return;
    case VarState::kRegister:
      Spill(index, slot.reg(), slot.type());
      cache_state_.dec_used(slot.reg());
      break;
    case VarState::KIntConst:
      Spill(index, slot.constant());
      break;
  }
  slot.MakeStack();
}

void LiftoffAssembler::SpillLocals() {
  for (uint32_t i = 0; i < num_locals_; ++i) {
    Spill(i);
  }
}

void LiftoffAssembler::SpillAllRegisters() {
  for (uint32_t i = 0, e = cache_state_.stack_height(); i < e; ++i) {
    auto& slot = cache_state_.stack_state[i];
    if (!slot.is_reg()) continue;
    Spill(i, slot.reg(), slot.type());
    slot.MakeStack();
  }
  cache_state_.reset_used_registers();
}

void LiftoffAssembler::PrepareCall(wasm::FunctionSig* sig,
                                   compiler::CallDescriptor* call_descriptor,
                                   Register* target,
                                   LiftoffRegister* target_instance) {
  uint32_t num_params = static_cast<uint32_t>(sig->parameter_count());
  // Input 0 is the call target.
  constexpr size_t kInputShift = 1;

  // Spill all cache slots which are not being used as parameters.
  // Don't update any register use counters, they will be reset later anyway.
  for (uint32_t idx = 0, end = cache_state_.stack_height() - num_params;
       idx < end; ++idx) {
    VarState& slot = cache_state_.stack_state[idx];
    if (!slot.is_reg()) continue;
    Spill(idx, slot.reg(), slot.type());
    slot.MakeStack();
  }

  LiftoffStackSlots stack_slots(this);
  StackTransferRecipe stack_transfers(this);
  LiftoffRegList param_regs;

  // Move the target instance (if supplied) into the correct instance
  // register.
  compiler::LinkageLocation instance_loc =
      call_descriptor->GetInputLocation(kInputShift);
  DCHECK(instance_loc.IsRegister() && !instance_loc.IsAnyRegister());
  LiftoffRegister instance_reg(Register::from_code(instance_loc.AsRegister()));
  param_regs.set(instance_reg);
  if (target_instance && *target_instance != instance_reg) {
    stack_transfers.MoveRegister(instance_reg, *target_instance, kWasmIntPtr);
  }

  // Now move all parameter values into the right slot for the call.
  // Don't pop values yet, such that the stack height is still correct when
  // executing the {stack_transfers}.
  // Process parameters backwards, such that pushes of caller frame slots are
  // in the correct order.
  uint32_t param_base = cache_state_.stack_height() - num_params;
  uint32_t call_desc_input_idx =
      static_cast<uint32_t>(call_descriptor->InputCount());
  for (uint32_t i = num_params; i > 0; --i) {
    const uint32_t param = i - 1;
    ValueType type = sig->GetParam(param);
    const bool is_pair = kNeedI64RegPair && type == kWasmI64;
    const int num_lowered_params = is_pair ? 2 : 1;
    const uint32_t stack_idx = param_base + param;
    const VarState& slot = cache_state_.stack_state[stack_idx];
    // Process both halves of a register pair separately, because they are
    // passed as separate parameters. One or both of them could end up on the
    // stack.
    for (int lowered_idx = 0; lowered_idx < num_lowered_params;
         ++lowered_idx) {
      const RegPairHalf half =
          is_pair && lowered_idx == 0 ? kHighWord : kLowWord;
      --call_desc_input_idx;
      compiler::LinkageLocation loc =
          call_descriptor->GetInputLocation(call_desc_input_idx);
      if (loc.IsRegister()) {
        DCHECK(!loc.IsAnyRegister());
        RegClass rc = is_pair ? kGpReg : reg_class_for(type);
        LiftoffRegister reg = LiftoffRegister::from_code(rc, loc.AsRegister());
        param_regs.set(reg);
        if (is_pair) {
          stack_transfers.LoadI64HalfIntoRegister(reg, slot, stack_idx, half);
        } else {
          stack_transfers.LoadIntoRegister(reg, slot, stack_idx);
        }
      } else {
        DCHECK(loc.IsCallerFrameSlot());
        stack_slots.Add(slot, stack_idx, half);
      }
    }
  }
  // {call_desc_input_idx} should point after the instance parameter now.
  DCHECK_EQ(call_desc_input_idx, kInputShift + 1);
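  // Illustrative example (not part of the original source): if the indirect
  // call target sits in a register that the call descriptor also assigns to
  // a parameter, the code below moves the target into some register outside
  // {param_regs}; if no register is free, it pushes the target as an extra
  // stack slot and clears {*target}, so the caller has to pick the target up
  // from the stack.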
  // If the target register overlaps with a parameter register, then move the
  // target to another free register, or spill to the stack.
  if (target && param_regs.has(LiftoffRegister(*target))) {
    // Try to find another free register.
    LiftoffRegList free_regs = kGpCacheRegList.MaskOut(param_regs);
    if (!free_regs.is_empty()) {
      LiftoffRegister new_target = free_regs.GetFirstRegSet();
      stack_transfers.MoveRegister(new_target, LiftoffRegister(*target),
                                   kWasmIntPtr);
      *target = new_target.gp();
    } else {
      stack_slots.Add(LiftoffAssembler::VarState(LiftoffAssembler::kWasmIntPtr,
                                                 LiftoffRegister(*target)));
      *target = no_reg;
    }
  }

  // Create all the slots.
  stack_slots.Construct();
  // Execute the stack transfers before filling the instance register.
  stack_transfers.Execute();

  // Pop parameters from the value stack.
  auto stack_end = cache_state_.stack_state.end();
  cache_state_.stack_state.erase(stack_end - num_params, stack_end);

  // Reset register use counters.
  cache_state_.reset_used_registers();

  // Reload the instance from the stack.
  if (!target_instance) {
    FillInstanceInto(instance_reg.gp());
  }
}

void LiftoffAssembler::FinishCall(wasm::FunctionSig* sig,
                                  compiler::CallDescriptor* call_descriptor) {
  const size_t return_count = sig->return_count();
  if (return_count != 0) {
    DCHECK_EQ(1, return_count);
    ValueType return_type = sig->GetReturn(0);
    const bool need_pair = kNeedI64RegPair && return_type == kWasmI64;
    DCHECK_EQ(need_pair ? 2 : 1, call_descriptor->ReturnCount());
    RegClass rc = need_pair ? kGpReg : reg_class_for(return_type);
    LiftoffRegister return_reg = LiftoffRegister::from_code(
        rc, call_descriptor->GetReturnLocation(0).AsRegister());
    DCHECK(GetCacheRegList(rc).has(return_reg));
    if (need_pair) {
      LiftoffRegister high_reg = LiftoffRegister::from_code(
          rc, call_descriptor->GetReturnLocation(1).AsRegister());
      DCHECK(GetCacheRegList(rc).has(high_reg));
      return_reg = LiftoffRegister::ForPair(return_reg.gp(), high_reg.gp());
    }
    DCHECK(!cache_state_.is_used(return_reg));
    PushRegister(return_type, return_reg);
  }
}

void LiftoffAssembler::Move(LiftoffRegister dst, LiftoffRegister src,
                            ValueType type) {
  DCHECK_EQ(dst.reg_class(), src.reg_class());
  DCHECK_NE(dst, src);
  if (kNeedI64RegPair && dst.is_pair()) {
    // Use the {StackTransferRecipe} to move pairs, as the registers in the
    // pairs might overlap.
    StackTransferRecipe(this).MoveRegister(dst, src, type);
  } else if (dst.is_gp()) {
    Move(dst.gp(), src.gp(), type);
  } else {
    Move(dst.fp(), src.fp(), type);
  }
}

void LiftoffAssembler::ParallelRegisterMove(
    std::initializer_list<ParallelRegisterMoveTuple> tuples) {
  StackTransferRecipe stack_transfers(this);
  for (auto tuple : tuples) {
    if (tuple.dst == tuple.src) continue;
    stack_transfers.MoveRegister(tuple.dst, tuple.src, tuple.type);
  }
}

bool LiftoffAssembler::ValidateCacheState() const {
  uint32_t register_use_count[kAfterMaxLiftoffRegCode] = {0};
  LiftoffRegList used_regs;
  for (const VarState& var : cache_state_.stack_state) {
    if (!var.is_reg()) continue;
    LiftoffRegister reg = var.reg();
    if (kNeedI64RegPair && reg.is_pair()) {
      ++register_use_count[reg.low().liftoff_code()];
      ++register_use_count[reg.high().liftoff_code()];
    } else {
      ++register_use_count[reg.liftoff_code()];
    }
    used_regs.set(reg);
  }
  bool valid = memcmp(register_use_count, cache_state_.register_use_count,
                      sizeof(register_use_count)) == 0 &&
               used_regs == cache_state_.used_registers;
  if (valid) return true;
  std::ostringstream os;
  os << "Error in LiftoffAssembler::ValidateCacheState().\n";
  os << "expected: used_regs " << used_regs << ", counts "
     << PrintCollection(register_use_count) << "\n";
  os << "found: used_regs " << cache_state_.used_registers << ", counts "
     << PrintCollection(cache_state_.register_use_count) << "\n";
  os << "Use --trace-liftoff to debug.";
  FATAL("%s", os.str().c_str());
}
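// Illustrative note (not part of the original source): an i64 value held in
// a register pair bumps the use count of both halves, so {SpillRegister}
// below decrements both counts when it spills a pair slot; the final
// {clear_used} call only clears the bookkeeping for the register that was
// actually requested.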
LiftoffRegister LiftoffAssembler::SpillOneRegister(LiftoffRegList candidates,
                                                   LiftoffRegList pinned) {
  // Spill one cached value to free a register.
  LiftoffRegister spill_reg = cache_state_.GetNextSpillReg(candidates, pinned);
  SpillRegister(spill_reg);
  return spill_reg;
}

void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
  int remaining_uses = cache_state_.get_use_count(reg);
  DCHECK_LT(0, remaining_uses);
  for (uint32_t idx = cache_state_.stack_height() - 1;; --idx) {
    DCHECK_GT(cache_state_.stack_height(), idx);
    auto* slot = &cache_state_.stack_state[idx];
    if (!slot->is_reg() || !slot->reg().overlaps(reg)) continue;
    if (slot->reg().is_pair()) {
      // Make sure to decrement *both* registers in a pair, because the
      // {clear_used} call below only clears one of them.
      cache_state_.dec_used(slot->reg().low());
      cache_state_.dec_used(slot->reg().high());
    }
    Spill(idx, slot->reg(), slot->type());
    slot->MakeStack();
    if (--remaining_uses == 0) break;
  }
  cache_state_.clear_used(reg);
}

void LiftoffAssembler::set_num_locals(uint32_t num_locals) {
  DCHECK_EQ(0, num_locals_);  // only call this once.
  num_locals_ = num_locals;
  if (num_locals > kInlineLocalTypes) {
    more_local_types_ = reinterpret_cast<ValueType*>(
        malloc(num_locals * sizeof(ValueType)));
    DCHECK_NOT_NULL(more_local_types_);
  }
}

std::ostream& operator<<(std::ostream& os, VarState slot) {
  os << ValueTypes::TypeName(slot.type()) << ":";
  switch (slot.loc()) {
    case VarState::kStack:
      return os << "s";
    case VarState::kRegister:
      return os << slot.reg();
    case VarState::KIntConst:
      return os << "c" << slot.i32_const();
  }
  UNREACHABLE();
}

#undef __
#undef TRACE

}  // namespace wasm
}  // namespace internal
}  // namespace v8