diff options
Diffstat (limited to 'deps/v8/src/codegen/arm64/assembler-arm64.h')
-rw-r--r-- | deps/v8/src/codegen/arm64/assembler-arm64.h | 2855 |
1 file changed, 2855 insertions, 0 deletions
diff --git a/deps/v8/src/codegen/arm64/assembler-arm64.h b/deps/v8/src/codegen/arm64/assembler-arm64.h new file mode 100644 index 0000000000..04cd422241 --- /dev/null +++ b/deps/v8/src/codegen/arm64/assembler-arm64.h @@ -0,0 +1,2855 @@ +// Copyright 2013 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_ +#define V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_ + +#include <deque> +#include <list> +#include <map> +#include <vector> + +#include "src/base/optional.h" +#include "src/codegen/arm64/constants-arm64.h" +#include "src/codegen/arm64/instructions-arm64.h" +#include "src/codegen/arm64/register-arm64.h" +#include "src/codegen/assembler.h" +#include "src/codegen/constant-pool.h" +#include "src/common/globals.h" +#include "src/utils/utils.h" + +// Windows arm64 SDK defines mvn to NEON intrinsic neon_not which will not +// be used here. +#if defined(V8_OS_WIN) && defined(mvn) +#undef mvn +#endif + +namespace v8 { +namespace internal { + +class SafepointTableBuilder; + +// ----------------------------------------------------------------------------- +// Immediates. +class Immediate { + public: + template <typename T> + inline explicit Immediate(Handle<T> handle); + + // This is allowed to be an implicit constructor because Immediate is + // a wrapper class that doesn't normally perform any type conversion. + template <typename T> + inline Immediate(T value); // NOLINT(runtime/explicit) + + template <typename T> + inline Immediate(T value, RelocInfo::Mode rmode); + + int64_t value() const { return value_; } + RelocInfo::Mode rmode() const { return rmode_; } + + private: + V8_EXPORT_PRIVATE void InitializeHandle(Handle<HeapObject> value); + + int64_t value_; + RelocInfo::Mode rmode_; +}; + +// ----------------------------------------------------------------------------- +// Operands. 
+constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize; +constexpr uint64_t kSmiShiftMask = (1ULL << kSmiShift) - 1; + +// Represents an operand in a machine instruction. +class Operand { + // TODO(all): If necessary, study more in details which methods + // TODO(all): should be inlined or not. + public: + // rm, {<shift> {#<shift_amount>}} + // where <shift> is one of {LSL, LSR, ASR, ROR}. + // <shift_amount> is uint6_t. + // This is allowed to be an implicit constructor because Operand is + // a wrapper class that doesn't normally perform any type conversion. + inline Operand(Register reg, Shift shift = LSL, + unsigned shift_amount = 0); // NOLINT(runtime/explicit) + + // rm, <extend> {#<shift_amount>} + // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}. + // <shift_amount> is uint2_t. + inline Operand(Register reg, Extend extend, unsigned shift_amount = 0); + + static Operand EmbeddedNumber(double number); // Smi or HeapNumber. + static Operand EmbeddedStringConstant(const StringConstantBase* str); + + inline bool IsHeapObjectRequest() const; + inline HeapObjectRequest heap_object_request() const; + inline Immediate immediate_for_heap_object_request() const; + + template <typename T> + inline explicit Operand(Handle<T> handle); + + // Implicit constructor for all int types, ExternalReference, and Smi. + template <typename T> + inline Operand(T t); // NOLINT(runtime/explicit) + + // Implicit constructor for int types. + template <typename T> + inline Operand(T t, RelocInfo::Mode rmode); + + inline bool IsImmediate() const; + inline bool IsShiftedRegister() const; + inline bool IsExtendedRegister() const; + inline bool IsZero() const; + + // This returns an LSL shift (<= 4) operand as an equivalent extend operand, + // which helps in the encoding of instructions that use the stack pointer. 
+ inline Operand ToExtendedRegister() const; + + inline Immediate immediate() const; + inline int64_t ImmediateValue() const; + inline RelocInfo::Mode ImmediateRMode() const; + inline Register reg() const; + inline Shift shift() const; + inline Extend extend() const; + inline unsigned shift_amount() const; + + // Relocation information. + bool NeedsRelocation(const Assembler* assembler) const; + + private: + base::Optional<HeapObjectRequest> heap_object_request_; + Immediate immediate_; + Register reg_; + Shift shift_; + Extend extend_; + unsigned shift_amount_; +}; + +// MemOperand represents a memory operand in a load or store instruction. +class MemOperand { + public: + inline MemOperand(); + inline explicit MemOperand(Register base, int64_t offset = 0, + AddrMode addrmode = Offset); + inline explicit MemOperand(Register base, Register regoffset, + Shift shift = LSL, unsigned shift_amount = 0); + inline explicit MemOperand(Register base, Register regoffset, Extend extend, + unsigned shift_amount = 0); + inline explicit MemOperand(Register base, const Operand& offset, + AddrMode addrmode = Offset); + + const Register& base() const { return base_; } + const Register& regoffset() const { return regoffset_; } + int64_t offset() const { return offset_; } + AddrMode addrmode() const { return addrmode_; } + Shift shift() const { return shift_; } + Extend extend() const { return extend_; } + unsigned shift_amount() const { return shift_amount_; } + inline bool IsImmediateOffset() const; + inline bool IsRegisterOffset() const; + inline bool IsPreIndex() const; + inline bool IsPostIndex() const; + + // For offset modes, return the offset as an Operand. This helper cannot + // handle indexed modes. + inline Operand OffsetAsOperand() const; + + enum PairResult { + kNotPair, // Can't use a pair instruction. + kPairAB, // Can use a pair instruction (operandA has lower address). + kPairBA // Can use a pair instruction (operandB has lower address). 
+ }; + // Check if two MemOperand are consistent for stp/ldp use. + static PairResult AreConsistentForPair(const MemOperand& operandA, + const MemOperand& operandB, + int access_size_log2 = kXRegSizeLog2); + + private: + Register base_; + Register regoffset_; + int64_t offset_; + AddrMode addrmode_; + Shift shift_; + Extend extend_; + unsigned shift_amount_; +}; + +class ConstPool { + public: + explicit ConstPool(Assembler* assm) : assm_(assm), first_use_(-1) {} + // Returns true when we need to write RelocInfo and false when we do not. + bool RecordEntry(intptr_t data, RelocInfo::Mode mode); + int EntryCount() const { return static_cast<int>(entries_.size()); } + bool IsEmpty() const { return entries_.empty(); } + // Distance in bytes between the current pc and the first instruction + // using the pool. If there are no pending entries return kMaxInt. + int DistanceToFirstUse(); + // Offset after which instructions using the pool will be out of range. + int MaxPcOffset(); + // Maximum size the constant pool can be with current entries. It always + // includes alignment padding and branch over. + int WorstCaseSize(); + // Size in bytes of the literal pool *if* it is emitted at the current + // pc. The size will include the branch over the pool if it was requested. + int SizeIfEmittedAtCurrentPc(bool require_jump); + // Emit the literal pool at the current pc with a branch over the pool if + // requested. + void Emit(bool require_jump); + // Discard any pending pool entries. + void Clear(); + + private: + void EmitMarker(); + void EmitGuard(); + void EmitEntries(); + + using SharedEntryMap = std::map<uint64_t, int>; + // Adds a shared entry to entries_, using 'entry_map' to determine whether we + // already track this entry. Returns true if this is the first time we add + // this entry, false otherwise. 
+ bool AddSharedEntry(SharedEntryMap& entry_map, uint64_t data, int offset); + + Assembler* assm_; + // Keep track of the first instruction requiring a constant pool entry + // since the previous constant pool was emitted. + int first_use_; + + // Map of data to index in entries_ for shared entries. + SharedEntryMap shared_entries_; + + // Map of address of handle to index in entries_. We need to keep track of + // code targets separately from other shared entries, as they can be + // relocated. + SharedEntryMap handle_to_index_map_; + + // Values, pc offset(s) of entries. Use a vector to preserve the order of + // insertion, as the serializer expects code target RelocInfo to point to + // constant pool addresses in an ascending order. + std::vector<std::pair<uint64_t, std::vector<int> > > entries_; +}; + +// ----------------------------------------------------------------------------- +// Assembler. + +class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { + public: + // Create an assembler. Instructions and relocation information are emitted + // into a buffer, with the instructions starting from the beginning and the + // relocation information starting from the end of the buffer. See CodeDesc + // for a detailed comment on the layout (globals.h). + // + // If the provided buffer is nullptr, the assembler allocates and grows its + // own buffer. Otherwise it takes ownership of the provided buffer. + explicit Assembler(const AssemblerOptions&, + std::unique_ptr<AssemblerBuffer> = {}); + + virtual ~Assembler(); + + virtual void AbortedCodeGeneration(); + + // System functions --------------------------------------------------------- + // Start generating code from the beginning of the buffer, discarding any code + // and data that has already been emitted into the buffer. + // + // In order to avoid any accidental transfer of state, Reset DCHECKs that the + // constant pool is not blocked. 
+ void Reset(); + + // GetCode emits any pending (non-emitted) code and fills the descriptor desc. + static constexpr int kNoHandlerTable = 0; + static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr; + void GetCode(Isolate* isolate, CodeDesc* desc, + SafepointTableBuilder* safepoint_table_builder, + int handler_table_offset); + + // Convenience wrapper for code without safepoint or handler tables. + void GetCode(Isolate* isolate, CodeDesc* desc) { + GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable); + } + + // Insert the smallest number of nop instructions + // possible to align the pc offset to a multiple + // of m. m must be a power of 2 (>= 4). + void Align(int m); + // Insert the smallest number of zero bytes possible to align the pc offset + // to a multiple of m. m must be a power of 2 (>= 2). + void DataAlign(int m); + // Aligns code to something that's optimal for a jump target for the platform. + void CodeTargetAlign(); + + inline void Unreachable(); + + // Label -------------------------------------------------------------------- + // Bind a label to the current pc. Note that labels can only be bound once, + // and if labels are linked to other instructions, they _must_ be bound + // before they go out of scope. + void bind(Label* label); + + // RelocInfo and pools ------------------------------------------------------ + + // Record relocation information for current pc_. + enum ConstantPoolMode { NEEDS_POOL_ENTRY, NO_POOL_ENTRY }; + void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0, + ConstantPoolMode constant_pool_mode = NEEDS_POOL_ENTRY); + + // Generate a B immediate instruction with the corresponding relocation info. + // 'offset' is the immediate to encode in the B instruction (so it is the + // difference between the target and the PC of the instruction, divided by + // the instruction size). 
+ void near_jump(int offset, RelocInfo::Mode rmode); + // Generate a BL immediate instruction with the corresponding relocation info. + // As for near_jump, 'offset' is the immediate to encode in the BL + // instruction. + void near_call(int offset, RelocInfo::Mode rmode); + // Generate a BL immediate instruction with the corresponding relocation info + // for the input HeapObjectRequest. + void near_call(HeapObjectRequest request); + + // Return the address in the constant pool of the code target address used by + // the branch/call instruction at pc. + inline static Address target_pointer_address_at(Address pc); + + // Read/Modify the code target address in the branch/call instruction at pc. + // The isolate argument is unused (and may be nullptr) when skipping flushing. + inline static Address target_address_at(Address pc, Address constant_pool); + inline static void set_target_address_at( + Address pc, Address constant_pool, Address target, + ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); + + // Returns the handle for the code object called at 'pc'. + // This might need to be temporarily encoded as an offset into code_targets_. + inline Handle<Code> code_target_object_handle_at(Address pc); + + inline Handle<HeapObject> compressed_embedded_object_handle_at(Address pc); + + // Returns the target address for a runtime function for the call encoded + // at 'pc'. + // Runtime entries can be temporarily encoded as the offset between the + // runtime function entrypoint and the code range start (stored in the + // code_range_start field), in order to be encodable as we generate the code, + // before it is moved into the code space. + inline Address runtime_entry_at(Address pc); + + // This sets the branch destination. 'location' here can be either the pc of + // an immediate branch or the address of an entry in the constant pool. + // This is for calls and branches within generated code. 
+ inline static void deserialization_set_special_target_at(Address location, + Code code, + Address target); + + // Get the size of the special target encoded at 'location'. + inline static int deserialization_special_target_size(Address location); + + // This sets the internal reference at the pc. + inline static void deserialization_set_target_internal_reference_at( + Address pc, Address target, + RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); + + // This value is used in the serialization process and must be zero for + // ARM64, as the code target is split across multiple instructions and does + // not exist separately in the code, so the serializer should not step + // forwards in memory after a target is resolved and written. + static constexpr int kSpecialTargetSize = 0; + + // Size of the generated code in bytes + uint64_t SizeOfGeneratedCode() const { + DCHECK((pc_ >= buffer_start_) && (pc_ < (buffer_start_ + buffer_->size()))); + return pc_ - buffer_start_; + } + + // Return the code size generated from label to the current position. + uint64_t SizeOfCodeGeneratedSince(const Label* label) { + DCHECK(label->is_bound()); + DCHECK_GE(pc_offset(), label->pos()); + DCHECK_LT(pc_offset(), buffer_->size()); + return pc_offset() - label->pos(); + } + + // Return the number of instructions generated from label to the + // current position. + uint64_t InstructionsGeneratedSince(const Label* label) { + return SizeOfCodeGeneratedSince(label) / kInstrSize; + } + + // Prevent constant pool emission until EndBlockConstPool is called. + // Call to this function can be nested but must be followed by an equal + // number of calls to EndBlockConstPool. + void StartBlockConstPool(); + + // Resume constant pool emission. Need to be called as many times as + // StartBlockConstPool to have an effect. 
+ void EndBlockConstPool(); + + bool is_const_pool_blocked() const; + static bool IsConstantPoolAt(Instruction* instr); + static int ConstantPoolSizeAt(Instruction* instr); + // See Assembler::CheckConstPool for more info. + void EmitPoolGuard(); + + // Prevent veneer pool emission until EndBlockVeneerPool is called. + // Call to this function can be nested but must be followed by an equal + // number of calls to EndBlockVeneerPool. + void StartBlockVeneerPool(); + + // Resume veneer pool emission. Need to be called as many times as + // StartBlockVeneerPool to have an effect. + void EndBlockVeneerPool(); + + bool is_veneer_pool_blocked() const { + return veneer_pool_blocked_nesting_ > 0; + } + + // Block/resume emission of constant pools and veneer pools. + void StartBlockPools() { + StartBlockConstPool(); + StartBlockVeneerPool(); + } + void EndBlockPools() { + EndBlockConstPool(); + EndBlockVeneerPool(); + } + + // Record a deoptimization reason that can be used by a log or cpu profiler. + // Use --trace-deopt to enable. + void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position, + int id); + + int buffer_space() const; + + // Record the emission of a constant pool. + // + // The emission of constant and veneer pools depends on the size of the code + // generated and the number of RelocInfo recorded. + // The Debug mechanism needs to map code offsets between two versions of a + // function, compiled with and without debugger support (see for example + // Debug::PrepareForBreakPoints()). + // Compiling functions with debugger support generates additional code + // (DebugCodegen::GenerateSlot()). This may affect the emission of the pools + // and cause the version of the code with debugger support to have pools + // generated in different places. + // Recording the position and size of emitted pools allows to correctly + // compute the offset mappings between the different versions of a function in + // all situations. 
+ // + // The parameter indicates the size of the pool (in bytes), including + // the marker and branch over the data. + void RecordConstPool(int size); + + // Instruction set functions ------------------------------------------------ + + // Branch / Jump instructions. + // For branches, offsets are scaled, i.e. they are in instructions, not in bytes. + // Branch to register. + void br(const Register& xn); + + // Branch-link to register. + void blr(const Register& xn); + + // Branch to register with return hint. + void ret(const Register& xn = lr); + + // Unconditional branch to label. + void b(Label* label); + + // Conditional branch to label. + void b(Label* label, Condition cond); + + // Unconditional branch to PC offset. + void b(int imm26); + + // Conditional branch to PC offset. + void b(int imm19, Condition cond); + + // Branch-link to label / pc offset. + void bl(Label* label); + void bl(int imm26); + + // Compare and branch to label / pc offset if zero. + void cbz(const Register& rt, Label* label); + void cbz(const Register& rt, int imm19); + + // Compare and branch to label / pc offset if not zero. + void cbnz(const Register& rt, Label* label); + void cbnz(const Register& rt, int imm19); + + // Test bit and branch to label / pc offset if zero. + void tbz(const Register& rt, unsigned bit_pos, Label* label); + void tbz(const Register& rt, unsigned bit_pos, int imm14); + + // Test bit and branch to label / pc offset if not zero. + void tbnz(const Register& rt, unsigned bit_pos, Label* label); + void tbnz(const Register& rt, unsigned bit_pos, int imm14); + + // Address calculation instructions. + // Calculate a PC-relative address. Unlike for branches the offset in adr is + // unscaled (i.e. the result can be unaligned). + void adr(const Register& rd, Label* label); + void adr(const Register& rd, int imm21); + + // Data Processing instructions. + // Add. + void add(const Register& rd, const Register& rn, const Operand& operand); + + // Add and update status flags. 
+ void adds(const Register& rd, const Register& rn, const Operand& operand); + + // Compare negative. + void cmn(const Register& rn, const Operand& operand); + + // Subtract. + void sub(const Register& rd, const Register& rn, const Operand& operand); + + // Subtract and update status flags. + void subs(const Register& rd, const Register& rn, const Operand& operand); + + // Compare. + void cmp(const Register& rn, const Operand& operand); + + // Negate. + void neg(const Register& rd, const Operand& operand); + + // Negate and update status flags. + void negs(const Register& rd, const Operand& operand); + + // Add with carry bit. + void adc(const Register& rd, const Register& rn, const Operand& operand); + + // Add with carry bit and update status flags. + void adcs(const Register& rd, const Register& rn, const Operand& operand); + + // Subtract with carry bit. + void sbc(const Register& rd, const Register& rn, const Operand& operand); + + // Subtract with carry bit and update status flags. + void sbcs(const Register& rd, const Register& rn, const Operand& operand); + + // Negate with carry bit. + void ngc(const Register& rd, const Operand& operand); + + // Negate with carry bit and update status flags. + void ngcs(const Register& rd, const Operand& operand); + + // Logical instructions. + // Bitwise and (A & B). + void and_(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise and (A & B) and update status flags. + void ands(const Register& rd, const Register& rn, const Operand& operand); + + // Bit test, and set flags. + void tst(const Register& rn, const Operand& operand); + + // Bit clear (A & ~B). + void bic(const Register& rd, const Register& rn, const Operand& operand); + + // Bit clear (A & ~B) and update status flags. + void bics(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise and. + void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bit clear immediate. 
+ void bic(const VRegister& vd, const int imm8, const int left_shift = 0); + + // Bit clear. + void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise insert if false. + void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise insert if true. + void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise select. + void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Polynomial multiply. + void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Vector move immediate. + void movi(const VRegister& vd, const uint64_t imm, Shift shift = LSL, + const int shift_amount = 0); + + // Bitwise not. + void mvn(const VRegister& vd, const VRegister& vn); + + // Vector move inverted immediate. + void mvni(const VRegister& vd, const int imm8, Shift shift = LSL, + const int shift_amount = 0); + + // Signed saturating accumulate of unsigned value. + void suqadd(const VRegister& vd, const VRegister& vn); + + // Unsigned saturating accumulate of signed value. + void usqadd(const VRegister& vd, const VRegister& vn); + + // Absolute value. + void abs(const VRegister& vd, const VRegister& vn); + + // Signed saturating absolute value. + void sqabs(const VRegister& vd, const VRegister& vn); + + // Negate. + void neg(const VRegister& vd, const VRegister& vn); + + // Signed saturating negate. + void sqneg(const VRegister& vd, const VRegister& vn); + + // Bitwise not. + void not_(const VRegister& vd, const VRegister& vn); + + // Extract narrow. + void xtn(const VRegister& vd, const VRegister& vn); + + // Extract narrow (second part). + void xtn2(const VRegister& vd, const VRegister& vn); + + // Signed saturating extract narrow. + void sqxtn(const VRegister& vd, const VRegister& vn); + + // Signed saturating extract narrow (second part). + void sqxtn2(const VRegister& vd, const VRegister& vn); + + // Unsigned saturating extract narrow. 
+ void uqxtn(const VRegister& vd, const VRegister& vn); + + // Unsigned saturating extract narrow (second part). + void uqxtn2(const VRegister& vd, const VRegister& vn); + + // Signed saturating extract unsigned narrow. + void sqxtun(const VRegister& vd, const VRegister& vn); + + // Signed saturating extract unsigned narrow (second part). + void sqxtun2(const VRegister& vd, const VRegister& vn); + + // Move register to register. + void mov(const VRegister& vd, const VRegister& vn); + + // Bitwise not or. + void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise exclusive or. + void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise or (A | B). + void orr(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise or. + void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise or immediate. + void orr(const VRegister& vd, const int imm8, const int left_shift = 0); + + // Bitwise nor (A | ~B). + void orn(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise eor/xor (A ^ B). + void eor(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise enor/xnor (A ^ ~B). + void eon(const Register& rd, const Register& rn, const Operand& operand); + + // Logical shift left variable. + void lslv(const Register& rd, const Register& rn, const Register& rm); + + // Logical shift right variable. + void lsrv(const Register& rd, const Register& rn, const Register& rm); + + // Arithmetic shift right variable. + void asrv(const Register& rd, const Register& rn, const Register& rm); + + // Rotate right variable. + void rorv(const Register& rd, const Register& rn, const Register& rm); + + // Bitfield instructions. + // Bitfield move. + void bfm(const Register& rd, const Register& rn, int immr, int imms); + + // Signed bitfield move. 
+ void sbfm(const Register& rd, const Register& rn, int immr, int imms); + + // Unsigned bitfield move. + void ubfm(const Register& rd, const Register& rn, int immr, int imms); + + // Bfm aliases. + // Bitfield insert. + void bfi(const Register& rd, const Register& rn, int lsb, int width) { + DCHECK_GE(width, 1); + DCHECK(lsb + width <= rn.SizeInBits()); + bfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); + } + + // Bitfield extract and insert low. + void bfxil(const Register& rd, const Register& rn, int lsb, int width) { + DCHECK_GE(width, 1); + DCHECK(lsb + width <= rn.SizeInBits()); + bfm(rd, rn, lsb, lsb + width - 1); + } + + // Sbfm aliases. + // Arithmetic shift right. + void asr(const Register& rd, const Register& rn, int shift) { + DCHECK(shift < rd.SizeInBits()); + sbfm(rd, rn, shift, rd.SizeInBits() - 1); + } + + // Signed bitfield insert in zero. + void sbfiz(const Register& rd, const Register& rn, int lsb, int width) { + DCHECK_GE(width, 1); + DCHECK(lsb + width <= rn.SizeInBits()); + sbfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); + } + + // Signed bitfield extract. + void sbfx(const Register& rd, const Register& rn, int lsb, int width) { + DCHECK_GE(width, 1); + DCHECK(lsb + width <= rn.SizeInBits()); + sbfm(rd, rn, lsb, lsb + width - 1); + } + + // Signed extend byte. + void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); } + + // Signed extend halfword. + void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); } + + // Signed extend word. + void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); } + + // Ubfm aliases. + // Logical shift left. + void lsl(const Register& rd, const Register& rn, int shift) { + int reg_size = rd.SizeInBits(); + DCHECK(shift < reg_size); + ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1); + } + + // Logical shift right. 
+ void lsr(const Register& rd, const Register& rn, int shift) { + DCHECK(shift < rd.SizeInBits()); + ubfm(rd, rn, shift, rd.SizeInBits() - 1); + } + + // Unsigned bitfield insert in zero. + void ubfiz(const Register& rd, const Register& rn, int lsb, int width) { + DCHECK_GE(width, 1); + DCHECK(lsb + width <= rn.SizeInBits()); + ubfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); + } + + // Unsigned bitfield extract. + void ubfx(const Register& rd, const Register& rn, int lsb, int width) { + DCHECK_GE(width, 1); + DCHECK(lsb + width <= rn.SizeInBits()); + ubfm(rd, rn, lsb, lsb + width - 1); + } + + // Unsigned extend byte. + void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); } + + // Unsigned extend halfword. + void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); } + + // Unsigned extend word. + void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); } + + // Extract. + void extr(const Register& rd, const Register& rn, const Register& rm, + int lsb); + + // Conditional select: rd = cond ? rn : rm. + void csel(const Register& rd, const Register& rn, const Register& rm, + Condition cond); + + // Conditional select increment: rd = cond ? rn : rm + 1. + void csinc(const Register& rd, const Register& rn, const Register& rm, + Condition cond); + + // Conditional select inversion: rd = cond ? rn : ~rm. + void csinv(const Register& rd, const Register& rn, const Register& rm, + Condition cond); + + // Conditional select negation: rd = cond ? rn : -rm. + void csneg(const Register& rd, const Register& rn, const Register& rm, + Condition cond); + + // Conditional set: rd = cond ? 1 : 0. + void cset(const Register& rd, Condition cond); + + // Conditional set minus: rd = cond ? -1 : 0. + void csetm(const Register& rd, Condition cond); + + // Conditional increment: rd = cond ? rn + 1 : rn. + void cinc(const Register& rd, const Register& rn, Condition cond); + + // Conditional invert: rd = cond ? 
~rn : rn. + void cinv(const Register& rd, const Register& rn, Condition cond); + + // Conditional negate: rd = cond ? -rn : rn. + void cneg(const Register& rd, const Register& rn, Condition cond); + + // Extr aliases. + void ror(const Register& rd, const Register& rs, unsigned shift) { + extr(rd, rs, rs, shift); + } + + // Conditional comparison. + // Conditional compare negative. + void ccmn(const Register& rn, const Operand& operand, StatusFlags nzcv, + Condition cond); + + // Conditional compare. + void ccmp(const Register& rn, const Operand& operand, StatusFlags nzcv, + Condition cond); + + // Multiplication. + // 32 x 32 -> 32-bit and 64 x 64 -> 64-bit multiply. + void mul(const Register& rd, const Register& rn, const Register& rm); + + // 32 + 32 x 32 -> 32-bit and 64 + 64 x 64 -> 64-bit multiply accumulate. + void madd(const Register& rd, const Register& rn, const Register& rm, + const Register& ra); + + // -(32 x 32) -> 32-bit and -(64 x 64) -> 64-bit multiply. + void mneg(const Register& rd, const Register& rn, const Register& rm); + + // 32 - 32 x 32 -> 32-bit and 64 - 64 x 64 -> 64-bit multiply subtract. + void msub(const Register& rd, const Register& rn, const Register& rm, + const Register& ra); + + // 32 x 32 -> 64-bit multiply. + void smull(const Register& rd, const Register& rn, const Register& rm); + + // Xd = bits<127:64> of Xn * Xm. + void smulh(const Register& rd, const Register& rn, const Register& rm); + + // Signed 32 x 32 -> 64-bit multiply and accumulate. + void smaddl(const Register& rd, const Register& rn, const Register& rm, + const Register& ra); + + // Unsigned 32 x 32 -> 64-bit multiply and accumulate. + void umaddl(const Register& rd, const Register& rn, const Register& rm, + const Register& ra); + + // Signed 32 x 32 -> 64-bit multiply and subtract. + void smsubl(const Register& rd, const Register& rn, const Register& rm, + const Register& ra); + + // Unsigned 32 x 32 -> 64-bit multiply and subtract. 
+ void umsubl(const Register& rd, const Register& rn, const Register& rm, + const Register& ra); + + // Signed integer divide. + void sdiv(const Register& rd, const Register& rn, const Register& rm); + + // Unsigned integer divide. + void udiv(const Register& rd, const Register& rn, const Register& rm); + + // Bit count, bit reverse and endian reverse. + void rbit(const Register& rd, const Register& rn); + void rev16(const Register& rd, const Register& rn); + void rev32(const Register& rd, const Register& rn); + void rev(const Register& rd, const Register& rn); + void clz(const Register& rd, const Register& rn); + void cls(const Register& rd, const Register& rn); + + // Memory instructions. + + // Load integer or FP register. + void ldr(const CPURegister& rt, const MemOperand& src); + + // Store integer or FP register. + void str(const CPURegister& rt, const MemOperand& dst); + + // Load word with sign extension. + void ldrsw(const Register& rt, const MemOperand& src); + + // Load byte. + void ldrb(const Register& rt, const MemOperand& src); + + // Store byte. + void strb(const Register& rt, const MemOperand& dst); + + // Load byte with sign extension. + void ldrsb(const Register& rt, const MemOperand& src); + + // Load half-word. + void ldrh(const Register& rt, const MemOperand& src); + + // Store half-word. + void strh(const Register& rt, const MemOperand& dst); + + // Load half-word with sign extension. + void ldrsh(const Register& rt, const MemOperand& src); + + // Load integer or FP register pair. + void ldp(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& src); + + // Store integer or FP register pair. + void stp(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& dst); + + // Load word pair with sign extension. + void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src); + + // Load literal to register from a pc relative address. 
+ void ldr_pcrel(const CPURegister& rt, int imm19); + + // Load literal to register. + void ldr(const CPURegister& rt, const Immediate& imm); + void ldr(const CPURegister& rt, const Operand& operand); + + // Load-acquire word. + void ldar(const Register& rt, const Register& rn); + + // Load-acquire exclusive word. + void ldaxr(const Register& rt, const Register& rn); + + // Store-release word. + void stlr(const Register& rt, const Register& rn); + + // Store-release exclusive word. + void stlxr(const Register& rs, const Register& rt, const Register& rn); + + // Load-acquire byte. + void ldarb(const Register& rt, const Register& rn); + + // Load-acquire exclusive byte. + void ldaxrb(const Register& rt, const Register& rn); + + // Store-release byte. + void stlrb(const Register& rt, const Register& rn); + + // Store-release exclusive byte. + void stlxrb(const Register& rs, const Register& rt, const Register& rn); + + // Load-acquire half-word. + void ldarh(const Register& rt, const Register& rn); + + // Load-acquire exclusive half-word. + void ldaxrh(const Register& rt, const Register& rn); + + // Store-release half-word. + void stlrh(const Register& rt, const Register& rn); + + // Store-release exclusive half-word. + void stlxrh(const Register& rs, const Register& rt, const Register& rn); + + // Move instructions. The default shift of -1 indicates that the move + // instruction will calculate an appropriate 16-bit immediate and left shift + // that is equal to the 64-bit immediate argument. If an explicit left shift + // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value. + // + // For movk, an explicit shift can be used to indicate which half word should + // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant + // half word with zero, whereas movk(x0, 0, 48) will overwrite the + // most-significant. + + // Move and keep. 
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }

  // Move with non-zero.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }

  // Move with zero.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }

  // Misc instructions.
  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move NOT(operand) to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.
  // Move to register from system register.
  void mrs(const Register& rt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& rt);

  // System hint.
  void hint(SystemHint code);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();

  // Conditional speculation barrier.
  void csdb();

  // Alias for system instructions.
  void nop() { hint(NOP); }

  // Different nop operations are used by the code generator to detect certain
  // states of the generated code.
  enum NopMarkerTypes {
    DEBUG_BREAK_NOP,
    INTERRUPT_CODE_NOP,
    ADR_FAR_NOP,
    FIRST_NOP_MARKER = DEBUG_BREAK_NOP,
    LAST_NOP_MARKER = ADR_FAR_NOP
  };

  // Emit a marker nop identified by the given type.
  void nop(NopMarkerTypes n);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
+ void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Multiply by scalar element. + void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Multiply-add by scalar element. + void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Multiply-subtract by scalar element. + void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed long multiply-add by scalar element. + void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed long multiply-add by scalar element (second part). + void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Unsigned long multiply-add by scalar element. + void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Unsigned long multiply-add by scalar element (second part). + void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed long multiply-sub by scalar element. + void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed long multiply-sub by scalar element (second part). + void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Unsigned long multiply-sub by scalar element. + void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Unsigned long multiply-sub by scalar element (second part). + void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed long multiply by scalar element. + void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed long multiply by scalar element (second part). 
+ void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Unsigned long multiply by scalar element. + void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Unsigned long multiply by scalar element (second part). + void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Add narrow returning high half. + void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add narrow returning high half (second part). + void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating double long multiply by element. + void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed saturating double long multiply by element (second part). + void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-add by element. + void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-add by element (second part). + void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-sub by element. + void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-sub by element (second part). + void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Compare bitwise to zero. + void cmeq(const VRegister& vd, const VRegister& vn, int value); + + // Compare signed greater than or equal to zero. + void cmge(const VRegister& vd, const VRegister& vn, int value); + + // Compare signed greater than zero. 
+ void cmgt(const VRegister& vd, const VRegister& vn, int value); + + // Compare signed less than or equal to zero. + void cmle(const VRegister& vd, const VRegister& vn, int value); + + // Compare signed less than zero. + void cmlt(const VRegister& vd, const VRegister& vn, int value); + + // Unsigned rounding halving add. + void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare equal. + void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare signed greater than or equal. + void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare signed greater than. + void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare unsigned higher. + void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare unsigned higher or same. + void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare bitwise test bits nonzero. + void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed shift left by register. + void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned shift left by register. + void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply-subtract. + void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply-subtract (second part). + void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply. + void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply (second part). + void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling multiply returning high half. 
+ void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating rounding doubling multiply returning high half. + void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling multiply element returning high half. + void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Signed saturating rounding doubling multiply element returning high half. + void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // Unsigned long multiply long. + void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply (second part). + void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Rounding add narrow returning high half. + void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Subtract narrow returning high half. + void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Subtract narrow returning high half (second part). + void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Rounding add narrow returning high half (second part). + void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Rounding subtract narrow returning high half. + void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Rounding subtract narrow returning high half (second part). + void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating shift left by register. + void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned saturating shift left by register. + void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed rounding shift left by register. 
+ void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned rounding shift left by register. + void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating rounding shift left by register. + void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned saturating rounding shift left by register. + void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference. + void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference and accumulate. + void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Shift left by immediate and insert. + void sli(const VRegister& vd, const VRegister& vn, int shift); + + // Shift right by immediate and insert. + void sri(const VRegister& vd, const VRegister& vn, int shift); + + // Signed maximum. + void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed pairwise maximum. + void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add across vector. + void addv(const VRegister& vd, const VRegister& vn); + + // Signed add long across vector. + void saddlv(const VRegister& vd, const VRegister& vn); + + // Unsigned add long across vector. + void uaddlv(const VRegister& vd, const VRegister& vn); + + // FP maximum number across vector. + void fmaxnmv(const VRegister& vd, const VRegister& vn); + + // FP maximum across vector. + void fmaxv(const VRegister& vd, const VRegister& vn); + + // FP minimum number across vector. + void fminnmv(const VRegister& vd, const VRegister& vn); + + // FP minimum across vector. + void fminv(const VRegister& vd, const VRegister& vn); + + // Signed maximum across vector. + void smaxv(const VRegister& vd, const VRegister& vn); + + // Signed minimum. 
+ void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed minimum pairwise. + void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed minimum across vector. + void sminv(const VRegister& vd, const VRegister& vn); + + // One-element structure store from one register. + void st1(const VRegister& vt, const MemOperand& src); + + // One-element structure store from two registers. + void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // One-element structure store from three registers. + void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const MemOperand& src); + + // One-element structure store from four registers. + void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const VRegister& vt4, const MemOperand& src); + + // One-element single structure store from one lane. + void st1(const VRegister& vt, int lane, const MemOperand& src); + + // Two-element structure store from two registers. + void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // Two-element single structure store from two lanes. + void st2(const VRegister& vt, const VRegister& vt2, int lane, + const MemOperand& src); + + // Three-element structure store from three registers. + void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const MemOperand& src); + + // Three-element single structure store from three lanes. + void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + int lane, const MemOperand& src); + + // Four-element structure store from four registers. + void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const VRegister& vt4, const MemOperand& src); + + // Four-element single structure store from four lanes. + void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const VRegister& vt4, int lane, const MemOperand& src); + + // Unsigned add long. 
+ void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned add long (second part). + void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned add wide. + void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned add wide (second part). + void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed add long. + void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed add long (second part). + void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed add wide. + void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed add wide (second part). + void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned subtract long. + void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned subtract long (second part). + void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned subtract wide. + void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed subtract long. + void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed subtract long (second part). + void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed integer subtract wide. + void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed integer subtract wide (second part). + void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned subtract wide (second part). + void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned maximum. + void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned pairwise maximum. 
+ void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned maximum across vector. + void umaxv(const VRegister& vd, const VRegister& vn); + + // Unsigned minimum. + void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned pairwise minimum. + void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned minimum across vector. + void uminv(const VRegister& vd, const VRegister& vn); + + // Transpose vectors (primary). + void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Transpose vectors (secondary). + void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unzip vectors (primary). + void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unzip vectors (secondary). + void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Zip vectors (primary). + void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Zip vectors (secondary). + void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed shift right by immediate. + void sshr(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned shift right by immediate. + void ushr(const VRegister& vd, const VRegister& vn, int shift); + + // Signed rounding shift right by immediate. + void srshr(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned rounding shift right by immediate. + void urshr(const VRegister& vd, const VRegister& vn, int shift); + + // Signed shift right by immediate and accumulate. + void ssra(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned shift right by immediate and accumulate. + void usra(const VRegister& vd, const VRegister& vn, int shift); + + // Signed rounding shift right by immediate and accumulate. 
+ void srsra(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned rounding shift right by immediate and accumulate. + void ursra(const VRegister& vd, const VRegister& vn, int shift); + + // Shift right narrow by immediate. + void shrn(const VRegister& vd, const VRegister& vn, int shift); + + // Shift right narrow by immediate (second part). + void shrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Rounding shift right narrow by immediate. + void rshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Rounding shift right narrow by immediate (second part). + void rshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating shift right narrow by immediate. + void uqshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating shift right narrow by immediate (second part). + void uqshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating rounding shift right narrow by immediate. + void uqrshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating rounding shift right narrow by immediate (second part). + void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift right narrow by immediate. + void sqshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift right narrow by immediate (second part). + void sqshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating rounded shift right narrow by immediate. + void sqrshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating rounded shift right narrow by immediate (second part). + void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift right unsigned narrow by immediate. 
+ void sqshrun(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift right unsigned narrow by immediate (second part). + void sqshrun2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed sat rounded shift right unsigned narrow by immediate. + void sqrshrun(const VRegister& vd, const VRegister& vn, int shift); + + // Signed sat rounded shift right unsigned narrow by immediate (second part). + void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift); + + // FP reciprocal step. + void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP reciprocal estimate. + void frecpe(const VRegister& vd, const VRegister& vn); + + // FP reciprocal square root estimate. + void frsqrte(const VRegister& vd, const VRegister& vn); + + // FP reciprocal square root step. + void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference and accumulate long. + void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference and accumulate long (second part). + void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference and accumulate long. + void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference and accumulate long (second part). + void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference long. + void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference long (second part). + void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference long. + void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference long (second part). 
+ void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Polynomial multiply long. + void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Polynomial multiply long (second part). + void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply-add. + void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply-add (second part). + void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply-add. + void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply-add (second part). + void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply-sub. + void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply-sub (second part). + void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply-sub. + void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply-sub (second part). + void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply. + void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply (second part). + void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply-add. + void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply-add (second part). + void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference. + void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference and accumulate. 
+ void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP instructions. + // Move immediate to FP register. + void fmov(const VRegister& fd, double imm); + void fmov(const VRegister& fd, float imm); + + // Move FP register to register. + void fmov(const Register& rd, const VRegister& fn); + + // Move register to FP register. + void fmov(const VRegister& fd, const Register& rn); + + // Move FP register to FP register. + void fmov(const VRegister& fd, const VRegister& fn); + + // Move 64-bit register to top half of 128-bit FP register. + void fmov(const VRegister& vd, int index, const Register& rn); + + // Move top half of 128-bit FP register to 64-bit register. + void fmov(const Register& rd, const VRegister& vn, int index); + + // FP add. + void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP subtract. + void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP multiply. + void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP compare equal to zero. + void fcmeq(const VRegister& vd, const VRegister& vn, double imm); + + // FP greater than zero. + void fcmgt(const VRegister& vd, const VRegister& vn, double imm); + + // FP greater than or equal to zero. + void fcmge(const VRegister& vd, const VRegister& vn, double imm); + + // FP less than or equal to zero. + void fcmle(const VRegister& vd, const VRegister& vn, double imm); + + // FP less than to zero. + void fcmlt(const VRegister& vd, const VRegister& vn, double imm); + + // FP absolute difference. + void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise add vector. + void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise add scalar. + void faddp(const VRegister& vd, const VRegister& vn); + + // FP pairwise maximum scalar. + void fmaxp(const VRegister& vd, const VRegister& vn); + + // FP pairwise maximum number scalar. 
+ void fmaxnmp(const VRegister& vd, const VRegister& vn); + + // FP pairwise minimum number scalar. + void fminnmp(const VRegister& vd, const VRegister& vn); + + // FP vector multiply accumulate. + void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP vector multiply subtract. + void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP vector multiply extended. + void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP absolute greater than or equal. + void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP absolute greater than. + void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP multiply by element. + void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // FP fused multiply-add to accumulator by element. + void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // FP fused multiply-sub from accumulator by element. + void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // FP multiply extended by element. + void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int vm_index); + + // FP compare equal. + void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP greater than. + void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP greater than or equal. + void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise maximum vector. + void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise minimum vector. + void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise minimum scalar. + void fminp(const VRegister& vd, const VRegister& vn); + + // FP pairwise maximum number vector. 
  void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP pairwise minimum number vector.
  void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP fused multiply-add.
  void fmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             const VRegister& va);

  // FP fused multiply-subtract.
  void fmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             const VRegister& va);

  // FP fused multiply-add and negate.
  void fnmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              const VRegister& va);

  // FP fused multiply-subtract and negate.
  void fnmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              const VRegister& va);

  // FP multiply-negate scalar.
  void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal exponent scalar.
  void frecpx(const VRegister& vd, const VRegister& vn);

  // FP divide.
  void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP maximum.
  void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP minimum.
  void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP maximum number (FMAXNM: a quiet NaN operand yields the other operand).
  void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP minimum number (FMINNM: a quiet NaN operand yields the other operand).
  void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP absolute.
  void fabs(const VRegister& vd, const VRegister& vn);

  // FP negate.
  void fneg(const VRegister& vd, const VRegister& vn);

  // FP square root.
  void fsqrt(const VRegister& vd, const VRegister& vn);

  // FP round to integer nearest with ties to away.
  void frinta(const VRegister& vd, const VRegister& vn);

  // FP round to integer, implicit rounding.
  void frinti(const VRegister& vd, const VRegister& vn);

  // FP round to integer toward minus infinity.
  void frintm(const VRegister& vd, const VRegister& vn);

  // FP round to integer nearest with ties to even.
  void frintn(const VRegister& vd, const VRegister& vn);

  // FP round to integer towards plus infinity.
  void frintp(const VRegister& vd, const VRegister& vn);

  // FP round to integer, exact, implicit rounding.
  void frintx(const VRegister& vd, const VRegister& vn);

  // FP round to integer towards zero.
  void frintz(const VRegister& vd, const VRegister& vn);

  // FP compare registers.
  void fcmp(const VRegister& vn, const VRegister& vm);

  // FP compare immediate.
  void fcmp(const VRegister& vn, double value);

  // FP conditional compare.
  void fccmp(const VRegister& vn, const VRegister& vm, StatusFlags nzcv,
             Condition cond);

  // FP conditional select.
  void fcsel(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             Condition cond);

  // Common FP Convert functions.
  void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
  void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);

  // FP convert between precisions.
  void fcvt(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision.
  void fcvtl(const VRegister& vd, const VRegister& vn);

  // FP convert to higher precision (second part).
  void fcvtl2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision.
  void fcvtn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision (second part).
  void fcvtn2(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd.
  void fcvtxn(const VRegister& vd, const VRegister& vn);

  // FP convert to lower precision, rounding to odd (second part).
  void fcvtxn2(const VRegister& vd, const VRegister& vn);

  // FP convert to signed integer, nearest with ties to away.
+ void fcvtas(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to away. + void fcvtau(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to away. + void fcvtas(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to away. + void fcvtau(const VRegister& vd, const VRegister& vn); + + // FP convert to signed integer, round towards -infinity. + void fcvtms(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, round towards -infinity. + void fcvtmu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, round towards -infinity. + void fcvtms(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, round towards -infinity. + void fcvtmu(const VRegister& vd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to even. + void fcvtns(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to even. + void fcvtnu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to even. + void fcvtns(const VRegister& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to even. + void fcvtnu(const VRegister& rd, const VRegister& vn); + + // FP convert to signed integer or fixed-point, round towards zero. + void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); + + // FP convert to unsigned integer or fixed-point, round towards zero. + void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); + + // FP convert to signed integer or fixed-point, round towards zero. + void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); + + // FP convert to unsigned integer or fixed-point, round towards zero. 
+ void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0); + + // FP convert to signed integer, round towards +infinity. + void fcvtps(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, round towards +infinity. + void fcvtpu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, round towards +infinity. + void fcvtps(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, round towards +infinity. + void fcvtpu(const VRegister& vd, const VRegister& vn); + + // Convert signed integer or fixed point to FP. + void scvtf(const VRegister& fd, const Register& rn, int fbits = 0); + + // Convert unsigned integer or fixed point to FP. + void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0); + + // Convert signed integer or fixed-point to FP. + void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); + + // Convert unsigned integer or fixed-point to FP. + void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); + + // Extract vector from pair of vectors. + void ext(const VRegister& vd, const VRegister& vn, const VRegister& vm, + int index); + + // Duplicate vector element to vector or scalar. + void dup(const VRegister& vd, const VRegister& vn, int vn_index); + + // Duplicate general-purpose register to vector. + void dup(const VRegister& vd, const Register& rn); + + // Insert vector element from general-purpose register. + void ins(const VRegister& vd, int vd_index, const Register& rn); + + // Move general-purpose register to a vector element. + void mov(const VRegister& vd, int vd_index, const Register& rn); + + // Unsigned move vector element to general-purpose register. + void umov(const Register& rd, const VRegister& vn, int vn_index); + + // Move vector element to general-purpose register. + void mov(const Register& rd, const VRegister& vn, int vn_index); + + // Move vector element to scalar. 
+ void mov(const VRegister& vd, const VRegister& vn, int vn_index); + + // Insert vector element from another vector element. + void ins(const VRegister& vd, int vd_index, const VRegister& vn, + int vn_index); + + // Move vector element to another vector element. + void mov(const VRegister& vd, int vd_index, const VRegister& vn, + int vn_index); + + // Signed move vector element to general-purpose register. + void smov(const Register& rd, const VRegister& vn, int vn_index); + + // One-element structure load to one register. + void ld1(const VRegister& vt, const MemOperand& src); + + // One-element structure load to two registers. + void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // One-element structure load to three registers. + void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const MemOperand& src); + + // One-element structure load to four registers. + void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const VRegister& vt4, const MemOperand& src); + + // One-element single structure load to one lane. + void ld1(const VRegister& vt, int lane, const MemOperand& src); + + // One-element single structure load to all lanes. + void ld1r(const VRegister& vt, const MemOperand& src); + + // Two-element structure load. + void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // Two-element single structure load to one lane. + void ld2(const VRegister& vt, const VRegister& vt2, int lane, + const MemOperand& src); + + // Two-element single structure load to all lanes. + void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // Three-element structure load. + void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const MemOperand& src); + + // Three-element single structure load to one lane. 
+ void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + int lane, const MemOperand& src); + + // Three-element single structure load to all lanes. + void ld3r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const MemOperand& src); + + // Four-element structure load. + void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const VRegister& vt4, const MemOperand& src); + + // Four-element single structure load to one lane. + void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const VRegister& vt4, int lane, const MemOperand& src); + + // Four-element single structure load to all lanes. + void ld4r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, + const VRegister& vt4, const MemOperand& src); + + // Count leading sign bits. + void cls(const VRegister& vd, const VRegister& vn); + + // Count leading zero bits (vector). + void clz(const VRegister& vd, const VRegister& vn); + + // Population count per byte. + void cnt(const VRegister& vd, const VRegister& vn); + + // Reverse bit order. + void rbit(const VRegister& vd, const VRegister& vn); + + // Reverse elements in 16-bit halfwords. + void rev16(const VRegister& vd, const VRegister& vn); + + // Reverse elements in 32-bit words. + void rev32(const VRegister& vd, const VRegister& vn); + + // Reverse elements in 64-bit doublewords. + void rev64(const VRegister& vd, const VRegister& vn); + + // Unsigned reciprocal square root estimate. + void ursqrte(const VRegister& vd, const VRegister& vn); + + // Unsigned reciprocal estimate. + void urecpe(const VRegister& vd, const VRegister& vn); + + // Signed pairwise long add and accumulate. + void sadalp(const VRegister& vd, const VRegister& vn); + + // Signed pairwise long add. + void saddlp(const VRegister& vd, const VRegister& vn); + + // Unsigned pairwise long add. + void uaddlp(const VRegister& vd, const VRegister& vn); + + // Unsigned pairwise long add and accumulate. 
+ void uadalp(const VRegister& vd, const VRegister& vn); + + // Shift left by immediate. + void shl(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift left by immediate. + void sqshl(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift left unsigned by immediate. + void sqshlu(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating shift left by immediate. + void uqshl(const VRegister& vd, const VRegister& vn, int shift); + + // Signed shift left long by immediate. + void sshll(const VRegister& vd, const VRegister& vn, int shift); + + // Signed shift left long by immediate (second part). + void sshll2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed extend long. + void sxtl(const VRegister& vd, const VRegister& vn); + + // Signed extend long (second part). + void sxtl2(const VRegister& vd, const VRegister& vn); + + // Unsigned shift left long by immediate. + void ushll(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned shift left long by immediate (second part). + void ushll2(const VRegister& vd, const VRegister& vn, int shift); + + // Shift left long by element size. + void shll(const VRegister& vd, const VRegister& vn, int shift); + + // Shift left long by element size (second part). + void shll2(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned extend long. + void uxtl(const VRegister& vd, const VRegister& vn); + + // Unsigned extend long (second part). + void uxtl2(const VRegister& vd, const VRegister& vn); + + // Signed rounding halving add. + void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned halving sub. + void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed halving sub. + void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned saturating add. 
+ void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating add. + void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned saturating subtract. + void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating subtract. + void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add pairwise. + void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add pair of elements scalar. + void addp(const VRegister& vd, const VRegister& vn); + + // Multiply-add to accumulator. + void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Multiply-subtract to accumulator. + void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Multiply. + void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Table lookup from one register. + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Table lookup from two registers. + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, + const VRegister& vm); + + // Table lookup from three registers. + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, + const VRegister& vn3, const VRegister& vm); + + // Table lookup from four registers. + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, + const VRegister& vn3, const VRegister& vn4, const VRegister& vm); + + // Table lookup extension from one register. + void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Table lookup extension from two registers. + void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, + const VRegister& vm); + + // Table lookup extension from three registers. 
  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
           const VRegister& vn3, const VRegister& vm);

  // Table lookup extension from four registers.
  void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
           const VRegister& vn3, const VRegister& vn4, const VRegister& vm);

  // Instruction functions used only for test, debug, and patching.
  // Emit raw instructions in the instruction stream.
  void dci(Instr raw_inst) { Emit(raw_inst); }

  // Emit 8 bits of data in the instruction stream.
  void dc8(uint8_t data) { EmitData(&data, sizeof(data)); }

  // Emit 32 bits of data in the instruction stream.
  void dc32(uint32_t data) { EmitData(&data, sizeof(data)); }

  // Emit 64 bits of data in the instruction stream.
  void dc64(uint64_t data) { EmitData(&data, sizeof(data)); }

  // Emit an address in the instruction stream.
  void dcptr(Label* label);

  // Copy a string into the instruction stream, including the terminating null
  // character. The instruction pointer (pc_) is then aligned correctly for
  // subsequent instructions.
  void EmitStringData(const char* string);

  // Pseudo-instructions ------------------------------------------------------

  // Parameters are described in arm64/instructions-arm64.h.
  void debug(const char* message, uint32_t code, Instr params = BREAK);

  // Required by V8.
+ void dd(uint32_t data) { dc32(data); } + void db(uint8_t data) { dc8(data); } + void dq(uint64_t data) { dc64(data); } + void dp(uintptr_t data) { dc64(data); } + + // Code generation helpers -------------------------------------------------- + + bool IsConstPoolEmpty() const { return constpool_.IsEmpty(); } + + Instruction* pc() const { return Instruction::Cast(pc_); } + + Instruction* InstructionAt(ptrdiff_t offset) const { + return reinterpret_cast<Instruction*>(buffer_start_ + offset); + } + + ptrdiff_t InstructionOffset(Instruction* instr) const { + return reinterpret_cast<byte*>(instr) - buffer_start_; + } + + // Register encoding. + static Instr Rd(CPURegister rd) { + DCHECK_NE(rd.code(), kSPRegInternalCode); + return rd.code() << Rd_offset; + } + + static Instr Rn(CPURegister rn) { + DCHECK_NE(rn.code(), kSPRegInternalCode); + return rn.code() << Rn_offset; + } + + static Instr Rm(CPURegister rm) { + DCHECK_NE(rm.code(), kSPRegInternalCode); + return rm.code() << Rm_offset; + } + + static Instr RmNot31(CPURegister rm) { + DCHECK_NE(rm.code(), kSPRegInternalCode); + DCHECK(!rm.IsZero()); + return Rm(rm); + } + + static Instr Ra(CPURegister ra) { + DCHECK_NE(ra.code(), kSPRegInternalCode); + return ra.code() << Ra_offset; + } + + static Instr Rt(CPURegister rt) { + DCHECK_NE(rt.code(), kSPRegInternalCode); + return rt.code() << Rt_offset; + } + + static Instr Rt2(CPURegister rt2) { + DCHECK_NE(rt2.code(), kSPRegInternalCode); + return rt2.code() << Rt2_offset; + } + + static Instr Rs(CPURegister rs) { + DCHECK_NE(rs.code(), kSPRegInternalCode); + return rs.code() << Rs_offset; + } + + // These encoding functions allow the stack pointer to be encoded, and + // disallow the zero register. + static Instr RdSP(Register rd) { + DCHECK(!rd.IsZero()); + return (rd.code() & kRegCodeMask) << Rd_offset; + } + + static Instr RnSP(Register rn) { + DCHECK(!rn.IsZero()); + return (rn.code() & kRegCodeMask) << Rn_offset; + } + + // Flags encoding. 
+ inline static Instr Flags(FlagsUpdate S); + inline static Instr Cond(Condition cond); + + // PC-relative address encoding. + inline static Instr ImmPCRelAddress(int imm21); + + // Branch encoding. + inline static Instr ImmUncondBranch(int imm26); + inline static Instr ImmCondBranch(int imm19); + inline static Instr ImmCmpBranch(int imm19); + inline static Instr ImmTestBranch(int imm14); + inline static Instr ImmTestBranchBit(unsigned bit_pos); + + // Data Processing encoding. + inline static Instr SF(Register rd); + inline static Instr ImmAddSub(int imm); + inline static Instr ImmS(unsigned imms, unsigned reg_size); + inline static Instr ImmR(unsigned immr, unsigned reg_size); + inline static Instr ImmSetBits(unsigned imms, unsigned reg_size); + inline static Instr ImmRotate(unsigned immr, unsigned reg_size); + inline static Instr ImmLLiteral(int imm19); + inline static Instr BitN(unsigned bitn, unsigned reg_size); + inline static Instr ShiftDP(Shift shift); + inline static Instr ImmDPShift(unsigned amount); + inline static Instr ExtendMode(Extend extend); + inline static Instr ImmExtendShift(unsigned left_shift); + inline static Instr ImmCondCmp(unsigned imm); + inline static Instr Nzcv(StatusFlags nzcv); + + static bool IsImmAddSub(int64_t immediate); + static bool IsImmLogical(uint64_t value, unsigned width, unsigned* n, + unsigned* imm_s, unsigned* imm_r); + + // MemOperand offset encoding. + inline static Instr ImmLSUnsigned(int imm12); + inline static Instr ImmLS(int imm9); + inline static Instr ImmLSPair(int imm7, unsigned size); + inline static Instr ImmShiftLS(unsigned shift_amount); + inline static Instr ImmException(int imm16); + inline static Instr ImmSystemRegister(int imm15); + inline static Instr ImmHint(int imm7); + inline static Instr ImmBarrierDomain(int imm2); + inline static Instr ImmBarrierType(int imm2); + inline static unsigned CalcLSDataSize(LoadStoreOp op); + + // Instruction bits for vector format in data processing operations. 
+ static Instr VFormat(VRegister vd) { + if (vd.Is64Bits()) { + switch (vd.LaneCount()) { + case 2: + return NEON_2S; + case 4: + return NEON_4H; + case 8: + return NEON_8B; + default: + UNREACHABLE(); + } + } else { + DCHECK(vd.Is128Bits()); + switch (vd.LaneCount()) { + case 2: + return NEON_2D; + case 4: + return NEON_4S; + case 8: + return NEON_8H; + case 16: + return NEON_16B; + default: + UNREACHABLE(); + } + } + } + + // Instruction bits for vector format in floating point data processing + // operations. + static Instr FPFormat(VRegister vd) { + if (vd.LaneCount() == 1) { + // Floating point scalar formats. + DCHECK(vd.Is32Bits() || vd.Is64Bits()); + return vd.Is64Bits() ? FP64 : FP32; + } + + // Two lane floating point vector formats. + if (vd.LaneCount() == 2) { + DCHECK(vd.Is64Bits() || vd.Is128Bits()); + return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S; + } + + // Four lane floating point vector format. + DCHECK((vd.LaneCount() == 4) && vd.Is128Bits()); + return NEON_FP_4S; + } + + // Instruction bits for vector format in load and store operations. + static Instr LSVFormat(VRegister vd) { + if (vd.Is64Bits()) { + switch (vd.LaneCount()) { + case 1: + return LS_NEON_1D; + case 2: + return LS_NEON_2S; + case 4: + return LS_NEON_4H; + case 8: + return LS_NEON_8B; + default: + UNREACHABLE(); + } + } else { + DCHECK(vd.Is128Bits()); + switch (vd.LaneCount()) { + case 2: + return LS_NEON_2D; + case 4: + return LS_NEON_4S; + case 8: + return LS_NEON_8H; + case 16: + return LS_NEON_16B; + default: + UNREACHABLE(); + } + } + } + + // Instruction bits for scalar format in data processing operations. 
+ static Instr SFormat(VRegister vd) { + DCHECK(vd.IsScalar()); + switch (vd.SizeInBytes()) { + case 1: + return NEON_B; + case 2: + return NEON_H; + case 4: + return NEON_S; + case 8: + return NEON_D; + default: + UNREACHABLE(); + } + } + + static Instr ImmNEONHLM(int index, int num_bits) { + int h, l, m; + if (num_bits == 3) { + DCHECK(is_uint3(index)); + h = (index >> 2) & 1; + l = (index >> 1) & 1; + m = (index >> 0) & 1; + } else if (num_bits == 2) { + DCHECK(is_uint2(index)); + h = (index >> 1) & 1; + l = (index >> 0) & 1; + m = 0; + } else { + DCHECK(is_uint1(index) && (num_bits == 1)); + h = (index >> 0) & 1; + l = 0; + m = 0; + } + return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); + } + + static Instr ImmNEONExt(int imm4) { + DCHECK(is_uint4(imm4)); + return imm4 << ImmNEONExt_offset; + } + + static Instr ImmNEON5(Instr format, int index) { + DCHECK(is_uint4(index)); + int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); + int imm5 = (index << (s + 1)) | (1 << s); + return imm5 << ImmNEON5_offset; + } + + static Instr ImmNEON4(Instr format, int index) { + DCHECK(is_uint4(index)); + int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); + int imm4 = index << s; + return imm4 << ImmNEON4_offset; + } + + static Instr ImmNEONabcdefgh(int imm8) { + DCHECK(is_uint8(imm8)); + Instr instr; + instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; + instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; + return instr; + } + + static Instr NEONCmode(int cmode) { + DCHECK(is_uint4(cmode)); + return cmode << NEONCmode_offset; + } + + static Instr NEONModImmOp(int op) { + DCHECK(is_uint1(op)); + return op << NEONModImmOp_offset; + } + + static bool IsImmLSUnscaled(int64_t offset); + static bool IsImmLSScaled(int64_t offset, unsigned size); + static bool IsImmLLiteral(int64_t offset); + + // Move immediates encoding. 
  inline static Instr ImmMoveWide(int imm);
  inline static Instr ShiftMoveWide(int shift);

  // FP Immediates.
  static Instr ImmFP(double imm);
  static Instr ImmNEONFP(double imm);
  inline static Instr FPScale(unsigned scale);

  // FP register type.
  inline static Instr FPType(VRegister fd);

  // RAII scope that blocks constant pool emission for its lifetime.
  class BlockConstPoolScope {
   public:
    explicit BlockConstPoolScope(Assembler* assem) : assem_(assem) {
      assem_->StartBlockConstPool();
    }
    ~BlockConstPoolScope() { assem_->EndBlockConstPool(); }

   private:
    Assembler* assem_;

    DISALLOW_IMPLICIT_CONSTRUCTORS(BlockConstPoolScope);
  };

  // Unused on this architecture.
  void MaybeEmitOutOfLineConstantPool() {}

  // Check if it is time to emit a constant pool.
  void CheckConstPool(bool force_emit, bool require_jump);

  // Returns true if we should emit a veneer as soon as possible for a branch
  // which can at most reach to specified pc.
  bool ShouldEmitVeneer(int max_reachable_pc,
                        int margin = kVeneerDistanceMargin);
  bool ShouldEmitVeneers(int margin = kVeneerDistanceMargin) {
    return ShouldEmitVeneer(unresolved_branches_first_limit(), margin);
  }

  // The maximum code size generated for a veneer. Currently one branch
  // instruction. This is for code size checking purposes, and can be extended
  // in the future for example if we decide to add nops between the veneers.
  static constexpr int kMaxVeneerCodeSize = 1 * kInstrSize;

  void RecordVeneerPool(int location_offset, int size);
  // Emits veneers for branches that are approaching their maximum range.
  // If need_protection is true, the veneers are protected by a branch jumping
  // over the code.
  void EmitVeneers(bool force_emit, bool need_protection,
                   int margin = kVeneerDistanceMargin);
  void EmitVeneersGuard() { EmitPoolGuard(); }
  // Checks whether veneers need to be emitted at this point.
+ // If force_emit is set, a veneer is generated for *all* unresolved branches. + void CheckVeneerPool(bool force_emit, bool require_jump, + int margin = kVeneerDistanceMargin); + + class BlockPoolsScope { + public: + explicit BlockPoolsScope(Assembler* assem) : assem_(assem) { + assem_->StartBlockPools(); + } + ~BlockPoolsScope() { assem_->EndBlockPools(); } + + private: + Assembler* assem_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(BlockPoolsScope); + }; + + protected: + inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const; + + void LoadStore(const CPURegister& rt, const MemOperand& addr, LoadStoreOp op); + void LoadStorePair(const CPURegister& rt, const CPURegister& rt2, + const MemOperand& addr, LoadStorePairOp op); + void LoadStoreStruct(const VRegister& vt, const MemOperand& addr, + NEONLoadStoreMultiStructOp op); + void LoadStoreStruct1(const VRegister& vt, int reg_count, + const MemOperand& addr); + void LoadStoreStructSingle(const VRegister& vt, uint32_t lane, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op); + void LoadStoreStructSingleAllLanes(const VRegister& vt, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op); + void LoadStoreStructVerify(const VRegister& vt, const MemOperand& addr, + Instr op); + + static bool IsImmLSPair(int64_t offset, unsigned size); + + void Logical(const Register& rd, const Register& rn, const Operand& operand, + LogicalOp op); + void LogicalImmediate(const Register& rd, const Register& rn, unsigned n, + unsigned imm_s, unsigned imm_r, LogicalOp op); + + void ConditionalCompare(const Register& rn, const Operand& operand, + StatusFlags nzcv, Condition cond, + ConditionalCompareOp op); + static bool IsImmConditionalCompare(int64_t immediate); + + void AddSubWithCarry(const Register& rd, const Register& rn, + const Operand& operand, FlagsUpdate S, + AddSubWithCarryOp op); + + // Functions for emulating operands not directly supported by the instruction + // set. 
+ void EmitShift(const Register& rd, const Register& rn, Shift shift, + unsigned amount); + void EmitExtendShift(const Register& rd, const Register& rn, Extend extend, + unsigned left_shift); + + void AddSub(const Register& rd, const Register& rn, const Operand& operand, + FlagsUpdate S, AddSubOp op); + + static bool IsImmFP32(float imm); + static bool IsImmFP64(double imm); + + // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified + // registers. Only simple loads are supported; sign- and zero-extension (such + // as in LDPSW_x or LDRB_w) are not supported. + static inline LoadStoreOp LoadOpFor(const CPURegister& rt); + static inline LoadStorePairOp LoadPairOpFor(const CPURegister& rt, + const CPURegister& rt2); + static inline LoadStoreOp StoreOpFor(const CPURegister& rt); + static inline LoadStorePairOp StorePairOpFor(const CPURegister& rt, + const CPURegister& rt2); + static inline LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt); + + // Remove the specified branch from the unbound label link chain. + // If available, a veneer for this label can be used for other branches in the + // chain if the link chain cannot be fixed up without this branch. + void RemoveBranchFromLabelLinkChain(Instruction* branch, Label* label, + Instruction* label_veneer = nullptr); + + private: + static uint32_t FPToImm8(double imm); + + // Instruction helpers. 
+ void MoveWide(const Register& rd, uint64_t imm, int shift, + MoveWideImmediateOp mov_op); + void DataProcShiftedRegister(const Register& rd, const Register& rn, + const Operand& operand, FlagsUpdate S, Instr op); + void DataProcExtendedRegister(const Register& rd, const Register& rn, + const Operand& operand, FlagsUpdate S, + Instr op); + void ConditionalSelect(const Register& rd, const Register& rn, + const Register& rm, Condition cond, + ConditionalSelectOp op); + void DataProcessing1Source(const Register& rd, const Register& rn, + DataProcessing1SourceOp op); + void DataProcessing3Source(const Register& rd, const Register& rn, + const Register& rm, const Register& ra, + DataProcessing3SourceOp op); + void FPDataProcessing1Source(const VRegister& fd, const VRegister& fn, + FPDataProcessing1SourceOp op); + void FPDataProcessing2Source(const VRegister& fd, const VRegister& fn, + const VRegister& fm, + FPDataProcessing2SourceOp op); + void FPDataProcessing3Source(const VRegister& fd, const VRegister& fn, + const VRegister& fm, const VRegister& fa, + FPDataProcessing3SourceOp op); + void NEONAcrossLanesL(const VRegister& vd, const VRegister& vn, + NEONAcrossLanesOp op); + void NEONAcrossLanes(const VRegister& vd, const VRegister& vn, + NEONAcrossLanesOp op); + void NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8, + const int left_shift, + NEONModifiedImmediateOp op); + void NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8, + const int shift_amount, + NEONModifiedImmediateOp op); + void NEON3Same(const VRegister& vd, const VRegister& vn, const VRegister& vm, + NEON3SameOp vop); + void NEONFP3Same(const VRegister& vd, const VRegister& vn, + const VRegister& vm, Instr op); + void NEON3DifferentL(const VRegister& vd, const VRegister& vn, + const VRegister& vm, NEON3DifferentOp vop); + void NEON3DifferentW(const VRegister& vd, const VRegister& vn, + const VRegister& vm, NEON3DifferentOp vop); + void NEON3DifferentHN(const VRegister& vd, const 
VRegister& vn, + const VRegister& vm, NEON3DifferentOp vop); + void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, + NEON2RegMiscOp vop, double value = 0.0); + void NEON2RegMisc(const VRegister& vd, const VRegister& vn, + NEON2RegMiscOp vop, int value = 0); + void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op); + void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op); + void NEONPerm(const VRegister& vd, const VRegister& vn, const VRegister& vm, + NEONPermOp op); + void NEONFPByElement(const VRegister& vd, const VRegister& vn, + const VRegister& vm, int vm_index, + NEONByIndexedElementOp op); + void NEONByElement(const VRegister& vd, const VRegister& vn, + const VRegister& vm, int vm_index, + NEONByIndexedElementOp op); + void NEONByElementL(const VRegister& vd, const VRegister& vn, + const VRegister& vm, int vm_index, + NEONByIndexedElementOp op); + void NEONShiftImmediate(const VRegister& vd, const VRegister& vn, + NEONShiftImmediateOp op, int immh_immb); + void NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn, + int shift, NEONShiftImmediateOp op); + void NEONShiftRightImmediate(const VRegister& vd, const VRegister& vn, + int shift, NEONShiftImmediateOp op); + void NEONShiftImmediateL(const VRegister& vd, const VRegister& vn, int shift, + NEONShiftImmediateOp op); + void NEONShiftImmediateN(const VRegister& vd, const VRegister& vn, int shift, + NEONShiftImmediateOp op); + void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop); + void NEONTable(const VRegister& vd, const VRegister& vn, const VRegister& vm, + NEONTableOp op); + + Instr LoadStoreStructAddrModeField(const MemOperand& addr); + + // Label helpers. + + // Return an offset for a label-referencing instruction, typically a branch. + int LinkAndGetByteOffsetTo(Label* label); + + // This is the same as LinkAndGetByteOffsetTo, but return an offset + // suitable for fields that take instruction offsets. 
+ inline int LinkAndGetInstructionOffsetTo(Label* label); + + static constexpr int kStartOfLabelLinkChain = 0; + + // Verify that a label's link chain is intact. + void CheckLabelLinkChain(Label const* label); + + // Postpone the generation of the constant pool for the specified number of + // instructions. + void BlockConstPoolFor(int instructions); + + // Set how far from current pc the next constant pool check will be. + void SetNextConstPoolCheckIn(int instructions) { + next_constant_pool_check_ = pc_offset() + instructions * kInstrSize; + } + + // Emit the instruction at pc_. + void Emit(Instr instruction) { + STATIC_ASSERT(sizeof(*pc_) == 1); + STATIC_ASSERT(sizeof(instruction) == kInstrSize); + DCHECK_LE(pc_ + sizeof(instruction), buffer_start_ + buffer_->size()); + + memcpy(pc_, &instruction, sizeof(instruction)); + pc_ += sizeof(instruction); + CheckBuffer(); + } + + // Emit data inline in the instruction stream. + void EmitData(void const* data, unsigned size) { + DCHECK_EQ(sizeof(*pc_), 1); + DCHECK_LE(pc_ + size, buffer_start_ + buffer_->size()); + + // TODO(all): Somehow register we have some data here. Then we can + // disassemble it correctly. + memcpy(pc_, data, size); + pc_ += size; + CheckBuffer(); + } + + void GrowBuffer(); + void CheckBufferSpace(); + void CheckBuffer(); + + // Pc offset of the next constant pool check. + int next_constant_pool_check_; + + // Constant pool generation + // Pools are emitted in the instruction stream. They are emitted when: + // * the distance to the first use is above a pre-defined distance or + // * the numbers of entries in the pool is above a pre-defined size or + // * code generation is finished + // If a pool needs to be emitted before code generation is finished a branch + // over the emitted pool will be inserted. + + // Constants in the pool may be addresses of functions that gets relocated; + // if so, a relocation info entry is associated to the constant pool entry. 
+ + // Repeated checking whether the constant pool should be emitted is rather + // expensive. By default we only check again once a number of instructions + // has been generated. That also means that the sizing of the buffers is not + // an exact science, and that we rely on some slop to not overrun buffers. + static constexpr int kCheckConstPoolInterval = 128; + + // Distance to first use after a which a pool will be emitted. Pool entries + // are accessed with pc relative load therefore this cannot be more than + // 1 * MB. Since constant pool emission checks are interval based this value + // is an approximation. + static constexpr int kApproxMaxDistToConstPool = 64 * KB; + + // Number of pool entries after which a pool will be emitted. Since constant + // pool emission checks are interval based this value is an approximation. + static constexpr int kApproxMaxPoolEntryCount = 512; + + // Emission of the constant pool may be blocked in some code sequences. + int const_pool_blocked_nesting_; // Block emission if this is not zero. + int no_const_pool_before_; // Block emission before this pc offset. + + // Emission of the veneer pools may be blocked in some code sequences. + int veneer_pool_blocked_nesting_; // Block emission if this is not zero. + + // Relocation info generation + // Each relocation is encoded as a variable size value + static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize; + RelocInfoWriter reloc_info_writer; + + // Internal reference positions, required for (potential) patching in + // GrowBuffer(); contains only those internal references whose labels + // are already bound. + std::deque<int> internal_reference_positions_; + + // Relocation info records are also used during code generation as temporary + // containers for constants and code target addresses until they are emitted + // to the constant pool. These pending relocation info records are temporarily + // stored in a separate buffer until a constant pool is emitted. 
  // If every instruction in a long sequence is accessing the pool, we need one
  // pending relocation entry per instruction.

  // The pending constant pool.
  ConstPool constpool_;

 protected:
  // Code generation
  // The relocation writer's position is at least kGap bytes below the end of
  // the generated instructions. This is so that multi-instruction sequences do
  // not have to check for overflow. The same is true for writes of large
  // relocation info entries, and debug strings encoded in the instruction
  // stream.
  static constexpr int kGap = 128;

 public:
#ifdef DEBUG
  // Functions used for testing.
  int GetConstantPoolEntriesSizeForTesting() const {
    // Do not include branch over the pool.
    return constpool_.EntryCount() * kSystemPointerSize;
  }

  static constexpr int GetCheckConstPoolIntervalForTesting() {
    return kCheckConstPoolInterval;
  }

  static constexpr int GetApproxMaxDistToConstPoolForTesting() {
    return kApproxMaxDistToConstPool;
  }
#endif

  // Describes a branch to a label that is not yet bound: where the branch
  // lives in the buffer and which label it targets. Used as the mapped value
  // of unresolved_branches_ below.
  class FarBranchInfo {
   public:
    FarBranchInfo(int offset, Label* label)
        : pc_offset_(offset), label_(label) {}
    // Offset of the branch in the code generation buffer.
    int pc_offset_;
    // The label branched to.
    Label* label_;
  };

 protected:
  // Information about unresolved (forward) branches.
  // The Assembler is only allowed to delete out-of-date information from here
  // after a label is bound. The MacroAssembler uses this information to
  // generate veneers.
  //
  // The second member gives information about the unresolved branch. The first
  // member of the pair is the maximum offset that the branch can reach in the
  // buffer. The map is sorted according to this reachable offset, allowing to
  // easily check when veneers need to be emitted.
  // Note that the maximum reachable offset (first member of the pairs) should
  // always be positive but has the same type as the return value for
  // pc_offset() for convenience.
  std::multimap<int, FarBranchInfo> unresolved_branches_;

  // We generate a veneer for a branch if we reach within this distance of the
  // limit of the range.
  static constexpr int kVeneerDistanceMargin = 1 * KB;
  // The factor of 2 is a finger in the air guess. With a default margin of
  // 1KB, that leaves us an additional 256 instructions to avoid generating a
  // protective branch.
  static constexpr int kVeneerNoProtectionFactor = 2;
  static constexpr int kVeneerDistanceCheckMargin =
      kVeneerNoProtectionFactor * kVeneerDistanceMargin;
  // Smallest maximum-reachable offset among all unresolved branches, i.e. the
  // earliest deadline by which a veneer could become necessary (the multimap
  // is sorted by this key). Requires at least one unresolved branch.
  int unresolved_branches_first_limit() const {
    DCHECK(!unresolved_branches_.empty());
    return unresolved_branches_.begin()->first;
  }
  // This is similar to next_constant_pool_check_ and helps reduce the overhead
  // of checking for veneer pools.
  // It is maintained to the closest unresolved branch limit minus the maximum
  // veneer margin (or kMaxInt if there are no unresolved branches).
  int next_veneer_pool_check_;

 private:
  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512 * MB;

  // If a veneer is emitted for a branch instruction, that instruction must be
  // removed from the associated label's link chain so that the assembler does
  // not later attempt (likely unsuccessfully) to patch it to branch directly to
  // the label.
  void DeleteUnresolvedBranchInfoForLabel(Label* label);
  // This function deletes the information related to the label by traversing
  // the label chain, and for each PC-relative instruction in the chain checking
  // if pending unresolved information exists. Its complexity is proportional to
  // the length of the label chain.
  void DeleteUnresolvedBranchInfoForLabelTraverse(Label* label);

  // NOTE(review): presumably materializes heap-object requests recorded during
  // code generation once an isolate is available — confirm against the .cc.
  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  // Returns the size of the emitted code comments section, if any.
  int WriteCodeComments();

  friend class EnsureSpace;
  friend class ConstPool;
};

// An assembler that writes a fixed number of instructions into an existing
// buffer, for patching previously generated code in place.
class PatchingAssembler : public Assembler {
 public:
  // Create an Assembler with a buffer starting at 'start'.
  // The buffer size is
  //   size of instructions to patch + kGap
  // Where kGap is the distance from which the Assembler tries to grow the
  // buffer.
  // If more or fewer instructions than expected are generated or if some
  // relocation information takes space in the buffer, the PatchingAssembler
  // will crash trying to grow the buffer.
  // Note that the instruction cache will not be flushed.
  PatchingAssembler(const AssemblerOptions& options, byte* start,
                    unsigned count)
      : Assembler(options,
                  ExternalAssemblerBuffer(start, count * kInstrSize + kGap)) {
    // Block constant pool emission.
    StartBlockPools();
  }

  ~PatchingAssembler() {
    // Const pool should still be blocked.
    DCHECK(is_const_pool_blocked());
    EndBlockPools();
    // Verify we have generated the number of instructions we expected.
    DCHECK_EQ(pc_offset() + kGap, buffer_->size());
    // Verify no relocation information has been emitted.
    DCHECK(IsConstPoolEmpty());
  }

  // See definition of PatchAdrFar() for details.
  static constexpr int kAdrFarPatchableNNops = 2;
  static constexpr int kAdrFarPatchableNInstrs = kAdrFarPatchableNNops + 2;
  void PatchAdrFar(int64_t target_offset);
  void PatchSubSp(uint32_t immediate);
};

// RAII-style helper: constructing one ensures the assembler has buffer space
// available before emitting.
class EnsureSpace {
 public:
  explicit EnsureSpace(Assembler* assembler) { assembler->CheckBufferSpace(); }
};

}  // namespace internal
}  // namespace v8

#endif  // V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_