diff options
author | Michaël Zasso <targos@protonmail.com> | 2017-03-21 10:16:54 +0100 |
---|---|---|
committer | Michaël Zasso <targos@protonmail.com> | 2017-03-25 09:44:10 +0100 |
commit | c459d8ea5d402c702948c860d9497b2230ff7e8a (patch) | |
tree | 56c282fc4d40e5cb613b47cf7be3ea0526ed5b6f /deps/v8/test/cctest/test-assembler-arm.cc | |
parent | e0bc5a7361b1d29c3ed034155fd779ce6f44fb13 (diff) | |
download | android-node-v8-c459d8ea5d402c702948c860d9497b2230ff7e8a.tar.gz android-node-v8-c459d8ea5d402c702948c860d9497b2230ff7e8a.tar.bz2 android-node-v8-c459d8ea5d402c702948c860d9497b2230ff7e8a.zip |
deps: update V8 to 5.7.492.69
PR-URL: https://github.com/nodejs/node/pull/11752
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Franziska Hinkelmann <franziska.hinkelmann@gmail.com>
Diffstat (limited to 'deps/v8/test/cctest/test-assembler-arm.cc')
-rw-r--r-- | deps/v8/test/cctest/test-assembler-arm.cc | 700 |
1 files changed, 679 insertions, 21 deletions
diff --git a/deps/v8/test/cctest/test-assembler-arm.cc b/deps/v8/test/cctest/test-assembler-arm.cc index 08f1f5a9f3..680b1233f7 100644 --- a/deps/v8/test/cctest/test-assembler-arm.cc +++ b/deps/v8/test/cctest/test-assembler-arm.cc @@ -1221,6 +1221,32 @@ TEST(14) { CHECK_EQ(kArmNanLower32, bit_cast<int64_t>(t.div_result) & 0xffffffffu); } +#define CHECK_EQ_SPLAT(field, ex) \ + CHECK_EQ(ex, t.field[0]); \ + CHECK_EQ(ex, t.field[1]); \ + CHECK_EQ(ex, t.field[2]); \ + CHECK_EQ(ex, t.field[3]); + +#define CHECK_EQ_32X4(field, ex0, ex1, ex2, ex3) \ + CHECK_EQ(ex0, t.field[0]); \ + CHECK_EQ(ex1, t.field[1]); \ + CHECK_EQ(ex2, t.field[2]); \ + CHECK_EQ(ex3, t.field[3]); + +#define CHECK_ESTIMATE(expected, tolerance, value) \ + CHECK_LT((expected) - (tolerance), value); \ + CHECK_GT((expected) + (tolerance), value); + +#define CHECK_ESTIMATE_SPLAT(field, ex, tol) \ + CHECK_ESTIMATE(ex, tol, t.field[0]); \ + CHECK_ESTIMATE(ex, tol, t.field[1]); \ + CHECK_ESTIMATE(ex, tol, t.field[2]); \ + CHECK_ESTIMATE(ex, tol, t.field[3]); + +#define INT32_TO_FLOAT(val) \ + std::round(static_cast<float>(bit_cast<int32_t>(val))) +#define UINT32_TO_FLOAT(val) \ + std::round(static_cast<float>(bit_cast<uint32_t>(val))) TEST(15) { // Test the Neon instructions. 
@@ -1255,18 +1281,50 @@ TEST(15) { uint32_t dstA5; uint32_t dstA6; uint32_t dstA7; + uint32_t lane_test[4]; + uint64_t vmov_to_scalar1, vmov_to_scalar2; + uint32_t vmov_from_scalar_s8, vmov_from_scalar_u8; + uint32_t vmov_from_scalar_s16, vmov_from_scalar_u16; + uint32_t vmov_from_scalar_32; + uint32_t vmov[4], vmvn[4]; + int32_t vcvt_s32_f32[4]; + uint32_t vcvt_u32_f32[4]; + float vcvt_f32_s32[4], vcvt_f32_u32[4]; + uint32_t vdup8[4], vdup16[4], vdup32[4]; + float vabsf[4], vnegf[4]; + uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4]; + uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4]; + uint32_t veor[4], vand[4], vorr[4]; + float vdupf[4], vaddf[4], vsubf[4], vmulf[4]; + uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4]; + uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4]; + uint32_t vadd8[4], vadd16[4], vadd32[4]; + uint32_t vsub8[4], vsub16[4], vsub32[4]; + uint32_t vmul8[4], vmul16[4], vmul32[4]; + uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4]; + uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4]; + uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4]; + float vrecpe[4], vrecps[4], vrsqrte[4], vrsqrts[4]; + float vminf[4], vmaxf[4]; + uint32_t vtst[4], vbsl[4]; + uint32_t vext[4]; + uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4], + vzip32b[4]; + uint32_t vrev64_32[4], vrev64_16[4], vrev64_8[4]; + uint32_t vrev32_16[4], vrev32_8[4]; + uint32_t vrev16_8[4]; + uint32_t vtbl[2], vtbx[2]; } T; T t; // Create a function that accepts &t, and loads, manipulates, and stores - // the doubles and floats. + // the doubles, floats, and SIMD values. Assembler assm(isolate, NULL, 0); - if (CpuFeatures::IsSupported(NEON)) { CpuFeatureScope scope(&assm, NEON); - __ stm(db_w, sp, r4.bit() | lr.bit()); + __ stm(db_w, sp, r4.bit() | r5.bit() | lr.bit()); // Move 32 bytes with neon. 
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, src0)))); __ vld1(Neon8, NeonListOperand(d0, 4), NeonMemOperand(r4)); @@ -1287,7 +1345,456 @@ TEST(15) { __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, dstA4)))); __ vst1(Neon8, NeonListOperand(d2, 2), NeonMemOperand(r4)); - __ ldm(ia_w, sp, r4.bit() | pc.bit()); + // ARM core register to scalar. + __ mov(r4, Operand(0xfffffff8)); + __ vmov(d0, 0); + __ vmov(NeonS8, d0, 1, r4); + __ vmov(NeonS16, d0, 1, r4); + __ vmov(NeonS32, d0, 1, r4); + __ vstr(d0, r0, offsetof(T, vmov_to_scalar1)); + __ vmov(d0, 0); + __ vmov(NeonS8, d0, 3, r4); + __ vmov(NeonS16, d0, 3, r4); + __ vstr(d0, r0, offsetof(T, vmov_to_scalar2)); + + // Scalar to ARM core register. + __ mov(r4, Operand(0xffffff00)); + __ mov(r5, Operand(0xffffffff)); + __ vmov(d0, r4, r5); + __ vmov(NeonS8, r4, d0, 1); + __ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_s8))); + __ vmov(NeonU8, r4, d0, 1); + __ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_u8))); + __ vmov(NeonS16, r4, d0, 1); + __ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_s16))); + __ vmov(NeonU16, r4, d0, 1); + __ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_u16))); + __ vmov(NeonS32, r4, d0, 1); + __ str(r4, MemOperand(r0, offsetof(T, vmov_from_scalar_32))); + + // vmov for q-registers. + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); + __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ vmov(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmov)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vmvn. + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); + __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ vmvn(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmvn)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vcvt for q-registers. 
+ __ vmov(s0, -1.5); + __ vmov(s1, -1); + __ vmov(s2, 1); + __ vmov(s3, 1.5); + __ vcvt_s32_f32(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_s32_f32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vcvt_u32_f32(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_u32_f32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ mov(r4, Operand(kMinInt)); + __ mov(r5, Operand(kMaxInt)); + __ vmov(d0, r4, r5); + __ mov(r4, Operand(kMaxUInt32)); + __ mov(r5, Operand(kMinInt + 1)); + __ vmov(d1, r4, r5); // q0 = [kMinInt, kMaxInt, kMaxUInt32, kMinInt + 1] + __ vcvt_f32_s32(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_s32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vcvt_f32_u32(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_u32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vdup (integer). + __ mov(r4, Operand(0xa)); + __ vdup(Neon8, q0, r4); + __ vdup(Neon16, q1, r4); + __ vdup(Neon32, q2, r4); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup8)))); + __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup16)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup32)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + + // vdup (float). + __ vmov(s0, -1.0); + __ vdup(q0, s0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdupf)))); + __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + + // vabs (float). + __ vmov(s0, -1.0); + __ vmov(s1, -0.0); + __ vmov(s2, 0.0); + __ vmov(s3, 1.0); + __ vabs(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vabsf)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vneg (float). 
+ __ vneg(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vnegf)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vabs (integer). + __ mov(r4, Operand(0x7f7f7f7f)); + __ mov(r5, Operand(0x01010101)); + __ vmov(d0, r4, r5); + __ mov(r4, Operand(0xffffffff)); + __ mov(r5, Operand(0x80808080)); + __ vmov(d1, r4, r5); + __ vabs(Neon8, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vabs_s8)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vabs(Neon16, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vabs_s16)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vabs(Neon32, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vabs_s32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vneg (integer). + __ vneg(Neon8, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vneg_s8)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vneg(Neon16, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vneg_s16)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vneg(Neon32, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vneg_s32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // veor. + __ mov(r4, Operand(0xaa)); + __ vdup(Neon16, q0, r4); + __ mov(r4, Operand(0x55)); + __ vdup(Neon16, q1, r4); + __ veor(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, veor)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vand. + __ mov(r4, Operand(0xff)); + __ vdup(Neon16, q0, r4); + __ mov(r4, Operand(0xfe)); + __ vdup(Neon16, q1, r4); + __ vand(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vand)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vorr. 
+ __ mov(r4, Operand(0xaa)); + __ vdup(Neon16, q0, r4); + __ mov(r4, Operand(0x55)); + __ vdup(Neon16, q1, r4); + __ vorr(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vorr)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vmin (float). + __ vmov(s4, 2.0); + __ vdup(q0, s4); + __ vmov(s4, 1.0); + __ vdup(q1, s4); + __ vmin(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vminf)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vmax (float). + __ vmov(s4, 2.0); + __ vdup(q0, s4); + __ vmov(s4, 1.0); + __ vdup(q1, s4); + __ vmax(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmaxf)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vadd (float). + __ vmov(s4, 1.0); + __ vdup(q0, s4); + __ vdup(q1, s4); + __ vadd(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vsub (float). + __ vmov(s4, 2.0); + __ vdup(q0, s4); + __ vmov(s4, 1.0); + __ vdup(q1, s4); + __ vsub(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vmul (float). + __ vmov(s4, 2.0); + __ vdup(q0, s4); + __ vdup(q1, s4); + __ vmul(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmulf)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vrecpe. + __ vmov(s4, 2.0); + __ vdup(q0, s4); + __ vrecpe(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrecpe)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vrecps. + __ vmov(s4, 2.0); + __ vdup(q0, s4); + __ vmov(s4, 1.5); + __ vdup(q1, s4); + __ vrecps(q1, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrecps)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vrsqrte. 
+ __ vmov(s4, 4.0); + __ vdup(q0, s4); + __ vrsqrte(q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrsqrte)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vrsqrts. + __ vmov(s4, 2.0); + __ vdup(q0, s4); + __ vmov(s4, 2.5); + __ vdup(q1, s4); + __ vrsqrts(q1, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrsqrts)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vceq (float). + __ vmov(s4, 1.0); + __ vdup(q0, s4); + __ vdup(q1, s4); + __ vceq(q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceqf)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vcge (float). + __ vmov(s0, 1.0); + __ vmov(s1, -1.0); + __ vmov(s2, -0.0); + __ vmov(s3, 0.0); + __ vdup(q1, s3); + __ vcge(q2, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgef)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ vcgt(q2, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgtf)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + + // vmin/vmax integer. 
+ __ mov(r4, Operand(0x03)); + __ vdup(Neon16, q0, r4); + __ vdup(Neon8, q1, r4); + __ vmin(NeonS8, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmin_s8)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ vmax(NeonS8, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s8)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ mov(r4, Operand(0xff)); + __ vdup(Neon16, q0, r4); + __ vdup(Neon8, q1, r4); + __ vmin(NeonU16, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmin_u16)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ vmax(NeonU16, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_u16)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ mov(r4, Operand(0xff)); + __ vdup(Neon32, q0, r4); + __ vdup(Neon8, q1, r4); + __ vmin(NeonS32, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmin_s32)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ vmax(NeonS32, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmax_s32)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + + // vadd (integer). 
+ __ mov(r4, Operand(0x81)); + __ vdup(Neon8, q0, r4); + __ mov(r4, Operand(0x82)); + __ vdup(Neon8, q1, r4); + __ vadd(Neon8, q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd8)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ mov(r4, Operand(0x8001)); + __ vdup(Neon16, q0, r4); + __ mov(r4, Operand(0x8002)); + __ vdup(Neon16, q1, r4); + __ vadd(Neon16, q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd16)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ mov(r4, Operand(0x80000001)); + __ vdup(Neon32, q0, r4); + __ mov(r4, Operand(0x80000002)); + __ vdup(Neon32, q1, r4); + __ vadd(Neon32, q1, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vadd32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vsub (integer). + __ mov(r4, Operand(0x01)); + __ vdup(Neon8, q0, r4); + __ mov(r4, Operand(0x03)); + __ vdup(Neon8, q1, r4); + __ vsub(Neon8, q1, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub8)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ mov(r4, Operand(0x0001)); + __ vdup(Neon16, q0, r4); + __ mov(r4, Operand(0x0003)); + __ vdup(Neon16, q1, r4); + __ vsub(Neon16, q1, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub16)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ mov(r4, Operand(0x00000001)); + __ vdup(Neon32, q0, r4); + __ mov(r4, Operand(0x00000003)); + __ vdup(Neon32, q1, r4); + __ vsub(Neon32, q1, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsub32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vmul (integer). 
+ __ mov(r4, Operand(0x02)); + __ vdup(Neon8, q0, r4); + __ vmul(Neon8, q1, q0, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmul8)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ mov(r4, Operand(0x0002)); + __ vdup(Neon16, q0, r4); + __ vmul(Neon16, q1, q0, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmul16)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ mov(r4, Operand(0x00000002)); + __ vdup(Neon32, q0, r4); + __ vmul(Neon32, q1, q0, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmul32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vceq. + __ mov(r4, Operand(0x03)); + __ vdup(Neon8, q0, r4); + __ vdup(Neon16, q1, r4); + __ vceq(Neon8, q1, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceq)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vcge/vcgt (integer). + __ mov(r4, Operand(0x03)); + __ vdup(Neon16, q0, r4); + __ vdup(Neon8, q1, r4); + __ vcge(NeonS8, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_s8)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ vcgt(NeonS8, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_s8)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ mov(r4, Operand(0xff)); + __ vdup(Neon16, q0, r4); + __ vdup(Neon8, q1, r4); + __ vcge(NeonU16, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_u16)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ vcgt(NeonU16, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_u16)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ mov(r4, Operand(0xff)); + __ vdup(Neon32, q0, r4); + __ vdup(Neon8, q1, r4); + __ vcge(NeonS32, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcge_s32)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + __ 
vcgt(NeonS32, q2, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgt_s32)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + + // vtst. + __ mov(r4, Operand(0x03)); + __ vdup(Neon8, q0, r4); + __ mov(r4, Operand(0x02)); + __ vdup(Neon16, q1, r4); + __ vtst(Neon8, q1, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vtst)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vbsl. + __ mov(r4, Operand(0x00ff)); + __ vdup(Neon16, q0, r4); + __ mov(r4, Operand(0x01)); + __ vdup(Neon8, q1, r4); + __ mov(r4, Operand(0x02)); + __ vdup(Neon8, q2, r4); + __ vbsl(q0, q1, q2); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vbsl)))); + __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + + // vext. + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); + __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ vmov(q1, q0); + __ vext(q2, q0, q1, 3); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vext)))); + __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + + // vzip. 
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); + __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ vmov(q1, q0); + __ vzip(Neon8, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip8a)))); + __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip8b)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); + __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ vmov(q1, q0); + __ vzip(Neon16, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip16a)))); + __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip16b)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); + __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ vmov(q1, q0); + __ vzip(Neon32, q0, q1); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32a)))); + __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vzip32b)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vrev64/32/16 + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, lane_test)))); + __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); + __ vrev64(Neon32, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev64_32)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vrev64(Neon16, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev64_16)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vrev64(Neon8, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev64_8)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vrev32(Neon16, q1, q0); + __ add(r4, r0, 
Operand(static_cast<int32_t>(offsetof(T, vrev32_16)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vrev32(Neon8, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev32_8)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + __ vrev16(Neon8, q1, q0); + __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrev16_8)))); + __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + + // vtb[l/x]. + __ mov(r4, Operand(0x06040200)); + __ mov(r5, Operand(0xff050301)); + __ vmov(d2, r4, r5); // d2 = ff05030106040200 + __ vtbl(d0, NeonListOperand(d2, 1), d2); + __ vstr(d0, r0, offsetof(T, vtbl)); + __ vtbx(d2, NeonListOperand(d2, 1), d2); + __ vstr(d2, r0, offsetof(T, vtbx)); + + // Restore and return. + __ ldm(ia_w, sp, r4.bit() | r5.bit() | pc.bit()); CodeDesc desc; assm.GetCode(&desc); @@ -1324,8 +1831,13 @@ TEST(15) { t.dstA5 = 0; t.dstA6 = 0; t.dstA7 = 0; + t.lane_test[0] = 0x03020100; + t.lane_test[1] = 0x07060504; + t.lane_test[2] = 0x0b0a0908; + t.lane_test[3] = 0x0f0e0d0c; Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0); USE(dummy); + CHECK_EQ(0x01020304u, t.dst0); CHECK_EQ(0x11121314u, t.dst1); CHECK_EQ(0x21222324u, t.dst2); @@ -1342,10 +1854,118 @@ TEST(15) { CHECK_EQ(0x00410042u, t.dstA5); CHECK_EQ(0x00830084u, t.dstA6); CHECK_EQ(0x00810082u, t.dstA7); + + CHECK_EQ(0xfffffff8fff8f800u, t.vmov_to_scalar1); + CHECK_EQ(0xfff80000f8000000u, t.vmov_to_scalar2); + CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_s8); + CHECK_EQ(0xFFu, t.vmov_from_scalar_u8); + CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_s16); + CHECK_EQ(0xFFFFu, t.vmov_from_scalar_u16); + CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_32); + + CHECK_EQ_32X4(vmov, 0x03020100u, 0x07060504u, 0x0b0a0908u, 0x0f0e0d0cu); + CHECK_EQ_32X4(vmvn, 0xfcfdfeffu, 0xf8f9fafbu, 0xf4f5f6f7u, 0xf0f1f2f3u); + + CHECK_EQ_SPLAT(vdup8, 0x0a0a0a0au); + CHECK_EQ_SPLAT(vdup16, 0x000a000au); + CHECK_EQ_SPLAT(vdup32, 0x0000000au); + CHECK_EQ_SPLAT(vdupf, -1.0); + + // src: 
[-1.5, -1, 1, 1.5] + CHECK_EQ_32X4(vcvt_s32_f32, -1, -1, 1, 1); + CHECK_EQ_32X4(vcvt_u32_f32, 0u, 0u, 1u, 1u); + // src: [kMinInt, kMaxInt, kMaxUInt32, kMinInt + 1] + CHECK_EQ_32X4(vcvt_f32_s32, INT32_TO_FLOAT(kMinInt), + INT32_TO_FLOAT(kMaxInt), INT32_TO_FLOAT(kMaxUInt32), + INT32_TO_FLOAT(kMinInt + 1)); + CHECK_EQ_32X4(vcvt_f32_u32, UINT32_TO_FLOAT(kMinInt), + UINT32_TO_FLOAT(kMaxInt), UINT32_TO_FLOAT(kMaxUInt32), + UINT32_TO_FLOAT(kMinInt + 1)); + + CHECK_EQ_32X4(vabsf, 1.0, 0.0, 0.0, 1.0); + CHECK_EQ_32X4(vnegf, 1.0, 0.0, -0.0, -1.0); + // src: [0x7f7f7f7f, 0x01010101, 0xffffffff, 0x80808080] + CHECK_EQ_32X4(vabs_s8, 0x7f7f7f7fu, 0x01010101u, 0x01010101u, 0x80808080u); + CHECK_EQ_32X4(vabs_s16, 0x7f7f7f7fu, 0x01010101u, 0x00010001u, 0x7f807f80u); + CHECK_EQ_32X4(vabs_s32, 0x7f7f7f7fu, 0x01010101u, 0x00000001u, 0x7f7f7f80u); + CHECK_EQ_32X4(vneg_s8, 0x81818181u, 0xffffffffu, 0x01010101u, 0x80808080u); + CHECK_EQ_32X4(vneg_s16, 0x80818081u, 0xfefffeffu, 0x00010001u, 0x7f807f80u); + CHECK_EQ_32X4(vneg_s32, 0x80808081u, 0xfefefeffu, 0x00000001u, 0x7f7f7f80u); + + CHECK_EQ_SPLAT(veor, 0x00ff00ffu); + CHECK_EQ_SPLAT(vand, 0x00fe00feu); + CHECK_EQ_SPLAT(vorr, 0x00ff00ffu); + CHECK_EQ_SPLAT(vaddf, 2.0); + CHECK_EQ_SPLAT(vminf, 1.0); + CHECK_EQ_SPLAT(vmaxf, 2.0); + CHECK_EQ_SPLAT(vsubf, -1.0); + CHECK_EQ_SPLAT(vmulf, 4.0); + CHECK_ESTIMATE_SPLAT(vrecpe, 0.5f, 0.1f); // 1 / 2 + CHECK_EQ_SPLAT(vrecps, -1.0f); // 2 - (2 * 1.5) + CHECK_ESTIMATE_SPLAT(vrsqrte, 0.5f, 0.1f); // 1 / sqrt(4) + CHECK_EQ_SPLAT(vrsqrts, -1.0f); // (3 - (2 * 2.5)) / 2 + CHECK_EQ_SPLAT(vceqf, 0xffffffffu); + // [0] >= [1, -1, -0, 0] + CHECK_EQ_32X4(vcgef, 0u, 0xffffffffu, 0xffffffffu, 0xffffffffu); + CHECK_EQ_32X4(vcgtf, 0u, 0xffffffffu, 0u, 0u); + // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...] + CHECK_EQ_SPLAT(vmin_s8, 0x00030003u); + CHECK_EQ_SPLAT(vmax_s8, 0x03030303u); + // [0x00ff, 0x00ff, ...] and [0xffff, 0xffff, ...] 
+ CHECK_EQ_SPLAT(vmin_u16, 0x00ff00ffu); + CHECK_EQ_SPLAT(vmax_u16, 0xffffffffu); + // [0x000000ff, 0x000000ff, ...] and [0xffffffff, 0xffffffff, ...] + CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu); + CHECK_EQ_SPLAT(vmax_s32, 0xffu); + CHECK_EQ_SPLAT(vadd8, 0x03030303u); + CHECK_EQ_SPLAT(vadd16, 0x00030003u); + CHECK_EQ_SPLAT(vadd32, 0x00000003u); + CHECK_EQ_SPLAT(vsub8, 0xfefefefeu); + CHECK_EQ_SPLAT(vsub16, 0xfffefffeu); + CHECK_EQ_SPLAT(vsub32, 0xfffffffeu); + CHECK_EQ_SPLAT(vmul8, 0x04040404u); + CHECK_EQ_SPLAT(vmul16, 0x00040004u); + CHECK_EQ_SPLAT(vmul32, 0x00000004u); + CHECK_EQ_SPLAT(vceq, 0x00ff00ffu); + // [0, 3, 0, 3, ...] >= [3, 3, 3, 3, ...] + CHECK_EQ_SPLAT(vcge_s8, 0x00ff00ffu); + CHECK_EQ_SPLAT(vcgt_s8, 0u); + // [0x00ff, 0x00ff, ...] >= [0xffff, 0xffff, ...] + CHECK_EQ_SPLAT(vcge_u16, 0u); + CHECK_EQ_SPLAT(vcgt_u16, 0u); + // [0x000000ff, 0x000000ff, ...] >= [0xffffffff, 0xffffffff, ...] + CHECK_EQ_SPLAT(vcge_s32, 0xffffffffu); + CHECK_EQ_SPLAT(vcgt_s32, 0xffffffffu); + CHECK_EQ_SPLAT(vtst, 0x00ff00ffu); + CHECK_EQ_SPLAT(vbsl, 0x02010201u); + + CHECK_EQ_32X4(vext, 0x06050403u, 0x0a090807u, 0x0e0d0c0bu, 0x0201000fu); + + CHECK_EQ_32X4(vzip8a, 0x01010000u, 0x03030202u, 0x05050404u, 0x07070606u); + CHECK_EQ_32X4(vzip8b, 0x09090808u, 0x0b0b0a0au, 0x0d0d0c0cu, 0x0f0f0e0eu); + CHECK_EQ_32X4(vzip16a, 0x01000100u, 0x03020302u, 0x05040504u, 0x07060706u); + CHECK_EQ_32X4(vzip16b, 0x09080908u, 0x0b0a0b0au, 0x0d0c0d0cu, 0x0f0e0f0eu); + CHECK_EQ_32X4(vzip32a, 0x03020100u, 0x03020100u, 0x07060504u, 0x07060504u); + CHECK_EQ_32X4(vzip32b, 0x0b0a0908u, 0x0b0a0908u, 0x0f0e0d0cu, 0x0f0e0d0cu); + + // src: 0 1 2 3 4 5 6 7 8 9 a b c d e f (little endian) + CHECK_EQ_32X4(vrev64_32, 0x07060504u, 0x03020100u, 0x0f0e0d0cu, + 0x0b0a0908u); + CHECK_EQ_32X4(vrev64_16, 0x05040706u, 0x01000302u, 0x0d0c0f0eu, + 0x09080b0au); + CHECK_EQ_32X4(vrev64_8, 0x04050607u, 0x00010203u, 0x0c0d0e0fu, 0x08090a0bu); + CHECK_EQ_32X4(vrev32_16, 0x01000302u, 0x05040706u, 0x09080b0au, + 0x0d0c0f0eu); + 
CHECK_EQ_32X4(vrev32_8, 0x00010203u, 0x04050607u, 0x08090a0bu, 0x0c0d0e0fu); + CHECK_EQ_32X4(vrev16_8, 0x02030001u, 0x06070405u, 0x0a0b0809u, 0x0e0f0c0du); + + CHECK_EQ(0x05010400u, t.vtbl[0]); + CHECK_EQ(0x00030602u, t.vtbl[1]); + CHECK_EQ(0x05010400u, t.vtbx[0]); + CHECK_EQ(0xff030602u, t.vtbx[1]); } } - TEST(16) { // Test the pkh, uxtb, uxtab and uxtb16 instructions. CcTest::InitializeVM(); @@ -2881,33 +3501,63 @@ TEST(unaligned_stores) { } TEST(vswp) { + if (!CpuFeatures::IsSupported(NEON)) return; + CcTest::InitializeVM(); Isolate* isolate = CcTest::i_isolate(); HandleScope scope(isolate); Assembler assm(isolate, NULL, 0); typedef struct { - double result0; - double result1; - double result2; - double result3; + uint64_t vswp_d0; + uint64_t vswp_d1; + uint64_t vswp_d30; + uint64_t vswp_d31; + uint32_t vswp_q4[4]; + uint32_t vswp_q5[4]; } T; T t; - __ vmov(d0, 1.0); - __ vmov(d1, -1.0); + __ stm(db_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | lr.bit()); + + uint64_t one = bit_cast<uint64_t>(1.0); + __ mov(r5, Operand(one >> 32)); + __ mov(r4, Operand(one & 0xffffffff)); + uint64_t minus_one = bit_cast<uint64_t>(-1.0); + __ mov(r7, Operand(minus_one >> 32)); + __ mov(r6, Operand(minus_one & 0xffffffff)); + + __ vmov(d0, r4, r5); // d0 = 1.0 + __ vmov(d1, r6, r7); // d1 = -1.0 __ vswp(d0, d1); - __ vstr(d0, r0, offsetof(T, result0)); - __ vstr(d1, r0, offsetof(T, result1)); + __ vstr(d0, r0, offsetof(T, vswp_d0)); + __ vstr(d1, r0, offsetof(T, vswp_d1)); if (CpuFeatures::IsSupported(VFP32DREGS)) { - __ vmov(d30, 1.0); - __ vmov(d31, -1.0); + __ vmov(d30, r4, r5); // d30 = 1.0 + __ vmov(d31, r6, r7); // d31 = -1.0 __ vswp(d30, d31); - __ vstr(d30, r0, offsetof(T, result2)); - __ vstr(d31, r0, offsetof(T, result3)); + __ vstr(d30, r0, offsetof(T, vswp_d30)); + __ vstr(d31, r0, offsetof(T, vswp_d31)); } + // q-register swap. 
+ const uint32_t test_1 = 0x01234567; + const uint32_t test_2 = 0x89abcdef; + __ mov(r4, Operand(test_1)); + __ mov(r5, Operand(test_2)); + // TODO(bbudge) replace with vdup when implemented. + __ vmov(d8, r4, r4); + __ vmov(d9, r4, r4); // q4 = [test_1, test_1, test_1, test_1] + __ vmov(d10, r5, r5); + __ vmov(d11, r5, r5); // q5 = [test_2, test_2, test_2, test_2] + __ vswp(q4, q5); + __ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q4)))); + __ vst1(Neon8, NeonListOperand(q4), NeonMemOperand(r6)); + __ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q5)))); + __ vst1(Neon8, NeonListOperand(q5), NeonMemOperand(r6)); + + __ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit()); __ bx(lr); CodeDesc desc; @@ -2921,12 +3571,20 @@ TEST(vswp) { F3 f = FUNCTION_CAST<F3>(code->entry()); Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0); USE(dummy); - CHECK_EQ(-1.0, t.result0); - CHECK_EQ(1.0, t.result1); + CHECK_EQ(minus_one, t.vswp_d0); + CHECK_EQ(one, t.vswp_d1); if (CpuFeatures::IsSupported(VFP32DREGS)) { - CHECK_EQ(-1.0, t.result2); - CHECK_EQ(1.0, t.result3); + CHECK_EQ(minus_one, t.vswp_d30); + CHECK_EQ(one, t.vswp_d31); } + CHECK_EQ(t.vswp_q4[0], test_2); + CHECK_EQ(t.vswp_q4[1], test_2); + CHECK_EQ(t.vswp_q4[2], test_2); + CHECK_EQ(t.vswp_q4[3], test_2); + CHECK_EQ(t.vswp_q5[0], test_1); + CHECK_EQ(t.vswp_q5[1], test_1); + CHECK_EQ(t.vswp_q5[2], test_1); + CHECK_EQ(t.vswp_q5[3], test_1); } TEST(regress4292_b) { |