diff options
Diffstat (limited to 'deps/openssl/config/archs/darwin64-x86_64-cc/asm_avx2/crypto/ec/ecp_nistz256-x86_64.s')
-rw-r--r-- | deps/openssl/config/archs/darwin64-x86_64-cc/asm_avx2/crypto/ec/ecp_nistz256-x86_64.s | 36 |
1 file changed, 36 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm_avx2/crypto/ec/ecp_nistz256-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm_avx2/crypto/ec/ecp_nistz256-x86_64.s index 302649aacc..ff7ae72344 100644 --- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm_avx2/crypto/ec/ecp_nistz256-x86_64.s +++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm_avx2/crypto/ec/ecp_nistz256-x86_64.s @@ -3919,6 +3919,7 @@ L$mul_epilogue: __ecp_nistz256_mul_montq: + movq %rax,%rbp mulq %r9 movq L$poly+8(%rip),%r14 @@ -4137,6 +4138,7 @@ __ecp_nistz256_mul_montq: + .globl _ecp_nistz256_sqr_mont .p2align 5 @@ -4199,6 +4201,7 @@ L$sqr_epilogue: .p2align 5 __ecp_nistz256_sqr_montq: + movq %rax,%r13 mulq %r14 movq %rax,%r9 @@ -4358,10 +4361,12 @@ __ecp_nistz256_sqr_montq: .byte 0xf3,0xc3 + .p2align 5 __ecp_nistz256_mul_montx: + mulxq %r9,%r8,%r9 mulxq %r10,%rcx,%r10 movq $32,%r14 @@ -4525,8 +4530,10 @@ __ecp_nistz256_mul_montx: + .p2align 5 __ecp_nistz256_sqr_montx: + mulxq %r14,%r9,%r10 mulxq %r15,%rcx,%r11 xorl %eax,%eax @@ -4657,6 +4664,7 @@ __ecp_nistz256_sqr_montx: + .globl _ecp_nistz256_from_mont .p2align 5 @@ -4787,6 +4795,7 @@ _ecp_nistz256_scatter_w5: .p2align 5 _ecp_nistz256_gather_w5: + movl _OPENSSL_ia32cap_P+8(%rip),%eax testl $32,%eax jnz L$avx2_gather_w5 @@ -4841,6 +4850,7 @@ L$select_loop_sse_w5: movdqu %xmm6,64(%rdi) movdqu %xmm7,80(%rdi) .byte 0xf3,0xc3 + L$SEH_end_ecp_nistz256_gather_w5: @@ -4869,6 +4879,7 @@ _ecp_nistz256_scatter_w7: .p2align 5 _ecp_nistz256_gather_w7: + movl _OPENSSL_ia32cap_P+8(%rip),%eax testl $32,%eax jnz L$avx2_gather_w7 @@ -4912,6 +4923,7 @@ L$select_loop_sse_w7: movdqu %xmm4,32(%rdi) movdqu %xmm5,48(%rdi) .byte 0xf3,0xc3 + L$SEH_end_ecp_nistz256_gather_w7: @@ -4919,6 +4931,7 @@ L$SEH_end_ecp_nistz256_gather_w7: .p2align 5 ecp_nistz256_avx2_gather_w5: + L$avx2_gather_w5: vzeroupper vmovdqa L$Two(%rip),%ymm0 @@ -4973,6 +4986,7 @@ L$select_loop_avx2_w5: vmovdqu %ymm4,64(%rdi) vzeroupper .byte 0xf3,0xc3 + 
L$SEH_end_ecp_nistz256_avx2_gather_w5: @@ -4982,6 +4996,7 @@ L$SEH_end_ecp_nistz256_avx2_gather_w5: .p2align 5 _ecp_nistz256_avx2_gather_w7: + L$avx2_gather_w7: vzeroupper vmovdqa L$Three(%rip),%ymm0 @@ -5051,11 +5066,13 @@ L$select_loop_avx2_w7: vmovdqu %ymm3,32(%rdi) vzeroupper .byte 0xf3,0xc3 + L$SEH_end_ecp_nistz256_avx2_gather_w7: .p2align 5 __ecp_nistz256_add_toq: + xorq %r11,%r11 addq 0(%rbx),%r12 adcq 8(%rbx),%r13 @@ -5086,8 +5103,10 @@ __ecp_nistz256_add_toq: + .p2align 5 __ecp_nistz256_sub_fromq: + subq 0(%rbx),%r12 sbbq 8(%rbx),%r13 movq %r12,%rax @@ -5117,8 +5136,10 @@ __ecp_nistz256_sub_fromq: + .p2align 5 __ecp_nistz256_subq: + subq %r12,%rax sbbq %r13,%rbp movq %rax,%r12 @@ -5144,8 +5165,10 @@ __ecp_nistz256_subq: + .p2align 5 __ecp_nistz256_mul_by_2q: + xorq %r11,%r11 addq %r12,%r12 adcq %r13,%r13 @@ -5174,6 +5197,7 @@ __ecp_nistz256_mul_by_2q: .byte 0xf3,0xc3 + .globl _ecp_nistz256_point_double .p2align 5 @@ -5593,8 +5617,10 @@ L$add_doubleq: .byte 102,72,15,126,206 .byte 102,72,15,126,199 addq $416,%rsp + jmp L$point_double_shortcutq + .p2align 5 L$add_proceedq: movq 0+64(%rsp),%rax @@ -6149,6 +6175,7 @@ L$add_affineq_epilogue: .p2align 5 __ecp_nistz256_add_tox: + xorq %r11,%r11 adcq 0(%rbx),%r12 adcq 8(%rbx),%r13 @@ -6180,8 +6207,10 @@ __ecp_nistz256_add_tox: + .p2align 5 __ecp_nistz256_sub_fromx: + xorq %r11,%r11 sbbq 0(%rbx),%r12 sbbq 8(%rbx),%r13 @@ -6213,8 +6242,10 @@ __ecp_nistz256_sub_fromx: + .p2align 5 __ecp_nistz256_subx: + xorq %r11,%r11 sbbq %r12,%rax sbbq %r13,%rbp @@ -6242,8 +6273,10 @@ __ecp_nistz256_subx: + .p2align 5 __ecp_nistz256_mul_by_2x: + xorq %r11,%r11 adcq %r12,%r12 adcq %r13,%r13 @@ -6274,6 +6307,7 @@ __ecp_nistz256_mul_by_2x: .byte 0xf3,0xc3 + .p2align 5 ecp_nistz256_point_doublex: @@ -6684,8 +6718,10 @@ L$add_doublex: .byte 102,72,15,126,206 .byte 102,72,15,126,199 addq $416,%rsp + jmp L$point_double_shortcutx + .p2align 5 L$add_proceedx: movq 0+64(%rsp),%rdx |