diff options
author | Sam Roberts <vieuxtech@gmail.com> | 2018-11-22 11:47:07 -0800 |
---|---|---|
committer | Sam Roberts <vieuxtech@gmail.com> | 2019-01-22 13:33:54 -0800 |
commit | 807ed7883a12423270450776f015a7c2348c0913 (patch) | |
tree | 00ec21dd290b29c782680ffc2f97e6d59fd2ab2f /deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s | |
parent | 57119fbdb200702d6e2cf23428de4c458ae86bbc (diff) | |
download | android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.gz android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.bz2 android-node-v8-807ed7883a12423270450776f015a7c2348c0913.zip |
deps: update archs files for OpenSSL-1.1.1a
`cd deps/openssl/config; make` updates all archs dependant files.
PR-URL: https://github.com/nodejs/node/pull/25381
Reviewed-By: Daniel Bevenius <daniel.bevenius@gmail.com>
Reviewed-By: Shigeki Ohtsu <ohtsu@ohtsu.org>
Diffstat (limited to 'deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s')
-rw-r--r-- | deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s | 1437 |
1 files changed, 1369 insertions, 68 deletions
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s index 77102c6a41..302649aacc 100644 --- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s +++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s @@ -2393,13 +2393,23 @@ L$Three: L$ONE_mont: .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + +L$ord: +.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 +L$ordK: +.quad 0xccd1c8aaee00bc4f + .globl _ecp_nistz256_mul_by_2 .p2align 6 _ecp_nistz256_mul_by_2: + pushq %r12 + pushq %r13 +L$mul_by_2_body: + movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 @@ -2431,20 +2441,30 @@ _ecp_nistz256_mul_by_2: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$mul_by_2_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_div_by_2 .p2align 5 _ecp_nistz256_div_by_2: + pushq %r12 + pushq %r13 +L$div_by_2_body: + movq 0(%rsi),%r8 movq 8(%rsi),%r9 movq 16(%rsi),%r10 @@ -2491,20 +2511,30 @@ _ecp_nistz256_div_by_2: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$div_by_2_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_mul_by_3 .p2align 5 _ecp_nistz256_mul_by_3: + pushq %r12 + pushq %r13 +L$mul_by_3_body: + movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 @@ -2557,20 +2587,30 @@ _ecp_nistz256_mul_by_3: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$mul_by_3_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_add .p2align 5 _ecp_nistz256_add: + pushq %r12 + pushq %r13 +L$add_body: + movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 @@ -2603,20 +2643,30 @@ _ecp_nistz256_add: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$add_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_sub .p2align 5 _ecp_nistz256_sub: + pushq %r12 + pushq %r13 +L$sub_body: + movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 @@ -2649,20 +2699,30 @@ _ecp_nistz256_sub: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$sub_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_neg .p2align 5 _ecp_nistz256_neg: + pushq %r12 + pushq %r13 +L$neg_body: + xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 @@ -2695,14 +2755,1085 @@ _ecp_nistz256_neg: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$neg_epilogue: .byte 0xf3,0xc3 + + + +.globl _ecp_nistz256_ord_mul_mont + +.p2align 5 +_ecp_nistz256_ord_mul_mont: + + movl $0x80100,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je L$ecp_nistz256_ord_mul_montx + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_mul_body: + + movq 0(%rdx),%rax + movq %rdx,%rbx + leaq L$ord(%rip),%r14 + movq L$ordK(%rip),%r15 + + + movq %rax,%rcx + mulq 0(%rsi) + movq %rax,%r8 + movq %rcx,%rax + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + + movq %r8,%r13 + imulq %r15,%r8 + + movq %rdx,%r11 + mulq 24(%rsi) + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq 0(%r14) + movq %r8,%rbp + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%r8 + + mulq 8(%r14) + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %rbp,%rax + adcq %rdx,%r10 + movq %rbp,%rdx + adcq $0,%r8 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 8(%rbx),%rax + sbbq %rdx,%rbp + + addq %r8,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + + movq %r9,%rcx + imulq %r15,%r9 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + xorq %r8,%r8 + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + mulq 0(%r14) + movq %r9,%rbp + addq %rax,%rcx + movq %r9,%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%r9 + + mulq 8(%r14) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq %rdx,%r11 + movq %rbp,%rdx + adcq $0,%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r12 + movq 16(%rbx),%rax + sbbq %rdx,%rbp + + addq %r9,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + + movq %r10,%rcx + imulq %r15,%r10 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + xorq %r9,%r9 + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + mulq 0(%r14) + movq %r10,%rbp + addq %rax,%rcx + movq %r10,%rax + adcq %rdx,%rcx + + subq %r10,%r12 + sbbq $0,%r10 + + mulq 8(%r14) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq %rdx,%r12 + movq %rbp,%rdx + adcq $0,%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r13 + movq 24(%rbx),%rax + sbbq %rdx,%rbp + + addq %r10,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rcx,%rax + adcq $0,%rdx + + movq %r11,%rcx + imulq %r15,%r11 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r8 + adcq $0,%rdx + xorq %r10,%r10 + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + mulq 0(%r14) + movq %r11,%rbp + addq %rax,%rcx + movq %r11,%rax + adcq %rdx,%rcx + + subq %r11,%r13 + sbbq $0,%r11 + + mulq 8(%r14) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq %rdx,%r13 + movq %rbp,%rdx + adcq $0,%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + sbbq %rdx,%rbp + + addq %r11,%r8 + adcq %rbp,%r9 + adcq $0,%r10 + + + movq %r12,%rsi + subq 0(%r14),%r12 + movq %r13,%r11 + sbbq 8(%r14),%r13 + movq %r8,%rcx + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rsi,%r12 + cmovcq %r11,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_mul_epilogue: + .byte 0xf3,0xc3 + + + + + + + + + +.globl _ecp_nistz256_ord_sqr_mont + +.p2align 5 +_ecp_nistz256_ord_sqr_mont: + + movl $0x80100,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je L$ecp_nistz256_ord_sqr_montx + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_sqr_body: + + movq 0(%rsi),%r8 + movq 8(%rsi),%rax + movq 16(%rsi),%r14 + movq 24(%rsi),%r15 + leaq L$ord(%rip),%rsi + movq %rdx,%rbx + jmp L$oop_ord_sqr + +.p2align 5 +L$oop_ord_sqr: + + movq %rax,%rbp + mulq %r8 + movq %rax,%r9 +.byte 102,72,15,110,205 + movq %r14,%rax + movq %rdx,%r10 + + mulq %r8 + addq %rax,%r10 + movq %r15,%rax +.byte 102,73,15,110,214 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r8 + addq %rax,%r11 + movq %r15,%rax +.byte 102,73,15,110,223 + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + movq %rax,%r13 + movq %r14,%rax + movq %rdx,%r14 + + + mulq %rbp + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + + addq %r15,%r12 + adcq %rdx,%r13 + adcq $0,%r14 + + + xorq %r15,%r15 + movq %r8,%rax + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + + mulq %rax + movq %rax,%r8 +.byte 102,72,15,126,200 + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r9 + adcq %rax,%r10 +.byte 102,72,15,126,208 + adcq $0,%rdx + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r11 + adcq %rax,%r12 +.byte 102,72,15,126,216 + adcq $0,%rdx + movq %rdx,%rbp + + movq %r8,%rcx + imulq 32(%rsi),%r8 + + mulq %rax + addq %rbp,%r13 + adcq %rax,%r14 + movq 0(%rsi),%rax + adcq %rdx,%r15 + + + mulq %r8 + movq %r8,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%rbp + + mulq %r8 + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %r8,%rax + adcq %rdx,%r10 + movq %r8,%rdx + adcq $0,%rbp + + movq %r9,%rcx + imulq 32(%rsi),%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 0(%rsi),%rax + sbbq %rdx,%r8 + + addq %rbp,%r11 + adcq $0,%r8 + + + mulq %r9 + movq %r9,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%rbp + + mulq %r9 + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + movq %r9,%rdx + adcq $0,%rbp + + movq %r10,%rcx + imulq 32(%rsi),%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + movq 0(%rsi),%rax + sbbq %rdx,%r9 + + addq %rbp,%r8 + adcq $0,%r9 + + + mulq %r10 + movq %r10,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r10,%r8 + sbbq $0,%rbp + + mulq %r10 + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r10,%rax + adcq %rdx,%r8 + movq %r10,%rdx + adcq $0,%rbp + + movq %r11,%rcx + imulq 32(%rsi),%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r9 + movq 0(%rsi),%rax + sbbq %rdx,%r10 + + addq %rbp,%r9 + adcq $0,%r10 + + + mulq %r11 + movq %r11,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r11,%r9 + sbbq $0,%rbp + + mulq %r11 + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + movq %r11,%rdx + adcq $0,%rbp + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r10 + sbbq %rdx,%r11 + + addq %rbp,%r10 + adcq $0,%r11 + + + xorq %rdx,%rdx + addq %r12,%r8 + adcq %r13,%r9 + movq %r8,%r12 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%rax + adcq $0,%rdx + + + subq 0(%rsi),%r8 + movq %r10,%r14 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r15 + sbbq 24(%rsi),%r11 + sbbq $0,%rdx + + cmovcq %r12,%r8 + cmovncq %r9,%rax + cmovncq %r10,%r14 + cmovncq %r11,%r15 + + decq %rbx + jnz L$oop_ord_sqr + + movq %r8,0(%rdi) + movq %rax,8(%rdi) + pxor %xmm1,%xmm1 + movq %r14,16(%rdi) + pxor %xmm2,%xmm2 + movq %r15,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_sqr_epilogue: + .byte 0xf3,0xc3 + + + + +.p2align 5 +ecp_nistz256_ord_mul_montx: + +L$ecp_nistz256_ord_mul_montx: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_mulx_body: + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + leaq L$ord-128(%rip),%r14 + movq L$ordK(%rip),%r15 + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + mulxq %r11,%rbp,%r11 + addq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + mulxq %r15,%rdx,%rax + adcq %rbp,%r10 + adcq %rcx,%r11 + adcq $0,%r12 + + + xorq %r13,%r13 + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%r14),%rcx,%rbp + movq 8(%rbx),%rdx + adcxq %rcx,%r11 + adoxq %rbp,%r12 + adcxq %r8,%r12 + adoxq %r8,%r13 + adcq $0,%r13 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%r14),%rcx,%rbp + movq 16(%rbx),%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcxq %r9,%r13 + adoxq %r9,%r8 + adcq $0,%r8 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%r14),%rcx,%rbp + movq 24(%rbx),%rdx + adcxq %rcx,%r13 + adoxq %rbp,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcq $0,%r9 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%r14),%rcx,%rbp + leaq 128(%r14),%r14 + movq %r12,%rbx + adcxq %rcx,%r8 + adoxq %rbp,%r9 + movq %r13,%rdx + adcxq %r11,%r9 + adoxq %r11,%r10 + adcq $0,%r10 + + + + movq %r8,%rcx + subq 0(%r14),%r12 + sbbq 8(%r14),%r13 + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_mulx_epilogue: + .byte 0xf3,0xc3 + + + + +.p2align 5 +ecp_nistz256_ord_sqr_montx: + +L$ecp_nistz256_ord_sqr_montx: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_sqrx_body: + + movq %rdx,%rbx + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq L$ord(%rip),%rsi + jmp L$oop_ord_sqrx + +.p2align 5 +L$oop_ord_sqrx: + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + movq %rdx,%rax +.byte 102,73,15,110,206 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + addq %rcx,%r10 +.byte 102,73,15,110,215 + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + mulxq %r8,%rcx,%r14 + movq %rax,%rdx +.byte 102,73,15,110,216 + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + + mulxq %rdx,%r8,%rbp +.byte 102,72,15,126,202 + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax +.byte 102,72,15,126,210 + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 + mulxq %rdx,%rcx,%rbp +.byte 0x67 +.byte 102,72,15,126,218 + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + adoxq %rbp,%r13 + mulxq %rdx,%rcx,%rax + adoxq %rcx,%r14 + adoxq %rax,%r15 + + + movq %r8,%rdx + mulxq 32(%rsi),%rdx,%rcx + + xorq %rax,%rax + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + adcxq %rax,%r8 + + + movq %r9,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + adoxq %rax,%r9 + + + movq %r10,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + adcxq %rax,%r10 + + + movq %r11,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + adoxq %rax,%r11 + + + addq %r8,%r12 + adcq %r13,%r9 + movq %r12,%rdx + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%r14 + adcq $0,%rax + + + subq 0(%rsi),%r12 + movq %r10,%r15 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r8 + sbbq 24(%rsi),%r11 + sbbq $0,%rax + + cmovncq %r12,%rdx + cmovncq %r9,%r14 + cmovncq %r10,%r15 + cmovncq %r11,%r8 + + decq %rbx + jnz L$oop_ord_sqrx + + movq %rdx,0(%rdi) + movq %r14,8(%rdi) + pxor %xmm1,%xmm1 + movq %r15,16(%rdi) + pxor %xmm2,%xmm2 + movq %r8,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_sqrx_epilogue: + .byte 0xf3,0xc3 + + + + + + .globl _ecp_nistz256_to_mont .p2align 5 @@ -2723,15 +3854,23 @@ _ecp_nistz256_to_mont: .p2align 5 _ecp_nistz256_mul_mont: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx L$mul_mont: pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + +L$mul_body: cmpl $0x80100,%ecx je L$mul_montx movq %rdx,%rbx @@ -2756,16 +3895,26 @@ L$mul_montx: call __ecp_nistz256_mul_montx L$mul_mont_done: - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$mul_epilogue: .byte 0xf3,0xc3 + .p2align 5 __ecp_nistz256_mul_montq: @@ -2992,14 +4141,22 @@ __ecp_nistz256_mul_montq: .p2align 5 _ecp_nistz256_sqr_mont: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + +L$sqr_body: cmpl $0x80100,%ecx je L$sqr_montx movq 0(%rsi),%rax @@ -3020,16 +4177,26 @@ L$sqr_montx: call __ecp_nistz256_sqr_montx L$sqr_mont_done: - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$sqr_epilogue: .byte 0xf3,0xc3 + .p2align 5 __ecp_nistz256_sqr_montq: movq %rax,%r13 @@ -3494,9 +4661,13 @@ __ecp_nistz256_sqr_montx: .p2align 5 _ecp_nistz256_from_mont: + pushq %r12 + pushq %r13 +L$from_body: + movq 0(%rsi),%rax movq L$poly+24(%rip),%r13 movq 8(%rsi),%r9 @@ -3576,12 +4747,18 @@ _ecp_nistz256_from_mont: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$from_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_scatter_w5 .p2align 5 @@ -3664,6 +4841,7 @@ L$select_loop_sse_w5: movdqu %xmm6,64(%rdi) movdqu %xmm7,80(%rdi) .byte 0xf3,0xc3 +L$SEH_end_ecp_nistz256_gather_w5: @@ -3734,6 +4912,7 @@ L$select_loop_sse_w7: movdqu %xmm4,32(%rdi) movdqu %xmm5,48(%rdi) .byte 0xf3,0xc3 +L$SEH_end_ecp_nistz256_gather_w7: @@ -3794,6 +4973,7 @@ L$select_loop_avx2_w5: vmovdqu %ymm4,64(%rdi) vzeroupper .byte 0xf3,0xc3 +L$SEH_end_ecp_nistz256_avx2_gather_w5: @@ -3871,6 +5051,7 @@ L$select_loop_avx2_w7: vmovdqu %ymm3,32(%rdi) vzeroupper .byte 0xf3,0xc3 +L$SEH_end_ecp_nistz256_avx2_gather_w7: .p2align 5 @@ -3997,18 +5178,27 @@ __ecp_nistz256_mul_by_2q: .p2align 5 _ecp_nistz256_point_double: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx cmpl $0x80100,%ecx je L$point_doublex pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp +L$point_doubleq_body: + L$point_double_shortcutq: movdqu 0(%rsi),%xmm0 movq %rsi,%rbx @@ -4190,31 +5380,51 @@ L$point_double_shortcutq: .byte 102,72,15,126,207 call __ecp_nistz256_sub_fromq - addq $160+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 160+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_doubleq_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_point_add .p2align 5 _ecp_nistz256_point_add: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx cmpl $0x80100,%ecx je L$point_addx pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp +L$point_addq_body: + movdqu 0(%rsi),%xmm0 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 @@ -4590,31 +5800,51 @@ L$add_proceedq: movdqu %xmm3,48(%rdi) L$add_doneq: - addq $576+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 576+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_addq_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_point_add_affine .p2align 5 _ecp_nistz256_point_add_affine: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx cmpl $0x80100,%ecx je L$point_add_affinex pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp +L$add_affineq_body: + movdqu 0(%rsi),%xmm0 movq %rdx,%rbx movdqu 16(%rsi),%xmm1 @@ -4896,16 +6126,27 @@ _ecp_nistz256_point_add_affine: movdqu %xmm2,32(%rdi) movdqu %xmm3,48(%rdi) - addq $480+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 480+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$add_affineq_epilogue: .byte 0xf3,0xc3 + .p2align 5 __ecp_nistz256_add_tox: xorq %r11,%r11 @@ -5035,15 +6276,24 @@ __ecp_nistz256_mul_by_2x: .p2align 5 ecp_nistz256_point_doublex: + L$point_doublex: pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp +L$point_doublex_body: + L$point_double_shortcutx: movdqu 0(%rsi),%xmm0 movq %rsi,%rbx @@ -5225,27 +6475,47 @@ L$point_double_shortcutx: .byte 102,72,15,126,207 call __ecp_nistz256_sub_fromx - addq $160+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 160+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_doublex_epilogue: .byte 0xf3,0xc3 + .p2align 5 ecp_nistz256_point_addx: + L$point_addx: pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp +L$point_addx_body: + movdqu 0(%rsi),%xmm0 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 @@ -5621,27 +6891,47 @@ L$add_proceedx: movdqu %xmm3,48(%rdi) L$add_donex: - addq $576+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 576+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_addx_epilogue: .byte 0xf3,0xc3 + .p2align 5 ecp_nistz256_point_add_affinex: + L$point_add_affinex: pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp +L$add_affinex_body: + movdqu 0(%rsi),%xmm0 movq %rdx,%rbx movdqu 16(%rsi),%xmm1 @@ -5923,12 +7213,23 @@ L$point_add_affinex: movdqu %xmm2,32(%rdi) movdqu %xmm3,48(%rdi) - addq $480+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 480+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$add_affinex_epilogue: .byte 0xf3,0xc3 + |