diff options
Diffstat (limited to 'deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec')
-rw-r--r-- | deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s | 1437 | ||||
-rw-r--r-- | deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/x25519-x86_64.s | 760 |
2 files changed, 2129 insertions, 68 deletions
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s index 77102c6a41..302649aacc 100644 --- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s +++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s @@ -2393,13 +2393,23 @@ L$Three: L$ONE_mont: .quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + +L$ord: +.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 +L$ordK: +.quad 0xccd1c8aaee00bc4f + .globl _ecp_nistz256_mul_by_2 .p2align 6 _ecp_nistz256_mul_by_2: + pushq %r12 + pushq %r13 +L$mul_by_2_body: + movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 @@ -2431,20 +2441,30 @@ _ecp_nistz256_mul_by_2: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$mul_by_2_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_div_by_2 .p2align 5 _ecp_nistz256_div_by_2: + pushq %r12 + pushq %r13 +L$div_by_2_body: + movq 0(%rsi),%r8 movq 8(%rsi),%r9 movq 16(%rsi),%r10 @@ -2491,20 +2511,30 @@ _ecp_nistz256_div_by_2: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$div_by_2_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_mul_by_3 .p2align 5 _ecp_nistz256_mul_by_3: + pushq %r12 + pushq %r13 +L$mul_by_3_body: + movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 @@ -2557,20 +2587,30 @@ _ecp_nistz256_mul_by_3: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$mul_by_3_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_add .p2align 5 _ecp_nistz256_add: + pushq %r12 + pushq %r13 +L$add_body: + movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 @@ -2603,20 +2643,30 @@ _ecp_nistz256_add: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$add_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_sub .p2align 5 _ecp_nistz256_sub: + pushq %r12 + pushq %r13 +L$sub_body: + movq 0(%rsi),%r8 xorq %r13,%r13 movq 8(%rsi),%r9 @@ -2649,20 +2699,30 @@ _ecp_nistz256_sub: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$sub_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_neg .p2align 5 _ecp_nistz256_neg: + pushq %r12 + pushq %r13 +L$neg_body: + xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 @@ -2695,14 +2755,1085 @@ _ecp_nistz256_neg: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$neg_epilogue: .byte 0xf3,0xc3 + + + +.globl _ecp_nistz256_ord_mul_mont + +.p2align 5 +_ecp_nistz256_ord_mul_mont: + + movl $0x80100,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je L$ecp_nistz256_ord_mul_montx + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_mul_body: + + movq 0(%rdx),%rax + movq %rdx,%rbx + leaq L$ord(%rip),%r14 + movq L$ordK(%rip),%r15 + + + movq %rax,%rcx + mulq 0(%rsi) + movq %rax,%r8 + movq %rcx,%rax + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + + movq %r8,%r13 + imulq %r15,%r8 + + movq %rdx,%r11 + mulq 24(%rsi) + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq 0(%r14) + movq %r8,%rbp + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%r8 + + mulq 8(%r14) + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %rbp,%rax + adcq %rdx,%r10 + movq %rbp,%rdx + adcq $0,%r8 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 8(%rbx),%rax + sbbq %rdx,%rbp + + addq %r8,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + + movq %r9,%rcx + imulq %r15,%r9 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + xorq %r8,%r8 + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + mulq 0(%r14) + movq %r9,%rbp + addq %rax,%rcx + movq %r9,%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%r9 + + mulq 8(%r14) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq %rdx,%r11 + movq %rbp,%rdx + adcq $0,%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r12 + movq 16(%rbx),%rax + sbbq %rdx,%rbp + + addq %r9,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + + movq %r10,%rcx + imulq %r15,%r10 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + xorq %r9,%r9 + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + mulq 0(%r14) + movq %r10,%rbp + addq %rax,%rcx + movq %r10,%rax + adcq %rdx,%rcx + + subq %r10,%r12 + sbbq $0,%r10 + + mulq 8(%r14) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq %rdx,%r12 + movq %rbp,%rdx + adcq $0,%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r13 + movq 24(%rbx),%rax + sbbq %rdx,%rbp + + addq %r10,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rcx,%rax + adcq $0,%rdx + + movq %r11,%rcx + imulq %r15,%r11 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r8 + adcq $0,%rdx + xorq %r10,%r10 + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + mulq 0(%r14) + movq %r11,%rbp + addq %rax,%rcx + movq %r11,%rax + adcq %rdx,%rcx + + subq %r11,%r13 + sbbq $0,%r11 + + mulq 8(%r14) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq %rdx,%r13 + movq %rbp,%rdx + adcq $0,%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + sbbq %rdx,%rbp + + addq %r11,%r8 + adcq %rbp,%r9 + adcq $0,%r10 + + + movq %r12,%rsi + subq 0(%r14),%r12 + movq %r13,%r11 + sbbq 8(%r14),%r13 + movq %r8,%rcx + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rsi,%r12 + cmovcq %r11,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_mul_epilogue: + .byte 0xf3,0xc3 + + + + + + + + + +.globl _ecp_nistz256_ord_sqr_mont + +.p2align 5 +_ecp_nistz256_ord_sqr_mont: + + movl $0x80100,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je L$ecp_nistz256_ord_sqr_montx + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_sqr_body: + + movq 0(%rsi),%r8 + movq 8(%rsi),%rax + movq 16(%rsi),%r14 + movq 24(%rsi),%r15 + leaq L$ord(%rip),%rsi + movq %rdx,%rbx + jmp L$oop_ord_sqr + +.p2align 5 +L$oop_ord_sqr: + + movq %rax,%rbp + mulq %r8 + movq %rax,%r9 +.byte 102,72,15,110,205 + movq %r14,%rax + movq %rdx,%r10 + + mulq %r8 + addq %rax,%r10 + movq %r15,%rax +.byte 102,73,15,110,214 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r8 + addq %rax,%r11 + movq %r15,%rax +.byte 102,73,15,110,223 + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + movq %rax,%r13 + movq %r14,%rax + movq %rdx,%r14 + + + mulq %rbp + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + + addq %r15,%r12 + adcq %rdx,%r13 + adcq $0,%r14 + + + xorq %r15,%r15 + movq %r8,%rax + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + + mulq %rax + movq %rax,%r8 +.byte 102,72,15,126,200 + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r9 + adcq %rax,%r10 +.byte 102,72,15,126,208 + adcq $0,%rdx + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r11 + adcq %rax,%r12 +.byte 102,72,15,126,216 + adcq $0,%rdx + movq %rdx,%rbp + + movq %r8,%rcx + imulq 32(%rsi),%r8 + + mulq %rax + addq %rbp,%r13 + adcq %rax,%r14 + movq 0(%rsi),%rax + adcq %rdx,%r15 + + + mulq %r8 + movq %r8,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%rbp + + mulq %r8 + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %r8,%rax + adcq %rdx,%r10 + movq %r8,%rdx + adcq $0,%rbp + + movq %r9,%rcx + imulq 32(%rsi),%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 0(%rsi),%rax + sbbq %rdx,%r8 + + addq %rbp,%r11 + adcq $0,%r8 + + + mulq %r9 + movq %r9,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%rbp + + mulq %r9 + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + movq %r9,%rdx + adcq $0,%rbp + + movq %r10,%rcx + imulq 32(%rsi),%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + movq 0(%rsi),%rax + sbbq %rdx,%r9 + + addq %rbp,%r8 + adcq $0,%r9 + + + mulq %r10 + movq %r10,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r10,%r8 + sbbq $0,%rbp + + mulq %r10 + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r10,%rax + adcq %rdx,%r8 + movq %r10,%rdx + adcq $0,%rbp + + movq %r11,%rcx + imulq 32(%rsi),%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r9 + movq 0(%rsi),%rax + sbbq %rdx,%r10 + + addq %rbp,%r9 + adcq $0,%r10 + + + mulq %r11 + movq %r11,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r11,%r9 + sbbq $0,%rbp + + mulq %r11 + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + movq %r11,%rdx + adcq $0,%rbp + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r10 + sbbq %rdx,%r11 + + addq %rbp,%r10 + adcq $0,%r11 + + + xorq %rdx,%rdx + addq %r12,%r8 + adcq %r13,%r9 + movq %r8,%r12 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%rax + adcq $0,%rdx + + + subq 0(%rsi),%r8 + movq %r10,%r14 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r15 + sbbq 24(%rsi),%r11 + sbbq $0,%rdx + + cmovcq %r12,%r8 + cmovncq %r9,%rax + cmovncq %r10,%r14 + cmovncq %r11,%r15 + + decq %rbx + jnz L$oop_ord_sqr + + movq %r8,0(%rdi) + movq %rax,8(%rdi) + pxor %xmm1,%xmm1 + movq %r14,16(%rdi) + pxor %xmm2,%xmm2 + movq %r15,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_sqr_epilogue: + .byte 0xf3,0xc3 + + + + +.p2align 5 +ecp_nistz256_ord_mul_montx: + +L$ecp_nistz256_ord_mul_montx: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_mulx_body: + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + leaq L$ord-128(%rip),%r14 + movq L$ordK(%rip),%r15 + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + mulxq %r11,%rbp,%r11 + addq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + mulxq %r15,%rdx,%rax + adcq %rbp,%r10 + adcq %rcx,%r11 + adcq $0,%r12 + + + xorq %r13,%r13 + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%r14),%rcx,%rbp + movq 8(%rbx),%rdx + adcxq %rcx,%r11 + adoxq %rbp,%r12 + adcxq %r8,%r12 + adoxq %r8,%r13 + adcq $0,%r13 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%r14),%rcx,%rbp + movq 16(%rbx),%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcxq %r9,%r13 + adoxq %r9,%r8 + adcq $0,%r8 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%r14),%rcx,%rbp + movq 24(%rbx),%rdx + adcxq %rcx,%r13 + adoxq %rbp,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcq $0,%r9 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%r14),%rcx,%rbp + leaq 128(%r14),%r14 + movq %r12,%rbx + adcxq %rcx,%r8 + adoxq %rbp,%r9 + movq %r13,%rdx + adcxq %r11,%r9 + adoxq %r11,%r10 + adcq $0,%r10 + + + + movq %r8,%rcx + subq 0(%r14),%r12 + sbbq 8(%r14),%r13 + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_mulx_epilogue: + .byte 0xf3,0xc3 + + + + +.p2align 5 +ecp_nistz256_ord_sqr_montx: + +L$ecp_nistz256_ord_sqr_montx: + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + +L$ord_sqrx_body: + + movq %rdx,%rbx + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq L$ord(%rip),%rsi + jmp L$oop_ord_sqrx + +.p2align 5 +L$oop_ord_sqrx: + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + movq %rdx,%rax +.byte 102,73,15,110,206 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + addq %rcx,%r10 +.byte 102,73,15,110,215 + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + mulxq %r8,%rcx,%r14 + movq %rax,%rdx +.byte 102,73,15,110,216 + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + + mulxq %rdx,%r8,%rbp +.byte 102,72,15,126,202 + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax +.byte 102,72,15,126,210 + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 + mulxq %rdx,%rcx,%rbp +.byte 0x67 +.byte 102,72,15,126,218 + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + adoxq %rbp,%r13 + mulxq %rdx,%rcx,%rax + adoxq %rcx,%r14 + adoxq %rax,%r15 + + + movq %r8,%rdx + mulxq 32(%rsi),%rdx,%rcx + + xorq %rax,%rax + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + adcxq %rax,%r8 + + + movq %r9,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + adoxq %rax,%r9 + + + movq %r10,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + adcxq %rax,%r10 + + + movq %r11,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + adoxq %rax,%r11 + + + addq %r8,%r12 + adcq %r13,%r9 + movq %r12,%rdx + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%r14 + adcq $0,%rax + + + subq 0(%rsi),%r12 + movq %r10,%r15 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r8 + sbbq 24(%rsi),%r11 + sbbq $0,%rax + + cmovncq %r12,%rdx + cmovncq %r9,%r14 + cmovncq %r10,%r15 + cmovncq %r11,%r8 + + decq %rbx + jnz L$oop_ord_sqrx + + movq %rdx,0(%rdi) + movq %r14,8(%rdi) + pxor %xmm1,%xmm1 + movq %r15,16(%rdi) + pxor %xmm2,%xmm2 + movq %r8,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$ord_sqrx_epilogue: + .byte 0xf3,0xc3 + + + + + + .globl _ecp_nistz256_to_mont .p2align 5 @@ -2723,15 +3854,23 @@ _ecp_nistz256_to_mont: .p2align 5 _ecp_nistz256_mul_mont: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx L$mul_mont: pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + +L$mul_body: cmpl $0x80100,%ecx je L$mul_montx movq %rdx,%rbx @@ -2756,16 +3895,26 @@ L$mul_montx: call __ecp_nistz256_mul_montx L$mul_mont_done: - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$mul_epilogue: .byte 0xf3,0xc3 + .p2align 5 __ecp_nistz256_mul_montq: @@ -2992,14 +4141,22 @@ __ecp_nistz256_mul_montq: .p2align 5 _ecp_nistz256_sqr_mont: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + +L$sqr_body: cmpl $0x80100,%ecx je L$sqr_montx movq 0(%rsi),%rax @@ -3020,16 +4177,26 @@ L$sqr_montx: call __ecp_nistz256_sqr_montx L$sqr_mont_done: - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + movq 0(%rsp),%r15 + + movq 8(%rsp),%r14 + + movq 16(%rsp),%r13 + + movq 24(%rsp),%r12 + + movq 32(%rsp),%rbx + + movq 40(%rsp),%rbp + + leaq 48(%rsp),%rsp + +L$sqr_epilogue: .byte 0xf3,0xc3 + .p2align 5 __ecp_nistz256_sqr_montq: movq %rax,%r13 @@ -3494,9 +4661,13 @@ __ecp_nistz256_sqr_montx: .p2align 5 _ecp_nistz256_from_mont: + pushq %r12 + pushq %r13 +L$from_body: + movq 0(%rsi),%rax movq L$poly+24(%rip),%r13 movq 8(%rsi),%r9 @@ -3576,12 +4747,18 @@ _ecp_nistz256_from_mont: movq %r10,16(%rdi) movq %r11,24(%rdi) - popq %r13 - popq %r12 + movq 0(%rsp),%r13 + + movq 8(%rsp),%r12 + + leaq 16(%rsp),%rsp + +L$from_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_scatter_w5 .p2align 5 @@ -3664,6 +4841,7 @@ L$select_loop_sse_w5: movdqu %xmm6,64(%rdi) movdqu %xmm7,80(%rdi) .byte 0xf3,0xc3 +L$SEH_end_ecp_nistz256_gather_w5: @@ -3734,6 +4912,7 @@ L$select_loop_sse_w7: movdqu %xmm4,32(%rdi) movdqu %xmm5,48(%rdi) .byte 0xf3,0xc3 +L$SEH_end_ecp_nistz256_gather_w7: @@ -3794,6 +4973,7 @@ L$select_loop_avx2_w5: vmovdqu %ymm4,64(%rdi) vzeroupper .byte 0xf3,0xc3 +L$SEH_end_ecp_nistz256_avx2_gather_w5: @@ -3871,6 +5051,7 @@ L$select_loop_avx2_w7: vmovdqu %ymm3,32(%rdi) vzeroupper .byte 0xf3,0xc3 +L$SEH_end_ecp_nistz256_avx2_gather_w7: .p2align 5 @@ -3997,18 +5178,27 @@ __ecp_nistz256_mul_by_2q: .p2align 5 _ecp_nistz256_point_double: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx cmpl $0x80100,%ecx je L$point_doublex pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp +L$point_doubleq_body: + L$point_double_shortcutq: movdqu 0(%rsi),%xmm0 movq %rsi,%rbx @@ -4190,31 +5380,51 @@ L$point_double_shortcutq: .byte 102,72,15,126,207 call __ecp_nistz256_sub_fromq - addq $160+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 160+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_doubleq_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_point_add .p2align 5 _ecp_nistz256_point_add: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx cmpl $0x80100,%ecx je L$point_addx pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp +L$point_addq_body: + movdqu 0(%rsi),%xmm0 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 @@ -4590,31 +5800,51 @@ L$add_proceedq: movdqu %xmm3,48(%rdi) L$add_doneq: - addq $576+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 576+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_addq_epilogue: .byte 0xf3,0xc3 + .globl _ecp_nistz256_point_add_affine .p2align 5 _ecp_nistz256_point_add_affine: + movl $0x80100,%ecx andl _OPENSSL_ia32cap_P+8(%rip),%ecx cmpl $0x80100,%ecx je L$point_add_affinex pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp +L$add_affineq_body: + movdqu 0(%rsi),%xmm0 movq %rdx,%rbx movdqu 16(%rsi),%xmm1 @@ -4896,16 +6126,27 @@ _ecp_nistz256_point_add_affine: movdqu %xmm2,32(%rdi) movdqu %xmm3,48(%rdi) - addq $480+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 480+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$add_affineq_epilogue: .byte 0xf3,0xc3 + .p2align 5 __ecp_nistz256_add_tox: xorq %r11,%r11 @@ -5035,15 +6276,24 @@ __ecp_nistz256_mul_by_2x: .p2align 5 ecp_nistz256_point_doublex: + L$point_doublex: pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp +L$point_doublex_body: + L$point_double_shortcutx: movdqu 0(%rsi),%xmm0 movq %rsi,%rbx @@ -5225,27 +6475,47 @@ L$point_double_shortcutx: .byte 102,72,15,126,207 call __ecp_nistz256_sub_fromx - addq $160+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 160+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_doublex_epilogue: .byte 0xf3,0xc3 + .p2align 5 ecp_nistz256_point_addx: + L$point_addx: pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp +L$point_addx_body: + movdqu 0(%rsi),%xmm0 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 @@ -5621,27 +6891,47 @@ L$add_proceedx: movdqu %xmm3,48(%rdi) L$add_donex: - addq $576+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 576+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$point_addx_epilogue: .byte 0xf3,0xc3 + .p2align 5 ecp_nistz256_point_add_affinex: + L$point_add_affinex: pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp +L$add_affinex_body: + movdqu 0(%rsi),%xmm0 movq %rdx,%rbx movdqu 16(%rsi),%xmm1 @@ -5923,12 +7213,23 @@ L$point_add_affinex: movdqu %xmm2,32(%rdi) movdqu %xmm3,48(%rdi) - addq $480+8,%rsp - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + leaq 480+56(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbx + + movq -8(%rsi),%rbp + + leaq (%rsi),%rsp + +L$add_affinex_epilogue: .byte 0xf3,0xc3 + diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/x25519-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/x25519-x86_64.s new file mode 100644 index 0000000000..cdb602d4cc --- /dev/null +++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/x25519-x86_64.s @@ -0,0 +1,760 @@ +.text + +.globl _x25519_fe51_mul + +.p2align 5 +_x25519_fe51_mul: + + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + leaq -40(%rsp),%rsp + +L$fe51_mul_body: + + movq 0(%rsi),%rax + movq 0(%rdx),%r11 + movq 8(%rdx),%r12 + movq 16(%rdx),%r13 + movq 24(%rdx),%rbp + movq 32(%rdx),%r14 + + movq %rdi,32(%rsp) + movq %rax,%rdi + mulq %r11 + movq %r11,0(%rsp) + movq %rax,%rbx + movq %rdi,%rax + movq %rdx,%rcx + mulq %r12 + movq %r12,8(%rsp) + movq %rax,%r8 + movq %rdi,%rax + leaq (%r14,%r14,8),%r15 + movq %rdx,%r9 + mulq %r13 + movq %r13,16(%rsp) + movq %rax,%r10 + movq %rdi,%rax + leaq (%r14,%r15,2),%rdi + movq %rdx,%r11 + mulq %rbp + movq %rax,%r12 + movq 0(%rsi),%rax + movq %rdx,%r13 + mulq %r14 + movq %rax,%r14 + movq 8(%rsi),%rax + movq %rdx,%r15 + + mulq %rdi + addq %rax,%rbx + movq 16(%rsi),%rax + adcq %rdx,%rcx + mulq %rdi + addq %rax,%r8 + movq 24(%rsi),%rax + adcq %rdx,%r9 + mulq %rdi + addq %rax,%r10 + movq 32(%rsi),%rax + adcq %rdx,%r11 + mulq %rdi + imulq $19,%rbp,%rdi + addq %rax,%r12 + movq 8(%rsi),%rax + adcq %rdx,%r13 + mulq %rbp + movq 16(%rsp),%rbp + addq %rax,%r14 + movq 16(%rsi),%rax + adcq %rdx,%r15 + + mulq %rdi + addq %rax,%rbx + movq 24(%rsi),%rax + adcq %rdx,%rcx + mulq %rdi + addq %rax,%r8 + movq 32(%rsi),%rax + adcq %rdx,%r9 + mulq %rdi + imulq $19,%rbp,%rdi + addq %rax,%r10 + movq 8(%rsi),%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r12 + movq 16(%rsi),%rax + adcq %rdx,%r13 + mulq %rbp + movq 8(%rsp),%rbp + addq %rax,%r14 + movq 24(%rsi),%rax + adcq %rdx,%r15 + + mulq %rdi + addq %rax,%rbx + movq 32(%rsi),%rax + adcq %rdx,%rcx + mulq %rdi + addq %rax,%r8 + movq 8(%rsi),%rax + adcq %rdx,%r9 + mulq %rbp + imulq $19,%rbp,%rdi + addq %rax,%r10 + movq 16(%rsi),%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r12 + movq 24(%rsi),%rax + adcq %rdx,%r13 + mulq %rbp + movq 0(%rsp),%rbp + addq %rax,%r14 + movq 32(%rsi),%rax + adcq %rdx,%r15 + + mulq %rdi + addq %rax,%rbx + movq 8(%rsi),%rax + adcq %rdx,%rcx + mulq %rbp + addq %rax,%r8 + movq 16(%rsi),%rax + adcq %rdx,%r9 + mulq %rbp + addq %rax,%r10 + movq 24(%rsi),%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r12 + movq 32(%rsi),%rax + adcq %rdx,%r13 + mulq %rbp + addq %rax,%r14 + adcq %rdx,%r15 + + movq 32(%rsp),%rdi + jmp L$reduce51 +L$fe51_mul_epilogue: + + + +.globl _x25519_fe51_sqr + +.p2align 5 +_x25519_fe51_sqr: + + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + leaq -40(%rsp),%rsp + +L$fe51_sqr_body: + + movq 0(%rsi),%rax + movq 16(%rsi),%r15 + movq 32(%rsi),%rbp + + movq %rdi,32(%rsp) + leaq (%rax,%rax,1),%r14 + mulq %rax + movq %rax,%rbx + movq 8(%rsi),%rax + movq %rdx,%rcx + mulq %r14 + movq %rax,%r8 + movq %r15,%rax + movq %r15,0(%rsp) + movq %rdx,%r9 + mulq %r14 + movq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%r11 + imulq $19,%rbp,%rdi + mulq %r14 + movq %rax,%r12 + movq %rbp,%rax + movq %rdx,%r13 + mulq %r14 + movq %rax,%r14 + movq %rbp,%rax + movq %rdx,%r15 + + mulq %rdi + addq %rax,%r12 + movq 8(%rsi),%rax + adcq %rdx,%r13 + + movq 24(%rsi),%rsi + leaq (%rax,%rax,1),%rbp + mulq %rax + addq %rax,%r10 + movq 0(%rsp),%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r12 + movq %rbp,%rax + adcq %rdx,%r13 + mulq %rsi + addq %rax,%r14 + movq %rbp,%rax + adcq %rdx,%r15 + imulq $19,%rsi,%rbp + mulq %rdi + addq %rax,%rbx + leaq (%rsi,%rsi,1),%rax + adcq %rdx,%rcx + + mulq %rdi + addq %rax,%r10 + movq %rsi,%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r8 + movq 0(%rsp),%rax + adcq %rdx,%r9 + + leaq (%rax,%rax,1),%rsi + mulq %rax + addq %rax,%r14 + movq %rbp,%rax + adcq %rdx,%r15 + mulq %rsi + addq %rax,%rbx + movq %rsi,%rax + adcq %rdx,%rcx + mulq %rdi + addq %rax,%r8 + adcq %rdx,%r9 + + movq 32(%rsp),%rdi + jmp L$reduce51 + +.p2align 5 +L$reduce51: + movq $0x7ffffffffffff,%rbp + + movq %r10,%rdx + shrq $51,%r10 + shlq $13,%r11 + andq %rbp,%rdx + orq %r10,%r11 + addq %r11,%r12 + adcq $0,%r13 + + movq %rbx,%rax + shrq $51,%rbx + shlq $13,%rcx + andq %rbp,%rax + orq %rbx,%rcx + addq %rcx,%r8 + adcq $0,%r9 + + movq %r12,%rbx + shrq $51,%r12 + shlq $13,%r13 + andq %rbp,%rbx + orq %r12,%r13 + addq %r13,%r14 + adcq $0,%r15 + + movq %r8,%rcx + shrq $51,%r8 + shlq $13,%r9 + andq %rbp,%rcx + orq %r8,%r9 + addq %r9,%rdx + + movq %r14,%r10 + shrq $51,%r14 + shlq $13,%r15 + andq %rbp,%r10 + orq %r14,%r15 + + leaq (%r15,%r15,8),%r14 + leaq (%r15,%r14,2),%r15 + addq %r15,%rax + + movq %rdx,%r8 + andq %rbp,%rdx + shrq $51,%r8 + addq %r8,%rbx + + movq %rax,%r9 + andq %rbp,%rax + shrq $51,%r9 + addq %r9,%rcx + + movq %rax,0(%rdi) + movq %rcx,8(%rdi) + movq %rdx,16(%rdi) + movq %rbx,24(%rdi) + movq %r10,32(%rdi) + + movq 40(%rsp),%r15 + + movq 48(%rsp),%r14 + + movq 56(%rsp),%r13 + + movq 64(%rsp),%r12 + + movq 72(%rsp),%rbx + + movq 80(%rsp),%rbp + + leaq 88(%rsp),%rsp + +L$fe51_sqr_epilogue: + .byte 0xf3,0xc3 + + + +.globl _x25519_fe51_mul121666 + +.p2align 5 +_x25519_fe51_mul121666: + + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + leaq -40(%rsp),%rsp + +L$fe51_mul121666_body: + movl $121666,%eax + + mulq 0(%rsi) + movq %rax,%rbx + movl $121666,%eax + movq %rdx,%rcx + mulq 8(%rsi) + movq %rax,%r8 + movl $121666,%eax + movq %rdx,%r9 + mulq 16(%rsi) + movq %rax,%r10 + movl $121666,%eax + movq %rdx,%r11 + mulq 24(%rsi) + movq %rax,%r12 + movl $121666,%eax + movq %rdx,%r13 + mulq 32(%rsi) + movq %rax,%r14 + movq %rdx,%r15 + + jmp L$reduce51 +L$fe51_mul121666_epilogue: + + + +.globl _x25519_fe64_eligible + +.p2align 5 +_x25519_fe64_eligible: + movl _OPENSSL_ia32cap_P+8(%rip),%ecx + xorl %eax,%eax + andl $0x80100,%ecx + cmpl $0x80100,%ecx + cmovel %ecx,%eax + .byte 0xf3,0xc3 + + +.globl _x25519_fe64_mul + +.p2align 5 +_x25519_fe64_mul: + + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdi + + leaq -16(%rsp),%rsp + +L$fe64_mul_body: + + movq %rdx,%rax + movq 0(%rdx),%rbp + movq 0(%rsi),%rdx + movq 8(%rax),%rcx + movq 16(%rax),%r14 + movq 24(%rax),%r15 + + mulxq %rbp,%r8,%rax + xorl %edi,%edi + mulxq %rcx,%r9,%rbx + adcxq %rax,%r9 + mulxq %r14,%r10,%rax + adcxq %rbx,%r10 + mulxq %r15,%r11,%r12 + movq 8(%rsi),%rdx + adcxq %rax,%r11 + movq %r14,(%rsp) + adcxq %rdi,%r12 + + mulxq %rbp,%rax,%rbx + adoxq %rax,%r9 + adcxq %rbx,%r10 + mulxq %rcx,%rax,%rbx + adoxq %rax,%r10 + adcxq %rbx,%r11 + mulxq %r14,%rax,%rbx + adoxq %rax,%r11 + adcxq %rbx,%r12 + mulxq %r15,%rax,%r13 + movq 16(%rsi),%rdx + adoxq %rax,%r12 + adcxq %rdi,%r13 + adoxq %rdi,%r13 + + mulxq %rbp,%rax,%rbx + adcxq %rax,%r10 + adoxq %rbx,%r11 + mulxq %rcx,%rax,%rbx + adcxq %rax,%r11 + adoxq %rbx,%r12 + mulxq %r14,%rax,%rbx + adcxq %rax,%r12 + adoxq %rbx,%r13 + mulxq %r15,%rax,%r14 + movq 24(%rsi),%rdx + adcxq %rax,%r13 + adoxq %rdi,%r14 + adcxq %rdi,%r14 + + mulxq %rbp,%rax,%rbx + adoxq %rax,%r11 + adcxq %rbx,%r12 + mulxq %rcx,%rax,%rbx + adoxq %rax,%r12 + adcxq %rbx,%r13 + mulxq (%rsp),%rax,%rbx + adoxq %rax,%r13 + adcxq %rbx,%r14 + mulxq %r15,%rax,%r15 + movl $38,%edx + adoxq %rax,%r14 + adcxq %rdi,%r15 + adoxq %rdi,%r15 + + jmp L$reduce64 +L$fe64_mul_epilogue: + + + +.globl _x25519_fe64_sqr + +.p2align 5 +_x25519_fe64_sqr: + + pushq %rbp + + pushq %rbx + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + pushq %rdi + + leaq -16(%rsp),%rsp + +L$fe64_sqr_body: + + movq 0(%rsi),%rdx + movq 8(%rsi),%rcx + movq 16(%rsi),%rbp + movq 24(%rsi),%rsi + + + mulxq %rdx,%r8,%r15 + mulxq %rcx,%r9,%rax + xorl %edi,%edi + mulxq %rbp,%r10,%rbx + adcxq %rax,%r10 + mulxq %rsi,%r11,%r12 + movq %rcx,%rdx + adcxq %rbx,%r11 + adcxq %rdi,%r12 + + + mulxq %rbp,%rax,%rbx + adoxq %rax,%r11 + adcxq %rbx,%r12 + mulxq %rsi,%rax,%r13 + movq %rbp,%rdx + adoxq %rax,%r12 + adcxq %rdi,%r13 + + + mulxq %rsi,%rax,%r14 + movq %rcx,%rdx + adoxq %rax,%r13 + adcxq %rdi,%r14 + adoxq %rdi,%r14 + + adcxq %r9,%r9 + adoxq %r15,%r9 + adcxq %r10,%r10 + mulxq %rdx,%rax,%rbx + movq %rbp,%rdx + adcxq %r11,%r11 + adoxq %rax,%r10 + adcxq %r12,%r12 + adoxq %rbx,%r11 + mulxq %rdx,%rax,%rbx + movq %rsi,%rdx + adcxq %r13,%r13 + adoxq %rax,%r12 + adcxq %r14,%r14 + adoxq %rbx,%r13 + mulxq %rdx,%rax,%r15 + movl $38,%edx + adoxq %rax,%r14 + adcxq %rdi,%r15 + adoxq %rdi,%r15 + jmp L$reduce64 + +.p2align 5 +L$reduce64: + mulxq %r12,%rax,%rbx + adcxq %rax,%r8 + adoxq %rbx,%r9 + mulxq %r13,%rax,%rbx + adcxq %rax,%r9 + adoxq %rbx,%r10 + mulxq %r14,%rax,%rbx + adcxq %rax,%r10 + adoxq %rbx,%r11 + mulxq %r15,%rax,%r12 + adcxq %rax,%r11 + adoxq %rdi,%r12 + adcxq %rdi,%r12 + + movq 16(%rsp),%rdi + imulq %rdx,%r12 + + addq %r12,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + sbbq %rax,%rax + andq $38,%rax + + addq %rax,%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r8,0(%rdi) + + movq 24(%rsp),%r15 + + movq 32(%rsp),%r14 + + movq 40(%rsp),%r13 + + movq 48(%rsp),%r12 + + movq 56(%rsp),%rbx + + movq 64(%rsp),%rbp + + leaq 72(%rsp),%rsp + +L$fe64_sqr_epilogue: + .byte 0xf3,0xc3 + + + +.globl _x25519_fe64_mul121666 + +.p2align 5 +_x25519_fe64_mul121666: +L$fe64_mul121666_body: + movl $121666,%edx + mulxq 0(%rsi),%r8,%rcx + mulxq 8(%rsi),%r9,%rax + addq %rcx,%r9 + mulxq 16(%rsi),%r10,%rcx + adcq %rax,%r10 + mulxq 24(%rsi),%r11,%rax + adcq %rcx,%r11 + adcq $0,%rax + + imulq $38,%rax,%rax + + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + sbbq %rax,%rax + andq $38,%rax + + addq %rax,%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r8,0(%rdi) + +L$fe64_mul121666_epilogue: + .byte 0xf3,0xc3 + + +.globl _x25519_fe64_add + +.p2align 5 +_x25519_fe64_add: +L$fe64_add_body: + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + + sbbq %rax,%rax + andq $38,%rax + + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + movq %r9,8(%rdi) + adcq $0,%r11 + movq %r10,16(%rdi) + sbbq %rax,%rax + movq %r11,24(%rdi) + andq $38,%rax + + addq %rax,%r8 + movq %r8,0(%rdi) + +L$fe64_add_epilogue: + .byte 0xf3,0xc3 + + +.globl _x25519_fe64_sub + +.p2align 5 +_x25519_fe64_sub: +L$fe64_sub_body: + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + subq 0(%rdx),%r8 + sbbq 8(%rdx),%r9 + sbbq 16(%rdx),%r10 + sbbq 24(%rdx),%r11 + + sbbq %rax,%rax + andq $38,%rax + + subq %rax,%r8 + sbbq $0,%r9 + sbbq $0,%r10 + movq %r9,8(%rdi) + sbbq $0,%r11 + movq %r10,16(%rdi) + sbbq %rax,%rax + movq %r11,24(%rdi) + andq $38,%rax + + subq %rax,%r8 + movq %r8,0(%rdi) + +L$fe64_sub_epilogue: + .byte 0xf3,0xc3 + + +.globl _x25519_fe64_tobytes + +.p2align 5 +_x25519_fe64_tobytes: +L$fe64_to_body: + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + + leaq (%r11,%r11,1),%rax + sarq $63,%r11 + shrq $1,%rax + andq $19,%r11 + addq $19,%r11 + + addq %r11,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%rax + + leaq (%rax,%rax,1),%r11 + sarq $63,%rax + shrq $1,%r11 + notq %rax + andq $19,%rax + + subq %rax,%r8 + sbbq $0,%r9 + sbbq $0,%r10 + sbbq $0,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + +L$fe64_to_epilogue: + .byte 0xf3,0xc3 + +.byte 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |