Diffstat (limited to 'deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec')
-rw-r--r--  deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s  1437
-rw-r--r--  deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/x25519-x86_64.s         760
2 files changed, 2129 insertions(+), 68 deletions(-)
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s
index 77102c6a41..302649aacc 100644
--- a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/ecp_nistz256-x86_64.s
@@ -2393,13 +2393,23 @@ L$Three:
L$ONE_mont:
.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
+
+L$ord:
+.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
+L$ordK:
+.quad 0xccd1c8aaee00bc4f
+
.globl _ecp_nistz256_mul_by_2
.p2align 6
_ecp_nistz256_mul_by_2:
+
pushq %r12
+
pushq %r13
+L$mul_by_2_body:
+
movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9
@@ -2431,20 +2441,30 @@ _ecp_nistz256_mul_by_2:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+
+ movq 8(%rsp),%r12
+
+ leaq 16(%rsp),%rsp
+
+L$mul_by_2_epilogue:
.byte 0xf3,0xc3
+
.globl _ecp_nistz256_div_by_2
.p2align 5
_ecp_nistz256_div_by_2:
+
pushq %r12
+
pushq %r13
+L$div_by_2_body:
+
movq 0(%rsi),%r8
movq 8(%rsi),%r9
movq 16(%rsi),%r10
@@ -2491,20 +2511,30 @@ _ecp_nistz256_div_by_2:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+
+ movq 8(%rsp),%r12
+
+ leaq 16(%rsp),%rsp
+
+L$div_by_2_epilogue:
.byte 0xf3,0xc3
+
.globl _ecp_nistz256_mul_by_3
.p2align 5
_ecp_nistz256_mul_by_3:
+
pushq %r12
+
pushq %r13
+L$mul_by_3_body:
+
movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9
@@ -2557,20 +2587,30 @@ _ecp_nistz256_mul_by_3:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+
+ movq 8(%rsp),%r12
+
+ leaq 16(%rsp),%rsp
+
+L$mul_by_3_epilogue:
.byte 0xf3,0xc3
+
.globl _ecp_nistz256_add
.p2align 5
_ecp_nistz256_add:
+
pushq %r12
+
pushq %r13
+L$add_body:
+
movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9
@@ -2603,20 +2643,30 @@ _ecp_nistz256_add:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+
+ movq 8(%rsp),%r12
+
+ leaq 16(%rsp),%rsp
+
+L$add_epilogue:
.byte 0xf3,0xc3
+
.globl _ecp_nistz256_sub
.p2align 5
_ecp_nistz256_sub:
+
pushq %r12
+
pushq %r13
+L$sub_body:
+
movq 0(%rsi),%r8
xorq %r13,%r13
movq 8(%rsi),%r9
@@ -2649,20 +2699,30 @@ _ecp_nistz256_sub:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+
+ movq 8(%rsp),%r12
+
+ leaq 16(%rsp),%rsp
+
+L$sub_epilogue:
.byte 0xf3,0xc3
+
.globl _ecp_nistz256_neg
.p2align 5
_ecp_nistz256_neg:
+
pushq %r12
+
pushq %r13
+L$neg_body:
+
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
@@ -2695,14 +2755,1085 @@ _ecp_nistz256_neg:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+
+ movq 8(%rsp),%r12
+
+ leaq 16(%rsp),%rsp
+
+L$neg_epilogue:
.byte 0xf3,0xc3
+
+
+
+.globl _ecp_nistz256_ord_mul_mont
+
+.p2align 5
+_ecp_nistz256_ord_mul_mont:
+
+ movl $0x80100,%ecx
+ andl _OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je L$ecp_nistz256_ord_mul_montx
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+L$ord_mul_body:
+
+ movq 0(%rdx),%rax
+ movq %rdx,%rbx
+ leaq L$ord(%rip),%r14
+ movq L$ordK(%rip),%r15
+
+
+ movq %rax,%rcx
+ mulq 0(%rsi)
+ movq %rax,%r8
+ movq %rcx,%rax
+ movq %rdx,%r9
+
+ mulq 8(%rsi)
+ addq %rax,%r9
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%r10
+
+ mulq 16(%rsi)
+ addq %rax,%r10
+ movq %rcx,%rax
+ adcq $0,%rdx
+
+ movq %r8,%r13
+ imulq %r15,%r8
+
+ movq %rdx,%r11
+ mulq 24(%rsi)
+ addq %rax,%r11
+ movq %r8,%rax
+ adcq $0,%rdx
+ movq %rdx,%r12
+
+
+ mulq 0(%r14)
+ movq %r8,%rbp
+ addq %rax,%r13
+ movq %r8,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ subq %r8,%r10
+ sbbq $0,%r8
+
+ mulq 8(%r14)
+ addq %rcx,%r9
+ adcq $0,%rdx
+ addq %rax,%r9
+ movq %rbp,%rax
+ adcq %rdx,%r10
+ movq %rbp,%rdx
+ adcq $0,%r8
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r11
+ movq 8(%rbx),%rax
+ sbbq %rdx,%rbp
+
+ addq %r8,%r11
+ adcq %rbp,%r12
+ adcq $0,%r13
+
+
+ movq %rax,%rcx
+ mulq 0(%rsi)
+ addq %rax,%r9
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 8(%rsi)
+ addq %rbp,%r10
+ adcq $0,%rdx
+ addq %rax,%r10
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 16(%rsi)
+ addq %rbp,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %rcx,%rax
+ adcq $0,%rdx
+
+ movq %r9,%rcx
+ imulq %r15,%r9
+
+ movq %rdx,%rbp
+ mulq 24(%rsi)
+ addq %rbp,%r12
+ adcq $0,%rdx
+ xorq %r8,%r8
+ addq %rax,%r12
+ movq %r9,%rax
+ adcq %rdx,%r13
+ adcq $0,%r8
+
+
+ mulq 0(%r14)
+ movq %r9,%rbp
+ addq %rax,%rcx
+ movq %r9,%rax
+ adcq %rdx,%rcx
+
+ subq %r9,%r11
+ sbbq $0,%r9
+
+ mulq 8(%r14)
+ addq %rcx,%r10
+ adcq $0,%rdx
+ addq %rax,%r10
+ movq %rbp,%rax
+ adcq %rdx,%r11
+ movq %rbp,%rdx
+ adcq $0,%r9
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r12
+ movq 16(%rbx),%rax
+ sbbq %rdx,%rbp
+
+ addq %r9,%r12
+ adcq %rbp,%r13
+ adcq $0,%r8
+
+
+ movq %rax,%rcx
+ mulq 0(%rsi)
+ addq %rax,%r10
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 8(%rsi)
+ addq %rbp,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 16(%rsi)
+ addq %rbp,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %rcx,%rax
+ adcq $0,%rdx
+
+ movq %r10,%rcx
+ imulq %r15,%r10
+
+ movq %rdx,%rbp
+ mulq 24(%rsi)
+ addq %rbp,%r13
+ adcq $0,%rdx
+ xorq %r9,%r9
+ addq %rax,%r13
+ movq %r10,%rax
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+
+ mulq 0(%r14)
+ movq %r10,%rbp
+ addq %rax,%rcx
+ movq %r10,%rax
+ adcq %rdx,%rcx
+
+ subq %r10,%r12
+ sbbq $0,%r10
+
+ mulq 8(%r14)
+ addq %rcx,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %rbp,%rax
+ adcq %rdx,%r12
+ movq %rbp,%rdx
+ adcq $0,%r10
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r13
+ movq 24(%rbx),%rax
+ sbbq %rdx,%rbp
+
+ addq %r10,%r13
+ adcq %rbp,%r8
+ adcq $0,%r9
+
+
+ movq %rax,%rcx
+ mulq 0(%rsi)
+ addq %rax,%r11
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 8(%rsi)
+ addq %rbp,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 16(%rsi)
+ addq %rbp,%r13
+ adcq $0,%rdx
+ addq %rax,%r13
+ movq %rcx,%rax
+ adcq $0,%rdx
+
+ movq %r11,%rcx
+ imulq %r15,%r11
+
+ movq %rdx,%rbp
+ mulq 24(%rsi)
+ addq %rbp,%r8
+ adcq $0,%rdx
+ xorq %r10,%r10
+ addq %rax,%r8
+ movq %r11,%rax
+ adcq %rdx,%r9
+ adcq $0,%r10
+
+
+ mulq 0(%r14)
+ movq %r11,%rbp
+ addq %rax,%rcx
+ movq %r11,%rax
+ adcq %rdx,%rcx
+
+ subq %r11,%r13
+ sbbq $0,%r11
+
+ mulq 8(%r14)
+ addq %rcx,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %rbp,%rax
+ adcq %rdx,%r13
+ movq %rbp,%rdx
+ adcq $0,%r11
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r8
+ sbbq %rdx,%rbp
+
+ addq %r11,%r8
+ adcq %rbp,%r9
+ adcq $0,%r10
+
+
+ movq %r12,%rsi
+ subq 0(%r14),%r12
+ movq %r13,%r11
+ sbbq 8(%r14),%r13
+ movq %r8,%rcx
+ sbbq 16(%r14),%r8
+ movq %r9,%rbp
+ sbbq 24(%r14),%r9
+ sbbq $0,%r10
+
+ cmovcq %rsi,%r12
+ cmovcq %r11,%r13
+ cmovcq %rcx,%r8
+ cmovcq %rbp,%r9
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ movq 0(%rsp),%r15
+
+ movq 8(%rsp),%r14
+
+ movq 16(%rsp),%r13
+
+ movq 24(%rsp),%r12
+
+ movq 32(%rsp),%rbx
+
+ movq 40(%rsp),%rbp
+
+ leaq 48(%rsp),%rsp
+
+L$ord_mul_epilogue:
+ .byte 0xf3,0xc3
+
+
+
+
+
+
+
+
+
+.globl _ecp_nistz256_ord_sqr_mont
+
+.p2align 5
+_ecp_nistz256_ord_sqr_mont:
+
+ movl $0x80100,%ecx
+ andl _OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je L$ecp_nistz256_ord_sqr_montx
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+L$ord_sqr_body:
+
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%rax
+ movq 16(%rsi),%r14
+ movq 24(%rsi),%r15
+ leaq L$ord(%rip),%rsi
+ movq %rdx,%rbx
+ jmp L$oop_ord_sqr
+
+.p2align 5
+L$oop_ord_sqr:
+
+ movq %rax,%rbp
+ mulq %r8
+ movq %rax,%r9
+.byte 102,72,15,110,205
+ movq %r14,%rax
+ movq %rdx,%r10
+
+ mulq %r8
+ addq %rax,%r10
+ movq %r15,%rax
+.byte 102,73,15,110,214
+ adcq $0,%rdx
+ movq %rdx,%r11
+
+ mulq %r8
+ addq %rax,%r11
+ movq %r15,%rax
+.byte 102,73,15,110,223
+ adcq $0,%rdx
+ movq %rdx,%r12
+
+
+ mulq %r14
+ movq %rax,%r13
+ movq %r14,%rax
+ movq %rdx,%r14
+
+
+ mulq %rbp
+ addq %rax,%r11
+ movq %r15,%rax
+ adcq $0,%rdx
+ movq %rdx,%r15
+
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+
+ addq %r15,%r12
+ adcq %rdx,%r13
+ adcq $0,%r14
+
+
+ xorq %r15,%r15
+ movq %r8,%rax
+ addq %r9,%r9
+ adcq %r10,%r10
+ adcq %r11,%r11
+ adcq %r12,%r12
+ adcq %r13,%r13
+ adcq %r14,%r14
+ adcq $0,%r15
+
+
+ mulq %rax
+ movq %rax,%r8
+.byte 102,72,15,126,200
+ movq %rdx,%rbp
+
+ mulq %rax
+ addq %rbp,%r9
+ adcq %rax,%r10
+.byte 102,72,15,126,208
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq %rax
+ addq %rbp,%r11
+ adcq %rax,%r12
+.byte 102,72,15,126,216
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ movq %r8,%rcx
+ imulq 32(%rsi),%r8
+
+ mulq %rax
+ addq %rbp,%r13
+ adcq %rax,%r14
+ movq 0(%rsi),%rax
+ adcq %rdx,%r15
+
+
+ mulq %r8
+ movq %r8,%rbp
+ addq %rax,%rcx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+
+ subq %r8,%r10
+ sbbq $0,%rbp
+
+ mulq %r8
+ addq %rcx,%r9
+ adcq $0,%rdx
+ addq %rax,%r9
+ movq %r8,%rax
+ adcq %rdx,%r10
+ movq %r8,%rdx
+ adcq $0,%rbp
+
+ movq %r9,%rcx
+ imulq 32(%rsi),%r9
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r11
+ movq 0(%rsi),%rax
+ sbbq %rdx,%r8
+
+ addq %rbp,%r11
+ adcq $0,%r8
+
+
+ mulq %r9
+ movq %r9,%rbp
+ addq %rax,%rcx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+
+ subq %r9,%r11
+ sbbq $0,%rbp
+
+ mulq %r9
+ addq %rcx,%r10
+ adcq $0,%rdx
+ addq %rax,%r10
+ movq %r9,%rax
+ adcq %rdx,%r11
+ movq %r9,%rdx
+ adcq $0,%rbp
+
+ movq %r10,%rcx
+ imulq 32(%rsi),%r10
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r8
+ movq 0(%rsi),%rax
+ sbbq %rdx,%r9
+
+ addq %rbp,%r8
+ adcq $0,%r9
+
+
+ mulq %r10
+ movq %r10,%rbp
+ addq %rax,%rcx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+
+ subq %r10,%r8
+ sbbq $0,%rbp
+
+ mulq %r10
+ addq %rcx,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %r10,%rax
+ adcq %rdx,%r8
+ movq %r10,%rdx
+ adcq $0,%rbp
+
+ movq %r11,%rcx
+ imulq 32(%rsi),%r11
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r9
+ movq 0(%rsi),%rax
+ sbbq %rdx,%r10
+
+ addq %rbp,%r9
+ adcq $0,%r10
+
+
+ mulq %r11
+ movq %r11,%rbp
+ addq %rax,%rcx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+
+ subq %r11,%r9
+ sbbq $0,%rbp
+
+ mulq %r11
+ addq %rcx,%r8
+ adcq $0,%rdx
+ addq %rax,%r8
+ movq %r11,%rax
+ adcq %rdx,%r9
+ movq %r11,%rdx
+ adcq $0,%rbp
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r10
+ sbbq %rdx,%r11
+
+ addq %rbp,%r10
+ adcq $0,%r11
+
+
+ xorq %rdx,%rdx
+ addq %r12,%r8
+ adcq %r13,%r9
+ movq %r8,%r12
+ adcq %r14,%r10
+ adcq %r15,%r11
+ movq %r9,%rax
+ adcq $0,%rdx
+
+
+ subq 0(%rsi),%r8
+ movq %r10,%r14
+ sbbq 8(%rsi),%r9
+ sbbq 16(%rsi),%r10
+ movq %r11,%r15
+ sbbq 24(%rsi),%r11
+ sbbq $0,%rdx
+
+ cmovcq %r12,%r8
+ cmovncq %r9,%rax
+ cmovncq %r10,%r14
+ cmovncq %r11,%r15
+
+ decq %rbx
+ jnz L$oop_ord_sqr
+
+ movq %r8,0(%rdi)
+ movq %rax,8(%rdi)
+ pxor %xmm1,%xmm1
+ movq %r14,16(%rdi)
+ pxor %xmm2,%xmm2
+ movq %r15,24(%rdi)
+ pxor %xmm3,%xmm3
+
+ movq 0(%rsp),%r15
+
+ movq 8(%rsp),%r14
+
+ movq 16(%rsp),%r13
+
+ movq 24(%rsp),%r12
+
+ movq 32(%rsp),%rbx
+
+ movq 40(%rsp),%rbp
+
+ leaq 48(%rsp),%rsp
+
+L$ord_sqr_epilogue:
+ .byte 0xf3,0xc3
+
+
+
+
+.p2align 5
+ecp_nistz256_ord_mul_montx:
+
+L$ecp_nistz256_ord_mul_montx:
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+L$ord_mulx_body:
+
+ movq %rdx,%rbx
+ movq 0(%rdx),%rdx
+ movq 0(%rsi),%r9
+ movq 8(%rsi),%r10
+ movq 16(%rsi),%r11
+ movq 24(%rsi),%r12
+ leaq -128(%rsi),%rsi
+ leaq L$ord-128(%rip),%r14
+ movq L$ordK(%rip),%r15
+
+
+ mulxq %r9,%r8,%r9
+ mulxq %r10,%rcx,%r10
+ mulxq %r11,%rbp,%r11
+ addq %rcx,%r9
+ mulxq %r12,%rcx,%r12
+ movq %r8,%rdx
+ mulxq %r15,%rdx,%rax
+ adcq %rbp,%r10
+ adcq %rcx,%r11
+ adcq $0,%r12
+
+
+ xorq %r13,%r13
+ mulxq 0+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+
+ mulxq 8+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+
+ mulxq 16+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 24+128(%r14),%rcx,%rbp
+ movq 8(%rbx),%rdx
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+ adcxq %r8,%r12
+ adoxq %r8,%r13
+ adcq $0,%r13
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r9,%rdx
+ mulxq %r15,%rdx,%rax
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ adcxq %r8,%r13
+ adoxq %r8,%r8
+ adcq $0,%r8
+
+
+ mulxq 0+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+
+ mulxq 8+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 16+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 24+128(%r14),%rcx,%rbp
+ movq 16(%rbx),%rdx
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+ adcxq %r9,%r13
+ adoxq %r9,%r8
+ adcq $0,%r8
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r10,%rdx
+ mulxq %r15,%rdx,%rax
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+
+ adcxq %r9,%r8
+ adoxq %r9,%r9
+ adcq $0,%r9
+
+
+ mulxq 0+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 8+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 16+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 24+128(%r14),%rcx,%rbp
+ movq 24(%rbx),%rdx
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+ adcxq %r10,%r8
+ adoxq %r10,%r9
+ adcq $0,%r9
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r11,%rdx
+ mulxq %r15,%rdx,%rax
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+
+ adcxq %r10,%r9
+ adoxq %r10,%r10
+ adcq $0,%r10
+
+
+ mulxq 0+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 8+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 16+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+
+ mulxq 24+128(%r14),%rcx,%rbp
+ leaq 128(%r14),%r14
+ movq %r12,%rbx
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+ movq %r13,%rdx
+ adcxq %r11,%r9
+ adoxq %r11,%r10
+ adcq $0,%r10
+
+
+
+ movq %r8,%rcx
+ subq 0(%r14),%r12
+ sbbq 8(%r14),%r13
+ sbbq 16(%r14),%r8
+ movq %r9,%rbp
+ sbbq 24(%r14),%r9
+ sbbq $0,%r10
+
+ cmovcq %rbx,%r12
+ cmovcq %rdx,%r13
+ cmovcq %rcx,%r8
+ cmovcq %rbp,%r9
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ movq 0(%rsp),%r15
+
+ movq 8(%rsp),%r14
+
+ movq 16(%rsp),%r13
+
+ movq 24(%rsp),%r12
+
+ movq 32(%rsp),%rbx
+
+ movq 40(%rsp),%rbp
+
+ leaq 48(%rsp),%rsp
+
+L$ord_mulx_epilogue:
+ .byte 0xf3,0xc3
+
+
+
+
+.p2align 5
+ecp_nistz256_ord_sqr_montx:
+
+L$ecp_nistz256_ord_sqr_montx:
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+L$ord_sqrx_body:
+
+ movq %rdx,%rbx
+ movq 0(%rsi),%rdx
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r15
+ movq 24(%rsi),%r8
+ leaq L$ord(%rip),%rsi
+ jmp L$oop_ord_sqrx
+
+.p2align 5
+L$oop_ord_sqrx:
+ mulxq %r14,%r9,%r10
+ mulxq %r15,%rcx,%r11
+ movq %rdx,%rax
+.byte 102,73,15,110,206
+ mulxq %r8,%rbp,%r12
+ movq %r14,%rdx
+ addq %rcx,%r10
+.byte 102,73,15,110,215
+ adcq %rbp,%r11
+ adcq $0,%r12
+ xorq %r13,%r13
+
+ mulxq %r15,%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq %r8,%rcx,%rbp
+ movq %r15,%rdx
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+ adcq $0,%r13
+
+ mulxq %r8,%rcx,%r14
+ movq %rax,%rdx
+.byte 102,73,15,110,216
+ xorq %r15,%r15
+ adcxq %r9,%r9
+ adoxq %rcx,%r13
+ adcxq %r10,%r10
+ adoxq %r15,%r14
+
+
+ mulxq %rdx,%r8,%rbp
+.byte 102,72,15,126,202
+ adcxq %r11,%r11
+ adoxq %rbp,%r9
+ adcxq %r12,%r12
+ mulxq %rdx,%rcx,%rax
+.byte 102,72,15,126,210
+ adcxq %r13,%r13
+ adoxq %rcx,%r10
+ adcxq %r14,%r14
+ mulxq %rdx,%rcx,%rbp
+.byte 0x67
+.byte 102,72,15,126,218
+ adoxq %rax,%r11
+ adcxq %r15,%r15
+ adoxq %rcx,%r12
+ adoxq %rbp,%r13
+ mulxq %rdx,%rcx,%rax
+ adoxq %rcx,%r14
+ adoxq %rax,%r15
+
+
+ movq %r8,%rdx
+ mulxq 32(%rsi),%rdx,%rcx
+
+ xorq %rax,%rax
+ mulxq 0(%rsi),%rcx,%rbp
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+ mulxq 8(%rsi),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+ mulxq 16(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+ mulxq 24(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r8
+ adcxq %rax,%r8
+
+
+ movq %r9,%rdx
+ mulxq 32(%rsi),%rdx,%rcx
+
+ mulxq 0(%rsi),%rcx,%rbp
+ adoxq %rcx,%r9
+ adcxq %rbp,%r10
+ mulxq 8(%rsi),%rcx,%rbp
+ adoxq %rcx,%r10
+ adcxq %rbp,%r11
+ mulxq 16(%rsi),%rcx,%rbp
+ adoxq %rcx,%r11
+ adcxq %rbp,%r8
+ mulxq 24(%rsi),%rcx,%rbp
+ adoxq %rcx,%r8
+ adcxq %rbp,%r9
+ adoxq %rax,%r9
+
+
+ movq %r10,%rdx
+ mulxq 32(%rsi),%rdx,%rcx
+
+ mulxq 0(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+ mulxq 8(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r8
+ mulxq 16(%rsi),%rcx,%rbp
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+ mulxq 24(%rsi),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+ adcxq %rax,%r10
+
+
+ movq %r11,%rdx
+ mulxq 32(%rsi),%rdx,%rcx
+
+ mulxq 0(%rsi),%rcx,%rbp
+ adoxq %rcx,%r11
+ adcxq %rbp,%r8
+ mulxq 8(%rsi),%rcx,%rbp
+ adoxq %rcx,%r8
+ adcxq %rbp,%r9
+ mulxq 16(%rsi),%rcx,%rbp
+ adoxq %rcx,%r9
+ adcxq %rbp,%r10
+ mulxq 24(%rsi),%rcx,%rbp
+ adoxq %rcx,%r10
+ adcxq %rbp,%r11
+ adoxq %rax,%r11
+
+
+ addq %r8,%r12
+ adcq %r13,%r9
+ movq %r12,%rdx
+ adcq %r14,%r10
+ adcq %r15,%r11
+ movq %r9,%r14
+ adcq $0,%rax
+
+
+ subq 0(%rsi),%r12
+ movq %r10,%r15
+ sbbq 8(%rsi),%r9
+ sbbq 16(%rsi),%r10
+ movq %r11,%r8
+ sbbq 24(%rsi),%r11
+ sbbq $0,%rax
+
+ cmovncq %r12,%rdx
+ cmovncq %r9,%r14
+ cmovncq %r10,%r15
+ cmovncq %r11,%r8
+
+ decq %rbx
+ jnz L$oop_ord_sqrx
+
+ movq %rdx,0(%rdi)
+ movq %r14,8(%rdi)
+ pxor %xmm1,%xmm1
+ movq %r15,16(%rdi)
+ pxor %xmm2,%xmm2
+ movq %r8,24(%rdi)
+ pxor %xmm3,%xmm3
+
+ movq 0(%rsp),%r15
+
+ movq 8(%rsp),%r14
+
+ movq 16(%rsp),%r13
+
+ movq 24(%rsp),%r12
+
+ movq 32(%rsp),%rbx
+
+ movq 40(%rsp),%rbp
+
+ leaq 48(%rsp),%rsp
+
+L$ord_sqrx_epilogue:
+ .byte 0xf3,0xc3
+
+
+
+
+
+
.globl _ecp_nistz256_to_mont
.p2align 5
@@ -2723,15 +3854,23 @@ _ecp_nistz256_to_mont:
.p2align 5
_ecp_nistz256_mul_mont:
+
movl $0x80100,%ecx
andl _OPENSSL_ia32cap_P+8(%rip),%ecx
L$mul_mont:
pushq %rbp
+
pushq %rbx
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
+L$mul_body:
cmpl $0x80100,%ecx
je L$mul_montx
movq %rdx,%rbx
@@ -2756,16 +3895,26 @@ L$mul_montx:
call __ecp_nistz256_mul_montx
L$mul_mont_done:
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ movq 0(%rsp),%r15
+
+ movq 8(%rsp),%r14
+
+ movq 16(%rsp),%r13
+
+ movq 24(%rsp),%r12
+
+ movq 32(%rsp),%rbx
+
+ movq 40(%rsp),%rbp
+
+ leaq 48(%rsp),%rsp
+
+L$mul_epilogue:
.byte 0xf3,0xc3
+
.p2align 5
__ecp_nistz256_mul_montq:
@@ -2992,14 +4141,22 @@ __ecp_nistz256_mul_montq:
.p2align 5
_ecp_nistz256_sqr_mont:
+
movl $0x80100,%ecx
andl _OPENSSL_ia32cap_P+8(%rip),%ecx
pushq %rbp
+
pushq %rbx
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
+L$sqr_body:
cmpl $0x80100,%ecx
je L$sqr_montx
movq 0(%rsi),%rax
@@ -3020,16 +4177,26 @@ L$sqr_montx:
call __ecp_nistz256_sqr_montx
L$sqr_mont_done:
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ movq 0(%rsp),%r15
+
+ movq 8(%rsp),%r14
+
+ movq 16(%rsp),%r13
+
+ movq 24(%rsp),%r12
+
+ movq 32(%rsp),%rbx
+
+ movq 40(%rsp),%rbp
+
+ leaq 48(%rsp),%rsp
+
+L$sqr_epilogue:
.byte 0xf3,0xc3
+
.p2align 5
__ecp_nistz256_sqr_montq:
movq %rax,%r13
@@ -3494,9 +4661,13 @@ __ecp_nistz256_sqr_montx:
.p2align 5
_ecp_nistz256_from_mont:
+
pushq %r12
+
pushq %r13
+L$from_body:
+
movq 0(%rsi),%rax
movq L$poly+24(%rip),%r13
movq 8(%rsi),%r9
@@ -3576,12 +4747,18 @@ _ecp_nistz256_from_mont:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+
+ movq 8(%rsp),%r12
+
+ leaq 16(%rsp),%rsp
+
+L$from_epilogue:
.byte 0xf3,0xc3
+
.globl _ecp_nistz256_scatter_w5
.p2align 5
@@ -3664,6 +4841,7 @@ L$select_loop_sse_w5:
movdqu %xmm6,64(%rdi)
movdqu %xmm7,80(%rdi)
.byte 0xf3,0xc3
+L$SEH_end_ecp_nistz256_gather_w5:
@@ -3734,6 +4912,7 @@ L$select_loop_sse_w7:
movdqu %xmm4,32(%rdi)
movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3
+L$SEH_end_ecp_nistz256_gather_w7:
@@ -3794,6 +4973,7 @@ L$select_loop_avx2_w5:
vmovdqu %ymm4,64(%rdi)
vzeroupper
.byte 0xf3,0xc3
+L$SEH_end_ecp_nistz256_avx2_gather_w5:
@@ -3871,6 +5051,7 @@ L$select_loop_avx2_w7:
vmovdqu %ymm3,32(%rdi)
vzeroupper
.byte 0xf3,0xc3
+L$SEH_end_ecp_nistz256_avx2_gather_w7:
.p2align 5
@@ -3997,18 +5178,27 @@ __ecp_nistz256_mul_by_2q:
.p2align 5
_ecp_nistz256_point_double:
+
movl $0x80100,%ecx
andl _OPENSSL_ia32cap_P+8(%rip),%ecx
cmpl $0x80100,%ecx
je L$point_doublex
pushq %rbp
+
pushq %rbx
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $160+8,%rsp
+L$point_doubleq_body:
+
L$point_double_shortcutq:
movdqu 0(%rsi),%xmm0
movq %rsi,%rbx
@@ -4190,31 +5380,51 @@ L$point_double_shortcutq:
.byte 102,72,15,126,207
call __ecp_nistz256_sub_fromq
- addq $160+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 160+56(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbx
+
+ movq -8(%rsi),%rbp
+
+ leaq (%rsi),%rsp
+
+L$point_doubleq_epilogue:
.byte 0xf3,0xc3
+
.globl _ecp_nistz256_point_add
.p2align 5
_ecp_nistz256_point_add:
+
movl $0x80100,%ecx
andl _OPENSSL_ia32cap_P+8(%rip),%ecx
cmpl $0x80100,%ecx
je L$point_addx
pushq %rbp
+
pushq %rbx
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $576+8,%rsp
+L$point_addq_body:
+
movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
@@ -4590,31 +5800,51 @@ L$add_proceedq:
movdqu %xmm3,48(%rdi)
L$add_doneq:
- addq $576+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 576+56(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbx
+
+ movq -8(%rsi),%rbp
+
+ leaq (%rsi),%rsp
+
+L$point_addq_epilogue:
.byte 0xf3,0xc3
+
.globl _ecp_nistz256_point_add_affine
.p2align 5
_ecp_nistz256_point_add_affine:
+
movl $0x80100,%ecx
andl _OPENSSL_ia32cap_P+8(%rip),%ecx
cmpl $0x80100,%ecx
je L$point_add_affinex
pushq %rbp
+
pushq %rbx
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $480+8,%rsp
+L$add_affineq_body:
+
movdqu 0(%rsi),%xmm0
movq %rdx,%rbx
movdqu 16(%rsi),%xmm1
@@ -4896,16 +6126,27 @@ _ecp_nistz256_point_add_affine:
movdqu %xmm2,32(%rdi)
movdqu %xmm3,48(%rdi)
- addq $480+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 480+56(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbx
+
+ movq -8(%rsi),%rbp
+
+ leaq (%rsi),%rsp
+
+L$add_affineq_epilogue:
.byte 0xf3,0xc3
+
.p2align 5
__ecp_nistz256_add_tox:
xorq %r11,%r11
@@ -5035,15 +6276,24 @@ __ecp_nistz256_mul_by_2x:
.p2align 5
ecp_nistz256_point_doublex:
+
L$point_doublex:
pushq %rbp
+
pushq %rbx
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $160+8,%rsp
+L$point_doublex_body:
+
L$point_double_shortcutx:
movdqu 0(%rsi),%xmm0
movq %rsi,%rbx
@@ -5225,27 +6475,47 @@ L$point_double_shortcutx:
.byte 102,72,15,126,207
call __ecp_nistz256_sub_fromx
- addq $160+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 160+56(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbx
+
+ movq -8(%rsi),%rbp
+
+ leaq (%rsi),%rsp
+
+L$point_doublex_epilogue:
.byte 0xf3,0xc3
+
.p2align 5
ecp_nistz256_point_addx:
+
L$point_addx:
pushq %rbp
+
pushq %rbx
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $576+8,%rsp
+L$point_addx_body:
+
movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
@@ -5621,27 +6891,47 @@ L$add_proceedx:
movdqu %xmm3,48(%rdi)
L$add_donex:
- addq $576+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 576+56(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbx
+
+ movq -8(%rsi),%rbp
+
+ leaq (%rsi),%rsp
+
+L$point_addx_epilogue:
.byte 0xf3,0xc3
+
.p2align 5
ecp_nistz256_point_add_affinex:
+
L$point_add_affinex:
pushq %rbp
+
pushq %rbx
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $480+8,%rsp
+L$add_affinex_body:
+
movdqu 0(%rsi),%xmm0
movq %rdx,%rbx
movdqu 16(%rsi),%xmm1
@@ -5923,12 +7213,23 @@ L$point_add_affinex:
movdqu %xmm2,32(%rdi)
movdqu %xmm3,48(%rdi)
- addq $480+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 480+56(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbx
+
+ movq -8(%rsi),%rbp
+
+ leaq (%rsi),%rsp
+
+L$add_affinex_epilogue:
.byte 0xf3,0xc3
+
diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/x25519-x86_64.s b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/x25519-x86_64.s
new file mode 100644
index 0000000000..cdb602d4cc
--- /dev/null
+++ b/deps/openssl/config/archs/darwin64-x86_64-cc/asm/crypto/ec/x25519-x86_64.s
@@ -0,0 +1,760 @@
+.text
+
+.globl _x25519_fe51_mul
+
+.p2align 5
+_x25519_fe51_mul:
+
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+ leaq -40(%rsp),%rsp
+
+L$fe51_mul_body:
+
+ movq 0(%rsi),%rax
+ movq 0(%rdx),%r11
+ movq 8(%rdx),%r12
+ movq 16(%rdx),%r13
+ movq 24(%rdx),%rbp
+ movq 32(%rdx),%r14
+
+ movq %rdi,32(%rsp)
+ movq %rax,%rdi
+ mulq %r11
+ movq %r11,0(%rsp)
+ movq %rax,%rbx
+ movq %rdi,%rax
+ movq %rdx,%rcx
+ mulq %r12
+ movq %r12,8(%rsp)
+ movq %rax,%r8
+ movq %rdi,%rax
+ leaq (%r14,%r14,8),%r15
+ movq %rdx,%r9
+ mulq %r13
+ movq %r13,16(%rsp)
+ movq %rax,%r10
+ movq %rdi,%rax
+ leaq (%r14,%r15,2),%rdi
+ movq %rdx,%r11
+ mulq %rbp
+ movq %rax,%r12
+ movq 0(%rsi),%rax
+ movq %rdx,%r13
+ mulq %r14
+ movq %rax,%r14
+ movq 8(%rsi),%rax
+ movq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%rbx
+ movq 16(%rsi),%rax
+ adcq %rdx,%rcx
+ mulq %rdi
+ addq %rax,%r8
+ movq 24(%rsi),%rax
+ adcq %rdx,%r9
+ mulq %rdi
+ addq %rax,%r10
+ movq 32(%rsi),%rax
+ adcq %rdx,%r11
+ mulq %rdi
+ imulq $19,%rbp,%rdi
+ addq %rax,%r12
+ movq 8(%rsi),%rax
+ adcq %rdx,%r13
+ mulq %rbp
+ movq 16(%rsp),%rbp
+ addq %rax,%r14
+ movq 16(%rsi),%rax
+ adcq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%rbx
+ movq 24(%rsi),%rax
+ adcq %rdx,%rcx
+ mulq %rdi
+ addq %rax,%r8
+ movq 32(%rsi),%rax
+ adcq %rdx,%r9
+ mulq %rdi
+ imulq $19,%rbp,%rdi
+ addq %rax,%r10
+ movq 8(%rsi),%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r12
+ movq 16(%rsi),%rax
+ adcq %rdx,%r13
+ mulq %rbp
+ movq 8(%rsp),%rbp
+ addq %rax,%r14
+ movq 24(%rsi),%rax
+ adcq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%rbx
+ movq 32(%rsi),%rax
+ adcq %rdx,%rcx
+ mulq %rdi
+ addq %rax,%r8
+ movq 8(%rsi),%rax
+ adcq %rdx,%r9
+ mulq %rbp
+ imulq $19,%rbp,%rdi
+ addq %rax,%r10
+ movq 16(%rsi),%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r12
+ movq 24(%rsi),%rax
+ adcq %rdx,%r13
+ mulq %rbp
+ movq 0(%rsp),%rbp
+ addq %rax,%r14
+ movq 32(%rsi),%rax
+ adcq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%rbx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+ mulq %rbp
+ addq %rax,%r8
+ movq 16(%rsi),%rax
+ adcq %rdx,%r9
+ mulq %rbp
+ addq %rax,%r10
+ movq 24(%rsi),%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r12
+ movq 32(%rsi),%rax
+ adcq %rdx,%r13
+ mulq %rbp
+ addq %rax,%r14
+ adcq %rdx,%r15
+
+ movq 32(%rsp),%rdi
+ jmp L$reduce51
+L$fe51_mul_epilogue:
+
+
+
+.globl _x25519_fe51_sqr
+
+.p2align 5
+_x25519_fe51_sqr:
+
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+ leaq -40(%rsp),%rsp
+
+L$fe51_sqr_body:
+
+ movq 0(%rsi),%rax
+ movq 16(%rsi),%r15
+ movq 32(%rsi),%rbp
+
+ movq %rdi,32(%rsp)
+ leaq (%rax,%rax,1),%r14
+ mulq %rax
+ movq %rax,%rbx
+ movq 8(%rsi),%rax
+ movq %rdx,%rcx
+ mulq %r14
+ movq %rax,%r8
+ movq %r15,%rax
+ movq %r15,0(%rsp)
+ movq %rdx,%r9
+ mulq %r14
+ movq %rax,%r10
+ movq 24(%rsi),%rax
+ movq %rdx,%r11
+ imulq $19,%rbp,%rdi
+ mulq %r14
+ movq %rax,%r12
+ movq %rbp,%rax
+ movq %rdx,%r13
+ mulq %r14
+ movq %rax,%r14
+ movq %rbp,%rax
+ movq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%r12
+ movq 8(%rsi),%rax
+ adcq %rdx,%r13
+
+ movq 24(%rsi),%rsi
+ leaq (%rax,%rax,1),%rbp
+ mulq %rax
+ addq %rax,%r10
+ movq 0(%rsp),%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r12
+ movq %rbp,%rax
+ adcq %rdx,%r13
+ mulq %rsi
+ addq %rax,%r14
+ movq %rbp,%rax
+ adcq %rdx,%r15
+ imulq $19,%rsi,%rbp
+ mulq %rdi
+ addq %rax,%rbx
+ leaq (%rsi,%rsi,1),%rax
+ adcq %rdx,%rcx
+
+ mulq %rdi
+ addq %rax,%r10
+ movq %rsi,%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r8
+ movq 0(%rsp),%rax
+ adcq %rdx,%r9
+
+ leaq (%rax,%rax,1),%rsi
+ mulq %rax
+ addq %rax,%r14
+ movq %rbp,%rax
+ adcq %rdx,%r15
+ mulq %rsi
+ addq %rax,%rbx
+ movq %rsi,%rax
+ adcq %rdx,%rcx
+ mulq %rdi
+ addq %rax,%r8
+ adcq %rdx,%r9
+
+ movq 32(%rsp),%rdi
+ jmp L$reduce51
+
+.p2align 5
+L$reduce51:
+ movq $0x7ffffffffffff,%rbp
+
+ movq %r10,%rdx
+ shrq $51,%r10
+ shlq $13,%r11
+ andq %rbp,%rdx
+ orq %r10,%r11
+ addq %r11,%r12
+ adcq $0,%r13
+
+ movq %rbx,%rax
+ shrq $51,%rbx
+ shlq $13,%rcx
+ andq %rbp,%rax
+ orq %rbx,%rcx
+ addq %rcx,%r8
+ adcq $0,%r9
+
+ movq %r12,%rbx
+ shrq $51,%r12
+ shlq $13,%r13
+ andq %rbp,%rbx
+ orq %r12,%r13
+ addq %r13,%r14
+ adcq $0,%r15
+
+ movq %r8,%rcx
+ shrq $51,%r8
+ shlq $13,%r9
+ andq %rbp,%rcx
+ orq %r8,%r9
+ addq %r9,%rdx
+
+ movq %r14,%r10
+ shrq $51,%r14
+ shlq $13,%r15
+ andq %rbp,%r10
+ orq %r14,%r15
+
+ leaq (%r15,%r15,8),%r14
+ leaq (%r15,%r14,2),%r15
+ addq %r15,%rax
+
+ movq %rdx,%r8
+ andq %rbp,%rdx
+ shrq $51,%r8
+ addq %r8,%rbx
+
+ movq %rax,%r9
+ andq %rbp,%rax
+ shrq $51,%r9
+ addq %r9,%rcx
+
+ movq %rax,0(%rdi)
+ movq %rcx,8(%rdi)
+ movq %rdx,16(%rdi)
+ movq %rbx,24(%rdi)
+ movq %r10,32(%rdi)
+
+ movq 40(%rsp),%r15
+
+ movq 48(%rsp),%r14
+
+ movq 56(%rsp),%r13
+
+ movq 64(%rsp),%r12
+
+ movq 72(%rsp),%rbx
+
+ movq 80(%rsp),%rbp
+
+ leaq 88(%rsp),%rsp
+
+L$fe51_sqr_epilogue:
+ .byte 0xf3,0xc3
+
+
+
+.globl _x25519_fe51_mul121666
+
+.p2align 5
+_x25519_fe51_mul121666:
+
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+ leaq -40(%rsp),%rsp
+
+L$fe51_mul121666_body:
+ movl $121666,%eax
+
+ mulq 0(%rsi)
+ movq %rax,%rbx
+ movl $121666,%eax
+ movq %rdx,%rcx
+ mulq 8(%rsi)
+ movq %rax,%r8
+ movl $121666,%eax
+ movq %rdx,%r9
+ mulq 16(%rsi)
+ movq %rax,%r10
+ movl $121666,%eax
+ movq %rdx,%r11
+ mulq 24(%rsi)
+ movq %rax,%r12
+ movl $121666,%eax
+ movq %rdx,%r13
+ mulq 32(%rsi)
+ movq %rax,%r14
+ movq %rdx,%r15
+
+ jmp L$reduce51
+L$fe51_mul121666_epilogue:
+
+
+
+.globl _x25519_fe64_eligible
+
+.p2align 5
+_x25519_fe64_eligible:
+ movl _OPENSSL_ia32cap_P+8(%rip),%ecx
+ xorl %eax,%eax
+ andl $0x80100,%ecx
+ cmpl $0x80100,%ecx
+ cmovel %ecx,%eax
+ .byte 0xf3,0xc3
+
+
+.globl _x25519_fe64_mul
+
+.p2align 5
+_x25519_fe64_mul:
+
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+ pushq %rdi
+
+ leaq -16(%rsp),%rsp
+
+L$fe64_mul_body:
+
+ movq %rdx,%rax
+ movq 0(%rdx),%rbp
+ movq 0(%rsi),%rdx
+ movq 8(%rax),%rcx
+ movq 16(%rax),%r14
+ movq 24(%rax),%r15
+
+ mulxq %rbp,%r8,%rax
+ xorl %edi,%edi
+ mulxq %rcx,%r9,%rbx
+ adcxq %rax,%r9
+ mulxq %r14,%r10,%rax
+ adcxq %rbx,%r10
+ mulxq %r15,%r11,%r12
+ movq 8(%rsi),%rdx
+ adcxq %rax,%r11
+ movq %r14,(%rsp)
+ adcxq %rdi,%r12
+
+ mulxq %rbp,%rax,%rbx
+ adoxq %rax,%r9
+ adcxq %rbx,%r10
+ mulxq %rcx,%rax,%rbx
+ adoxq %rax,%r10
+ adcxq %rbx,%r11
+ mulxq %r14,%rax,%rbx
+ adoxq %rax,%r11
+ adcxq %rbx,%r12
+ mulxq %r15,%rax,%r13
+ movq 16(%rsi),%rdx
+ adoxq %rax,%r12
+ adcxq %rdi,%r13
+ adoxq %rdi,%r13
+
+ mulxq %rbp,%rax,%rbx
+ adcxq %rax,%r10
+ adoxq %rbx,%r11
+ mulxq %rcx,%rax,%rbx
+ adcxq %rax,%r11
+ adoxq %rbx,%r12
+ mulxq %r14,%rax,%rbx
+ adcxq %rax,%r12
+ adoxq %rbx,%r13
+ mulxq %r15,%rax,%r14
+ movq 24(%rsi),%rdx
+ adcxq %rax,%r13
+ adoxq %rdi,%r14
+ adcxq %rdi,%r14
+
+ mulxq %rbp,%rax,%rbx
+ adoxq %rax,%r11
+ adcxq %rbx,%r12
+ mulxq %rcx,%rax,%rbx
+ adoxq %rax,%r12
+ adcxq %rbx,%r13
+ mulxq (%rsp),%rax,%rbx
+ adoxq %rax,%r13
+ adcxq %rbx,%r14
+ mulxq %r15,%rax,%r15
+ movl $38,%edx
+ adoxq %rax,%r14
+ adcxq %rdi,%r15
+ adoxq %rdi,%r15
+
+ jmp L$reduce64
+L$fe64_mul_epilogue:
+
+
+
+.globl _x25519_fe64_sqr
+
+.p2align 5
+_x25519_fe64_sqr:
+
+ pushq %rbp
+
+ pushq %rbx
+
+ pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+ pushq %rdi
+
+ leaq -16(%rsp),%rsp
+
+L$fe64_sqr_body:
+
+ movq 0(%rsi),%rdx
+ movq 8(%rsi),%rcx
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rsi
+
+
+ mulxq %rdx,%r8,%r15
+ mulxq %rcx,%r9,%rax
+ xorl %edi,%edi
+ mulxq %rbp,%r10,%rbx
+ adcxq %rax,%r10
+ mulxq %rsi,%r11,%r12
+ movq %rcx,%rdx
+ adcxq %rbx,%r11
+ adcxq %rdi,%r12
+
+
+ mulxq %rbp,%rax,%rbx
+ adoxq %rax,%r11
+ adcxq %rbx,%r12
+ mulxq %rsi,%rax,%r13
+ movq %rbp,%rdx
+ adoxq %rax,%r12
+ adcxq %rdi,%r13
+
+
+ mulxq %rsi,%rax,%r14
+ movq %rcx,%rdx
+ adoxq %rax,%r13
+ adcxq %rdi,%r14
+ adoxq %rdi,%r14
+
+ adcxq %r9,%r9
+ adoxq %r15,%r9
+ adcxq %r10,%r10
+ mulxq %rdx,%rax,%rbx
+ movq %rbp,%rdx
+ adcxq %r11,%r11
+ adoxq %rax,%r10
+ adcxq %r12,%r12
+ adoxq %rbx,%r11
+ mulxq %rdx,%rax,%rbx
+ movq %rsi,%rdx
+ adcxq %r13,%r13
+ adoxq %rax,%r12
+ adcxq %r14,%r14
+ adoxq %rbx,%r13
+ mulxq %rdx,%rax,%r15
+ movl $38,%edx
+ adoxq %rax,%r14
+ adcxq %rdi,%r15
+ adoxq %rdi,%r15
+ jmp L$reduce64
+
+.p2align 5
+L$reduce64:
+ mulxq %r12,%rax,%rbx
+ adcxq %rax,%r8
+ adoxq %rbx,%r9
+ mulxq %r13,%rax,%rbx
+ adcxq %rax,%r9
+ adoxq %rbx,%r10
+ mulxq %r14,%rax,%rbx
+ adcxq %rax,%r10
+ adoxq %rbx,%r11
+ mulxq %r15,%rax,%r12
+ adcxq %rax,%r11
+ adoxq %rdi,%r12
+ adcxq %rdi,%r12
+
+ movq 16(%rsp),%rdi
+ imulq %rdx,%r12
+
+ addq %r12,%r8
+ adcq $0,%r9
+ adcq $0,%r10
+ adcq $0,%r11
+
+ sbbq %rax,%rax
+ andq $38,%rax
+
+ addq %rax,%r8
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r8,0(%rdi)
+
+ movq 24(%rsp),%r15
+
+ movq 32(%rsp),%r14
+
+ movq 40(%rsp),%r13
+
+ movq 48(%rsp),%r12
+
+ movq 56(%rsp),%rbx
+
+ movq 64(%rsp),%rbp
+
+ leaq 72(%rsp),%rsp
+
+L$fe64_sqr_epilogue:
+ .byte 0xf3,0xc3
+
+
+
+.globl _x25519_fe64_mul121666
+
+.p2align 5
+_x25519_fe64_mul121666:
+L$fe64_mul121666_body:
+ movl $121666,%edx
+ mulxq 0(%rsi),%r8,%rcx
+ mulxq 8(%rsi),%r9,%rax
+ addq %rcx,%r9
+ mulxq 16(%rsi),%r10,%rcx
+ adcq %rax,%r10
+ mulxq 24(%rsi),%r11,%rax
+ adcq %rcx,%r11
+ adcq $0,%rax
+
+ imulq $38,%rax,%rax
+
+ addq %rax,%r8
+ adcq $0,%r9
+ adcq $0,%r10
+ adcq $0,%r11
+
+ sbbq %rax,%rax
+ andq $38,%rax
+
+ addq %rax,%r8
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r8,0(%rdi)
+
+L$fe64_mul121666_epilogue:
+ .byte 0xf3,0xc3
+
+
+.globl _x25519_fe64_add
+
+.p2align 5
+_x25519_fe64_add:
+L$fe64_add_body:
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+
+ addq 0(%rdx),%r8
+ adcq 8(%rdx),%r9
+ adcq 16(%rdx),%r10
+ adcq 24(%rdx),%r11
+
+ sbbq %rax,%rax
+ andq $38,%rax
+
+ addq %rax,%r8
+ adcq $0,%r9
+ adcq $0,%r10
+ movq %r9,8(%rdi)
+ adcq $0,%r11
+ movq %r10,16(%rdi)
+ sbbq %rax,%rax
+ movq %r11,24(%rdi)
+ andq $38,%rax
+
+ addq %rax,%r8
+ movq %r8,0(%rdi)
+
+L$fe64_add_epilogue:
+ .byte 0xf3,0xc3
+
+
+.globl _x25519_fe64_sub
+
+.p2align 5
+_x25519_fe64_sub:
+L$fe64_sub_body:
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+
+ subq 0(%rdx),%r8
+ sbbq 8(%rdx),%r9
+ sbbq 16(%rdx),%r10
+ sbbq 24(%rdx),%r11
+
+ sbbq %rax,%rax
+ andq $38,%rax
+
+ subq %rax,%r8
+ sbbq $0,%r9
+ sbbq $0,%r10
+ movq %r9,8(%rdi)
+ sbbq $0,%r11
+ movq %r10,16(%rdi)
+ sbbq %rax,%rax
+ movq %r11,24(%rdi)
+ andq $38,%rax
+
+ subq %rax,%r8
+ movq %r8,0(%rdi)
+
+L$fe64_sub_epilogue:
+ .byte 0xf3,0xc3
+
+
+.globl _x25519_fe64_tobytes
+
+.p2align 5
+_x25519_fe64_tobytes:
+L$fe64_to_body:
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+
+
+ leaq (%r11,%r11,1),%rax
+ sarq $63,%r11
+ shrq $1,%rax
+ andq $19,%r11
+ addq $19,%r11
+
+ addq %r11,%r8
+ adcq $0,%r9
+ adcq $0,%r10
+ adcq $0,%rax
+
+ leaq (%rax,%rax,1),%r11
+ sarq $63,%rax
+ shrq $1,%r11
+ notq %rax
+ andq $19,%rax
+
+ subq %rax,%r8
+ sbbq $0,%r9
+ sbbq $0,%r10
+ sbbq $0,%r11
+
+ movq %r8,0(%rdi)
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+L$fe64_to_epilogue:
+ .byte 0xf3,0xc3
+
+.byte 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0