Diffstat (limited to 'deps/openssl/config/archs/BSD-x86_64/asm/crypto')
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aes-x86_64.s  145
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-mb-x86_64.s  72
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha1-x86_64.s  46
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha256-x86_64.s  114
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-x86_64.s  239
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/bsaes-x86_64.s  163
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-avx2.s  41
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-x86_64.s  115
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-gf2m.s  24
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont.s  72
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont5.s  111
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/buildinf.h  76
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/camellia/cmll-x86_64.s  78
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/chacha/chacha-x86_64.s  1513
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/ecp_nistz256-x86_64.s  1523
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/x25519-x86_64.s  792
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/include/internal/dso_conf.h  5
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/md5/md5-x86_64.s  18
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/aesni-gcm-x86_64.s  73
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/ghash-x86_64.s  66
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/poly1305/poly1305-x86_64.s  1792
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-md5-x86_64.s  22
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-x86_64.s  12
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/keccak1600-x86_64.s  522
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-mb-x86_64.s  46
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-x86_64.s  164
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-mb-x86_64.s  46
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-x86_64.s  152
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha512-x86_64.s  152
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/whrlpool/wp-x86_64.s  36
-rw-r--r--  deps/openssl/config/archs/BSD-x86_64/asm/crypto/x86_64cpuid.s  74
31 files changed, 7729 insertions(+), 575 deletions(-)
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aes-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aes-x86_64.s
index 488ae6d781..4bc117304f 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aes-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aes-x86_64.s
@@ -332,15 +332,23 @@ _x86_64_AES_encrypt_compact:
.hidden asm_AES_encrypt
asm_AES_encrypt:
AES_encrypt:
+.cfi_startproc
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
- movq %rsp,%r10
leaq -63(%rdx),%rcx
andq $-64,%rsp
subq %rsp,%rcx
@@ -350,7 +358,8 @@ AES_encrypt:
subq $32,%rsp
movq %rsi,16(%rsp)
- movq %r10,24(%rsp)
+ movq %rax,24(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08
.Lenc_prologue:
movq %rdx,%r15
@@ -377,20 +386,29 @@ AES_encrypt:
movq 16(%rsp),%r9
movq 24(%rsp),%rsi
+.cfi_def_cfa %rsi,8
movl %eax,0(%r9)
movl %ebx,4(%r9)
movl %ecx,8(%r9)
movl %edx,12(%r9)
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lenc_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size AES_encrypt,.-AES_encrypt
.type _x86_64_AES_decrypt,@function
.align 16
@@ -779,15 +797,23 @@ _x86_64_AES_decrypt_compact:
.hidden asm_AES_decrypt
asm_AES_decrypt:
AES_decrypt:
+.cfi_startproc
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
- movq %rsp,%r10
leaq -63(%rdx),%rcx
andq $-64,%rsp
subq %rsp,%rcx
@@ -797,7 +823,8 @@ AES_decrypt:
subq $32,%rsp
movq %rsi,16(%rsp)
- movq %r10,24(%rsp)
+ movq %rax,24(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08
.Ldec_prologue:
movq %rdx,%r15
@@ -826,41 +853,68 @@ AES_decrypt:
movq 16(%rsp),%r9
movq 24(%rsp),%rsi
+.cfi_def_cfa %rsi,8
movl %eax,0(%r9)
movl %ebx,4(%r9)
movl %ecx,8(%r9)
movl %edx,12(%r9)
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Ldec_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size AES_decrypt,.-AES_decrypt
.globl AES_set_encrypt_key
.type AES_set_encrypt_key,@function
.align 16
AES_set_encrypt_key:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $8,%rsp
+.cfi_adjust_cfa_offset 8
.Lenc_key_prologue:
call _x86_64_AES_set_encrypt_key
movq 40(%rsp),%rbp
+.cfi_restore %rbp
movq 48(%rsp),%rbx
+.cfi_restore %rbx
addq $56,%rsp
+.cfi_adjust_cfa_offset -56
.Lenc_key_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size AES_set_encrypt_key,.-AES_set_encrypt_key
.type _x86_64_AES_set_encrypt_key,@function
@@ -1106,13 +1160,27 @@ _x86_64_AES_set_encrypt_key:
.type AES_set_decrypt_key,@function
.align 16
AES_set_decrypt_key:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
pushq %rdx
+.cfi_adjust_cfa_offset 8
.Ldec_key_prologue:
call _x86_64_AES_set_encrypt_key
@@ -1280,14 +1348,22 @@ AES_set_decrypt_key:
xorq %rax,%rax
.Labort:
movq 8(%rsp),%r15
+.cfi_restore %r15
movq 16(%rsp),%r14
+.cfi_restore %r14
movq 24(%rsp),%r13
+.cfi_restore %r13
movq 32(%rsp),%r12
+.cfi_restore %r12
movq 40(%rsp),%rbp
+.cfi_restore %rbp
movq 48(%rsp),%rbx
+.cfi_restore %rbx
addq $56,%rsp
+.cfi_adjust_cfa_offset -56
.Ldec_key_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size AES_set_decrypt_key,.-AES_set_decrypt_key
.globl AES_cbc_encrypt
.type AES_cbc_encrypt,@function
@@ -1297,25 +1373,39 @@ AES_set_decrypt_key:
.hidden asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
AES_cbc_encrypt:
+.cfi_startproc
cmpq $0,%rdx
je .Lcbc_epilogue
pushfq
+.cfi_adjust_cfa_offset 8
+.cfi_offset 49,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-32
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-40
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-48
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-56
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-64
.Lcbc_prologue:
cld
movl %r9d,%r9d
leaq .LAES_Te(%rip),%r14
+ leaq .LAES_Td(%rip),%r10
cmpq $0,%r9
- jne .Lcbc_picked_te
- leaq .LAES_Td(%rip),%r14
-.Lcbc_picked_te:
+ cmoveq %r10,%r14
movl OPENSSL_ia32cap_P(%rip),%r10d
cmpq $512,%rdx
@@ -1351,8 +1441,10 @@ AES_cbc_encrypt:
.Lcbc_te_ok:
xchgq %rsp,%r15
+.cfi_def_cfa_register %r15
movq %r15,16(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40
.Lcbc_fast_body:
movq %rdi,24(%rsp)
movq %rsi,32(%rsp)
@@ -1734,17 +1826,28 @@ AES_cbc_encrypt:
.align 16
.Lcbc_exit:
movq 16(%rsp),%rsi
+.cfi_def_cfa %rsi,64
movq (%rsi),%r15
+.cfi_restore %r15
movq 8(%rsi),%r14
+.cfi_restore %r14
movq 16(%rsi),%r13
+.cfi_restore %r13
movq 24(%rsi),%r12
+.cfi_restore %r12
movq 32(%rsi),%rbp
+.cfi_restore %rbp
movq 40(%rsi),%rbx
+.cfi_restore %rbx
leaq 48(%rsi),%rsp
+.cfi_def_cfa %rsp,16
.Lcbc_popfq:
popfq
+.cfi_adjust_cfa_offset -8
+.cfi_restore 49
.Lcbc_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size AES_cbc_encrypt,.-AES_cbc_encrypt
.align 64
.LAES_Te:
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-mb-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-mb-x86_64.s
index 3dcd55d3f5..f2b5662b9c 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-mb-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-mb-x86_64.s
@@ -6,6 +6,7 @@
.type aesni_multi_cbc_encrypt,@function
.align 32
aesni_multi_cbc_encrypt:
+.cfi_startproc
cmpl $2,%edx
jb .Lenc_non_avx
movl OPENSSL_ia32cap_P+4(%rip),%ecx
@@ -15,12 +16,19 @@ aesni_multi_cbc_encrypt:
.align 16
.Lenc_non_avx:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
@@ -30,6 +38,7 @@ aesni_multi_cbc_encrypt:
subq $48,%rsp
andq $-64,%rsp
movq %rax,16(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
.Lenc4x_body:
movdqu (%rsi),%xmm12
@@ -239,6 +248,7 @@ aesni_multi_cbc_encrypt:
jnz .Loop_enc4x
movq 16(%rsp),%rax
+.cfi_def_cfa %rax,8
movl 24(%rsp),%edx
@@ -256,20 +266,29 @@ aesni_multi_cbc_encrypt:
.Lenc4x_done:
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lenc4x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
.globl aesni_multi_cbc_decrypt
.type aesni_multi_cbc_decrypt,@function
.align 32
aesni_multi_cbc_decrypt:
+.cfi_startproc
cmpl $2,%edx
jb .Ldec_non_avx
movl OPENSSL_ia32cap_P+4(%rip),%ecx
@@ -279,12 +298,19 @@ aesni_multi_cbc_decrypt:
.align 16
.Ldec_non_avx:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
@@ -294,6 +320,7 @@ aesni_multi_cbc_decrypt:
subq $48,%rsp
andq $-64,%rsp
movq %rax,16(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
.Ldec4x_body:
movdqu (%rsi),%xmm12
@@ -503,6 +530,7 @@ aesni_multi_cbc_decrypt:
jnz .Loop_dec4x
movq 16(%rsp),%rax
+.cfi_def_cfa %rax,8
movl 24(%rsp),%edx
leaq 160(%rdi),%rdi
@@ -511,26 +539,42 @@ aesni_multi_cbc_decrypt:
.Ldec4x_done:
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Ldec4x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
.type aesni_multi_cbc_encrypt_avx,@function
.align 32
aesni_multi_cbc_encrypt_avx:
+.cfi_startproc
_avx_cbc_enc_shortcut:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
@@ -542,6 +586,7 @@ _avx_cbc_enc_shortcut:
subq $192,%rsp
andq $-128,%rsp
movq %rax,16(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
.Lenc8x_body:
vzeroupper
@@ -939,6 +984,7 @@ _avx_cbc_enc_shortcut:
jnz .Loop_enc8x
movq 16(%rsp),%rax
+.cfi_def_cfa %rax,8
@@ -947,27 +993,43 @@ _avx_cbc_enc_shortcut:
.Lenc8x_done:
vzeroupper
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lenc8x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
.type aesni_multi_cbc_decrypt_avx,@function
.align 32
aesni_multi_cbc_decrypt_avx:
+.cfi_startproc
_avx_cbc_dec_shortcut:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
@@ -981,6 +1043,7 @@ _avx_cbc_dec_shortcut:
andq $-256,%rsp
subq $192,%rsp
movq %rax,16(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
.Ldec8x_body:
vzeroupper
@@ -1416,6 +1479,7 @@ _avx_cbc_dec_shortcut:
jnz .Loop_dec8x
movq 16(%rsp),%rax
+.cfi_def_cfa %rax,8
@@ -1424,12 +1488,20 @@ _avx_cbc_dec_shortcut:
.Ldec8x_done:
vzeroupper
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Ldec8x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha1-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha1-x86_64.s
index ca193ddb9e..4d2dfe4489 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha1-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha1-x86_64.s
@@ -21,16 +21,30 @@ aesni_cbc_sha1_enc:
.type aesni_cbc_sha1_enc_ssse3,@function
.align 32
aesni_cbc_sha1_enc_ssse3:
+.cfi_startproc
movq 8(%rsp),%r10
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
leaq -104(%rsp),%rsp
+.cfi_adjust_cfa_offset 104
movq %rdi,%r12
@@ -1362,29 +1376,52 @@ aesni_cbc_sha1_enc_ssse3:
movl %ebp,16(%r9)
movups %xmm2,(%r8)
leaq 104(%rsp),%rsi
+.cfi_def_cfa %rsi,56
movq 0(%rsi),%r15
+.cfi_restore %r15
movq 8(%rsi),%r14
+.cfi_restore %r14
movq 16(%rsi),%r13
+.cfi_restore %r13
movq 24(%rsi),%r12
+.cfi_restore %r12
movq 32(%rsi),%rbp
+.cfi_restore %rbp
movq 40(%rsi),%rbx
+.cfi_restore %rbx
leaq 48(%rsi),%rsp
+.cfi_def_cfa %rsp,8
.Lepilogue_ssse3:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
.type aesni_cbc_sha1_enc_avx,@function
.align 32
aesni_cbc_sha1_enc_avx:
+.cfi_startproc
movq 8(%rsp),%r10
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
leaq -104(%rsp),%rsp
+.cfi_adjust_cfa_offset 104
vzeroall
@@ -2660,15 +2697,24 @@ aesni_cbc_sha1_enc_avx:
vmovups %xmm12,(%r8)
vzeroall
leaq 104(%rsp),%rsi
+.cfi_def_cfa %rsi,56
movq 0(%rsi),%r15
+.cfi_restore %r15
movq 8(%rsi),%r14
+.cfi_restore %r14
movq 16(%rsi),%r13
+.cfi_restore %r13
movq 24(%rsi),%r12
+.cfi_restore %r12
movq 32(%rsi),%rbp
+.cfi_restore %rbp
movq 40(%rsi),%rbx
+.cfi_restore %rbx
leaq 48(%rsi),%rsp
+.cfi_def_cfa %rsp,8
.Lepilogue_avx:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx
.align 64
K_XX_XX:
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha256-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha256-x86_64.s
index 427a1c7d12..5a47b3ee51 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha256-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-sha256-x86_64.s
@@ -77,15 +77,23 @@ K256:
.type aesni_cbc_sha256_enc_xop,@function
.align 64
aesni_cbc_sha256_enc_xop:
+.cfi_startproc
.Lxop_shortcut:
movq 8(%rsp),%r10
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
subq $128,%rsp
andq $-64,%rsp
@@ -101,7 +109,8 @@ aesni_cbc_sha256_enc_xop:
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
- movq %r11,64+56(%rsp)
+ movq %rax,120(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_xop:
vzeroall
@@ -1207,31 +1216,48 @@ aesni_cbc_sha256_enc_xop:
jb .Lloop_xop
movq 64+32(%rsp),%r8
- movq 64+56(%rsp),%rsi
+ movq 120(%rsp),%rsi
+.cfi_def_cfa %rsi,8
vmovdqu %xmm8,(%r8)
vzeroall
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_xop:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop
.type aesni_cbc_sha256_enc_avx,@function
.align 64
aesni_cbc_sha256_enc_avx:
+.cfi_startproc
.Lavx_shortcut:
movq 8(%rsp),%r10
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
subq $128,%rsp
andq $-64,%rsp
@@ -1247,7 +1273,8 @@ aesni_cbc_sha256_enc_avx:
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
- movq %r11,64+56(%rsp)
+ movq %rax,120(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_avx:
vzeroall
@@ -2384,31 +2411,48 @@ aesni_cbc_sha256_enc_avx:
jb .Lloop_avx
movq 64+32(%rsp),%r8
- movq 64+56(%rsp),%rsi
+ movq 120(%rsp),%rsi
+.cfi_def_cfa %rsi,8
vmovdqu %xmm8,(%r8)
vzeroall
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx
.type aesni_cbc_sha256_enc_avx2,@function
.align 64
aesni_cbc_sha256_enc_avx2:
+.cfi_startproc
.Lavx2_shortcut:
movq 8(%rsp),%r10
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
subq $576,%rsp
andq $-1024,%rsp
addq $448,%rsp
@@ -2425,7 +2469,8 @@ aesni_cbc_sha256_enc_avx2:
movq %r8,64+32(%rsp)
movq %r9,64+40(%rsp)
movq %r10,64+48(%rsp)
- movq %r11,64+56(%rsp)
+ movq %rax,120(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
.Lprologue_avx2:
vzeroall
@@ -3987,18 +4032,27 @@ aesni_cbc_sha256_enc_avx2:
.Ldone_avx2:
leaq (%rbp),%rsp
movq 64+32(%rsp),%r8
- movq 64+56(%rsp),%rsi
+ movq 120(%rsp),%rsi
+.cfi_def_cfa %rsi,8
vmovdqu %xmm8,(%r8)
vzeroall
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2
.type aesni_cbc_sha256_enc_shaext,@function
.align 32
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-x86_64.s
index e18f87c4e6..5b2a68e758 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/aesni-x86_64.s
@@ -995,6 +995,7 @@ aesni_ccm64_decrypt_blocks:
.type aesni_ctr32_encrypt_blocks,@function
.align 16
aesni_ctr32_encrypt_blocks:
+.cfi_startproc
cmpq $1,%rdx
jne .Lctr32_bulk
@@ -1024,11 +1025,12 @@ aesni_ctr32_encrypt_blocks:
.align 16
.Lctr32_bulk:
- leaq (%rsp),%rax
+ leaq (%rsp),%r11
+.cfi_def_cfa_register %r11
pushq %rbp
+.cfi_offset %rbp,-16
subq $128,%rsp
andq $-16,%rsp
- leaq -8(%rax),%rbp
@@ -1037,7 +1039,7 @@ aesni_ctr32_encrypt_blocks:
movdqu (%rcx),%xmm0
movl 12(%r8),%r8d
pxor %xmm0,%xmm2
- movl 12(%rcx),%r11d
+ movl 12(%rcx),%ebp
movdqa %xmm2,0(%rsp)
bswapl %r8d
movdqa %xmm2,%xmm3
@@ -1053,8 +1055,8 @@ aesni_ctr32_encrypt_blocks:
leaq 2(%r8),%rdx
bswapl %eax
bswapl %edx
- xorl %r11d,%eax
- xorl %r11d,%edx
+ xorl %ebp,%eax
+ xorl %ebp,%edx
.byte 102,15,58,34,216,3
leaq 3(%r8),%rax
movdqa %xmm3,16(%rsp)
@@ -1063,25 +1065,25 @@ aesni_ctr32_encrypt_blocks:
movq %r10,%rdx
leaq 4(%r8),%r10
movdqa %xmm4,32(%rsp)
- xorl %r11d,%eax
+ xorl %ebp,%eax
bswapl %r10d
.byte 102,15,58,34,232,3
- xorl %r11d,%r10d
+ xorl %ebp,%r10d
movdqa %xmm5,48(%rsp)
leaq 5(%r8),%r9
movl %r10d,64+12(%rsp)
bswapl %r9d
leaq 6(%r8),%r10
movl 240(%rcx),%eax
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
bswapl %r10d
movl %r9d,80+12(%rsp)
- xorl %r11d,%r10d
+ xorl %ebp,%r10d
leaq 7(%r8),%r9
movl %r10d,96+12(%rsp)
bswapl %r9d
movl OPENSSL_ia32cap_P+4(%rip),%r10d
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
andl $71303168,%r10d
movl %r9d,112+12(%rsp)
@@ -1105,7 +1107,7 @@ aesni_ctr32_encrypt_blocks:
.Lctr32_6x:
shll $4,%eax
movl $48,%r10d
- bswapl %r11d
+ bswapl %ebp
leaq 32(%rcx,%rax,1),%rcx
subq %rax,%r10
jmp .Lctr32_loop6
@@ -1116,32 +1118,32 @@ aesni_ctr32_encrypt_blocks:
movups -48(%rcx,%r10,1),%xmm0
.byte 102,15,56,220,209
movl %r8d,%eax
- xorl %r11d,%eax
+ xorl %ebp,%eax
.byte 102,15,56,220,217
.byte 0x0f,0x38,0xf1,0x44,0x24,12
leal 1(%r8),%eax
.byte 102,15,56,220,225
- xorl %r11d,%eax
+ xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,28
.byte 102,15,56,220,233
leal 2(%r8),%eax
- xorl %r11d,%eax
+ xorl %ebp,%eax
.byte 102,15,56,220,241
.byte 0x0f,0x38,0xf1,0x44,0x24,44
leal 3(%r8),%eax
.byte 102,15,56,220,249
movups -32(%rcx,%r10,1),%xmm1
- xorl %r11d,%eax
+ xorl %ebp,%eax
.byte 102,15,56,220,208
.byte 0x0f,0x38,0xf1,0x44,0x24,60
leal 4(%r8),%eax
.byte 102,15,56,220,216
- xorl %r11d,%eax
+ xorl %ebp,%eax
.byte 0x0f,0x38,0xf1,0x44,0x24,76
.byte 102,15,56,220,224
leal 5(%r8),%eax
- xorl %r11d,%eax
+ xorl %ebp,%eax
.byte 102,15,56,220,232
.byte 0x0f,0x38,0xf1,0x44,0x24,92
movq %r10,%rax
@@ -1202,7 +1204,7 @@ aesni_ctr32_encrypt_blocks:
bswapl %r9d
movups 32-128(%rcx),%xmm0
.byte 102,15,56,220,225
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
nop
.byte 102,15,56,220,233
movl %r9d,0+12(%rsp)
@@ -1215,7 +1217,7 @@ aesni_ctr32_encrypt_blocks:
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
@@ -1229,7 +1231,7 @@ aesni_ctr32_encrypt_blocks:
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
@@ -1243,7 +1245,7 @@ aesni_ctr32_encrypt_blocks:
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
@@ -1257,7 +1259,7 @@ aesni_ctr32_encrypt_blocks:
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
@@ -1271,7 +1273,7 @@ aesni_ctr32_encrypt_blocks:
bswapl %r9d
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,224
.byte 102,15,56,220,232
@@ -1285,7 +1287,7 @@ aesni_ctr32_encrypt_blocks:
bswapl %r9d
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
.byte 0x66,0x90
.byte 102,15,56,220,225
.byte 102,15,56,220,233
@@ -1300,7 +1302,7 @@ aesni_ctr32_encrypt_blocks:
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
- xorl %r11d,%r9d
+ xorl %ebp,%r9d
movdqu 0(%rdi),%xmm10
.byte 102,15,56,220,232
movl %r9d,112+12(%rsp)
@@ -1535,7 +1537,7 @@ aesni_ctr32_encrypt_blocks:
.Lctr32_done:
xorps %xmm0,%xmm0
- xorl %r11d,%r11d
+ xorl %ebp,%ebp
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
@@ -1559,20 +1561,25 @@ aesni_ctr32_encrypt_blocks:
pxor %xmm14,%xmm14
movaps %xmm0,112(%rsp)
pxor %xmm15,%xmm15
- leaq (%rbp),%rsp
- popq %rbp
+ movq -8(%r11),%rbp
+.cfi_restore %rbp
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
.Lctr32_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
.globl aesni_xts_encrypt
.type aesni_xts_encrypt,@function
.align 16
aesni_xts_encrypt:
- leaq (%rsp),%rax
+.cfi_startproc
+ leaq (%rsp),%r11
+.cfi_def_cfa_register %r11
pushq %rbp
+.cfi_offset %rbp,-16
subq $112,%rsp
andq $-16,%rsp
- leaq -8(%rax),%rbp
movups (%r9),%xmm2
movl 240(%r8),%eax
movl 240(%rcx),%r10d
@@ -1588,7 +1595,7 @@ aesni_xts_encrypt:
jnz .Loop_enc1_8
.byte 102,15,56,221,209
movups (%rcx),%xmm0
- movq %rcx,%r11
+ movq %rcx,%rbp
movl %r10d,%eax
shll $4,%r10d
movq %rdx,%r9
@@ -1644,9 +1651,9 @@ aesni_xts_encrypt:
jc .Lxts_enc_short
movl $16+96,%eax
- leaq 32(%r11,%r10,1),%rcx
+ leaq 32(%rbp,%r10,1),%rcx
subq %r10,%rax
- movups 16(%r11),%xmm1
+ movups 16(%rbp),%xmm1
movq %rax,%r10
leaq .Lxts_magic(%rip),%r8
jmp .Lxts_enc_grandloop
@@ -1671,7 +1678,7 @@ aesni_xts_encrypt:
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,220,233
- movups 32(%r11),%xmm0
+ movups 32(%rbp),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7
@@ -1680,7 +1687,7 @@ aesni_xts_encrypt:
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,220,249
- movups 48(%r11),%xmm1
+ movups 48(%rbp),%xmm1
pxor %xmm9,%xmm12
.byte 102,15,56,220,208
@@ -1695,7 +1702,7 @@ aesni_xts_encrypt:
movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movups 64(%r11),%xmm0
+ movups 64(%rbp),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9
jmp .Lxts_enc_loop6
@@ -1727,7 +1734,7 @@ aesni_xts_encrypt:
psrad $31,%xmm14
.byte 102,15,56,220,217
pand %xmm8,%xmm14
- movups (%r11),%xmm10
+ movups (%rbp),%xmm10
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
@@ -1795,10 +1802,10 @@ aesni_xts_encrypt:
.byte 102,15,56,220,225
.byte 102,15,56,220,233
pxor %xmm0,%xmm15
- movups (%r11),%xmm0
+ movups (%rbp),%xmm0
.byte 102,15,56,220,241
.byte 102,15,56,220,249
- movups 16(%r11),%xmm1
+ movups 16(%rbp),%xmm1
pxor %xmm15,%xmm14
.byte 102,15,56,221,84,36,0
@@ -1825,7 +1832,7 @@ aesni_xts_encrypt:
movl $16+96,%eax
subl %r10d,%eax
- movq %r11,%rcx
+ movq %rbp,%rcx
shrl $4,%eax
.Lxts_enc_short:
@@ -1981,7 +1988,7 @@ aesni_xts_encrypt:
jnz .Lxts_enc_steal
subq %r9,%rsi
- movq %r11,%rcx
+ movq %rbp,%rcx
movl %r10d,%eax
movups -16(%rsi),%xmm2
@@ -2024,20 +2031,25 @@ aesni_xts_encrypt:
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
- leaq (%rbp),%rsp
- popq %rbp
+ movq -8(%r11),%rbp
+.cfi_restore %rbp
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
.Lxts_enc_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_xts_encrypt,.-aesni_xts_encrypt
.globl aesni_xts_decrypt
.type aesni_xts_decrypt,@function
.align 16
aesni_xts_decrypt:
- leaq (%rsp),%rax
+.cfi_startproc
+ leaq (%rsp),%r11
+.cfi_def_cfa_register %r11
pushq %rbp
+.cfi_offset %rbp,-16
subq $112,%rsp
andq $-16,%rsp
- leaq -8(%rax),%rbp
movups (%r9),%xmm2
movl 240(%r8),%eax
movl 240(%rcx),%r10d
@@ -2059,7 +2071,7 @@ aesni_xts_decrypt:
subq %rax,%rdx
movups (%rcx),%xmm0
- movq %rcx,%r11
+ movq %rcx,%rbp
movl %r10d,%eax
shll $4,%r10d
movq %rdx,%r9
@@ -2115,9 +2127,9 @@ aesni_xts_decrypt:
jc .Lxts_dec_short
movl $16+96,%eax
- leaq 32(%r11,%r10,1),%rcx
+ leaq 32(%rbp,%r10,1),%rcx
subq %r10,%rax
- movups 16(%r11),%xmm1
+ movups 16(%rbp),%xmm1
movq %rax,%r10
leaq .Lxts_magic(%rip),%r8
jmp .Lxts_dec_grandloop
@@ -2142,7 +2154,7 @@ aesni_xts_decrypt:
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,222,233
- movups 32(%r11),%xmm0
+ movups 32(%rbp),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7
@@ -2151,7 +2163,7 @@ aesni_xts_decrypt:
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,222,249
- movups 48(%r11),%xmm1
+ movups 48(%rbp),%xmm1
pxor %xmm9,%xmm12
.byte 102,15,56,222,208
@@ -2166,7 +2178,7 @@ aesni_xts_decrypt:
movdqa %xmm14,64(%rsp)
.byte 102,15,56,222,240
.byte 102,15,56,222,248
- movups 64(%r11),%xmm0
+ movups 64(%rbp),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9
jmp .Lxts_dec_loop6
@@ -2198,7 +2210,7 @@ aesni_xts_decrypt:
psrad $31,%xmm14
.byte 102,15,56,222,217
pand %xmm8,%xmm14
- movups (%r11),%xmm10
+ movups (%rbp),%xmm10
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
@@ -2266,10 +2278,10 @@ aesni_xts_decrypt:
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm15
- movups (%r11),%xmm0
+ movups (%rbp),%xmm0
.byte 102,15,56,222,241
.byte 102,15,56,222,249
- movups 16(%r11),%xmm1
+ movups 16(%rbp),%xmm1
pxor %xmm15,%xmm14
.byte 102,15,56,223,84,36,0
@@ -2296,7 +2308,7 @@ aesni_xts_decrypt:
movl $16+96,%eax
subl %r10d,%eax
- movq %r11,%rcx
+ movq %rbp,%rcx
shrl $4,%eax
.Lxts_dec_short:
@@ -2453,7 +2465,7 @@ aesni_xts_decrypt:
jz .Lxts_dec_ret
.Lxts_dec_done2:
movq %r9,%rdx
- movq %r11,%rcx
+ movq %rbp,%rcx
movl %r10d,%eax
movups (%rdi),%xmm2
@@ -2483,7 +2495,7 @@ aesni_xts_decrypt:
jnz .Lxts_dec_steal
subq %r9,%rsi
- movq %r11,%rcx
+ movq %rbp,%rcx
movl %r10d,%eax
movups (%rsi),%xmm2
@@ -2526,21 +2538,35 @@ aesni_xts_decrypt:
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
- leaq (%rbp),%rsp
- popq %rbp
+ movq -8(%r11),%rbp
+.cfi_restore %rbp
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
.Lxts_dec_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_xts_decrypt,.-aesni_xts_decrypt
.globl aesni_ocb_encrypt
.type aesni_ocb_encrypt,@function
.align 32
aesni_ocb_encrypt:
+.cfi_startproc
leaq (%rsp),%rax
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
movq 8(%rax),%rbx
movq 8+8(%rax),%rbp
@@ -2716,13 +2742,23 @@ aesni_ocb_encrypt:
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
+ leaq 40(%rsp),%rax
+.cfi_def_cfa %rax,8
+ movq -40(%rax),%r14
+.cfi_restore %r14
+ movq -32(%rax),%r13
+.cfi_restore %r13
+ movq -24(%rax),%r12
+.cfi_restore %r12
+ movq -16(%rax),%rbp
+.cfi_restore %rbp
+ movq -8(%rax),%rbx
+.cfi_restore %rbx
+ leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Locb_enc_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_ocb_encrypt,.-aesni_ocb_encrypt
.type __ocb_encrypt6,@function
@@ -2935,12 +2971,23 @@ __ocb_encrypt1:
.type aesni_ocb_decrypt,@function
.align 32
aesni_ocb_decrypt:
+.cfi_startproc
leaq (%rsp),%rax
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
movq 8(%rax),%rbx
movq 8+8(%rax),%rbp
@@ -3138,13 +3185,23 @@ aesni_ocb_decrypt:
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
+ leaq 40(%rsp),%rax
+.cfi_def_cfa %rax,8
+ movq -40(%rax),%r14
+.cfi_restore %r14
+ movq -32(%rax),%r13
+.cfi_restore %r13
+ movq -24(%rax),%r12
+.cfi_restore %r12
+ movq -16(%rax),%rbp
+.cfi_restore %rbp
+ movq -8(%rax),%rbx
+.cfi_restore %rbx
+ leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Locb_dec_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_ocb_decrypt,.-aesni_ocb_decrypt
.type __ocb_decrypt6,@function
@@ -3345,6 +3402,7 @@ __ocb_decrypt1:
.type aesni_cbc_encrypt,@function
.align 16
aesni_cbc_encrypt:
+.cfi_startproc
testq %rdx,%rdx
jz .Lcbc_ret
@@ -3437,11 +3495,13 @@ aesni_cbc_encrypt:
jmp .Lcbc_ret
.align 16
.Lcbc_decrypt_bulk:
- leaq (%rsp),%rax
+ leaq (%rsp),%r11
+.cfi_def_cfa_register %r11
pushq %rbp
+.cfi_offset %rbp,-16
subq $16,%rsp
andq $-16,%rsp
- leaq -8(%rax),%rbp
+ movq %rcx,%rbp
movups (%r8),%xmm10
movl %r10d,%eax
cmpq $0x50,%rdx
@@ -3481,7 +3541,7 @@ aesni_cbc_encrypt:
pxor %xmm0,%xmm3
movups 16-112(%rcx),%xmm1
pxor %xmm0,%xmm4
- xorq %r11,%r11
+ movq $-1,%rbp
cmpq $0x70,%rdx
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
@@ -3497,10 +3557,10 @@ aesni_cbc_encrypt:
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
- setnc %r11b
- shlq $7,%r11
+ adcq $0,%rbp
+ andq $128,%rbp
.byte 102,68,15,56,222,201
- addq %rdi,%r11
+ addq %rdi,%rbp
movups 48-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
@@ -3638,18 +3698,18 @@ aesni_cbc_encrypt:
movdqu 112(%rdi),%xmm0
.byte 102,65,15,56,223,228
leaq 128(%rdi),%rdi
- movdqu 0(%r11),%xmm11
+ movdqu 0(%rbp),%xmm11
.byte 102,65,15,56,223,237
.byte 102,65,15,56,223,246
- movdqu 16(%r11),%xmm12
- movdqu 32(%r11),%xmm13
+ movdqu 16(%rbp),%xmm12
+ movdqu 32(%rbp),%xmm13
.byte 102,65,15,56,223,255
.byte 102,68,15,56,223,193
- movdqu 48(%r11),%xmm14
- movdqu 64(%r11),%xmm15
+ movdqu 48(%rbp),%xmm14
+ movdqu 64(%rbp),%xmm15
.byte 102,69,15,56,223,202
movdqa %xmm0,%xmm10
- movdqu 80(%r11),%xmm1
+ movdqu 80(%rbp),%xmm1
movups -112(%rcx),%xmm0
movups %xmm2,(%rsi)
@@ -3768,7 +3828,7 @@ aesni_cbc_encrypt:
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
- movq %r11,%rcx
+ movq %rbp,%rcx
movdqu %xmm5,48(%rsi)
pxor %xmm15,%xmm7
movl %r10d,%eax
@@ -3921,16 +3981,21 @@ aesni_cbc_encrypt:
.Lcbc_dec_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
- leaq (%rbp),%rsp
- popq %rbp
+ movq -8(%r11),%rbp
+.cfi_restore %rbp
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
.Lcbc_ret:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
.globl aesni_set_decrypt_key
.type aesni_set_decrypt_key,@function
.align 16
aesni_set_decrypt_key:
+.cfi_startproc
.byte 0x48,0x83,0xEC,0x08
+.cfi_adjust_cfa_offset 8
call __aesni_set_encrypt_key
shll $4,%esi
testl %eax,%eax
@@ -3963,7 +4028,9 @@ aesni_set_decrypt_key:
pxor %xmm0,%xmm0
.Ldec_key_ret:
addq $8,%rsp
+.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3
+.cfi_endproc
.LSEH_end_set_decrypt_key:
.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
.globl aesni_set_encrypt_key
@@ -3971,7 +4038,9 @@ aesni_set_decrypt_key:
.align 16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
+.cfi_startproc
.byte 0x48,0x83,0xEC,0x08
+.cfi_adjust_cfa_offset 8
movq $-1,%rax
testq %rdi,%rdi
jz .Lenc_key_ret
@@ -4264,7 +4333,9 @@ __aesni_set_encrypt_key:
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
addq $8,%rsp
+.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3
+.cfi_endproc
.LSEH_end_set_encrypt_key:
.align 16
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/bsaes-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/bsaes-x86_64.s
index c76c5a8afb..f7451dfe52 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/bsaes-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/aes/bsaes-x86_64.s
@@ -1067,6 +1067,7 @@ _bsaes_key_convert:
.type bsaes_cbc_encrypt,@function
.align 16
bsaes_cbc_encrypt:
+.cfi_startproc
cmpl $0,%r9d
jne asm_AES_cbc_encrypt
cmpq $128,%rdx
@@ -1075,13 +1076,27 @@ bsaes_cbc_encrypt:
movq %rsp,%rax
.Lcbc_dec_prologue:
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
leaq -72(%rsp),%rsp
+.cfi_adjust_cfa_offset 0x48
movq %rsp,%rbp
+.cfi_def_cfa_register %rbp
movl 240(%rcx),%eax
movq %rdi,%r12
movq %rsi,%r13
@@ -1300,33 +1315,56 @@ bsaes_cbc_encrypt:
cmpq %rax,%rbp
ja .Lcbc_dec_bzero
- leaq (%rbp),%rsp
- movq 72(%rsp),%r15
- movq 80(%rsp),%r14
- movq 88(%rsp),%r13
- movq 96(%rsp),%r12
- movq 104(%rsp),%rbx
- movq 112(%rsp),%rax
- leaq 120(%rsp),%rsp
- movq %rax,%rbp
+ leaq 120(%rbp),%rax
+.cfi_def_cfa %rax,8
+ movq -48(%rax),%r15
+.cfi_restore %r15
+ movq -40(%rax),%r14
+.cfi_restore %r14
+ movq -32(%rax),%r13
+.cfi_restore %r13
+ movq -24(%rax),%r12
+.cfi_restore %r12
+ movq -16(%rax),%rbx
+.cfi_restore %rbx
+ movq -8(%rax),%rbp
+.cfi_restore %rbp
+ leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lcbc_dec_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
.globl bsaes_ctr32_encrypt_blocks
.type bsaes_ctr32_encrypt_blocks,@function
.align 16
bsaes_ctr32_encrypt_blocks:
+.cfi_startproc
movq %rsp,%rax
.Lctr_enc_prologue:
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
leaq -72(%rsp),%rsp
+.cfi_adjust_cfa_offset 0x48
movq %rsp,%rbp
+.cfi_def_cfa_register %rbp
movdqu (%r8),%xmm0
movl 240(%rcx),%eax
movq %rdi,%r12
@@ -1500,32 +1538,55 @@ bsaes_ctr32_encrypt_blocks:
cmpq %rax,%rbp
ja .Lctr_enc_bzero
- leaq (%rbp),%rsp
- movq 72(%rsp),%r15
- movq 80(%rsp),%r14
- movq 88(%rsp),%r13
- movq 96(%rsp),%r12
- movq 104(%rsp),%rbx
- movq 112(%rsp),%rax
- leaq 120(%rsp),%rsp
- movq %rax,%rbp
+ leaq 120(%rbp),%rax
+.cfi_def_cfa %rax,8
+ movq -48(%rax),%r15
+.cfi_restore %r15
+ movq -40(%rax),%r14
+.cfi_restore %r14
+ movq -32(%rax),%r13
+.cfi_restore %r13
+ movq -24(%rax),%r12
+.cfi_restore %r12
+ movq -16(%rax),%rbx
+.cfi_restore %rbx
+ movq -8(%rax),%rbp
+.cfi_restore %rbp
+ leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lctr_enc_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
.globl bsaes_xts_encrypt
.type bsaes_xts_encrypt,@function
.align 16
bsaes_xts_encrypt:
+.cfi_startproc
movq %rsp,%rax
.Lxts_enc_prologue:
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
leaq -72(%rsp),%rsp
+.cfi_adjust_cfa_offset 0x48
movq %rsp,%rbp
+.cfi_def_cfa_register %rbp
movq %rdi,%r12
movq %rsi,%r13
movq %rdx,%r14
@@ -1951,32 +2012,54 @@ bsaes_xts_encrypt:
cmpq %rax,%rbp
ja .Lxts_enc_bzero
- leaq (%rbp),%rsp
- movq 72(%rsp),%r15
- movq 80(%rsp),%r14
- movq 88(%rsp),%r13
- movq 96(%rsp),%r12
- movq 104(%rsp),%rbx
- movq 112(%rsp),%rax
- leaq 120(%rsp),%rsp
- movq %rax,%rbp
+ leaq 120(%rbp),%rax
+.cfi_def_cfa %rax,8
+ movq -48(%rax),%r15
+.cfi_restore %r15
+ movq -40(%rax),%r14
+.cfi_restore %r14
+ movq -32(%rax),%r13
+.cfi_restore %r13
+ movq -24(%rax),%r12
+.cfi_restore %r12
+ movq -16(%rax),%rbx
+.cfi_restore %rbx
+ movq -8(%rax),%rbp
+.cfi_restore %rbp
+ leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lxts_enc_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
.globl bsaes_xts_decrypt
.type bsaes_xts_decrypt,@function
.align 16
bsaes_xts_decrypt:
+.cfi_startproc
movq %rsp,%rax
.Lxts_dec_prologue:
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
leaq -72(%rsp),%rsp
+.cfi_adjust_cfa_offset 0x48
movq %rsp,%rbp
movq %rdi,%r12
movq %rsi,%r13
@@ -2429,17 +2512,25 @@ bsaes_xts_decrypt:
cmpq %rax,%rbp
ja .Lxts_dec_bzero
- leaq (%rbp),%rsp
- movq 72(%rsp),%r15
- movq 80(%rsp),%r14
- movq 88(%rsp),%r13
- movq 96(%rsp),%r12
- movq 104(%rsp),%rbx
- movq 112(%rsp),%rax
- leaq 120(%rsp),%rsp
- movq %rax,%rbp
+ leaq 120(%rbp),%rax
+.cfi_def_cfa %rax,8
+ movq -48(%rax),%r15
+.cfi_restore %r15
+ movq -40(%rax),%r14
+.cfi_restore %r14
+ movq -32(%rax),%r13
+.cfi_restore %r13
+ movq -24(%rax),%r12
+.cfi_restore %r12
+ movq -16(%rax),%rbx
+.cfi_restore %rbx
+ movq -8(%rax),%rbp
+.cfi_restore %rbp
+ leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lxts_dec_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
.type _bsaes_const,@object
.align 64
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-avx2.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-avx2.s
index ee619092c9..61b400749b 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-avx2.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-avx2.s
@@ -4,15 +4,24 @@
.type rsaz_1024_sqr_avx2,@function
.align 64
rsaz_1024_sqr_avx2:
+.cfi_startproc
leaq (%rsp),%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
vzeroupper
movq %rax,%rbp
+.cfi_def_cfa_register %rbp
movq %rdx,%r13
subq $832,%rsp
movq %r13,%r15
@@ -625,28 +634,46 @@ rsaz_1024_sqr_avx2:
vzeroall
movq %rbp,%rax
+.cfi_def_cfa_register %rax
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lsqr_1024_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
.globl rsaz_1024_mul_avx2
.type rsaz_1024_mul_avx2,@function
.align 64
rsaz_1024_mul_avx2:
+.cfi_startproc
leaq (%rsp),%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
movq %rax,%rbp
+.cfi_def_cfa_register %rbp
vzeroall
movq %rdx,%r13
subq $64,%rsp
@@ -1162,15 +1189,24 @@ rsaz_1024_mul_avx2:
vzeroupper
movq %rbp,%rax
+.cfi_def_cfa_register %rax
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_1024_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
.globl rsaz_1024_red2norm_avx2
.type rsaz_1024_red2norm_avx2,@function
@@ -1555,8 +1591,10 @@ rsaz_1024_scatter5_avx2:
.type rsaz_1024_gather5_avx2,@function
.align 32
rsaz_1024_gather5_avx2:
+.cfi_startproc
vzeroupper
movq %rsp,%r11
+.cfi_def_cfa_register %r11
leaq -256(%rsp),%rsp
andq $-32,%rsp
leaq .Linc(%rip),%r10
@@ -1665,7 +1703,10 @@ rsaz_1024_gather5_avx2:
vmovdqu %ymm0,(%rdi)
vzeroupper
leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
.byte 0xf3,0xc3
+.cfi_endproc
+.LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
.globl rsaz_avx2_eligible
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-x86_64.s
index 795cebe1d7..f8e4a80588 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/rsaz-x86_64.s
@@ -6,14 +6,28 @@
.type rsaz_512_sqr,@function
.align 32
rsaz_512_sqr:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $128+24,%rsp
+.cfi_adjust_cfa_offset 128+24
.Lsqr_body:
movq %rdx,%rbp
movq (%rsi),%rdx
@@ -658,28 +672,51 @@ rsaz_512_sqr:
.Lsqr_tail:
leaq 128+24+48(%rsp),%rax
+.cfi_def_cfa %rax,8
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lsqr_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_512_sqr,.-rsaz_512_sqr
.globl rsaz_512_mul
.type rsaz_512_mul,@function
.align 32
rsaz_512_mul:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $128+24,%rsp
+.cfi_adjust_cfa_offset 128+24
.Lmul_body:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
@@ -741,28 +778,51 @@ rsaz_512_mul:
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
+.cfi_def_cfa %rax,8
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_512_mul,.-rsaz_512_mul
.globl rsaz_512_mul_gather4
.type rsaz_512_mul_gather4,@function
.align 32
rsaz_512_mul_gather4:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $152,%rsp
+.cfi_adjust_cfa_offset 152
.Lmul_gather4_body:
movd %r9d,%xmm8
movdqa .Linc+16(%rip),%xmm1
@@ -1151,29 +1211,52 @@ rsaz_512_mul_gather4:
call __rsaz_512_subtract
leaq 128+24+48(%rsp),%rax
+.cfi_def_cfa %rax,8
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_gather4_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
.globl rsaz_512_mul_scatter4
.type rsaz_512_mul_scatter4,@function
.align 32
rsaz_512_mul_scatter4:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
movl %r9d,%r9d
subq $128+24,%rsp
+.cfi_adjust_cfa_offset 128+24
.Lmul_scatter4_body:
leaq (%r8,%r9,8),%r8
.byte 102,72,15,110,199
@@ -1248,28 +1331,51 @@ rsaz_512_mul_scatter4:
movq %r15,896(%rsi)
leaq 128+24+48(%rsp),%rax
+.cfi_def_cfa %rax,8
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_scatter4_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
.globl rsaz_512_mul_by_one
.type rsaz_512_mul_by_one,@function
.align 32
rsaz_512_mul_by_one:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $128+24,%rsp
+.cfi_adjust_cfa_offset 128+24
.Lmul_by_one_body:
movl OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
@@ -1312,15 +1418,24 @@ rsaz_512_mul_by_one:
movq %r15,56(%rdi)
leaq 128+24+48(%rsp),%rax
+.cfi_def_cfa %rax,8
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_by_one_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
.type __rsaz_512_reduce,@function
.align 32
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-gf2m.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-gf2m.s
index a0b78a0565..0846c4441e 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-gf2m.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-gf2m.s
@@ -3,7 +3,9 @@
.type _mul_1x1,@function
.align 16
_mul_1x1:
+.cfi_startproc
subq $128+8,%rsp
+.cfi_adjust_cfa_offset 128+8
movq $-1,%r9
leaq (%rax,%rax,1),%rsi
shrq $3,%r9
@@ -193,16 +195,20 @@ _mul_1x1:
xorq %rdi,%rdx
addq $128+8,%rsp
+.cfi_adjust_cfa_offset -128-8
.byte 0xf3,0xc3
.Lend_mul_1x1:
+.cfi_endproc
.size _mul_1x1,.-_mul_1x1
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,@function
.align 16
bn_GF2m_mul_2x2:
- movq OPENSSL_ia32cap_P(%rip),%rax
- btq $33,%rax
+.cfi_startproc
+ movq %rsp,%rax
+ movq OPENSSL_ia32cap_P(%rip),%r10
+ btq $33,%r10
jnc .Lvanilla_mul_2x2
.byte 102,72,15,110,198
@@ -230,11 +236,17 @@ bn_GF2m_mul_2x2:
.align 16
.Lvanilla_mul_2x2:
leaq -136(%rsp),%rsp
+.cfi_adjust_cfa_offset 8*17
movq %r14,80(%rsp)
+.cfi_rel_offset %r14,8*10
movq %r13,88(%rsp)
+.cfi_rel_offset %r13,8*11
movq %r12,96(%rsp)
+.cfi_rel_offset %r12,8*12
movq %rbp,104(%rsp)
+.cfi_rel_offset %rbp,8*13
movq %rbx,112(%rsp)
+.cfi_rel_offset %rbx,8*14
.Lbody_mul_2x2:
movq %rdi,32(%rsp)
movq %rsi,40(%rsp)
@@ -279,13 +291,21 @@ bn_GF2m_mul_2x2:
movq %rax,8(%rbp)
movq 80(%rsp),%r14
+.cfi_restore %r14
movq 88(%rsp),%r13
+.cfi_restore %r13
movq 96(%rsp),%r12
+.cfi_restore %r12
movq 104(%rsp),%rbp
+.cfi_restore %rbp
movq 112(%rsp),%rbx
+.cfi_restore %rbx
leaq 136(%rsp),%rsp
+.cfi_adjust_cfa_offset -8*17
+.Lepilogue_mul_2x2:
.byte 0xf3,0xc3
.Lend_mul_2x2:
+.cfi_endproc
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont.s
index 3a78cd8440..414be6aff5 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont.s
@@ -6,8 +6,10 @@
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
+.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
+.cfi_def_cfa_register %rax
testl $3,%r9d
jnz .Lmul_enter
cmpl $8,%r9d
@@ -22,11 +24,17 @@ bn_mul_mont:
.align 16
.Lmul_enter:
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
negq %r9
movq %rsp,%r11
@@ -59,6 +67,7 @@ bn_mul_mont:
.Lmul_page_walk_done:
movq %rax,8(%rsp,%r9,8)
+.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
movq %rdx,%r12
movq (%r8),%r8
@@ -226,32 +235,49 @@ bn_mul_mont:
jnz .Lcopy
movq 8(%rsp,%r9,8),%rsi
+.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
+.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmul4x_enter:
andl $0x80100,%r11d
cmpl $0x80100,%r11d
je .Lmulx4x_enter
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
negq %r9
movq %rsp,%r11
@@ -275,6 +301,7 @@ bn_mul4x_mont:
.Lmul4x_page_walk_done:
movq %rax,8(%rsp,%r9,8)
+.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
movq %rdi,16(%rsp,%r9,8)
movq %rdx,%r12
@@ -642,16 +669,25 @@ bn_mul4x_mont:
decq %r15
jnz .Lcopy4x
movq 8(%rsp,%r9,8),%rsi
+.cfi_def_cfa %rsi, 8
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont
@@ -659,14 +695,22 @@ bn_mul4x_mont:
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
+.cfi_startproc
movq %rsp,%rax
+.cfi_def_cfa_register %rax
.Lsqr8x_enter:
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
.Lsqr8x_prologue:
movl %r9d,%r10d
@@ -722,6 +766,7 @@ bn_sqr8x_mont:
movq %r8,32(%rsp)
movq %rax,40(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:
.byte 102,72,15,110,209
@@ -787,6 +832,7 @@ bn_sqr8x_mont:
pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi
+.cfi_def_cfa %rsi,8
jmp .Lsqr8x_cond_copy
.align 32
@@ -816,26 +862,42 @@ bn_sqr8x_mont:
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lsqr8x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
+.cfi_startproc
movq %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmulx4x_enter:
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
.Lmulx4x_prologue:
shll $3,%r9d
@@ -881,6 +943,7 @@ bn_mulx4x_mont:
movq %r8,24(%rsp)
movq %rdi,32(%rsp)
movq %rax,40(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
movq %r9,48(%rsp)
jmp .Lmulx4x_body
@@ -1125,6 +1188,7 @@ bn_mulx4x_mont:
pxor %xmm0,%xmm0
pshufd $0,%xmm1,%xmm1
movq 40(%rsp),%rsi
+.cfi_def_cfa %rsi,8
jmp .Lmulx4x_cond_copy
.align 32
@@ -1154,14 +1218,22 @@ bn_mulx4x_mont:
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
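
Editor's sketch (not part of the patch): the bulk of the hunks above wrap each hand-coded prologue and epilogue in DWARF call-frame information so unwinders can step through frames whose %rsp gets realigned. A minimal hand-written illustration of that pattern in GNU as AT&T syntax follows; demo_fn and its 40(%rsp) save slot are hypothetical and only loosely mirror bn_mul_mont's layout.

.type	demo_fn,@function
.align	16
demo_fn:
.cfi_startproc
	movq	%rsp,%rax		# remember the entry %rsp
.cfi_def_cfa_register	%rax		# CFA = %rax+8, so the pushes below do not move it
	pushq	%rbx
.cfi_offset	%rbx,-16		# %rbx saved at CFA-16
	pushq	%rbp
.cfi_offset	%rbp,-24		# %rbp saved at CFA-24
	subq	$48,%rsp
	andq	$-64,%rsp		# %rsp realigned; only %rax still locates the frame
	movq	%rax,40(%rsp)		# park the entry %rsp in a stack slot
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08	# CFA = *(%rsp+40) + 8
#	... computation that may clobber %rax ...
	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8			# back to a register-relative CFA
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
	.byte	0xf3,0xc3		# rep ret, as elsewhere in these files
.cfi_endproc
.size	demo_fn,.-demo_fn
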
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont5.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont5.s
index 0dd53512f9..c6d752a245 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont5.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/bn/x86_64-mont5.s
@@ -6,8 +6,10 @@
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
+.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
+.cfi_def_cfa_register %rax
testl $7,%r9d
jnz .Lmul_enter
movl OPENSSL_ia32cap_P+8(%rip),%r11d
@@ -17,11 +19,17 @@ bn_mul_mont_gather5:
.Lmul_enter:
movd 8(%rsp),%xmm5
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
negq %r9
movq %rsp,%r11
@@ -54,6 +62,7 @@ bn_mul_mont_gather5:
leaq .Linc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
+.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
leaq 128(%rdx),%r12
@@ -411,33 +420,50 @@ bn_mul_mont_gather5:
jnz .Lcopy
movq 8(%rsp,%r9,8),%rsi
+.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
.type bn_mul4x_mont_gather5,@function
.align 32
bn_mul4x_mont_gather5:
+.cfi_startproc
.byte 0x67
movq %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmul4x_enter:
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lmulx4x_enter
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
.Lmul4x_prologue:
.byte 0x67
@@ -493,22 +519,32 @@ bn_mul4x_mont_gather5:
negq %r9
movq %rax,40(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body:
call mul4x_internal
movq 40(%rsp),%rsi
+.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
.type mul4x_internal,@function
@@ -1040,17 +1076,25 @@ mul4x_internal:
.type bn_power5,@function
.align 32
bn_power5:
+.cfi_startproc
movq %rsp,%rax
+.cfi_def_cfa_register %rax
movl OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lpowerx5_enter
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
.Lpower5_prologue:
shll $3,%r9d
@@ -1115,6 +1159,7 @@ bn_power5:
movq %r8,32(%rsp)
movq %rax,40(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpower5_body:
.byte 102,72,15,110,207
.byte 102,72,15,110,209
@@ -1141,16 +1186,25 @@ bn_power5:
call mul4x_internal
movq 40(%rsp),%rsi
+.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lpower5_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_power5,.-bn_power5
.globl bn_sqr8x_internal
@@ -2001,14 +2055,22 @@ bn_from_montgomery:
.type bn_from_mont8x,@function
.align 32
bn_from_mont8x:
+.cfi_startproc
.byte 0x67
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
.Lfrom_prologue:
shll $3,%r9d
@@ -2073,6 +2135,7 @@ bn_from_mont8x:
movq %r8,32(%rsp)
movq %rax,40(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lfrom_body:
movq %r9,%r11
leaq 48(%rsp),%rax
@@ -2114,7 +2177,6 @@ bn_from_mont8x:
pxor %xmm0,%xmm0
leaq 48(%rsp),%rax
- movq 40(%rsp),%rsi
jmp .Lfrom_mont_zero
.align 32
@@ -2124,11 +2186,12 @@ bn_from_mont8x:
pxor %xmm0,%xmm0
leaq 48(%rsp),%rax
- movq 40(%rsp),%rsi
jmp .Lfrom_mont_zero
.align 32
.Lfrom_mont_zero:
+ movq 40(%rsp),%rsi
+.cfi_def_cfa %rsi,8
movdqa %xmm0,0(%rax)
movdqa %xmm0,16(%rax)
movdqa %xmm0,32(%rax)
@@ -2139,26 +2202,42 @@ bn_from_mont8x:
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lfrom_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_from_mont8x,.-bn_from_mont8x
.type bn_mulx4x_mont_gather5,@function
.align 32
bn_mulx4x_mont_gather5:
+.cfi_startproc
movq %rsp,%rax
+.cfi_def_cfa_register %rax
.Lmulx4x_enter:
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
.Lmulx4x_prologue:
shll $3,%r9d
@@ -2224,21 +2303,31 @@ bn_mulx4x_mont_gather5:
movq %r8,32(%rsp)
movq %rax,40(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmulx4x_body:
call mulx4x_internal
movq 40(%rsp),%rsi
+.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
.type mulx4x_internal,@function
@@ -2666,14 +2755,22 @@ mulx4x_internal:
.type bn_powerx5,@function
.align 32
bn_powerx5:
+.cfi_startproc
movq %rsp,%rax
+.cfi_def_cfa_register %rax
.Lpowerx5_enter:
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
.Lpowerx5_prologue:
shll $3,%r9d
@@ -2745,6 +2842,7 @@ bn_powerx5:
.byte 102,72,15,110,226
movq %r8,32(%rsp)
movq %rax,40(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpowerx5_body:
call __bn_sqrx8x_internal
@@ -2767,17 +2865,26 @@ bn_powerx5:
call mulx4x_internal
movq 40(%rsp),%rsi
+.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
+.cfi_restore %r15
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lpowerx5_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size bn_powerx5,.-bn_powerx5
.globl bn_sqrx8x_internal
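
Editor's note (not part of the patch): two DW_CFA_def_cfa_expression escapes recur throughout both x86_64-mont files because, once %rsp has been realigned, the saved stack pointer lives only in memory. The decoding below is my reading of the DWARF byte encoding, added as comments for reference.

# .cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
#   0x0f       DW_CFA_def_cfa_expression
#   0x05       expression is 5 bytes long
#   0x77 0x28  DW_OP_breg7 -> %rsp + 40
#   0x06       DW_OP_deref
#   0x23 0x08  DW_OP_plus_uconst 8
#   => CFA = *(%rsp + 40) + 8, the %rsp value parked by "movq %rax,40(%rsp)", plus 8
#
# .cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
#   0x0f       DW_CFA_def_cfa_expression
#   0x0a       expression is 10 bytes long
#   0x77 0x08  DW_OP_breg7 -> %rsp + 8
#   0x79 0x00  DW_OP_breg9 -> %r9 + 0
#   0x38       DW_OP_lit8
#   0x1e       DW_OP_mul
#   0x22       DW_OP_plus
#   0x06       DW_OP_deref
#   0x23 0x08  DW_OP_plus_uconst 8
#   => CFA = *(%rsp + 8 + %r9*8) + 8, the slot written by "movq %rax,8(%rsp,%r9,8)", plus 8
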
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/buildinf.h b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/buildinf.h
index 260e0eaac9..208078f6cb 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/buildinf.h
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/buildinf.h
@@ -1,38 +1,48 @@
-/* auto-generated by util/mkbuildinf.pl for crypto/cversion.c */
-#define CFLAGS cflags
/*
- * Generate CFLAGS as an array of individual characters. This is a
+ * WARNING: do not edit!
+ * Generated by util/mkbuildinf.pl
+ *
+ * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#define PLATFORM "platform: BSD-x86_64"
+#define DATE "built on: Thu Nov 22 19:32:47 2018 UTC"
+
+/*
+ * Generate compiler_flags as an array of individual characters. This is a
* workaround for the situation where CFLAGS gets too long for a C90 string
* literal
*/
-static const char cflags[] = {
- 'c','o','m','p','i','l','e','r',':',' ','c','c',' ','-','D','D',
- 'S','O','_','D','L','F','C','N',' ','-','D','H','A','V','E','_',
- 'D','L','F','C','N','_','H',' ','-','D','N','D','E','B','U','G',
- ' ','-','D','O','P','E','N','S','S','L','_','T','H','R','E','A',
- 'D','S',' ','-','D','O','P','E','N','S','S','L','_','N','O','_',
- 'D','Y','N','A','M','I','C','_','E','N','G','I','N','E',' ','-',
- 'D','O','P','E','N','S','S','L','_','P','I','C',' ','-','D','O',
- 'P','E','N','S','S','L','_','I','A','3','2','_','S','S','E','2',
- ' ','-','D','O','P','E','N','S','S','L','_','B','N','_','A','S',
- 'M','_','M','O','N','T',' ','-','D','O','P','E','N','S','S','L',
- '_','B','N','_','A','S','M','_','M','O','N','T','5',' ','-','D',
- 'O','P','E','N','S','S','L','_','B','N','_','A','S','M','_','G',
- 'F','2','m',' ','-','D','S','H','A','1','_','A','S','M',' ','-',
- 'D','S','H','A','2','5','6','_','A','S','M',' ','-','D','S','H',
- 'A','5','1','2','_','A','S','M',' ','-','D','R','C','4','_','A',
- 'S','M',' ','-','D','M','D','5','_','A','S','M',' ','-','D','A',
- 'E','S','_','A','S','M',' ','-','D','V','P','A','E','S','_','A',
- 'S','M',' ','-','D','B','S','A','E','S','_','A','S','M',' ','-',
- 'D','G','H','A','S','H','_','A','S','M',' ','-','D','E','C','P',
- '_','N','I','S','T','Z','2','5','6','_','A','S','M',' ','-','D',
- 'P','A','D','L','O','C','K','_','A','S','M',' ','-','D','P','O',
- 'L','Y','1','3','0','5','_','A','S','M',' ','-','D','O','P','E',
- 'N','S','S','L','D','I','R','=','"','\\','"','/','u','s','r','/',
- 'l','o','c','a','l','/','s','s','l','\\','"','"',' ','-','D','E',
- 'N','G','I','N','E','S','D','I','R','=','"','\\','"','/','u','s',
- 'r','/','l','o','c','a','l','/','l','i','b','/','e','n','g','i',
- 'n','e','s','-','1','.','1','\\','"','"',' ','\0'
+static const char compiler_flags[] = {
+ 'c','o','m','p','i','l','e','r',':',' ','g','c','c',' ','-','f',
+ 'P','I','C',' ','-','p','t','h','r','e','a','d',' ','-','W','a',
+ ',','-','-','n','o','e','x','e','c','s','t','a','c','k',' ','-',
+ 'W','a','l','l',' ','-','O','3',' ','-','D','L','_','E','N','D',
+ 'I','A','N',' ','-','D','O','P','E','N','S','S','L','_','P','I',
+ 'C',' ','-','D','O','P','E','N','S','S','L','_','C','P','U','I',
+ 'D','_','O','B','J',' ','-','D','O','P','E','N','S','S','L','_',
+ 'I','A','3','2','_','S','S','E','2',' ','-','D','O','P','E','N',
+ 'S','S','L','_','B','N','_','A','S','M','_','M','O','N','T',' ',
+ '-','D','O','P','E','N','S','S','L','_','B','N','_','A','S','M',
+ '_','M','O','N','T','5',' ','-','D','O','P','E','N','S','S','L',
+ '_','B','N','_','A','S','M','_','G','F','2','m',' ','-','D','S',
+ 'H','A','1','_','A','S','M',' ','-','D','S','H','A','2','5','6',
+ '_','A','S','M',' ','-','D','S','H','A','5','1','2','_','A','S',
+ 'M',' ','-','D','K','E','C','C','A','K','1','6','0','0','_','A',
+ 'S','M',' ','-','D','R','C','4','_','A','S','M',' ','-','D','M',
+ 'D','5','_','A','S','M',' ','-','D','A','E','S','_','A','S','M',
+ ' ','-','D','V','P','A','E','S','_','A','S','M',' ','-','D','B',
+ 'S','A','E','S','_','A','S','M',' ','-','D','G','H','A','S','H',
+ '_','A','S','M',' ','-','D','E','C','P','_','N','I','S','T','Z',
+ '2','5','6','_','A','S','M',' ','-','D','X','2','5','5','1','9',
+ '_','A','S','M',' ','-','D','P','A','D','L','O','C','K','_','A',
+ 'S','M',' ','-','D','P','O','L','Y','1','3','0','5','_','A','S',
+ 'M',' ','-','D','_','T','H','R','E','A','D','_','S','A','F','E',
+ ' ','-','D','_','R','E','E','N','T','R','A','N','T',' ','-','D',
+ 'N','D','E','B','U','G','\0'
};
-#define PLATFORM "platform: BSD-x86_64"
-#define DATE "built on: Tue Nov 20 09:37:29 2018"
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/camellia/cmll-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/camellia/cmll-x86_64.s
index 1dead91b17..405566b01c 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/camellia/cmll-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/camellia/cmll-x86_64.s
@@ -17,11 +17,22 @@ Camellia_EncryptBlock:
.align 16
.Lenc_rounds:
Camellia_EncryptBlock_Rounds:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-32
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-40
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-48
.Lenc_prologue:
@@ -53,13 +64,20 @@ Camellia_EncryptBlock_Rounds:
movl %r11d,12(%r13)
movq 0(%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r13
+.cfi_restore %r13
movq 24(%rsp),%rbp
+.cfi_restore %rbp
movq 32(%rsp),%rbx
+.cfi_restore %rbx
leaq 40(%rsp),%rsp
+.cfi_adjust_cfa_offset -40
.Lenc_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds
.type _x86_64_Camellia_encrypt,@function
@@ -286,11 +304,22 @@ Camellia_DecryptBlock:
.align 16
.Ldec_rounds:
Camellia_DecryptBlock_Rounds:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-32
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-40
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-48
.Ldec_prologue:
@@ -322,13 +351,20 @@ Camellia_DecryptBlock_Rounds:
movl %r11d,12(%r13)
movq 0(%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r13
+.cfi_restore %r13
movq 24(%rsp),%rbp
+.cfi_restore %rbp
movq 32(%rsp),%rbx
+.cfi_restore %rbx
leaq 40(%rsp),%rsp
+.cfi_adjust_cfa_offset -40
.Ldec_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds
.type _x86_64_Camellia_decrypt,@function
@@ -542,11 +578,22 @@ _x86_64_Camellia_decrypt:
.type Camellia_Ekeygen,@function
.align 16
Camellia_Ekeygen:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-32
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-40
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-48
.Lkey_prologue:
movl %edi,%r15d
@@ -1074,13 +1121,20 @@ Camellia_Ekeygen:
movl $4,%eax
.Ldone:
movq 0(%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r13
+.cfi_restore %r13
movq 24(%rsp),%rbp
+.cfi_restore %rbp
movq 32(%rsp),%rbx
+.cfi_restore %rbx
leaq 40(%rsp),%rsp
+.cfi_adjust_cfa_offset -40
.Lkey_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size Camellia_Ekeygen,.-Camellia_Ekeygen
.align 64
.LCamellia_SIGMA:
@@ -1605,17 +1659,31 @@ Camellia_Ekeygen:
.type Camellia_cbc_encrypt,@function
.align 16
Camellia_cbc_encrypt:
+.cfi_startproc
cmpq $0,%rdx
je .Lcbc_abort
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
.Lcbc_prologue:
movq %rsp,%rbp
+.cfi_def_cfa_register %rbp
subq $64,%rsp
andq $-64,%rsp
@@ -1636,6 +1704,7 @@ Camellia_cbc_encrypt:
movq %r8,40(%rsp)
movq %rbp,48(%rsp)
+.cfi_escape 0x0f,0x05,0x77,0x30,0x06,0x23,0x38
.Lcbc_body:
leaq .LCamellia_SBOX(%rip),%rbp
@@ -1824,15 +1893,24 @@ Camellia_cbc_encrypt:
.align 16
.Lcbc_done:
movq 48(%rsp),%rcx
+.cfi_def_cfa %rcx,56
movq 0(%rcx),%r15
+.cfi_restore %r15
movq 8(%rcx),%r14
+.cfi_restore %r14
movq 16(%rcx),%r13
+.cfi_restore %r13
movq 24(%rcx),%r12
+.cfi_restore %r12
movq 32(%rcx),%rbp
+.cfi_restore %rbp
movq 40(%rcx),%rbx
+.cfi_restore %rbx
leaq 48(%rcx),%rsp
+.cfi_def_cfa %rsp,8
.Lcbc_abort:
.byte 0xf3,0xc3
+.cfi_endproc
.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54,95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
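
Editor's sketch (not part of the patch): the Camellia hunks above, like the ecp_nistz256 ones further down, mostly use the simpler CFI variant in which the CFA stays %rsp-relative and every push is mirrored by a .cfi_adjust_cfa_offset. A hypothetical two-register illustration of that shape:

.type	demo_leaf,@function
.align	16
demo_leaf:
.cfi_startproc
	pushq	%r12
.cfi_adjust_cfa_offset	8		# CFA tracks %rsp on this path
.cfi_offset	%r12,-16
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-24
#	... body; %rsp is never realigned here ...
	movq	0(%rsp),%r13		# last pushed, lowest address
.cfi_restore	%r13
	movq	8(%rsp),%r12
.cfi_restore	%r12
	leaq	16(%rsp),%rsp
.cfi_adjust_cfa_offset	-16
	.byte	0xf3,0xc3
.cfi_endproc
.size	demo_leaf,.-demo_leaf
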
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/chacha/chacha-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/chacha/chacha-x86_64.s
index a9fed05fd7..1812bc84b1 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/chacha/chacha-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/chacha/chacha-x86_64.s
@@ -19,6 +19,17 @@
.byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd
.Lrot24:
.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
+.Ltwoy:
+.long 2,0,0,0, 2,0,0,0
+.align 64
+.Lzeroz:
+.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
+.Lfourz:
+.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
+.Lincz:
+.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+.Lsixteen:
+.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
.Lsigma:
.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@@ -26,19 +37,38 @@
.type ChaCha20_ctr32,@function
.align 64
ChaCha20_ctr32:
+.cfi_startproc
cmpq $0,%rdx
je .Lno_data
movq OPENSSL_ia32cap_P+4(%rip),%r10
+ btq $48,%r10
+ jc .LChaCha20_avx512
+ testq %r10,%r10
+ js .LChaCha20_avx512vl
testl $512,%r10d
jnz .LChaCha20_ssse3
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $64+24,%rsp
+.cfi_adjust_cfa_offset 64+24
+.Lctr32_body:
movdqu (%rcx),%xmm1
@@ -276,34 +306,41 @@ ChaCha20_ctr32:
jnz .Loop_tail
.Ldone:
- addq $64+24,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
+ leaq 64+24+48(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lno_data:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_ctr32,.-ChaCha20_ctr32
.type ChaCha20_ssse3,@function
.align 32
ChaCha20_ssse3:
+.cfi_startproc
.LChaCha20_ssse3:
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
testl $2048,%r10d
jnz .LChaCha20_4xop
cmpq $128,%rdx
+ je .LChaCha20_128
ja .LChaCha20_4x
.Ldo_sse3_after_all:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
-
- subq $64+24,%rsp
+ subq $64+8,%rsp
movdqa .Lsigma(%rip),%xmm0
movdqu (%rcx),%xmm1
movdqu 16(%rcx),%xmm2
@@ -315,7 +352,7 @@ ChaCha20_ssse3:
movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp)
- movl $10,%ebp
+ movq $10,%r8
jmp .Loop_ssse3
.align 32
@@ -325,7 +362,7 @@ ChaCha20_ssse3:
movdqa 16(%rsp),%xmm1
movdqa 32(%rsp),%xmm2
paddd 48(%rsp),%xmm3
- movl $10,%ebp
+ movq $10,%r8
movdqa %xmm3,48(%rsp)
jmp .Loop_ssse3
@@ -374,7 +411,7 @@ ChaCha20_ssse3:
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
- decl %ebp
+ decq %r8
jnz .Loop_ssse3
paddd 0(%rsp),%xmm0
paddd 16(%rsp),%xmm1
@@ -411,31 +448,187 @@ ChaCha20_ssse3:
movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp)
- xorq %rbx,%rbx
+ xorq %r8,%r8
.Loop_tail_ssse3:
- movzbl (%rsi,%rbx,1),%eax
- movzbl (%rsp,%rbx,1),%ecx
- leaq 1(%rbx),%rbx
+ movzbl (%rsi,%r8,1),%eax
+ movzbl (%rsp,%r8,1),%ecx
+ leaq 1(%r8),%r8
xorl %ecx,%eax
- movb %al,-1(%rdi,%rbx,1)
+ movb %al,-1(%rdi,%r8,1)
decq %rdx
jnz .Loop_tail_ssse3
.Ldone_ssse3:
- addq $64+24,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbp
- popq %rbx
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.Lssse3_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_ssse3,.-ChaCha20_ssse3
+.type ChaCha20_128,@function
+.align 32
+ChaCha20_128:
+.cfi_startproc
+.LChaCha20_128:
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
+ subq $64+8,%rsp
+ movdqa .Lsigma(%rip),%xmm8
+ movdqu (%rcx),%xmm9
+ movdqu 16(%rcx),%xmm2
+ movdqu (%r8),%xmm3
+ movdqa .Lone(%rip),%xmm1
+ movdqa .Lrot16(%rip),%xmm6
+ movdqa .Lrot24(%rip),%xmm7
+
+ movdqa %xmm8,%xmm10
+ movdqa %xmm8,0(%rsp)
+ movdqa %xmm9,%xmm11
+ movdqa %xmm9,16(%rsp)
+ movdqa %xmm2,%xmm0
+ movdqa %xmm2,32(%rsp)
+ paddd %xmm3,%xmm1
+ movdqa %xmm3,48(%rsp)
+ movq $10,%r8
+ jmp .Loop_128
+
+.align 32
+.Loop_128:
+ paddd %xmm9,%xmm8
+ pxor %xmm8,%xmm3
+ paddd %xmm11,%xmm10
+ pxor %xmm10,%xmm1
+.byte 102,15,56,0,222
+.byte 102,15,56,0,206
+ paddd %xmm3,%xmm2
+ paddd %xmm1,%xmm0
+ pxor %xmm2,%xmm9
+ pxor %xmm0,%xmm11
+ movdqa %xmm9,%xmm4
+ psrld $20,%xmm9
+ movdqa %xmm11,%xmm5
+ pslld $12,%xmm4
+ psrld $20,%xmm11
+ por %xmm4,%xmm9
+ pslld $12,%xmm5
+ por %xmm5,%xmm11
+ paddd %xmm9,%xmm8
+ pxor %xmm8,%xmm3
+ paddd %xmm11,%xmm10
+ pxor %xmm10,%xmm1
+.byte 102,15,56,0,223
+.byte 102,15,56,0,207
+ paddd %xmm3,%xmm2
+ paddd %xmm1,%xmm0
+ pxor %xmm2,%xmm9
+ pxor %xmm0,%xmm11
+ movdqa %xmm9,%xmm4
+ psrld $25,%xmm9
+ movdqa %xmm11,%xmm5
+ pslld $7,%xmm4
+ psrld $25,%xmm11
+ por %xmm4,%xmm9
+ pslld $7,%xmm5
+ por %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm2
+ pshufd $57,%xmm9,%xmm9
+ pshufd $147,%xmm3,%xmm3
+ pshufd $78,%xmm0,%xmm0
+ pshufd $57,%xmm11,%xmm11
+ pshufd $147,%xmm1,%xmm1
+ paddd %xmm9,%xmm8
+ pxor %xmm8,%xmm3
+ paddd %xmm11,%xmm10
+ pxor %xmm10,%xmm1
+.byte 102,15,56,0,222
+.byte 102,15,56,0,206
+ paddd %xmm3,%xmm2
+ paddd %xmm1,%xmm0
+ pxor %xmm2,%xmm9
+ pxor %xmm0,%xmm11
+ movdqa %xmm9,%xmm4
+ psrld $20,%xmm9
+ movdqa %xmm11,%xmm5
+ pslld $12,%xmm4
+ psrld $20,%xmm11
+ por %xmm4,%xmm9
+ pslld $12,%xmm5
+ por %xmm5,%xmm11
+ paddd %xmm9,%xmm8
+ pxor %xmm8,%xmm3
+ paddd %xmm11,%xmm10
+ pxor %xmm10,%xmm1
+.byte 102,15,56,0,223
+.byte 102,15,56,0,207
+ paddd %xmm3,%xmm2
+ paddd %xmm1,%xmm0
+ pxor %xmm2,%xmm9
+ pxor %xmm0,%xmm11
+ movdqa %xmm9,%xmm4
+ psrld $25,%xmm9
+ movdqa %xmm11,%xmm5
+ pslld $7,%xmm4
+ psrld $25,%xmm11
+ por %xmm4,%xmm9
+ pslld $7,%xmm5
+ por %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm2
+ pshufd $147,%xmm9,%xmm9
+ pshufd $57,%xmm3,%xmm3
+ pshufd $78,%xmm0,%xmm0
+ pshufd $147,%xmm11,%xmm11
+ pshufd $57,%xmm1,%xmm1
+ decq %r8
+ jnz .Loop_128
+ paddd 0(%rsp),%xmm8
+ paddd 16(%rsp),%xmm9
+ paddd 32(%rsp),%xmm2
+ paddd 48(%rsp),%xmm3
+ paddd .Lone(%rip),%xmm1
+ paddd 0(%rsp),%xmm10
+ paddd 16(%rsp),%xmm11
+ paddd 32(%rsp),%xmm0
+ paddd 48(%rsp),%xmm1
+
+ movdqu 0(%rsi),%xmm4
+ movdqu 16(%rsi),%xmm5
+ pxor %xmm4,%xmm8
+ movdqu 32(%rsi),%xmm4
+ pxor %xmm5,%xmm9
+ movdqu 48(%rsi),%xmm5
+ pxor %xmm4,%xmm2
+ movdqu 64(%rsi),%xmm4
+ pxor %xmm5,%xmm3
+ movdqu 80(%rsi),%xmm5
+ pxor %xmm4,%xmm10
+ movdqu 96(%rsi),%xmm4
+ pxor %xmm5,%xmm11
+ movdqu 112(%rsi),%xmm5
+ pxor %xmm4,%xmm0
+ pxor %xmm5,%xmm1
+
+ movdqu %xmm8,0(%rdi)
+ movdqu %xmm9,16(%rdi)
+ movdqu %xmm2,32(%rdi)
+ movdqu %xmm3,48(%rdi)
+ movdqu %xmm10,64(%rdi)
+ movdqu %xmm11,80(%rdi)
+ movdqu %xmm0,96(%rdi)
+ movdqu %xmm1,112(%rdi)
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.L128_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ChaCha20_128,.-ChaCha20_128
.type ChaCha20_4x,@function
.align 32
ChaCha20_4x:
+.cfi_startproc
.LChaCha20_4x:
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
movq %r10,%r11
shrq $32,%r10
testq $32,%r10
@@ -448,8 +641,7 @@ ChaCha20_4x:
je .Ldo_sse3_after_all
.Lproceed4x:
- leaq -120(%rsp),%r11
- subq $0x148+0,%rsp
+ subq $0x140+8,%rsp
movdqa .Lsigma(%rip),%xmm11
movdqu (%rcx),%xmm15
movdqu 16(%rcx),%xmm7
@@ -976,15 +1168,20 @@ ChaCha20_4x:
jnz .Loop_tail4x
.Ldone4x:
- addq $0x148+0,%rsp
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.L4x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_4x,.-ChaCha20_4x
.type ChaCha20_4xop,@function
.align 32
ChaCha20_4xop:
+.cfi_startproc
.LChaCha20_4xop:
- leaq -120(%rsp),%r11
- subq $0x148+0,%rsp
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
+ subq $0x140+8,%rsp
vzeroupper
vmovdqa .Lsigma(%rip),%xmm11
@@ -1386,18 +1583,22 @@ ChaCha20_4xop:
.Ldone4xop:
vzeroupper
- addq $0x148+0,%rsp
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.L4xop_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_4xop,.-ChaCha20_4xop
.type ChaCha20_8x,@function
.align 32
ChaCha20_8x:
+.cfi_startproc
.LChaCha20_8x:
- movq %rsp,%r10
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
subq $0x280+8,%rsp
andq $-32,%rsp
vzeroupper
- movq %r10,640(%rsp)
@@ -1988,6 +2189,1240 @@ ChaCha20_8x:
.Ldone8x:
vzeroall
- movq 640(%rsp),%rsp
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.L8x_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
+.type ChaCha20_avx512,@function
+.align 32
+ChaCha20_avx512:
+.cfi_startproc
+.LChaCha20_avx512:
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
+ cmpq $512,%rdx
+ ja .LChaCha20_16x
+
+ subq $64+8,%rsp
+ vbroadcasti32x4 .Lsigma(%rip),%zmm0
+ vbroadcasti32x4 (%rcx),%zmm1
+ vbroadcasti32x4 16(%rcx),%zmm2
+ vbroadcasti32x4 (%r8),%zmm3
+
+ vmovdqa32 %zmm0,%zmm16
+ vmovdqa32 %zmm1,%zmm17
+ vmovdqa32 %zmm2,%zmm18
+ vpaddd .Lzeroz(%rip),%zmm3,%zmm3
+ vmovdqa32 .Lfourz(%rip),%zmm20
+ movq $10,%r8
+ vmovdqa32 %zmm3,%zmm19
+ jmp .Loop_avx512
+
+.align 16
+.Loop_outer_avx512:
+ vmovdqa32 %zmm16,%zmm0
+ vmovdqa32 %zmm17,%zmm1
+ vmovdqa32 %zmm18,%zmm2
+ vpaddd %zmm20,%zmm19,%zmm3
+ movq $10,%r8
+ vmovdqa32 %zmm3,%zmm19
+ jmp .Loop_avx512
+
+.align 32
+.Loop_avx512:
+ vpaddd %zmm1,%zmm0,%zmm0
+ vpxord %zmm0,%zmm3,%zmm3
+ vprold $16,%zmm3,%zmm3
+ vpaddd %zmm3,%zmm2,%zmm2
+ vpxord %zmm2,%zmm1,%zmm1
+ vprold $12,%zmm1,%zmm1
+ vpaddd %zmm1,%zmm0,%zmm0
+ vpxord %zmm0,%zmm3,%zmm3
+ vprold $8,%zmm3,%zmm3
+ vpaddd %zmm3,%zmm2,%zmm2
+ vpxord %zmm2,%zmm1,%zmm1
+ vprold $7,%zmm1,%zmm1
+ vpshufd $78,%zmm2,%zmm2
+ vpshufd $57,%zmm1,%zmm1
+ vpshufd $147,%zmm3,%zmm3
+ vpaddd %zmm1,%zmm0,%zmm0
+ vpxord %zmm0,%zmm3,%zmm3
+ vprold $16,%zmm3,%zmm3
+ vpaddd %zmm3,%zmm2,%zmm2
+ vpxord %zmm2,%zmm1,%zmm1
+ vprold $12,%zmm1,%zmm1
+ vpaddd %zmm1,%zmm0,%zmm0
+ vpxord %zmm0,%zmm3,%zmm3
+ vprold $8,%zmm3,%zmm3
+ vpaddd %zmm3,%zmm2,%zmm2
+ vpxord %zmm2,%zmm1,%zmm1
+ vprold $7,%zmm1,%zmm1
+ vpshufd $78,%zmm2,%zmm2
+ vpshufd $147,%zmm1,%zmm1
+ vpshufd $57,%zmm3,%zmm3
+ decq %r8
+ jnz .Loop_avx512
+ vpaddd %zmm16,%zmm0,%zmm0
+ vpaddd %zmm17,%zmm1,%zmm1
+ vpaddd %zmm18,%zmm2,%zmm2
+ vpaddd %zmm19,%zmm3,%zmm3
+
+ subq $64,%rdx
+ jb .Ltail64_avx512
+
+ vpxor 0(%rsi),%xmm0,%xmm4
+ vpxor 16(%rsi),%xmm1,%xmm5
+ vpxor 32(%rsi),%xmm2,%xmm6
+ vpxor 48(%rsi),%xmm3,%xmm7
+ leaq 64(%rsi),%rsi
+
+ vmovdqu %xmm4,0(%rdi)
+ vmovdqu %xmm5,16(%rdi)
+ vmovdqu %xmm6,32(%rdi)
+ vmovdqu %xmm7,48(%rdi)
+ leaq 64(%rdi),%rdi
+
+ jz .Ldone_avx512
+
+ vextracti32x4 $1,%zmm0,%xmm4
+ vextracti32x4 $1,%zmm1,%xmm5
+ vextracti32x4 $1,%zmm2,%xmm6
+ vextracti32x4 $1,%zmm3,%xmm7
+
+ subq $64,%rdx
+ jb .Ltail_avx512
+
+ vpxor 0(%rsi),%xmm4,%xmm4
+ vpxor 16(%rsi),%xmm5,%xmm5
+ vpxor 32(%rsi),%xmm6,%xmm6
+ vpxor 48(%rsi),%xmm7,%xmm7
+ leaq 64(%rsi),%rsi
+
+ vmovdqu %xmm4,0(%rdi)
+ vmovdqu %xmm5,16(%rdi)
+ vmovdqu %xmm6,32(%rdi)
+ vmovdqu %xmm7,48(%rdi)
+ leaq 64(%rdi),%rdi
+
+ jz .Ldone_avx512
+
+ vextracti32x4 $2,%zmm0,%xmm4
+ vextracti32x4 $2,%zmm1,%xmm5
+ vextracti32x4 $2,%zmm2,%xmm6
+ vextracti32x4 $2,%zmm3,%xmm7
+
+ subq $64,%rdx
+ jb .Ltail_avx512
+
+ vpxor 0(%rsi),%xmm4,%xmm4
+ vpxor 16(%rsi),%xmm5,%xmm5
+ vpxor 32(%rsi),%xmm6,%xmm6
+ vpxor 48(%rsi),%xmm7,%xmm7
+ leaq 64(%rsi),%rsi
+
+ vmovdqu %xmm4,0(%rdi)
+ vmovdqu %xmm5,16(%rdi)
+ vmovdqu %xmm6,32(%rdi)
+ vmovdqu %xmm7,48(%rdi)
+ leaq 64(%rdi),%rdi
+
+ jz .Ldone_avx512
+
+ vextracti32x4 $3,%zmm0,%xmm4
+ vextracti32x4 $3,%zmm1,%xmm5
+ vextracti32x4 $3,%zmm2,%xmm6
+ vextracti32x4 $3,%zmm3,%xmm7
+
+ subq $64,%rdx
+ jb .Ltail_avx512
+
+ vpxor 0(%rsi),%xmm4,%xmm4
+ vpxor 16(%rsi),%xmm5,%xmm5
+ vpxor 32(%rsi),%xmm6,%xmm6
+ vpxor 48(%rsi),%xmm7,%xmm7
+ leaq 64(%rsi),%rsi
+
+ vmovdqu %xmm4,0(%rdi)
+ vmovdqu %xmm5,16(%rdi)
+ vmovdqu %xmm6,32(%rdi)
+ vmovdqu %xmm7,48(%rdi)
+ leaq 64(%rdi),%rdi
+
+ jnz .Loop_outer_avx512
+
+ jmp .Ldone_avx512
+
+.align 16
+.Ltail64_avx512:
+ vmovdqa %xmm0,0(%rsp)
+ vmovdqa %xmm1,16(%rsp)
+ vmovdqa %xmm2,32(%rsp)
+ vmovdqa %xmm3,48(%rsp)
+ addq $64,%rdx
+ jmp .Loop_tail_avx512
+
+.align 16
+.Ltail_avx512:
+ vmovdqa %xmm4,0(%rsp)
+ vmovdqa %xmm5,16(%rsp)
+ vmovdqa %xmm6,32(%rsp)
+ vmovdqa %xmm7,48(%rsp)
+ addq $64,%rdx
+
+.Loop_tail_avx512:
+ movzbl (%rsi,%r8,1),%eax
+ movzbl (%rsp,%r8,1),%ecx
+ leaq 1(%r8),%r8
+ xorl %ecx,%eax
+ movb %al,-1(%rdi,%r8,1)
+ decq %rdx
+ jnz .Loop_tail_avx512
+
+ vmovdqu32 %zmm16,0(%rsp)
+
+.Ldone_avx512:
+ vzeroall
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.Lavx512_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ChaCha20_avx512,.-ChaCha20_avx512
+.type ChaCha20_avx512vl,@function
+.align 32
+ChaCha20_avx512vl:
+.cfi_startproc
+.LChaCha20_avx512vl:
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
+ cmpq $128,%rdx
+ ja .LChaCha20_8xvl
+
+ subq $64+8,%rsp
+ vbroadcasti128 .Lsigma(%rip),%ymm0
+ vbroadcasti128 (%rcx),%ymm1
+ vbroadcasti128 16(%rcx),%ymm2
+ vbroadcasti128 (%r8),%ymm3
+
+ vmovdqa32 %ymm0,%ymm16
+ vmovdqa32 %ymm1,%ymm17
+ vmovdqa32 %ymm2,%ymm18
+ vpaddd .Lzeroz(%rip),%ymm3,%ymm3
+ vmovdqa32 .Ltwoy(%rip),%ymm20
+ movq $10,%r8
+ vmovdqa32 %ymm3,%ymm19
+ jmp .Loop_avx512vl
+
+.align 16
+.Loop_outer_avx512vl:
+ vmovdqa32 %ymm18,%ymm2
+ vpaddd %ymm20,%ymm19,%ymm3
+ movq $10,%r8
+ vmovdqa32 %ymm3,%ymm19
+ jmp .Loop_avx512vl
+
+.align 32
+.Loop_avx512vl:
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vprold $16,%ymm3,%ymm3
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vprold $12,%ymm1,%ymm1
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vprold $8,%ymm3,%ymm3
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vprold $7,%ymm1,%ymm1
+ vpshufd $78,%ymm2,%ymm2
+ vpshufd $57,%ymm1,%ymm1
+ vpshufd $147,%ymm3,%ymm3
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vprold $16,%ymm3,%ymm3
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vprold $12,%ymm1,%ymm1
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vprold $8,%ymm3,%ymm3
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vprold $7,%ymm1,%ymm1
+ vpshufd $78,%ymm2,%ymm2
+ vpshufd $147,%ymm1,%ymm1
+ vpshufd $57,%ymm3,%ymm3
+ decq %r8
+ jnz .Loop_avx512vl
+ vpaddd %ymm16,%ymm0,%ymm0
+ vpaddd %ymm17,%ymm1,%ymm1
+ vpaddd %ymm18,%ymm2,%ymm2
+ vpaddd %ymm19,%ymm3,%ymm3
+
+ subq $64,%rdx
+ jb .Ltail64_avx512vl
+
+ vpxor 0(%rsi),%xmm0,%xmm4
+ vpxor 16(%rsi),%xmm1,%xmm5
+ vpxor 32(%rsi),%xmm2,%xmm6
+ vpxor 48(%rsi),%xmm3,%xmm7
+ leaq 64(%rsi),%rsi
+
+ vmovdqu %xmm4,0(%rdi)
+ vmovdqu %xmm5,16(%rdi)
+ vmovdqu %xmm6,32(%rdi)
+ vmovdqu %xmm7,48(%rdi)
+ leaq 64(%rdi),%rdi
+
+ jz .Ldone_avx512vl
+
+ vextracti128 $1,%ymm0,%xmm4
+ vextracti128 $1,%ymm1,%xmm5
+ vextracti128 $1,%ymm2,%xmm6
+ vextracti128 $1,%ymm3,%xmm7
+
+ subq $64,%rdx
+ jb .Ltail_avx512vl
+
+ vpxor 0(%rsi),%xmm4,%xmm4
+ vpxor 16(%rsi),%xmm5,%xmm5
+ vpxor 32(%rsi),%xmm6,%xmm6
+ vpxor 48(%rsi),%xmm7,%xmm7
+ leaq 64(%rsi),%rsi
+
+ vmovdqu %xmm4,0(%rdi)
+ vmovdqu %xmm5,16(%rdi)
+ vmovdqu %xmm6,32(%rdi)
+ vmovdqu %xmm7,48(%rdi)
+ leaq 64(%rdi),%rdi
+
+ vmovdqa32 %ymm16,%ymm0
+ vmovdqa32 %ymm17,%ymm1
+ jnz .Loop_outer_avx512vl
+
+ jmp .Ldone_avx512vl
+
+.align 16
+.Ltail64_avx512vl:
+ vmovdqa %xmm0,0(%rsp)
+ vmovdqa %xmm1,16(%rsp)
+ vmovdqa %xmm2,32(%rsp)
+ vmovdqa %xmm3,48(%rsp)
+ addq $64,%rdx
+ jmp .Loop_tail_avx512vl
+
+.align 16
+.Ltail_avx512vl:
+ vmovdqa %xmm4,0(%rsp)
+ vmovdqa %xmm5,16(%rsp)
+ vmovdqa %xmm6,32(%rsp)
+ vmovdqa %xmm7,48(%rsp)
+ addq $64,%rdx
+
+.Loop_tail_avx512vl:
+ movzbl (%rsi,%r8,1),%eax
+ movzbl (%rsp,%r8,1),%ecx
+ leaq 1(%r8),%r8
+ xorl %ecx,%eax
+ movb %al,-1(%rdi,%r8,1)
+ decq %rdx
+ jnz .Loop_tail_avx512vl
+
+ vmovdqu32 %ymm16,0(%rsp)
+ vmovdqu32 %ymm16,32(%rsp)
+
+.Ldone_avx512vl:
+ vzeroall
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.Lavx512vl_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ChaCha20_avx512vl,.-ChaCha20_avx512vl
+.type ChaCha20_16x,@function
+.align 32
+ChaCha20_16x:
+.cfi_startproc
+.LChaCha20_16x:
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
+ subq $64+8,%rsp
+ andq $-64,%rsp
+ vzeroupper
+
+ leaq .Lsigma(%rip),%r10
+ vbroadcasti32x4 (%r10),%zmm3
+ vbroadcasti32x4 (%rcx),%zmm7
+ vbroadcasti32x4 16(%rcx),%zmm11
+ vbroadcasti32x4 (%r8),%zmm15
+
+ vpshufd $0x00,%zmm3,%zmm0
+ vpshufd $0x55,%zmm3,%zmm1
+ vpshufd $0xaa,%zmm3,%zmm2
+ vpshufd $0xff,%zmm3,%zmm3
+ vmovdqa64 %zmm0,%zmm16
+ vmovdqa64 %zmm1,%zmm17
+ vmovdqa64 %zmm2,%zmm18
+ vmovdqa64 %zmm3,%zmm19
+
+ vpshufd $0x00,%zmm7,%zmm4
+ vpshufd $0x55,%zmm7,%zmm5
+ vpshufd $0xaa,%zmm7,%zmm6
+ vpshufd $0xff,%zmm7,%zmm7
+ vmovdqa64 %zmm4,%zmm20
+ vmovdqa64 %zmm5,%zmm21
+ vmovdqa64 %zmm6,%zmm22
+ vmovdqa64 %zmm7,%zmm23
+
+ vpshufd $0x00,%zmm11,%zmm8
+ vpshufd $0x55,%zmm11,%zmm9
+ vpshufd $0xaa,%zmm11,%zmm10
+ vpshufd $0xff,%zmm11,%zmm11
+ vmovdqa64 %zmm8,%zmm24
+ vmovdqa64 %zmm9,%zmm25
+ vmovdqa64 %zmm10,%zmm26
+ vmovdqa64 %zmm11,%zmm27
+
+ vpshufd $0x00,%zmm15,%zmm12
+ vpshufd $0x55,%zmm15,%zmm13
+ vpshufd $0xaa,%zmm15,%zmm14
+ vpshufd $0xff,%zmm15,%zmm15
+ vpaddd .Lincz(%rip),%zmm12,%zmm12
+ vmovdqa64 %zmm12,%zmm28
+ vmovdqa64 %zmm13,%zmm29
+ vmovdqa64 %zmm14,%zmm30
+ vmovdqa64 %zmm15,%zmm31
+
+ movl $10,%eax
+ jmp .Loop16x
+
+.align 32
+.Loop_outer16x:
+ vpbroadcastd 0(%r10),%zmm0
+ vpbroadcastd 4(%r10),%zmm1
+ vpbroadcastd 8(%r10),%zmm2
+ vpbroadcastd 12(%r10),%zmm3
+ vpaddd .Lsixteen(%rip),%zmm28,%zmm28
+ vmovdqa64 %zmm20,%zmm4
+ vmovdqa64 %zmm21,%zmm5
+ vmovdqa64 %zmm22,%zmm6
+ vmovdqa64 %zmm23,%zmm7
+ vmovdqa64 %zmm24,%zmm8
+ vmovdqa64 %zmm25,%zmm9
+ vmovdqa64 %zmm26,%zmm10
+ vmovdqa64 %zmm27,%zmm11
+ vmovdqa64 %zmm28,%zmm12
+ vmovdqa64 %zmm29,%zmm13
+ vmovdqa64 %zmm30,%zmm14
+ vmovdqa64 %zmm31,%zmm15
+
+ vmovdqa64 %zmm0,%zmm16
+ vmovdqa64 %zmm1,%zmm17
+ vmovdqa64 %zmm2,%zmm18
+ vmovdqa64 %zmm3,%zmm19
+
+ movl $10,%eax
+ jmp .Loop16x
+
+.align 32
+.Loop16x:
+ vpaddd %zmm4,%zmm0,%zmm0
+ vpaddd %zmm5,%zmm1,%zmm1
+ vpaddd %zmm6,%zmm2,%zmm2
+ vpaddd %zmm7,%zmm3,%zmm3
+ vpxord %zmm0,%zmm12,%zmm12
+ vpxord %zmm1,%zmm13,%zmm13
+ vpxord %zmm2,%zmm14,%zmm14
+ vpxord %zmm3,%zmm15,%zmm15
+ vprold $16,%zmm12,%zmm12
+ vprold $16,%zmm13,%zmm13
+ vprold $16,%zmm14,%zmm14
+ vprold $16,%zmm15,%zmm15
+ vpaddd %zmm12,%zmm8,%zmm8
+ vpaddd %zmm13,%zmm9,%zmm9
+ vpaddd %zmm14,%zmm10,%zmm10
+ vpaddd %zmm15,%zmm11,%zmm11
+ vpxord %zmm8,%zmm4,%zmm4
+ vpxord %zmm9,%zmm5,%zmm5
+ vpxord %zmm10,%zmm6,%zmm6
+ vpxord %zmm11,%zmm7,%zmm7
+ vprold $12,%zmm4,%zmm4
+ vprold $12,%zmm5,%zmm5
+ vprold $12,%zmm6,%zmm6
+ vprold $12,%zmm7,%zmm7
+ vpaddd %zmm4,%zmm0,%zmm0
+ vpaddd %zmm5,%zmm1,%zmm1
+ vpaddd %zmm6,%zmm2,%zmm2
+ vpaddd %zmm7,%zmm3,%zmm3
+ vpxord %zmm0,%zmm12,%zmm12
+ vpxord %zmm1,%zmm13,%zmm13
+ vpxord %zmm2,%zmm14,%zmm14
+ vpxord %zmm3,%zmm15,%zmm15
+ vprold $8,%zmm12,%zmm12
+ vprold $8,%zmm13,%zmm13
+ vprold $8,%zmm14,%zmm14
+ vprold $8,%zmm15,%zmm15
+ vpaddd %zmm12,%zmm8,%zmm8
+ vpaddd %zmm13,%zmm9,%zmm9
+ vpaddd %zmm14,%zmm10,%zmm10
+ vpaddd %zmm15,%zmm11,%zmm11
+ vpxord %zmm8,%zmm4,%zmm4
+ vpxord %zmm9,%zmm5,%zmm5
+ vpxord %zmm10,%zmm6,%zmm6
+ vpxord %zmm11,%zmm7,%zmm7
+ vprold $7,%zmm4,%zmm4
+ vprold $7,%zmm5,%zmm5
+ vprold $7,%zmm6,%zmm6
+ vprold $7,%zmm7,%zmm7
+ vpaddd %zmm5,%zmm0,%zmm0
+ vpaddd %zmm6,%zmm1,%zmm1
+ vpaddd %zmm7,%zmm2,%zmm2
+ vpaddd %zmm4,%zmm3,%zmm3
+ vpxord %zmm0,%zmm15,%zmm15
+ vpxord %zmm1,%zmm12,%zmm12
+ vpxord %zmm2,%zmm13,%zmm13
+ vpxord %zmm3,%zmm14,%zmm14
+ vprold $16,%zmm15,%zmm15
+ vprold $16,%zmm12,%zmm12
+ vprold $16,%zmm13,%zmm13
+ vprold $16,%zmm14,%zmm14
+ vpaddd %zmm15,%zmm10,%zmm10
+ vpaddd %zmm12,%zmm11,%zmm11
+ vpaddd %zmm13,%zmm8,%zmm8
+ vpaddd %zmm14,%zmm9,%zmm9
+ vpxord %zmm10,%zmm5,%zmm5
+ vpxord %zmm11,%zmm6,%zmm6
+ vpxord %zmm8,%zmm7,%zmm7
+ vpxord %zmm9,%zmm4,%zmm4
+ vprold $12,%zmm5,%zmm5
+ vprold $12,%zmm6,%zmm6
+ vprold $12,%zmm7,%zmm7
+ vprold $12,%zmm4,%zmm4
+ vpaddd %zmm5,%zmm0,%zmm0
+ vpaddd %zmm6,%zmm1,%zmm1
+ vpaddd %zmm7,%zmm2,%zmm2
+ vpaddd %zmm4,%zmm3,%zmm3
+ vpxord %zmm0,%zmm15,%zmm15
+ vpxord %zmm1,%zmm12,%zmm12
+ vpxord %zmm2,%zmm13,%zmm13
+ vpxord %zmm3,%zmm14,%zmm14
+ vprold $8,%zmm15,%zmm15
+ vprold $8,%zmm12,%zmm12
+ vprold $8,%zmm13,%zmm13
+ vprold $8,%zmm14,%zmm14
+ vpaddd %zmm15,%zmm10,%zmm10
+ vpaddd %zmm12,%zmm11,%zmm11
+ vpaddd %zmm13,%zmm8,%zmm8
+ vpaddd %zmm14,%zmm9,%zmm9
+ vpxord %zmm10,%zmm5,%zmm5
+ vpxord %zmm11,%zmm6,%zmm6
+ vpxord %zmm8,%zmm7,%zmm7
+ vpxord %zmm9,%zmm4,%zmm4
+ vprold $7,%zmm5,%zmm5
+ vprold $7,%zmm6,%zmm6
+ vprold $7,%zmm7,%zmm7
+ vprold $7,%zmm4,%zmm4
+ decl %eax
+ jnz .Loop16x
+
+ vpaddd %zmm16,%zmm0,%zmm0
+ vpaddd %zmm17,%zmm1,%zmm1
+ vpaddd %zmm18,%zmm2,%zmm2
+ vpaddd %zmm19,%zmm3,%zmm3
+
+ vpunpckldq %zmm1,%zmm0,%zmm18
+ vpunpckldq %zmm3,%zmm2,%zmm19
+ vpunpckhdq %zmm1,%zmm0,%zmm0
+ vpunpckhdq %zmm3,%zmm2,%zmm2
+ vpunpcklqdq %zmm19,%zmm18,%zmm1
+ vpunpckhqdq %zmm19,%zmm18,%zmm18
+ vpunpcklqdq %zmm2,%zmm0,%zmm3
+ vpunpckhqdq %zmm2,%zmm0,%zmm0
+ vpaddd %zmm20,%zmm4,%zmm4
+ vpaddd %zmm21,%zmm5,%zmm5
+ vpaddd %zmm22,%zmm6,%zmm6
+ vpaddd %zmm23,%zmm7,%zmm7
+
+ vpunpckldq %zmm5,%zmm4,%zmm2
+ vpunpckldq %zmm7,%zmm6,%zmm19
+ vpunpckhdq %zmm5,%zmm4,%zmm4
+ vpunpckhdq %zmm7,%zmm6,%zmm6
+ vpunpcklqdq %zmm19,%zmm2,%zmm5
+ vpunpckhqdq %zmm19,%zmm2,%zmm2
+ vpunpcklqdq %zmm6,%zmm4,%zmm7
+ vpunpckhqdq %zmm6,%zmm4,%zmm4
+ vshufi32x4 $0x44,%zmm5,%zmm1,%zmm19
+ vshufi32x4 $0xee,%zmm5,%zmm1,%zmm5
+ vshufi32x4 $0x44,%zmm2,%zmm18,%zmm1
+ vshufi32x4 $0xee,%zmm2,%zmm18,%zmm2
+ vshufi32x4 $0x44,%zmm7,%zmm3,%zmm18
+ vshufi32x4 $0xee,%zmm7,%zmm3,%zmm7
+ vshufi32x4 $0x44,%zmm4,%zmm0,%zmm3
+ vshufi32x4 $0xee,%zmm4,%zmm0,%zmm4
+ vpaddd %zmm24,%zmm8,%zmm8
+ vpaddd %zmm25,%zmm9,%zmm9
+ vpaddd %zmm26,%zmm10,%zmm10
+ vpaddd %zmm27,%zmm11,%zmm11
+
+ vpunpckldq %zmm9,%zmm8,%zmm6
+ vpunpckldq %zmm11,%zmm10,%zmm0
+ vpunpckhdq %zmm9,%zmm8,%zmm8
+ vpunpckhdq %zmm11,%zmm10,%zmm10
+ vpunpcklqdq %zmm0,%zmm6,%zmm9
+ vpunpckhqdq %zmm0,%zmm6,%zmm6
+ vpunpcklqdq %zmm10,%zmm8,%zmm11
+ vpunpckhqdq %zmm10,%zmm8,%zmm8
+ vpaddd %zmm28,%zmm12,%zmm12
+ vpaddd %zmm29,%zmm13,%zmm13
+ vpaddd %zmm30,%zmm14,%zmm14
+ vpaddd %zmm31,%zmm15,%zmm15
+
+ vpunpckldq %zmm13,%zmm12,%zmm10
+ vpunpckldq %zmm15,%zmm14,%zmm0
+ vpunpckhdq %zmm13,%zmm12,%zmm12
+ vpunpckhdq %zmm15,%zmm14,%zmm14
+ vpunpcklqdq %zmm0,%zmm10,%zmm13
+ vpunpckhqdq %zmm0,%zmm10,%zmm10
+ vpunpcklqdq %zmm14,%zmm12,%zmm15
+ vpunpckhqdq %zmm14,%zmm12,%zmm12
+ vshufi32x4 $0x44,%zmm13,%zmm9,%zmm0
+ vshufi32x4 $0xee,%zmm13,%zmm9,%zmm13
+ vshufi32x4 $0x44,%zmm10,%zmm6,%zmm9
+ vshufi32x4 $0xee,%zmm10,%zmm6,%zmm10
+ vshufi32x4 $0x44,%zmm15,%zmm11,%zmm6
+ vshufi32x4 $0xee,%zmm15,%zmm11,%zmm15
+ vshufi32x4 $0x44,%zmm12,%zmm8,%zmm11
+ vshufi32x4 $0xee,%zmm12,%zmm8,%zmm12
+ vshufi32x4 $0x88,%zmm0,%zmm19,%zmm16
+ vshufi32x4 $0xdd,%zmm0,%zmm19,%zmm19
+ vshufi32x4 $0x88,%zmm13,%zmm5,%zmm0
+ vshufi32x4 $0xdd,%zmm13,%zmm5,%zmm13
+ vshufi32x4 $0x88,%zmm9,%zmm1,%zmm17
+ vshufi32x4 $0xdd,%zmm9,%zmm1,%zmm1
+ vshufi32x4 $0x88,%zmm10,%zmm2,%zmm9
+ vshufi32x4 $0xdd,%zmm10,%zmm2,%zmm10
+ vshufi32x4 $0x88,%zmm6,%zmm18,%zmm14
+ vshufi32x4 $0xdd,%zmm6,%zmm18,%zmm18
+ vshufi32x4 $0x88,%zmm15,%zmm7,%zmm6
+ vshufi32x4 $0xdd,%zmm15,%zmm7,%zmm15
+ vshufi32x4 $0x88,%zmm11,%zmm3,%zmm8
+ vshufi32x4 $0xdd,%zmm11,%zmm3,%zmm3
+ vshufi32x4 $0x88,%zmm12,%zmm4,%zmm11
+ vshufi32x4 $0xdd,%zmm12,%zmm4,%zmm12
+ cmpq $1024,%rdx
+ jb .Ltail16x
+
+ vpxord 0(%rsi),%zmm16,%zmm16
+ vpxord 64(%rsi),%zmm17,%zmm17
+ vpxord 128(%rsi),%zmm14,%zmm14
+ vpxord 192(%rsi),%zmm8,%zmm8
+ vmovdqu32 %zmm16,0(%rdi)
+ vmovdqu32 %zmm17,64(%rdi)
+ vmovdqu32 %zmm14,128(%rdi)
+ vmovdqu32 %zmm8,192(%rdi)
+
+ vpxord 256(%rsi),%zmm19,%zmm19
+ vpxord 320(%rsi),%zmm1,%zmm1
+ vpxord 384(%rsi),%zmm18,%zmm18
+ vpxord 448(%rsi),%zmm3,%zmm3
+ vmovdqu32 %zmm19,256(%rdi)
+ vmovdqu32 %zmm1,320(%rdi)
+ vmovdqu32 %zmm18,384(%rdi)
+ vmovdqu32 %zmm3,448(%rdi)
+
+ vpxord 512(%rsi),%zmm0,%zmm0
+ vpxord 576(%rsi),%zmm9,%zmm9
+ vpxord 640(%rsi),%zmm6,%zmm6
+ vpxord 704(%rsi),%zmm11,%zmm11
+ vmovdqu32 %zmm0,512(%rdi)
+ vmovdqu32 %zmm9,576(%rdi)
+ vmovdqu32 %zmm6,640(%rdi)
+ vmovdqu32 %zmm11,704(%rdi)
+
+ vpxord 768(%rsi),%zmm13,%zmm13
+ vpxord 832(%rsi),%zmm10,%zmm10
+ vpxord 896(%rsi),%zmm15,%zmm15
+ vpxord 960(%rsi),%zmm12,%zmm12
+ leaq 1024(%rsi),%rsi
+ vmovdqu32 %zmm13,768(%rdi)
+ vmovdqu32 %zmm10,832(%rdi)
+ vmovdqu32 %zmm15,896(%rdi)
+ vmovdqu32 %zmm12,960(%rdi)
+ leaq 1024(%rdi),%rdi
+
+ subq $1024,%rdx
+ jnz .Loop_outer16x
+
+ jmp .Ldone16x
+
+.align 32
+.Ltail16x:
+ xorq %r10,%r10
+ subq %rsi,%rdi
+ cmpq $64,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm16,%zmm16
+ vmovdqu32 %zmm16,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm17,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $128,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm17,%zmm17
+ vmovdqu32 %zmm17,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm14,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $192,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm14,%zmm14
+ vmovdqu32 %zmm14,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm8,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $256,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm8,%zmm8
+ vmovdqu32 %zmm8,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm19,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $320,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm19,%zmm19
+ vmovdqu32 %zmm19,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm1,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $384,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm1,%zmm1
+ vmovdqu32 %zmm1,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm18,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $448,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm18,%zmm18
+ vmovdqu32 %zmm18,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm3,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $512,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm3,%zmm3
+ vmovdqu32 %zmm3,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm0,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $576,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm0,%zmm0
+ vmovdqu32 %zmm0,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm9,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $640,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm9,%zmm9
+ vmovdqu32 %zmm9,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm6,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $704,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm6,%zmm6
+ vmovdqu32 %zmm6,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm11,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $768,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm11,%zmm11
+ vmovdqu32 %zmm11,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm13,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $832,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm13,%zmm13
+ vmovdqu32 %zmm13,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm10,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $896,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm10,%zmm10
+ vmovdqu32 %zmm10,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm15,%zmm16
+ leaq 64(%rsi),%rsi
+
+ cmpq $960,%rdx
+ jb .Less_than_64_16x
+ vpxord (%rsi),%zmm15,%zmm15
+ vmovdqu32 %zmm15,(%rdi,%rsi,1)
+ je .Ldone16x
+ vmovdqa32 %zmm12,%zmm16
+ leaq 64(%rsi),%rsi
+
+.Less_than_64_16x:
+ vmovdqa32 %zmm16,0(%rsp)
+ leaq (%rdi,%rsi,1),%rdi
+ andq $63,%rdx
+
+.Loop_tail16x:
+ movzbl (%rsi,%r10,1),%eax
+ movzbl (%rsp,%r10,1),%ecx
+ leaq 1(%r10),%r10
+ xorl %ecx,%eax
+ movb %al,-1(%rdi,%r10,1)
+ decq %rdx
+ jnz .Loop_tail16x
+
+ vpxord %zmm16,%zmm16,%zmm16
+ vmovdqa32 %zmm16,0(%rsp)
+
+.Ldone16x:
+ vzeroall
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.L16x_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ChaCha20_16x,.-ChaCha20_16x
+.type ChaCha20_8xvl,@function
+.align 32
+ChaCha20_8xvl:
+.cfi_startproc
+.LChaCha20_8xvl:
+ movq %rsp,%r9
+.cfi_def_cfa_register %r9
+ subq $64+8,%rsp
+ andq $-64,%rsp
+ vzeroupper
+
+ leaq .Lsigma(%rip),%r10
+ vbroadcasti128 (%r10),%ymm3
+ vbroadcasti128 (%rcx),%ymm7
+ vbroadcasti128 16(%rcx),%ymm11
+ vbroadcasti128 (%r8),%ymm15
+
+ vpshufd $0x00,%ymm3,%ymm0
+ vpshufd $0x55,%ymm3,%ymm1
+ vpshufd $0xaa,%ymm3,%ymm2
+ vpshufd $0xff,%ymm3,%ymm3
+ vmovdqa64 %ymm0,%ymm16
+ vmovdqa64 %ymm1,%ymm17
+ vmovdqa64 %ymm2,%ymm18
+ vmovdqa64 %ymm3,%ymm19
+
+ vpshufd $0x00,%ymm7,%ymm4
+ vpshufd $0x55,%ymm7,%ymm5
+ vpshufd $0xaa,%ymm7,%ymm6
+ vpshufd $0xff,%ymm7,%ymm7
+ vmovdqa64 %ymm4,%ymm20
+ vmovdqa64 %ymm5,%ymm21
+ vmovdqa64 %ymm6,%ymm22
+ vmovdqa64 %ymm7,%ymm23
+
+ vpshufd $0x00,%ymm11,%ymm8
+ vpshufd $0x55,%ymm11,%ymm9
+ vpshufd $0xaa,%ymm11,%ymm10
+ vpshufd $0xff,%ymm11,%ymm11
+ vmovdqa64 %ymm8,%ymm24
+ vmovdqa64 %ymm9,%ymm25
+ vmovdqa64 %ymm10,%ymm26
+ vmovdqa64 %ymm11,%ymm27
+
+ vpshufd $0x00,%ymm15,%ymm12
+ vpshufd $0x55,%ymm15,%ymm13
+ vpshufd $0xaa,%ymm15,%ymm14
+ vpshufd $0xff,%ymm15,%ymm15
+ vpaddd .Lincy(%rip),%ymm12,%ymm12
+ vmovdqa64 %ymm12,%ymm28
+ vmovdqa64 %ymm13,%ymm29
+ vmovdqa64 %ymm14,%ymm30
+ vmovdqa64 %ymm15,%ymm31
+
+ movl $10,%eax
+ jmp .Loop8xvl
+
+.align 32
+.Loop_outer8xvl:
+
+
+ vpbroadcastd 8(%r10),%ymm2
+ vpbroadcastd 12(%r10),%ymm3
+ vpaddd .Leight(%rip),%ymm28,%ymm28
+ vmovdqa64 %ymm20,%ymm4
+ vmovdqa64 %ymm21,%ymm5
+ vmovdqa64 %ymm22,%ymm6
+ vmovdqa64 %ymm23,%ymm7
+ vmovdqa64 %ymm24,%ymm8
+ vmovdqa64 %ymm25,%ymm9
+ vmovdqa64 %ymm26,%ymm10
+ vmovdqa64 %ymm27,%ymm11
+ vmovdqa64 %ymm28,%ymm12
+ vmovdqa64 %ymm29,%ymm13
+ vmovdqa64 %ymm30,%ymm14
+ vmovdqa64 %ymm31,%ymm15
+
+ vmovdqa64 %ymm0,%ymm16
+ vmovdqa64 %ymm1,%ymm17
+ vmovdqa64 %ymm2,%ymm18
+ vmovdqa64 %ymm3,%ymm19
+
+ movl $10,%eax
+ jmp .Loop8xvl
+
+.align 32
+.Loop8xvl:
+ vpaddd %ymm4,%ymm0,%ymm0
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpaddd %ymm6,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpxor %ymm0,%ymm12,%ymm12
+ vpxor %ymm1,%ymm13,%ymm13
+ vpxor %ymm2,%ymm14,%ymm14
+ vpxor %ymm3,%ymm15,%ymm15
+ vprold $16,%ymm12,%ymm12
+ vprold $16,%ymm13,%ymm13
+ vprold $16,%ymm14,%ymm14
+ vprold $16,%ymm15,%ymm15
+ vpaddd %ymm12,%ymm8,%ymm8
+ vpaddd %ymm13,%ymm9,%ymm9
+ vpaddd %ymm14,%ymm10,%ymm10
+ vpaddd %ymm15,%ymm11,%ymm11
+ vpxor %ymm8,%ymm4,%ymm4
+ vpxor %ymm9,%ymm5,%ymm5
+ vpxor %ymm10,%ymm6,%ymm6
+ vpxor %ymm11,%ymm7,%ymm7
+ vprold $12,%ymm4,%ymm4
+ vprold $12,%ymm5,%ymm5
+ vprold $12,%ymm6,%ymm6
+ vprold $12,%ymm7,%ymm7
+ vpaddd %ymm4,%ymm0,%ymm0
+ vpaddd %ymm5,%ymm1,%ymm1
+ vpaddd %ymm6,%ymm2,%ymm2
+ vpaddd %ymm7,%ymm3,%ymm3
+ vpxor %ymm0,%ymm12,%ymm12
+ vpxor %ymm1,%ymm13,%ymm13
+ vpxor %ymm2,%ymm14,%ymm14
+ vpxor %ymm3,%ymm15,%ymm15
+ vprold $8,%ymm12,%ymm12
+ vprold $8,%ymm13,%ymm13
+ vprold $8,%ymm14,%ymm14
+ vprold $8,%ymm15,%ymm15
+ vpaddd %ymm12,%ymm8,%ymm8
+ vpaddd %ymm13,%ymm9,%ymm9
+ vpaddd %ymm14,%ymm10,%ymm10
+ vpaddd %ymm15,%ymm11,%ymm11
+ vpxor %ymm8,%ymm4,%ymm4
+ vpxor %ymm9,%ymm5,%ymm5
+ vpxor %ymm10,%ymm6,%ymm6
+ vpxor %ymm11,%ymm7,%ymm7
+ vprold $7,%ymm4,%ymm4
+ vprold $7,%ymm5,%ymm5
+ vprold $7,%ymm6,%ymm6
+ vprold $7,%ymm7,%ymm7
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpaddd %ymm6,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpaddd %ymm4,%ymm3,%ymm3
+ vpxor %ymm0,%ymm15,%ymm15
+ vpxor %ymm1,%ymm12,%ymm12
+ vpxor %ymm2,%ymm13,%ymm13
+ vpxor %ymm3,%ymm14,%ymm14
+ vprold $16,%ymm15,%ymm15
+ vprold $16,%ymm12,%ymm12
+ vprold $16,%ymm13,%ymm13
+ vprold $16,%ymm14,%ymm14
+ vpaddd %ymm15,%ymm10,%ymm10
+ vpaddd %ymm12,%ymm11,%ymm11
+ vpaddd %ymm13,%ymm8,%ymm8
+ vpaddd %ymm14,%ymm9,%ymm9
+ vpxor %ymm10,%ymm5,%ymm5
+ vpxor %ymm11,%ymm6,%ymm6
+ vpxor %ymm8,%ymm7,%ymm7
+ vpxor %ymm9,%ymm4,%ymm4
+ vprold $12,%ymm5,%ymm5
+ vprold $12,%ymm6,%ymm6
+ vprold $12,%ymm7,%ymm7
+ vprold $12,%ymm4,%ymm4
+ vpaddd %ymm5,%ymm0,%ymm0
+ vpaddd %ymm6,%ymm1,%ymm1
+ vpaddd %ymm7,%ymm2,%ymm2
+ vpaddd %ymm4,%ymm3,%ymm3
+ vpxor %ymm0,%ymm15,%ymm15
+ vpxor %ymm1,%ymm12,%ymm12
+ vpxor %ymm2,%ymm13,%ymm13
+ vpxor %ymm3,%ymm14,%ymm14
+ vprold $8,%ymm15,%ymm15
+ vprold $8,%ymm12,%ymm12
+ vprold $8,%ymm13,%ymm13
+ vprold $8,%ymm14,%ymm14
+ vpaddd %ymm15,%ymm10,%ymm10
+ vpaddd %ymm12,%ymm11,%ymm11
+ vpaddd %ymm13,%ymm8,%ymm8
+ vpaddd %ymm14,%ymm9,%ymm9
+ vpxor %ymm10,%ymm5,%ymm5
+ vpxor %ymm11,%ymm6,%ymm6
+ vpxor %ymm8,%ymm7,%ymm7
+ vpxor %ymm9,%ymm4,%ymm4
+ vprold $7,%ymm5,%ymm5
+ vprold $7,%ymm6,%ymm6
+ vprold $7,%ymm7,%ymm7
+ vprold $7,%ymm4,%ymm4
+ decl %eax
+ jnz .Loop8xvl
+
+ vpaddd %ymm16,%ymm0,%ymm0
+ vpaddd %ymm17,%ymm1,%ymm1
+ vpaddd %ymm18,%ymm2,%ymm2
+ vpaddd %ymm19,%ymm3,%ymm3
+
+ vpunpckldq %ymm1,%ymm0,%ymm18
+ vpunpckldq %ymm3,%ymm2,%ymm19
+ vpunpckhdq %ymm1,%ymm0,%ymm0
+ vpunpckhdq %ymm3,%ymm2,%ymm2
+ vpunpcklqdq %ymm19,%ymm18,%ymm1
+ vpunpckhqdq %ymm19,%ymm18,%ymm18
+ vpunpcklqdq %ymm2,%ymm0,%ymm3
+ vpunpckhqdq %ymm2,%ymm0,%ymm0
+ vpaddd %ymm20,%ymm4,%ymm4
+ vpaddd %ymm21,%ymm5,%ymm5
+ vpaddd %ymm22,%ymm6,%ymm6
+ vpaddd %ymm23,%ymm7,%ymm7
+
+ vpunpckldq %ymm5,%ymm4,%ymm2
+ vpunpckldq %ymm7,%ymm6,%ymm19
+ vpunpckhdq %ymm5,%ymm4,%ymm4
+ vpunpckhdq %ymm7,%ymm6,%ymm6
+ vpunpcklqdq %ymm19,%ymm2,%ymm5
+ vpunpckhqdq %ymm19,%ymm2,%ymm2
+ vpunpcklqdq %ymm6,%ymm4,%ymm7
+ vpunpckhqdq %ymm6,%ymm4,%ymm4
+ vshufi32x4 $0,%ymm5,%ymm1,%ymm19
+ vshufi32x4 $3,%ymm5,%ymm1,%ymm5
+ vshufi32x4 $0,%ymm2,%ymm18,%ymm1
+ vshufi32x4 $3,%ymm2,%ymm18,%ymm2
+ vshufi32x4 $0,%ymm7,%ymm3,%ymm18
+ vshufi32x4 $3,%ymm7,%ymm3,%ymm7
+ vshufi32x4 $0,%ymm4,%ymm0,%ymm3
+ vshufi32x4 $3,%ymm4,%ymm0,%ymm4
+ vpaddd %ymm24,%ymm8,%ymm8
+ vpaddd %ymm25,%ymm9,%ymm9
+ vpaddd %ymm26,%ymm10,%ymm10
+ vpaddd %ymm27,%ymm11,%ymm11
+
+ vpunpckldq %ymm9,%ymm8,%ymm6
+ vpunpckldq %ymm11,%ymm10,%ymm0
+ vpunpckhdq %ymm9,%ymm8,%ymm8
+ vpunpckhdq %ymm11,%ymm10,%ymm10
+ vpunpcklqdq %ymm0,%ymm6,%ymm9
+ vpunpckhqdq %ymm0,%ymm6,%ymm6
+ vpunpcklqdq %ymm10,%ymm8,%ymm11
+ vpunpckhqdq %ymm10,%ymm8,%ymm8
+ vpaddd %ymm28,%ymm12,%ymm12
+ vpaddd %ymm29,%ymm13,%ymm13
+ vpaddd %ymm30,%ymm14,%ymm14
+ vpaddd %ymm31,%ymm15,%ymm15
+
+ vpunpckldq %ymm13,%ymm12,%ymm10
+ vpunpckldq %ymm15,%ymm14,%ymm0
+ vpunpckhdq %ymm13,%ymm12,%ymm12
+ vpunpckhdq %ymm15,%ymm14,%ymm14
+ vpunpcklqdq %ymm0,%ymm10,%ymm13
+ vpunpckhqdq %ymm0,%ymm10,%ymm10
+ vpunpcklqdq %ymm14,%ymm12,%ymm15
+ vpunpckhqdq %ymm14,%ymm12,%ymm12
+ vperm2i128 $0x20,%ymm13,%ymm9,%ymm0
+ vperm2i128 $0x31,%ymm13,%ymm9,%ymm13
+ vperm2i128 $0x20,%ymm10,%ymm6,%ymm9
+ vperm2i128 $0x31,%ymm10,%ymm6,%ymm10
+ vperm2i128 $0x20,%ymm15,%ymm11,%ymm6
+ vperm2i128 $0x31,%ymm15,%ymm11,%ymm15
+ vperm2i128 $0x20,%ymm12,%ymm8,%ymm11
+ vperm2i128 $0x31,%ymm12,%ymm8,%ymm12
+ cmpq $512,%rdx
+ jb .Ltail8xvl
+
+ movl $0x80,%eax
+ vpxord 0(%rsi),%ymm19,%ymm19
+ vpxor 32(%rsi),%ymm0,%ymm0
+ vpxor 64(%rsi),%ymm5,%ymm5
+ vpxor 96(%rsi),%ymm13,%ymm13
+ leaq (%rsi,%rax,1),%rsi
+ vmovdqu32 %ymm19,0(%rdi)
+ vmovdqu %ymm0,32(%rdi)
+ vmovdqu %ymm5,64(%rdi)
+ vmovdqu %ymm13,96(%rdi)
+ leaq (%rdi,%rax,1),%rdi
+
+ vpxor 0(%rsi),%ymm1,%ymm1
+ vpxor 32(%rsi),%ymm9,%ymm9
+ vpxor 64(%rsi),%ymm2,%ymm2
+ vpxor 96(%rsi),%ymm10,%ymm10
+ leaq (%rsi,%rax,1),%rsi
+ vmovdqu %ymm1,0(%rdi)
+ vmovdqu %ymm9,32(%rdi)
+ vmovdqu %ymm2,64(%rdi)
+ vmovdqu %ymm10,96(%rdi)
+ leaq (%rdi,%rax,1),%rdi
+
+ vpxord 0(%rsi),%ymm18,%ymm18
+ vpxor 32(%rsi),%ymm6,%ymm6
+ vpxor 64(%rsi),%ymm7,%ymm7
+ vpxor 96(%rsi),%ymm15,%ymm15
+ leaq (%rsi,%rax,1),%rsi
+ vmovdqu32 %ymm18,0(%rdi)
+ vmovdqu %ymm6,32(%rdi)
+ vmovdqu %ymm7,64(%rdi)
+ vmovdqu %ymm15,96(%rdi)
+ leaq (%rdi,%rax,1),%rdi
+
+ vpxor 0(%rsi),%ymm3,%ymm3
+ vpxor 32(%rsi),%ymm11,%ymm11
+ vpxor 64(%rsi),%ymm4,%ymm4
+ vpxor 96(%rsi),%ymm12,%ymm12
+ leaq (%rsi,%rax,1),%rsi
+ vmovdqu %ymm3,0(%rdi)
+ vmovdqu %ymm11,32(%rdi)
+ vmovdqu %ymm4,64(%rdi)
+ vmovdqu %ymm12,96(%rdi)
+ leaq (%rdi,%rax,1),%rdi
+
+ vpbroadcastd 0(%r10),%ymm0
+ vpbroadcastd 4(%r10),%ymm1
+
+ subq $512,%rdx
+ jnz .Loop_outer8xvl
+
+ jmp .Ldone8xvl
+
+.align 32
+.Ltail8xvl:
+ vmovdqa64 %ymm19,%ymm8
+ xorq %r10,%r10
+ subq %rsi,%rdi
+ cmpq $64,%rdx
+ jb .Less_than_64_8xvl
+ vpxor 0(%rsi),%ymm8,%ymm8
+ vpxor 32(%rsi),%ymm0,%ymm0
+ vmovdqu %ymm8,0(%rdi,%rsi,1)
+ vmovdqu %ymm0,32(%rdi,%rsi,1)
+ je .Ldone8xvl
+ vmovdqa %ymm5,%ymm8
+ vmovdqa %ymm13,%ymm0
+ leaq 64(%rsi),%rsi
+
+ cmpq $128,%rdx
+ jb .Less_than_64_8xvl
+ vpxor 0(%rsi),%ymm5,%ymm5
+ vpxor 32(%rsi),%ymm13,%ymm13
+ vmovdqu %ymm5,0(%rdi,%rsi,1)
+ vmovdqu %ymm13,32(%rdi,%rsi,1)
+ je .Ldone8xvl
+ vmovdqa %ymm1,%ymm8
+ vmovdqa %ymm9,%ymm0
+ leaq 64(%rsi),%rsi
+
+ cmpq $192,%rdx
+ jb .Less_than_64_8xvl
+ vpxor 0(%rsi),%ymm1,%ymm1
+ vpxor 32(%rsi),%ymm9,%ymm9
+ vmovdqu %ymm1,0(%rdi,%rsi,1)
+ vmovdqu %ymm9,32(%rdi,%rsi,1)
+ je .Ldone8xvl
+ vmovdqa %ymm2,%ymm8
+ vmovdqa %ymm10,%ymm0
+ leaq 64(%rsi),%rsi
+
+ cmpq $256,%rdx
+ jb .Less_than_64_8xvl
+ vpxor 0(%rsi),%ymm2,%ymm2
+ vpxor 32(%rsi),%ymm10,%ymm10
+ vmovdqu %ymm2,0(%rdi,%rsi,1)
+ vmovdqu %ymm10,32(%rdi,%rsi,1)
+ je .Ldone8xvl
+ vmovdqa32 %ymm18,%ymm8
+ vmovdqa %ymm6,%ymm0
+ leaq 64(%rsi),%rsi
+
+ cmpq $320,%rdx
+ jb .Less_than_64_8xvl
+ vpxord 0(%rsi),%ymm18,%ymm18
+ vpxor 32(%rsi),%ymm6,%ymm6
+ vmovdqu32 %ymm18,0(%rdi,%rsi,1)
+ vmovdqu %ymm6,32(%rdi,%rsi,1)
+ je .Ldone8xvl
+ vmovdqa %ymm7,%ymm8
+ vmovdqa %ymm15,%ymm0
+ leaq 64(%rsi),%rsi
+
+ cmpq $384,%rdx
+ jb .Less_than_64_8xvl
+ vpxor 0(%rsi),%ymm7,%ymm7
+ vpxor 32(%rsi),%ymm15,%ymm15
+ vmovdqu %ymm7,0(%rdi,%rsi,1)
+ vmovdqu %ymm15,32(%rdi,%rsi,1)
+ je .Ldone8xvl
+ vmovdqa %ymm3,%ymm8
+ vmovdqa %ymm11,%ymm0
+ leaq 64(%rsi),%rsi
+
+ cmpq $448,%rdx
+ jb .Less_than_64_8xvl
+ vpxor 0(%rsi),%ymm3,%ymm3
+ vpxor 32(%rsi),%ymm11,%ymm11
+ vmovdqu %ymm3,0(%rdi,%rsi,1)
+ vmovdqu %ymm11,32(%rdi,%rsi,1)
+ je .Ldone8xvl
+ vmovdqa %ymm4,%ymm8
+ vmovdqa %ymm12,%ymm0
+ leaq 64(%rsi),%rsi
+
+.Less_than_64_8xvl:
+ vmovdqa %ymm8,0(%rsp)
+ vmovdqa %ymm0,32(%rsp)
+ leaq (%rdi,%rsi,1),%rdi
+ andq $63,%rdx
+
+.Loop_tail8xvl:
+ movzbl (%rsi,%r10,1),%eax
+ movzbl (%rsp,%r10,1),%ecx
+ leaq 1(%r10),%r10
+ xorl %ecx,%eax
+ movb %al,-1(%rdi,%r10,1)
+ decq %rdx
+ jnz .Loop_tail8xvl
+
+ vpxor %ymm8,%ymm8,%ymm8
+ vmovdqa %ymm8,0(%rsp)
+ vmovdqa %ymm8,32(%rsp)
+
+.Ldone8xvl:
+ vzeroall
+ leaq (%r9),%rsp
+.cfi_def_cfa_register %rsp
+.L8xvl_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ChaCha20_8xvl,.-ChaCha20_8xvl
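
Editor's annotation (not part of the patch): the new AVX-512 code paths are selected from the upper half of the OPENSSL_ia32cap_P capability vector. The comments below against the dispatch added at the top of ChaCha20_ctr32 reflect my reading of OpenSSL's ia32cap word layout, which the patch itself does not spell out.

	movq	OPENSSL_ia32cap_P+4(%rip),%r10	# capability words 1 and 2 as one quad
	btq	$48,%r10			# quad bit 48 = word 2 bit 16 (CPUID.7:EBX, AVX512F)
	jc	.LChaCha20_avx512
	testq	%r10,%r10			# sign bit = word 2 bit 31 (AVX512VL)
	js	.LChaCha20_avx512vl
	testl	$512,%r10d			# word 1 bit 9 (CPUID.1:ECX, SSSE3)
	jnz	.LChaCha20_ssse3
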
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/ecp_nistz256-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/ecp_nistz256-x86_64.s
index 62a7ac611f..eeeed6ba40 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/ecp_nistz256-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/ecp_nistz256-x86_64.s
@@ -2393,12 +2393,24 @@ ecp_nistz256_precomputed:
.LONE_mont:
.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
+
+.Lord:
+.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
+.LordK:
+.quad 0xccd1c8aaee00bc4f
+
.globl ecp_nistz256_mul_by_2
.type ecp_nistz256_mul_by_2,@function
.align 64
ecp_nistz256_mul_by_2:
+.cfi_startproc
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+.Lmul_by_2_body:
movq 0(%rsi),%r8
xorq %r13,%r13
@@ -2431,9 +2443,15 @@ ecp_nistz256_mul_by_2:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+.cfi_restore %r13
+ movq 8(%rsp),%r12
+.cfi_restore %r12
+ leaq 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
+.Lmul_by_2_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
@@ -2442,8 +2460,14 @@ ecp_nistz256_mul_by_2:
.type ecp_nistz256_div_by_2,@function
.align 32
ecp_nistz256_div_by_2:
+.cfi_startproc
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+.Ldiv_by_2_body:
movq 0(%rsi),%r8
movq 8(%rsi),%r9
@@ -2491,9 +2515,15 @@ ecp_nistz256_div_by_2:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+.cfi_restore %r13
+ movq 8(%rsp),%r12
+.cfi_restore %r12
+ leaq 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
+.Ldiv_by_2_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
@@ -2502,8 +2532,14 @@ ecp_nistz256_div_by_2:
.type ecp_nistz256_mul_by_3,@function
.align 32
ecp_nistz256_mul_by_3:
+.cfi_startproc
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+.Lmul_by_3_body:
movq 0(%rsi),%r8
xorq %r13,%r13
@@ -2557,9 +2593,15 @@ ecp_nistz256_mul_by_3:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+.cfi_restore %r13
+ movq 8(%rsp),%r12
+.cfi_restore %r12
+ leaq 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
+.Lmul_by_3_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
@@ -2568,8 +2610,14 @@ ecp_nistz256_mul_by_3:
.type ecp_nistz256_add,@function
.align 32
ecp_nistz256_add:
+.cfi_startproc
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+.Ladd_body:
movq 0(%rsi),%r8
xorq %r13,%r13
@@ -2603,9 +2651,15 @@ ecp_nistz256_add:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+.cfi_restore %r13
+ movq 8(%rsp),%r12
+.cfi_restore %r12
+ leaq 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
+.Ladd_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_add,.-ecp_nistz256_add
@@ -2614,8 +2668,14 @@ ecp_nistz256_add:
.type ecp_nistz256_sub,@function
.align 32
ecp_nistz256_sub:
+.cfi_startproc
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+.Lsub_body:
movq 0(%rsi),%r8
xorq %r13,%r13
@@ -2649,9 +2709,15 @@ ecp_nistz256_sub:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+.cfi_restore %r13
+ movq 8(%rsp),%r12
+.cfi_restore %r12
+ leaq 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
+.Lsub_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_sub,.-ecp_nistz256_sub
@@ -2660,8 +2726,14 @@ ecp_nistz256_sub:
.type ecp_nistz256_neg,@function
.align 32
ecp_nistz256_neg:
+.cfi_startproc
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+.Lneg_body:
xorq %r8,%r8
xorq %r9,%r9
@@ -2695,14 +2767,1109 @@ ecp_nistz256_neg:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+.cfi_restore %r13
+ movq 8(%rsp),%r12
+.cfi_restore %r12
+ leaq 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
+.Lneg_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_neg,.-ecp_nistz256_neg
+
+
+.globl ecp_nistz256_ord_mul_mont
+.type ecp_nistz256_ord_mul_mont,@function
+.align 32
+ecp_nistz256_ord_mul_mont:
+.cfi_startproc
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je .Lecp_nistz256_ord_mul_montx
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+.Lord_mul_body:
+
+ movq 0(%rdx),%rax
+ movq %rdx,%rbx
+ leaq .Lord(%rip),%r14
+ movq .LordK(%rip),%r15
+
+
+ movq %rax,%rcx
+ mulq 0(%rsi)
+ movq %rax,%r8
+ movq %rcx,%rax
+ movq %rdx,%r9
+
+ mulq 8(%rsi)
+ addq %rax,%r9
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%r10
+
+ mulq 16(%rsi)
+ addq %rax,%r10
+ movq %rcx,%rax
+ adcq $0,%rdx
+
+ movq %r8,%r13
+ imulq %r15,%r8
+
+ movq %rdx,%r11
+ mulq 24(%rsi)
+ addq %rax,%r11
+ movq %r8,%rax
+ adcq $0,%rdx
+ movq %rdx,%r12
+
+
+ mulq 0(%r14)
+ movq %r8,%rbp
+ addq %rax,%r13
+ movq %r8,%rax
+ adcq $0,%rdx
+ movq %rdx,%rcx
+
+ subq %r8,%r10
+ sbbq $0,%r8
+
+ mulq 8(%r14)
+ addq %rcx,%r9
+ adcq $0,%rdx
+ addq %rax,%r9
+ movq %rbp,%rax
+ adcq %rdx,%r10
+ movq %rbp,%rdx
+ adcq $0,%r8
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r11
+ movq 8(%rbx),%rax
+ sbbq %rdx,%rbp
+
+ addq %r8,%r11
+ adcq %rbp,%r12
+ adcq $0,%r13
+
+
+ movq %rax,%rcx
+ mulq 0(%rsi)
+ addq %rax,%r9
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 8(%rsi)
+ addq %rbp,%r10
+ adcq $0,%rdx
+ addq %rax,%r10
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 16(%rsi)
+ addq %rbp,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %rcx,%rax
+ adcq $0,%rdx
+
+ movq %r9,%rcx
+ imulq %r15,%r9
+
+ movq %rdx,%rbp
+ mulq 24(%rsi)
+ addq %rbp,%r12
+ adcq $0,%rdx
+ xorq %r8,%r8
+ addq %rax,%r12
+ movq %r9,%rax
+ adcq %rdx,%r13
+ adcq $0,%r8
+
+
+ mulq 0(%r14)
+ movq %r9,%rbp
+ addq %rax,%rcx
+ movq %r9,%rax
+ adcq %rdx,%rcx
+
+ subq %r9,%r11
+ sbbq $0,%r9
+
+ mulq 8(%r14)
+ addq %rcx,%r10
+ adcq $0,%rdx
+ addq %rax,%r10
+ movq %rbp,%rax
+ adcq %rdx,%r11
+ movq %rbp,%rdx
+ adcq $0,%r9
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r12
+ movq 16(%rbx),%rax
+ sbbq %rdx,%rbp
+
+ addq %r9,%r12
+ adcq %rbp,%r13
+ adcq $0,%r8
+
+
+ movq %rax,%rcx
+ mulq 0(%rsi)
+ addq %rax,%r10
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 8(%rsi)
+ addq %rbp,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 16(%rsi)
+ addq %rbp,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %rcx,%rax
+ adcq $0,%rdx
+
+ movq %r10,%rcx
+ imulq %r15,%r10
+
+ movq %rdx,%rbp
+ mulq 24(%rsi)
+ addq %rbp,%r13
+ adcq $0,%rdx
+ xorq %r9,%r9
+ addq %rax,%r13
+ movq %r10,%rax
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+
+ mulq 0(%r14)
+ movq %r10,%rbp
+ addq %rax,%rcx
+ movq %r10,%rax
+ adcq %rdx,%rcx
+
+ subq %r10,%r12
+ sbbq $0,%r10
+
+ mulq 8(%r14)
+ addq %rcx,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %rbp,%rax
+ adcq %rdx,%r12
+ movq %rbp,%rdx
+ adcq $0,%r10
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r13
+ movq 24(%rbx),%rax
+ sbbq %rdx,%rbp
+
+ addq %r10,%r13
+ adcq %rbp,%r8
+ adcq $0,%r9
+
+
+ movq %rax,%rcx
+ mulq 0(%rsi)
+ addq %rax,%r11
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 8(%rsi)
+ addq %rbp,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %rcx,%rax
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq 16(%rsi)
+ addq %rbp,%r13
+ adcq $0,%rdx
+ addq %rax,%r13
+ movq %rcx,%rax
+ adcq $0,%rdx
+
+ movq %r11,%rcx
+ imulq %r15,%r11
+
+ movq %rdx,%rbp
+ mulq 24(%rsi)
+ addq %rbp,%r8
+ adcq $0,%rdx
+ xorq %r10,%r10
+ addq %rax,%r8
+ movq %r11,%rax
+ adcq %rdx,%r9
+ adcq $0,%r10
+
+
+ mulq 0(%r14)
+ movq %r11,%rbp
+ addq %rax,%rcx
+ movq %r11,%rax
+ adcq %rdx,%rcx
+
+ subq %r11,%r13
+ sbbq $0,%r11
+
+ mulq 8(%r14)
+ addq %rcx,%r12
+ adcq $0,%rdx
+ addq %rax,%r12
+ movq %rbp,%rax
+ adcq %rdx,%r13
+ movq %rbp,%rdx
+ adcq $0,%r11
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r8
+ sbbq %rdx,%rbp
+
+ addq %r11,%r8
+ adcq %rbp,%r9
+ adcq $0,%r10
+
+
+ movq %r12,%rsi
+ subq 0(%r14),%r12
+ movq %r13,%r11
+ sbbq 8(%r14),%r13
+ movq %r8,%rcx
+ sbbq 16(%r14),%r8
+ movq %r9,%rbp
+ sbbq 24(%r14),%r9
+ sbbq $0,%r10
+
+ cmovcq %rsi,%r12
+ cmovcq %r11,%r13
+ cmovcq %rcx,%r8
+ cmovcq %rbp,%r9
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ movq 0(%rsp),%r15
+.cfi_restore %r15
+ movq 8(%rsp),%r14
+.cfi_restore %r14
+ movq 16(%rsp),%r13
+.cfi_restore %r13
+ movq 24(%rsp),%r12
+.cfi_restore %r12
+ movq 32(%rsp),%rbx
+.cfi_restore %rbx
+ movq 40(%rsp),%rbp
+.cfi_restore %rbp
+ leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lord_mul_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont
+
+
+
+
+
+
+
+.globl ecp_nistz256_ord_sqr_mont
+.type ecp_nistz256_ord_sqr_mont,@function
+.align 32
+ecp_nistz256_ord_sqr_mont:
+.cfi_startproc
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je .Lecp_nistz256_ord_sqr_montx
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+.Lord_sqr_body:
+
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%rax
+ movq 16(%rsi),%r14
+ movq 24(%rsi),%r15
+ leaq .Lord(%rip),%rsi
+ movq %rdx,%rbx
+ jmp .Loop_ord_sqr
+
+.align 32
+.Loop_ord_sqr:
+
+ movq %rax,%rbp
+ mulq %r8
+ movq %rax,%r9
+.byte 102,72,15,110,205
+ movq %r14,%rax
+ movq %rdx,%r10
+
+ mulq %r8
+ addq %rax,%r10
+ movq %r15,%rax
+.byte 102,73,15,110,214
+ adcq $0,%rdx
+ movq %rdx,%r11
+
+ mulq %r8
+ addq %rax,%r11
+ movq %r15,%rax
+.byte 102,73,15,110,223
+ adcq $0,%rdx
+ movq %rdx,%r12
+
+
+ mulq %r14
+ movq %rax,%r13
+ movq %r14,%rax
+ movq %rdx,%r14
+
+
+ mulq %rbp
+ addq %rax,%r11
+ movq %r15,%rax
+ adcq $0,%rdx
+ movq %rdx,%r15
+
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+
+ addq %r15,%r12
+ adcq %rdx,%r13
+ adcq $0,%r14
+
+
+ xorq %r15,%r15
+ movq %r8,%rax
+ addq %r9,%r9
+ adcq %r10,%r10
+ adcq %r11,%r11
+ adcq %r12,%r12
+ adcq %r13,%r13
+ adcq %r14,%r14
+ adcq $0,%r15
+
+
+ mulq %rax
+ movq %rax,%r8
+.byte 102,72,15,126,200
+ movq %rdx,%rbp
+
+ mulq %rax
+ addq %rbp,%r9
+ adcq %rax,%r10
+.byte 102,72,15,126,208
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ mulq %rax
+ addq %rbp,%r11
+ adcq %rax,%r12
+.byte 102,72,15,126,216
+ adcq $0,%rdx
+ movq %rdx,%rbp
+
+ movq %r8,%rcx
+ imulq 32(%rsi),%r8
+
+ mulq %rax
+ addq %rbp,%r13
+ adcq %rax,%r14
+ movq 0(%rsi),%rax
+ adcq %rdx,%r15
+
+
+ mulq %r8
+ movq %r8,%rbp
+ addq %rax,%rcx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+
+ subq %r8,%r10
+ sbbq $0,%rbp
+
+ mulq %r8
+ addq %rcx,%r9
+ adcq $0,%rdx
+ addq %rax,%r9
+ movq %r8,%rax
+ adcq %rdx,%r10
+ movq %r8,%rdx
+ adcq $0,%rbp
+
+ movq %r9,%rcx
+ imulq 32(%rsi),%r9
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r11
+ movq 0(%rsi),%rax
+ sbbq %rdx,%r8
+
+ addq %rbp,%r11
+ adcq $0,%r8
+
+
+ mulq %r9
+ movq %r9,%rbp
+ addq %rax,%rcx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+
+ subq %r9,%r11
+ sbbq $0,%rbp
+
+ mulq %r9
+ addq %rcx,%r10
+ adcq $0,%rdx
+ addq %rax,%r10
+ movq %r9,%rax
+ adcq %rdx,%r11
+ movq %r9,%rdx
+ adcq $0,%rbp
+
+ movq %r10,%rcx
+ imulq 32(%rsi),%r10
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r8
+ movq 0(%rsi),%rax
+ sbbq %rdx,%r9
+
+ addq %rbp,%r8
+ adcq $0,%r9
+
+
+ mulq %r10
+ movq %r10,%rbp
+ addq %rax,%rcx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+
+ subq %r10,%r8
+ sbbq $0,%rbp
+
+ mulq %r10
+ addq %rcx,%r11
+ adcq $0,%rdx
+ addq %rax,%r11
+ movq %r10,%rax
+ adcq %rdx,%r8
+ movq %r10,%rdx
+ adcq $0,%rbp
+
+ movq %r11,%rcx
+ imulq 32(%rsi),%r11
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r9
+ movq 0(%rsi),%rax
+ sbbq %rdx,%r10
+
+ addq %rbp,%r9
+ adcq $0,%r10
+
+
+ mulq %r11
+ movq %r11,%rbp
+ addq %rax,%rcx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+
+ subq %r11,%r9
+ sbbq $0,%rbp
+
+ mulq %r11
+ addq %rcx,%r8
+ adcq $0,%rdx
+ addq %rax,%r8
+ movq %r11,%rax
+ adcq %rdx,%r9
+ movq %r11,%rdx
+ adcq $0,%rbp
+
+ shlq $32,%rax
+ shrq $32,%rdx
+ subq %rax,%r10
+ sbbq %rdx,%r11
+
+ addq %rbp,%r10
+ adcq $0,%r11
+
+
+ xorq %rdx,%rdx
+ addq %r12,%r8
+ adcq %r13,%r9
+ movq %r8,%r12
+ adcq %r14,%r10
+ adcq %r15,%r11
+ movq %r9,%rax
+ adcq $0,%rdx
+
+
+ subq 0(%rsi),%r8
+ movq %r10,%r14
+ sbbq 8(%rsi),%r9
+ sbbq 16(%rsi),%r10
+ movq %r11,%r15
+ sbbq 24(%rsi),%r11
+ sbbq $0,%rdx
+
+ cmovcq %r12,%r8
+ cmovncq %r9,%rax
+ cmovncq %r10,%r14
+ cmovncq %r11,%r15
+
+ decq %rbx
+ jnz .Loop_ord_sqr
+
+ movq %r8,0(%rdi)
+ movq %rax,8(%rdi)
+ pxor %xmm1,%xmm1
+ movq %r14,16(%rdi)
+ pxor %xmm2,%xmm2
+ movq %r15,24(%rdi)
+ pxor %xmm3,%xmm3
+
+ movq 0(%rsp),%r15
+.cfi_restore %r15
+ movq 8(%rsp),%r14
+.cfi_restore %r14
+ movq 16(%rsp),%r13
+.cfi_restore %r13
+ movq 24(%rsp),%r12
+.cfi_restore %r12
+ movq 32(%rsp),%rbx
+.cfi_restore %rbx
+ movq 40(%rsp),%rbp
+.cfi_restore %rbp
+ leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lord_sqr_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
+
+.type ecp_nistz256_ord_mul_montx,@function
+.align 32
+ecp_nistz256_ord_mul_montx:
+.cfi_startproc
+.Lecp_nistz256_ord_mul_montx:
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+.Lord_mulx_body:
+
+ movq %rdx,%rbx
+ movq 0(%rdx),%rdx
+ movq 0(%rsi),%r9
+ movq 8(%rsi),%r10
+ movq 16(%rsi),%r11
+ movq 24(%rsi),%r12
+ leaq -128(%rsi),%rsi
+ leaq .Lord-128(%rip),%r14
+ movq .LordK(%rip),%r15
+
+
+ mulxq %r9,%r8,%r9
+ mulxq %r10,%rcx,%r10
+ mulxq %r11,%rbp,%r11
+ addq %rcx,%r9
+ mulxq %r12,%rcx,%r12
+ movq %r8,%rdx
+ mulxq %r15,%rdx,%rax
+ adcq %rbp,%r10
+ adcq %rcx,%r11
+ adcq $0,%r12
+
+
+ xorq %r13,%r13
+ mulxq 0+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+
+ mulxq 8+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+
+ mulxq 16+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 24+128(%r14),%rcx,%rbp
+ movq 8(%rbx),%rdx
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+ adcxq %r8,%r12
+ adoxq %r8,%r13
+ adcq $0,%r13
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r9,%rdx
+ mulxq %r15,%rdx,%rax
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ adcxq %r8,%r13
+ adoxq %r8,%r8
+ adcq $0,%r8
+
+
+ mulxq 0+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+
+ mulxq 8+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 16+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 24+128(%r14),%rcx,%rbp
+ movq 16(%rbx),%rdx
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+ adcxq %r9,%r13
+ adoxq %r9,%r8
+ adcq $0,%r8
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r10,%rdx
+ mulxq %r15,%rdx,%rax
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+
+ adcxq %r9,%r8
+ adoxq %r9,%r9
+ adcq $0,%r9
+
+
+ mulxq 0+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 8+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 16+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 24+128(%r14),%rcx,%rbp
+ movq 24(%rbx),%rdx
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+ adcxq %r10,%r8
+ adoxq %r10,%r9
+ adcq $0,%r9
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r11,%rdx
+ mulxq %r15,%rdx,%rax
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+
+ adcxq %r10,%r9
+ adoxq %r10,%r10
+ adcq $0,%r10
+
+
+ mulxq 0+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 8+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 16+128(%r14),%rcx,%rbp
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+
+ mulxq 24+128(%r14),%rcx,%rbp
+ leaq 128(%r14),%r14
+ movq %r12,%rbx
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+ movq %r13,%rdx
+ adcxq %r11,%r9
+ adoxq %r11,%r10
+ adcq $0,%r10
+
+
+
+ movq %r8,%rcx
+ subq 0(%r14),%r12
+ sbbq 8(%r14),%r13
+ sbbq 16(%r14),%r8
+ movq %r9,%rbp
+ sbbq 24(%r14),%r9
+ sbbq $0,%r10
+
+ cmovcq %rbx,%r12
+ cmovcq %rdx,%r13
+ cmovcq %rcx,%r8
+ cmovcq %rbp,%r9
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ movq 0(%rsp),%r15
+.cfi_restore %r15
+ movq 8(%rsp),%r14
+.cfi_restore %r14
+ movq 16(%rsp),%r13
+.cfi_restore %r13
+ movq 24(%rsp),%r12
+.cfi_restore %r12
+ movq 32(%rsp),%rbx
+.cfi_restore %rbx
+ movq 40(%rsp),%rbp
+.cfi_restore %rbp
+ leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lord_mulx_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx
+
+.type ecp_nistz256_ord_sqr_montx,@function
+.align 32
+ecp_nistz256_ord_sqr_montx:
+.cfi_startproc
+.Lecp_nistz256_ord_sqr_montx:
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+.Lord_sqrx_body:
+
+ movq %rdx,%rbx
+ movq 0(%rsi),%rdx
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r15
+ movq 24(%rsi),%r8
+ leaq .Lord(%rip),%rsi
+ jmp .Loop_ord_sqrx
+
+.align 32
+.Loop_ord_sqrx:
+ mulxq %r14,%r9,%r10
+ mulxq %r15,%rcx,%r11
+ movq %rdx,%rax
+.byte 102,73,15,110,206
+ mulxq %r8,%rbp,%r12
+ movq %r14,%rdx
+ addq %rcx,%r10
+.byte 102,73,15,110,215
+ adcq %rbp,%r11
+ adcq $0,%r12
+ xorq %r13,%r13
+
+ mulxq %r15,%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq %r8,%rcx,%rbp
+ movq %r15,%rdx
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+ adcq $0,%r13
+
+ mulxq %r8,%rcx,%r14
+ movq %rax,%rdx
+.byte 102,73,15,110,216
+ xorq %r15,%r15
+ adcxq %r9,%r9
+ adoxq %rcx,%r13
+ adcxq %r10,%r10
+ adoxq %r15,%r14
+
+
+ mulxq %rdx,%r8,%rbp
+.byte 102,72,15,126,202
+ adcxq %r11,%r11
+ adoxq %rbp,%r9
+ adcxq %r12,%r12
+ mulxq %rdx,%rcx,%rax
+.byte 102,72,15,126,210
+ adcxq %r13,%r13
+ adoxq %rcx,%r10
+ adcxq %r14,%r14
+ mulxq %rdx,%rcx,%rbp
+.byte 0x67
+.byte 102,72,15,126,218
+ adoxq %rax,%r11
+ adcxq %r15,%r15
+ adoxq %rcx,%r12
+ adoxq %rbp,%r13
+ mulxq %rdx,%rcx,%rax
+ adoxq %rcx,%r14
+ adoxq %rax,%r15
+
+
+ movq %r8,%rdx
+ mulxq 32(%rsi),%rdx,%rcx
+
+ xorq %rax,%rax
+ mulxq 0(%rsi),%rcx,%rbp
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+ mulxq 8(%rsi),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+ mulxq 16(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+ mulxq 24(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r8
+ adcxq %rax,%r8
+
+
+ movq %r9,%rdx
+ mulxq 32(%rsi),%rdx,%rcx
+
+ mulxq 0(%rsi),%rcx,%rbp
+ adoxq %rcx,%r9
+ adcxq %rbp,%r10
+ mulxq 8(%rsi),%rcx,%rbp
+ adoxq %rcx,%r10
+ adcxq %rbp,%r11
+ mulxq 16(%rsi),%rcx,%rbp
+ adoxq %rcx,%r11
+ adcxq %rbp,%r8
+ mulxq 24(%rsi),%rcx,%rbp
+ adoxq %rcx,%r8
+ adcxq %rbp,%r9
+ adoxq %rax,%r9
+
+
+ movq %r10,%rdx
+ mulxq 32(%rsi),%rdx,%rcx
+
+ mulxq 0(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+ mulxq 8(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r8
+ mulxq 16(%rsi),%rcx,%rbp
+ adcxq %rcx,%r8
+ adoxq %rbp,%r9
+ mulxq 24(%rsi),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+ adcxq %rax,%r10
+
+
+ movq %r11,%rdx
+ mulxq 32(%rsi),%rdx,%rcx
+
+ mulxq 0(%rsi),%rcx,%rbp
+ adoxq %rcx,%r11
+ adcxq %rbp,%r8
+ mulxq 8(%rsi),%rcx,%rbp
+ adoxq %rcx,%r8
+ adcxq %rbp,%r9
+ mulxq 16(%rsi),%rcx,%rbp
+ adoxq %rcx,%r9
+ adcxq %rbp,%r10
+ mulxq 24(%rsi),%rcx,%rbp
+ adoxq %rcx,%r10
+ adcxq %rbp,%r11
+ adoxq %rax,%r11
+
+
+ addq %r8,%r12
+ adcq %r13,%r9
+ movq %r12,%rdx
+ adcq %r14,%r10
+ adcq %r15,%r11
+ movq %r9,%r14
+ adcq $0,%rax
+
+
+ subq 0(%rsi),%r12
+ movq %r10,%r15
+ sbbq 8(%rsi),%r9
+ sbbq 16(%rsi),%r10
+ movq %r11,%r8
+ sbbq 24(%rsi),%r11
+ sbbq $0,%rax
+
+ cmovncq %r12,%rdx
+ cmovncq %r9,%r14
+ cmovncq %r10,%r15
+ cmovncq %r11,%r8
+
+ decq %rbx
+ jnz .Loop_ord_sqrx
+
+ movq %rdx,0(%rdi)
+ movq %r14,8(%rdi)
+ pxor %xmm1,%xmm1
+ movq %r15,16(%rdi)
+ pxor %xmm2,%xmm2
+ movq %r8,24(%rdi)
+ pxor %xmm3,%xmm3
+
+ movq 0(%rsp),%r15
+.cfi_restore %r15
+ movq 8(%rsp),%r14
+.cfi_restore %r14
+ movq 16(%rsp),%r13
+.cfi_restore %r13
+ movq 24(%rsp),%r12
+.cfi_restore %r12
+ movq 32(%rsp),%rbx
+.cfi_restore %rbx
+ movq 40(%rsp),%rbp
+.cfi_restore %rbp
+ leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lord_sqrx_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx
+
+
+
+
.globl ecp_nistz256_to_mont
.type ecp_nistz256_to_mont,@function
.align 32
@@ -2723,15 +3890,29 @@ ecp_nistz256_to_mont:
.type ecp_nistz256_mul_mont,@function
.align 32
ecp_nistz256_mul_mont:
+.cfi_startproc
movl $0x80100,%ecx
andl OPENSSL_ia32cap_P+8(%rip),%ecx
.Lmul_mont:
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+.Lmul_body:
cmpl $0x80100,%ecx
je .Lmul_montx
movq %rdx,%rbx
@@ -2756,13 +3937,23 @@ ecp_nistz256_mul_mont:
call __ecp_nistz256_mul_montx
.Lmul_mont_done:
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ movq 0(%rsp),%r15
+.cfi_restore %r15
+ movq 8(%rsp),%r14
+.cfi_restore %r14
+ movq 16(%rsp),%r13
+.cfi_restore %r13
+ movq 24(%rsp),%r12
+.cfi_restore %r12
+ movq 32(%rsp),%rbx
+.cfi_restore %rbx
+ movq 40(%rsp),%rbp
+.cfi_restore %rbp
+ leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lmul_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
.type __ecp_nistz256_mul_montq,@function
@@ -2992,14 +4183,28 @@ __ecp_nistz256_mul_montq:
.type ecp_nistz256_sqr_mont,@function
.align 32
ecp_nistz256_sqr_mont:
+.cfi_startproc
movl $0x80100,%ecx
andl OPENSSL_ia32cap_P+8(%rip),%ecx
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+.Lsqr_body:
cmpl $0x80100,%ecx
je .Lsqr_montx
movq 0(%rsi),%rax
@@ -3020,13 +4225,23 @@ ecp_nistz256_sqr_mont:
call __ecp_nistz256_sqr_montx
.Lsqr_mont_done:
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ movq 0(%rsp),%r15
+.cfi_restore %r15
+ movq 8(%rsp),%r14
+.cfi_restore %r14
+ movq 16(%rsp),%r13
+.cfi_restore %r13
+ movq 24(%rsp),%r12
+.cfi_restore %r12
+ movq 32(%rsp),%rbx
+.cfi_restore %rbx
+ movq 40(%rsp),%rbp
+.cfi_restore %rbp
+ leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
+.Lsqr_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
.type __ecp_nistz256_sqr_montq,@function
@@ -3494,8 +4709,14 @@ __ecp_nistz256_sqr_montx:
.type ecp_nistz256_from_mont,@function
.align 32
ecp_nistz256_from_mont:
+.cfi_startproc
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+.Lfrom_body:
movq 0(%rsi),%rax
movq .Lpoly+24(%rip),%r13
@@ -3576,9 +4797,15 @@ ecp_nistz256_from_mont:
movq %r10,16(%rdi)
movq %r11,24(%rdi)
- popq %r13
- popq %r12
+ movq 0(%rsp),%r13
+.cfi_restore %r13
+ movq 8(%rsp),%r12
+.cfi_restore %r12
+ leaq 16(%rsp),%rsp
+.cfi_adjust_cfa_offset -16
+.Lfrom_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
@@ -3664,6 +4891,7 @@ ecp_nistz256_gather_w5:
movdqu %xmm6,64(%rdi)
movdqu %xmm7,80(%rdi)
.byte 0xf3,0xc3
+.LSEH_end_ecp_nistz256_gather_w5:
.size ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5
@@ -3734,6 +4962,7 @@ ecp_nistz256_gather_w7:
movdqu %xmm4,32(%rdi)
movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3
+.LSEH_end_ecp_nistz256_gather_w7:
.size ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7
@@ -3794,6 +5023,7 @@ ecp_nistz256_avx2_gather_w5:
vmovdqu %ymm4,64(%rdi)
vzeroupper
.byte 0xf3,0xc3
+.LSEH_end_ecp_nistz256_avx2_gather_w5:
.size ecp_nistz256_avx2_gather_w5,.-ecp_nistz256_avx2_gather_w5
@@ -3871,6 +5101,7 @@ ecp_nistz256_avx2_gather_w7:
vmovdqu %ymm3,32(%rdi)
vzeroupper
.byte 0xf3,0xc3
+.LSEH_end_ecp_nistz256_avx2_gather_w7:
.size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7
.type __ecp_nistz256_add_toq,@function
.align 32
@@ -3997,17 +5228,32 @@ __ecp_nistz256_mul_by_2q:
.type ecp_nistz256_point_double,@function
.align 32
ecp_nistz256_point_double:
+.cfi_startproc
movl $0x80100,%ecx
andl OPENSSL_ia32cap_P+8(%rip),%ecx
cmpl $0x80100,%ecx
je .Lpoint_doublex
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $160+8,%rsp
+.cfi_adjust_cfa_offset 32*5+8
+.Lpoint_doubleq_body:
.Lpoint_double_shortcutq:
movdqu 0(%rsi),%xmm0
@@ -4190,30 +5436,56 @@ ecp_nistz256_point_double:
.byte 102,72,15,126,207
call __ecp_nistz256_sub_fromq
- addq $160+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 160+56(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbx
+.cfi_restore %rbx
+ movq -8(%rsi),%rbp
+.cfi_restore %rbp
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lpoint_doubleq_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
.globl ecp_nistz256_point_add
.type ecp_nistz256_point_add,@function
.align 32
ecp_nistz256_point_add:
+.cfi_startproc
movl $0x80100,%ecx
andl OPENSSL_ia32cap_P+8(%rip),%ecx
cmpl $0x80100,%ecx
je .Lpoint_addx
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $576+8,%rsp
+.cfi_adjust_cfa_offset 32*18+8
+.Lpoint_addq_body:
movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
@@ -4590,30 +5862,56 @@ ecp_nistz256_point_add:
movdqu %xmm3,48(%rdi)
.Ladd_doneq:
- addq $576+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 576+56(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbx
+.cfi_restore %rbx
+ movq -8(%rsi),%rbp
+.cfi_restore %rbp
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lpoint_addq_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
.globl ecp_nistz256_point_add_affine
.type ecp_nistz256_point_add_affine,@function
.align 32
ecp_nistz256_point_add_affine:
+.cfi_startproc
movl $0x80100,%ecx
andl OPENSSL_ia32cap_P+8(%rip),%ecx
cmpl $0x80100,%ecx
je .Lpoint_add_affinex
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $480+8,%rsp
+.cfi_adjust_cfa_offset 32*15+8
+.Ladd_affineq_body:
movdqu 0(%rsi),%xmm0
movq %rdx,%rbx
@@ -4896,14 +6194,25 @@ ecp_nistz256_point_add_affine:
movdqu %xmm2,32(%rdi)
movdqu %xmm3,48(%rdi)
- addq $480+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 480+56(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbx
+.cfi_restore %rbx
+ movq -8(%rsi),%rbp
+.cfi_restore %rbp
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Ladd_affineq_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
.type __ecp_nistz256_add_tox,@function
.align 32
@@ -5035,14 +6344,29 @@ __ecp_nistz256_mul_by_2x:
.type ecp_nistz256_point_doublex,@function
.align 32
ecp_nistz256_point_doublex:
+.cfi_startproc
.Lpoint_doublex:
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $160+8,%rsp
+.cfi_adjust_cfa_offset 32*5+8
+.Lpoint_doublex_body:
.Lpoint_double_shortcutx:
movdqu 0(%rsi),%xmm0
@@ -5225,26 +6549,52 @@ ecp_nistz256_point_doublex:
.byte 102,72,15,126,207
call __ecp_nistz256_sub_fromx
- addq $160+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 160+56(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbx
+.cfi_restore %rbx
+ movq -8(%rsi),%rbp
+.cfi_restore %rbp
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lpoint_doublex_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
.type ecp_nistz256_point_addx,@function
.align 32
ecp_nistz256_point_addx:
+.cfi_startproc
.Lpoint_addx:
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $576+8,%rsp
+.cfi_adjust_cfa_offset 32*18+8
+.Lpoint_addx_body:
movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
@@ -5621,26 +6971,52 @@ ecp_nistz256_point_addx:
movdqu %xmm3,48(%rdi)
.Ladd_donex:
- addq $576+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 576+56(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbx
+.cfi_restore %rbx
+ movq -8(%rsi),%rbp
+.cfi_restore %rbp
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Lpoint_addx_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
.type ecp_nistz256_point_add_affinex,@function
.align 32
ecp_nistz256_point_add_affinex:
+.cfi_startproc
.Lpoint_add_affinex:
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $480+8,%rsp
+.cfi_adjust_cfa_offset 32*15+8
+.Ladd_affinex_body:
movdqu 0(%rsi),%xmm0
movq %rdx,%rbx
@@ -5923,12 +7299,23 @@ ecp_nistz256_point_add_affinex:
movdqu %xmm2,32(%rdi)
movdqu %xmm3,48(%rdi)
- addq $480+8,%rsp
- popq %r15
- popq %r14
- popq %r13
- popq %r12
- popq %rbx
- popq %rbp
+ leaq 480+56(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbx
+.cfi_restore %rbx
+ movq -8(%rsi),%rbp
+.cfi_restore %rbp
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
+.Ladd_affinex_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/x25519-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/x25519-x86_64.s
new file mode 100644
index 0000000000..2a18eaee28
--- /dev/null
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/ec/x25519-x86_64.s
@@ -0,0 +1,792 @@
+.text
+
+.globl x25519_fe51_mul
+.type x25519_fe51_mul,@function
+.align 32
+x25519_fe51_mul:
+.cfi_startproc
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+ leaq -40(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_mul_body:
+
+ movq 0(%rsi),%rax
+ movq 0(%rdx),%r11
+ movq 8(%rdx),%r12
+ movq 16(%rdx),%r13
+ movq 24(%rdx),%rbp
+ movq 32(%rdx),%r14
+
+ movq %rdi,32(%rsp)
+ movq %rax,%rdi
+ mulq %r11
+ movq %r11,0(%rsp)
+ movq %rax,%rbx
+ movq %rdi,%rax
+ movq %rdx,%rcx
+ mulq %r12
+ movq %r12,8(%rsp)
+ movq %rax,%r8
+ movq %rdi,%rax
+ leaq (%r14,%r14,8),%r15
+ movq %rdx,%r9
+ mulq %r13
+ movq %r13,16(%rsp)
+ movq %rax,%r10
+ movq %rdi,%rax
+ leaq (%r14,%r15,2),%rdi
+ movq %rdx,%r11
+ mulq %rbp
+ movq %rax,%r12
+ movq 0(%rsi),%rax
+ movq %rdx,%r13
+ mulq %r14
+ movq %rax,%r14
+ movq 8(%rsi),%rax
+ movq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%rbx
+ movq 16(%rsi),%rax
+ adcq %rdx,%rcx
+ mulq %rdi
+ addq %rax,%r8
+ movq 24(%rsi),%rax
+ adcq %rdx,%r9
+ mulq %rdi
+ addq %rax,%r10
+ movq 32(%rsi),%rax
+ adcq %rdx,%r11
+ mulq %rdi
+ imulq $19,%rbp,%rdi
+ addq %rax,%r12
+ movq 8(%rsi),%rax
+ adcq %rdx,%r13
+ mulq %rbp
+ movq 16(%rsp),%rbp
+ addq %rax,%r14
+ movq 16(%rsi),%rax
+ adcq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%rbx
+ movq 24(%rsi),%rax
+ adcq %rdx,%rcx
+ mulq %rdi
+ addq %rax,%r8
+ movq 32(%rsi),%rax
+ adcq %rdx,%r9
+ mulq %rdi
+ imulq $19,%rbp,%rdi
+ addq %rax,%r10
+ movq 8(%rsi),%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r12
+ movq 16(%rsi),%rax
+ adcq %rdx,%r13
+ mulq %rbp
+ movq 8(%rsp),%rbp
+ addq %rax,%r14
+ movq 24(%rsi),%rax
+ adcq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%rbx
+ movq 32(%rsi),%rax
+ adcq %rdx,%rcx
+ mulq %rdi
+ addq %rax,%r8
+ movq 8(%rsi),%rax
+ adcq %rdx,%r9
+ mulq %rbp
+ imulq $19,%rbp,%rdi
+ addq %rax,%r10
+ movq 16(%rsi),%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r12
+ movq 24(%rsi),%rax
+ adcq %rdx,%r13
+ mulq %rbp
+ movq 0(%rsp),%rbp
+ addq %rax,%r14
+ movq 32(%rsi),%rax
+ adcq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%rbx
+ movq 8(%rsi),%rax
+ adcq %rdx,%rcx
+ mulq %rbp
+ addq %rax,%r8
+ movq 16(%rsi),%rax
+ adcq %rdx,%r9
+ mulq %rbp
+ addq %rax,%r10
+ movq 24(%rsi),%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r12
+ movq 32(%rsi),%rax
+ adcq %rdx,%r13
+ mulq %rbp
+ addq %rax,%r14
+ adcq %rdx,%r15
+
+ movq 32(%rsp),%rdi
+ jmp .Lreduce51
+.Lfe51_mul_epilogue:
+.cfi_endproc
+.size x25519_fe51_mul,.-x25519_fe51_mul
+
+.globl x25519_fe51_sqr
+.type x25519_fe51_sqr,@function
+.align 32
+x25519_fe51_sqr:
+.cfi_startproc
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+ leaq -40(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_sqr_body:
+
+ movq 0(%rsi),%rax
+ movq 16(%rsi),%r15
+ movq 32(%rsi),%rbp
+
+ movq %rdi,32(%rsp)
+ leaq (%rax,%rax,1),%r14
+ mulq %rax
+ movq %rax,%rbx
+ movq 8(%rsi),%rax
+ movq %rdx,%rcx
+ mulq %r14
+ movq %rax,%r8
+ movq %r15,%rax
+ movq %r15,0(%rsp)
+ movq %rdx,%r9
+ mulq %r14
+ movq %rax,%r10
+ movq 24(%rsi),%rax
+ movq %rdx,%r11
+ imulq $19,%rbp,%rdi
+ mulq %r14
+ movq %rax,%r12
+ movq %rbp,%rax
+ movq %rdx,%r13
+ mulq %r14
+ movq %rax,%r14
+ movq %rbp,%rax
+ movq %rdx,%r15
+
+ mulq %rdi
+ addq %rax,%r12
+ movq 8(%rsi),%rax
+ adcq %rdx,%r13
+
+ movq 24(%rsi),%rsi
+ leaq (%rax,%rax,1),%rbp
+ mulq %rax
+ addq %rax,%r10
+ movq 0(%rsp),%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r12
+ movq %rbp,%rax
+ adcq %rdx,%r13
+ mulq %rsi
+ addq %rax,%r14
+ movq %rbp,%rax
+ adcq %rdx,%r15
+ imulq $19,%rsi,%rbp
+ mulq %rdi
+ addq %rax,%rbx
+ leaq (%rsi,%rsi,1),%rax
+ adcq %rdx,%rcx
+
+ mulq %rdi
+ addq %rax,%r10
+ movq %rsi,%rax
+ adcq %rdx,%r11
+ mulq %rbp
+ addq %rax,%r8
+ movq 0(%rsp),%rax
+ adcq %rdx,%r9
+
+ leaq (%rax,%rax,1),%rsi
+ mulq %rax
+ addq %rax,%r14
+ movq %rbp,%rax
+ adcq %rdx,%r15
+ mulq %rsi
+ addq %rax,%rbx
+ movq %rsi,%rax
+ adcq %rdx,%rcx
+ mulq %rdi
+ addq %rax,%r8
+ adcq %rdx,%r9
+
+ movq 32(%rsp),%rdi
+ jmp .Lreduce51
+
+.align 32
+.Lreduce51:
+ movq $0x7ffffffffffff,%rbp
+
+ movq %r10,%rdx
+ shrq $51,%r10
+ shlq $13,%r11
+ andq %rbp,%rdx
+ orq %r10,%r11
+ addq %r11,%r12
+ adcq $0,%r13
+
+ movq %rbx,%rax
+ shrq $51,%rbx
+ shlq $13,%rcx
+ andq %rbp,%rax
+ orq %rbx,%rcx
+ addq %rcx,%r8
+ adcq $0,%r9
+
+ movq %r12,%rbx
+ shrq $51,%r12
+ shlq $13,%r13
+ andq %rbp,%rbx
+ orq %r12,%r13
+ addq %r13,%r14
+ adcq $0,%r15
+
+ movq %r8,%rcx
+ shrq $51,%r8
+ shlq $13,%r9
+ andq %rbp,%rcx
+ orq %r8,%r9
+ addq %r9,%rdx
+
+ movq %r14,%r10
+ shrq $51,%r14
+ shlq $13,%r15
+ andq %rbp,%r10
+ orq %r14,%r15
+
+ leaq (%r15,%r15,8),%r14
+ leaq (%r15,%r14,2),%r15
+ addq %r15,%rax
+
+ movq %rdx,%r8
+ andq %rbp,%rdx
+ shrq $51,%r8
+ addq %r8,%rbx
+
+ movq %rax,%r9
+ andq %rbp,%rax
+ shrq $51,%r9
+ addq %r9,%rcx
+
+ movq %rax,0(%rdi)
+ movq %rcx,8(%rdi)
+ movq %rdx,16(%rdi)
+ movq %rbx,24(%rdi)
+ movq %r10,32(%rdi)
+
+ movq 40(%rsp),%r15
+.cfi_restore %r15
+ movq 48(%rsp),%r14
+.cfi_restore %r14
+ movq 56(%rsp),%r13
+.cfi_restore %r13
+ movq 64(%rsp),%r12
+.cfi_restore %r12
+ movq 72(%rsp),%rbx
+.cfi_restore %rbx
+ movq 80(%rsp),%rbp
+.cfi_restore %rbp
+ leaq 88(%rsp),%rsp
+.cfi_adjust_cfa_offset 88
+.Lfe51_sqr_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size x25519_fe51_sqr,.-x25519_fe51_sqr
+
+.globl x25519_fe51_mul121666
+.type x25519_fe51_mul121666,@function
+.align 32
+x25519_fe51_mul121666:
+.cfi_startproc
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+ leaq -40(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_mul121666_body:
+ movl $121666,%eax
+
+ mulq 0(%rsi)
+ movq %rax,%rbx
+ movl $121666,%eax
+ movq %rdx,%rcx
+ mulq 8(%rsi)
+ movq %rax,%r8
+ movl $121666,%eax
+ movq %rdx,%r9
+ mulq 16(%rsi)
+ movq %rax,%r10
+ movl $121666,%eax
+ movq %rdx,%r11
+ mulq 24(%rsi)
+ movq %rax,%r12
+ movl $121666,%eax
+ movq %rdx,%r13
+ mulq 32(%rsi)
+ movq %rax,%r14
+ movq %rdx,%r15
+
+ jmp .Lreduce51
+.Lfe51_mul121666_epilogue:
+.cfi_endproc
+.size x25519_fe51_mul121666,.-x25519_fe51_mul121666
+
+.globl x25519_fe64_eligible
+.type x25519_fe64_eligible,@function
+.align 32
+x25519_fe64_eligible:
+ movl OPENSSL_ia32cap_P+8(%rip),%ecx
+ xorl %eax,%eax
+ andl $0x80100,%ecx
+ cmpl $0x80100,%ecx
+ cmovel %ecx,%eax
+ .byte 0xf3,0xc3
+.size x25519_fe64_eligible,.-x25519_fe64_eligible
+
+.globl x25519_fe64_mul
+.type x25519_fe64_mul,@function
+.align 32
+x25519_fe64_mul:
+.cfi_startproc
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+ pushq %rdi
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rdi,-64
+ leaq -16(%rsp),%rsp
+.cfi_adjust_cfa_offset 16
+.Lfe64_mul_body:
+
+ movq %rdx,%rax
+ movq 0(%rdx),%rbp
+ movq 0(%rsi),%rdx
+ movq 8(%rax),%rcx
+ movq 16(%rax),%r14
+ movq 24(%rax),%r15
+
+ mulxq %rbp,%r8,%rax
+ xorl %edi,%edi
+ mulxq %rcx,%r9,%rbx
+ adcxq %rax,%r9
+ mulxq %r14,%r10,%rax
+ adcxq %rbx,%r10
+ mulxq %r15,%r11,%r12
+ movq 8(%rsi),%rdx
+ adcxq %rax,%r11
+ movq %r14,(%rsp)
+ adcxq %rdi,%r12
+
+ mulxq %rbp,%rax,%rbx
+ adoxq %rax,%r9
+ adcxq %rbx,%r10
+ mulxq %rcx,%rax,%rbx
+ adoxq %rax,%r10
+ adcxq %rbx,%r11
+ mulxq %r14,%rax,%rbx
+ adoxq %rax,%r11
+ adcxq %rbx,%r12
+ mulxq %r15,%rax,%r13
+ movq 16(%rsi),%rdx
+ adoxq %rax,%r12
+ adcxq %rdi,%r13
+ adoxq %rdi,%r13
+
+ mulxq %rbp,%rax,%rbx
+ adcxq %rax,%r10
+ adoxq %rbx,%r11
+ mulxq %rcx,%rax,%rbx
+ adcxq %rax,%r11
+ adoxq %rbx,%r12
+ mulxq %r14,%rax,%rbx
+ adcxq %rax,%r12
+ adoxq %rbx,%r13
+ mulxq %r15,%rax,%r14
+ movq 24(%rsi),%rdx
+ adcxq %rax,%r13
+ adoxq %rdi,%r14
+ adcxq %rdi,%r14
+
+ mulxq %rbp,%rax,%rbx
+ adoxq %rax,%r11
+ adcxq %rbx,%r12
+ mulxq %rcx,%rax,%rbx
+ adoxq %rax,%r12
+ adcxq %rbx,%r13
+ mulxq (%rsp),%rax,%rbx
+ adoxq %rax,%r13
+ adcxq %rbx,%r14
+ mulxq %r15,%rax,%r15
+ movl $38,%edx
+ adoxq %rax,%r14
+ adcxq %rdi,%r15
+ adoxq %rdi,%r15
+
+ jmp .Lreduce64
+.Lfe64_mul_epilogue:
+.cfi_endproc
+.size x25519_fe64_mul,.-x25519_fe64_mul
+
+.globl x25519_fe64_sqr
+.type x25519_fe64_sqr,@function
+.align 32
+x25519_fe64_sqr:
+.cfi_startproc
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+ pushq %rdi
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rdi,-64
+ leaq -16(%rsp),%rsp
+.cfi_adjust_cfa_offset 16
+.Lfe64_sqr_body:
+
+ movq 0(%rsi),%rdx
+ movq 8(%rsi),%rcx
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rsi
+
+
+ mulxq %rdx,%r8,%r15
+ mulxq %rcx,%r9,%rax
+ xorl %edi,%edi
+ mulxq %rbp,%r10,%rbx
+ adcxq %rax,%r10
+ mulxq %rsi,%r11,%r12
+ movq %rcx,%rdx
+ adcxq %rbx,%r11
+ adcxq %rdi,%r12
+
+
+ mulxq %rbp,%rax,%rbx
+ adoxq %rax,%r11
+ adcxq %rbx,%r12
+ mulxq %rsi,%rax,%r13
+ movq %rbp,%rdx
+ adoxq %rax,%r12
+ adcxq %rdi,%r13
+
+
+ mulxq %rsi,%rax,%r14
+ movq %rcx,%rdx
+ adoxq %rax,%r13
+ adcxq %rdi,%r14
+ adoxq %rdi,%r14
+
+ adcxq %r9,%r9
+ adoxq %r15,%r9
+ adcxq %r10,%r10
+ mulxq %rdx,%rax,%rbx
+ movq %rbp,%rdx
+ adcxq %r11,%r11
+ adoxq %rax,%r10
+ adcxq %r12,%r12
+ adoxq %rbx,%r11
+ mulxq %rdx,%rax,%rbx
+ movq %rsi,%rdx
+ adcxq %r13,%r13
+ adoxq %rax,%r12
+ adcxq %r14,%r14
+ adoxq %rbx,%r13
+ mulxq %rdx,%rax,%r15
+ movl $38,%edx
+ adoxq %rax,%r14
+ adcxq %rdi,%r15
+ adoxq %rdi,%r15
+ jmp .Lreduce64
+
+.align 32
+.Lreduce64:
+ mulxq %r12,%rax,%rbx
+ adcxq %rax,%r8
+ adoxq %rbx,%r9
+ mulxq %r13,%rax,%rbx
+ adcxq %rax,%r9
+ adoxq %rbx,%r10
+ mulxq %r14,%rax,%rbx
+ adcxq %rax,%r10
+ adoxq %rbx,%r11
+ mulxq %r15,%rax,%r12
+ adcxq %rax,%r11
+ adoxq %rdi,%r12
+ adcxq %rdi,%r12
+
+ movq 16(%rsp),%rdi
+ imulq %rdx,%r12
+
+ addq %r12,%r8
+ adcq $0,%r9
+ adcq $0,%r10
+ adcq $0,%r11
+
+ sbbq %rax,%rax
+ andq $38,%rax
+
+ addq %rax,%r8
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r8,0(%rdi)
+
+ movq 24(%rsp),%r15
+.cfi_restore %r15
+ movq 32(%rsp),%r14
+.cfi_restore %r14
+ movq 40(%rsp),%r13
+.cfi_restore %r13
+ movq 48(%rsp),%r12
+.cfi_restore %r12
+ movq 56(%rsp),%rbx
+.cfi_restore %rbx
+ movq 64(%rsp),%rbp
+.cfi_restore %rbp
+ leaq 72(%rsp),%rsp
+.cfi_adjust_cfa_offset 88
+.Lfe64_sqr_epilogue:
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size x25519_fe64_sqr,.-x25519_fe64_sqr
+
+.globl x25519_fe64_mul121666
+.type x25519_fe64_mul121666,@function
+.align 32
+x25519_fe64_mul121666:
+.Lfe64_mul121666_body:
+ movl $121666,%edx
+ mulxq 0(%rsi),%r8,%rcx
+ mulxq 8(%rsi),%r9,%rax
+ addq %rcx,%r9
+ mulxq 16(%rsi),%r10,%rcx
+ adcq %rax,%r10
+ mulxq 24(%rsi),%r11,%rax
+ adcq %rcx,%r11
+ adcq $0,%rax
+
+ imulq $38,%rax,%rax
+
+ addq %rax,%r8
+ adcq $0,%r9
+ adcq $0,%r10
+ adcq $0,%r11
+
+ sbbq %rax,%rax
+ andq $38,%rax
+
+ addq %rax,%r8
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+ movq %r8,0(%rdi)
+
+.Lfe64_mul121666_epilogue:
+ .byte 0xf3,0xc3
+.size x25519_fe64_mul121666,.-x25519_fe64_mul121666
+
+.globl x25519_fe64_add
+.type x25519_fe64_add,@function
+.align 32
+x25519_fe64_add:
+.Lfe64_add_body:
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+
+ addq 0(%rdx),%r8
+ adcq 8(%rdx),%r9
+ adcq 16(%rdx),%r10
+ adcq 24(%rdx),%r11
+
+ sbbq %rax,%rax
+ andq $38,%rax
+
+ addq %rax,%r8
+ adcq $0,%r9
+ adcq $0,%r10
+ movq %r9,8(%rdi)
+ adcq $0,%r11
+ movq %r10,16(%rdi)
+ sbbq %rax,%rax
+ movq %r11,24(%rdi)
+ andq $38,%rax
+
+ addq %rax,%r8
+ movq %r8,0(%rdi)
+
+.Lfe64_add_epilogue:
+ .byte 0xf3,0xc3
+.size x25519_fe64_add,.-x25519_fe64_add
+
+.globl x25519_fe64_sub
+.type x25519_fe64_sub,@function
+.align 32
+x25519_fe64_sub:
+.Lfe64_sub_body:
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+
+ subq 0(%rdx),%r8
+ sbbq 8(%rdx),%r9
+ sbbq 16(%rdx),%r10
+ sbbq 24(%rdx),%r11
+
+ sbbq %rax,%rax
+ andq $38,%rax
+
+ subq %rax,%r8
+ sbbq $0,%r9
+ sbbq $0,%r10
+ movq %r9,8(%rdi)
+ sbbq $0,%r11
+ movq %r10,16(%rdi)
+ sbbq %rax,%rax
+ movq %r11,24(%rdi)
+ andq $38,%rax
+
+ subq %rax,%r8
+ movq %r8,0(%rdi)
+
+.Lfe64_sub_epilogue:
+ .byte 0xf3,0xc3
+.size x25519_fe64_sub,.-x25519_fe64_sub
+
+.globl x25519_fe64_tobytes
+.type x25519_fe64_tobytes,@function
+.align 32
+x25519_fe64_tobytes:
+.Lfe64_to_body:
+ movq 0(%rsi),%r8
+ movq 8(%rsi),%r9
+ movq 16(%rsi),%r10
+ movq 24(%rsi),%r11
+
+
+ leaq (%r11,%r11,1),%rax
+ sarq $63,%r11
+ shrq $1,%rax
+ andq $19,%r11
+ addq $19,%r11
+
+ addq %r11,%r8
+ adcq $0,%r9
+ adcq $0,%r10
+ adcq $0,%rax
+
+ leaq (%rax,%rax,1),%r11
+ sarq $63,%rax
+ shrq $1,%r11
+ notq %rax
+ andq $19,%rax
+
+ subq %rax,%r8
+ sbbq $0,%r9
+ sbbq $0,%r10
+ sbbq $0,%r11
+
+ movq %r8,0(%rdi)
+ movq %r9,8(%rdi)
+ movq %r10,16(%rdi)
+ movq %r11,24(%rdi)
+
+.Lfe64_to_epilogue:
+ .byte 0xf3,0xc3
+.size x25519_fe64_tobytes,.-x25519_fe64_tobytes
+.byte 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/include/internal/dso_conf.h b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/include/internal/dso_conf.h
index 7a52dd1f1a..18f6ac3bff 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/include/internal/dso_conf.h
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/include/internal/dso_conf.h
@@ -1,7 +1,7 @@
/* WARNING: do not edit! */
/* Generated by Makefile from crypto/include/internal/dso_conf.h.in */
/*
- * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -12,5 +12,8 @@
#ifndef HEADER_DSO_CONF_H
# define HEADER_DSO_CONF_H
+# define DSO_DLFCN
+# define HAVE_DLFCN_H
# define DSO_EXTENSION ".so"
+
#endif
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/md5/md5-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/md5/md5-x86_64.s
index 0defe666bb..348ebe4962 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/md5/md5-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/md5/md5-x86_64.s
@@ -4,11 +4,22 @@
.globl md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
+.cfi_startproc
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-16
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-40
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-48
.Lprologue:
@@ -655,11 +666,18 @@ md5_block_asm_data_order:
movl %edx,12(%rbp)
movq (%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r12
+.cfi_restore %r12
movq 24(%rsp),%rbx
+.cfi_restore %rbx
movq 32(%rsp),%rbp
+.cfi_restore %rbp
addq $40,%rsp
+.cfi_adjust_cfa_offset -40
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/aesni-gcm-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/aesni-gcm-x86_64.s
index 21e49925f1..6a7a9577c7 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/aesni-gcm-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/aesni-gcm-x86_64.s
@@ -31,23 +31,6 @@ _aesni_ctr32_ghash_6x:
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
xorq %r12,%r12
cmpq %r14,%r15
@@ -332,20 +315,25 @@ _aesni_ctr32_ghash_6x:
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
+.cfi_startproc
xorq %r10,%r10
-
-
-
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
leaq (%rsp),%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
@@ -374,15 +362,7 @@ aesni_gcm_decrypt:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
-
-
-
-
-
-
-
leaq -192(%rdi,%rdx,1),%r15
-
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
@@ -415,15 +395,23 @@ aesni_gcm_decrypt:
vzeroupper
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
@@ -520,21 +508,25 @@ _aesni_ctr32_6x:
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
+.cfi_startproc
xorq %r10,%r10
-
-
-
-
cmpq $288,%rdx
jb .Lgcm_enc_abort
leaq (%rsp),%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
@@ -558,16 +550,7 @@ aesni_gcm_encrypt:
.Lenc_no_key_aliasing:
leaq (%rsi),%r14
-
-
-
-
-
-
-
-
leaq -192(%rsi,%rdx,1),%r15
-
shrq $4,%rdx
call _aesni_ctr32_6x
@@ -769,15 +752,23 @@ aesni_gcm_encrypt:
vzeroupper
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.align 64
.Lbswap_mask:
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/ghash-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/ghash-x86_64.s
index 0116ef1c94..29c297f04b 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/ghash-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/modes/ghash-x86_64.s
@@ -5,9 +5,27 @@
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+ subq $280,%rsp
+.cfi_adjust_cfa_offset 280
.Lgmult_prologue:
movzbq 15(%rdi),%r8
@@ -84,22 +102,41 @@ gcm_gmult_4bit:
movq %r8,8(%rdi)
movq %r9,(%rdi)
- movq 16(%rsp),%rbx
- leaq 24(%rsp),%rsp
+ leaq 280+48(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $280,%rsp
+.cfi_adjust_cfa_offset 280
.Lghash_prologue:
movq %rdx,%r14
movq %rcx,%r15
@@ -644,16 +681,25 @@ gcm_ghash_4bit:
movq %r8,8(%rdi)
movq %r9,(%rdi)
- leaq 280(%rsp),%rsi
- movq 0(%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ leaq 280+48(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq 0(%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lghash_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul
.type gcm_init_clmul,@function
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/poly1305/poly1305-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/poly1305/poly1305-x86_64.s
index 8b2e361ea1..deb4f74bfb 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/poly1305/poly1305-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/poly1305/poly1305-x86_64.s
@@ -31,6 +31,11 @@ poly1305_init:
leaq poly1305_blocks_avx2(%rip),%rax
btq $37,%r9
cmovcq %rax,%r10
+ movq $2149646336,%rax
+ shrq $32,%r9
+ andq %rax,%r9
+ cmpq %rax,%r9
+ je .Linit_base2_44
movq $0x0ffffffc0fffffff,%rax
movq $0x0ffffffc0ffffffc,%rcx
andq 0(%rsi),%rax
@@ -47,16 +52,29 @@ poly1305_init:
.type poly1305_blocks,@function
.align 32
poly1305_blocks:
+.cfi_startproc
.Lblocks:
shrq $4,%rdx
jz .Lno_data
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
.Lblocks_body:
movq %rdx,%r15
@@ -127,15 +145,23 @@ poly1305_blocks:
movq %rbp,16(%rdi)
movq 0(%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r13
+.cfi_restore %r13
movq 24(%rsp),%r12
+.cfi_restore %r12
movq 32(%rsp),%rbp
+.cfi_restore %rbp
movq 40(%rsp),%rbx
+.cfi_restore %rbx
leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lno_data:
.Lblocks_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size poly1305_blocks,.-poly1305_blocks
.type poly1305_emit,@function
@@ -371,6 +397,7 @@ __poly1305_init_avx:
.type poly1305_blocks_avx,@function
.align 32
poly1305_blocks_avx:
+.cfi_startproc
movl 20(%rdi),%r8d
cmpq $128,%rdx
jae .Lblocks_avx
@@ -390,11 +417,23 @@ poly1305_blocks_avx:
jz .Leven_avx
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
.Lblocks_avx_body:
movq %rdx,%r15
@@ -497,24 +536,45 @@ poly1305_blocks_avx:
.align 16
.Ldone_avx:
movq 0(%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r13
+.cfi_restore %r13
movq 24(%rsp),%r12
+.cfi_restore %r12
movq 32(%rsp),%rbp
+.cfi_restore %rbp
movq 40(%rsp),%rbx
+.cfi_restore %rbx
leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lno_data_avx:
.Lblocks_avx_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.align 32
.Lbase2_64_avx:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
.Lbase2_64_avx_body:
movq %rdx,%r15
@@ -574,18 +634,27 @@ poly1305_blocks_avx:
movq %r15,%rdx
movq 0(%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r13
+.cfi_restore %r13
movq 24(%rsp),%r12
+.cfi_restore %r12
movq 32(%rsp),%rbp
+.cfi_restore %rbp
movq 40(%rsp),%rbx
+.cfi_restore %rbx
leaq 48(%rsp),%rax
leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lbase2_64_avx_epilogue:
jmp .Ldo_avx
+.cfi_endproc
.align 32
.Leven_avx:
+.cfi_startproc
vmovd 0(%rdi),%xmm0
vmovd 4(%rdi),%xmm1
vmovd 8(%rdi),%xmm2
@@ -594,6 +663,7 @@ poly1305_blocks_avx:
.Ldo_avx:
leaq -88(%rsp),%r11
+.cfi_def_cfa %r11,0x60
subq $0x178,%rsp
subq $64,%rdx
leaq -32(%rsi),%rax
@@ -1153,8 +1223,10 @@ poly1305_blocks_avx:
vmovd %xmm13,-100(%rdi)
vmovd %xmm14,-96(%rdi)
leaq 88(%r11),%rsp
+.cfi_def_cfa %rsp,8
vzeroupper
.byte 0xf3,0xc3
+.cfi_endproc
.size poly1305_blocks_avx,.-poly1305_blocks_avx
.type poly1305_emit_avx,@function
@@ -1214,6 +1286,7 @@ poly1305_emit_avx:
.type poly1305_blocks_avx2,@function
.align 32
poly1305_blocks_avx2:
+.cfi_startproc
movl 20(%rdi),%r8d
cmpq $128,%rdx
jae .Lblocks_avx2
@@ -1233,11 +1306,23 @@ poly1305_blocks_avx2:
jz .Leven_avx2
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
.Lblocks_avx2_body:
movq %rdx,%r15
@@ -1346,24 +1431,45 @@ poly1305_blocks_avx2:
.align 16
.Ldone_avx2:
movq 0(%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r13
+.cfi_restore %r13
movq 24(%rsp),%r12
+.cfi_restore %r12
movq 32(%rsp),%rbp
+.cfi_restore %rbp
movq 40(%rsp),%rbx
+.cfi_restore %rbx
leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lno_data_avx2:
.Lblocks_avx2_epilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.align 32
.Lbase2_64_avx2:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
.Lbase2_64_avx2_body:
movq %rdx,%r15
@@ -1426,20 +1532,32 @@ poly1305_blocks_avx2:
.Lproceed_avx2:
movq %r15,%rdx
+ movl OPENSSL_ia32cap_P+8(%rip),%r10d
+ movl $3221291008,%r11d
movq 0(%rsp),%r15
+.cfi_restore %r15
movq 8(%rsp),%r14
+.cfi_restore %r14
movq 16(%rsp),%r13
+.cfi_restore %r13
movq 24(%rsp),%r12
+.cfi_restore %r12
movq 32(%rsp),%rbp
+.cfi_restore %rbp
movq 40(%rsp),%rbx
+.cfi_restore %rbx
leaq 48(%rsp),%rax
leaq 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lbase2_64_avx2_epilogue:
jmp .Ldo_avx2
+.cfi_endproc
.align 32
.Leven_avx2:
+.cfi_startproc
+ movl OPENSSL_ia32cap_P+8(%rip),%r10d
vmovd 0(%rdi),%xmm0
vmovd 4(%rdi),%xmm1
vmovd 8(%rdi),%xmm2
@@ -1447,10 +1565,18 @@ poly1305_blocks_avx2:
vmovd 16(%rdi),%xmm4
.Ldo_avx2:
+ cmpq $512,%rdx
+ jb .Lskip_avx512
+ andl %r11d,%r10d
+ testl $65536,%r10d
+ jnz .Lblocks_avx512
+.Lskip_avx512:
leaq -8(%rsp),%r11
+.cfi_def_cfa %r11,16
subq $0x128,%rsp
- leaq 48+64(%rdi),%rdi
leaq .Lconst(%rip),%rcx
+ leaq 48+64(%rdi),%rdi
+ vmovdqa 96(%rcx),%ymm7
vmovdqu -64(%rdi),%xmm9
@@ -1460,36 +1586,28 @@ poly1305_blocks_avx2:
vmovdqu -16(%rdi),%xmm11
vmovdqu 0(%rdi),%xmm12
vmovdqu 16(%rdi),%xmm13
+ leaq 144(%rsp),%rax
vmovdqu 32(%rdi),%xmm14
- vpermq $0x15,%ymm9,%ymm9
+ vpermd %ymm9,%ymm7,%ymm9
vmovdqu 48(%rdi),%xmm15
- vpermq $0x15,%ymm10,%ymm10
- vpshufd $0xc8,%ymm9,%ymm9
+ vpermd %ymm10,%ymm7,%ymm10
vmovdqu 64(%rdi),%xmm5
- vpermq $0x15,%ymm6,%ymm6
- vpshufd $0xc8,%ymm10,%ymm10
+ vpermd %ymm6,%ymm7,%ymm6
vmovdqa %ymm9,0(%rsp)
- vpermq $0x15,%ymm11,%ymm11
- vpshufd $0xc8,%ymm6,%ymm6
- vmovdqa %ymm10,32(%rsp)
- vpermq $0x15,%ymm12,%ymm12
- vpshufd $0xc8,%ymm11,%ymm11
- vmovdqa %ymm6,64(%rsp)
- vpermq $0x15,%ymm13,%ymm13
- vpshufd $0xc8,%ymm12,%ymm12
- vmovdqa %ymm11,96(%rsp)
- vpermq $0x15,%ymm14,%ymm14
- vpshufd $0xc8,%ymm13,%ymm13
- vmovdqa %ymm12,128(%rsp)
- vpermq $0x15,%ymm15,%ymm15
- vpshufd $0xc8,%ymm14,%ymm14
- vmovdqa %ymm13,160(%rsp)
- vpermq $0x15,%ymm5,%ymm5
- vpshufd $0xc8,%ymm15,%ymm15
- vmovdqa %ymm14,192(%rsp)
- vpshufd $0xc8,%ymm5,%ymm5
- vmovdqa %ymm15,224(%rsp)
- vmovdqa %ymm5,256(%rsp)
+ vpermd %ymm11,%ymm7,%ymm11
+ vmovdqa %ymm10,32-144(%rax)
+ vpermd %ymm12,%ymm7,%ymm12
+ vmovdqa %ymm6,64-144(%rax)
+ vpermd %ymm13,%ymm7,%ymm13
+ vmovdqa %ymm11,96-144(%rax)
+ vpermd %ymm14,%ymm7,%ymm14
+ vmovdqa %ymm12,128-144(%rax)
+ vpermd %ymm15,%ymm7,%ymm15
+ vmovdqa %ymm13,160-144(%rax)
+ vpermd %ymm5,%ymm7,%ymm5
+ vmovdqa %ymm14,192-144(%rax)
+ vmovdqa %ymm15,224-144(%rax)
+ vmovdqa %ymm5,256-144(%rax)
vmovdqa 64(%rcx),%ymm5
@@ -1516,7 +1634,6 @@ poly1305_blocks_avx2:
vpand %ymm5,%ymm10,%ymm10
vpor 32(%rcx),%ymm6,%ymm6
- leaq 144(%rsp),%rax
vpaddq %ymm2,%ymm9,%ymm2
subq $64,%rdx
jz .Ltail_avx2
@@ -1811,9 +1928,1506 @@ poly1305_blocks_avx2:
vmovd %xmm3,-100(%rdi)
vmovd %xmm4,-96(%rdi)
leaq 8(%r11),%rsp
+.cfi_def_cfa %rsp,8
vzeroupper
.byte 0xf3,0xc3
+.cfi_endproc
.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
+.type poly1305_blocks_avx512,@function
+.align 32
+poly1305_blocks_avx512:
+.cfi_startproc
+.Lblocks_avx512:
+ movl $15,%eax
+ kmovw %eax,%k2
+ leaq -8(%rsp),%r11
+.cfi_def_cfa %r11,16
+ subq $0x128,%rsp
+ leaq .Lconst(%rip),%rcx
+ leaq 48+64(%rdi),%rdi
+ vmovdqa 96(%rcx),%ymm9
+
+
+ vmovdqu -64(%rdi),%xmm11
+ andq $-512,%rsp
+ vmovdqu -48(%rdi),%xmm12
+ movq $0x20,%rax
+ vmovdqu -32(%rdi),%xmm7
+ vmovdqu -16(%rdi),%xmm13
+ vmovdqu 0(%rdi),%xmm8
+ vmovdqu 16(%rdi),%xmm14
+ vmovdqu 32(%rdi),%xmm10
+ vmovdqu 48(%rdi),%xmm15
+ vmovdqu 64(%rdi),%xmm6
+ vpermd %zmm11,%zmm9,%zmm16
+ vpbroadcastq 64(%rcx),%zmm5
+ vpermd %zmm12,%zmm9,%zmm17
+ vpermd %zmm7,%zmm9,%zmm21
+ vpermd %zmm13,%zmm9,%zmm18
+ vmovdqa64 %zmm16,0(%rsp){%k2}
+ vpsrlq $32,%zmm16,%zmm7
+ vpermd %zmm8,%zmm9,%zmm22
+ vmovdqu64 %zmm17,0(%rsp,%rax,1){%k2}
+ vpsrlq $32,%zmm17,%zmm8
+ vpermd %zmm14,%zmm9,%zmm19
+ vmovdqa64 %zmm21,64(%rsp){%k2}
+ vpermd %zmm10,%zmm9,%zmm23
+ vpermd %zmm15,%zmm9,%zmm20
+ vmovdqu64 %zmm18,64(%rsp,%rax,1){%k2}
+ vpermd %zmm6,%zmm9,%zmm24
+ vmovdqa64 %zmm22,128(%rsp){%k2}
+ vmovdqu64 %zmm19,128(%rsp,%rax,1){%k2}
+ vmovdqa64 %zmm23,192(%rsp){%k2}
+ vmovdqu64 %zmm20,192(%rsp,%rax,1){%k2}
+ vmovdqa64 %zmm24,256(%rsp){%k2}
+
+
+
+
+
+
+
+
+
+
+ vpmuludq %zmm7,%zmm16,%zmm11
+ vpmuludq %zmm7,%zmm17,%zmm12
+ vpmuludq %zmm7,%zmm18,%zmm13
+ vpmuludq %zmm7,%zmm19,%zmm14
+ vpmuludq %zmm7,%zmm20,%zmm15
+ vpsrlq $32,%zmm18,%zmm9
+
+ vpmuludq %zmm8,%zmm24,%zmm25
+ vpmuludq %zmm8,%zmm16,%zmm26
+ vpmuludq %zmm8,%zmm17,%zmm27
+ vpmuludq %zmm8,%zmm18,%zmm28
+ vpmuludq %zmm8,%zmm19,%zmm29
+ vpsrlq $32,%zmm19,%zmm10
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm26,%zmm12,%zmm12
+ vpaddq %zmm27,%zmm13,%zmm13
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+
+ vpmuludq %zmm9,%zmm23,%zmm25
+ vpmuludq %zmm9,%zmm24,%zmm26
+ vpmuludq %zmm9,%zmm17,%zmm28
+ vpmuludq %zmm9,%zmm18,%zmm29
+ vpmuludq %zmm9,%zmm16,%zmm27
+ vpsrlq $32,%zmm20,%zmm6
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm26,%zmm12,%zmm12
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm27,%zmm13,%zmm13
+
+ vpmuludq %zmm10,%zmm22,%zmm25
+ vpmuludq %zmm10,%zmm16,%zmm28
+ vpmuludq %zmm10,%zmm17,%zmm29
+ vpmuludq %zmm10,%zmm23,%zmm26
+ vpmuludq %zmm10,%zmm24,%zmm27
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm26,%zmm12,%zmm12
+ vpaddq %zmm27,%zmm13,%zmm13
+
+ vpmuludq %zmm6,%zmm24,%zmm28
+ vpmuludq %zmm6,%zmm16,%zmm29
+ vpmuludq %zmm6,%zmm21,%zmm25
+ vpmuludq %zmm6,%zmm22,%zmm26
+ vpmuludq %zmm6,%zmm23,%zmm27
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm26,%zmm12,%zmm12
+ vpaddq %zmm27,%zmm13,%zmm13
+
+
+
+ vmovdqu64 0(%rsi),%zmm10
+ vmovdqu64 64(%rsi),%zmm6
+ leaq 128(%rsi),%rsi
+
+
+
+
+ vpsrlq $26,%zmm14,%zmm28
+ vpandq %zmm5,%zmm14,%zmm14
+ vpaddq %zmm28,%zmm15,%zmm15
+
+ vpsrlq $26,%zmm11,%zmm25
+ vpandq %zmm5,%zmm11,%zmm11
+ vpaddq %zmm25,%zmm12,%zmm12
+
+ vpsrlq $26,%zmm15,%zmm29
+ vpandq %zmm5,%zmm15,%zmm15
+
+ vpsrlq $26,%zmm12,%zmm26
+ vpandq %zmm5,%zmm12,%zmm12
+ vpaddq %zmm26,%zmm13,%zmm13
+
+ vpaddq %zmm29,%zmm11,%zmm11
+ vpsllq $2,%zmm29,%zmm29
+ vpaddq %zmm29,%zmm11,%zmm11
+
+ vpsrlq $26,%zmm13,%zmm27
+ vpandq %zmm5,%zmm13,%zmm13
+ vpaddq %zmm27,%zmm14,%zmm14
+
+ vpsrlq $26,%zmm11,%zmm25
+ vpandq %zmm5,%zmm11,%zmm11
+ vpaddq %zmm25,%zmm12,%zmm12
+
+ vpsrlq $26,%zmm14,%zmm28
+ vpandq %zmm5,%zmm14,%zmm14
+ vpaddq %zmm28,%zmm15,%zmm15
+
+
+
+
+
+ vpunpcklqdq %zmm6,%zmm10,%zmm7
+ vpunpckhqdq %zmm6,%zmm10,%zmm6
+
+
+
+
+
+
+ vmovdqa32 128(%rcx),%zmm25
+ movl $0x7777,%eax
+ kmovw %eax,%k1
+
+ vpermd %zmm16,%zmm25,%zmm16
+ vpermd %zmm17,%zmm25,%zmm17
+ vpermd %zmm18,%zmm25,%zmm18
+ vpermd %zmm19,%zmm25,%zmm19
+ vpermd %zmm20,%zmm25,%zmm20
+
+ vpermd %zmm11,%zmm25,%zmm16{%k1}
+ vpermd %zmm12,%zmm25,%zmm17{%k1}
+ vpermd %zmm13,%zmm25,%zmm18{%k1}
+ vpermd %zmm14,%zmm25,%zmm19{%k1}
+ vpermd %zmm15,%zmm25,%zmm20{%k1}
+
+ vpslld $2,%zmm17,%zmm21
+ vpslld $2,%zmm18,%zmm22
+ vpslld $2,%zmm19,%zmm23
+ vpslld $2,%zmm20,%zmm24
+ vpaddd %zmm17,%zmm21,%zmm21
+ vpaddd %zmm18,%zmm22,%zmm22
+ vpaddd %zmm19,%zmm23,%zmm23
+ vpaddd %zmm20,%zmm24,%zmm24
+
+ vpbroadcastq 32(%rcx),%zmm30
+
+ vpsrlq $52,%zmm7,%zmm9
+ vpsllq $12,%zmm6,%zmm10
+ vporq %zmm10,%zmm9,%zmm9
+ vpsrlq $26,%zmm7,%zmm8
+ vpsrlq $14,%zmm6,%zmm10
+ vpsrlq $40,%zmm6,%zmm6
+ vpandq %zmm5,%zmm9,%zmm9
+ vpandq %zmm5,%zmm7,%zmm7
+
+
+
+
+ vpaddq %zmm2,%zmm9,%zmm2
+ subq $192,%rdx
+ jbe .Ltail_avx512
+ jmp .Loop_avx512
+
+.align 32
+.Loop_avx512:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ vpmuludq %zmm2,%zmm17,%zmm14
+ vpaddq %zmm0,%zmm7,%zmm0
+ vpmuludq %zmm2,%zmm18,%zmm15
+ vpandq %zmm5,%zmm8,%zmm8
+ vpmuludq %zmm2,%zmm23,%zmm11
+ vpandq %zmm5,%zmm10,%zmm10
+ vpmuludq %zmm2,%zmm24,%zmm12
+ vporq %zmm30,%zmm6,%zmm6
+ vpmuludq %zmm2,%zmm16,%zmm13
+ vpaddq %zmm1,%zmm8,%zmm1
+ vpaddq %zmm3,%zmm10,%zmm3
+ vpaddq %zmm4,%zmm6,%zmm4
+
+ vmovdqu64 0(%rsi),%zmm10
+ vmovdqu64 64(%rsi),%zmm6
+ leaq 128(%rsi),%rsi
+ vpmuludq %zmm0,%zmm19,%zmm28
+ vpmuludq %zmm0,%zmm20,%zmm29
+ vpmuludq %zmm0,%zmm16,%zmm25
+ vpmuludq %zmm0,%zmm17,%zmm26
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm26,%zmm12,%zmm12
+
+ vpmuludq %zmm1,%zmm18,%zmm28
+ vpmuludq %zmm1,%zmm19,%zmm29
+ vpmuludq %zmm1,%zmm24,%zmm25
+ vpmuludq %zmm0,%zmm18,%zmm27
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm27,%zmm13,%zmm13
+
+ vpunpcklqdq %zmm6,%zmm10,%zmm7
+ vpunpckhqdq %zmm6,%zmm10,%zmm6
+
+ vpmuludq %zmm3,%zmm16,%zmm28
+ vpmuludq %zmm3,%zmm17,%zmm29
+ vpmuludq %zmm1,%zmm16,%zmm26
+ vpmuludq %zmm1,%zmm17,%zmm27
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm26,%zmm12,%zmm12
+ vpaddq %zmm27,%zmm13,%zmm13
+
+ vpmuludq %zmm4,%zmm24,%zmm28
+ vpmuludq %zmm4,%zmm16,%zmm29
+ vpmuludq %zmm3,%zmm22,%zmm25
+ vpmuludq %zmm3,%zmm23,%zmm26
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpmuludq %zmm3,%zmm24,%zmm27
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm26,%zmm12,%zmm12
+ vpaddq %zmm27,%zmm13,%zmm13
+
+ vpmuludq %zmm4,%zmm21,%zmm25
+ vpmuludq %zmm4,%zmm22,%zmm26
+ vpmuludq %zmm4,%zmm23,%zmm27
+ vpaddq %zmm25,%zmm11,%zmm0
+ vpaddq %zmm26,%zmm12,%zmm1
+ vpaddq %zmm27,%zmm13,%zmm2
+
+
+
+
+ vpsrlq $52,%zmm7,%zmm9
+ vpsllq $12,%zmm6,%zmm10
+
+ vpsrlq $26,%zmm14,%zmm3
+ vpandq %zmm5,%zmm14,%zmm14
+ vpaddq %zmm3,%zmm15,%zmm4
+
+ vporq %zmm10,%zmm9,%zmm9
+
+ vpsrlq $26,%zmm0,%zmm11
+ vpandq %zmm5,%zmm0,%zmm0
+ vpaddq %zmm11,%zmm1,%zmm1
+
+ vpandq %zmm5,%zmm9,%zmm9
+
+ vpsrlq $26,%zmm4,%zmm15
+ vpandq %zmm5,%zmm4,%zmm4
+
+ vpsrlq $26,%zmm1,%zmm12
+ vpandq %zmm5,%zmm1,%zmm1
+ vpaddq %zmm12,%zmm2,%zmm2
+
+ vpaddq %zmm15,%zmm0,%zmm0
+ vpsllq $2,%zmm15,%zmm15
+ vpaddq %zmm15,%zmm0,%zmm0
+
+ vpaddq %zmm9,%zmm2,%zmm2
+ vpsrlq $26,%zmm7,%zmm8
+
+ vpsrlq $26,%zmm2,%zmm13
+ vpandq %zmm5,%zmm2,%zmm2
+ vpaddq %zmm13,%zmm14,%zmm3
+
+ vpsrlq $14,%zmm6,%zmm10
+
+ vpsrlq $26,%zmm0,%zmm11
+ vpandq %zmm5,%zmm0,%zmm0
+ vpaddq %zmm11,%zmm1,%zmm1
+
+ vpsrlq $40,%zmm6,%zmm6
+
+ vpsrlq $26,%zmm3,%zmm14
+ vpandq %zmm5,%zmm3,%zmm3
+ vpaddq %zmm14,%zmm4,%zmm4
+
+ vpandq %zmm5,%zmm7,%zmm7
+
+
+
+
+ subq $128,%rdx
+ ja .Loop_avx512
+
+.Ltail_avx512:
+
+
+
+
+
+ vpsrlq $32,%zmm16,%zmm16
+ vpsrlq $32,%zmm17,%zmm17
+ vpsrlq $32,%zmm18,%zmm18
+ vpsrlq $32,%zmm23,%zmm23
+ vpsrlq $32,%zmm24,%zmm24
+ vpsrlq $32,%zmm19,%zmm19
+ vpsrlq $32,%zmm20,%zmm20
+ vpsrlq $32,%zmm21,%zmm21
+ vpsrlq $32,%zmm22,%zmm22
+
+
+
+ leaq (%rsi,%rdx,1),%rsi
+
+
+ vpaddq %zmm0,%zmm7,%zmm0
+
+ vpmuludq %zmm2,%zmm17,%zmm14
+ vpmuludq %zmm2,%zmm18,%zmm15
+ vpmuludq %zmm2,%zmm23,%zmm11
+ vpandq %zmm5,%zmm8,%zmm8
+ vpmuludq %zmm2,%zmm24,%zmm12
+ vpandq %zmm5,%zmm10,%zmm10
+ vpmuludq %zmm2,%zmm16,%zmm13
+ vporq %zmm30,%zmm6,%zmm6
+ vpaddq %zmm1,%zmm8,%zmm1
+ vpaddq %zmm3,%zmm10,%zmm3
+ vpaddq %zmm4,%zmm6,%zmm4
+
+ vmovdqu 0(%rsi),%xmm7
+ vpmuludq %zmm0,%zmm19,%zmm28
+ vpmuludq %zmm0,%zmm20,%zmm29
+ vpmuludq %zmm0,%zmm16,%zmm25
+ vpmuludq %zmm0,%zmm17,%zmm26
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm26,%zmm12,%zmm12
+
+ vmovdqu 16(%rsi),%xmm8
+ vpmuludq %zmm1,%zmm18,%zmm28
+ vpmuludq %zmm1,%zmm19,%zmm29
+ vpmuludq %zmm1,%zmm24,%zmm25
+ vpmuludq %zmm0,%zmm18,%zmm27
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm27,%zmm13,%zmm13
+
+ vinserti128 $1,32(%rsi),%ymm7,%ymm7
+ vpmuludq %zmm3,%zmm16,%zmm28
+ vpmuludq %zmm3,%zmm17,%zmm29
+ vpmuludq %zmm1,%zmm16,%zmm26
+ vpmuludq %zmm1,%zmm17,%zmm27
+ vpaddq %zmm28,%zmm14,%zmm14
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm26,%zmm12,%zmm12
+ vpaddq %zmm27,%zmm13,%zmm13
+
+ vinserti128 $1,48(%rsi),%ymm8,%ymm8
+ vpmuludq %zmm4,%zmm24,%zmm28
+ vpmuludq %zmm4,%zmm16,%zmm29
+ vpmuludq %zmm3,%zmm22,%zmm25
+ vpmuludq %zmm3,%zmm23,%zmm26
+ vpmuludq %zmm3,%zmm24,%zmm27
+ vpaddq %zmm28,%zmm14,%zmm3
+ vpaddq %zmm29,%zmm15,%zmm15
+ vpaddq %zmm25,%zmm11,%zmm11
+ vpaddq %zmm26,%zmm12,%zmm12
+ vpaddq %zmm27,%zmm13,%zmm13
+
+ vpmuludq %zmm4,%zmm21,%zmm25
+ vpmuludq %zmm4,%zmm22,%zmm26
+ vpmuludq %zmm4,%zmm23,%zmm27
+ vpaddq %zmm25,%zmm11,%zmm0
+ vpaddq %zmm26,%zmm12,%zmm1
+ vpaddq %zmm27,%zmm13,%zmm2
+
+
+
+
+ movl $1,%eax
+ vpermq $0xb1,%zmm3,%zmm14
+ vpermq $0xb1,%zmm15,%zmm4
+ vpermq $0xb1,%zmm0,%zmm11
+ vpermq $0xb1,%zmm1,%zmm12
+ vpermq $0xb1,%zmm2,%zmm13
+ vpaddq %zmm14,%zmm3,%zmm3
+ vpaddq %zmm15,%zmm4,%zmm4
+ vpaddq %zmm11,%zmm0,%zmm0
+ vpaddq %zmm12,%zmm1,%zmm1
+ vpaddq %zmm13,%zmm2,%zmm2
+
+ kmovw %eax,%k3
+ vpermq $0x2,%zmm3,%zmm14
+ vpermq $0x2,%zmm4,%zmm15
+ vpermq $0x2,%zmm0,%zmm11
+ vpermq $0x2,%zmm1,%zmm12
+ vpermq $0x2,%zmm2,%zmm13
+ vpaddq %zmm14,%zmm3,%zmm3
+ vpaddq %zmm15,%zmm4,%zmm4
+ vpaddq %zmm11,%zmm0,%zmm0
+ vpaddq %zmm12,%zmm1,%zmm1
+ vpaddq %zmm13,%zmm2,%zmm2
+
+ vextracti64x4 $0x1,%zmm3,%ymm14
+ vextracti64x4 $0x1,%zmm4,%ymm15
+ vextracti64x4 $0x1,%zmm0,%ymm11
+ vextracti64x4 $0x1,%zmm1,%ymm12
+ vextracti64x4 $0x1,%zmm2,%ymm13
+ vpaddq %zmm14,%zmm3,%zmm3{%k3}{z}
+ vpaddq %zmm15,%zmm4,%zmm4{%k3}{z}
+ vpaddq %zmm11,%zmm0,%zmm0{%k3}{z}
+ vpaddq %zmm12,%zmm1,%zmm1{%k3}{z}
+ vpaddq %zmm13,%zmm2,%zmm2{%k3}{z}
+
+
+
+ vpsrlq $26,%ymm3,%ymm14
+ vpand %ymm5,%ymm3,%ymm3
+ vpsrldq $6,%ymm7,%ymm9
+ vpsrldq $6,%ymm8,%ymm10
+ vpunpckhqdq %ymm8,%ymm7,%ymm6
+ vpaddq %ymm14,%ymm4,%ymm4
+
+ vpsrlq $26,%ymm0,%ymm11
+ vpand %ymm5,%ymm0,%ymm0
+ vpunpcklqdq %ymm10,%ymm9,%ymm9
+ vpunpcklqdq %ymm8,%ymm7,%ymm7
+ vpaddq %ymm11,%ymm1,%ymm1
+
+ vpsrlq $26,%ymm4,%ymm15
+ vpand %ymm5,%ymm4,%ymm4
+
+ vpsrlq $26,%ymm1,%ymm12
+ vpand %ymm5,%ymm1,%ymm1
+ vpsrlq $30,%ymm9,%ymm10
+ vpsrlq $4,%ymm9,%ymm9
+ vpaddq %ymm12,%ymm2,%ymm2
+
+ vpaddq %ymm15,%ymm0,%ymm0
+ vpsllq $2,%ymm15,%ymm15
+ vpsrlq $26,%ymm7,%ymm8
+ vpsrlq $40,%ymm6,%ymm6
+ vpaddq %ymm15,%ymm0,%ymm0
+
+ vpsrlq $26,%ymm2,%ymm13
+ vpand %ymm5,%ymm2,%ymm2
+ vpand %ymm5,%ymm9,%ymm9
+ vpand %ymm5,%ymm7,%ymm7
+ vpaddq %ymm13,%ymm3,%ymm3
+
+ vpsrlq $26,%ymm0,%ymm11
+ vpand %ymm5,%ymm0,%ymm0
+ vpaddq %ymm2,%ymm9,%ymm2
+ vpand %ymm5,%ymm8,%ymm8
+ vpaddq %ymm11,%ymm1,%ymm1
+
+ vpsrlq $26,%ymm3,%ymm14
+ vpand %ymm5,%ymm3,%ymm3
+ vpand %ymm5,%ymm10,%ymm10
+ vpor 32(%rcx),%ymm6,%ymm6
+ vpaddq %ymm14,%ymm4,%ymm4
+
+ leaq 144(%rsp),%rax
+ addq $64,%rdx
+ jnz .Ltail_avx2
+
+ vpsubq %ymm9,%ymm2,%ymm2
+ vmovd %xmm0,-112(%rdi)
+ vmovd %xmm1,-108(%rdi)
+ vmovd %xmm2,-104(%rdi)
+ vmovd %xmm3,-100(%rdi)
+ vmovd %xmm4,-96(%rdi)
+ vzeroall
+ leaq 8(%r11),%rsp
+.cfi_def_cfa %rsp,8
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size poly1305_blocks_avx512,.-poly1305_blocks_avx512
+.type poly1305_init_base2_44,@function
+.align 32
+poly1305_init_base2_44:
+ xorq %rax,%rax
+ movq %rax,0(%rdi)
+ movq %rax,8(%rdi)
+ movq %rax,16(%rdi)
+
+.Linit_base2_44:
+ leaq poly1305_blocks_vpmadd52(%rip),%r10
+ leaq poly1305_emit_base2_44(%rip),%r11
+
+ movq $0x0ffffffc0fffffff,%rax
+ movq $0x0ffffffc0ffffffc,%rcx
+ andq 0(%rsi),%rax
+ movq $0x00000fffffffffff,%r8
+ andq 8(%rsi),%rcx
+ movq $0x00000fffffffffff,%r9
+ andq %rax,%r8
+ shrdq $44,%rcx,%rax
+ movq %r8,40(%rdi)
+ andq %r9,%rax
+ shrq $24,%rcx
+ movq %rax,48(%rdi)
+ leaq (%rax,%rax,4),%rax
+ movq %rcx,56(%rdi)
+ shlq $2,%rax
+ leaq (%rcx,%rcx,4),%rcx
+ shlq $2,%rcx
+ movq %rax,24(%rdi)
+ movq %rcx,32(%rdi)
+ movq $-1,64(%rdi)
+ movq %r10,0(%rdx)
+ movq %r11,8(%rdx)
+ movl $1,%eax
+ .byte 0xf3,0xc3
+.size poly1305_init_base2_44,.-poly1305_init_base2_44
+.type poly1305_blocks_vpmadd52,@function
+.align 32
+poly1305_blocks_vpmadd52:
+ shrq $4,%rdx
+ jz .Lno_data_vpmadd52
+
+ shlq $40,%rcx
+ movq 64(%rdi),%r8
+
+
+
+
+
+
+ movq $3,%rax
+ movq $1,%r10
+ cmpq $4,%rdx
+ cmovaeq %r10,%rax
+ testq %r8,%r8
+ cmovnsq %r10,%rax
+
+ andq %rdx,%rax
+ jz .Lblocks_vpmadd52_4x
+
+ subq %rax,%rdx
+ movl $7,%r10d
+ movl $1,%r11d
+ kmovw %r10d,%k7
+ leaq .L2_44_inp_permd(%rip),%r10
+ kmovw %r11d,%k1
+
+ vmovq %rcx,%xmm21
+ vmovdqa64 0(%r10),%ymm19
+ vmovdqa64 32(%r10),%ymm20
+ vpermq $0xcf,%ymm21,%ymm21
+ vmovdqa64 64(%r10),%ymm22
+
+ vmovdqu64 0(%rdi),%ymm16{%k7}{z}
+ vmovdqu64 40(%rdi),%ymm3{%k7}{z}
+ vmovdqu64 32(%rdi),%ymm4{%k7}{z}
+ vmovdqu64 24(%rdi),%ymm5{%k7}{z}
+
+ vmovdqa64 96(%r10),%ymm23
+ vmovdqa64 128(%r10),%ymm24
+
+ jmp .Loop_vpmadd52
+
+.align 32
+.Loop_vpmadd52:
+ vmovdqu32 0(%rsi),%xmm18
+ leaq 16(%rsi),%rsi
+
+ vpermd %ymm18,%ymm19,%ymm18
+ vpsrlvq %ymm20,%ymm18,%ymm18
+ vpandq %ymm22,%ymm18,%ymm18
+ vporq %ymm21,%ymm18,%ymm18
+
+ vpaddq %ymm18,%ymm16,%ymm16
+
+ vpermq $0,%ymm16,%ymm0{%k7}{z}
+ vpermq $85,%ymm16,%ymm1{%k7}{z}
+ vpermq $170,%ymm16,%ymm2{%k7}{z}
+
+ vpxord %ymm16,%ymm16,%ymm16
+ vpxord %ymm17,%ymm17,%ymm17
+
+ vpmadd52luq %ymm3,%ymm0,%ymm16
+ vpmadd52huq %ymm3,%ymm0,%ymm17
+
+ vpmadd52luq %ymm4,%ymm1,%ymm16
+ vpmadd52huq %ymm4,%ymm1,%ymm17
+
+ vpmadd52luq %ymm5,%ymm2,%ymm16
+ vpmadd52huq %ymm5,%ymm2,%ymm17
+
+ vpsrlvq %ymm23,%ymm16,%ymm18
+ vpsllvq %ymm24,%ymm17,%ymm17
+ vpandq %ymm22,%ymm16,%ymm16
+
+ vpaddq %ymm18,%ymm17,%ymm17
+
+ vpermq $147,%ymm17,%ymm17
+
+ vpaddq %ymm17,%ymm16,%ymm16
+
+ vpsrlvq %ymm23,%ymm16,%ymm18
+ vpandq %ymm22,%ymm16,%ymm16
+
+ vpermq $147,%ymm18,%ymm18
+
+ vpaddq %ymm18,%ymm16,%ymm16
+
+ vpermq $147,%ymm16,%ymm18{%k1}{z}
+
+ vpaddq %ymm18,%ymm16,%ymm16
+ vpsllq $2,%ymm18,%ymm18
+
+ vpaddq %ymm18,%ymm16,%ymm16
+
+ decq %rax
+ jnz .Loop_vpmadd52
+
+ vmovdqu64 %ymm16,0(%rdi){%k7}
+
+ testq %rdx,%rdx
+ jnz .Lblocks_vpmadd52_4x
+
+.Lno_data_vpmadd52:
+ .byte 0xf3,0xc3
+.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
+.type poly1305_blocks_vpmadd52_4x,@function
+.align 32
+poly1305_blocks_vpmadd52_4x:
+ shrq $4,%rdx
+ jz .Lno_data_vpmadd52_4x
+
+ shlq $40,%rcx
+ movq 64(%rdi),%r8
+
+.Lblocks_vpmadd52_4x:
+ vpbroadcastq %rcx,%ymm31
+
+ vmovdqa64 .Lx_mask44(%rip),%ymm28
+ movl $5,%eax
+ vmovdqa64 .Lx_mask42(%rip),%ymm29
+ kmovw %eax,%k1
+
+ testq %r8,%r8
+ js .Linit_vpmadd52
+
+ vmovq 0(%rdi),%xmm0
+ vmovq 8(%rdi),%xmm1
+ vmovq 16(%rdi),%xmm2
+
+ testq $3,%rdx
+ jnz .Lblocks_vpmadd52_2x_do
+
+.Lblocks_vpmadd52_4x_do:
+ vpbroadcastq 64(%rdi),%ymm3
+ vpbroadcastq 96(%rdi),%ymm4
+ vpbroadcastq 128(%rdi),%ymm5
+ vpbroadcastq 160(%rdi),%ymm16
+
+.Lblocks_vpmadd52_4x_key_loaded:
+ vpsllq $2,%ymm5,%ymm17
+ vpaddq %ymm5,%ymm17,%ymm17
+ vpsllq $2,%ymm17,%ymm17
+
+ testq $7,%rdx
+ jz .Lblocks_vpmadd52_8x
+
+ vmovdqu64 0(%rsi),%ymm26
+ vmovdqu64 32(%rsi),%ymm27
+ leaq 64(%rsi),%rsi
+
+ vpunpcklqdq %ymm27,%ymm26,%ymm25
+ vpunpckhqdq %ymm27,%ymm26,%ymm27
+
+
+
+ vpsrlq $24,%ymm27,%ymm26
+ vporq %ymm31,%ymm26,%ymm26
+ vpaddq %ymm26,%ymm2,%ymm2
+ vpandq %ymm28,%ymm25,%ymm24
+ vpsrlq $44,%ymm25,%ymm25
+ vpsllq $20,%ymm27,%ymm27
+ vporq %ymm27,%ymm25,%ymm25
+ vpandq %ymm28,%ymm25,%ymm25
+
+ subq $4,%rdx
+ jz .Ltail_vpmadd52_4x
+ jmp .Loop_vpmadd52_4x
+ ud2
+
+.align 32
+.Linit_vpmadd52:
+ vmovq 24(%rdi),%xmm16
+ vmovq 56(%rdi),%xmm2
+ vmovq 32(%rdi),%xmm17
+ vmovq 40(%rdi),%xmm3
+ vmovq 48(%rdi),%xmm4
+
+ vmovdqa %ymm3,%ymm0
+ vmovdqa %ymm4,%ymm1
+ vmovdqa %ymm2,%ymm5
+
+ movl $2,%eax
+
+.Lmul_init_vpmadd52:
+ vpxorq %ymm18,%ymm18,%ymm18
+ vpmadd52luq %ymm2,%ymm16,%ymm18
+ vpxorq %ymm19,%ymm19,%ymm19
+ vpmadd52huq %ymm2,%ymm16,%ymm19
+ vpxorq %ymm20,%ymm20,%ymm20
+ vpmadd52luq %ymm2,%ymm17,%ymm20
+ vpxorq %ymm21,%ymm21,%ymm21
+ vpmadd52huq %ymm2,%ymm17,%ymm21
+ vpxorq %ymm22,%ymm22,%ymm22
+ vpmadd52luq %ymm2,%ymm3,%ymm22
+ vpxorq %ymm23,%ymm23,%ymm23
+ vpmadd52huq %ymm2,%ymm3,%ymm23
+
+ vpmadd52luq %ymm0,%ymm3,%ymm18
+ vpmadd52huq %ymm0,%ymm3,%ymm19
+ vpmadd52luq %ymm0,%ymm4,%ymm20
+ vpmadd52huq %ymm0,%ymm4,%ymm21
+ vpmadd52luq %ymm0,%ymm5,%ymm22
+ vpmadd52huq %ymm0,%ymm5,%ymm23
+
+ vpmadd52luq %ymm1,%ymm17,%ymm18
+ vpmadd52huq %ymm1,%ymm17,%ymm19
+ vpmadd52luq %ymm1,%ymm3,%ymm20
+ vpmadd52huq %ymm1,%ymm3,%ymm21
+ vpmadd52luq %ymm1,%ymm4,%ymm22
+ vpmadd52huq %ymm1,%ymm4,%ymm23
+
+
+
+ vpsrlq $44,%ymm18,%ymm30
+ vpsllq $8,%ymm19,%ymm19
+ vpandq %ymm28,%ymm18,%ymm0
+ vpaddq %ymm30,%ymm19,%ymm19
+
+ vpaddq %ymm19,%ymm20,%ymm20
+
+ vpsrlq $44,%ymm20,%ymm30
+ vpsllq $8,%ymm21,%ymm21
+ vpandq %ymm28,%ymm20,%ymm1
+ vpaddq %ymm30,%ymm21,%ymm21
+
+ vpaddq %ymm21,%ymm22,%ymm22
+
+ vpsrlq $42,%ymm22,%ymm30
+ vpsllq $10,%ymm23,%ymm23
+ vpandq %ymm29,%ymm22,%ymm2
+ vpaddq %ymm30,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm0,%ymm0
+ vpsllq $2,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm0,%ymm0
+
+ vpsrlq $44,%ymm0,%ymm30
+ vpandq %ymm28,%ymm0,%ymm0
+
+ vpaddq %ymm30,%ymm1,%ymm1
+
+ decl %eax
+ jz .Ldone_init_vpmadd52
+
+ vpunpcklqdq %ymm4,%ymm1,%ymm4
+ vpbroadcastq %xmm1,%xmm1
+ vpunpcklqdq %ymm5,%ymm2,%ymm5
+ vpbroadcastq %xmm2,%xmm2
+ vpunpcklqdq %ymm3,%ymm0,%ymm3
+ vpbroadcastq %xmm0,%xmm0
+
+ vpsllq $2,%ymm4,%ymm16
+ vpsllq $2,%ymm5,%ymm17
+ vpaddq %ymm4,%ymm16,%ymm16
+ vpaddq %ymm5,%ymm17,%ymm17
+ vpsllq $2,%ymm16,%ymm16
+ vpsllq $2,%ymm17,%ymm17
+
+ jmp .Lmul_init_vpmadd52
+ ud2
+
+.align 32
+.Ldone_init_vpmadd52:
+ vinserti128 $1,%xmm4,%ymm1,%ymm4
+ vinserti128 $1,%xmm5,%ymm2,%ymm5
+ vinserti128 $1,%xmm3,%ymm0,%ymm3
+
+ vpermq $216,%ymm4,%ymm4
+ vpermq $216,%ymm5,%ymm5
+ vpermq $216,%ymm3,%ymm3
+
+ vpsllq $2,%ymm4,%ymm16
+ vpaddq %ymm4,%ymm16,%ymm16
+ vpsllq $2,%ymm16,%ymm16
+
+ vmovq 0(%rdi),%xmm0
+ vmovq 8(%rdi),%xmm1
+ vmovq 16(%rdi),%xmm2
+
+ testq $3,%rdx
+ jnz .Ldone_init_vpmadd52_2x
+
+ vmovdqu64 %ymm3,64(%rdi)
+ vpbroadcastq %xmm3,%ymm3
+ vmovdqu64 %ymm4,96(%rdi)
+ vpbroadcastq %xmm4,%ymm4
+ vmovdqu64 %ymm5,128(%rdi)
+ vpbroadcastq %xmm5,%ymm5
+ vmovdqu64 %ymm16,160(%rdi)
+ vpbroadcastq %xmm16,%ymm16
+
+ jmp .Lblocks_vpmadd52_4x_key_loaded
+ ud2
+
+.align 32
+.Ldone_init_vpmadd52_2x:
+ vmovdqu64 %ymm3,64(%rdi)
+ vpsrldq $8,%ymm3,%ymm3
+ vmovdqu64 %ymm4,96(%rdi)
+ vpsrldq $8,%ymm4,%ymm4
+ vmovdqu64 %ymm5,128(%rdi)
+ vpsrldq $8,%ymm5,%ymm5
+ vmovdqu64 %ymm16,160(%rdi)
+ vpsrldq $8,%ymm16,%ymm16
+ jmp .Lblocks_vpmadd52_2x_key_loaded
+ ud2
+
+.align 32
+.Lblocks_vpmadd52_2x_do:
+ vmovdqu64 128+8(%rdi),%ymm5{%k1}{z}
+ vmovdqu64 160+8(%rdi),%ymm16{%k1}{z}
+ vmovdqu64 64+8(%rdi),%ymm3{%k1}{z}
+ vmovdqu64 96+8(%rdi),%ymm4{%k1}{z}
+
+.Lblocks_vpmadd52_2x_key_loaded:
+ vmovdqu64 0(%rsi),%ymm26
+ vpxorq %ymm27,%ymm27,%ymm27
+ leaq 32(%rsi),%rsi
+
+ vpunpcklqdq %ymm27,%ymm26,%ymm25
+ vpunpckhqdq %ymm27,%ymm26,%ymm27
+
+
+
+ vpsrlq $24,%ymm27,%ymm26
+ vporq %ymm31,%ymm26,%ymm26
+ vpaddq %ymm26,%ymm2,%ymm2
+ vpandq %ymm28,%ymm25,%ymm24
+ vpsrlq $44,%ymm25,%ymm25
+ vpsllq $20,%ymm27,%ymm27
+ vporq %ymm27,%ymm25,%ymm25
+ vpandq %ymm28,%ymm25,%ymm25
+
+ jmp .Ltail_vpmadd52_2x
+ ud2
+
+.align 32
+.Loop_vpmadd52_4x:
+
+ vpaddq %ymm24,%ymm0,%ymm0
+ vpaddq %ymm25,%ymm1,%ymm1
+
+ vpxorq %ymm18,%ymm18,%ymm18
+ vpmadd52luq %ymm2,%ymm16,%ymm18
+ vpxorq %ymm19,%ymm19,%ymm19
+ vpmadd52huq %ymm2,%ymm16,%ymm19
+ vpxorq %ymm20,%ymm20,%ymm20
+ vpmadd52luq %ymm2,%ymm17,%ymm20
+ vpxorq %ymm21,%ymm21,%ymm21
+ vpmadd52huq %ymm2,%ymm17,%ymm21
+ vpxorq %ymm22,%ymm22,%ymm22
+ vpmadd52luq %ymm2,%ymm3,%ymm22
+ vpxorq %ymm23,%ymm23,%ymm23
+ vpmadd52huq %ymm2,%ymm3,%ymm23
+
+ vmovdqu64 0(%rsi),%ymm26
+ vmovdqu64 32(%rsi),%ymm27
+ leaq 64(%rsi),%rsi
+ vpmadd52luq %ymm0,%ymm3,%ymm18
+ vpmadd52huq %ymm0,%ymm3,%ymm19
+ vpmadd52luq %ymm0,%ymm4,%ymm20
+ vpmadd52huq %ymm0,%ymm4,%ymm21
+ vpmadd52luq %ymm0,%ymm5,%ymm22
+ vpmadd52huq %ymm0,%ymm5,%ymm23
+
+ vpunpcklqdq %ymm27,%ymm26,%ymm25
+ vpunpckhqdq %ymm27,%ymm26,%ymm27
+ vpmadd52luq %ymm1,%ymm17,%ymm18
+ vpmadd52huq %ymm1,%ymm17,%ymm19
+ vpmadd52luq %ymm1,%ymm3,%ymm20
+ vpmadd52huq %ymm1,%ymm3,%ymm21
+ vpmadd52luq %ymm1,%ymm4,%ymm22
+ vpmadd52huq %ymm1,%ymm4,%ymm23
+
+
+
+ vpsrlq $44,%ymm18,%ymm30
+ vpsllq $8,%ymm19,%ymm19
+ vpandq %ymm28,%ymm18,%ymm0
+ vpaddq %ymm30,%ymm19,%ymm19
+
+ vpsrlq $24,%ymm27,%ymm26
+ vporq %ymm31,%ymm26,%ymm26
+ vpaddq %ymm19,%ymm20,%ymm20
+
+ vpsrlq $44,%ymm20,%ymm30
+ vpsllq $8,%ymm21,%ymm21
+ vpandq %ymm28,%ymm20,%ymm1
+ vpaddq %ymm30,%ymm21,%ymm21
+
+ vpandq %ymm28,%ymm25,%ymm24
+ vpsrlq $44,%ymm25,%ymm25
+ vpsllq $20,%ymm27,%ymm27
+ vpaddq %ymm21,%ymm22,%ymm22
+
+ vpsrlq $42,%ymm22,%ymm30
+ vpsllq $10,%ymm23,%ymm23
+ vpandq %ymm29,%ymm22,%ymm2
+ vpaddq %ymm30,%ymm23,%ymm23
+
+ vpaddq %ymm26,%ymm2,%ymm2
+ vpaddq %ymm23,%ymm0,%ymm0
+ vpsllq $2,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm0,%ymm0
+ vporq %ymm27,%ymm25,%ymm25
+ vpandq %ymm28,%ymm25,%ymm25
+
+ vpsrlq $44,%ymm0,%ymm30
+ vpandq %ymm28,%ymm0,%ymm0
+
+ vpaddq %ymm30,%ymm1,%ymm1
+
+ subq $4,%rdx
+ jnz .Loop_vpmadd52_4x
+
+.Ltail_vpmadd52_4x:
+ vmovdqu64 128(%rdi),%ymm5
+ vmovdqu64 160(%rdi),%ymm16
+ vmovdqu64 64(%rdi),%ymm3
+ vmovdqu64 96(%rdi),%ymm4
+
+.Ltail_vpmadd52_2x:
+ vpsllq $2,%ymm5,%ymm17
+ vpaddq %ymm5,%ymm17,%ymm17
+ vpsllq $2,%ymm17,%ymm17
+
+
+ vpaddq %ymm24,%ymm0,%ymm0
+ vpaddq %ymm25,%ymm1,%ymm1
+
+ vpxorq %ymm18,%ymm18,%ymm18
+ vpmadd52luq %ymm2,%ymm16,%ymm18
+ vpxorq %ymm19,%ymm19,%ymm19
+ vpmadd52huq %ymm2,%ymm16,%ymm19
+ vpxorq %ymm20,%ymm20,%ymm20
+ vpmadd52luq %ymm2,%ymm17,%ymm20
+ vpxorq %ymm21,%ymm21,%ymm21
+ vpmadd52huq %ymm2,%ymm17,%ymm21
+ vpxorq %ymm22,%ymm22,%ymm22
+ vpmadd52luq %ymm2,%ymm3,%ymm22
+ vpxorq %ymm23,%ymm23,%ymm23
+ vpmadd52huq %ymm2,%ymm3,%ymm23
+
+ vpmadd52luq %ymm0,%ymm3,%ymm18
+ vpmadd52huq %ymm0,%ymm3,%ymm19
+ vpmadd52luq %ymm0,%ymm4,%ymm20
+ vpmadd52huq %ymm0,%ymm4,%ymm21
+ vpmadd52luq %ymm0,%ymm5,%ymm22
+ vpmadd52huq %ymm0,%ymm5,%ymm23
+
+ vpmadd52luq %ymm1,%ymm17,%ymm18
+ vpmadd52huq %ymm1,%ymm17,%ymm19
+ vpmadd52luq %ymm1,%ymm3,%ymm20
+ vpmadd52huq %ymm1,%ymm3,%ymm21
+ vpmadd52luq %ymm1,%ymm4,%ymm22
+ vpmadd52huq %ymm1,%ymm4,%ymm23
+
+
+
+
+ movl $1,%eax
+ kmovw %eax,%k1
+ vpsrldq $8,%ymm18,%ymm24
+ vpsrldq $8,%ymm19,%ymm0
+ vpsrldq $8,%ymm20,%ymm25
+ vpsrldq $8,%ymm21,%ymm1
+ vpaddq %ymm24,%ymm18,%ymm18
+ vpaddq %ymm0,%ymm19,%ymm19
+ vpsrldq $8,%ymm22,%ymm26
+ vpsrldq $8,%ymm23,%ymm2
+ vpaddq %ymm25,%ymm20,%ymm20
+ vpaddq %ymm1,%ymm21,%ymm21
+ vpermq $0x2,%ymm18,%ymm24
+ vpermq $0x2,%ymm19,%ymm0
+ vpaddq %ymm26,%ymm22,%ymm22
+ vpaddq %ymm2,%ymm23,%ymm23
+
+ vpermq $0x2,%ymm20,%ymm25
+ vpermq $0x2,%ymm21,%ymm1
+ vpaddq %ymm24,%ymm18,%ymm18{%k1}{z}
+ vpaddq %ymm0,%ymm19,%ymm19{%k1}{z}
+ vpermq $0x2,%ymm22,%ymm26
+ vpermq $0x2,%ymm23,%ymm2
+ vpaddq %ymm25,%ymm20,%ymm20{%k1}{z}
+ vpaddq %ymm1,%ymm21,%ymm21{%k1}{z}
+ vpaddq %ymm26,%ymm22,%ymm22{%k1}{z}
+ vpaddq %ymm2,%ymm23,%ymm23{%k1}{z}
+
+
+
+ vpsrlq $44,%ymm18,%ymm30
+ vpsllq $8,%ymm19,%ymm19
+ vpandq %ymm28,%ymm18,%ymm0
+ vpaddq %ymm30,%ymm19,%ymm19
+
+ vpaddq %ymm19,%ymm20,%ymm20
+
+ vpsrlq $44,%ymm20,%ymm30
+ vpsllq $8,%ymm21,%ymm21
+ vpandq %ymm28,%ymm20,%ymm1
+ vpaddq %ymm30,%ymm21,%ymm21
+
+ vpaddq %ymm21,%ymm22,%ymm22
+
+ vpsrlq $42,%ymm22,%ymm30
+ vpsllq $10,%ymm23,%ymm23
+ vpandq %ymm29,%ymm22,%ymm2
+ vpaddq %ymm30,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm0,%ymm0
+ vpsllq $2,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm0,%ymm0
+
+ vpsrlq $44,%ymm0,%ymm30
+ vpandq %ymm28,%ymm0,%ymm0
+
+ vpaddq %ymm30,%ymm1,%ymm1
+
+
+ subq $2,%rdx
+ ja .Lblocks_vpmadd52_4x_do
+
+ vmovq %xmm0,0(%rdi)
+ vmovq %xmm1,8(%rdi)
+ vmovq %xmm2,16(%rdi)
+ vzeroall
+
+.Lno_data_vpmadd52_4x:
+ .byte 0xf3,0xc3
+.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
+.type poly1305_blocks_vpmadd52_8x,@function
+.align 32
+poly1305_blocks_vpmadd52_8x:
+ shrq $4,%rdx
+ jz .Lno_data_vpmadd52_8x
+
+ shlq $40,%rcx
+ movq 64(%rdi),%r8
+
+ vmovdqa64 .Lx_mask44(%rip),%ymm28
+ vmovdqa64 .Lx_mask42(%rip),%ymm29
+
+ testq %r8,%r8
+ js .Linit_vpmadd52
+
+ vmovq 0(%rdi),%xmm0
+ vmovq 8(%rdi),%xmm1
+ vmovq 16(%rdi),%xmm2
+
+.Lblocks_vpmadd52_8x:
+
+
+
+ vmovdqu64 128(%rdi),%ymm5
+ vmovdqu64 160(%rdi),%ymm16
+ vmovdqu64 64(%rdi),%ymm3
+ vmovdqu64 96(%rdi),%ymm4
+
+ vpsllq $2,%ymm5,%ymm17
+ vpaddq %ymm5,%ymm17,%ymm17
+ vpsllq $2,%ymm17,%ymm17
+
+ vpbroadcastq %xmm5,%ymm8
+ vpbroadcastq %xmm3,%ymm6
+ vpbroadcastq %xmm4,%ymm7
+
+ vpxorq %ymm18,%ymm18,%ymm18
+ vpmadd52luq %ymm8,%ymm16,%ymm18
+ vpxorq %ymm19,%ymm19,%ymm19
+ vpmadd52huq %ymm8,%ymm16,%ymm19
+ vpxorq %ymm20,%ymm20,%ymm20
+ vpmadd52luq %ymm8,%ymm17,%ymm20
+ vpxorq %ymm21,%ymm21,%ymm21
+ vpmadd52huq %ymm8,%ymm17,%ymm21
+ vpxorq %ymm22,%ymm22,%ymm22
+ vpmadd52luq %ymm8,%ymm3,%ymm22
+ vpxorq %ymm23,%ymm23,%ymm23
+ vpmadd52huq %ymm8,%ymm3,%ymm23
+
+ vpmadd52luq %ymm6,%ymm3,%ymm18
+ vpmadd52huq %ymm6,%ymm3,%ymm19
+ vpmadd52luq %ymm6,%ymm4,%ymm20
+ vpmadd52huq %ymm6,%ymm4,%ymm21
+ vpmadd52luq %ymm6,%ymm5,%ymm22
+ vpmadd52huq %ymm6,%ymm5,%ymm23
+
+ vpmadd52luq %ymm7,%ymm17,%ymm18
+ vpmadd52huq %ymm7,%ymm17,%ymm19
+ vpmadd52luq %ymm7,%ymm3,%ymm20
+ vpmadd52huq %ymm7,%ymm3,%ymm21
+ vpmadd52luq %ymm7,%ymm4,%ymm22
+ vpmadd52huq %ymm7,%ymm4,%ymm23
+
+
+
+ vpsrlq $44,%ymm18,%ymm30
+ vpsllq $8,%ymm19,%ymm19
+ vpandq %ymm28,%ymm18,%ymm6
+ vpaddq %ymm30,%ymm19,%ymm19
+
+ vpaddq %ymm19,%ymm20,%ymm20
+
+ vpsrlq $44,%ymm20,%ymm30
+ vpsllq $8,%ymm21,%ymm21
+ vpandq %ymm28,%ymm20,%ymm7
+ vpaddq %ymm30,%ymm21,%ymm21
+
+ vpaddq %ymm21,%ymm22,%ymm22
+
+ vpsrlq $42,%ymm22,%ymm30
+ vpsllq $10,%ymm23,%ymm23
+ vpandq %ymm29,%ymm22,%ymm8
+ vpaddq %ymm30,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm6,%ymm6
+ vpsllq $2,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm6,%ymm6
+
+ vpsrlq $44,%ymm6,%ymm30
+ vpandq %ymm28,%ymm6,%ymm6
+
+ vpaddq %ymm30,%ymm7,%ymm7
+
+
+
+
+
+ vpunpcklqdq %ymm5,%ymm8,%ymm26
+ vpunpckhqdq %ymm5,%ymm8,%ymm5
+ vpunpcklqdq %ymm3,%ymm6,%ymm24
+ vpunpckhqdq %ymm3,%ymm6,%ymm3
+ vpunpcklqdq %ymm4,%ymm7,%ymm25
+ vpunpckhqdq %ymm4,%ymm7,%ymm4
+ vshufi64x2 $0x44,%zmm5,%zmm26,%zmm8
+ vshufi64x2 $0x44,%zmm3,%zmm24,%zmm6
+ vshufi64x2 $0x44,%zmm4,%zmm25,%zmm7
+
+ vmovdqu64 0(%rsi),%zmm26
+ vmovdqu64 64(%rsi),%zmm27
+ leaq 128(%rsi),%rsi
+
+ vpsllq $2,%zmm8,%zmm10
+ vpsllq $2,%zmm7,%zmm9
+ vpaddq %zmm8,%zmm10,%zmm10
+ vpaddq %zmm7,%zmm9,%zmm9
+ vpsllq $2,%zmm10,%zmm10
+ vpsllq $2,%zmm9,%zmm9
+
+ vpbroadcastq %rcx,%zmm31
+ vpbroadcastq %xmm28,%zmm28
+ vpbroadcastq %xmm29,%zmm29
+
+ vpbroadcastq %xmm9,%zmm16
+ vpbroadcastq %xmm10,%zmm17
+ vpbroadcastq %xmm6,%zmm3
+ vpbroadcastq %xmm7,%zmm4
+ vpbroadcastq %xmm8,%zmm5
+
+ vpunpcklqdq %zmm27,%zmm26,%zmm25
+ vpunpckhqdq %zmm27,%zmm26,%zmm27
+
+
+
+ vpsrlq $24,%zmm27,%zmm26
+ vporq %zmm31,%zmm26,%zmm26
+ vpaddq %zmm26,%zmm2,%zmm2
+ vpandq %zmm28,%zmm25,%zmm24
+ vpsrlq $44,%zmm25,%zmm25
+ vpsllq $20,%zmm27,%zmm27
+ vporq %zmm27,%zmm25,%zmm25
+ vpandq %zmm28,%zmm25,%zmm25
+
+ subq $8,%rdx
+ jz .Ltail_vpmadd52_8x
+ jmp .Loop_vpmadd52_8x
+
+.align 32
+.Loop_vpmadd52_8x:
+
+ vpaddq %zmm24,%zmm0,%zmm0
+ vpaddq %zmm25,%zmm1,%zmm1
+
+ vpxorq %zmm18,%zmm18,%zmm18
+ vpmadd52luq %zmm2,%zmm16,%zmm18
+ vpxorq %zmm19,%zmm19,%zmm19
+ vpmadd52huq %zmm2,%zmm16,%zmm19
+ vpxorq %zmm20,%zmm20,%zmm20
+ vpmadd52luq %zmm2,%zmm17,%zmm20
+ vpxorq %zmm21,%zmm21,%zmm21
+ vpmadd52huq %zmm2,%zmm17,%zmm21
+ vpxorq %zmm22,%zmm22,%zmm22
+ vpmadd52luq %zmm2,%zmm3,%zmm22
+ vpxorq %zmm23,%zmm23,%zmm23
+ vpmadd52huq %zmm2,%zmm3,%zmm23
+
+ vmovdqu64 0(%rsi),%zmm26
+ vmovdqu64 64(%rsi),%zmm27
+ leaq 128(%rsi),%rsi
+ vpmadd52luq %zmm0,%zmm3,%zmm18
+ vpmadd52huq %zmm0,%zmm3,%zmm19
+ vpmadd52luq %zmm0,%zmm4,%zmm20
+ vpmadd52huq %zmm0,%zmm4,%zmm21
+ vpmadd52luq %zmm0,%zmm5,%zmm22
+ vpmadd52huq %zmm0,%zmm5,%zmm23
+
+ vpunpcklqdq %zmm27,%zmm26,%zmm25
+ vpunpckhqdq %zmm27,%zmm26,%zmm27
+ vpmadd52luq %zmm1,%zmm17,%zmm18
+ vpmadd52huq %zmm1,%zmm17,%zmm19
+ vpmadd52luq %zmm1,%zmm3,%zmm20
+ vpmadd52huq %zmm1,%zmm3,%zmm21
+ vpmadd52luq %zmm1,%zmm4,%zmm22
+ vpmadd52huq %zmm1,%zmm4,%zmm23
+
+
+
+ vpsrlq $44,%zmm18,%zmm30
+ vpsllq $8,%zmm19,%zmm19
+ vpandq %zmm28,%zmm18,%zmm0
+ vpaddq %zmm30,%zmm19,%zmm19
+
+ vpsrlq $24,%zmm27,%zmm26
+ vporq %zmm31,%zmm26,%zmm26
+ vpaddq %zmm19,%zmm20,%zmm20
+
+ vpsrlq $44,%zmm20,%zmm30
+ vpsllq $8,%zmm21,%zmm21
+ vpandq %zmm28,%zmm20,%zmm1
+ vpaddq %zmm30,%zmm21,%zmm21
+
+ vpandq %zmm28,%zmm25,%zmm24
+ vpsrlq $44,%zmm25,%zmm25
+ vpsllq $20,%zmm27,%zmm27
+ vpaddq %zmm21,%zmm22,%zmm22
+
+ vpsrlq $42,%zmm22,%zmm30
+ vpsllq $10,%zmm23,%zmm23
+ vpandq %zmm29,%zmm22,%zmm2
+ vpaddq %zmm30,%zmm23,%zmm23
+
+ vpaddq %zmm26,%zmm2,%zmm2
+ vpaddq %zmm23,%zmm0,%zmm0
+ vpsllq $2,%zmm23,%zmm23
+
+ vpaddq %zmm23,%zmm0,%zmm0
+ vporq %zmm27,%zmm25,%zmm25
+ vpandq %zmm28,%zmm25,%zmm25
+
+ vpsrlq $44,%zmm0,%zmm30
+ vpandq %zmm28,%zmm0,%zmm0
+
+ vpaddq %zmm30,%zmm1,%zmm1
+
+ subq $8,%rdx
+ jnz .Loop_vpmadd52_8x
+
+.Ltail_vpmadd52_8x:
+
+ vpaddq %zmm24,%zmm0,%zmm0
+ vpaddq %zmm25,%zmm1,%zmm1
+
+ vpxorq %zmm18,%zmm18,%zmm18
+ vpmadd52luq %zmm2,%zmm9,%zmm18
+ vpxorq %zmm19,%zmm19,%zmm19
+ vpmadd52huq %zmm2,%zmm9,%zmm19
+ vpxorq %zmm20,%zmm20,%zmm20
+ vpmadd52luq %zmm2,%zmm10,%zmm20
+ vpxorq %zmm21,%zmm21,%zmm21
+ vpmadd52huq %zmm2,%zmm10,%zmm21
+ vpxorq %zmm22,%zmm22,%zmm22
+ vpmadd52luq %zmm2,%zmm6,%zmm22
+ vpxorq %zmm23,%zmm23,%zmm23
+ vpmadd52huq %zmm2,%zmm6,%zmm23
+
+ vpmadd52luq %zmm0,%zmm6,%zmm18
+ vpmadd52huq %zmm0,%zmm6,%zmm19
+ vpmadd52luq %zmm0,%zmm7,%zmm20
+ vpmadd52huq %zmm0,%zmm7,%zmm21
+ vpmadd52luq %zmm0,%zmm8,%zmm22
+ vpmadd52huq %zmm0,%zmm8,%zmm23
+
+ vpmadd52luq %zmm1,%zmm10,%zmm18
+ vpmadd52huq %zmm1,%zmm10,%zmm19
+ vpmadd52luq %zmm1,%zmm6,%zmm20
+ vpmadd52huq %zmm1,%zmm6,%zmm21
+ vpmadd52luq %zmm1,%zmm7,%zmm22
+ vpmadd52huq %zmm1,%zmm7,%zmm23
+
+
+
+
+ movl $1,%eax
+ kmovw %eax,%k1
+ vpsrldq $8,%zmm18,%zmm24
+ vpsrldq $8,%zmm19,%zmm0
+ vpsrldq $8,%zmm20,%zmm25
+ vpsrldq $8,%zmm21,%zmm1
+ vpaddq %zmm24,%zmm18,%zmm18
+ vpaddq %zmm0,%zmm19,%zmm19
+ vpsrldq $8,%zmm22,%zmm26
+ vpsrldq $8,%zmm23,%zmm2
+ vpaddq %zmm25,%zmm20,%zmm20
+ vpaddq %zmm1,%zmm21,%zmm21
+ vpermq $0x2,%zmm18,%zmm24
+ vpermq $0x2,%zmm19,%zmm0
+ vpaddq %zmm26,%zmm22,%zmm22
+ vpaddq %zmm2,%zmm23,%zmm23
+
+ vpermq $0x2,%zmm20,%zmm25
+ vpermq $0x2,%zmm21,%zmm1
+ vpaddq %zmm24,%zmm18,%zmm18
+ vpaddq %zmm0,%zmm19,%zmm19
+ vpermq $0x2,%zmm22,%zmm26
+ vpermq $0x2,%zmm23,%zmm2
+ vpaddq %zmm25,%zmm20,%zmm20
+ vpaddq %zmm1,%zmm21,%zmm21
+ vextracti64x4 $1,%zmm18,%ymm24
+ vextracti64x4 $1,%zmm19,%ymm0
+ vpaddq %zmm26,%zmm22,%zmm22
+ vpaddq %zmm2,%zmm23,%zmm23
+
+ vextracti64x4 $1,%zmm20,%ymm25
+ vextracti64x4 $1,%zmm21,%ymm1
+ vextracti64x4 $1,%zmm22,%ymm26
+ vextracti64x4 $1,%zmm23,%ymm2
+ vpaddq %ymm24,%ymm18,%ymm18{%k1}{z}
+ vpaddq %ymm0,%ymm19,%ymm19{%k1}{z}
+ vpaddq %ymm25,%ymm20,%ymm20{%k1}{z}
+ vpaddq %ymm1,%ymm21,%ymm21{%k1}{z}
+ vpaddq %ymm26,%ymm22,%ymm22{%k1}{z}
+ vpaddq %ymm2,%ymm23,%ymm23{%k1}{z}
+
+
+
+ vpsrlq $44,%ymm18,%ymm30
+ vpsllq $8,%ymm19,%ymm19
+ vpandq %ymm28,%ymm18,%ymm0
+ vpaddq %ymm30,%ymm19,%ymm19
+
+ vpaddq %ymm19,%ymm20,%ymm20
+
+ vpsrlq $44,%ymm20,%ymm30
+ vpsllq $8,%ymm21,%ymm21
+ vpandq %ymm28,%ymm20,%ymm1
+ vpaddq %ymm30,%ymm21,%ymm21
+
+ vpaddq %ymm21,%ymm22,%ymm22
+
+ vpsrlq $42,%ymm22,%ymm30
+ vpsllq $10,%ymm23,%ymm23
+ vpandq %ymm29,%ymm22,%ymm2
+ vpaddq %ymm30,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm0,%ymm0
+ vpsllq $2,%ymm23,%ymm23
+
+ vpaddq %ymm23,%ymm0,%ymm0
+
+ vpsrlq $44,%ymm0,%ymm30
+ vpandq %ymm28,%ymm0,%ymm0
+
+ vpaddq %ymm30,%ymm1,%ymm1
+
+
+
+ vmovq %xmm0,0(%rdi)
+ vmovq %xmm1,8(%rdi)
+ vmovq %xmm2,16(%rdi)
+ vzeroall
+
+.Lno_data_vpmadd52_8x:
+ .byte 0xf3,0xc3
+.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
+.type poly1305_emit_base2_44,@function
+.align 32
+poly1305_emit_base2_44:
+ movq 0(%rdi),%r8
+ movq 8(%rdi),%r9
+ movq 16(%rdi),%r10
+
+ movq %r9,%rax
+ shrq $20,%r9
+ shlq $44,%rax
+ movq %r10,%rcx
+ shrq $40,%r10
+ shlq $24,%rcx
+
+ addq %rax,%r8
+ adcq %rcx,%r9
+ adcq $0,%r10
+
+ movq %r8,%rax
+ addq $5,%r8
+ movq %r9,%rcx
+ adcq $0,%r9
+ adcq $0,%r10
+ shrq $2,%r10
+ cmovnzq %r8,%rax
+ cmovnzq %r9,%rcx
+
+ addq 0(%rdx),%rax
+ adcq 8(%rdx),%rcx
+ movq %rax,0(%rsi)
+ movq %rcx,8(%rsi)
+
+ .byte 0xf3,0xc3
+.size poly1305_emit_base2_44,.-poly1305_emit_base2_44
.align 64
.Lconst:
.Lmask24:
@@ -1822,7 +3436,125 @@ poly1305_blocks_avx2:
.long 16777216,0,16777216,0,16777216,0,16777216,0
.Lmask26:
.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
-.Lfive:
-.long 5,0,5,0,5,0,5,0
+.Lpermd_avx2:
+.long 2,2,2,3,2,0,2,1
+.Lpermd_avx512:
+.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
+
+.L2_44_inp_permd:
+.long 0,1,1,2,2,3,7,7
+.L2_44_inp_shift:
+.quad 0,12,24,64
+.L2_44_mask:
+.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
+.L2_44_shift_rgt:
+.quad 44,44,42,64
+.L2_44_shift_lft:
+.quad 8,8,10,64
+
+.align 64
+.Lx_mask44:
+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+.Lx_mask42:
+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
+.globl xor128_encrypt_n_pad
+.type xor128_encrypt_n_pad,@function
+.align 16
+xor128_encrypt_n_pad:
+ subq %rdx,%rsi
+ subq %rdx,%rdi
+ movq %rcx,%r10
+ shrq $4,%rcx
+ jz .Ltail_enc
+ nop
+.Loop_enc_xmm:
+ movdqu (%rsi,%rdx,1),%xmm0
+ pxor (%rdx),%xmm0
+ movdqu %xmm0,(%rdi,%rdx,1)
+ movdqa %xmm0,(%rdx)
+ leaq 16(%rdx),%rdx
+ decq %rcx
+ jnz .Loop_enc_xmm
+
+ andq $15,%r10
+ jz .Ldone_enc
+
+.Ltail_enc:
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorl %eax,%eax
+.Loop_enc_byte:
+ movb (%rsi,%rdx,1),%al
+ xorb (%rdx),%al
+ movb %al,(%rdi,%rdx,1)
+ movb %al,(%rdx)
+ leaq 1(%rdx),%rdx
+ decq %r10
+ jnz .Loop_enc_byte
+
+ xorl %eax,%eax
+.Loop_enc_pad:
+ movb %al,(%rdx)
+ leaq 1(%rdx),%rdx
+ decq %rcx
+ jnz .Loop_enc_pad
+
+.Ldone_enc:
+ movq %rdx,%rax
+ .byte 0xf3,0xc3
+.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
+
+.globl xor128_decrypt_n_pad
+.type xor128_decrypt_n_pad,@function
+.align 16
+xor128_decrypt_n_pad:
+ subq %rdx,%rsi
+ subq %rdx,%rdi
+ movq %rcx,%r10
+ shrq $4,%rcx
+ jz .Ltail_dec
+ nop
+.Loop_dec_xmm:
+ movdqu (%rsi,%rdx,1),%xmm0
+ movdqa (%rdx),%xmm1
+ pxor %xmm0,%xmm1
+ movdqu %xmm1,(%rdi,%rdx,1)
+ movdqa %xmm0,(%rdx)
+ leaq 16(%rdx),%rdx
+ decq %rcx
+ jnz .Loop_dec_xmm
+
+ pxor %xmm1,%xmm1
+ andq $15,%r10
+ jz .Ldone_dec
+
+.Ltail_dec:
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorl %eax,%eax
+ xorq %r11,%r11
+.Loop_dec_byte:
+ movb (%rsi,%rdx,1),%r11b
+ movb (%rdx),%al
+ xorb %r11b,%al
+ movb %al,(%rdi,%rdx,1)
+ movb %r11b,(%rdx)
+ leaq 1(%rdx),%rdx
+ decq %r10
+ jnz .Loop_dec_byte
+
+ xorl %eax,%eax
+.Loop_dec_pad:
+ movb %al,(%rdx)
+ leaq 1(%rdx),%rdx
+ decq %rcx
+ jnz .Loop_dec_pad
+
+.Ldone_dec:
+ movq %rdx,%rax
+ .byte 0xf3,0xc3
+.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-md5-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-md5-x86_64.s
index aab3c6db13..03fbca89de 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-md5-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-md5-x86_64.s
@@ -4,15 +4,29 @@
.globl rc4_md5_enc
.type rc4_md5_enc,@function
rc4_md5_enc:
+.cfi_startproc
cmpq $0,%r9
je .Labort
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
subq $40,%rsp
+.cfi_adjust_cfa_offset 40
.Lbody:
movq %rcx,%r11
movq %r9,%r12
@@ -1247,13 +1261,21 @@ rc4_md5_enc:
movl %ecx,-4(%rdi)
movq 40(%rsp),%r15
+.cfi_restore %r15
movq 48(%rsp),%r14
+.cfi_restore %r14
movq 56(%rsp),%r13
+.cfi_restore %r13
movq 64(%rsp),%r12
+.cfi_restore %r12
movq 72(%rsp),%rbp
+.cfi_restore %rbp
movq 80(%rsp),%rbx
+.cfi_restore %rbx
leaq 88(%rsp),%rsp
+.cfi_adjust_cfa_offset -88
.Lepilogue:
.Labort:
.byte 0xf3,0xc3
+.cfi_endproc
.size rc4_md5_enc,.-rc4_md5_enc
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-x86_64.s
index 781b48b9eb..fba70351d4 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/rc4/rc4-x86_64.s
@@ -8,9 +8,16 @@ RC4: orq %rsi,%rsi
jne .Lentry
.byte 0xf3,0xc3
.Lentry:
+.cfi_startproc
pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-24
pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-32
.Lprologue:
movq %rsi,%r11
movq %rdx,%r12
@@ -511,11 +518,16 @@ RC4: orq %rsi,%rsi
movl %ecx,-4(%rdi)
movq (%rsp),%r13
+.cfi_restore %r13
movq 8(%rsp),%r12
+.cfi_restore %r12
movq 16(%rsp),%rbx
+.cfi_restore %rbx
addq $24,%rsp
+.cfi_adjust_cfa_offset -24
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size RC4,.-RC4
.globl RC4_set_key
.type RC4_set_key,@function
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/keccak1600-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/keccak1600-x86_64.s
new file mode 100644
index 0000000000..e511f25035
--- /dev/null
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/keccak1600-x86_64.s
@@ -0,0 +1,522 @@
+.text
+
+.type __KeccakF1600,@function
+.align 32
+__KeccakF1600:
+ movq 60(%rdi),%rax
+ movq 68(%rdi),%rbx
+ movq 76(%rdi),%rcx
+ movq 84(%rdi),%rdx
+ movq 92(%rdi),%rbp
+ jmp .Loop
+
+.align 32
+.Loop:
+ movq -100(%rdi),%r8
+ movq -52(%rdi),%r9
+ movq -4(%rdi),%r10
+ movq 44(%rdi),%r11
+
+ xorq -84(%rdi),%rcx
+ xorq -76(%rdi),%rdx
+ xorq %r8,%rax
+ xorq -92(%rdi),%rbx
+ xorq -44(%rdi),%rcx
+ xorq -60(%rdi),%rax
+ movq %rbp,%r12
+ xorq -68(%rdi),%rbp
+
+ xorq %r10,%rcx
+ xorq -20(%rdi),%rax
+ xorq -36(%rdi),%rdx
+ xorq %r9,%rbx
+ xorq -28(%rdi),%rbp
+
+ xorq 36(%rdi),%rcx
+ xorq 20(%rdi),%rax
+ xorq 4(%rdi),%rdx
+ xorq -12(%rdi),%rbx
+ xorq 12(%rdi),%rbp
+
+ movq %rcx,%r13
+ rolq $1,%rcx
+ xorq %rax,%rcx
+ xorq %r11,%rdx
+
+ rolq $1,%rax
+ xorq %rdx,%rax
+ xorq 28(%rdi),%rbx
+
+ rolq $1,%rdx
+ xorq %rbx,%rdx
+ xorq 52(%rdi),%rbp
+
+ rolq $1,%rbx
+ xorq %rbp,%rbx
+
+ rolq $1,%rbp
+ xorq %r13,%rbp
+ xorq %rcx,%r9
+ xorq %rdx,%r10
+ rolq $44,%r9
+ xorq %rbp,%r11
+ xorq %rax,%r12
+ rolq $43,%r10
+ xorq %rbx,%r8
+ movq %r9,%r13
+ rolq $21,%r11
+ orq %r10,%r9
+ xorq %r8,%r9
+ rolq $14,%r12
+
+ xorq (%r15),%r9
+ leaq 8(%r15),%r15
+
+ movq %r12,%r14
+ andq %r11,%r12
+ movq %r9,-100(%rsi)
+ xorq %r10,%r12
+ notq %r10
+ movq %r12,-84(%rsi)
+
+ orq %r11,%r10
+ movq 76(%rdi),%r12
+ xorq %r13,%r10
+ movq %r10,-92(%rsi)
+
+ andq %r8,%r13
+ movq -28(%rdi),%r9
+ xorq %r14,%r13
+ movq -20(%rdi),%r10
+ movq %r13,-68(%rsi)
+
+ orq %r8,%r14
+ movq -76(%rdi),%r8
+ xorq %r11,%r14
+ movq 28(%rdi),%r11
+ movq %r14,-76(%rsi)
+
+
+ xorq %rbp,%r8
+ xorq %rdx,%r12
+ rolq $28,%r8
+ xorq %rcx,%r11
+ xorq %rax,%r9
+ rolq $61,%r12
+ rolq $45,%r11
+ xorq %rbx,%r10
+ rolq $20,%r9
+ movq %r8,%r13
+ orq %r12,%r8
+ rolq $3,%r10
+
+ xorq %r11,%r8
+ movq %r8,-36(%rsi)
+
+ movq %r9,%r14
+ andq %r13,%r9
+ movq -92(%rdi),%r8
+ xorq %r12,%r9
+ notq %r12
+ movq %r9,-28(%rsi)
+
+ orq %r11,%r12
+ movq -44(%rdi),%r9
+ xorq %r10,%r12
+ movq %r12,-44(%rsi)
+
+ andq %r10,%r11
+ movq 60(%rdi),%r12
+ xorq %r14,%r11
+ movq %r11,-52(%rsi)
+
+ orq %r10,%r14
+ movq 4(%rdi),%r10
+ xorq %r13,%r14
+ movq 52(%rdi),%r11
+ movq %r14,-60(%rsi)
+
+
+ xorq %rbp,%r10
+ xorq %rax,%r11
+ rolq $25,%r10
+ xorq %rdx,%r9
+ rolq $8,%r11
+ xorq %rbx,%r12
+ rolq $6,%r9
+ xorq %rcx,%r8
+ rolq $18,%r12
+ movq %r10,%r13
+ andq %r11,%r10
+ rolq $1,%r8
+
+ notq %r11
+ xorq %r9,%r10
+ movq %r10,-12(%rsi)
+
+ movq %r12,%r14
+ andq %r11,%r12
+ movq -12(%rdi),%r10
+ xorq %r13,%r12
+ movq %r12,-4(%rsi)
+
+ orq %r9,%r13
+ movq 84(%rdi),%r12
+ xorq %r8,%r13
+ movq %r13,-20(%rsi)
+
+ andq %r8,%r9
+ xorq %r14,%r9
+ movq %r9,12(%rsi)
+
+ orq %r8,%r14
+ movq -60(%rdi),%r9
+ xorq %r11,%r14
+ movq 36(%rdi),%r11
+ movq %r14,4(%rsi)
+
+
+ movq -68(%rdi),%r8
+
+ xorq %rcx,%r10
+ xorq %rdx,%r11
+ rolq $10,%r10
+ xorq %rbx,%r9
+ rolq $15,%r11
+ xorq %rbp,%r12
+ rolq $36,%r9
+ xorq %rax,%r8
+ rolq $56,%r12
+ movq %r10,%r13
+ orq %r11,%r10
+ rolq $27,%r8
+
+ notq %r11
+ xorq %r9,%r10
+ movq %r10,28(%rsi)
+
+ movq %r12,%r14
+ orq %r11,%r12
+ xorq %r13,%r12
+ movq %r12,36(%rsi)
+
+ andq %r9,%r13
+ xorq %r8,%r13
+ movq %r13,20(%rsi)
+
+ orq %r8,%r9
+ xorq %r14,%r9
+ movq %r9,52(%rsi)
+
+ andq %r14,%r8
+ xorq %r11,%r8
+ movq %r8,44(%rsi)
+
+
+ xorq -84(%rdi),%rdx
+ xorq -36(%rdi),%rbp
+ rolq $62,%rdx
+ xorq 68(%rdi),%rcx
+ rolq $55,%rbp
+ xorq 12(%rdi),%rax
+ rolq $2,%rcx
+ xorq 20(%rdi),%rbx
+ xchgq %rsi,%rdi
+ rolq $39,%rax
+ rolq $41,%rbx
+ movq %rdx,%r13
+ andq %rbp,%rdx
+ notq %rbp
+ xorq %rcx,%rdx
+ movq %rdx,92(%rdi)
+
+ movq %rax,%r14
+ andq %rbp,%rax
+ xorq %r13,%rax
+ movq %rax,60(%rdi)
+
+ orq %rcx,%r13
+ xorq %rbx,%r13
+ movq %r13,84(%rdi)
+
+ andq %rbx,%rcx
+ xorq %r14,%rcx
+ movq %rcx,76(%rdi)
+
+ orq %r14,%rbx
+ xorq %rbp,%rbx
+ movq %rbx,68(%rdi)
+
+ movq %rdx,%rbp
+ movq %r13,%rdx
+
+ testq $255,%r15
+ jnz .Loop
+
+ leaq -192(%r15),%r15
+ .byte 0xf3,0xc3
+.size __KeccakF1600,.-__KeccakF1600
+
+.type KeccakF1600,@function
+.align 32
+KeccakF1600:
+.cfi_startproc
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+
+ leaq 100(%rdi),%rdi
+ subq $200,%rsp
+.cfi_adjust_cfa_offset 200
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+
+ leaq iotas(%rip),%r15
+ leaq 100(%rsp),%rsi
+
+ call __KeccakF1600
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+ leaq -100(%rdi),%rdi
+
+ addq $200,%rsp
+.cfi_adjust_cfa_offset -200
+
+ popq %r15
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r15
+ popq %r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r14
+ popq %r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r13
+ popq %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+ popq %rbp
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbp
+ popq %rbx
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbx
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size KeccakF1600,.-KeccakF1600
+.globl SHA3_absorb
+.type SHA3_absorb,@function
+.align 32
+SHA3_absorb:
+.cfi_startproc
+ pushq %rbx
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbx,-16
+ pushq %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset %rbp,-24
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-32
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-40
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-48
+ pushq %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r15,-56
+
+ leaq 100(%rdi),%rdi
+ subq $232,%rsp
+.cfi_adjust_cfa_offset 232
+
+ movq %rsi,%r9
+ leaq 100(%rsp),%rsi
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+ leaq iotas(%rip),%r15
+
+ movq %rcx,216-100(%rsi)
+
+.Loop_absorb:
+ cmpq %rcx,%rdx
+ jc .Ldone_absorb
+
+ shrq $3,%rcx
+ leaq -100(%rdi),%r8
+
+.Lblock_absorb:
+ movq (%r9),%rax
+ leaq 8(%r9),%r9
+ xorq (%r8),%rax
+ leaq 8(%r8),%r8
+ subq $8,%rdx
+ movq %rax,-8(%r8)
+ subq $1,%rcx
+ jnz .Lblock_absorb
+
+ movq %r9,200-100(%rsi)
+ movq %rdx,208-100(%rsi)
+ call __KeccakF1600
+ movq 200-100(%rsi),%r9
+ movq 208-100(%rsi),%rdx
+ movq 216-100(%rsi),%rcx
+ jmp .Loop_absorb
+
+.align 32
+.Ldone_absorb:
+ movq %rdx,%rax
+
+ notq -92(%rdi)
+ notq -84(%rdi)
+ notq -36(%rdi)
+ notq -4(%rdi)
+ notq 36(%rdi)
+ notq 60(%rdi)
+
+ addq $232,%rsp
+.cfi_adjust_cfa_offset -232
+
+ popq %r15
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r15
+ popq %r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r14
+ popq %r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r13
+ popq %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+ popq %rbp
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbp
+ popq %rbx
+.cfi_adjust_cfa_offset -8
+.cfi_restore %rbx
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size SHA3_absorb,.-SHA3_absorb
+.globl SHA3_squeeze
+.type SHA3_squeeze,@function
+.align 32
+SHA3_squeeze:
+.cfi_startproc
+ pushq %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r12,-16
+ pushq %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r13,-24
+ pushq %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset %r14,-32
+
+ shrq $3,%rcx
+ movq %rdi,%r8
+ movq %rsi,%r12
+ movq %rdx,%r13
+ movq %rcx,%r14
+ jmp .Loop_squeeze
+
+.align 32
+.Loop_squeeze:
+ cmpq $8,%r13
+ jb .Ltail_squeeze
+
+ movq (%r8),%rax
+ leaq 8(%r8),%r8
+ movq %rax,(%r12)
+ leaq 8(%r12),%r12
+ subq $8,%r13
+ jz .Ldone_squeeze
+
+ subq $1,%rcx
+ jnz .Loop_squeeze
+
+ call KeccakF1600
+ movq %rdi,%r8
+ movq %r14,%rcx
+ jmp .Loop_squeeze
+
+.Ltail_squeeze:
+ movq %r8,%rsi
+ movq %r12,%rdi
+ movq %r13,%rcx
+.byte 0xf3,0xa4
+
+.Ldone_squeeze:
+ popq %r14
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r14
+ popq %r13
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r13
+ popq %r12
+.cfi_adjust_cfa_offset -8
+.cfi_restore %r12
+ .byte 0xf3,0xc3
+.cfi_endproc
+.size SHA3_squeeze,.-SHA3_squeeze
+.align 256
+.quad 0,0,0,0,0,0,0,0
+.type iotas,@object
+iotas:
+.quad 0x0000000000000001
+.quad 0x0000000000008082
+.quad 0x800000000000808a
+.quad 0x8000000080008000
+.quad 0x000000000000808b
+.quad 0x0000000080000001
+.quad 0x8000000080008081
+.quad 0x8000000000008009
+.quad 0x000000000000008a
+.quad 0x0000000000000088
+.quad 0x0000000080008009
+.quad 0x000000008000000a
+.quad 0x000000008000808b
+.quad 0x800000000000008b
+.quad 0x8000000000008089
+.quad 0x8000000000008003
+.quad 0x8000000000008002
+.quad 0x8000000000000080
+.quad 0x000000000000800a
+.quad 0x800000008000000a
+.quad 0x8000000080008081
+.quad 0x8000000000008080
+.quad 0x0000000080000001
+.quad 0x8000000080008008
+.size iotas,.-iotas
+.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-mb-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-mb-x86_64.s
index d266d776ec..1a0de0f100 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-mb-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-mb-x86_64.s
@@ -6,17 +6,22 @@
.type sha1_multi_block,@function
.align 32
sha1_multi_block:
+.cfi_startproc
movq OPENSSL_ia32cap_P+4(%rip),%rcx
btq $61,%rcx
jc _shaext_shortcut
testl $268435456,%ecx
jnz _avx_shortcut
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
.Lbody:
leaq K_XX_XX(%rip),%rbp
leaq 256(%rsp),%rbx
@@ -2546,19 +2551,28 @@ sha1_multi_block:
.Ldone:
movq 272(%rsp),%rax
+.cfi_def_cfa %rax,8
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha1_multi_block,.-sha1_multi_block
.type sha1_multi_block_shaext,@function
.align 32
sha1_multi_block_shaext:
+.cfi_startproc
_shaext_shortcut:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
subq $288,%rsp
shll $1,%edx
andq $-256,%rsp
@@ -2914,14 +2928,19 @@ _shaext_shortcut:
.Ldone_shaext:
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_shaext:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha1_multi_block_shaext,.-sha1_multi_block_shaext
.type sha1_multi_block_avx,@function
.align 32
sha1_multi_block_avx:
+.cfi_startproc
_avx_shortcut:
shrq $32,%rcx
cmpl $2,%edx
@@ -2932,11 +2951,15 @@ _avx_shortcut:
.align 32
.Lavx:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
.Lbody_avx:
leaq K_XX_XX(%rip),%rbp
leaq 256(%rsp),%rbx
@@ -4986,27 +5009,41 @@ _avx_shortcut:
.Ldone_avx:
movq 272(%rsp),%rax
+.cfi_def_cfa %rax,8
vzeroupper
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha1_multi_block_avx,.-sha1_multi_block_avx
.type sha1_multi_block_avx2,@function
.align 32
sha1_multi_block_avx2:
+.cfi_startproc
_avx2_shortcut:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
subq $576,%rsp
andq $-256,%rsp
movq %rax,544(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xa0,0x04,0x06,0x23,0x08
.Lbody_avx2:
leaq K_XX_XX(%rip),%rbp
shrl $1,%edx
@@ -7193,16 +7230,25 @@ _avx2_shortcut:
.Ldone_avx2:
movq 544(%rsp),%rax
+.cfi_def_cfa %rax,8
vzeroupper
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha1_multi_block_avx2,.-sha1_multi_block_avx2
.align 256
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-x86_64.s
index dbeebed9a0..e436521a04 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha1-x86_64.s
@@ -5,6 +5,7 @@
.type sha1_block_data_order,@function
.align 16
sha1_block_data_order:
+.cfi_startproc
movl OPENSSL_ia32cap_P+0(%rip),%r9d
movl OPENSSL_ia32cap_P+4(%rip),%r8d
movl OPENSSL_ia32cap_P+8(%rip),%r10d
@@ -25,17 +26,24 @@ sha1_block_data_order:
.align 16
.Lialu:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
movq %rdi,%r8
subq $72,%rsp
movq %rsi,%r9
andq $-64,%rsp
movq %rdx,%r10
movq %rax,64(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08
.Lprologue:
movl 0(%r8),%esi
@@ -1230,19 +1238,28 @@ sha1_block_data_order:
jnz .Lloop
movq 64(%rsp),%rsi
+.cfi_def_cfa %rsi,8
movq -40(%rsi),%r14
+.cfi_restore %r14
movq -32(%rsi),%r13
+.cfi_restore %r13
movq -24(%rsi),%r12
+.cfi_restore %r12
movq -16(%rsi),%rbp
+.cfi_restore %rbp
movq -8(%rsi),%rbx
+.cfi_restore %rbx
leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha1_block_data_order,.-sha1_block_data_order
.type sha1_block_data_order_shaext,@function
.align 32
sha1_block_data_order_shaext:
_shaext_shortcut:
+.cfi_startproc
movdqu (%rdi),%xmm0
movd 16(%rdi),%xmm1
movdqa K_XX_XX+160(%rip),%xmm3
@@ -1404,20 +1421,27 @@ _shaext_shortcut:
pshufd $27,%xmm1,%xmm1
movdqu %xmm0,(%rdi)
movd %xmm1,16(%rdi)
+.cfi_endproc
.byte 0xf3,0xc3
.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
.type sha1_block_data_order_ssse3,@function
.align 16
sha1_block_data_order_ssse3:
_ssse3_shortcut:
- movq %rsp,%rax
+.cfi_startproc
+ movq %rsp,%r11
+.cfi_def_cfa_register %r11
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
leaq -64(%rsp),%rsp
- movq %rax,%r14
andq $-64,%rsp
movq %rdi,%r8
movq %rsi,%r9
@@ -1425,7 +1449,7 @@ _ssse3_shortcut:
shlq $6,%r10
addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
movl 4(%r8),%ebx
@@ -1437,8 +1461,8 @@ _ssse3_shortcut:
xorl %edx,%edi
andl %edi,%esi
- movdqa 64(%r11),%xmm6
- movdqa -64(%r11),%xmm9
+ movdqa 64(%r14),%xmm6
+ movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2
@@ -1514,7 +1538,7 @@ _ssse3_shortcut:
pslld $2,%xmm9
pxor %xmm10,%xmm4
xorl %ebp,%edx
- movdqa -64(%r11),%xmm10
+ movdqa -64(%r14),%xmm10
roll $5,%ecx
addl %edi,%ebx
andl %edx,%esi
@@ -1575,7 +1599,7 @@ _ssse3_shortcut:
pslld $2,%xmm10
pxor %xmm8,%xmm5
xorl %eax,%ebp
- movdqa -32(%r11),%xmm8
+ movdqa -32(%r14),%xmm8
roll $5,%edx
addl %edi,%ecx
andl %ebp,%esi
@@ -1636,7 +1660,7 @@ _ssse3_shortcut:
pslld $2,%xmm8
pxor %xmm9,%xmm6
xorl %ebx,%eax
- movdqa -32(%r11),%xmm9
+ movdqa -32(%r14),%xmm9
roll $5,%ebp
addl %edi,%edx
andl %eax,%esi
@@ -1697,7 +1721,7 @@ _ssse3_shortcut:
pslld $2,%xmm9
pxor %xmm10,%xmm7
xorl %ecx,%ebx
- movdqa -32(%r11),%xmm10
+ movdqa -32(%r14),%xmm10
roll $5,%eax
addl %edi,%ebp
andl %ebx,%esi
@@ -1808,7 +1832,7 @@ _ssse3_shortcut:
pxor %xmm3,%xmm2
addl %esi,%eax
xorl %edx,%edi
- movdqa 0(%r11),%xmm10
+ movdqa 0(%r14),%xmm10
rorl $7,%ecx
paddd %xmm1,%xmm9
addl %ebx,%eax
@@ -2043,7 +2067,7 @@ _ssse3_shortcut:
pxor %xmm0,%xmm7
roll $5,%ebx
addl %esi,%eax
- movdqa 32(%r11),%xmm9
+ movdqa 32(%r14),%xmm9
xorl %ecx,%edi
paddd %xmm6,%xmm8
xorl %edx,%ecx
@@ -2334,8 +2358,8 @@ _ssse3_shortcut:
addl %edx,%ecx
cmpq %r10,%r9
je .Ldone_ssse3
- movdqa 64(%r11),%xmm6
- movdqa -64(%r11),%xmm9
+ movdqa 64(%r14),%xmm6
+ movdqa -64(%r14),%xmm9
movdqu 0(%r9),%xmm0
movdqu 16(%r9),%xmm1
movdqu 32(%r9),%xmm2
@@ -2572,29 +2596,41 @@ _ssse3_shortcut:
movl %ecx,8(%r8)
movl %edx,12(%r8)
movl %ebp,16(%r8)
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+.cfi_restore %r14
+ movq -32(%r11),%r13
+.cfi_restore %r13
+ movq -24(%r11),%r12
+.cfi_restore %r12
+ movq -16(%r11),%rbp
+.cfi_restore %rbp
+ movq -8(%r11),%rbx
+.cfi_restore %rbx
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_ssse3:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
.type sha1_block_data_order_avx,@function
.align 16
sha1_block_data_order_avx:
_avx_shortcut:
- movq %rsp,%rax
+.cfi_startproc
+ movq %rsp,%r11
+.cfi_def_cfa_register %r11
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
leaq -64(%rsp),%rsp
vzeroupper
- movq %rax,%r14
andq $-64,%rsp
movq %rdi,%r8
movq %rsi,%r9
@@ -2602,7 +2638,7 @@ _avx_shortcut:
shlq $6,%r10
addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
movl 4(%r8),%ebx
@@ -2614,8 +2650,8 @@ _avx_shortcut:
xorl %edx,%edi
andl %edi,%esi
- vmovdqa 64(%r11),%xmm6
- vmovdqa -64(%r11),%xmm11
+ vmovdqa 64(%r14),%xmm6
+ vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2
@@ -2740,7 +2776,7 @@ _avx_shortcut:
vpxor %xmm10,%xmm5,%xmm5
xorl %eax,%ebp
shldl $5,%edx,%edx
- vmovdqa -32(%r11),%xmm11
+ vmovdqa -32(%r14),%xmm11
addl %edi,%ecx
andl %ebp,%esi
xorl %eax,%ebp
@@ -2953,7 +2989,7 @@ _avx_shortcut:
addl %esi,%eax
xorl %edx,%edi
vpaddd %xmm1,%xmm11,%xmm9
- vmovdqa 0(%r11),%xmm11
+ vmovdqa 0(%r14),%xmm11
shrdl $7,%ecx,%ecx
addl %ebx,%eax
vpxor %xmm8,%xmm2,%xmm2
@@ -3172,7 +3208,7 @@ _avx_shortcut:
movl %ebx,%edi
xorl %edx,%esi
vpaddd %xmm6,%xmm11,%xmm9
- vmovdqa 32(%r11),%xmm11
+ vmovdqa 32(%r14),%xmm11
shldl $5,%ebx,%ebx
addl %esi,%eax
vpxor %xmm8,%xmm7,%xmm7
@@ -3451,8 +3487,8 @@ _avx_shortcut:
addl %edx,%ecx
cmpq %r10,%r9
je .Ldone_avx
- vmovdqa 64(%r11),%xmm6
- vmovdqa -64(%r11),%xmm11
+ vmovdqa 64(%r14),%xmm6
+ vmovdqa -64(%r14),%xmm11
vmovdqu 0(%r9),%xmm0
vmovdqu 16(%r9),%xmm1
vmovdqu 32(%r9),%xmm2
@@ -3688,28 +3724,40 @@ _avx_shortcut:
movl %ecx,8(%r8)
movl %edx,12(%r8)
movl %ebp,16(%r8)
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+.cfi_restore %r14
+ movq -32(%r11),%r13
+.cfi_restore %r13
+ movq -24(%r11),%r12
+.cfi_restore %r12
+ movq -16(%r11),%rbp
+.cfi_restore %rbp
+ movq -8(%r11),%rbx
+.cfi_restore %rbx
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
.type sha1_block_data_order_avx2,@function
.align 16
sha1_block_data_order_avx2:
_avx2_shortcut:
- movq %rsp,%rax
+.cfi_startproc
+ movq %rsp,%r11
+.cfi_def_cfa_register %r11
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
vzeroupper
- movq %rax,%r14
movq %rdi,%r8
movq %rsi,%r9
movq %rdx,%r10
@@ -3719,7 +3767,7 @@ _avx2_shortcut:
leaq 64(%r9),%r13
andq $-128,%rsp
addq %r9,%r10
- leaq K_XX_XX+64(%rip),%r11
+ leaq K_XX_XX+64(%rip),%r14
movl 0(%r8),%eax
cmpq %r10,%r13
@@ -3728,7 +3776,7 @@ _avx2_shortcut:
movl 8(%r8),%ecx
movl 12(%r8),%edx
movl 16(%r8),%esi
- vmovdqu 64(%r11),%ymm6
+ vmovdqu 64(%r14),%ymm6
vmovdqu (%r9),%xmm0
vmovdqu 16(%r9),%xmm1
@@ -3742,7 +3790,7 @@ _avx2_shortcut:
vpshufb %ymm6,%ymm1,%ymm1
vinserti128 $1,48(%r13),%ymm3,%ymm3
vpshufb %ymm6,%ymm2,%ymm2
- vmovdqu -64(%r11),%ymm11
+ vmovdqu -64(%r14),%ymm11
vpshufb %ymm6,%ymm3,%ymm3
vpaddd %ymm11,%ymm0,%ymm4
@@ -3774,7 +3822,7 @@ _avx2_shortcut:
vpxor %ymm3,%ymm8,%ymm8
vpxor %ymm8,%ymm5,%ymm5
vpsrld $31,%ymm5,%ymm8
- vmovdqu -32(%r11),%ymm11
+ vmovdqu -32(%r14),%ymm11
vpslldq $12,%ymm5,%ymm10
vpaddd %ymm5,%ymm5,%ymm5
vpsrld $30,%ymm10,%ymm9
@@ -3928,7 +3976,7 @@ _avx2_shortcut:
addl -56(%r13),%ebp
andnl %esi,%ebx,%edi
vpxor %ymm3,%ymm2,%ymm2
- vmovdqu 0(%r11),%ymm11
+ vmovdqu 0(%r14),%ymm11
addl %ecx,%ebp
rorxl $27,%ebx,%r12d
rorxl $2,%ebx,%ecx
@@ -4159,7 +4207,7 @@ _avx2_shortcut:
addl -116(%r13),%eax
leal (%rax,%rbx,1),%eax
vpxor %ymm0,%ymm7,%ymm7
- vmovdqu 32(%r11),%ymm11
+ vmovdqu 32(%r14),%ymm11
rorxl $27,%ebp,%r12d
rorxl $2,%ebp,%ebx
xorl %ecx,%ebp
@@ -4604,7 +4652,7 @@ _avx2_shortcut:
cmpq %r10,%r9
je .Ldone_avx2
- vmovdqu 64(%r11),%ymm6
+ vmovdqu 64(%r14),%ymm6
cmpq %r10,%rdi
ja .Last_avx2
@@ -4820,7 +4868,7 @@ _avx2_shortcut:
xorl %ebx,%eax
addl %r12d,%esi
xorl %ecx,%eax
- vmovdqu -64(%r11),%ymm11
+ vmovdqu -64(%r14),%ymm11
vpshufb %ymm6,%ymm0,%ymm0
addl 68(%r13),%edx
leal (%rdx,%rax,1),%edx
@@ -5176,7 +5224,7 @@ _avx2_shortcut:
xorl %ebp,%esi
addl %r12d,%edx
vpsrld $31,%ymm5,%ymm8
- vmovdqu -32(%r11),%ymm11
+ vmovdqu -32(%r14),%ymm11
xorl %ebx,%esi
addl 104(%r13),%ecx
leal (%rcx,%rsi,1),%ecx
@@ -5369,15 +5417,21 @@ _avx2_shortcut:
.Ldone_avx2:
vzeroupper
- leaq (%r14),%rsi
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
+ movq -40(%r11),%r14
+.cfi_restore %r14
+ movq -32(%r11),%r13
+.cfi_restore %r13
+ movq -24(%r11),%r12
+.cfi_restore %r12
+ movq -16(%r11),%rbp
+.cfi_restore %rbp
+ movq -8(%r11),%rbx
+.cfi_restore %rbx
+ leaq (%r11),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
.align 64
K_XX_XX:
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-mb-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-mb-x86_64.s
index f2896b4d6e..59cf9c984e 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-mb-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-mb-x86_64.s
@@ -6,17 +6,22 @@
.type sha256_multi_block,@function
.align 32
sha256_multi_block:
+.cfi_startproc
movq OPENSSL_ia32cap_P+4(%rip),%rcx
btq $61,%rcx
jc _shaext_shortcut
testl $268435456,%ecx
jnz _avx_shortcut
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
.Lbody:
leaq K256+128(%rip),%rbp
leaq 256(%rsp),%rbx
@@ -2615,19 +2620,28 @@ sha256_multi_block:
.Ldone:
movq 272(%rsp),%rax
+.cfi_def_cfa %rax,8
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha256_multi_block,.-sha256_multi_block
.type sha256_multi_block_shaext,@function
.align 32
sha256_multi_block_shaext:
+.cfi_startproc
_shaext_shortcut:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
subq $288,%rsp
shll $1,%edx
andq $-256,%rsp
@@ -3102,14 +3116,19 @@ _shaext_shortcut:
.Ldone_shaext:
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_shaext:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha256_multi_block_shaext,.-sha256_multi_block_shaext
.type sha256_multi_block_avx,@function
.align 32
sha256_multi_block_avx:
+.cfi_startproc
_avx_shortcut:
shrq $32,%rcx
cmpl $2,%edx
@@ -3120,11 +3139,15 @@ _avx_shortcut:
.align 32
.Lavx:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
subq $288,%rsp
andq $-256,%rsp
movq %rax,272(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
.Lbody_avx:
leaq K256+128(%rip),%rbp
leaq 256(%rsp),%rbx
@@ -5353,27 +5376,41 @@ _avx_shortcut:
.Ldone_avx:
movq 272(%rsp),%rax
+.cfi_def_cfa %rax,8
vzeroupper
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha256_multi_block_avx,.-sha256_multi_block_avx
.type sha256_multi_block_avx2,@function
.align 32
sha256_multi_block_avx2:
+.cfi_startproc
_avx2_shortcut:
movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
subq $576,%rsp
andq $-256,%rsp
movq %rax,544(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xa0,0x04,0x06,0x23,0x08
.Lbody_avx2:
leaq K256+128(%rip),%rbp
leaq 128(%rdi),%rdi
@@ -7738,16 +7775,25 @@ _avx2_shortcut:
.Ldone_avx2:
movq 544(%rsp),%rax
+.cfi_def_cfa %rax,8
vzeroupper
movq -48(%rax),%r15
+.cfi_restore %r15
movq -40(%rax),%r14
+.cfi_restore %r14
movq -32(%rax),%r13
+.cfi_restore %r13
movq -24(%rax),%r12
+.cfi_restore %r12
movq -16(%rax),%rbp
+.cfi_restore %rbp
movq -8(%rax),%rbx
+.cfi_restore %rbx
leaq (%rax),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha256_multi_block_avx2,.-sha256_multi_block_avx2
.align 256
K256:
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-x86_64.s
index 8264a7dbdf..42b24df18e 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha256-x86_64.s
@@ -5,6 +5,7 @@
.type sha256_block_data_order,@function
.align 16
sha256_block_data_order:
+.cfi_startproc
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
@@ -21,13 +22,20 @@ sha256_block_data_order:
je .Lavx_shortcut
testl $512,%r10d
jnz .Lssse3_shortcut
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
shlq $4,%rdx
subq $64+32,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -35,7 +43,8 @@ sha256_block_data_order:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue:
movl 0(%rdi),%eax
@@ -1699,16 +1708,25 @@ sha256_block_data_order:
movl %r11d,28(%rdi)
jb .Lloop
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha256_block_data_order,.-sha256_block_data_order
.align 64
.type K256,@object
@@ -1963,14 +1981,22 @@ _shaext_shortcut:
.type sha256_block_data_order_ssse3,@function
.align 64
sha256_block_data_order_ssse3:
+.cfi_startproc
.Lssse3_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -1978,7 +2004,8 @@ sha256_block_data_order_ssse3:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue_ssse3:
movl 0(%rdi),%eax
@@ -3044,28 +3071,45 @@ sha256_block_data_order_ssse3:
movl %r11d,28(%rdi)
jb .Lloop_ssse3
- movq 64+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_ssse3:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3
.type sha256_block_data_order_avx,@function
.align 64
sha256_block_data_order_avx:
+.cfi_startproc
.Lavx_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
shlq $4,%rdx
subq $96,%rsp
leaq (%rsi,%rdx,4),%rdx
@@ -3073,7 +3117,8 @@ sha256_block_data_order_avx:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue_avx:
vzeroupper
@@ -4100,29 +4145,46 @@ sha256_block_data_order_avx:
movl %r11d,28(%rdi)
jb .Lloop_avx
- movq 64+24(%rsp),%rsi
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha256_block_data_order_avx,.-sha256_block_data_order_avx
.type sha256_block_data_order_avx2,@function
.align 64
sha256_block_data_order_avx2:
+.cfi_startproc
.Lavx2_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
subq $544,%rsp
shlq $4,%rdx
andq $-1024,%rsp
@@ -4131,7 +4193,8 @@ sha256_block_data_order_avx2:
movq %rdi,64+0(%rsp)
movq %rsi,64+8(%rsp)
movq %rdx,64+16(%rsp)
- movq %r11,64+24(%rsp)
+ movq %rax,88(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08
.Lprologue_avx2:
vzeroupper
@@ -5344,15 +5407,24 @@ sha256_block_data_order_avx2:
.Ldone_avx2:
leaq (%rbp),%rsp
- movq 64+24(%rsp),%rsi
+ movq 88(%rsp),%rsi
+.cfi_def_cfa %rsi,8
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha256_block_data_order_avx2,.-sha256_block_data_order_avx2
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha512-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha512-x86_64.s
index 6f8488a38a..5931a2a932 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha512-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/sha/sha512-x86_64.s
@@ -5,6 +5,7 @@
.type sha512_block_data_order,@function
.align 16
sha512_block_data_order:
+.cfi_startproc
leaq OPENSSL_ia32cap_P(%rip),%r11
movl 0(%r11),%r9d
movl 4(%r11),%r10d
@@ -19,13 +20,20 @@ sha512_block_data_order:
orl %r9d,%r10d
cmpl $1342177792,%r10d
je .Lavx_shortcut
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
shlq $4,%rdx
subq $128+32,%rsp
leaq (%rsi,%rdx,8),%rdx
@@ -33,7 +41,8 @@ sha512_block_data_order:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
.Lprologue:
movq 0(%rdi),%rax
@@ -1697,16 +1706,25 @@ sha512_block_data_order:
movq %r11,56(%rdi)
jb .Lloop
- movq 128+24(%rsp),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha512_block_data_order,.-sha512_block_data_order
.align 64
.type K512,@object
@@ -1798,14 +1816,22 @@ K512:
.type sha512_block_data_order_xop,@function
.align 64
sha512_block_data_order_xop:
+.cfi_startproc
.Lxop_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
shlq $4,%rdx
subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx
@@ -1813,7 +1839,8 @@ sha512_block_data_order_xop:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
.Lprologue_xop:
vzeroupper
@@ -2866,29 +2893,46 @@ sha512_block_data_order_xop:
movq %r11,56(%rdi)
jb .Lloop_xop
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_xop:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha512_block_data_order_xop,.-sha512_block_data_order_xop
.type sha512_block_data_order_avx,@function
.align 64
sha512_block_data_order_avx:
+.cfi_startproc
.Lavx_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
shlq $4,%rdx
subq $160,%rsp
leaq (%rsi,%rdx,8),%rdx
@@ -2896,7 +2940,8 @@ sha512_block_data_order_avx:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
.Lprologue_avx:
vzeroupper
@@ -4013,29 +4058,46 @@ sha512_block_data_order_avx:
movq %r11,56(%rdi)
jb .Lloop_avx
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha512_block_data_order_avx,.-sha512_block_data_order_avx
.type sha512_block_data_order_avx2,@function
.align 64
sha512_block_data_order_avx2:
+.cfi_startproc
.Lavx2_shortcut:
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
- movq %rsp,%r11
+.cfi_offset %r15,-56
subq $1312,%rsp
shlq $4,%rdx
andq $-2048,%rsp
@@ -4044,7 +4106,8 @@ sha512_block_data_order_avx2:
movq %rdi,128+0(%rsp)
movq %rsi,128+8(%rsp)
movq %rdx,128+16(%rsp)
- movq %r11,128+24(%rsp)
+ movq %rax,152(%rsp)
+.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08
.Lprologue_avx2:
vzeroupper
@@ -5351,15 +5414,24 @@ sha512_block_data_order_avx2:
.Ldone_avx2:
leaq (%rbp),%rsp
- movq 128+24(%rsp),%rsi
+ movq 152(%rsp),%rsi
+.cfi_def_cfa %rsi,8
vzeroupper
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue_avx2:
.byte 0xf3,0xc3
+.cfi_endproc
.size sha512_block_data_order_avx2,.-sha512_block_data_order_avx2
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/whrlpool/wp-x86_64.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/whrlpool/wp-x86_64.s
index a4d55b6afc..2c261f398a 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/whrlpool/wp-x86_64.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/whrlpool/wp-x86_64.s
@@ -4,14 +4,22 @@
.type whirlpool_block,@function
.align 16
whirlpool_block:
+.cfi_startproc
+ movq %rsp,%rax
+.cfi_def_cfa_register %rax
pushq %rbx
+.cfi_offset %rbx,-16
pushq %rbp
+.cfi_offset %rbp,-24
pushq %r12
+.cfi_offset %r12,-32
pushq %r13
+.cfi_offset %r13,-40
pushq %r14
+.cfi_offset %r14,-48
pushq %r15
+.cfi_offset %r15,-56
- movq %rsp,%r11
subq $128+40,%rsp
andq $-64,%rsp
@@ -19,7 +27,8 @@ whirlpool_block:
movq %rdi,0(%r10)
movq %rsi,8(%r10)
movq %rdx,16(%r10)
- movq %r11,32(%r10)
+ movq %rax,32(%r10)
+.cfi_escape 0x0f,0x06,0x77,0xa0,0x01,0x06,0x23,0x08
.Lprologue:
movq %r10,%rbx
@@ -579,15 +588,24 @@ whirlpool_block:
jmp .Louterloop
.Lalldone:
movq 32(%rbx),%rsi
- movq (%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+.cfi_def_cfa %rsi,8
+ movq -48(%rsi),%r15
+.cfi_restore %r15
+ movq -40(%rsi),%r14
+.cfi_restore %r14
+ movq -32(%rsi),%r13
+.cfi_restore %r13
+ movq -24(%rsi),%r12
+.cfi_restore %r12
+ movq -16(%rsi),%rbp
+.cfi_restore %rbp
+ movq -8(%rsi),%rbx
+.cfi_restore %rbx
+ leaq (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
.byte 0xf3,0xc3
+.cfi_endproc
.size whirlpool_block,.-whirlpool_block
.align 64
diff --git a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/x86_64cpuid.s b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/x86_64cpuid.s
index 7e1f5e2740..fd17eaaba4 100644
--- a/deps/openssl/config/archs/BSD-x86_64/asm/crypto/x86_64cpuid.s
+++ b/deps/openssl/config/archs/BSD-x86_64/asm/crypto/x86_64cpuid.s
@@ -36,10 +36,12 @@ OPENSSL_rdtsc:
.type OPENSSL_ia32_cpuid,@function
.align 16
OPENSSL_ia32_cpuid:
+.cfi_startproc
movq %rbx,%r8
+.cfi_register %rbx,%r8
xorl %eax,%eax
- movl %eax,8(%rdi)
+ movq %rax,8(%rdi)
cpuid
movl %eax,%r11d
@@ -110,6 +112,7 @@ OPENSSL_ia32_cpuid:
.Lnocacheinfo:
movl $1,%eax
cpuid
+ movd %eax,%xmm0
andl $0xbfefffff,%edx
cmpl $0,%r9d
jne .Lnotintel
@@ -157,26 +160,45 @@ OPENSSL_ia32_cpuid:
jc .Lnotknights
andl $0xfff7ffff,%ebx
.Lnotknights:
+ movd %xmm0,%eax
+ andl $0x0fff0ff0,%eax
+ cmpl $0x00050650,%eax
+ jne .Lnotskylakex
+ andl $0xfffeffff,%ebx
+
+.Lnotskylakex:
movl %ebx,8(%rdi)
+ movl %ecx,12(%rdi)
.Lno_extended_info:
btl $27,%r9d
jnc .Lclear_avx
xorl %ecx,%ecx
.byte 0x0f,0x01,0xd0
+ andl $0xe6,%eax
+ cmpl $0xe6,%eax
+ je .Ldone
+ andl $0x3fdeffff,8(%rdi)
+
+
+
+
andl $6,%eax
cmpl $6,%eax
je .Ldone
.Lclear_avx:
movl $0xefffe7ff,%eax
andl %eax,%r9d
- andl $0xffffffdf,8(%rdi)
+ movl $0x3fdeffdf,%eax
+ andl %eax,8(%rdi)
.Ldone:
shlq $32,%r9
movl %r10d,%eax
movq %r8,%rbx
+.cfi_restore %rbx
orq %r9,%rax
.byte 0xf3,0xc3
+.cfi_endproc
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
.globl OPENSSL_cleanse
@@ -222,6 +244,18 @@ CRYPTO_memcmp:
xorq %r10,%r10
cmpq $0,%rdx
je .Lno_data
+ cmpq $16,%rdx
+ jne .Loop_cmp
+ movq (%rdi),%r10
+ movq 8(%rdi),%r11
+ movq $1,%rdx
+ xorq (%rsi),%r10
+ xorq 8(%rsi),%r11
+ orq %r11,%r10
+ cmovnzq %rdx,%rax
+ .byte 0xf3,0xc3
+
+.align 16
.Loop_cmp:
movb (%rdi),%r10b
leaq 1(%rdi),%rdi
@@ -345,21 +379,6 @@ OPENSSL_instrument_bus2:
subq %rcx,%rax
.byte 0xf3,0xc3
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
-.globl OPENSSL_ia32_rdrand
-.type OPENSSL_ia32_rdrand,@function
-.align 16
-OPENSSL_ia32_rdrand:
- movl $8,%ecx
-.Loop_rdrand:
-.byte 72,15,199,240
- jc .Lbreak_rdrand
- loop .Loop_rdrand
-.Lbreak_rdrand:
- cmpq $0,%rax
- cmoveq %rcx,%rax
- .byte 0xf3,0xc3
-.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
-
.globl OPENSSL_ia32_rdrand_bytes
.type OPENSSL_ia32_rdrand_bytes,@function
.align 16
@@ -393,28 +412,14 @@ OPENSSL_ia32_rdrand_bytes:
movb %r10b,(%rdi)
leaq 1(%rdi),%rdi
incq %rax
- shrq $8,%r8
+ shrq $8,%r10
decq %rsi
jnz .Ltail_rdrand_bytes
.Ldone_rdrand_bytes:
+ xorq %r10,%r10
.byte 0xf3,0xc3
.size OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes
-.globl OPENSSL_ia32_rdseed
-.type OPENSSL_ia32_rdseed,@function
-.align 16
-OPENSSL_ia32_rdseed:
- movl $8,%ecx
-.Loop_rdseed:
-.byte 72,15,199,248
- jc .Lbreak_rdseed
- loop .Loop_rdseed
-.Lbreak_rdseed:
- cmpq $0,%rax
- cmoveq %rcx,%rax
- .byte 0xf3,0xc3
-.size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
-
.globl OPENSSL_ia32_rdseed_bytes
.type OPENSSL_ia32_rdseed_bytes,@function
.align 16
@@ -448,10 +453,11 @@ OPENSSL_ia32_rdseed_bytes:
movb %r10b,(%rdi)
leaq 1(%rdi),%rdi
incq %rax
- shrq $8,%r8
+ shrq $8,%r10
decq %rsi
jnz .Ltail_rdseed_bytes
.Ldone_rdseed_bytes:
+ xorq %r10,%r10
.byte 0xf3,0xc3
.size OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes