summary | refs | log | tree | commit | diff
path: root/deps/openssl/config/archs/VC-WIN64A/asm/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN64A/asm/crypto')
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aes-x86_64.asm 126
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-mb-x86_64.asm 72
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha1-x86_64.asm 34
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha256-x86_64.asm 115
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm 433
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/bsaes-x86_64.asm 186
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-avx2.asm 59
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-x86_64.asm 85
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-gf2m.asm 35
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont.asm 72
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont5.asm 111
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/buildinf.h 25
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/camellia/cmll-x86_64.asm 57
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/chacha/chacha-x86_64.asm 1998
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm 1837
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/x25519-x86_64.asm 1054
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/include/internal/dso_conf.h 4
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/md5/md5-x86_64.asm 13
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/aesni-gcm-x86_64.asm 73
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/ghash-x86_64.asm 62
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/poly1305/poly1305-x86_64.asm 1855
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-md5-x86_64.asm 16
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-x86_64.asm 9
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/keccak1600-x86_64.asm 525
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-mb-x86_64.asm 46
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-x86_64.asm 240
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-mb-x86_64.asm 46
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-x86_64.asm 153
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha512-x86_64.asm 153
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/whrlpool/wp-x86_64.asm 37
-rw-r--r-- deps/openssl/config/archs/VC-WIN64A/asm/crypto/x86_64cpuid.asm 74
31 files changed, 8810 insertions, 795 deletions
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aes-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aes-x86_64.asm
index 923e31ec9e..5babb865fa 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aes-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aes-x86_64.asm
@@ -346,15 +346,23 @@ $L$SEH_begin_AES_encrypt:
mov rdx,r8
+
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r10,rsp
+
lea rcx,[((-63))+rdx]
and rsp,-64
sub rcx,rsp
@@ -364,7 +372,8 @@ $L$SEH_begin_AES_encrypt:
sub rsp,32
mov QWORD[16+rsp],rsi
- mov QWORD[24+rsp],r10
+ mov QWORD[24+rsp],rax
+
$L$enc_prologue:
mov r15,rdx
@@ -391,22 +400,31 @@ $L$enc_prologue:
mov r9,QWORD[16+rsp]
mov rsi,QWORD[24+rsp]
+
mov DWORD[r9],eax
mov DWORD[4+r9],ebx
mov DWORD[8+r9],ecx
mov DWORD[12+r9],edx
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$enc_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_AES_encrypt:
ALIGN 16
@@ -804,15 +822,23 @@ $L$SEH_begin_AES_decrypt:
mov rdx,r8
+
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r10,rsp
+
lea rcx,[((-63))+rdx]
and rsp,-64
sub rcx,rsp
@@ -822,7 +848,8 @@ $L$SEH_begin_AES_decrypt:
sub rsp,32
mov QWORD[16+rsp],rsi
- mov QWORD[24+rsp],r10
+ mov QWORD[24+rsp],rax
+
$L$dec_prologue:
mov r15,rdx
@@ -851,22 +878,31 @@ $L$dec_prologue:
mov r9,QWORD[16+rsp]
mov rsi,QWORD[24+rsp]
+
mov DWORD[r9],eax
mov DWORD[4+r9],ebx
mov DWORD[8+r9],ecx
mov DWORD[12+r9],edx
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$dec_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_AES_decrypt:
global AES_set_encrypt_key
@@ -881,24 +917,36 @@ $L$SEH_begin_AES_set_encrypt_key:
mov rdx,r8
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,8
+
$L$enc_key_prologue:
call _x86_64_AES_set_encrypt_key
mov rbp,QWORD[40+rsp]
+
mov rbx,QWORD[48+rsp]
+
add rsp,56
+
$L$enc_key_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_AES_set_encrypt_key:
@@ -1153,13 +1201,21 @@ $L$SEH_begin_AES_set_decrypt_key:
mov rdx,r8
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
push rdx
+
$L$dec_key_prologue:
call _x86_64_AES_set_encrypt_key
@@ -1327,16 +1383,24 @@ $L$permute:
xor rax,rax
$L$abort:
mov r15,QWORD[8+rsp]
+
mov r14,QWORD[16+rsp]
+
mov r13,QWORD[24+rsp]
+
mov r12,QWORD[32+rsp]
+
mov rbp,QWORD[40+rsp]
+
mov rbx,QWORD[48+rsp]
+
add rsp,56
+
$L$dec_key_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_AES_set_decrypt_key:
global AES_cbc_encrypt
@@ -1358,25 +1422,32 @@ $L$SEH_begin_AES_cbc_encrypt:
mov r9,QWORD[48+rsp]
+
cmp rdx,0
je NEAR $L$cbc_epilogue
pushfq
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$cbc_prologue:
cld
mov r9d,r9d
lea r14,[$L$AES_Te]
+ lea r10,[$L$AES_Td]
cmp r9,0
- jne NEAR $L$cbc_picked_te
- lea r14,[$L$AES_Td]
-$L$cbc_picked_te:
+ cmove r14,r10
mov r10d,DWORD[OPENSSL_ia32cap_P]
cmp rdx,512
@@ -1413,7 +1484,9 @@ $L$cbc_te_ok:
xchg r15,rsp
+
mov QWORD[16+rsp],r15
+
$L$cbc_fast_body:
mov QWORD[24+rsp],rdi
mov QWORD[32+rsp],rsi
@@ -1795,19 +1868,29 @@ $L$cbc_slow_dec_partial:
ALIGN 16
$L$cbc_exit:
mov rsi,QWORD[16+rsp]
+
mov r15,QWORD[rsi]
+
mov r14,QWORD[8+rsi]
+
mov r13,QWORD[16+rsi]
+
mov r12,QWORD[24+rsi]
+
mov rbp,QWORD[32+rsi]
+
mov rbx,QWORD[40+rsi]
+
lea rsp,[48+rsi]
+
$L$cbc_popfq:
popfq
+
$L$cbc_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_AES_cbc_encrypt:
ALIGN 64
$L$AES_Te:
@@ -2632,7 +2715,6 @@ block_se_handler:
jae NEAR $L$in_block_prologue
mov rax,QWORD[24+rax]
- lea rax,[48+rax]
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-mb-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-mb-x86_64.asm
index e7fdb2142a..9891df39f0 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-mb-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-mb-x86_64.asm
@@ -20,6 +20,7 @@ $L$SEH_begin_aesni_multi_cbc_encrypt:
mov rdx,r8
+
cmp edx,2
jb NEAR $L$enc_non_avx
mov ecx,DWORD[((OPENSSL_ia32cap_P+4))]
@@ -29,12 +30,19 @@ $L$SEH_begin_aesni_multi_cbc_encrypt:
ALIGN 16
$L$enc_non_avx:
mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -56,6 +64,7 @@ $L$enc_non_avx:
and rsp,-64
mov QWORD[16+rsp],rax
+
$L$enc4x_body:
movdqu xmm12,XMMWORD[rsi]
lea rsi,[120+rsi]
@@ -264,6 +273,7 @@ DB 102,15,56,221,232
jnz NEAR $L$oop_enc4x
mov rax,QWORD[16+rsp]
+
mov edx,DWORD[24+rsp]
@@ -291,16 +301,24 @@ $L$enc4x_done:
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$enc4x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_multi_cbc_encrypt:
global aesni_multi_cbc_decrypt
@@ -316,6 +334,7 @@ $L$SEH_begin_aesni_multi_cbc_decrypt:
mov rdx,r8
+
cmp edx,2
jb NEAR $L$dec_non_avx
mov ecx,DWORD[((OPENSSL_ia32cap_P+4))]
@@ -325,12 +344,19 @@ $L$SEH_begin_aesni_multi_cbc_decrypt:
ALIGN 16
$L$dec_non_avx:
mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -352,6 +378,7 @@ $L$dec_non_avx:
and rsp,-64
mov QWORD[16+rsp],rax
+
$L$dec4x_body:
movdqu xmm12,XMMWORD[rsi]
lea rsi,[120+rsi]
@@ -560,6 +587,7 @@ DB 102,65,15,56,223,233
jnz NEAR $L$oop_dec4x
mov rax,QWORD[16+rsp]
+
mov edx,DWORD[24+rsp]
lea rdi,[160+rdi]
@@ -578,16 +606,24 @@ $L$dec4x_done:
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$dec4x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_multi_cbc_decrypt:
ALIGN 32
@@ -601,14 +637,22 @@ $L$SEH_begin_aesni_multi_cbc_encrypt_avx:
mov rdx,r8
+
_avx_cbc_enc_shortcut:
mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -632,6 +676,7 @@ _avx_cbc_enc_shortcut:
and rsp,-128
mov QWORD[16+rsp],rax
+
$L$enc8x_body:
vzeroupper
vmovdqu xmm15,XMMWORD[rsi]
@@ -1033,6 +1078,7 @@ $L$enc8x_tail:
+
$L$enc8x_done:
vzeroupper
movaps xmm6,XMMWORD[((-216))+rax]
@@ -1046,16 +1092,24 @@ $L$enc8x_done:
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$enc8x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_multi_cbc_encrypt_avx:
@@ -1070,14 +1124,22 @@ $L$SEH_begin_aesni_multi_cbc_decrypt_avx:
mov rdx,r8
+
_avx_cbc_dec_shortcut:
mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -1103,6 +1165,7 @@ _avx_cbc_dec_shortcut:
sub rsp,192
mov QWORD[16+rsp],rax
+
$L$dec8x_body:
vzeroupper
vmovdqu xmm15,XMMWORD[rsi]
@@ -1542,6 +1605,7 @@ $L$dec8x_tail:
+
$L$dec8x_done:
vzeroupper
movaps xmm6,XMMWORD[((-216))+rax]
@@ -1555,16 +1619,24 @@ $L$dec8x_done:
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$dec8x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_multi_cbc_decrypt_avx:
EXTERN __imp_RtlVirtualUnwind
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha1-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha1-x86_64.asm
index 45fa82e223..925d1be94a 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha1-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha1-x86_64.asm
@@ -38,18 +38,26 @@ $L$SEH_begin_aesni_cbc_sha1_enc_ssse3:
mov r9,QWORD[48+rsp]
+
mov r10,QWORD[56+rsp]
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-264))+rsp]
+
movaps XMMWORD[(96+0)+rsp],xmm6
movaps XMMWORD[(96+16)+rsp],xmm7
movaps XMMWORD[(96+32)+rsp],xmm8
@@ -1400,17 +1408,26 @@ DB 102,15,56,221,209
movaps xmm14,XMMWORD[((96+128))+rsp]
movaps xmm15,XMMWORD[((96+144))+rsp]
lea rsi,[264+rsp]
+
mov r15,QWORD[rsi]
+
mov r14,QWORD[8+rsi]
+
mov r13,QWORD[16+rsi]
+
mov r12,QWORD[24+rsi]
+
mov rbp,QWORD[32+rsi]
+
mov rbx,QWORD[40+rsi]
+
lea rsp,[48+rsi]
+
$L$epilogue_ssse3:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_cbc_sha1_enc_ssse3:
ALIGN 32
@@ -1427,18 +1444,26 @@ $L$SEH_begin_aesni_cbc_sha1_enc_avx:
mov r9,QWORD[48+rsp]
+
mov r10,QWORD[56+rsp]
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-264))+rsp]
+
movaps XMMWORD[(96+0)+rsp],xmm6
movaps XMMWORD[(96+16)+rsp],xmm7
movaps XMMWORD[(96+32)+rsp],xmm8
@@ -2733,17 +2758,26 @@ $L$vaesenclast10:
movaps xmm14,XMMWORD[((96+128))+rsp]
movaps xmm15,XMMWORD[((96+144))+rsp]
lea rsi,[264+rsp]
+
mov r15,QWORD[rsi]
+
mov r14,QWORD[8+rsi]
+
mov r13,QWORD[16+rsi]
+
mov r12,QWORD[24+rsi]
+
mov rbp,QWORD[32+rsi]
+
mov rbx,QWORD[40+rsi]
+
lea rsp,[48+rsi]
+
$L$epilogue_avx:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_cbc_sha1_enc_avx:
ALIGN 64
K_XX_XX:
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha256-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha256-x86_64.asm
index f148890c00..b5d50c74db 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha256-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-sha256-x86_64.asm
@@ -98,15 +98,23 @@ $L$SEH_begin_aesni_cbc_sha256_enc_xop:
mov r9,QWORD[48+rsp]
+
$L$xop_shortcut:
mov r10,QWORD[56+rsp]
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
sub rsp,288
and rsp,-64
@@ -122,7 +130,8 @@ $L$xop_shortcut:
mov QWORD[((64+32))+rsp],r8
mov QWORD[((64+40))+rsp],r9
mov QWORD[((64+48))+rsp],r10
- mov QWORD[((64+56))+rsp],r11
+ mov QWORD[120+rsp],rax
+
movaps XMMWORD[128+rsp],xmm6
movaps XMMWORD[144+rsp],xmm7
movaps XMMWORD[160+rsp],xmm8
@@ -1238,7 +1247,8 @@ DB 143,232,120,194,239,2
jb NEAR $L$loop_xop
mov r8,QWORD[((64+32))+rsp]
- mov rsi,QWORD[((64+56))+rsp]
+ mov rsi,QWORD[120+rsp]
+
vmovdqu XMMWORD[r8],xmm8
vzeroall
movaps xmm6,XMMWORD[128+rsp]
@@ -1251,17 +1261,25 @@ DB 143,232,120,194,239,2
movaps xmm13,XMMWORD[240+rsp]
movaps xmm14,XMMWORD[256+rsp]
movaps xmm15,XMMWORD[272+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_xop:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_cbc_sha256_enc_xop:
ALIGN 64
@@ -1278,15 +1296,23 @@ $L$SEH_begin_aesni_cbc_sha256_enc_avx:
mov r9,QWORD[48+rsp]
+
$L$avx_shortcut:
mov r10,QWORD[56+rsp]
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
sub rsp,288
and rsp,-64
@@ -1302,7 +1328,8 @@ $L$avx_shortcut:
mov QWORD[((64+32))+rsp],r8
mov QWORD[((64+40))+rsp],r9
mov QWORD[((64+48))+rsp],r10
- mov QWORD[((64+56))+rsp],r11
+ mov QWORD[120+rsp],rax
+
movaps XMMWORD[128+rsp],xmm6
movaps XMMWORD[144+rsp],xmm7
movaps XMMWORD[160+rsp],xmm8
@@ -2449,7 +2476,8 @@ $L$avx_00_47:
jb NEAR $L$loop_avx
mov r8,QWORD[((64+32))+rsp]
- mov rsi,QWORD[((64+56))+rsp]
+ mov rsi,QWORD[120+rsp]
+
vmovdqu XMMWORD[r8],xmm8
vzeroall
movaps xmm6,XMMWORD[128+rsp]
@@ -2462,17 +2490,25 @@ $L$avx_00_47:
movaps xmm13,XMMWORD[240+rsp]
movaps xmm14,XMMWORD[256+rsp]
movaps xmm15,XMMWORD[272+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_avx:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_cbc_sha256_enc_avx:
ALIGN 64
@@ -2489,15 +2525,23 @@ $L$SEH_begin_aesni_cbc_sha256_enc_avx2:
mov r9,QWORD[48+rsp]
+
$L$avx2_shortcut:
mov r10,QWORD[56+rsp]
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
sub rsp,736
and rsp,-256*4
add rsp,448
@@ -2514,7 +2558,8 @@ $L$avx2_shortcut:
mov QWORD[((64+32))+rsp],r8
mov QWORD[((64+40))+rsp],r9
mov QWORD[((64+48))+rsp],r10
- mov QWORD[((64+56))+rsp],r11
+ mov QWORD[120+rsp],rax
+
movaps XMMWORD[128+rsp],xmm6
movaps XMMWORD[144+rsp],xmm7
movaps XMMWORD[160+rsp],xmm8
@@ -4086,7 +4131,8 @@ $L$ower_avx2:
$L$done_avx2:
lea rsp,[rbp]
mov r8,QWORD[((64+32))+rsp]
- mov rsi,QWORD[((64+56))+rsp]
+ mov rsi,QWORD[120+rsp]
+
vmovdqu XMMWORD[r8],xmm8
vzeroall
movaps xmm6,XMMWORD[128+rsp]
@@ -4099,17 +4145,25 @@ $L$done_avx2:
movaps xmm13,XMMWORD[240+rsp]
movaps xmm14,XMMWORD[256+rsp]
movaps xmm15,XMMWORD[272+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_avx2:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_cbc_sha256_enc_avx2:
ALIGN 32
@@ -4554,7 +4608,6 @@ $L$not_in_shaext:
$L$not_in_avx2:
mov rsi,rax
mov rax,QWORD[((64+56))+rax]
- lea rax,[48+rax]
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm
index 0f4790eead..3daf8476c3 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/aesni-x86_64.asm
@@ -1100,6 +1100,7 @@ $L$SEH_begin_aesni_ctr32_encrypt_blocks:
mov r8,QWORD[40+rsp]
+
cmp rdx,1
jne NEAR $L$ctr32_bulk
@@ -1129,22 +1130,23 @@ DB 102,15,56,221,209
ALIGN 16
$L$ctr32_bulk:
- lea rax,[rsp]
+ lea r11,[rsp]
+
push rbp
+
sub rsp,288
and rsp,-16
- movaps XMMWORD[(-168)+rax],xmm6
- movaps XMMWORD[(-152)+rax],xmm7
- movaps XMMWORD[(-136)+rax],xmm8
- movaps XMMWORD[(-120)+rax],xmm9
- movaps XMMWORD[(-104)+rax],xmm10
- movaps XMMWORD[(-88)+rax],xmm11
- movaps XMMWORD[(-72)+rax],xmm12
- movaps XMMWORD[(-56)+rax],xmm13
- movaps XMMWORD[(-40)+rax],xmm14
- movaps XMMWORD[(-24)+rax],xmm15
+ movaps XMMWORD[(-168)+r11],xmm6
+ movaps XMMWORD[(-152)+r11],xmm7
+ movaps XMMWORD[(-136)+r11],xmm8
+ movaps XMMWORD[(-120)+r11],xmm9
+ movaps XMMWORD[(-104)+r11],xmm10
+ movaps XMMWORD[(-88)+r11],xmm11
+ movaps XMMWORD[(-72)+r11],xmm12
+ movaps XMMWORD[(-56)+r11],xmm13
+ movaps XMMWORD[(-40)+r11],xmm14
+ movaps XMMWORD[(-24)+r11],xmm15
$L$ctr32_body:
- lea rbp,[((-8))+rax]
@@ -1153,7 +1155,7 @@ $L$ctr32_body:
movdqu xmm0,XMMWORD[rcx]
mov r8d,DWORD[12+r8]
pxor xmm2,xmm0
- mov r11d,DWORD[12+rcx]
+ mov ebp,DWORD[12+rcx]
movdqa XMMWORD[rsp],xmm2
bswap r8d
movdqa xmm3,xmm2
@@ -1169,8 +1171,8 @@ $L$ctr32_body:
lea rdx,[2+r8]
bswap eax
bswap edx
- xor eax,r11d
- xor edx,r11d
+ xor eax,ebp
+ xor edx,ebp
DB 102,15,58,34,216,3
lea rax,[3+r8]
movdqa XMMWORD[16+rsp],xmm3
@@ -1179,25 +1181,25 @@ DB 102,15,58,34,226,3
mov rdx,r10
lea r10,[4+r8]
movdqa XMMWORD[32+rsp],xmm4
- xor eax,r11d
+ xor eax,ebp
bswap r10d
DB 102,15,58,34,232,3
- xor r10d,r11d
+ xor r10d,ebp
movdqa XMMWORD[48+rsp],xmm5
lea r9,[5+r8]
mov DWORD[((64+12))+rsp],r10d
bswap r9d
lea r10,[6+r8]
mov eax,DWORD[240+rcx]
- xor r9d,r11d
+ xor r9d,ebp
bswap r10d
mov DWORD[((80+12))+rsp],r9d
- xor r10d,r11d
+ xor r10d,ebp
lea r9,[7+r8]
mov DWORD[((96+12))+rsp],r10d
bswap r9d
mov r10d,DWORD[((OPENSSL_ia32cap_P+4))]
- xor r9d,r11d
+ xor r9d,ebp
and r10d,71303168
mov DWORD[((112+12))+rsp],r9d
@@ -1221,7 +1223,7 @@ ALIGN 16
$L$ctr32_6x:
shl eax,4
mov r10d,48
- bswap r11d
+ bswap ebp
lea rcx,[32+rax*1+rcx]
sub r10,rax
jmp NEAR $L$ctr32_loop6
@@ -1232,32 +1234,32 @@ $L$ctr32_loop6:
movups xmm0,XMMWORD[((-48))+r10*1+rcx]
DB 102,15,56,220,209
mov eax,r8d
- xor eax,r11d
+ xor eax,ebp
DB 102,15,56,220,217
DB 0x0f,0x38,0xf1,0x44,0x24,12
lea eax,[1+r8]
DB 102,15,56,220,225
- xor eax,r11d
+ xor eax,ebp
DB 0x0f,0x38,0xf1,0x44,0x24,28
DB 102,15,56,220,233
lea eax,[2+r8]
- xor eax,r11d
+ xor eax,ebp
DB 102,15,56,220,241
DB 0x0f,0x38,0xf1,0x44,0x24,44
lea eax,[3+r8]
DB 102,15,56,220,249
movups xmm1,XMMWORD[((-32))+r10*1+rcx]
- xor eax,r11d
+ xor eax,ebp
DB 102,15,56,220,208
DB 0x0f,0x38,0xf1,0x44,0x24,60
lea eax,[4+r8]
DB 102,15,56,220,216
- xor eax,r11d
+ xor eax,ebp
DB 0x0f,0x38,0xf1,0x44,0x24,76
DB 102,15,56,220,224
lea eax,[5+r8]
- xor eax,r11d
+ xor eax,ebp
DB 102,15,56,220,232
DB 0x0f,0x38,0xf1,0x44,0x24,92
mov rax,r10
@@ -1318,7 +1320,7 @@ DB 102,15,56,220,217
bswap r9d
movups xmm0,XMMWORD[((32-128))+rcx]
DB 102,15,56,220,225
- xor r9d,r11d
+ xor r9d,ebp
nop
DB 102,15,56,220,233
mov DWORD[((0+12))+rsp],r9d
@@ -1331,7 +1333,7 @@ DB 102,68,15,56,220,201
bswap r9d
DB 102,15,56,220,208
DB 102,15,56,220,216
- xor r9d,r11d
+ xor r9d,ebp
DB 0x66,0x90
DB 102,15,56,220,224
DB 102,15,56,220,232
@@ -1345,7 +1347,7 @@ DB 102,68,15,56,220,200
bswap r9d
DB 102,15,56,220,209
DB 102,15,56,220,217
- xor r9d,r11d
+ xor r9d,ebp
DB 0x66,0x90
DB 102,15,56,220,225
DB 102,15,56,220,233
@@ -1359,7 +1361,7 @@ DB 102,68,15,56,220,201
bswap r9d
DB 102,15,56,220,208
DB 102,15,56,220,216
- xor r9d,r11d
+ xor r9d,ebp
DB 0x66,0x90
DB 102,15,56,220,224
DB 102,15,56,220,232
@@ -1373,7 +1375,7 @@ DB 102,68,15,56,220,200
bswap r9d
DB 102,15,56,220,209
DB 102,15,56,220,217
- xor r9d,r11d
+ xor r9d,ebp
DB 0x66,0x90
DB 102,15,56,220,225
DB 102,15,56,220,233
@@ -1387,7 +1389,7 @@ DB 102,68,15,56,220,201
bswap r9d
DB 102,15,56,220,208
DB 102,15,56,220,216
- xor r9d,r11d
+ xor r9d,ebp
DB 0x66,0x90
DB 102,15,56,220,224
DB 102,15,56,220,232
@@ -1401,7 +1403,7 @@ DB 102,68,15,56,220,200
bswap r9d
DB 102,15,56,220,209
DB 102,15,56,220,217
- xor r9d,r11d
+ xor r9d,ebp
DB 0x66,0x90
DB 102,15,56,220,225
DB 102,15,56,220,233
@@ -1416,7 +1418,7 @@ DB 102,68,15,56,220,201
DB 102,15,56,220,208
DB 102,15,56,220,216
DB 102,15,56,220,224
- xor r9d,r11d
+ xor r9d,ebp
movdqu xmm10,XMMWORD[rdi]
DB 102,15,56,220,232
mov DWORD[((112+12))+rsp],r9d
@@ -1651,32 +1653,32 @@ DB 102,15,56,221,225
$L$ctr32_done:
xorps xmm0,xmm0
- xor r11d,r11d
+ xor ebp,ebp
pxor xmm1,xmm1
pxor xmm2,xmm2
pxor xmm3,xmm3
pxor xmm4,xmm4
pxor xmm5,xmm5
- movaps xmm6,XMMWORD[((-160))+rbp]
- movaps XMMWORD[(-160)+rbp],xmm0
- movaps xmm7,XMMWORD[((-144))+rbp]
- movaps XMMWORD[(-144)+rbp],xmm0
- movaps xmm8,XMMWORD[((-128))+rbp]
- movaps XMMWORD[(-128)+rbp],xmm0
- movaps xmm9,XMMWORD[((-112))+rbp]
- movaps XMMWORD[(-112)+rbp],xmm0
- movaps xmm10,XMMWORD[((-96))+rbp]
- movaps XMMWORD[(-96)+rbp],xmm0
- movaps xmm11,XMMWORD[((-80))+rbp]
- movaps XMMWORD[(-80)+rbp],xmm0
- movaps xmm12,XMMWORD[((-64))+rbp]
- movaps XMMWORD[(-64)+rbp],xmm0
- movaps xmm13,XMMWORD[((-48))+rbp]
- movaps XMMWORD[(-48)+rbp],xmm0
- movaps xmm14,XMMWORD[((-32))+rbp]
- movaps XMMWORD[(-32)+rbp],xmm0
- movaps xmm15,XMMWORD[((-16))+rbp]
- movaps XMMWORD[(-16)+rbp],xmm0
+ movaps xmm6,XMMWORD[((-168))+r11]
+ movaps XMMWORD[(-168)+r11],xmm0
+ movaps xmm7,XMMWORD[((-152))+r11]
+ movaps XMMWORD[(-152)+r11],xmm0
+ movaps xmm8,XMMWORD[((-136))+r11]
+ movaps XMMWORD[(-136)+r11],xmm0
+ movaps xmm9,XMMWORD[((-120))+r11]
+ movaps XMMWORD[(-120)+r11],xmm0
+ movaps xmm10,XMMWORD[((-104))+r11]
+ movaps XMMWORD[(-104)+r11],xmm0
+ movaps xmm11,XMMWORD[((-88))+r11]
+ movaps XMMWORD[(-88)+r11],xmm0
+ movaps xmm12,XMMWORD[((-72))+r11]
+ movaps XMMWORD[(-72)+r11],xmm0
+ movaps xmm13,XMMWORD[((-56))+r11]
+ movaps XMMWORD[(-56)+r11],xmm0
+ movaps xmm14,XMMWORD[((-40))+r11]
+ movaps XMMWORD[(-40)+r11],xmm0
+ movaps xmm15,XMMWORD[((-24))+r11]
+ movaps XMMWORD[(-24)+r11],xmm0
movaps XMMWORD[rsp],xmm0
movaps XMMWORD[16+rsp],xmm0
movaps XMMWORD[32+rsp],xmm0
@@ -1685,12 +1687,15 @@ $L$ctr32_done:
movaps XMMWORD[80+rsp],xmm0
movaps XMMWORD[96+rsp],xmm0
movaps XMMWORD[112+rsp],xmm0
- lea rsp,[rbp]
- pop rbp
+ mov rbp,QWORD[((-8))+r11]
+
+ lea rsp,[r11]
+
$L$ctr32_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_ctr32_encrypt_blocks:
global aesni_xts_encrypt
@@ -1708,22 +1713,24 @@ $L$SEH_begin_aesni_xts_encrypt:
mov r9,QWORD[48+rsp]
- lea rax,[rsp]
+
+ lea r11,[rsp]
+
push rbp
+
sub rsp,272
and rsp,-16
- movaps XMMWORD[(-168)+rax],xmm6
- movaps XMMWORD[(-152)+rax],xmm7
- movaps XMMWORD[(-136)+rax],xmm8
- movaps XMMWORD[(-120)+rax],xmm9
- movaps XMMWORD[(-104)+rax],xmm10
- movaps XMMWORD[(-88)+rax],xmm11
- movaps XMMWORD[(-72)+rax],xmm12
- movaps XMMWORD[(-56)+rax],xmm13
- movaps XMMWORD[(-40)+rax],xmm14
- movaps XMMWORD[(-24)+rax],xmm15
+ movaps XMMWORD[(-168)+r11],xmm6
+ movaps XMMWORD[(-152)+r11],xmm7
+ movaps XMMWORD[(-136)+r11],xmm8
+ movaps XMMWORD[(-120)+r11],xmm9
+ movaps XMMWORD[(-104)+r11],xmm10
+ movaps XMMWORD[(-88)+r11],xmm11
+ movaps XMMWORD[(-72)+r11],xmm12
+ movaps XMMWORD[(-56)+r11],xmm13
+ movaps XMMWORD[(-40)+r11],xmm14
+ movaps XMMWORD[(-24)+r11],xmm15
$L$xts_enc_body:
- lea rbp,[((-8))+rax]
movups xmm2,XMMWORD[r9]
mov eax,DWORD[240+r8]
mov r10d,DWORD[240+rcx]
@@ -1739,7 +1746,7 @@ DB 102,15,56,220,209
jnz NEAR $L$oop_enc1_8
DB 102,15,56,221,209
movups xmm0,XMMWORD[rcx]
- mov r11,rcx
+ mov rbp,rcx
mov eax,r10d
shl r10d,4
mov r9,rdx
@@ -1795,9 +1802,9 @@ DB 102,15,56,221,209
jc NEAR $L$xts_enc_short
mov eax,16+96
- lea rcx,[32+r10*1+r11]
+ lea rcx,[32+r10*1+rbp]
sub rax,r10
- movups xmm1,XMMWORD[16+r11]
+ movups xmm1,XMMWORD[16+rbp]
mov r10,rax
lea r8,[$L$xts_magic]
jmp NEAR $L$xts_enc_grandloop
@@ -1822,7 +1829,7 @@ DB 102,15,56,220,225
movdqa xmm9,XMMWORD[96+rsp]
pxor xmm6,xmm14
DB 102,15,56,220,233
- movups xmm0,XMMWORD[32+r11]
+ movups xmm0,XMMWORD[32+rbp]
lea rdi,[96+rdi]
pxor xmm7,xmm8
@@ -1831,7 +1838,7 @@ DB 102,15,56,220,241
pxor xmm11,xmm9
movdqa XMMWORD[rsp],xmm10
DB 102,15,56,220,249
- movups xmm1,XMMWORD[48+r11]
+ movups xmm1,XMMWORD[48+rbp]
pxor xmm12,xmm9
DB 102,15,56,220,208
@@ -1846,7 +1853,7 @@ DB 102,15,56,220,232
movdqa XMMWORD[64+rsp],xmm14
DB 102,15,56,220,240
DB 102,15,56,220,248
- movups xmm0,XMMWORD[64+r11]
+ movups xmm0,XMMWORD[64+rbp]
movdqa XMMWORD[80+rsp],xmm8
pshufd xmm9,xmm15,0x5f
jmp NEAR $L$xts_enc_loop6
@@ -1878,7 +1885,7 @@ DB 102,15,56,220,209
psrad xmm14,31
DB 102,15,56,220,217
pand xmm14,xmm8
- movups xmm10,XMMWORD[r11]
+ movups xmm10,XMMWORD[rbp]
DB 102,15,56,220,225
DB 102,15,56,220,233
DB 102,15,56,220,241
@@ -1946,10 +1953,10 @@ DB 102,15,56,220,217
DB 102,15,56,220,225
DB 102,15,56,220,233
pxor xmm15,xmm0
- movups xmm0,XMMWORD[r11]
+ movups xmm0,XMMWORD[rbp]
DB 102,15,56,220,241
DB 102,15,56,220,249
- movups xmm1,XMMWORD[16+r11]
+ movups xmm1,XMMWORD[16+rbp]
pxor xmm14,xmm15
DB 102,15,56,221,84,36,0
@@ -1976,7 +1983,7 @@ DB 102,15,56,221,124,36,80
mov eax,16+96
sub eax,r10d
- mov rcx,r11
+ mov rcx,rbp
shr eax,4
$L$xts_enc_short:
@@ -2132,7 +2139,7 @@ $L$xts_enc_steal:
jnz NEAR $L$xts_enc_steal
sub rsi,r9
- mov rcx,r11
+ mov rcx,rbp
mov eax,r10d
movups xmm2,XMMWORD[((-16))+rsi]
@@ -2158,26 +2165,26 @@ $L$xts_enc_ret:
pxor xmm3,xmm3
pxor xmm4,xmm4
pxor xmm5,xmm5
- movaps xmm6,XMMWORD[((-160))+rbp]
- movaps XMMWORD[(-160)+rbp],xmm0
- movaps xmm7,XMMWORD[((-144))+rbp]
- movaps XMMWORD[(-144)+rbp],xmm0
- movaps xmm8,XMMWORD[((-128))+rbp]
- movaps XMMWORD[(-128)+rbp],xmm0
- movaps xmm9,XMMWORD[((-112))+rbp]
- movaps XMMWORD[(-112)+rbp],xmm0
- movaps xmm10,XMMWORD[((-96))+rbp]
- movaps XMMWORD[(-96)+rbp],xmm0
- movaps xmm11,XMMWORD[((-80))+rbp]
- movaps XMMWORD[(-80)+rbp],xmm0
- movaps xmm12,XMMWORD[((-64))+rbp]
- movaps XMMWORD[(-64)+rbp],xmm0
- movaps xmm13,XMMWORD[((-48))+rbp]
- movaps XMMWORD[(-48)+rbp],xmm0
- movaps xmm14,XMMWORD[((-32))+rbp]
- movaps XMMWORD[(-32)+rbp],xmm0
- movaps xmm15,XMMWORD[((-16))+rbp]
- movaps XMMWORD[(-16)+rbp],xmm0
+ movaps xmm6,XMMWORD[((-168))+r11]
+ movaps XMMWORD[(-168)+r11],xmm0
+ movaps xmm7,XMMWORD[((-152))+r11]
+ movaps XMMWORD[(-152)+r11],xmm0
+ movaps xmm8,XMMWORD[((-136))+r11]
+ movaps XMMWORD[(-136)+r11],xmm0
+ movaps xmm9,XMMWORD[((-120))+r11]
+ movaps XMMWORD[(-120)+r11],xmm0
+ movaps xmm10,XMMWORD[((-104))+r11]
+ movaps XMMWORD[(-104)+r11],xmm0
+ movaps xmm11,XMMWORD[((-88))+r11]
+ movaps XMMWORD[(-88)+r11],xmm0
+ movaps xmm12,XMMWORD[((-72))+r11]
+ movaps XMMWORD[(-72)+r11],xmm0
+ movaps xmm13,XMMWORD[((-56))+r11]
+ movaps XMMWORD[(-56)+r11],xmm0
+ movaps xmm14,XMMWORD[((-40))+r11]
+ movaps XMMWORD[(-40)+r11],xmm0
+ movaps xmm15,XMMWORD[((-24))+r11]
+ movaps XMMWORD[(-24)+r11],xmm0
movaps XMMWORD[rsp],xmm0
movaps XMMWORD[16+rsp],xmm0
movaps XMMWORD[32+rsp],xmm0
@@ -2185,12 +2192,15 @@ $L$xts_enc_ret:
movaps XMMWORD[64+rsp],xmm0
movaps XMMWORD[80+rsp],xmm0
movaps XMMWORD[96+rsp],xmm0
- lea rsp,[rbp]
- pop rbp
+ mov rbp,QWORD[((-8))+r11]
+
+ lea rsp,[r11]
+
$L$xts_enc_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_xts_encrypt:
global aesni_xts_decrypt
@@ -2208,22 +2218,24 @@ $L$SEH_begin_aesni_xts_decrypt:
mov r9,QWORD[48+rsp]
- lea rax,[rsp]
+
+ lea r11,[rsp]
+
push rbp
+
sub rsp,272
and rsp,-16
- movaps XMMWORD[(-168)+rax],xmm6
- movaps XMMWORD[(-152)+rax],xmm7
- movaps XMMWORD[(-136)+rax],xmm8
- movaps XMMWORD[(-120)+rax],xmm9
- movaps XMMWORD[(-104)+rax],xmm10
- movaps XMMWORD[(-88)+rax],xmm11
- movaps XMMWORD[(-72)+rax],xmm12
- movaps XMMWORD[(-56)+rax],xmm13
- movaps XMMWORD[(-40)+rax],xmm14
- movaps XMMWORD[(-24)+rax],xmm15
+ movaps XMMWORD[(-168)+r11],xmm6
+ movaps XMMWORD[(-152)+r11],xmm7
+ movaps XMMWORD[(-136)+r11],xmm8
+ movaps XMMWORD[(-120)+r11],xmm9
+ movaps XMMWORD[(-104)+r11],xmm10
+ movaps XMMWORD[(-88)+r11],xmm11
+ movaps XMMWORD[(-72)+r11],xmm12
+ movaps XMMWORD[(-56)+r11],xmm13
+ movaps XMMWORD[(-40)+r11],xmm14
+ movaps XMMWORD[(-24)+r11],xmm15
$L$xts_dec_body:
- lea rbp,[((-8))+rax]
movups xmm2,XMMWORD[r9]
mov eax,DWORD[240+r8]
mov r10d,DWORD[240+rcx]
@@ -2245,7 +2257,7 @@ DB 102,15,56,221,209
sub rdx,rax
movups xmm0,XMMWORD[rcx]
- mov r11,rcx
+ mov rbp,rcx
mov eax,r10d
shl r10d,4
mov r9,rdx
@@ -2301,9 +2313,9 @@ DB 102,15,56,221,209
jc NEAR $L$xts_dec_short
mov eax,16+96
- lea rcx,[32+r10*1+r11]
+ lea rcx,[32+r10*1+rbp]
sub rax,r10
- movups xmm1,XMMWORD[16+r11]
+ movups xmm1,XMMWORD[16+rbp]
mov r10,rax
lea r8,[$L$xts_magic]
jmp NEAR $L$xts_dec_grandloop
@@ -2328,7 +2340,7 @@ DB 102,15,56,222,225
movdqa xmm9,XMMWORD[96+rsp]
pxor xmm6,xmm14
DB 102,15,56,222,233
- movups xmm0,XMMWORD[32+r11]
+ movups xmm0,XMMWORD[32+rbp]
lea rdi,[96+rdi]
pxor xmm7,xmm8
@@ -2337,7 +2349,7 @@ DB 102,15,56,222,241
pxor xmm11,xmm9
movdqa XMMWORD[rsp],xmm10
DB 102,15,56,222,249
- movups xmm1,XMMWORD[48+r11]
+ movups xmm1,XMMWORD[48+rbp]
pxor xmm12,xmm9
DB 102,15,56,222,208
@@ -2352,7 +2364,7 @@ DB 102,15,56,222,232
movdqa XMMWORD[64+rsp],xmm14
DB 102,15,56,222,240
DB 102,15,56,222,248
- movups xmm0,XMMWORD[64+r11]
+ movups xmm0,XMMWORD[64+rbp]
movdqa XMMWORD[80+rsp],xmm8
pshufd xmm9,xmm15,0x5f
jmp NEAR $L$xts_dec_loop6
@@ -2384,7 +2396,7 @@ DB 102,15,56,222,209
psrad xmm14,31
DB 102,15,56,222,217
pand xmm14,xmm8
- movups xmm10,XMMWORD[r11]
+ movups xmm10,XMMWORD[rbp]
DB 102,15,56,222,225
DB 102,15,56,222,233
DB 102,15,56,222,241
@@ -2452,10 +2464,10 @@ DB 102,15,56,222,217
DB 102,15,56,222,225
DB 102,15,56,222,233
pxor xmm15,xmm0
- movups xmm0,XMMWORD[r11]
+ movups xmm0,XMMWORD[rbp]
DB 102,15,56,222,241
DB 102,15,56,222,249
- movups xmm1,XMMWORD[16+r11]
+ movups xmm1,XMMWORD[16+rbp]
pxor xmm14,xmm15
DB 102,15,56,223,84,36,0
@@ -2482,7 +2494,7 @@ DB 102,15,56,223,124,36,80
mov eax,16+96
sub eax,r10d
- mov rcx,r11
+ mov rcx,rbp
shr eax,4
$L$xts_dec_short:
@@ -2639,7 +2651,7 @@ $L$xts_dec_done:
jz NEAR $L$xts_dec_ret
$L$xts_dec_done2:
mov rdx,r9
- mov rcx,r11
+ mov rcx,rbp
mov eax,r10d
movups xmm2,XMMWORD[rdi]
@@ -2669,7 +2681,7 @@ $L$xts_dec_steal:
jnz NEAR $L$xts_dec_steal
sub rsi,r9
- mov rcx,r11
+ mov rcx,rbp
mov eax,r10d
movups xmm2,XMMWORD[rsi]
@@ -2695,26 +2707,26 @@ $L$xts_dec_ret:
pxor xmm3,xmm3
pxor xmm4,xmm4
pxor xmm5,xmm5
- movaps xmm6,XMMWORD[((-160))+rbp]
- movaps XMMWORD[(-160)+rbp],xmm0
- movaps xmm7,XMMWORD[((-144))+rbp]
- movaps XMMWORD[(-144)+rbp],xmm0
- movaps xmm8,XMMWORD[((-128))+rbp]
- movaps XMMWORD[(-128)+rbp],xmm0
- movaps xmm9,XMMWORD[((-112))+rbp]
- movaps XMMWORD[(-112)+rbp],xmm0
- movaps xmm10,XMMWORD[((-96))+rbp]
- movaps XMMWORD[(-96)+rbp],xmm0
- movaps xmm11,XMMWORD[((-80))+rbp]
- movaps XMMWORD[(-80)+rbp],xmm0
- movaps xmm12,XMMWORD[((-64))+rbp]
- movaps XMMWORD[(-64)+rbp],xmm0
- movaps xmm13,XMMWORD[((-48))+rbp]
- movaps XMMWORD[(-48)+rbp],xmm0
- movaps xmm14,XMMWORD[((-32))+rbp]
- movaps XMMWORD[(-32)+rbp],xmm0
- movaps xmm15,XMMWORD[((-16))+rbp]
- movaps XMMWORD[(-16)+rbp],xmm0
+ movaps xmm6,XMMWORD[((-168))+r11]
+ movaps XMMWORD[(-168)+r11],xmm0
+ movaps xmm7,XMMWORD[((-152))+r11]
+ movaps XMMWORD[(-152)+r11],xmm0
+ movaps xmm8,XMMWORD[((-136))+r11]
+ movaps XMMWORD[(-136)+r11],xmm0
+ movaps xmm9,XMMWORD[((-120))+r11]
+ movaps XMMWORD[(-120)+r11],xmm0
+ movaps xmm10,XMMWORD[((-104))+r11]
+ movaps XMMWORD[(-104)+r11],xmm0
+ movaps xmm11,XMMWORD[((-88))+r11]
+ movaps XMMWORD[(-88)+r11],xmm0
+ movaps xmm12,XMMWORD[((-72))+r11]
+ movaps XMMWORD[(-72)+r11],xmm0
+ movaps xmm13,XMMWORD[((-56))+r11]
+ movaps XMMWORD[(-56)+r11],xmm0
+ movaps xmm14,XMMWORD[((-40))+r11]
+ movaps XMMWORD[(-40)+r11],xmm0
+ movaps xmm15,XMMWORD[((-24))+r11]
+ movaps XMMWORD[(-24)+r11],xmm0
movaps XMMWORD[rsp],xmm0
movaps XMMWORD[16+rsp],xmm0
movaps XMMWORD[32+rsp],xmm0
@@ -2722,12 +2734,15 @@ $L$xts_dec_ret:
movaps XMMWORD[64+rsp],xmm0
movaps XMMWORD[80+rsp],xmm0
movaps XMMWORD[96+rsp],xmm0
- lea rsp,[rbp]
- pop rbp
+ mov rbp,QWORD[((-8))+r11]
+
+ lea rsp,[r11]
+
$L$xts_dec_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_xts_decrypt:
global aesni_ocb_encrypt
@@ -2745,12 +2760,18 @@ $L$SEH_begin_aesni_ocb_encrypt:
mov r9,QWORD[48+rsp]
+
lea rax,[rsp]
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
lea rsp,[((-160))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -2950,16 +2971,23 @@ $L$ocb_enc_done:
movaps XMMWORD[144+rsp],xmm0
lea rax,[((160+40))+rsp]
$L$ocb_enc_pop:
- lea rsp,[160+rsp]
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
+ mov r14,QWORD[((-40))+rax]
+
+ mov r13,QWORD[((-32))+rax]
+
+ mov r12,QWORD[((-24))+rax]
+
+ mov rbp,QWORD[((-16))+rax]
+
+ mov rbx,QWORD[((-8))+rax]
+
+ lea rsp,[rax]
+
$L$ocb_enc_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_ocb_encrypt:
@@ -3184,12 +3212,18 @@ $L$SEH_begin_aesni_ocb_decrypt:
mov r9,QWORD[48+rsp]
+
lea rax,[rsp]
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
lea rsp,[((-160))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -3411,16 +3445,23 @@ $L$ocb_dec_done:
movaps XMMWORD[144+rsp],xmm0
lea rax,[((160+40))+rsp]
$L$ocb_dec_pop:
- lea rsp,[160+rsp]
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
+ mov r14,QWORD[((-40))+rax]
+
+ mov r13,QWORD[((-32))+rax]
+
+ mov r12,QWORD[((-24))+rax]
+
+ mov rbp,QWORD[((-16))+rax]
+
+ mov rbx,QWORD[((-8))+rax]
+
+ lea rsp,[rax]
+
$L$ocb_dec_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_ocb_decrypt:
@@ -3633,6 +3674,7 @@ $L$SEH_begin_aesni_cbc_encrypt:
mov r9,QWORD[48+rsp]
+
test rdx,rdx
jz NEAR $L$cbc_ret
@@ -3725,8 +3767,10 @@ DB 102,15,56,223,209
jmp NEAR $L$cbc_ret
ALIGN 16
$L$cbc_decrypt_bulk:
- lea rax,[rsp]
+ lea r11,[rsp]
+
push rbp
+
sub rsp,176
and rsp,-16
movaps XMMWORD[16+rsp],xmm6
@@ -3740,7 +3784,7 @@ $L$cbc_decrypt_bulk:
movaps XMMWORD[144+rsp],xmm14
movaps XMMWORD[160+rsp],xmm15
$L$cbc_decrypt_body:
- lea rbp,[((-8))+rax]
+ mov rbp,rcx
movups xmm10,XMMWORD[r8]
mov eax,r10d
cmp rdx,0x50
@@ -3780,7 +3824,7 @@ $L$cbc_dec_loop8_enter:
pxor xmm3,xmm0
movups xmm1,XMMWORD[((16-112))+rcx]
pxor xmm4,xmm0
- xor r11,r11
+ mov rbp,-1
cmp rdx,0x70
pxor xmm5,xmm0
pxor xmm6,xmm0
@@ -3796,10 +3840,10 @@ DB 102,15,56,222,233
DB 102,15,56,222,241
DB 102,15,56,222,249
DB 102,68,15,56,222,193
- setnc r11b
- shl r11,7
+ adc rbp,0
+ and rbp,128
DB 102,68,15,56,222,201
- add r11,rdi
+ add rbp,rdi
movups xmm1,XMMWORD[((48-112))+rcx]
DB 102,15,56,222,208
DB 102,15,56,222,216
@@ -3937,18 +3981,18 @@ DB 102,65,15,56,223,219
movdqu xmm0,XMMWORD[112+rdi]
DB 102,65,15,56,223,228
lea rdi,[128+rdi]
- movdqu xmm11,XMMWORD[r11]
+ movdqu xmm11,XMMWORD[rbp]
DB 102,65,15,56,223,237
DB 102,65,15,56,223,246
- movdqu xmm12,XMMWORD[16+r11]
- movdqu xmm13,XMMWORD[32+r11]
+ movdqu xmm12,XMMWORD[16+rbp]
+ movdqu xmm13,XMMWORD[32+rbp]
DB 102,65,15,56,223,255
DB 102,68,15,56,223,193
- movdqu xmm14,XMMWORD[48+r11]
- movdqu xmm15,XMMWORD[64+r11]
+ movdqu xmm14,XMMWORD[48+rbp]
+ movdqu xmm15,XMMWORD[64+rbp]
DB 102,69,15,56,223,202
movdqa xmm10,xmm0
- movdqu xmm1,XMMWORD[80+r11]
+ movdqu xmm1,XMMWORD[80+rbp]
movups xmm0,XMMWORD[((-112))+rcx]
movups XMMWORD[rsi],xmm2
@@ -4067,7 +4111,7 @@ $L$cbc_dec_loop6_enter:
pxor xmm5,xmm13
movdqu XMMWORD[32+rsi],xmm4
pxor xmm6,xmm14
- mov rcx,r11
+ mov rcx,rbp
movdqu XMMWORD[48+rsi],xmm5
pxor xmm7,xmm15
mov eax,r10d
@@ -4236,18 +4280,23 @@ $L$cbc_dec_ret:
movaps XMMWORD[144+rsp],xmm0
movaps xmm15,XMMWORD[160+rsp]
movaps XMMWORD[160+rsp],xmm0
- lea rsp,[rbp]
- pop rbp
+ mov rbp,QWORD[((-8))+r11]
+
+ lea rsp,[r11]
+
$L$cbc_ret:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_cbc_encrypt:
global aesni_set_decrypt_key
ALIGN 16
aesni_set_decrypt_key:
+
DB 0x48,0x83,0xEC,0x08
+
call __aesni_set_encrypt_key
shl edx,4
test eax,eax
@@ -4280,7 +4329,9 @@ DB 102,15,56,219,192
pxor xmm0,xmm0
$L$dec_key_ret:
add rsp,8
+
DB 0F3h,0C3h ;repret
+
$L$SEH_end_set_decrypt_key:
global aesni_set_encrypt_key
@@ -4288,7 +4339,9 @@ global aesni_set_encrypt_key
ALIGN 16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
+
DB 0x48,0x83,0xEC,0x08
+
mov rax,-1
test rcx,rcx
jz NEAR $L$enc_key_ret
@@ -4581,7 +4634,9 @@ $L$enc_key_ret:
pxor xmm4,xmm4
pxor xmm5,xmm5
add rsp,8
+
DB 0F3h,0C3h ;repret
+
$L$SEH_end_set_encrypt_key:
ALIGN 16
@@ -4753,13 +4808,16 @@ ctr_xts_se_handler:
cmp rbx,r10
jae NEAR $L$common_seh_tail
- mov rax,QWORD[160+r8]
- lea rsi,[((-160))+rax]
+ mov rax,QWORD[208+r8]
+
+ lea rsi,[((-168))+rax]
lea rdi,[512+r8]
mov ecx,20
DD 0xa548f3fc
- jmp NEAR $L$common_rbp_tail
+ mov rbp,QWORD[((-8))+rax]
+ mov QWORD[160+r8],rbp
+ jmp NEAR $L$common_seh_tail
@@ -4841,9 +4899,13 @@ cbc_se_handler:
cmp rbx,r10
jb NEAR $L$common_seh_tail
+ mov rax,QWORD[120+r8]
+
lea r10,[$L$cbc_decrypt_body]
cmp rbx,r10
- jb NEAR $L$restore_cbc_rax
+ jb NEAR $L$common_seh_tail
+
+ mov rax,QWORD[152+r8]
lea r10,[$L$cbc_ret]
cmp rbx,r10
@@ -4854,15 +4916,10 @@ cbc_se_handler:
mov ecx,20
DD 0xa548f3fc
-$L$common_rbp_tail:
- mov rax,QWORD[160+r8]
- mov rbp,QWORD[rax]
- lea rax,[8+rax]
- mov QWORD[160+r8],rbp
- jmp NEAR $L$common_seh_tail
+ mov rax,QWORD[208+r8]
-$L$restore_cbc_rax:
- mov rax,QWORD[120+r8]
+ mov rbp,QWORD[((-8))+rax]
+ mov QWORD[160+r8],rbp
$L$common_seh_tail:
mov rdi,QWORD[8+rax]
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/bsaes-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/bsaes-x86_64.asm
index 6d75248d1f..9ea8253d7c 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/bsaes-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/aes/bsaes-x86_64.asm
@@ -1072,6 +1072,7 @@ global bsaes_cbc_encrypt
ALIGN 16
bsaes_cbc_encrypt:
+
mov r11d,DWORD[48+rsp]
cmp r11d,0
jne NEAR asm_AES_cbc_encrypt
@@ -1081,12 +1082,19 @@ bsaes_cbc_encrypt:
mov rax,rsp
$L$cbc_dec_prologue:
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-72))+rsp]
+
mov r10,QWORD[160+rsp]
lea rsp,[((-160))+rsp]
movaps XMMWORD[64+rsp],xmm6
@@ -1101,6 +1109,7 @@ $L$cbc_dec_prologue:
movaps XMMWORD[208+rsp],xmm15
$L$cbc_dec_body:
mov rbp,rsp
+
mov eax,DWORD[240+r9]
mov r12,rcx
mov r13,rdx
@@ -1319,7 +1328,8 @@ $L$cbc_dec_bzero:
cmp rbp,rax
ja NEAR $L$cbc_dec_bzero
- lea rsp,[rbp]
+ lea rax,[120+rbp]
+
movaps xmm6,XMMWORD[64+rbp]
movaps xmm7,XMMWORD[80+rbp]
movaps xmm8,XMMWORD[96+rbp]
@@ -1330,32 +1340,48 @@ $L$cbc_dec_bzero:
movaps xmm13,XMMWORD[176+rbp]
movaps xmm14,XMMWORD[192+rbp]
movaps xmm15,XMMWORD[208+rbp]
- lea rsp,[160+rbp]
- mov r15,QWORD[72+rsp]
- mov r14,QWORD[80+rsp]
- mov r13,QWORD[88+rsp]
- mov r12,QWORD[96+rsp]
- mov rbx,QWORD[104+rsp]
- mov rax,QWORD[112+rsp]
- lea rsp,[120+rsp]
- mov rbp,rax
+ lea rax,[160+rax]
+$L$cbc_dec_tail:
+ mov r15,QWORD[((-48))+rax]
+
+ mov r14,QWORD[((-40))+rax]
+
+ mov r13,QWORD[((-32))+rax]
+
+ mov r12,QWORD[((-24))+rax]
+
+ mov rbx,QWORD[((-16))+rax]
+
+ mov rbp,QWORD[((-8))+rax]
+
+ lea rsp,[rax]
+
$L$cbc_dec_epilogue:
DB 0F3h,0C3h ;repret
+
global bsaes_ctr32_encrypt_blocks
ALIGN 16
bsaes_ctr32_encrypt_blocks:
+
mov rax,rsp
$L$ctr_enc_prologue:
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-72))+rsp]
+
mov r10,QWORD[160+rsp]
lea rsp,[((-160))+rsp]
movaps XMMWORD[64+rsp],xmm6
@@ -1370,6 +1396,7 @@ $L$ctr_enc_prologue:
movaps XMMWORD[208+rsp],xmm15
$L$ctr_enc_body:
mov rbp,rsp
+
movdqu xmm0,XMMWORD[r10]
mov eax,DWORD[240+r9]
mov r12,rcx
@@ -1543,7 +1570,8 @@ $L$ctr_enc_bzero:
cmp rbp,rax
ja NEAR $L$ctr_enc_bzero
- lea rsp,[rbp]
+ lea rax,[120+rbp]
+
movaps xmm6,XMMWORD[64+rbp]
movaps xmm7,XMMWORD[80+rbp]
movaps xmm8,XMMWORD[96+rbp]
@@ -1554,31 +1582,47 @@ $L$ctr_enc_bzero:
movaps xmm13,XMMWORD[176+rbp]
movaps xmm14,XMMWORD[192+rbp]
movaps xmm15,XMMWORD[208+rbp]
- lea rsp,[160+rbp]
- mov r15,QWORD[72+rsp]
- mov r14,QWORD[80+rsp]
- mov r13,QWORD[88+rsp]
- mov r12,QWORD[96+rsp]
- mov rbx,QWORD[104+rsp]
- mov rax,QWORD[112+rsp]
- lea rsp,[120+rsp]
- mov rbp,rax
+ lea rax,[160+rax]
+$L$ctr_enc_tail:
+ mov r15,QWORD[((-48))+rax]
+
+ mov r14,QWORD[((-40))+rax]
+
+ mov r13,QWORD[((-32))+rax]
+
+ mov r12,QWORD[((-24))+rax]
+
+ mov rbx,QWORD[((-16))+rax]
+
+ mov rbp,QWORD[((-8))+rax]
+
+ lea rsp,[rax]
+
$L$ctr_enc_epilogue:
DB 0F3h,0C3h ;repret
+
global bsaes_xts_encrypt
ALIGN 16
bsaes_xts_encrypt:
+
mov rax,rsp
$L$xts_enc_prologue:
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-72))+rsp]
+
mov r10,QWORD[160+rsp]
mov r11,QWORD[168+rsp]
lea rsp,[((-160))+rsp]
@@ -1594,6 +1638,7 @@ $L$xts_enc_prologue:
movaps XMMWORD[208+rsp],xmm15
$L$xts_enc_body:
mov rbp,rsp
+
mov r12,rcx
mov r13,rdx
mov r14,r8
@@ -2019,7 +2064,8 @@ $L$xts_enc_bzero:
cmp rbp,rax
ja NEAR $L$xts_enc_bzero
- lea rsp,[rbp]
+ lea rax,[120+rbp]
+
movaps xmm6,XMMWORD[64+rbp]
movaps xmm7,XMMWORD[80+rbp]
movaps xmm8,XMMWORD[96+rbp]
@@ -2030,32 +2076,48 @@ $L$xts_enc_bzero:
movaps xmm13,XMMWORD[176+rbp]
movaps xmm14,XMMWORD[192+rbp]
movaps xmm15,XMMWORD[208+rbp]
- lea rsp,[160+rbp]
- mov r15,QWORD[72+rsp]
- mov r14,QWORD[80+rsp]
- mov r13,QWORD[88+rsp]
- mov r12,QWORD[96+rsp]
- mov rbx,QWORD[104+rsp]
- mov rax,QWORD[112+rsp]
- lea rsp,[120+rsp]
- mov rbp,rax
+ lea rax,[160+rax]
+$L$xts_enc_tail:
+ mov r15,QWORD[((-48))+rax]
+
+ mov r14,QWORD[((-40))+rax]
+
+ mov r13,QWORD[((-32))+rax]
+
+ mov r12,QWORD[((-24))+rax]
+
+ mov rbx,QWORD[((-16))+rax]
+
+ mov rbp,QWORD[((-8))+rax]
+
+ lea rsp,[rax]
+
$L$xts_enc_epilogue:
DB 0F3h,0C3h ;repret
+
global bsaes_xts_decrypt
ALIGN 16
bsaes_xts_decrypt:
+
mov rax,rsp
$L$xts_dec_prologue:
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-72))+rsp]
+
mov r10,QWORD[160+rsp]
mov r11,QWORD[168+rsp]
lea rsp,[((-160))+rsp]
@@ -2522,7 +2584,8 @@ $L$xts_dec_bzero:
cmp rbp,rax
ja NEAR $L$xts_dec_bzero
- lea rsp,[rbp]
+ lea rax,[120+rbp]
+
movaps xmm6,XMMWORD[64+rbp]
movaps xmm7,XMMWORD[80+rbp]
movaps xmm8,XMMWORD[96+rbp]
@@ -2533,19 +2596,27 @@ $L$xts_dec_bzero:
movaps xmm13,XMMWORD[176+rbp]
movaps xmm14,XMMWORD[192+rbp]
movaps xmm15,XMMWORD[208+rbp]
- lea rsp,[160+rbp]
- mov r15,QWORD[72+rsp]
- mov r14,QWORD[80+rsp]
- mov r13,QWORD[88+rsp]
- mov r12,QWORD[96+rsp]
- mov rbx,QWORD[104+rsp]
- mov rax,QWORD[112+rsp]
- lea rsp,[120+rsp]
- mov rbp,rax
+ lea rax,[160+rax]
+$L$xts_dec_tail:
+ mov r15,QWORD[((-48))+rax]
+
+ mov r14,QWORD[((-40))+rax]
+
+ mov r13,QWORD[((-32))+rax]
+
+ mov r12,QWORD[((-24))+rax]
+
+ mov rbx,QWORD[((-16))+rax]
+
+ mov rbp,QWORD[((-8))+rax]
+
+ lea rsp,[rax]
+
$L$xts_dec_epilogue:
DB 0F3h,0C3h ;repret
+
ALIGN 64
_bsaes_const:
$L$M0ISR:
@@ -2628,30 +2699,33 @@ se_handler:
mov r10d,DWORD[r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
- jb NEAR $L$in_prologue
-
- mov rax,QWORD[152+r8]
+ jbe NEAR $L$in_prologue
mov r10d,DWORD[4+r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jae NEAR $L$in_prologue
+ mov r10d,DWORD[8+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$in_tail
+
mov rax,QWORD[160+r8]
lea rsi,[64+rax]
lea rdi,[512+r8]
mov ecx,20
DD 0xa548f3fc
- lea rax,[160+rax]
-
- mov rbp,QWORD[112+rax]
- mov rbx,QWORD[104+rax]
- mov r12,QWORD[96+rax]
- mov r13,QWORD[88+rax]
- mov r14,QWORD[80+rax]
- mov r15,QWORD[72+rax]
- lea rax,[120+rax]
+ lea rax,[((160+120))+rax]
+
+$L$in_tail:
+ mov rbp,QWORD[((-48))+rax]
+ mov rbx,QWORD[((-40))+rax]
+ mov r12,QWORD[((-32))+rax]
+ mov r13,QWORD[((-24))+rax]
+ mov r14,QWORD[((-16))+rax]
+ mov r15,QWORD[((-8))+rax]
mov QWORD[144+r8],rbx
mov QWORD[160+r8],rbp
mov QWORD[216+r8],r12
@@ -2719,15 +2793,23 @@ $L$cbc_dec_info:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$cbc_dec_body wrt ..imagebase,$L$cbc_dec_epilogue wrt ..imagebase
+ DD $L$cbc_dec_tail wrt ..imagebase
+ DD 0
$L$ctr_enc_info:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$ctr_enc_body wrt ..imagebase,$L$ctr_enc_epilogue wrt ..imagebase
+ DD $L$ctr_enc_tail wrt ..imagebase
+ DD 0
$L$xts_enc_info:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
+ DD $L$xts_enc_tail wrt ..imagebase
+ DD 0
$L$xts_dec_info:
DB 9,0,0,0
DD se_handler wrt ..imagebase
DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
+ DD $L$xts_dec_tail wrt ..imagebase
+ DD 0
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-avx2.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-avx2.asm
index 86d26158d0..02a518607d 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-avx2.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-avx2.asm
@@ -20,13 +20,21 @@ $L$SEH_begin_rsaz_1024_sqr_avx2:
mov r8,QWORD[40+rsp]
+
lea rax,[rsp]
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
vzeroupper
lea rsp,[((-168))+rsp]
vmovaps XMMWORD[(-216)+rax],xmm6
@@ -41,6 +49,7 @@ $L$SEH_begin_rsaz_1024_sqr_avx2:
vmovaps XMMWORD[(-72)+rax],xmm15
$L$sqr_1024_body:
mov rbp,rax
+
mov r13,rdx
sub rsp,832
mov r15,r13
@@ -653,6 +662,8 @@ DB 0x67
vzeroall
mov rax,rbp
+
+$L$sqr_1024_in_tail:
movaps xmm6,XMMWORD[((-216))+rax]
movaps xmm7,XMMWORD[((-200))+rax]
movaps xmm8,XMMWORD[((-184))+rax]
@@ -664,16 +675,24 @@ DB 0x67
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$sqr_1024_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_rsaz_1024_sqr_avx2:
global rsaz_1024_mul_avx2
@@ -690,13 +709,21 @@ $L$SEH_begin_rsaz_1024_mul_avx2:
mov r8,QWORD[40+rsp]
+
lea rax,[rsp]
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
vzeroupper
lea rsp,[((-168))+rsp]
vmovaps XMMWORD[(-216)+rax],xmm6
@@ -711,6 +738,7 @@ $L$SEH_begin_rsaz_1024_mul_avx2:
vmovaps XMMWORD[(-72)+rax],xmm15
$L$mul_1024_body:
mov rbp,rax
+
vzeroall
mov r13,rdx
sub rsp,64
@@ -1226,6 +1254,8 @@ $L$oop_mul_1024:
vzeroupper
mov rax,rbp
+
+$L$mul_1024_in_tail:
movaps xmm6,XMMWORD[((-216))+rax]
movaps xmm7,XMMWORD[((-200))+rax]
movaps xmm8,XMMWORD[((-184))+rax]
@@ -1237,16 +1267,24 @@ $L$oop_mul_1024:
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$mul_1024_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_rsaz_1024_mul_avx2:
global rsaz_1024_red2norm_avx2
@@ -1631,8 +1669,10 @@ global rsaz_1024_gather5_avx2
ALIGN 32
rsaz_1024_gather5_avx2:
+
vzeroupper
mov r11,rsp
+
lea rax,[((-136))+rsp]
$L$SEH_begin_rsaz_1024_gather5:
@@ -1764,10 +1804,12 @@ $L$oop_gather_1024:
movaps xmm13,XMMWORD[((-56))+r11]
movaps xmm14,XMMWORD[((-40))+r11]
movaps xmm15,XMMWORD[((-24))+r11]
-$L$SEH_end_rsaz_1024_gather5:
lea rsp,[r11]
+
DB 0F3h,0C3h ;repret
+$L$SEH_end_rsaz_1024_gather5:
+
EXTERN OPENSSL_ia32cap_P
global rsaz_avx2_eligible
@@ -1822,14 +1864,17 @@ rsaz_se_handler:
cmp rbx,r10
jb NEAR $L$common_seh_tail
- mov rax,QWORD[152+r8]
-
mov r10d,DWORD[4+r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jae NEAR $L$common_seh_tail
- mov rax,QWORD[160+r8]
+ mov rbp,QWORD[160+r8]
+
+ mov r10d,DWORD[8+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ cmovc rax,rbp
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
@@ -1907,11 +1952,13 @@ ALIGN 8
$L$SEH_info_rsaz_1024_sqr_avx2:
DB 9,0,0,0
DD rsaz_se_handler wrt ..imagebase
- DD $L$sqr_1024_body wrt ..imagebase,$L$sqr_1024_epilogue wrt ..imagebase
+ DD $L$sqr_1024_body wrt ..imagebase,$L$sqr_1024_epilogue wrt ..imagebase,$L$sqr_1024_in_tail wrt ..imagebase
+ DD 0
$L$SEH_info_rsaz_1024_mul_avx2:
DB 9,0,0,0
DD rsaz_se_handler wrt ..imagebase
- DD $L$mul_1024_body wrt ..imagebase,$L$mul_1024_epilogue wrt ..imagebase
+ DD $L$mul_1024_body wrt ..imagebase,$L$mul_1024_epilogue wrt ..imagebase,$L$mul_1024_in_tail wrt ..imagebase
+ DD 0
$L$SEH_info_rsaz_1024_gather5:
DB 0x01,0x36,0x17,0x0b
DB 0x36,0xf8,0x09,0x00
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-x86_64.asm
index b6384fc421..603a8d17b8 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/rsaz-x86_64.asm
@@ -22,14 +22,22 @@ $L$SEH_begin_rsaz_512_sqr:
mov r8,QWORD[40+rsp]
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,128+24
+
$L$sqr_body:
mov rbp,rdx
mov rdx,QWORD[rsi]
@@ -674,17 +682,26 @@ DB 102,72,15,126,205
$L$sqr_tail:
lea rax,[((128+24+48))+rsp]
+
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$sqr_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_rsaz_512_sqr:
global rsaz_512_mul
@@ -701,14 +718,22 @@ $L$SEH_begin_rsaz_512_mul:
mov r8,QWORD[40+rsp]
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,128+24
+
$L$mul_body:
DB 102,72,15,110,199
DB 102,72,15,110,201
@@ -770,17 +795,26 @@ $L$mul_tail:
call __rsaz_512_subtract
lea rax,[((128+24+48))+rsp]
+
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$mul_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_rsaz_512_mul:
global rsaz_512_mul_gather4
@@ -798,14 +832,22 @@ $L$SEH_begin_rsaz_512_mul_gather4:
mov r9,QWORD[48+rsp]
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,328
+
movaps XMMWORD[160+rsp],xmm6
movaps XMMWORD[176+rsp],xmm7
movaps XMMWORD[192+rsp],xmm8
@@ -1215,17 +1257,26 @@ $L$mul_gather_tail:
movaps xmm14,XMMWORD[((288-200))+rax]
movaps xmm15,XMMWORD[((304-200))+rax]
lea rax,[176+rax]
+
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$mul_gather4_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_rsaz_512_mul_gather4:
global rsaz_512_mul_scatter4
@@ -1243,15 +1294,23 @@ $L$SEH_begin_rsaz_512_mul_scatter4:
mov r9,QWORD[48+rsp]
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
mov r9d,r9d
sub rsp,128+24
+
$L$mul_scatter4_body:
lea r8,[r9*8+r8]
DB 102,72,15,110,199
@@ -1326,17 +1385,26 @@ DB 102,72,15,126,214
mov QWORD[896+rsi],r15
lea rax,[((128+24+48))+rsp]
+
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$mul_scatter4_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_rsaz_512_mul_scatter4:
global rsaz_512_mul_by_one
@@ -1352,14 +1420,22 @@ $L$SEH_begin_rsaz_512_mul_by_one:
mov rcx,r9
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,128+24
+
$L$mul_by_one_body:
mov eax,DWORD[((OPENSSL_ia32cap_P+8))]
mov rbp,rdx
@@ -1402,17 +1478,26 @@ $L$by_one_tail:
mov QWORD[56+rdi],r15
lea rax,[((128+24+48))+rsp]
+
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$mul_by_one_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_rsaz_512_mul_by_one:
ALIGN 32
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-gf2m.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-gf2m.asm
index 053a3f86a6..8123fd11b6 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-gf2m.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-gf2m.asm
@@ -8,7 +8,9 @@ section .text code align=64
ALIGN 16
_mul_1x1:
+
sub rsp,128+8
+
mov r9,-1
lea rsi,[rax*1+rax]
shr r9,3
@@ -198,16 +200,20 @@ DB 102,72,15,126,199
xor rdx,rdi
add rsp,128+8
+
DB 0F3h,0C3h ;repret
$L$end_mul_1x1:
+
EXTERN OPENSSL_ia32cap_P
global bn_GF2m_mul_2x2
ALIGN 16
bn_GF2m_mul_2x2:
- mov rax,QWORD[OPENSSL_ia32cap_P]
- bt rax,33
+
+ mov rax,rsp
+ mov r10,QWORD[OPENSSL_ia32cap_P]
+ bt r10,33
jnc NEAR $L$vanilla_mul_2x2
DB 102,72,15,110,194
@@ -235,14 +241,20 @@ DB 102,15,58,68,229,0
ALIGN 16
$L$vanilla_mul_2x2:
lea rsp,[((-136))+rsp]
+
mov r10,QWORD[176+rsp]
mov QWORD[120+rsp],rdi
mov QWORD[128+rsp],rsi
mov QWORD[80+rsp],r14
+
mov QWORD[88+rsp],r13
+
mov QWORD[96+rsp],r12
+
mov QWORD[104+rsp],rbp
+
mov QWORD[112+rsp],rbx
+
$L$body_mul_2x2:
mov QWORD[32+rsp],rcx
mov QWORD[40+rsp],rdx
@@ -287,16 +299,24 @@ $L$body_mul_2x2:
mov QWORD[8+rbp],rax
mov r14,QWORD[80+rsp]
+
mov r13,QWORD[88+rsp]
+
mov r12,QWORD[96+rsp]
+
mov rbp,QWORD[104+rsp]
+
mov rbx,QWORD[112+rsp]
+
mov rdi,QWORD[120+rsp]
mov rsi,QWORD[128+rsp]
lea rsp,[136+rsp]
+
+$L$epilogue_mul_2x2:
DB 0F3h,0C3h ;repret
$L$end_mul_2x2:
+
DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54
DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
@@ -319,13 +339,19 @@ se_handler:
pushfq
sub rsp,64
- mov rax,QWORD[152+r8]
+ mov rax,QWORD[120+r8]
mov rbx,QWORD[248+r8]
lea r10,[$L$body_mul_2x2]
cmp rbx,r10
jb NEAR $L$in_prologue
+ mov rax,QWORD[152+r8]
+
+ lea r10,[$L$epilogue_mul_2x2]
+ cmp rbx,r10
+ jae NEAR $L$in_prologue
+
mov r14,QWORD[80+rax]
mov r13,QWORD[88+rax]
mov r12,QWORD[96+rax]
@@ -342,8 +368,9 @@ se_handler:
mov QWORD[224+r8],r13
mov QWORD[232+r8],r14
-$L$in_prologue:
lea rax,[136+rax]
+
+$L$in_prologue:
mov QWORD[152+r8],rax
mov rdi,QWORD[40+r9]
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont.asm
index 26908c313b..81c205803e 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont.asm
@@ -23,8 +23,10 @@ $L$SEH_begin_bn_mul_mont:
mov r9,QWORD[48+rsp]
+
mov r9d,r9d
mov rax,rsp
+
test r9d,3
jnz NEAR $L$mul_enter
cmp r9d,8
@@ -39,12 +41,18 @@ $L$SEH_begin_bn_mul_mont:
ALIGN 16
$L$mul_enter:
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
neg r9
mov r11,rsp
lea r10,[((-16))+r9*8+rsp]
@@ -76,6 +84,7 @@ $L$mul_page_walk:
$L$mul_page_walk_done:
mov QWORD[8+r9*8+rsp],rax
+
$L$mul_body:
mov r12,rdx
mov r8,QWORD[r8]
@@ -243,18 +252,27 @@ $L$copy:
jnz NEAR $L$copy
mov rsi,QWORD[8+r9*8+rsp]
+
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$mul_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_mul_mont:
ALIGN 16
@@ -271,19 +289,27 @@ $L$SEH_begin_bn_mul4x_mont:
mov r9,QWORD[48+rsp]
+
mov r9d,r9d
mov rax,rsp
+
$L$mul4x_enter:
and r11d,0x80100
cmp r11d,0x80100
je NEAR $L$mulx4x_enter
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
neg r9
mov r11,rsp
lea r10,[((-32))+r9*8+rsp]
@@ -306,6 +332,7 @@ $L$mul4x_page_walk:
$L$mul4x_page_walk_done:
mov QWORD[8+r9*8+rsp],rax
+
$L$mul4x_body:
mov QWORD[16+r9*8+rsp],rdi
mov r12,rdx
@@ -673,18 +700,27 @@ $L$copy4x:
dec r15
jnz NEAR $L$copy4x
mov rsi,QWORD[8+r9*8+rsp]
+
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$mul4x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_mul4x_mont:
EXTERN bn_sqrx8x_internal
EXTERN bn_sqr8x_internal
@@ -704,14 +740,22 @@ $L$SEH_begin_bn_sqr8x_mont:
mov r9,QWORD[48+rsp]
+
mov rax,rsp
+
$L$sqr8x_enter:
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$sqr8x_prologue:
mov r10d,r9d
@@ -767,6 +811,7 @@ $L$sqr8x_page_walk_done:
mov QWORD[32+rsp],r8
mov QWORD[40+rsp],rax
+
$L$sqr8x_body:
DB 102,72,15,110,209
@@ -832,6 +877,7 @@ DB 102,72,15,110,200
pxor xmm0,xmm0
pshufd xmm1,xmm1,0
mov rsi,QWORD[40+rsp]
+
jmp NEAR $L$sqr8x_cond_copy
ALIGN 32
@@ -861,16 +907,24 @@ $L$sqr8x_cond_copy:
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$sqr8x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_sqr8x_mont:
ALIGN 32
@@ -887,14 +941,22 @@ $L$SEH_begin_bn_mulx4x_mont:
mov r9,QWORD[48+rsp]
+
mov rax,rsp
+
$L$mulx4x_enter:
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$mulx4x_prologue:
shl r9d,3
@@ -940,6 +1002,7 @@ $L$mulx4x_page_walk_done:
mov QWORD[24+rsp],r8
mov QWORD[32+rsp],rdi
mov QWORD[40+rsp],rax
+
mov QWORD[48+rsp],r9
jmp NEAR $L$mulx4x_body
@@ -1184,6 +1247,7 @@ DB 102,73,15,110,207
pxor xmm0,xmm0
pshufd xmm1,xmm1,0
mov rsi,QWORD[40+rsp]
+
jmp NEAR $L$mulx4x_cond_copy
ALIGN 32
@@ -1213,16 +1277,24 @@ $L$mulx4x_cond_copy:
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$mulx4x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_mulx4x_mont:
DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont5.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont5.asm
index de93630c8f..d1855c5acf 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont5.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/bn/x86_64-mont5.asm
@@ -23,8 +23,10 @@ $L$SEH_begin_bn_mul_mont_gather5:
mov r9,QWORD[48+rsp]
+
mov r9d,r9d
mov rax,rsp
+
test r9d,7
jnz NEAR $L$mul_enter
mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]
@@ -34,12 +36,18 @@ ALIGN 16
$L$mul_enter:
movd xmm5,DWORD[56+rsp]
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
neg r9
mov r11,rsp
lea r10,[((-280))+r9*8+rsp]
@@ -71,6 +79,7 @@ $L$mul_page_walk_done:
lea r10,[$L$inc]
mov QWORD[8+r9*8+rsp],rax
+
$L$mul_body:
lea r12,[128+rdx]
@@ -428,19 +437,28 @@ $L$copy:
jnz NEAR $L$copy
mov rsi,QWORD[8+r9*8+rsp]
+
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$mul_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_mul_mont_gather5:
ALIGN 32
@@ -457,18 +475,26 @@ $L$SEH_begin_bn_mul4x_mont_gather5:
mov r9,QWORD[48+rsp]
+
DB 0x67
mov rax,rsp
+
$L$mul4x_enter:
and r11d,0x80108
cmp r11d,0x80108
je NEAR $L$mulx4x_enter
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$mul4x_prologue:
DB 0x67
@@ -524,24 +550,34 @@ $L$mul4x_page_walk_done:
neg r9
mov QWORD[40+rsp],rax
+
$L$mul4x_body:
call mul4x_internal
mov rsi,QWORD[40+rsp]
+
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$mul4x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_mul4x_mont_gather5:
@@ -1085,17 +1121,25 @@ $L$SEH_begin_bn_power5:
mov r9,QWORD[48+rsp]
+
mov rax,rsp
+
mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]
and r11d,0x80108
cmp r11d,0x80108
je NEAR $L$powerx5_enter
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$power5_prologue:
shl r9d,3
@@ -1160,6 +1204,7 @@ $L$pwr_page_walk_done:
mov QWORD[32+rsp],r8
mov QWORD[40+rsp],rax
+
$L$power5_body:
DB 102,72,15,110,207
DB 102,72,15,110,209
@@ -1186,18 +1231,27 @@ DB 102,72,15,126,226
call mul4x_internal
mov rsi,QWORD[40+rsp]
+
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$power5_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_power5:
global bn_sqr8x_internal
@@ -2060,14 +2114,22 @@ $L$SEH_begin_bn_from_mont8x:
mov r9,QWORD[48+rsp]
+
DB 0x67
mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$from_prologue:
shl r9d,3
@@ -2132,6 +2194,7 @@ $L$from_page_walk_done:
mov QWORD[32+rsp],r8
mov QWORD[40+rsp],rax
+
$L$from_body:
mov r11,r9
lea rax,[48+rsp]
@@ -2173,7 +2236,6 @@ DB 102,73,15,110,218
pxor xmm0,xmm0
lea rax,[48+rsp]
- mov rsi,QWORD[40+rsp]
jmp NEAR $L$from_mont_zero
ALIGN 32
@@ -2183,11 +2245,12 @@ $L$from_mont_nox:
pxor xmm0,xmm0
lea rax,[48+rsp]
- mov rsi,QWORD[40+rsp]
jmp NEAR $L$from_mont_zero
ALIGN 32
$L$from_mont_zero:
+ mov rsi,QWORD[40+rsp]
+
movdqa XMMWORD[rax],xmm0
movdqa XMMWORD[16+rax],xmm0
movdqa XMMWORD[32+rax],xmm0
@@ -2198,16 +2261,24 @@ $L$from_mont_zero:
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$from_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_from_mont8x:
ALIGN 32
@@ -2224,14 +2295,22 @@ $L$SEH_begin_bn_mulx4x_mont_gather5:
mov r9,QWORD[48+rsp]
+
mov rax,rsp
+
$L$mulx4x_enter:
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$mulx4x_prologue:
shl r9d,3
@@ -2297,23 +2376,33 @@ $L$mulx4x_page_walk_done:
mov QWORD[32+rsp],r8
mov QWORD[40+rsp],rax
+
$L$mulx4x_body:
call mulx4x_internal
mov rsi,QWORD[40+rsp]
+
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$mulx4x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_mulx4x_mont_gather5:
@@ -2753,14 +2842,22 @@ $L$SEH_begin_bn_powerx5:
mov r9,QWORD[48+rsp]
+
mov rax,rsp
+
$L$powerx5_enter:
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$powerx5_prologue:
shl r9d,3
@@ -2832,6 +2929,7 @@ DB 102,73,15,110,218
DB 102,72,15,110,226
mov QWORD[32+rsp],r8
mov QWORD[40+rsp],rax
+
$L$powerx5_body:
call __bn_sqrx8x_internal
@@ -2854,19 +2952,28 @@ DB 102,72,15,126,226
call mulx4x_internal
mov rsi,QWORD[40+rsp]
+
mov rax,1
mov r15,QWORD[((-48))+rsi]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$powerx5_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_bn_powerx5:
global bn_sqrx8x_internal
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/buildinf.h b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/buildinf.h
index c0b70ab335..3cfa6a4a22 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/buildinf.h
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/buildinf.h
@@ -1,12 +1,23 @@
-/* auto-generated by util/mkbuildinf.pl for crypto/cversion.c */
-#define CFLAGS cflags
/*
- * Generate CFLAGS as an array of individual characters. This is a
+ * WARNING: do not edit!
+ * Generated by util/mkbuildinf.pl
+ *
+ * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the OpenSSL license (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#define PLATFORM "platform: "
+#define DATE "built on: Thu Nov 22 19:36:01 2018 UTC"
+
+/*
+ * Generate compiler_flags as an array of individual characters. This is a
* workaround for the situation where CFLAGS gets too long for a C90 string
* literal
*/
-static const char cflags[] = {
- 'c','o','m','p','i','l','e','r',':',' ','c','c','\0'
+static const char compiler_flags[] = {
+ 'c','o','m','p','i','l','e','r',':',' ','c','c',' ',' ','\0'
};
-#define PLATFORM "platform: "
-#define DATE "built on: Tue Nov 20 09:39:06 2018"
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/camellia/cmll-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/camellia/cmll-x86_64.asm
index cb91061570..7a0f351e51 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/camellia/cmll-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/camellia/cmll-x86_64.asm
@@ -32,11 +32,17 @@ $L$SEH_begin_Camellia_EncryptBlock_Rounds:
mov rcx,r9
+
push rbx
+
push rbp
+
push r13
+
push r14
+
push r15
+
$L$enc_prologue:
@@ -68,15 +74,22 @@ $L$enc_prologue:
mov DWORD[12+r13],r11d
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r13,QWORD[16+rsp]
+
mov rbp,QWORD[24+rsp]
+
mov rbx,QWORD[32+rsp]
+
lea rsp,[40+rsp]
+
$L$enc_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_Camellia_EncryptBlock_Rounds:
@@ -313,11 +326,17 @@ $L$SEH_begin_Camellia_DecryptBlock_Rounds:
mov rcx,r9
+
push rbx
+
push rbp
+
push r13
+
push r14
+
push r15
+
$L$dec_prologue:
@@ -349,15 +368,22 @@ $L$dec_prologue:
mov DWORD[12+r13],r11d
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r13,QWORD[16+rsp]
+
mov rbp,QWORD[24+rsp]
+
mov rbx,QWORD[32+rsp]
+
lea rsp,[40+rsp]
+
$L$dec_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_Camellia_DecryptBlock_Rounds:
@@ -580,11 +606,17 @@ $L$SEH_begin_Camellia_Ekeygen:
mov rdx,r8
+
push rbx
+
push rbp
+
push r13
+
push r14
+
push r15
+
$L$key_prologue:
mov r15d,edi
@@ -1112,15 +1144,22 @@ $L$2nd256:
mov eax,4
$L$done:
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r13,QWORD[16+rsp]
+
mov rbp,QWORD[24+rsp]
+
mov rbx,QWORD[32+rsp]
+
lea rsp,[40+rsp]
+
$L$key_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_Camellia_Ekeygen:
ALIGN 64
$L$Camellia_SIGMA:
@@ -1657,17 +1696,25 @@ $L$SEH_begin_Camellia_cbc_encrypt:
mov r9,QWORD[48+rsp]
+
cmp rdx,0
je NEAR $L$cbc_abort
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$cbc_prologue:
mov rbp,rsp
+
sub rsp,64
and rsp,-64
@@ -1689,6 +1736,7 @@ $L$cbc_prologue:
mov QWORD[40+rsp],r8
mov QWORD[48+rsp],rbp
+
$L$cbc_body:
lea rbp,[$L$Camellia_SBOX]
@@ -1876,17 +1924,26 @@ $L$cbc_dec_popf:
ALIGN 16
$L$cbc_done:
mov rcx,QWORD[48+rsp]
+
mov r15,QWORD[rcx]
+
mov r14,QWORD[8+rcx]
+
mov r13,QWORD[16+rcx]
+
mov r12,QWORD[24+rcx]
+
mov rbp,QWORD[32+rcx]
+
mov rbx,QWORD[40+rcx]
+
lea rsp,[48+rcx]
+
$L$cbc_abort:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_Camellia_cbc_encrypt:
DB 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/chacha/chacha-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/chacha/chacha-x86_64.asm
index ce4751884f..1a2003ea1f 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/chacha/chacha-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/chacha/chacha-x86_64.asm
@@ -24,6 +24,17 @@ $L$rot16:
DB 0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd
$L$rot24:
DB 0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe
+$L$twoy:
+ DD 2,0,0,0,2,0,0,0
+ALIGN 64
+$L$zeroz:
+ DD 0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
+$L$fourz:
+ DD 4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
+$L$incz:
+ DD 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+$L$sixteen:
+ DD 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
$L$sigma:
DB 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107
DB 0
@@ -46,20 +57,33 @@ $L$SEH_begin_ChaCha20_ctr32:
mov r8,QWORD[40+rsp]
+
cmp rdx,0
je NEAR $L$no_data
mov r10,QWORD[((OPENSSL_ia32cap_P+4))]
+ bt r10,48
+ jc NEAR $L$ChaCha20_avx512
+ test r10,r10
+ js NEAR $L$ChaCha20_avx512vl
test r10d,512
jnz NEAR $L$ChaCha20_ssse3
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,64+24
+$L$ctr32_body:
+
movdqu xmm1,XMMWORD[rcx]
movdqu xmm2,XMMWORD[16+rcx]
@@ -296,17 +320,27 @@ $L$oop_tail:
jnz NEAR $L$oop_tail
$L$done:
- add rsp,64+24
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
+ lea rsi,[((64+24+48))+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$no_data:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ChaCha20_ctr32:
ALIGN 32
@@ -322,23 +356,21 @@ $L$SEH_begin_ChaCha20_ssse3:
mov r8,QWORD[40+rsp]
+
$L$ChaCha20_ssse3:
+ mov r9,rsp
+
test r10d,2048
jnz NEAR $L$ChaCha20_4xop
cmp rdx,128
+ je NEAR $L$ChaCha20_128
ja NEAR $L$ChaCha20_4x
$L$do_sse3_after_all:
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
-
- sub rsp,64+72
- movaps XMMWORD[(64+32)+rsp],xmm6
- movaps XMMWORD[(64+48)+rsp],xmm7
+ sub rsp,64+40
+ movaps XMMWORD[(-40)+r9],xmm6
+ movaps XMMWORD[(-24)+r9],xmm7
+$L$ssse3_body:
movdqa xmm0,XMMWORD[$L$sigma]
movdqu xmm1,XMMWORD[rcx]
movdqu xmm2,XMMWORD[16+rcx]
@@ -350,7 +382,7 @@ $L$do_sse3_after_all:
movdqa XMMWORD[16+rsp],xmm1
movdqa XMMWORD[32+rsp],xmm2
movdqa XMMWORD[48+rsp],xmm3
- mov ebp,10
+ mov r8,10
jmp NEAR $L$oop_ssse3
ALIGN 32
@@ -360,7 +392,7 @@ $L$oop_outer_ssse3:
movdqa xmm1,XMMWORD[16+rsp]
movdqa xmm2,XMMWORD[32+rsp]
paddd xmm3,XMMWORD[48+rsp]
- mov ebp,10
+ mov r8,10
movdqa XMMWORD[48+rsp],xmm3
jmp NEAR $L$oop_ssse3
@@ -409,7 +441,7 @@ DB 102,15,56,0,223
pshufd xmm2,xmm2,78
pshufd xmm1,xmm1,147
pshufd xmm3,xmm3,57
- dec ebp
+ dec r8
jnz NEAR $L$oop_ssse3
paddd xmm0,XMMWORD[rsp]
paddd xmm1,XMMWORD[16+rsp]
@@ -446,33 +478,212 @@ $L$tail_ssse3:
movdqa XMMWORD[16+rsp],xmm1
movdqa XMMWORD[32+rsp],xmm2
movdqa XMMWORD[48+rsp],xmm3
- xor rbx,rbx
+ xor r8,r8
$L$oop_tail_ssse3:
- movzx eax,BYTE[rbx*1+rsi]
- movzx ecx,BYTE[rbx*1+rsp]
- lea rbx,[1+rbx]
+ movzx eax,BYTE[r8*1+rsi]
+ movzx ecx,BYTE[r8*1+rsp]
+ lea r8,[1+r8]
xor eax,ecx
- mov BYTE[((-1))+rbx*1+rdi],al
+ mov BYTE[((-1))+r8*1+rdi],al
dec rdx
jnz NEAR $L$oop_tail_ssse3
$L$done_ssse3:
- movaps xmm6,XMMWORD[((64+32))+rsp]
- movaps xmm7,XMMWORD[((64+48))+rsp]
- add rsp,64+72
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
+ movaps xmm6,XMMWORD[((-40))+r9]
+ movaps xmm7,XMMWORD[((-24))+r9]
+ lea rsp,[r9]
+
+$L$ssse3_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ChaCha20_ssse3:
ALIGN 32
+ChaCha20_128: ;Win64 entry for the two-block (128-byte) pshufb-based path: remap arguments, then fall into $L$ChaCha20_128
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64; parked above the return address, reloaded at $L$128_epilogue
+ mov rax,rsp ;rax = rsp at entry
+$L$SEH_begin_ChaCha20_128:
+ mov rdi,rcx ;1st Win64 argument -> rdi (128 bytes are stored through it at the end)
+ mov rsi,rdx ;2nd argument -> rsi (128 bytes are loaded through it at the end)
+ mov rdx,r8 ;3rd argument -> rdx (not referenced again inside this routine)
+ mov rcx,r9 ;4th argument -> rcx (32 bytes are loaded through it below)
+ mov r8,QWORD[40+rsp] ;5th argument lives on the stack, past the return address and the 32-byte shadow space
+
+
+
+$L$ChaCha20_128: ;internal entry: $L$ChaCha20_ssse3 branches here when rdx == 128, arguments already remapped
+ mov r9,rsp ;r9 = frame anchor; the XMM save slots and the final rsp restore are relative to it
+
+ sub rsp,64+104 ;64 bytes of scratch at [rsp] for the initial rows, plus room for the six XMM saves
+ movaps XMMWORD[(-104)+r9],xmm6 ;xmm6-xmm11 are callee-saved on Win64 and are overwritten below
+ movaps XMMWORD[(-88)+r9],xmm7
+ movaps XMMWORD[(-72)+r9],xmm8
+ movaps XMMWORD[(-56)+r9],xmm9
+ movaps XMMWORD[(-40)+r9],xmm10
+ movaps XMMWORD[(-24)+r9],xmm11
+$L$128_body:
+ movdqa xmm8,XMMWORD[$L$sigma] ;row a: the $L$sigma constant (ASCII "expand 32-byte k")
+ movdqu xmm9,XMMWORD[rcx] ;row b: first 16 bytes at rcx (presumably key words 0-3)
+ movdqu xmm2,XMMWORD[16+rcx] ;row c: next 16 bytes at rcx (presumably key words 4-7)
+ movdqu xmm3,XMMWORD[r8] ;row d: 16 bytes at r8 (presumably block counter + nonce)
+ movdqa xmm1,XMMWORD[$L$one] ;$L$one is defined outside this view; presumably +1 on the block counter - confirm
+ movdqa xmm6,XMMWORD[$L$rot16] ;pshufb mask: rotate every dword left by 16 bits
+ movdqa xmm7,XMMWORD[$L$rot24] ;pshufb mask: rotate every dword left by 8 bits
+
+ movdqa xmm10,xmm8 ;second block starts from the same rows a-c
+ movdqa XMMWORD[rsp],xmm8 ;initial rows are kept at [rsp..rsp+63] for the final additions
+ movdqa xmm11,xmm9
+ movdqa XMMWORD[16+rsp],xmm9
+ movdqa xmm0,xmm2
+ movdqa XMMWORD[32+rsp],xmm2
+ paddd xmm1,xmm3 ;second block's row d = first block's row d + $L$one
+ movdqa XMMWORD[48+rsp],xmm3
+ mov r8,10 ;r8 is reused as the loop counter: 10 passes through $L$oop_128
+ jmp NEAR $L$oop_128
+
+ALIGN 32
+$L$oop_128: ;two blocks interleaved: A = xmm8,xmm9,xmm2,xmm3 and B = xmm10,xmm11,xmm0,xmm1 (rows a,b,c,d)
+ paddd xmm8,xmm9 ;A: a += b
+ pxor xmm3,xmm8 ;A: d ^= a
+ paddd xmm10,xmm11 ;B: a += b
+ pxor xmm1,xmm10 ;B: d ^= a
+DB 102,15,56,0,222 ;pshufb xmm3,xmm6 - A: rotate d left by 16
+DB 102,15,56,0,206 ;pshufb xmm1,xmm6 - B: rotate d left by 16
+ paddd xmm2,xmm3 ;A: c += d
+ paddd xmm0,xmm1 ;B: c += d
+ pxor xmm9,xmm2 ;A: b ^= c
+ pxor xmm11,xmm0 ;B: b ^= c
+ movdqa xmm4,xmm9 ;rotate b left by 12 in both blocks: (b >> 20) | (b << 12), xmm4/xmm5 are temporaries
+ psrld xmm9,20
+ movdqa xmm5,xmm11
+ pslld xmm4,12
+ psrld xmm11,20
+ por xmm9,xmm4
+ pslld xmm5,12
+ por xmm11,xmm5
+ paddd xmm8,xmm9 ;same add/xor/rotate sequence again, now with rotations of 8 and 7
+ pxor xmm3,xmm8
+ paddd xmm10,xmm11
+ pxor xmm1,xmm10
+DB 102,15,56,0,223 ;pshufb xmm3,xmm7 - A: rotate d left by 8
+DB 102,15,56,0,207 ;pshufb xmm1,xmm7 - B: rotate d left by 8
+ paddd xmm2,xmm3
+ paddd xmm0,xmm1
+ pxor xmm9,xmm2
+ pxor xmm11,xmm0
+ movdqa xmm4,xmm9 ;rotate b left by 7: (b >> 25) | (b << 7)
+ psrld xmm9,25
+ movdqa xmm5,xmm11
+ pslld xmm4,7
+ psrld xmm11,25
+ por xmm9,xmm4
+ pslld xmm5,7
+ por xmm11,xmm5
+ pshufd xmm2,xmm2,78 ;re-align rows for the second half: c takes lanes 2,3,0,1
+ pshufd xmm9,xmm9,57 ;b takes lanes 1,2,3,0
+ pshufd xmm3,xmm3,147 ;d takes lanes 3,0,1,2
+ pshufd xmm0,xmm0,78 ;same three shuffles for block B
+ pshufd xmm11,xmm11,57
+ pshufd xmm1,xmm1,147
+ paddd xmm8,xmm9 ;second half: identical 16/12/8/7 sequence on the re-aligned rows
+ pxor xmm3,xmm8
+ paddd xmm10,xmm11
+ pxor xmm1,xmm10
+DB 102,15,56,0,222 ;pshufb xmm3,xmm6 - A: rotate d left by 16
+DB 102,15,56,0,206 ;pshufb xmm1,xmm6 - B: rotate d left by 16
+ paddd xmm2,xmm3
+ paddd xmm0,xmm1
+ pxor xmm9,xmm2
+ pxor xmm11,xmm0
+ movdqa xmm4,xmm9 ;rotate b left by 12
+ psrld xmm9,20
+ movdqa xmm5,xmm11
+ pslld xmm4,12
+ psrld xmm11,20
+ por xmm9,xmm4
+ pslld xmm5,12
+ por xmm11,xmm5
+ paddd xmm8,xmm9
+ pxor xmm3,xmm8
+ paddd xmm10,xmm11
+ pxor xmm1,xmm10
+DB 102,15,56,0,223 ;pshufb xmm3,xmm7 - A: rotate d left by 8
+DB 102,15,56,0,207 ;pshufb xmm1,xmm7 - B: rotate d left by 8
+ paddd xmm2,xmm3
+ paddd xmm0,xmm1
+ pxor xmm9,xmm2
+ pxor xmm11,xmm0
+ movdqa xmm4,xmm9 ;rotate b left by 7
+ psrld xmm9,25
+ movdqa xmm5,xmm11
+ pslld xmm4,7
+ psrld xmm11,25
+ por xmm9,xmm4
+ pslld xmm5,7
+ por xmm11,xmm5
+ pshufd xmm2,xmm2,78 ;undo the lane re-alignment: c takes lanes 2,3,0,1
+ pshufd xmm9,xmm9,147 ;b takes lanes 3,0,1,2
+ pshufd xmm3,xmm3,57 ;d takes lanes 1,2,3,0
+ pshufd xmm0,xmm0,78 ;same three shuffles for block B
+ pshufd xmm11,xmm11,147
+ pshufd xmm1,xmm1,57
+ dec r8
+ jnz NEAR $L$oop_128 ;loop until the 10 passes are done
+ paddd xmm8,XMMWORD[rsp] ;add the saved initial rows back into block A
+ paddd xmm9,XMMWORD[16+rsp]
+ paddd xmm2,XMMWORD[32+rsp]
+ paddd xmm3,XMMWORD[48+rsp]
+ paddd xmm1,XMMWORD[$L$one] ;block B row d: saved row d plus $L$one, i.e. the value it started from
+ paddd xmm10,XMMWORD[rsp] ;add the saved initial rows back into block B
+ paddd xmm11,XMMWORD[16+rsp]
+ paddd xmm0,XMMWORD[32+rsp]
+ paddd xmm1,XMMWORD[48+rsp]
+
+ movdqu xmm4,XMMWORD[rsi] ;XOR both result blocks with 128 bytes read from rsi (unaligned loads)
+ movdqu xmm5,XMMWORD[16+rsi]
+ pxor xmm8,xmm4
+ movdqu xmm4,XMMWORD[32+rsi]
+ pxor xmm9,xmm5
+ movdqu xmm5,XMMWORD[48+rsi]
+ pxor xmm2,xmm4
+ movdqu xmm4,XMMWORD[64+rsi]
+ pxor xmm3,xmm5
+ movdqu xmm5,XMMWORD[80+rsi]
+ pxor xmm10,xmm4
+ movdqu xmm4,XMMWORD[96+rsi]
+ pxor xmm11,xmm5
+ movdqu xmm5,XMMWORD[112+rsi]
+ pxor xmm0,xmm4
+ pxor xmm1,xmm5
+
+ movdqu XMMWORD[rdi],xmm8 ;store the 128 result bytes through rdi (unaligned stores)
+ movdqu XMMWORD[16+rdi],xmm9
+ movdqu XMMWORD[32+rdi],xmm2
+ movdqu XMMWORD[48+rdi],xmm3
+ movdqu XMMWORD[64+rdi],xmm10
+ movdqu XMMWORD[80+rdi],xmm11
+ movdqu XMMWORD[96+rdi],xmm0
+ movdqu XMMWORD[112+rdi],xmm1
+ movaps xmm6,XMMWORD[((-104))+r9] ;restore the callee-saved xmm6-xmm11
+ movaps xmm7,XMMWORD[((-88))+r9]
+ movaps xmm8,XMMWORD[((-72))+r9]
+ movaps xmm9,XMMWORD[((-56))+r9]
+ movaps xmm10,XMMWORD[((-40))+r9]
+ movaps xmm11,XMMWORD[((-24))+r9]
+ lea rsp,[r9] ;release the frame: rsp back to its value at $L$ChaCha20_128
+
+$L$128_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp] ;reload the rsi saved by the prologue
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ChaCha20_128:
+
+ALIGN 32
ChaCha20_4x:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
@@ -485,7 +696,10 @@ $L$SEH_begin_ChaCha20_4x:
mov r8,QWORD[40+rsp]
+
$L$ChaCha20_4x:
+ mov r9,rsp
+
mov r11,r10
shr r10,32
test r10,32
@@ -498,18 +712,18 @@ $L$ChaCha20_4x:
je NEAR $L$do_sse3_after_all
$L$proceed4x:
- lea r11,[((-120))+rsp]
- sub rsp,0x148+160
- movaps XMMWORD[(-48)+r11],xmm6
- movaps XMMWORD[(-32)+r11],xmm7
- movaps XMMWORD[(-16)+r11],xmm8
- movaps XMMWORD[r11],xmm9
- movaps XMMWORD[16+r11],xmm10
- movaps XMMWORD[32+r11],xmm11
- movaps XMMWORD[48+r11],xmm12
- movaps XMMWORD[64+r11],xmm13
- movaps XMMWORD[80+r11],xmm14
- movaps XMMWORD[96+r11],xmm15
+ sub rsp,0x140+168
+ movaps XMMWORD[(-168)+r9],xmm6
+ movaps XMMWORD[(-152)+r9],xmm7
+ movaps XMMWORD[(-136)+r9],xmm8
+ movaps XMMWORD[(-120)+r9],xmm9
+ movaps XMMWORD[(-104)+r9],xmm10
+ movaps XMMWORD[(-88)+r9],xmm11
+ movaps XMMWORD[(-72)+r9],xmm12
+ movaps XMMWORD[(-56)+r9],xmm13
+ movaps XMMWORD[(-40)+r9],xmm14
+ movaps XMMWORD[(-24)+r9],xmm15
+$L$4x_body:
movdqa xmm11,XMMWORD[$L$sigma]
movdqu xmm15,XMMWORD[rcx]
movdqu xmm7,XMMWORD[16+rcx]
@@ -1036,21 +1250,23 @@ $L$oop_tail4x:
jnz NEAR $L$oop_tail4x
$L$done4x:
- lea r11,[((320+48))+rsp]
- movaps xmm6,XMMWORD[((-48))+r11]
- movaps xmm7,XMMWORD[((-32))+r11]
- movaps xmm8,XMMWORD[((-16))+r11]
- movaps xmm9,XMMWORD[r11]
- movaps xmm10,XMMWORD[16+r11]
- movaps xmm11,XMMWORD[32+r11]
- movaps xmm12,XMMWORD[48+r11]
- movaps xmm13,XMMWORD[64+r11]
- movaps xmm14,XMMWORD[80+r11]
- movaps xmm15,XMMWORD[96+r11]
- add rsp,0x148+160
+ movaps xmm6,XMMWORD[((-168))+r9]
+ movaps xmm7,XMMWORD[((-152))+r9]
+ movaps xmm8,XMMWORD[((-136))+r9]
+ movaps xmm9,XMMWORD[((-120))+r9]
+ movaps xmm10,XMMWORD[((-104))+r9]
+ movaps xmm11,XMMWORD[((-88))+r9]
+ movaps xmm12,XMMWORD[((-72))+r9]
+ movaps xmm13,XMMWORD[((-56))+r9]
+ movaps xmm14,XMMWORD[((-40))+r9]
+ movaps xmm15,XMMWORD[((-24))+r9]
+ lea rsp,[r9]
+
+$L$4x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ChaCha20_4x:
ALIGN 32
@@ -1066,19 +1282,22 @@ $L$SEH_begin_ChaCha20_4xop:
mov r8,QWORD[40+rsp]
+
$L$ChaCha20_4xop:
- lea r11,[((-120))+rsp]
- sub rsp,0x148+160
- movaps XMMWORD[(-48)+r11],xmm6
- movaps XMMWORD[(-32)+r11],xmm7
- movaps XMMWORD[(-16)+r11],xmm8
- movaps XMMWORD[r11],xmm9
- movaps XMMWORD[16+r11],xmm10
- movaps XMMWORD[32+r11],xmm11
- movaps XMMWORD[48+r11],xmm12
- movaps XMMWORD[64+r11],xmm13
- movaps XMMWORD[80+r11],xmm14
- movaps XMMWORD[96+r11],xmm15
+ mov r9,rsp
+
+ sub rsp,0x140+168
+ movaps XMMWORD[(-168)+r9],xmm6
+ movaps XMMWORD[(-152)+r9],xmm7
+ movaps XMMWORD[(-136)+r9],xmm8
+ movaps XMMWORD[(-120)+r9],xmm9
+ movaps XMMWORD[(-104)+r9],xmm10
+ movaps XMMWORD[(-88)+r9],xmm11
+ movaps XMMWORD[(-72)+r9],xmm12
+ movaps XMMWORD[(-56)+r9],xmm13
+ movaps XMMWORD[(-40)+r9],xmm14
+ movaps XMMWORD[(-24)+r9],xmm15
+$L$4xop_body:
vzeroupper
vmovdqa xmm11,XMMWORD[$L$sigma]
@@ -1480,21 +1699,23 @@ $L$oop_tail4xop:
$L$done4xop:
vzeroupper
- lea r11,[((320+48))+rsp]
- movaps xmm6,XMMWORD[((-48))+r11]
- movaps xmm7,XMMWORD[((-32))+r11]
- movaps xmm8,XMMWORD[((-16))+r11]
- movaps xmm9,XMMWORD[r11]
- movaps xmm10,XMMWORD[16+r11]
- movaps xmm11,XMMWORD[32+r11]
- movaps xmm12,XMMWORD[48+r11]
- movaps xmm13,XMMWORD[64+r11]
- movaps xmm14,XMMWORD[80+r11]
- movaps xmm15,XMMWORD[96+r11]
- add rsp,0x148+160
+ movaps xmm6,XMMWORD[((-168))+r9]
+ movaps xmm7,XMMWORD[((-152))+r9]
+ movaps xmm8,XMMWORD[((-136))+r9]
+ movaps xmm9,XMMWORD[((-120))+r9]
+ movaps xmm10,XMMWORD[((-104))+r9]
+ movaps xmm11,XMMWORD[((-88))+r9]
+ movaps xmm12,XMMWORD[((-72))+r9]
+ movaps xmm13,XMMWORD[((-56))+r9]
+ movaps xmm14,XMMWORD[((-40))+r9]
+ movaps xmm15,XMMWORD[((-24))+r9]
+ lea rsp,[r9]
+
+$L$4xop_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ChaCha20_4xop:
ALIGN 32
@@ -1510,23 +1731,24 @@ $L$SEH_begin_ChaCha20_8x:
mov r8,QWORD[40+rsp]
+
$L$ChaCha20_8x:
- mov r10,rsp
- sub rsp,0x280+176
+ mov r9,rsp
+
+ sub rsp,0x280+168
and rsp,-32
- lea r11,[((656+48))+rsp]
- movaps XMMWORD[(-48)+r11],xmm6
- movaps XMMWORD[(-32)+r11],xmm7
- movaps XMMWORD[(-16)+r11],xmm8
- movaps XMMWORD[r11],xmm9
- movaps XMMWORD[16+r11],xmm10
- movaps XMMWORD[32+r11],xmm11
- movaps XMMWORD[48+r11],xmm12
- movaps XMMWORD[64+r11],xmm13
- movaps XMMWORD[80+r11],xmm14
- movaps XMMWORD[96+r11],xmm15
+ movaps XMMWORD[(-168)+r9],xmm6
+ movaps XMMWORD[(-152)+r9],xmm7
+ movaps XMMWORD[(-136)+r9],xmm8
+ movaps XMMWORD[(-120)+r9],xmm9
+ movaps XMMWORD[(-104)+r9],xmm10
+ movaps XMMWORD[(-88)+r9],xmm11
+ movaps XMMWORD[(-72)+r9],xmm12
+ movaps XMMWORD[(-56)+r9],xmm13
+ movaps XMMWORD[(-40)+r9],xmm14
+ movaps XMMWORD[(-24)+r9],xmm15
+$L$8x_body:
vzeroupper
- mov QWORD[640+rsp],r10
@@ -2117,19 +2339,1579 @@ $L$oop_tail8x:
$L$done8x:
vzeroall
- lea r11,[((656+48))+rsp]
- movaps xmm6,XMMWORD[((-48))+r11]
- movaps xmm7,XMMWORD[((-32))+r11]
- movaps xmm8,XMMWORD[((-16))+r11]
- movaps xmm9,XMMWORD[r11]
- movaps xmm10,XMMWORD[16+r11]
- movaps xmm11,XMMWORD[32+r11]
- movaps xmm12,XMMWORD[48+r11]
- movaps xmm13,XMMWORD[64+r11]
- movaps xmm14,XMMWORD[80+r11]
- movaps xmm15,XMMWORD[96+r11]
- mov rsp,QWORD[640+rsp]
+ movaps xmm6,XMMWORD[((-168))+r9]
+ movaps xmm7,XMMWORD[((-152))+r9]
+ movaps xmm8,XMMWORD[((-136))+r9]
+ movaps xmm9,XMMWORD[((-120))+r9]
+ movaps xmm10,XMMWORD[((-104))+r9]
+ movaps xmm11,XMMWORD[((-88))+r9]
+ movaps xmm12,XMMWORD[((-72))+r9]
+ movaps xmm13,XMMWORD[((-56))+r9]
+ movaps xmm14,XMMWORD[((-40))+r9]
+ movaps xmm15,XMMWORD[((-24))+r9]
+ lea rsp,[r9]
+
+$L$8x_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ChaCha20_8x:
+
+ALIGN 32
+ChaCha20_avx512: ;Win64 entry for the zmm path that produces four 64-byte blocks per pass (one per 128-bit lane)
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64; parked above the return address, reloaded at $L$avx512_epilogue
+ mov rax,rsp ;rax = rsp at entry
+$L$SEH_begin_ChaCha20_avx512:
+ mov rdi,rcx ;1st Win64 argument -> rdi (output is stored through it)
+ mov rsi,rdx ;2nd argument -> rsi (input is loaded through it)
+ mov rdx,r8 ;3rd argument -> rdx (remaining byte count, see the cmp/sub below)
+ mov rcx,r9 ;4th argument -> rcx (32 bytes are loaded through it below)
+ mov r8,QWORD[40+rsp] ;5th argument lives on the stack, past the return address and the 32-byte shadow space
+
+
+
+$L$ChaCha20_avx512: ;internal entry: ChaCha20_ctr32 branches here (jc after bt r10,48) with arguments already remapped
+ mov r9,rsp ;r9 = frame anchor for the XMM save slots and the final rsp restore
+
+ cmp rdx,512
+ ja NEAR $L$ChaCha20_16x ;more than 512 bytes: hand over to the 16x routine
+
+ sub rsp,64+40 ;64 bytes of tail scratch at [rsp] plus room for two XMM saves
+ movaps XMMWORD[(-40)+r9],xmm6 ;xmm6/xmm7 are callee-saved on Win64 and are used as output temporaries below
+ movaps XMMWORD[(-24)+r9],xmm7
+$L$avx512_body:
+ vbroadcasti32x4 zmm0,ZMMWORD[$L$sigma] ;row a in all four lanes: $L$sigma (ASCII "expand 32-byte k")
+ vbroadcasti32x4 zmm1,ZMMWORD[rcx] ;row b: first 16 bytes at rcx (presumably key words 0-3)
+ vbroadcasti32x4 zmm2,ZMMWORD[16+rcx] ;row c: next 16 bytes at rcx (presumably key words 4-7)
+ vbroadcasti32x4 zmm3,ZMMWORD[r8] ;row d: 16 bytes at r8 (presumably block counter + nonce)
+
+ vmovdqa32 zmm16,zmm0 ;zmm16-zmm18 keep the initial rows a-c for every pass
+ vmovdqa32 zmm17,zmm1
+ vmovdqa32 zmm18,zmm2
+ vpaddd zmm3,zmm3,ZMMWORD[$L$zeroz] ;$L$zeroz adds 0,1,2,3 to the first dword of lanes 0..3
+ vmovdqa32 zmm20,ZMMWORD[$L$fourz] ;per-pass step: $L$fourz adds 4 to the first dword of every lane
+ mov r8,10 ;r8 is reused as the loop counter: 10 passes through $L$oop_avx512
+ vmovdqa32 zmm19,zmm3 ;zmm19 = initial row d of the current pass
+ jmp NEAR $L$oop_avx512
+
+ALIGN 16
+$L$oop_outer_avx512: ;set up the next group of four blocks
+ vmovdqa32 zmm0,zmm16 ;reload the initial rows a-c
+ vmovdqa32 zmm1,zmm17
+ vmovdqa32 zmm2,zmm18
+ vpaddd zmm3,zmm19,zmm20 ;row d = previous initial row d + $L$fourz
+ mov r8,10
+ vmovdqa32 zmm19,zmm3 ;remember the new initial row d
+ jmp NEAR $L$oop_avx512
+
+ALIGN 32
+$L$oop_avx512: ;rows a,b,c,d = zmm0,zmm1,zmm2,zmm3; each 128-bit lane is an independent block
+ vpaddd zmm0,zmm0,zmm1 ;a += b
+ vpxord zmm3,zmm3,zmm0 ;d ^= a
+ vprold zmm3,zmm3,16 ;rotate d left by 16
+ vpaddd zmm2,zmm2,zmm3 ;c += d
+ vpxord zmm1,zmm1,zmm2 ;b ^= c
+ vprold zmm1,zmm1,12 ;rotate b left by 12
+ vpaddd zmm0,zmm0,zmm1 ;a += b
+ vpxord zmm3,zmm3,zmm0 ;d ^= a
+ vprold zmm3,zmm3,8 ;rotate d left by 8
+ vpaddd zmm2,zmm2,zmm3 ;c += d
+ vpxord zmm1,zmm1,zmm2 ;b ^= c
+ vprold zmm1,zmm1,7 ;rotate b left by 7
+ vpshufd zmm2,zmm2,78 ;re-align rows for the second half: c takes dwords 2,3,0,1 within each lane
+ vpshufd zmm1,zmm1,57 ;b takes dwords 1,2,3,0
+ vpshufd zmm3,zmm3,147 ;d takes dwords 3,0,1,2
+ vpaddd zmm0,zmm0,zmm1 ;second half: identical 16/12/8/7 sequence on the re-aligned rows
+ vpxord zmm3,zmm3,zmm0
+ vprold zmm3,zmm3,16
+ vpaddd zmm2,zmm2,zmm3
+ vpxord zmm1,zmm1,zmm2
+ vprold zmm1,zmm1,12
+ vpaddd zmm0,zmm0,zmm1
+ vpxord zmm3,zmm3,zmm0
+ vprold zmm3,zmm3,8
+ vpaddd zmm2,zmm2,zmm3
+ vpxord zmm1,zmm1,zmm2
+ vprold zmm1,zmm1,7
+ vpshufd zmm2,zmm2,78 ;undo the re-alignment: c takes dwords 2,3,0,1
+ vpshufd zmm1,zmm1,147 ;b takes dwords 3,0,1,2
+ vpshufd zmm3,zmm3,57 ;d takes dwords 1,2,3,0
+ dec r8
+ jnz NEAR $L$oop_avx512 ;loop until the 10 passes are done (r8 is zero on exit)
+ vpaddd zmm0,zmm0,zmm16 ;add the initial rows back in
+ vpaddd zmm1,zmm1,zmm17
+ vpaddd zmm2,zmm2,zmm18
+ vpaddd zmm3,zmm3,zmm19
+
+ sub rdx,64 ;consume one 64-byte block from the byte count
+ jb NEAR $L$tail64_avx512 ;fewer than 64 bytes were left: byte-wise tail using lane 0
+
+ vpxor xmm4,xmm0,XMMWORD[rsi] ;lane 0 of rows a-d XOR 64 input bytes
+ vpxor xmm5,xmm1,XMMWORD[16+rsi]
+ vpxor xmm6,xmm2,XMMWORD[32+rsi]
+ vpxor xmm7,xmm3,XMMWORD[48+rsi]
+ lea rsi,[64+rsi] ;advance the input pointer (lea leaves the flags from sub intact)
+
+ vmovdqu XMMWORD[rdi],xmm4 ;store 64 output bytes
+ vmovdqu XMMWORD[16+rdi],xmm5
+ vmovdqu XMMWORD[32+rdi],xmm6
+ vmovdqu XMMWORD[48+rdi],xmm7
+ lea rdi,[64+rdi] ;advance the output pointer
+
+ jz NEAR $L$done_avx512 ;ZF still comes from sub rdx,64: nothing left to process
+
+ vextracti32x4 xmm4,zmm0,1 ;pull lane 1 (the next block) into xmm4-xmm7
+ vextracti32x4 xmm5,zmm1,1
+ vextracti32x4 xmm6,zmm2,1
+ vextracti32x4 xmm7,zmm3,1
+
+ sub rdx,64
+ jb NEAR $L$tail_avx512 ;partial block: byte-wise tail using xmm4-xmm7
+
+ vpxor xmm4,xmm4,XMMWORD[rsi] ;lane 1 XOR 64 input bytes
+ vpxor xmm5,xmm5,XMMWORD[16+rsi]
+ vpxor xmm6,xmm6,XMMWORD[32+rsi]
+ vpxor xmm7,xmm7,XMMWORD[48+rsi]
+ lea rsi,[64+rsi]
+
+ vmovdqu XMMWORD[rdi],xmm4
+ vmovdqu XMMWORD[16+rdi],xmm5
+ vmovdqu XMMWORD[32+rdi],xmm6
+ vmovdqu XMMWORD[48+rdi],xmm7
+ lea rdi,[64+rdi]
+
+ jz NEAR $L$done_avx512 ;ZF from the sub above: done
+
+ vextracti32x4 xmm4,zmm0,2 ;lane 2
+ vextracti32x4 xmm5,zmm1,2
+ vextracti32x4 xmm6,zmm2,2
+ vextracti32x4 xmm7,zmm3,2
+
+ sub rdx,64
+ jb NEAR $L$tail_avx512
+
+ vpxor xmm4,xmm4,XMMWORD[rsi] ;lane 2 XOR 64 input bytes
+ vpxor xmm5,xmm5,XMMWORD[16+rsi]
+ vpxor xmm6,xmm6,XMMWORD[32+rsi]
+ vpxor xmm7,xmm7,XMMWORD[48+rsi]
+ lea rsi,[64+rsi]
+
+ vmovdqu XMMWORD[rdi],xmm4
+ vmovdqu XMMWORD[16+rdi],xmm5
+ vmovdqu XMMWORD[32+rdi],xmm6
+ vmovdqu XMMWORD[48+rdi],xmm7
+ lea rdi,[64+rdi]
+
+ jz NEAR $L$done_avx512 ;ZF from the sub above: done
+
+ vextracti32x4 xmm4,zmm0,3 ;lane 3
+ vextracti32x4 xmm5,zmm1,3
+ vextracti32x4 xmm6,zmm2,3
+ vextracti32x4 xmm7,zmm3,3
+
+ sub rdx,64
+ jb NEAR $L$tail_avx512
+
+ vpxor xmm4,xmm4,XMMWORD[rsi] ;lane 3 XOR 64 input bytes
+ vpxor xmm5,xmm5,XMMWORD[16+rsi]
+ vpxor xmm6,xmm6,XMMWORD[32+rsi]
+ vpxor xmm7,xmm7,XMMWORD[48+rsi]
+ lea rsi,[64+rsi]
+
+ vmovdqu XMMWORD[rdi],xmm4
+ vmovdqu XMMWORD[16+rdi],xmm5
+ vmovdqu XMMWORD[32+rdi],xmm6
+ vmovdqu XMMWORD[48+rdi],xmm7
+ lea rdi,[64+rdi]
+
+ jnz NEAR $L$oop_outer_avx512 ;bytes remain after all four lanes: generate four more blocks
+
+ jmp NEAR $L$done_avx512
+
+ALIGN 16
+$L$tail64_avx512: ;partial first block: spill lane 0 of rows a-d to the 64-byte scratch
+ vmovdqa XMMWORD[rsp],xmm0
+ vmovdqa XMMWORD[16+rsp],xmm1
+ vmovdqa XMMWORD[32+rsp],xmm2
+ vmovdqa XMMWORD[48+rsp],xmm3
+ add rdx,64 ;undo the subtraction that borrowed: rdx = leftover byte count
+ jmp NEAR $L$oop_tail_avx512
+
+ALIGN 16
+$L$tail_avx512: ;partial later block: spill the extracted lane (xmm4-xmm7) to the scratch
+ vmovdqa XMMWORD[rsp],xmm4
+ vmovdqa XMMWORD[16+rsp],xmm5
+ vmovdqa XMMWORD[32+rsp],xmm6
+ vmovdqa XMMWORD[48+rsp],xmm7
+ add rdx,64 ;undo the subtraction that borrowed: rdx = leftover byte count
+
+$L$oop_tail_avx512: ;byte loop; r8 is the index and is zero here because the round loop counted it down to zero
+ movzx eax,BYTE[r8*1+rsi] ;input byte
+ movzx ecx,BYTE[r8*1+rsp] ;matching byte of the spilled block
+ lea r8,[1+r8]
+ xor eax,ecx
+ mov BYTE[((-1))+r8*1+rdi],al ;output byte (index was already incremented, hence the -1)
+ dec rdx
+ jnz NEAR $L$oop_tail_avx512
+
+ vmovdqu32 ZMMWORD[rsp],zmm16 ;overwrite the 64-byte scratch with row a so the spilled block does not stay on the stack
+
+$L$done_avx512:
+ vzeroall ;clear vector registers 0-15 before the xmm6/xmm7 restore
+ movaps xmm6,XMMWORD[((-40))+r9] ;restore the callee-saved xmm6/xmm7
+ movaps xmm7,XMMWORD[((-24))+r9]
+ lea rsp,[r9] ;release the frame
+
+$L$avx512_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp] ;reload the rsi saved by the prologue
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ChaCha20_avx512:
+
+ALIGN 32
+ChaCha20_avx512vl: ;Win64 entry for the ymm path that produces two 64-byte blocks per pass (one per 128-bit lane)
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64; parked above the return address, reloaded at $L$avx512vl_epilogue
+ mov rax,rsp ;rax = rsp at entry
+$L$SEH_begin_ChaCha20_avx512vl:
+ mov rdi,rcx ;1st Win64 argument -> rdi (output is stored through it)
+ mov rsi,rdx ;2nd argument -> rsi (input is loaded through it)
+ mov rdx,r8 ;3rd argument -> rdx (remaining byte count, see the cmp/sub below)
+ mov rcx,r9 ;4th argument -> rcx (32 bytes are loaded through it below)
+ mov r8,QWORD[40+rsp] ;5th argument lives on the stack, past the return address and the 32-byte shadow space
+
+
+
+$L$ChaCha20_avx512vl: ;internal entry: ChaCha20_ctr32 branches here (js after test r10,r10) with arguments already remapped
+ mov r9,rsp ;r9 = frame anchor for the XMM save slots and the final rsp restore
+
+ cmp rdx,128
+ ja NEAR $L$ChaCha20_8xvl ;more than 128 bytes: hand over to the 8xvl routine
+
+ sub rsp,64+40 ;64 bytes of tail scratch at [rsp] plus room for two XMM saves
+ movaps XMMWORD[(-40)+r9],xmm6 ;xmm6/xmm7 are callee-saved on Win64 and are used as output temporaries below
+ movaps XMMWORD[(-24)+r9],xmm7
+$L$avx512vl_body:
+ vbroadcasti128 ymm0,XMMWORD[$L$sigma] ;row a in both lanes: $L$sigma (ASCII "expand 32-byte k")
+ vbroadcasti128 ymm1,XMMWORD[rcx] ;row b: first 16 bytes at rcx (presumably key words 0-3)
+ vbroadcasti128 ymm2,XMMWORD[16+rcx] ;row c: next 16 bytes at rcx (presumably key words 4-7)
+ vbroadcasti128 ymm3,XMMWORD[r8] ;row d: 16 bytes at r8 (presumably block counter + nonce)
+
+ vmovdqa32 ymm16,ymm0 ;ymm16-ymm18 keep the initial rows a-c for every pass
+ vmovdqa32 ymm17,ymm1
+ vmovdqa32 ymm18,ymm2
+ vpaddd ymm3,ymm3,YMMWORD[$L$zeroz] ;first 32 bytes of $L$zeroz: add 0 and 1 to the first dword of lanes 0 and 1
+ vmovdqa32 ymm20,YMMWORD[$L$twoy] ;per-pass step: $L$twoy adds 2 to the first dword of each lane
+ mov r8,10 ;r8 is reused as the loop counter: 10 passes through $L$oop_avx512vl
+ vmovdqa32 ymm19,ymm3 ;ymm19 = initial row d of the current pass
+ jmp NEAR $L$oop_avx512vl
+
+ALIGN 16
+$L$oop_outer_avx512vl: ;next pair of blocks; ymm0/ymm1 were already reloaded just before the jnz that leads here
+ vmovdqa32 ymm2,ymm18 ;reload initial row c
+ vpaddd ymm3,ymm19,ymm20 ;row d = previous initial row d + $L$twoy
+ mov r8,10
+ vmovdqa32 ymm19,ymm3 ;remember the new initial row d
+ jmp NEAR $L$oop_avx512vl
+
+ALIGN 32
+$L$oop_avx512vl: ;rows a,b,c,d = ymm0,ymm1,ymm2,ymm3; each 128-bit lane is an independent block
+ vpaddd ymm0,ymm0,ymm1 ;a += b
+ vpxor ymm3,ymm3,ymm0 ;d ^= a
+ vprold ymm3,ymm3,16 ;rotate d left by 16
+ vpaddd ymm2,ymm2,ymm3 ;c += d
+ vpxor ymm1,ymm1,ymm2 ;b ^= c
+ vprold ymm1,ymm1,12 ;rotate b left by 12
+ vpaddd ymm0,ymm0,ymm1 ;a += b
+ vpxor ymm3,ymm3,ymm0 ;d ^= a
+ vprold ymm3,ymm3,8 ;rotate d left by 8
+ vpaddd ymm2,ymm2,ymm3 ;c += d
+ vpxor ymm1,ymm1,ymm2 ;b ^= c
+ vprold ymm1,ymm1,7 ;rotate b left by 7
+ vpshufd ymm2,ymm2,78 ;re-align rows for the second half: c takes dwords 2,3,0,1 within each lane
+ vpshufd ymm1,ymm1,57 ;b takes dwords 1,2,3,0
+ vpshufd ymm3,ymm3,147 ;d takes dwords 3,0,1,2
+ vpaddd ymm0,ymm0,ymm1 ;second half: identical 16/12/8/7 sequence on the re-aligned rows
+ vpxor ymm3,ymm3,ymm0
+ vprold ymm3,ymm3,16
+ vpaddd ymm2,ymm2,ymm3
+ vpxor ymm1,ymm1,ymm2
+ vprold ymm1,ymm1,12
+ vpaddd ymm0,ymm0,ymm1
+ vpxor ymm3,ymm3,ymm0
+ vprold ymm3,ymm3,8
+ vpaddd ymm2,ymm2,ymm3
+ vpxor ymm1,ymm1,ymm2
+ vprold ymm1,ymm1,7
+ vpshufd ymm2,ymm2,78 ;undo the re-alignment: c takes dwords 2,3,0,1
+ vpshufd ymm1,ymm1,147 ;b takes dwords 3,0,1,2
+ vpshufd ymm3,ymm3,57 ;d takes dwords 1,2,3,0
+ dec r8
+ jnz NEAR $L$oop_avx512vl ;loop until the 10 passes are done (r8 is zero on exit)
+ vpaddd ymm0,ymm0,ymm16 ;add the initial rows back in
+ vpaddd ymm1,ymm1,ymm17
+ vpaddd ymm2,ymm2,ymm18
+ vpaddd ymm3,ymm3,ymm19
+
+ sub rdx,64 ;consume one 64-byte block from the byte count
+ jb NEAR $L$tail64_avx512vl ;fewer than 64 bytes were left: byte-wise tail using lane 0
+
+ vpxor xmm4,xmm0,XMMWORD[rsi] ;lane 0 of rows a-d XOR 64 input bytes
+ vpxor xmm5,xmm1,XMMWORD[16+rsi]
+ vpxor xmm6,xmm2,XMMWORD[32+rsi]
+ vpxor xmm7,xmm3,XMMWORD[48+rsi]
+ lea rsi,[64+rsi] ;advance the input pointer (lea leaves the flags from sub intact)
+
+ vmovdqu XMMWORD[rdi],xmm4 ;store 64 output bytes
+ vmovdqu XMMWORD[16+rdi],xmm5
+ vmovdqu XMMWORD[32+rdi],xmm6
+ vmovdqu XMMWORD[48+rdi],xmm7
+ lea rdi,[64+rdi] ;advance the output pointer
+
+ jz NEAR $L$done_avx512vl ;ZF still comes from sub rdx,64: nothing left to process
+
+ vextracti128 xmm4,ymm0,1 ;pull lane 1 (the second block) into xmm4-xmm7
+ vextracti128 xmm5,ymm1,1
+ vextracti128 xmm6,ymm2,1
+ vextracti128 xmm7,ymm3,1
+
+ sub rdx,64
+ jb NEAR $L$tail_avx512vl ;partial block: byte-wise tail using xmm4-xmm7
+
+ vpxor xmm4,xmm4,XMMWORD[rsi] ;lane 1 XOR 64 input bytes
+ vpxor xmm5,xmm5,XMMWORD[16+rsi]
+ vpxor xmm6,xmm6,XMMWORD[32+rsi]
+ vpxor xmm7,xmm7,XMMWORD[48+rsi]
+ lea rsi,[64+rsi]
+
+ vmovdqu XMMWORD[rdi],xmm4
+ vmovdqu XMMWORD[16+rdi],xmm5
+ vmovdqu XMMWORD[32+rdi],xmm6
+ vmovdqu XMMWORD[48+rdi],xmm7
+ lea rdi,[64+rdi]
+
+ vmovdqa32 ymm0,ymm16 ;reload rows a/b here; these moves do not touch the flags the jnz below tests
+ vmovdqa32 ymm1,ymm17
+ jnz NEAR $L$oop_outer_avx512vl ;bytes remain after both lanes: generate two more blocks
+
+ jmp NEAR $L$done_avx512vl
+
+ALIGN 16
+$L$tail64_avx512vl: ;partial first block: spill lane 0 of rows a-d to the 64-byte scratch
+ vmovdqa XMMWORD[rsp],xmm0
+ vmovdqa XMMWORD[16+rsp],xmm1
+ vmovdqa XMMWORD[32+rsp],xmm2
+ vmovdqa XMMWORD[48+rsp],xmm3
+ add rdx,64 ;undo the subtraction that borrowed: rdx = leftover byte count
+ jmp NEAR $L$oop_tail_avx512vl
+
+ALIGN 16
+$L$tail_avx512vl: ;partial second block: spill the extracted lane (xmm4-xmm7) to the scratch
+ vmovdqa XMMWORD[rsp],xmm4
+ vmovdqa XMMWORD[16+rsp],xmm5
+ vmovdqa XMMWORD[32+rsp],xmm6
+ vmovdqa XMMWORD[48+rsp],xmm7
+ add rdx,64 ;undo the subtraction that borrowed: rdx = leftover byte count
+
+$L$oop_tail_avx512vl: ;byte loop; r8 is the index and is zero here because the round loop counted it down to zero
+ movzx eax,BYTE[r8*1+rsi] ;input byte
+ movzx ecx,BYTE[r8*1+rsp] ;matching byte of the spilled block
+ lea r8,[1+r8]
+ xor eax,ecx
+ mov BYTE[((-1))+r8*1+rdi],al ;output byte (index was already incremented, hence the -1)
+ dec rdx
+ jnz NEAR $L$oop_tail_avx512vl
+
+ vmovdqu32 YMMWORD[rsp],ymm16 ;overwrite the 64-byte scratch with row a (two 32-byte stores) so the spilled block does not stay on the stack
+ vmovdqu32 YMMWORD[32+rsp],ymm16
+
+$L$done_avx512vl:
+ vzeroall ;clear vector registers 0-15 before the xmm6/xmm7 restore
+ movaps xmm6,XMMWORD[((-40))+r9] ;restore the callee-saved xmm6/xmm7
+ movaps xmm7,XMMWORD[((-24))+r9]
+ lea rsp,[r9] ;release the frame
+
+$L$avx512vl_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp] ;reload the rsi saved by the prologue
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ChaCha20_avx512vl:
+
+ALIGN 32
+ChaCha20_16x:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ChaCha20_16x:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD[40+rsp]
+
+
+
+$L$ChaCha20_16x:
+ mov r9,rsp
+
+ sub rsp,64+168
+ and rsp,-64
+ movaps XMMWORD[(-168)+r9],xmm6
+ movaps XMMWORD[(-152)+r9],xmm7
+ movaps XMMWORD[(-136)+r9],xmm8
+ movaps XMMWORD[(-120)+r9],xmm9
+ movaps XMMWORD[(-104)+r9],xmm10
+ movaps XMMWORD[(-88)+r9],xmm11
+ movaps XMMWORD[(-72)+r9],xmm12
+ movaps XMMWORD[(-56)+r9],xmm13
+ movaps XMMWORD[(-40)+r9],xmm14
+ movaps XMMWORD[(-24)+r9],xmm15
+$L$16x_body:
+ vzeroupper
+
+ lea r10,[$L$sigma]
+ vbroadcasti32x4 zmm3,ZMMWORD[r10]
+ vbroadcasti32x4 zmm7,ZMMWORD[rcx]
+ vbroadcasti32x4 zmm11,ZMMWORD[16+rcx]
+ vbroadcasti32x4 zmm15,ZMMWORD[r8]
+
+ vpshufd zmm0,zmm3,0x00
+ vpshufd zmm1,zmm3,0x55
+ vpshufd zmm2,zmm3,0xaa
+ vpshufd zmm3,zmm3,0xff
+ vmovdqa64 zmm16,zmm0
+ vmovdqa64 zmm17,zmm1
+ vmovdqa64 zmm18,zmm2
+ vmovdqa64 zmm19,zmm3
+
+ vpshufd zmm4,zmm7,0x00
+ vpshufd zmm5,zmm7,0x55
+ vpshufd zmm6,zmm7,0xaa
+ vpshufd zmm7,zmm7,0xff
+ vmovdqa64 zmm20,zmm4
+ vmovdqa64 zmm21,zmm5
+ vmovdqa64 zmm22,zmm6
+ vmovdqa64 zmm23,zmm7
+
+ vpshufd zmm8,zmm11,0x00
+ vpshufd zmm9,zmm11,0x55
+ vpshufd zmm10,zmm11,0xaa
+ vpshufd zmm11,zmm11,0xff
+ vmovdqa64 zmm24,zmm8
+ vmovdqa64 zmm25,zmm9
+ vmovdqa64 zmm26,zmm10
+ vmovdqa64 zmm27,zmm11
+
+ vpshufd zmm12,zmm15,0x00
+ vpshufd zmm13,zmm15,0x55
+ vpshufd zmm14,zmm15,0xaa
+ vpshufd zmm15,zmm15,0xff
+ vpaddd zmm12,zmm12,ZMMWORD[$L$incz]
+ vmovdqa64 zmm28,zmm12
+ vmovdqa64 zmm29,zmm13
+ vmovdqa64 zmm30,zmm14
+ vmovdqa64 zmm31,zmm15
+
+ mov eax,10
+ jmp NEAR $L$oop16x
+
+ALIGN 32
+$L$oop_outer16x:
+ vpbroadcastd zmm0,DWORD[r10]
+ vpbroadcastd zmm1,DWORD[4+r10]
+ vpbroadcastd zmm2,DWORD[8+r10]
+ vpbroadcastd zmm3,DWORD[12+r10]
+ vpaddd zmm28,zmm28,ZMMWORD[$L$sixteen]
+ vmovdqa64 zmm4,zmm20
+ vmovdqa64 zmm5,zmm21
+ vmovdqa64 zmm6,zmm22
+ vmovdqa64 zmm7,zmm23
+ vmovdqa64 zmm8,zmm24
+ vmovdqa64 zmm9,zmm25
+ vmovdqa64 zmm10,zmm26
+ vmovdqa64 zmm11,zmm27
+ vmovdqa64 zmm12,zmm28
+ vmovdqa64 zmm13,zmm29
+ vmovdqa64 zmm14,zmm30
+ vmovdqa64 zmm15,zmm31
+
+ vmovdqa64 zmm16,zmm0
+ vmovdqa64 zmm17,zmm1
+ vmovdqa64 zmm18,zmm2
+ vmovdqa64 zmm19,zmm3
+
+ mov eax,10
+ jmp NEAR $L$oop16x
+
+ALIGN 32
+$L$oop16x:
+ vpaddd zmm0,zmm0,zmm4
+ vpaddd zmm1,zmm1,zmm5
+ vpaddd zmm2,zmm2,zmm6
+ vpaddd zmm3,zmm3,zmm7
+ vpxord zmm12,zmm12,zmm0
+ vpxord zmm13,zmm13,zmm1
+ vpxord zmm14,zmm14,zmm2
+ vpxord zmm15,zmm15,zmm3
+ vprold zmm12,zmm12,16
+ vprold zmm13,zmm13,16
+ vprold zmm14,zmm14,16
+ vprold zmm15,zmm15,16
+ vpaddd zmm8,zmm8,zmm12
+ vpaddd zmm9,zmm9,zmm13
+ vpaddd zmm10,zmm10,zmm14
+ vpaddd zmm11,zmm11,zmm15
+ vpxord zmm4,zmm4,zmm8
+ vpxord zmm5,zmm5,zmm9
+ vpxord zmm6,zmm6,zmm10
+ vpxord zmm7,zmm7,zmm11
+ vprold zmm4,zmm4,12
+ vprold zmm5,zmm5,12
+ vprold zmm6,zmm6,12
+ vprold zmm7,zmm7,12
+ vpaddd zmm0,zmm0,zmm4
+ vpaddd zmm1,zmm1,zmm5
+ vpaddd zmm2,zmm2,zmm6
+ vpaddd zmm3,zmm3,zmm7
+ vpxord zmm12,zmm12,zmm0
+ vpxord zmm13,zmm13,zmm1
+ vpxord zmm14,zmm14,zmm2
+ vpxord zmm15,zmm15,zmm3
+ vprold zmm12,zmm12,8
+ vprold zmm13,zmm13,8
+ vprold zmm14,zmm14,8
+ vprold zmm15,zmm15,8
+ vpaddd zmm8,zmm8,zmm12
+ vpaddd zmm9,zmm9,zmm13
+ vpaddd zmm10,zmm10,zmm14
+ vpaddd zmm11,zmm11,zmm15
+ vpxord zmm4,zmm4,zmm8
+ vpxord zmm5,zmm5,zmm9
+ vpxord zmm6,zmm6,zmm10
+ vpxord zmm7,zmm7,zmm11
+ vprold zmm4,zmm4,7
+ vprold zmm5,zmm5,7
+ vprold zmm6,zmm6,7
+ vprold zmm7,zmm7,7
+ vpaddd zmm0,zmm0,zmm5
+ vpaddd zmm1,zmm1,zmm6
+ vpaddd zmm2,zmm2,zmm7
+ vpaddd zmm3,zmm3,zmm4
+ vpxord zmm15,zmm15,zmm0
+ vpxord zmm12,zmm12,zmm1
+ vpxord zmm13,zmm13,zmm2
+ vpxord zmm14,zmm14,zmm3
+ vprold zmm15,zmm15,16
+ vprold zmm12,zmm12,16
+ vprold zmm13,zmm13,16
+ vprold zmm14,zmm14,16
+ vpaddd zmm10,zmm10,zmm15
+ vpaddd zmm11,zmm11,zmm12
+ vpaddd zmm8,zmm8,zmm13
+ vpaddd zmm9,zmm9,zmm14
+ vpxord zmm5,zmm5,zmm10
+ vpxord zmm6,zmm6,zmm11
+ vpxord zmm7,zmm7,zmm8
+ vpxord zmm4,zmm4,zmm9
+ vprold zmm5,zmm5,12
+ vprold zmm6,zmm6,12
+ vprold zmm7,zmm7,12
+ vprold zmm4,zmm4,12
+ vpaddd zmm0,zmm0,zmm5
+ vpaddd zmm1,zmm1,zmm6
+ vpaddd zmm2,zmm2,zmm7
+ vpaddd zmm3,zmm3,zmm4
+ vpxord zmm15,zmm15,zmm0
+ vpxord zmm12,zmm12,zmm1
+ vpxord zmm13,zmm13,zmm2
+ vpxord zmm14,zmm14,zmm3
+ vprold zmm15,zmm15,8
+ vprold zmm12,zmm12,8
+ vprold zmm13,zmm13,8
+ vprold zmm14,zmm14,8
+ vpaddd zmm10,zmm10,zmm15
+ vpaddd zmm11,zmm11,zmm12
+ vpaddd zmm8,zmm8,zmm13
+ vpaddd zmm9,zmm9,zmm14
+ vpxord zmm5,zmm5,zmm10
+ vpxord zmm6,zmm6,zmm11
+ vpxord zmm7,zmm7,zmm8
+ vpxord zmm4,zmm4,zmm9
+ vprold zmm5,zmm5,7
+ vprold zmm6,zmm6,7
+ vprold zmm7,zmm7,7
+ vprold zmm4,zmm4,7
+ dec eax
+ jnz NEAR $L$oop16x
+
+ vpaddd zmm0,zmm0,zmm16
+ vpaddd zmm1,zmm1,zmm17
+ vpaddd zmm2,zmm2,zmm18
+ vpaddd zmm3,zmm3,zmm19
+
+ vpunpckldq zmm18,zmm0,zmm1
+ vpunpckldq zmm19,zmm2,zmm3
+ vpunpckhdq zmm0,zmm0,zmm1
+ vpunpckhdq zmm2,zmm2,zmm3
+ vpunpcklqdq zmm1,zmm18,zmm19
+ vpunpckhqdq zmm18,zmm18,zmm19
+ vpunpcklqdq zmm3,zmm0,zmm2
+ vpunpckhqdq zmm0,zmm0,zmm2
+ vpaddd zmm4,zmm4,zmm20
+ vpaddd zmm5,zmm5,zmm21
+ vpaddd zmm6,zmm6,zmm22
+ vpaddd zmm7,zmm7,zmm23
+
+ vpunpckldq zmm2,zmm4,zmm5
+ vpunpckldq zmm19,zmm6,zmm7
+ vpunpckhdq zmm4,zmm4,zmm5
+ vpunpckhdq zmm6,zmm6,zmm7
+ vpunpcklqdq zmm5,zmm2,zmm19
+ vpunpckhqdq zmm2,zmm2,zmm19
+ vpunpcklqdq zmm7,zmm4,zmm6
+ vpunpckhqdq zmm4,zmm4,zmm6
+ vshufi32x4 zmm19,zmm1,zmm5,0x44
+ vshufi32x4 zmm5,zmm1,zmm5,0xee
+ vshufi32x4 zmm1,zmm18,zmm2,0x44
+ vshufi32x4 zmm2,zmm18,zmm2,0xee
+ vshufi32x4 zmm18,zmm3,zmm7,0x44
+ vshufi32x4 zmm7,zmm3,zmm7,0xee
+ vshufi32x4 zmm3,zmm0,zmm4,0x44
+ vshufi32x4 zmm4,zmm0,zmm4,0xee
+ vpaddd zmm8,zmm8,zmm24
+ vpaddd zmm9,zmm9,zmm25
+ vpaddd zmm10,zmm10,zmm26
+ vpaddd zmm11,zmm11,zmm27
+
+ vpunpckldq zmm6,zmm8,zmm9
+ vpunpckldq zmm0,zmm10,zmm11
+ vpunpckhdq zmm8,zmm8,zmm9
+ vpunpckhdq zmm10,zmm10,zmm11
+ vpunpcklqdq zmm9,zmm6,zmm0
+ vpunpckhqdq zmm6,zmm6,zmm0
+ vpunpcklqdq zmm11,zmm8,zmm10
+ vpunpckhqdq zmm8,zmm8,zmm10
+ vpaddd zmm12,zmm12,zmm28
+ vpaddd zmm13,zmm13,zmm29
+ vpaddd zmm14,zmm14,zmm30
+ vpaddd zmm15,zmm15,zmm31
+
+ vpunpckldq zmm10,zmm12,zmm13
+ vpunpckldq zmm0,zmm14,zmm15
+ vpunpckhdq zmm12,zmm12,zmm13
+ vpunpckhdq zmm14,zmm14,zmm15
+ vpunpcklqdq zmm13,zmm10,zmm0
+ vpunpckhqdq zmm10,zmm10,zmm0
+ vpunpcklqdq zmm15,zmm12,zmm14
+ vpunpckhqdq zmm12,zmm12,zmm14
+ vshufi32x4 zmm0,zmm9,zmm13,0x44
+ vshufi32x4 zmm13,zmm9,zmm13,0xee
+ vshufi32x4 zmm9,zmm6,zmm10,0x44
+ vshufi32x4 zmm10,zmm6,zmm10,0xee
+ vshufi32x4 zmm6,zmm11,zmm15,0x44
+ vshufi32x4 zmm15,zmm11,zmm15,0xee
+ vshufi32x4 zmm11,zmm8,zmm12,0x44
+ vshufi32x4 zmm12,zmm8,zmm12,0xee
+ vshufi32x4 zmm16,zmm19,zmm0,0x88
+ vshufi32x4 zmm19,zmm19,zmm0,0xdd
+ vshufi32x4 zmm0,zmm5,zmm13,0x88
+ vshufi32x4 zmm13,zmm5,zmm13,0xdd
+ vshufi32x4 zmm17,zmm1,zmm9,0x88
+ vshufi32x4 zmm1,zmm1,zmm9,0xdd
+ vshufi32x4 zmm9,zmm2,zmm10,0x88
+ vshufi32x4 zmm10,zmm2,zmm10,0xdd
+ vshufi32x4 zmm14,zmm18,zmm6,0x88
+ vshufi32x4 zmm18,zmm18,zmm6,0xdd
+ vshufi32x4 zmm6,zmm7,zmm15,0x88
+ vshufi32x4 zmm15,zmm7,zmm15,0xdd
+ vshufi32x4 zmm8,zmm3,zmm11,0x88
+ vshufi32x4 zmm3,zmm3,zmm11,0xdd
+ vshufi32x4 zmm11,zmm4,zmm12,0x88
+ vshufi32x4 zmm12,zmm4,zmm12,0xdd
+ cmp rdx,64*16
+ jb NEAR $L$tail16x
+
+ vpxord zmm16,zmm16,ZMMWORD[rsi]
+ vpxord zmm17,zmm17,ZMMWORD[64+rsi]
+ vpxord zmm14,zmm14,ZMMWORD[128+rsi]
+ vpxord zmm8,zmm8,ZMMWORD[192+rsi]
+ vmovdqu32 ZMMWORD[rdi],zmm16
+ vmovdqu32 ZMMWORD[64+rdi],zmm17
+ vmovdqu32 ZMMWORD[128+rdi],zmm14
+ vmovdqu32 ZMMWORD[192+rdi],zmm8
+
+ vpxord zmm19,zmm19,ZMMWORD[256+rsi]
+ vpxord zmm1,zmm1,ZMMWORD[320+rsi]
+ vpxord zmm18,zmm18,ZMMWORD[384+rsi]
+ vpxord zmm3,zmm3,ZMMWORD[448+rsi]
+ vmovdqu32 ZMMWORD[256+rdi],zmm19
+ vmovdqu32 ZMMWORD[320+rdi],zmm1
+ vmovdqu32 ZMMWORD[384+rdi],zmm18
+ vmovdqu32 ZMMWORD[448+rdi],zmm3
+
+ vpxord zmm0,zmm0,ZMMWORD[512+rsi]
+ vpxord zmm9,zmm9,ZMMWORD[576+rsi]
+ vpxord zmm6,zmm6,ZMMWORD[640+rsi]
+ vpxord zmm11,zmm11,ZMMWORD[704+rsi]
+ vmovdqu32 ZMMWORD[512+rdi],zmm0
+ vmovdqu32 ZMMWORD[576+rdi],zmm9
+ vmovdqu32 ZMMWORD[640+rdi],zmm6
+ vmovdqu32 ZMMWORD[704+rdi],zmm11
+
+ vpxord zmm13,zmm13,ZMMWORD[768+rsi]
+ vpxord zmm10,zmm10,ZMMWORD[832+rsi]
+ vpxord zmm15,zmm15,ZMMWORD[896+rsi]
+ vpxord zmm12,zmm12,ZMMWORD[960+rsi]
+ lea rsi,[1024+rsi]
+ vmovdqu32 ZMMWORD[768+rdi],zmm13
+ vmovdqu32 ZMMWORD[832+rdi],zmm10
+ vmovdqu32 ZMMWORD[896+rdi],zmm15
+ vmovdqu32 ZMMWORD[960+rdi],zmm12
+ lea rdi,[1024+rdi]
+
+ sub rdx,64*16
+ jnz NEAR $L$oop_outer16x
+
+ jmp NEAR $L$done16x
+
+ALIGN 32
+$L$tail16x:
+ xor r10,r10
+ sub rdi,rsi
+ cmp rdx,64*1
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm16,zmm16,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm16
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm17
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*2
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm17,zmm17,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm17
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm14
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*3
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm14,zmm14,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm14
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm8
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*4
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm8,zmm8,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm8
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm19
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*5
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm19,zmm19,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm19
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm1
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*6
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm1,zmm1,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm1
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm18
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*7
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm18,zmm18,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm18
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm3
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*8
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm3,zmm3,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm3
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm0
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*9
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm0,zmm0,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm0
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm9
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*10
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm9,zmm9,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm9
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm6
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*11
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm6,zmm6,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm6
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm11
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*12
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm11,zmm11,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm11
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm13
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*13
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm13,zmm13,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm13
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm10
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*14
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm10,zmm10,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm10
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm15
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*15
+ jb NEAR $L$ess_than_64_16x
+ vpxord zmm15,zmm15,ZMMWORD[rsi]
+ vmovdqu32 ZMMWORD[rsi*1+rdi],zmm15
+ je NEAR $L$done16x
+ vmovdqa32 zmm16,zmm12
+ lea rsi,[64+rsi]
+
+$L$ess_than_64_16x:
+ vmovdqa32 ZMMWORD[rsp],zmm16
+ lea rdi,[rsi*1+rdi]
+ and rdx,63
+
+$L$oop_tail16x:
+ movzx eax,BYTE[r10*1+rsi]
+ movzx ecx,BYTE[r10*1+rsp]
+ lea r10,[1+r10]
+ xor eax,ecx
+ mov BYTE[((-1))+r10*1+rdi],al
+ dec rdx
+ jnz NEAR $L$oop_tail16x
+
+ vpxord zmm16,zmm16,zmm16
+ vmovdqa32 ZMMWORD[rsp],zmm16
+
+$L$done16x:
+ vzeroall
+ movaps xmm6,XMMWORD[((-168))+r9]
+ movaps xmm7,XMMWORD[((-152))+r9]
+ movaps xmm8,XMMWORD[((-136))+r9]
+ movaps xmm9,XMMWORD[((-120))+r9]
+ movaps xmm10,XMMWORD[((-104))+r9]
+ movaps xmm11,XMMWORD[((-88))+r9]
+ movaps xmm12,XMMWORD[((-72))+r9]
+ movaps xmm13,XMMWORD[((-56))+r9]
+ movaps xmm14,XMMWORD[((-40))+r9]
+ movaps xmm15,XMMWORD[((-24))+r9]
+ lea rsp,[r9]
+
+$L$16x_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ChaCha20_16x:
+
+ALIGN 32
+ChaCha20_8xvl: ; ChaCha20 stream XOR on ymm registers, 8 lanes wide: 64*8 bytes per outer pass (Win64 entry)
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ChaCha20_8xvl:
+ mov rdi,rcx ; rdi = arg1: destination pointer (stores below go through rdi)
+ mov rsi,rdx ; rsi = arg2: source pointer (loads below go through rsi)
+ mov rdx,r8 ; rdx = arg3: number of bytes still to process
+ mov rcx,r9 ; rcx = arg4: 32 bytes are broadcast from it below (presumably the key - confirm with caller)
+ mov r8,QWORD[40+rsp] ; r8 = arg5 (stack slot): 16 bytes are broadcast from it (presumably counter+nonce - confirm)
+ ; rdi/rsi were saved to the caller's home slots above and are reloaded in the epilogue
+
+
+$L$ChaCha20_8xvl:
+ mov r9,rsp ; r9 = entry rsp: anchor for the xmm save area and for restoring rsp
+
+ sub rsp,64+168 ; 64-byte scratch block for the tail + xmm6-xmm15 save area
+ and rsp,-64 ; align scratch so the vmovdqa spills in the tail are legal
+ movaps XMMWORD[(-168)+r9],xmm6 ; save the Win64 callee-saved xmm6-xmm15 below r9
+ movaps XMMWORD[(-152)+r9],xmm7
+ movaps XMMWORD[(-136)+r9],xmm8
+ movaps XMMWORD[(-120)+r9],xmm9
+ movaps XMMWORD[(-104)+r9],xmm10
+ movaps XMMWORD[(-88)+r9],xmm11
+ movaps XMMWORD[(-72)+r9],xmm12
+ movaps XMMWORD[(-56)+r9],xmm13
+ movaps XMMWORD[(-40)+r9],xmm14
+ movaps XMMWORD[(-24)+r9],xmm15
+$L$8xvl_body:
+ vzeroupper
+
+ lea r10,[$L$sigma] ; r10 = address of the constant row; re-read in the outer loop
+ vbroadcasti128 ymm3,XMMWORD[r10] ; state words 0-3 (constants)
+ vbroadcasti128 ymm7,XMMWORD[rcx] ; state words 4-7
+ vbroadcasti128 ymm11,XMMWORD[16+rcx] ; state words 8-11
+ vbroadcasti128 ymm15,XMMWORD[r8] ; state words 12-15
+ ; Splat each 32-bit state word across a whole register (one lane per block)
+ vpshufd ymm0,ymm3,0x00
+ vpshufd ymm1,ymm3,0x55
+ vpshufd ymm2,ymm3,0xaa
+ vpshufd ymm3,ymm3,0xff
+ vmovdqa64 ymm16,ymm0 ; ymm16-ymm31 keep the input state for the final add
+ vmovdqa64 ymm17,ymm1
+ vmovdqa64 ymm18,ymm2
+ vmovdqa64 ymm19,ymm3
+
+ vpshufd ymm4,ymm7,0x00
+ vpshufd ymm5,ymm7,0x55
+ vpshufd ymm6,ymm7,0xaa
+ vpshufd ymm7,ymm7,0xff
+ vmovdqa64 ymm20,ymm4
+ vmovdqa64 ymm21,ymm5
+ vmovdqa64 ymm22,ymm6
+ vmovdqa64 ymm23,ymm7
+
+ vpshufd ymm8,ymm11,0x00
+ vpshufd ymm9,ymm11,0x55
+ vpshufd ymm10,ymm11,0xaa
+ vpshufd ymm11,ymm11,0xff
+ vmovdqa64 ymm24,ymm8
+ vmovdqa64 ymm25,ymm9
+ vmovdqa64 ymm26,ymm10
+ vmovdqa64 ymm27,ymm11
+
+ vpshufd ymm12,ymm15,0x00
+ vpshufd ymm13,ymm15,0x55
+ vpshufd ymm14,ymm15,0xaa
+ vpshufd ymm15,ymm15,0xff
+ vpaddd ymm12,ymm12,YMMWORD[$L$incy] ; add the $L$incy vector to word 12 (presumably per-lane counter offsets - table not in view)
+ vmovdqa64 ymm28,ymm12
+ vmovdqa64 ymm29,ymm13
+ vmovdqa64 ymm30,ymm14
+ vmovdqa64 ymm31,ymm15
+
+ mov eax,10 ; 10 iterations of the two-half-round loop below
+ jmp NEAR $L$oop8xvl
+
+ALIGN 32
+$L$oop_outer8xvl: ; reached after a full 512-byte pass when more input remains
+ ; ymm0/ymm1 were already re-broadcast from r10 just before the branch here
+
+ vpbroadcastd ymm2,DWORD[8+r10] ; reload constant words 2 and 3
+ vpbroadcastd ymm3,DWORD[12+r10]
+ vpaddd ymm28,ymm28,YMMWORD[$L$eight] ; advance saved word 12 by the $L$eight vector (presumably +8 per lane - table not in view)
+ vmovdqa64 ymm4,ymm20 ; reload working words 4-15 from the saved input state
+ vmovdqa64 ymm5,ymm21
+ vmovdqa64 ymm6,ymm22
+ vmovdqa64 ymm7,ymm23
+ vmovdqa64 ymm8,ymm24
+ vmovdqa64 ymm9,ymm25
+ vmovdqa64 ymm10,ymm26
+ vmovdqa64 ymm11,ymm27
+ vmovdqa64 ymm12,ymm28
+ vmovdqa64 ymm13,ymm29
+ vmovdqa64 ymm14,ymm30
+ vmovdqa64 ymm15,ymm31
+ ; ymm16-ymm19 were overwritten during the previous pass: rebuild them
+ vmovdqa64 ymm16,ymm0
+ vmovdqa64 ymm17,ymm1
+ vmovdqa64 ymm18,ymm2
+ vmovdqa64 ymm19,ymm3
+
+ mov eax,10
+ jmp NEAR $L$oop8xvl
+
+ALIGN 32
+$L$oop8xvl: ; one iteration = two half-rounds over all 16 state words
+ vpaddd ymm0,ymm0,ymm4 ; half-round 1 on word groups (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15): a += b
+ vpaddd ymm1,ymm1,ymm5
+ vpaddd ymm2,ymm2,ymm6
+ vpaddd ymm3,ymm3,ymm7
+ vpxor ymm12,ymm12,ymm0 ; d ^= a
+ vpxor ymm13,ymm13,ymm1
+ vpxor ymm14,ymm14,ymm2
+ vpxor ymm15,ymm15,ymm3
+ vprold ymm12,ymm12,16 ; d = rotl32(d,16)
+ vprold ymm13,ymm13,16
+ vprold ymm14,ymm14,16
+ vprold ymm15,ymm15,16
+ vpaddd ymm8,ymm8,ymm12 ; c += d
+ vpaddd ymm9,ymm9,ymm13
+ vpaddd ymm10,ymm10,ymm14
+ vpaddd ymm11,ymm11,ymm15
+ vpxor ymm4,ymm4,ymm8 ; b ^= c
+ vpxor ymm5,ymm5,ymm9
+ vpxor ymm6,ymm6,ymm10
+ vpxor ymm7,ymm7,ymm11
+ vprold ymm4,ymm4,12 ; b = rotl32(b,12)
+ vprold ymm5,ymm5,12
+ vprold ymm6,ymm6,12
+ vprold ymm7,ymm7,12
+ vpaddd ymm0,ymm0,ymm4 ; a += b
+ vpaddd ymm1,ymm1,ymm5
+ vpaddd ymm2,ymm2,ymm6
+ vpaddd ymm3,ymm3,ymm7
+ vpxor ymm12,ymm12,ymm0 ; d ^= a
+ vpxor ymm13,ymm13,ymm1
+ vpxor ymm14,ymm14,ymm2
+ vpxor ymm15,ymm15,ymm3
+ vprold ymm12,ymm12,8 ; d = rotl32(d,8)
+ vprold ymm13,ymm13,8
+ vprold ymm14,ymm14,8
+ vprold ymm15,ymm15,8
+ vpaddd ymm8,ymm8,ymm12 ; c += d
+ vpaddd ymm9,ymm9,ymm13
+ vpaddd ymm10,ymm10,ymm14
+ vpaddd ymm11,ymm11,ymm15
+ vpxor ymm4,ymm4,ymm8 ; b ^= c
+ vpxor ymm5,ymm5,ymm9
+ vpxor ymm6,ymm6,ymm10
+ vpxor ymm7,ymm7,ymm11
+ vprold ymm4,ymm4,7 ; b = rotl32(b,7)
+ vprold ymm5,ymm5,7
+ vprold ymm6,ymm6,7
+ vprold ymm7,ymm7,7
+ vpaddd ymm0,ymm0,ymm5 ; half-round 2 on word groups (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14): same steps
+ vpaddd ymm1,ymm1,ymm6
+ vpaddd ymm2,ymm2,ymm7
+ vpaddd ymm3,ymm3,ymm4
+ vpxor ymm15,ymm15,ymm0
+ vpxor ymm12,ymm12,ymm1
+ vpxor ymm13,ymm13,ymm2
+ vpxor ymm14,ymm14,ymm3
+ vprold ymm15,ymm15,16
+ vprold ymm12,ymm12,16
+ vprold ymm13,ymm13,16
+ vprold ymm14,ymm14,16
+ vpaddd ymm10,ymm10,ymm15
+ vpaddd ymm11,ymm11,ymm12
+ vpaddd ymm8,ymm8,ymm13
+ vpaddd ymm9,ymm9,ymm14
+ vpxor ymm5,ymm5,ymm10
+ vpxor ymm6,ymm6,ymm11
+ vpxor ymm7,ymm7,ymm8
+ vpxor ymm4,ymm4,ymm9
+ vprold ymm5,ymm5,12
+ vprold ymm6,ymm6,12
+ vprold ymm7,ymm7,12
+ vprold ymm4,ymm4,12
+ vpaddd ymm0,ymm0,ymm5
+ vpaddd ymm1,ymm1,ymm6
+ vpaddd ymm2,ymm2,ymm7
+ vpaddd ymm3,ymm3,ymm4
+ vpxor ymm15,ymm15,ymm0
+ vpxor ymm12,ymm12,ymm1
+ vpxor ymm13,ymm13,ymm2
+ vpxor ymm14,ymm14,ymm3
+ vprold ymm15,ymm15,8
+ vprold ymm12,ymm12,8
+ vprold ymm13,ymm13,8
+ vprold ymm14,ymm14,8
+ vpaddd ymm10,ymm10,ymm15
+ vpaddd ymm11,ymm11,ymm12
+ vpaddd ymm8,ymm8,ymm13
+ vpaddd ymm9,ymm9,ymm14
+ vpxor ymm5,ymm5,ymm10
+ vpxor ymm6,ymm6,ymm11
+ vpxor ymm7,ymm7,ymm8
+ vpxor ymm4,ymm4,ymm9
+ vprold ymm5,ymm5,7
+ vprold ymm6,ymm6,7
+ vprold ymm7,ymm7,7
+ vprold ymm4,ymm4,7
+ dec eax
+ jnz NEAR $L$oop8xvl
+ ; Rounds finished: add the saved input state back in, then regroup for output
+ vpaddd ymm0,ymm0,ymm16
+ vpaddd ymm1,ymm1,ymm17
+ vpaddd ymm2,ymm2,ymm18
+ vpaddd ymm3,ymm3,ymm19
+ ; Interleave dwords then qwords of words 0-3 (ymm18/ymm19 are reused as temporaries from here)
+ vpunpckldq ymm18,ymm0,ymm1
+ vpunpckldq ymm19,ymm2,ymm3
+ vpunpckhdq ymm0,ymm0,ymm1
+ vpunpckhdq ymm2,ymm2,ymm3
+ vpunpcklqdq ymm1,ymm18,ymm19
+ vpunpckhqdq ymm18,ymm18,ymm19
+ vpunpcklqdq ymm3,ymm0,ymm2
+ vpunpckhqdq ymm0,ymm0,ymm2
+ vpaddd ymm4,ymm4,ymm20
+ vpaddd ymm5,ymm5,ymm21
+ vpaddd ymm6,ymm6,ymm22
+ vpaddd ymm7,ymm7,ymm23
+ ; Same interleave for words 4-7
+ vpunpckldq ymm2,ymm4,ymm5
+ vpunpckldq ymm19,ymm6,ymm7
+ vpunpckhdq ymm4,ymm4,ymm5
+ vpunpckhdq ymm6,ymm6,ymm7
+ vpunpcklqdq ymm5,ymm2,ymm19
+ vpunpckhqdq ymm2,ymm2,ymm19
+ vpunpcklqdq ymm7,ymm4,ymm6
+ vpunpckhqdq ymm4,ymm4,ymm6
+ vshufi32x4 ymm19,ymm1,ymm5,0 ; imm 0: pair the low 128-bit halves of both sources
+ vshufi32x4 ymm5,ymm1,ymm5,3 ; imm 3: pair the high 128-bit halves of both sources
+ vshufi32x4 ymm1,ymm18,ymm2,0
+ vshufi32x4 ymm2,ymm18,ymm2,3
+ vshufi32x4 ymm18,ymm3,ymm7,0
+ vshufi32x4 ymm7,ymm3,ymm7,3
+ vshufi32x4 ymm3,ymm0,ymm4,0
+ vshufi32x4 ymm4,ymm0,ymm4,3
+ vpaddd ymm8,ymm8,ymm24
+ vpaddd ymm9,ymm9,ymm25
+ vpaddd ymm10,ymm10,ymm26
+ vpaddd ymm11,ymm11,ymm27
+ ; Same interleave for words 8-11
+ vpunpckldq ymm6,ymm8,ymm9
+ vpunpckldq ymm0,ymm10,ymm11
+ vpunpckhdq ymm8,ymm8,ymm9
+ vpunpckhdq ymm10,ymm10,ymm11
+ vpunpcklqdq ymm9,ymm6,ymm0
+ vpunpckhqdq ymm6,ymm6,ymm0
+ vpunpcklqdq ymm11,ymm8,ymm10
+ vpunpckhqdq ymm8,ymm8,ymm10
+ vpaddd ymm12,ymm12,ymm28
+ vpaddd ymm13,ymm13,ymm29
+ vpaddd ymm14,ymm14,ymm30
+ vpaddd ymm15,ymm15,ymm31
+ ; Same interleave for words 12-15
+ vpunpckldq ymm10,ymm12,ymm13
+ vpunpckldq ymm0,ymm14,ymm15
+ vpunpckhdq ymm12,ymm12,ymm13
+ vpunpckhdq ymm14,ymm14,ymm15
+ vpunpcklqdq ymm13,ymm10,ymm0
+ vpunpckhqdq ymm10,ymm10,ymm0
+ vpunpcklqdq ymm15,ymm12,ymm14
+ vpunpckhqdq ymm12,ymm12,ymm14
+ vperm2i128 ymm0,ymm9,ymm13,0x20 ; 0x20: low halves of both sources; 0x31: high halves
+ vperm2i128 ymm13,ymm9,ymm13,0x31
+ vperm2i128 ymm9,ymm6,ymm10,0x20
+ vperm2i128 ymm10,ymm6,ymm10,0x31
+ vperm2i128 ymm6,ymm11,ymm15,0x20
+ vperm2i128 ymm15,ymm11,ymm15,0x31
+ vperm2i128 ymm11,ymm8,ymm12,0x20
+ vperm2i128 ymm12,ymm8,ymm12,0x31
+ cmp rdx,64*8 ; fewer than 512 bytes left: finish in the tail path
+ jb NEAR $L$tail8xvl
+ ; Full pass: XOR 512 bytes of input with the generated data, 128 bytes per group
+ mov eax,0x80 ; rax = 128, the pointer step between groups
+ vpxord ymm19,ymm19,YMMWORD[rsi]
+ vpxor ymm0,ymm0,YMMWORD[32+rsi]
+ vpxor ymm5,ymm5,YMMWORD[64+rsi]
+ vpxor ymm13,ymm13,YMMWORD[96+rsi]
+ lea rsi,[rax*1+rsi]
+ vmovdqu32 YMMWORD[rdi],ymm19
+ vmovdqu YMMWORD[32+rdi],ymm0
+ vmovdqu YMMWORD[64+rdi],ymm5
+ vmovdqu YMMWORD[96+rdi],ymm13
+ lea rdi,[rax*1+rdi]
+
+ vpxor ymm1,ymm1,YMMWORD[rsi]
+ vpxor ymm9,ymm9,YMMWORD[32+rsi]
+ vpxor ymm2,ymm2,YMMWORD[64+rsi]
+ vpxor ymm10,ymm10,YMMWORD[96+rsi]
+ lea rsi,[rax*1+rsi]
+ vmovdqu YMMWORD[rdi],ymm1
+ vmovdqu YMMWORD[32+rdi],ymm9
+ vmovdqu YMMWORD[64+rdi],ymm2
+ vmovdqu YMMWORD[96+rdi],ymm10
+ lea rdi,[rax*1+rdi]
+
+ vpxord ymm18,ymm18,YMMWORD[rsi]
+ vpxor ymm6,ymm6,YMMWORD[32+rsi]
+ vpxor ymm7,ymm7,YMMWORD[64+rsi]
+ vpxor ymm15,ymm15,YMMWORD[96+rsi]
+ lea rsi,[rax*1+rsi]
+ vmovdqu32 YMMWORD[rdi],ymm18
+ vmovdqu YMMWORD[32+rdi],ymm6
+ vmovdqu YMMWORD[64+rdi],ymm7
+ vmovdqu YMMWORD[96+rdi],ymm15
+ lea rdi,[rax*1+rdi]
+
+ vpxor ymm3,ymm3,YMMWORD[rsi]
+ vpxor ymm11,ymm11,YMMWORD[32+rsi]
+ vpxor ymm4,ymm4,YMMWORD[64+rsi]
+ vpxor ymm12,ymm12,YMMWORD[96+rsi]
+ lea rsi,[rax*1+rsi]
+ vmovdqu YMMWORD[rdi],ymm3
+ vmovdqu YMMWORD[32+rdi],ymm11
+ vmovdqu YMMWORD[64+rdi],ymm4
+ vmovdqu YMMWORD[96+rdi],ymm12
+ lea rdi,[rax*1+rdi]
+ ; Re-broadcast constant words 0 and 1 for the next pass (words 2/3 are reloaded at $L$oop_outer8xvl)
+ vpbroadcastd ymm0,DWORD[r10]
+ vpbroadcastd ymm1,DWORD[4+r10]
+
+ sub rdx,64*8
+ jnz NEAR $L$oop_outer8xvl
+
+ jmp NEAR $L$done8xvl
+
+ALIGN 32
+$L$tail8xvl: ; fewer than 512 bytes left: emit whole 64-byte chunks, then a byte-wise remainder
+ vmovdqa64 ymm8,ymm19 ; chunk data is staged in ymm8 (first 32 bytes) and ymm0 (last 32 bytes)
+ xor r10,r10 ; r10 = byte index for $L$oop_tail8xvl (no longer the $L$sigma pointer)
+ sub rdi,rsi ; rdi = destination - source, so [rsi+rdi] addresses the output
+ cmp rdx,64*1
+ jb NEAR $L$ess_than_64_8xvl
+ vpxor ymm8,ymm8,YMMWORD[rsi]
+ vpxor ymm0,ymm0,YMMWORD[32+rsi]
+ vmovdqu YMMWORD[rsi*1+rdi],ymm8
+ vmovdqu YMMWORD[32+rsi*1+rdi],ymm0
+ je NEAR $L$done8xvl ; flags still come from the cmp above: the vector ops in between leave them untouched
+ vmovdqa ymm8,ymm5 ; stage the next chunk in ymm8:ymm0 in case it is the partial one
+ vmovdqa ymm0,ymm13
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*2
+ jb NEAR $L$ess_than_64_8xvl
+ vpxor ymm5,ymm5,YMMWORD[rsi]
+ vpxor ymm13,ymm13,YMMWORD[32+rsi]
+ vmovdqu YMMWORD[rsi*1+rdi],ymm5
+ vmovdqu YMMWORD[32+rsi*1+rdi],ymm13
+ je NEAR $L$done8xvl
+ vmovdqa ymm8,ymm1
+ vmovdqa ymm0,ymm9
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*3
+ jb NEAR $L$ess_than_64_8xvl
+ vpxor ymm1,ymm1,YMMWORD[rsi]
+ vpxor ymm9,ymm9,YMMWORD[32+rsi]
+ vmovdqu YMMWORD[rsi*1+rdi],ymm1
+ vmovdqu YMMWORD[32+rsi*1+rdi],ymm9
+ je NEAR $L$done8xvl
+ vmovdqa ymm8,ymm2
+ vmovdqa ymm0,ymm10
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*4
+ jb NEAR $L$ess_than_64_8xvl
+ vpxor ymm2,ymm2,YMMWORD[rsi]
+ vpxor ymm10,ymm10,YMMWORD[32+rsi]
+ vmovdqu YMMWORD[rsi*1+rdi],ymm2
+ vmovdqu YMMWORD[32+rsi*1+rdi],ymm10
+ je NEAR $L$done8xvl
+ vmovdqa32 ymm8,ymm18
+ vmovdqa ymm0,ymm6
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*5
+ jb NEAR $L$ess_than_64_8xvl
+ vpxord ymm18,ymm18,YMMWORD[rsi]
+ vpxor ymm6,ymm6,YMMWORD[32+rsi]
+ vmovdqu32 YMMWORD[rsi*1+rdi],ymm18
+ vmovdqu YMMWORD[32+rsi*1+rdi],ymm6
+ je NEAR $L$done8xvl
+ vmovdqa ymm8,ymm7
+ vmovdqa ymm0,ymm15
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*6
+ jb NEAR $L$ess_than_64_8xvl
+ vpxor ymm7,ymm7,YMMWORD[rsi]
+ vpxor ymm15,ymm15,YMMWORD[32+rsi]
+ vmovdqu YMMWORD[rsi*1+rdi],ymm7
+ vmovdqu YMMWORD[32+rsi*1+rdi],ymm15
+ je NEAR $L$done8xvl
+ vmovdqa ymm8,ymm3
+ vmovdqa ymm0,ymm11
+ lea rsi,[64+rsi]
+
+ cmp rdx,64*7
+ jb NEAR $L$ess_than_64_8xvl
+ vpxor ymm3,ymm3,YMMWORD[rsi]
+ vpxor ymm11,ymm11,YMMWORD[32+rsi]
+ vmovdqu YMMWORD[rsi*1+rdi],ymm3
+ vmovdqu YMMWORD[32+rsi*1+rdi],ymm11
+ je NEAR $L$done8xvl
+ vmovdqa ymm8,ymm4
+ vmovdqa ymm0,ymm12
+ lea rsi,[64+rsi]
+
+$L$ess_than_64_8xvl: ; 1..63 bytes remain; their 64 bytes of generated data are in ymm8:ymm0
+ vmovdqa YMMWORD[rsp],ymm8 ; spill to the 64-byte aligned scratch block at rsp
+ vmovdqa YMMWORD[32+rsp],ymm0
+ lea rdi,[rsi*1+rdi] ; turn rdi back into an absolute output pointer
+ and rdx,63 ; rdx = number of leftover bytes
+
+$L$oop_tail8xvl: ; byte loop: out[i] = in[i] ^ scratch[i]
+ movzx eax,BYTE[r10*1+rsi]
+ movzx ecx,BYTE[r10*1+rsp]
+ lea r10,[1+r10] ; lea keeps the flags intact; the -1 below compensates for the early increment
+ xor eax,ecx
+ mov BYTE[((-1))+r10*1+rdi],al
+ dec rdx
+ jnz NEAR $L$oop_tail8xvl
+ ; Overwrite the spilled data on the stack with zeros before leaving
+ vpxor ymm8,ymm8,ymm8
+ vmovdqa YMMWORD[rsp],ymm8
+ vmovdqa YMMWORD[32+rsp],ymm8
+
+$L$done8xvl:
+ vzeroall ; clear the vector registers before the callee-saved xmm values are reloaded
+ movaps xmm6,XMMWORD[((-168))+r9] ; restore xmm6-xmm15 from the save area below r9
+ movaps xmm7,XMMWORD[((-152))+r9]
+ movaps xmm8,XMMWORD[((-136))+r9]
+ movaps xmm9,XMMWORD[((-120))+r9]
+ movaps xmm10,XMMWORD[((-104))+r9]
+ movaps xmm11,XMMWORD[((-88))+r9]
+ movaps xmm12,XMMWORD[((-72))+r9]
+ movaps xmm13,XMMWORD[((-56))+r9]
+ movaps xmm14,XMMWORD[((-40))+r9]
+ movaps xmm15,XMMWORD[((-24))+r9]
+ lea rsp,[r9] ; rsp = value it had at $L$ChaCha20_8xvl
+
+$L$8xvl_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ChaCha20_8xvl:
+EXTERN __imp_RtlVirtualUnwind
+
+ALIGN 16
+se_handler: ; Win64 language-specific exception handler; registered in .xdata for ChaCha20_ctr32
+ push rsi ; in: r8 = context record, r9 = dispatcher record (field names below assume the winnt.h layouts - confirm)
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; room for RtlVirtualUnwind's home slots and four stack arguments
+
+ mov rax,QWORD[120+r8] ; rax = context+120 (presumably CONTEXT.Rax: the prologues copy rsp into rax)
+ mov rbx,QWORD[248+r8] ; rbx = context+248 (presumably CONTEXT.Rip: the faulting address)
+
+ mov rsi,QWORD[8+r9] ; rsi and r11 are loaded here but overwritten before being read on this path
+ mov r11,QWORD[56+r9]
+
+ lea r10,[$L$ctr32_body]
+ cmp rbx,r10 ; fault before $L$ctr32_body: rax from context+120 is used as the frame
+ jb NEAR $L$common_seh_tail
+
+ mov rax,QWORD[152+r8] ; rax = context+152 (presumably CONTEXT.Rsp)
+
+ lea r10,[$L$no_data]
+ cmp rbx,r10 ; fault at or after $L$no_data: nothing more to recover from the frame
+ jae NEAR $L$common_seh_tail
+
+ lea rax,[((64+24+48))+rax] ; step over the function's 64+24+48 byte frame (layout defined outside this view)
+
+ mov rbx,QWORD[((-8))+rax] ; fetch the six saved GPRs stored just below that address...
+ mov rbp,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx ; ...and write them into the context record (presumably Rbx, Rbp, R12-R15)
+ mov QWORD[160+r8],rbp
+ mov QWORD[216+r8],r12
+ mov QWORD[224+r8],r13
+ mov QWORD[232+r8],r14
+ mov QWORD[240+r8],r15
+
+$L$common_seh_tail: ; shared with simd_handler; expects rax = frame address whose +8/+16 slots hold saved rdi/rsi
+ mov rdi,QWORD[8+rax] ; the WIN64 prologues store rdi/rsi at [8+rsp]/[16+rsp]
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax ; write rax, rsi, rdi into context+152/+168/+176 (presumably Rsp, Rsi, Rdi)
+ mov QWORD[168+r8],rsi
+ mov QWORD[176+r8],rdi
+ ; Copy the patched context record (154 qwords) to the record pointed to by [40+r9]
+ mov rdi,QWORD[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
+ ; Build the RtlVirtualUnwind call: rcx, rdx, r8, r9 plus four stack arguments, all taken from the dispatcher record
+ mov rsi,r9
+ xor rcx,rcx ; arg1 = 0
+ mov rdx,QWORD[8+rsi] ; arg2 = [dispatcher+8]
+ mov r8,QWORD[rsi] ; arg3 = [dispatcher+0]
+ mov r9,QWORD[16+rsi] ; arg4 = [dispatcher+16]
+ mov r10,QWORD[40+rsi]
+ lea r11,[56+rsi]
+ lea r12,[24+rsi]
+ mov QWORD[32+rsp],r10 ; arg5 = [dispatcher+40] (the context copy written above)
+ mov QWORD[40+rsp],r11 ; arg6 = address of dispatcher+56
+ mov QWORD[48+rsp],r12 ; arg7 = address of dispatcher+24
+ mov QWORD[56+rsp],rcx ; arg8 = 0
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1 ; return value 1 (presumably ExceptionContinueSearch - confirm against the SDK enum)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+
+ALIGN 16
+simd_handler: ; Win64 exception handler for the SIMD ChaCha20 entry points; finishes in se_handler's $L$common_seh_tail
+ push rsi ; same register saves, in the same order, as se_handler: the shared tail pops them
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8] ; rax = context+120 (presumably CONTEXT.Rax, a copy of the entry rsp)
+ mov rbx,QWORD[248+r8] ; rbx = context+248 (presumably CONTEXT.Rip)
+
+ mov rsi,QWORD[8+r9] ; rsi = [dispatcher+8], the base the 32-bit offsets below are added to (presumably ImageBase)
+ mov r11,QWORD[56+r9] ; r11 = [dispatcher+56]: the three DD values following the handler RVA in .xdata
+
+ mov r10d,DWORD[r11] ; handler data[0]: RVA of the function's *_body label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10 ; fault before *_body: xmm registers not yet modified, keep rax from context+120
+ jb NEAR $L$common_seh_tail
+
+ mov rax,QWORD[192+r8] ; rax = context+192 (presumably CONTEXT.R9; the SIMD functions keep their entry rsp in r9)
+
+ mov r10d,DWORD[4+r11] ; handler data[1]: RVA of the *_epilogue label
+ mov ecx,DWORD[8+r11] ; handler data[2]: size in bytes of the xmm save area
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10 ; fault at or after *_epilogue: xmm registers already restored
+ jae NEAR $L$common_seh_tail
+ ; Copy the saved xmm registers from the frame back into the context record
+ neg rcx
+ lea rsi,[((-8))+rcx*1+rax] ; source = frame - size - 8, the lowest saved xmm slot
+ lea rdi,[512+r8] ; destination = context+512 (presumably CONTEXT.Xmm6)
+ neg ecx
+ shr ecx,3 ; byte count to qword count
+ DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
+
+ jmp NEAR $L$common_seh_tail
+
+
+section .pdata rdata align=4
+ALIGN 4
+ DD $L$SEH_begin_ChaCha20_ctr32 wrt ..imagebase ; one record per function: begin RVA, end RVA, RVA of its $L$SEH_info_* entry in .xdata
+ DD $L$SEH_end_ChaCha20_ctr32 wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_ctr32 wrt ..imagebase
+
+ DD $L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_ssse3 wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_ssse3 wrt ..imagebase
+
+ DD $L$SEH_begin_ChaCha20_128 wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_128 wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_128 wrt ..imagebase
+
+ DD $L$SEH_begin_ChaCha20_4x wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_4x wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_4x wrt ..imagebase
+ DD $L$SEH_begin_ChaCha20_4xop wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_4xop wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_4xop wrt ..imagebase
+ DD $L$SEH_begin_ChaCha20_8x wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_8x wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_8x wrt ..imagebase
+ DD $L$SEH_begin_ChaCha20_avx512 wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_avx512 wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_avx512 wrt ..imagebase
+
+ DD $L$SEH_begin_ChaCha20_avx512vl wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_avx512vl wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_avx512vl wrt ..imagebase
+
+ DD $L$SEH_begin_ChaCha20_16x wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_16x wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_16x wrt ..imagebase
+
+ DD $L$SEH_begin_ChaCha20_8xvl wrt ..imagebase
+ DD $L$SEH_end_ChaCha20_8xvl wrt ..imagebase
+ DD $L$SEH_info_ChaCha20_8xvl wrt ..imagebase
+section .xdata rdata align=8
+ALIGN 8
+$L$SEH_info_ChaCha20_ctr32: ; unwind info referenced from .pdata; this entry carries no extra handler data
+DB 9,0,0,0 ; header bytes 9,0,0,0 (presumably version 1 + exception-handler flag, no unwind codes - confirm against UNWIND_INFO)
+ DD se_handler wrt ..imagebase ; RVA of the handler routine
+
+$L$SEH_info_ChaCha20_ssse3: ; SIMD entries: handler RVA followed by the three values simd_handler reads
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase ; data[0], data[1]: body and epilogue RVAs
+ DD 0x20,0 ; data[2]: byte size of the xmm save area copied back by simd_handler; then a zero dword
+
+$L$SEH_info_ChaCha20_128:
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$128_body wrt ..imagebase,$L$128_epilogue wrt ..imagebase
+ DD 0x60,0
+
+$L$SEH_info_ChaCha20_4x:
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase
+ DD 0xa0,0
+$L$SEH_info_ChaCha20_4xop:
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$4xop_body wrt ..imagebase,$L$4xop_epilogue wrt ..imagebase
+ DD 0xa0,0
+$L$SEH_info_ChaCha20_8x:
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase
+ DD 0xa0,0
+$L$SEH_info_ChaCha20_avx512:
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$avx512_body wrt ..imagebase,$L$avx512_epilogue wrt ..imagebase
+ DD 0x20,0
+
+$L$SEH_info_ChaCha20_avx512vl:
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$avx512vl_body wrt ..imagebase,$L$avx512vl_epilogue wrt ..imagebase
+ DD 0x20,0
+
+$L$SEH_info_ChaCha20_16x:
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$16x_body wrt ..imagebase,$L$16x_epilogue wrt ..imagebase
+ DD 0xa0,0 ; 0xa0 = 160 bytes = the ten movaps slots for xmm6-xmm15 in ChaCha20_16x
+
+$L$SEH_info_ChaCha20_8xvl:
+DB 9,0,0,0
+ DD simd_handler wrt ..imagebase
+ DD $L$8xvl_body wrt ..imagebase,$L$8xvl_epilogue wrt ..imagebase
+ DD 0xa0,0 ; 0xa0 = 160 bytes = the ten movaps slots for xmm6-xmm15 in ChaCha20_8xvl
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm
index e0c40d6ec4..9ef88ef1c8 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/ecp_nistz256-x86_64.asm
@@ -2399,6 +2399,12 @@ $L$Three:
$L$ONE_mont:
DQ 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
+
+$L$ord:
+ DQ 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
+$L$ordK:
+ DQ 0xccd1c8aaee00bc4f
+
global ecp_nistz256_mul_by_2
ALIGN 64
@@ -2411,9 +2417,13 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:
mov rsi,rdx
+
push r12
+
push r13
+$L$mul_by_2_body:
+
mov r8,QWORD[rsi]
xor r13,r13
mov r9,QWORD[8+rsi]
@@ -2445,11 +2455,17 @@ $L$SEH_begin_ecp_nistz256_mul_by_2:
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
- pop r13
- pop r12
+ mov r13,QWORD[rsp]
+
+ mov r12,QWORD[8+rsp]
+
+ lea rsp,[16+rsp]
+
+$L$mul_by_2_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_mul_by_2:
@@ -2466,9 +2482,13 @@ $L$SEH_begin_ecp_nistz256_div_by_2:
mov rsi,rdx
+
push r12
+
push r13
+$L$div_by_2_body:
+
mov r8,QWORD[rsi]
mov r9,QWORD[8+rsi]
mov r10,QWORD[16+rsi]
@@ -2515,11 +2535,17 @@ $L$SEH_begin_ecp_nistz256_div_by_2:
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
- pop r13
- pop r12
+ mov r13,QWORD[rsp]
+
+ mov r12,QWORD[8+rsp]
+
+ lea rsp,[16+rsp]
+
+$L$div_by_2_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_div_by_2:
@@ -2536,9 +2562,13 @@ $L$SEH_begin_ecp_nistz256_mul_by_3:
mov rsi,rdx
+
push r12
+
push r13
+$L$mul_by_3_body:
+
mov r8,QWORD[rsi]
xor r13,r13
mov r9,QWORD[8+rsi]
@@ -2591,11 +2621,17 @@ $L$SEH_begin_ecp_nistz256_mul_by_3:
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
- pop r13
- pop r12
+ mov r13,QWORD[rsp]
+
+ mov r12,QWORD[8+rsp]
+
+ lea rsp,[16+rsp]
+
+$L$mul_by_3_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_mul_by_3:
@@ -2613,9 +2649,13 @@ $L$SEH_begin_ecp_nistz256_add:
mov rdx,r8
+
push r12
+
push r13
+$L$add_body:
+
mov r8,QWORD[rsi]
xor r13,r13
mov r9,QWORD[8+rsi]
@@ -2648,11 +2688,17 @@ $L$SEH_begin_ecp_nistz256_add:
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
- pop r13
- pop r12
+ mov r13,QWORD[rsp]
+
+ mov r12,QWORD[8+rsp]
+
+ lea rsp,[16+rsp]
+
+$L$add_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_add:
@@ -2670,9 +2716,13 @@ $L$SEH_begin_ecp_nistz256_sub:
mov rdx,r8
+
push r12
+
push r13
+$L$sub_body:
+
mov r8,QWORD[rsi]
xor r13,r13
mov r9,QWORD[8+rsi]
@@ -2705,11 +2755,17 @@ $L$SEH_begin_ecp_nistz256_sub:
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
- pop r13
- pop r12
+ mov r13,QWORD[rsp]
+
+ mov r12,QWORD[8+rsp]
+
+ lea rsp,[16+rsp]
+
+$L$sub_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_sub:
@@ -2726,9 +2782,13 @@ $L$SEH_begin_ecp_nistz256_neg:
mov rsi,rdx
+
push r12
+
push r13
+$L$neg_body:
+
xor r8,r8
xor r9,r9
xor r10,r10
@@ -2761,16 +2821,1131 @@ $L$SEH_begin_ecp_nistz256_neg:
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
- pop r13
- pop r12
+ mov r13,QWORD[rsp]
+
+ mov r12,QWORD[8+rsp]
+
+ lea rsp,[16+rsp]
+
+$L$neg_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_neg:
+
+
+global ecp_nistz256_ord_mul_mont
+ ; Montgomery-style multiply of two 4-qword values with the modulus at $L$ord and the constant $L$ordK (Win64 entry)
+ALIGN 32
+ecp_nistz256_ord_mul_mont:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_ord_mul_mont:
+ mov rdi,rcx ; rdi = arg1: result pointer (four qwords are stored through it at the end)
+ mov rsi,rdx ; rsi = arg2: first operand x, four qwords
+ mov rdx,r8 ; rdx = arg3: second operand y, four qwords
+
+
+
+ mov ecx,0x80100 ; take the *_montx variant when both bits of mask 0x80100 are set in the
+ and ecx,DWORD[((OPENSSL_ia32cap_P+8))] ; capability dword at OPENSSL_ia32cap_P+8 (presumably BMI2 and ADX - confirm)
+ cmp ecx,0x80100
+ je NEAR $L$ecp_nistz256_ord_mul_montx
+ push rbp ; save the six callee-saved GPRs used below
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+$L$ord_mul_body:
+
+ mov rax,QWORD[rdx] ; rax = y[0]
+ mov rbx,rdx ; rbx = y pointer (rdx is clobbered by mul)
+ lea r14,[$L$ord] ; r14 = address of the modulus qwords
+ mov r15,QWORD[$L$ordK] ; r15 = reduction multiplier constant
+
+ ; Pass 0: accumulate x * y[0] into r8..r12
+ mov rcx,rax
+ mul QWORD[rsi]
+ mov r8,rax
+ mov rax,rcx
+ mov r9,rdx
+
+ mul QWORD[8+rsi]
+ add r9,rax
+ mov rax,rcx
+ adc rdx,0
+ mov r10,rdx
+
+ mul QWORD[16+rsi]
+ add r10,rax
+ mov rax,rcx
+ adc rdx,0
+
+ mov r13,r8 ; r13 = lowest accumulator qword
+ imul r8,r15 ; r8 = m = lowest qword * ordK (low 64 bits)
+
+ mov r11,rdx
+ mul QWORD[24+rsi]
+ add r11,rax
+ mov rax,r8
+ adc rdx,0
+ mov r12,rdx
+
+ ; Reduction 0: add m * modulus; qwords 0 and 1 by mul, qwords 2 and 3 by the sub/shift sequences
+ mul QWORD[r14]
+ mov rbp,r8
+ add r13,rax
+ mov rax,r8
+ adc rdx,0
+ mov rcx,rdx
+
+ sub r10,r8
+ sbb r8,0
+
+ mul QWORD[8+r14]
+ add r9,rcx
+ adc rdx,0
+ add r9,rax
+ mov rax,rbp
+ adc r10,rdx
+ mov rdx,rbp
+ adc r8,0
+
+ shl rax,32 ; rax = m shifted left 32, rdx = m shifted right 32
+ shr rdx,32
+ sub r11,rax
+ mov rax,QWORD[8+rbx] ; rax = y[1] for the next pass
+ sbb rbp,rdx
+
+ add r11,r8
+ adc r12,rbp
+ adc r13,0
+
+ ; Pass 1: accumulate x * y[1] into r9..r13, top carry in r8
+ mov rcx,rax
+ mul QWORD[rsi]
+ add r9,rax
+ mov rax,rcx
+ adc rdx,0
+ mov rbp,rdx
+
+ mul QWORD[8+rsi]
+ add r10,rbp
+ adc rdx,0
+ add r10,rax
+ mov rax,rcx
+ adc rdx,0
+ mov rbp,rdx
+
+ mul QWORD[16+rsi]
+ add r11,rbp
+ adc rdx,0
+ add r11,rax
+ mov rax,rcx
+ adc rdx,0
+
+ mov rcx,r9
+ imul r9,r15 ; r9 = m for this pass
+
+ mov rbp,rdx
+ mul QWORD[24+rsi]
+ add r12,rbp
+ adc rdx,0
+ xor r8,r8
+ add r12,rax
+ mov rax,r9
+ adc r13,rdx
+ adc r8,0
+
+ ; Reduction 1: same pattern as reduction 0 with the registers rotated by one
+ mul QWORD[r14]
+ mov rbp,r9
+ add rcx,rax
+ mov rax,r9
+ adc rcx,rdx
+
+ sub r11,r9
+ sbb r9,0
+
+ mul QWORD[8+r14]
+ add r10,rcx
+ adc rdx,0
+ add r10,rax
+ mov rax,rbp
+ adc r11,rdx
+ mov rdx,rbp
+ adc r9,0
+
+ shl rax,32
+ shr rdx,32
+ sub r12,rax
+ mov rax,QWORD[16+rbx] ; rax = y[2] for the next pass
+ sbb rbp,rdx
+
+ add r12,r9
+ adc r13,rbp
+ adc r8,0
+
+ ; Pass 2: accumulate x * y[2] into r10..r13,r8, top carry in r9
+ mov rcx,rax
+ mul QWORD[rsi]
+ add r10,rax
+ mov rax,rcx
+ adc rdx,0
+ mov rbp,rdx
+
+ mul QWORD[8+rsi]
+ add r11,rbp
+ adc rdx,0
+ add r11,rax
+ mov rax,rcx
+ adc rdx,0
+ mov rbp,rdx
+
+ mul QWORD[16+rsi]
+ add r12,rbp
+ adc rdx,0
+ add r12,rax
+ mov rax,rcx
+ adc rdx,0
+
+ mov rcx,r10
+ imul r10,r15 ; r10 = m for this pass
+
+ mov rbp,rdx
+ mul QWORD[24+rsi]
+ add r13,rbp
+ adc rdx,0
+ xor r9,r9
+ add r13,rax
+ mov rax,r10
+ adc r8,rdx
+ adc r9,0
+
+ ; Reduction 2
+ mul QWORD[r14]
+ mov rbp,r10
+ add rcx,rax
+ mov rax,r10
+ adc rcx,rdx
+
+ sub r12,r10
+ sbb r10,0
+
+ mul QWORD[8+r14]
+ add r11,rcx
+ adc rdx,0
+ add r11,rax
+ mov rax,rbp
+ adc r12,rdx
+ mov rdx,rbp
+ adc r10,0
+
+ shl rax,32
+ shr rdx,32
+ sub r13,rax
+ mov rax,QWORD[24+rbx] ; rax = y[3] for the last pass
+ sbb rbp,rdx
+
+ add r13,r10
+ adc r8,rbp
+ adc r9,0
+
+ ; Pass 3: accumulate x * y[3] into r11..r13,r8,r9, top carry in r10
+ mov rcx,rax
+ mul QWORD[rsi]
+ add r11,rax
+ mov rax,rcx
+ adc rdx,0
+ mov rbp,rdx
+
+ mul QWORD[8+rsi]
+ add r12,rbp
+ adc rdx,0
+ add r12,rax
+ mov rax,rcx
+ adc rdx,0
+ mov rbp,rdx
+
+ mul QWORD[16+rsi]
+ add r13,rbp
+ adc rdx,0
+ add r13,rax
+ mov rax,rcx
+ adc rdx,0
+
+ mov rcx,r11
+ imul r11,r15 ; r11 = m for this pass
+
+ mov rbp,rdx
+ mul QWORD[24+rsi]
+ add r8,rbp
+ adc rdx,0
+ xor r10,r10
+ add r8,rax
+ mov rax,r11
+ adc r9,rdx
+ adc r10,0
+
+ ; Reduction 3
+ mul QWORD[r14]
+ mov rbp,r11
+ add rcx,rax
+ mov rax,r11
+ adc rcx,rdx
+
+ sub r13,r11
+ sbb r11,0
+
+ mul QWORD[8+r14]
+ add r12,rcx
+ adc rdx,0
+ add r12,rax
+ mov rax,rbp
+ adc r13,rdx
+ mov rdx,rbp
+ adc r11,0
+
+ shl rax,32
+ shr rdx,32
+ sub r8,rax
+ sbb rbp,rdx
+
+ add r8,r11
+ adc r9,rbp
+ adc r10,0
+
+ ; Final step: subtract the modulus from r12,r13,r8,r9 (carry qword r10); keep the old value if that borrows
+ mov rsi,r12
+ sub r12,QWORD[r14]
+ mov r11,r13
+ sbb r13,QWORD[8+r14]
+ mov rcx,r8
+ sbb r8,QWORD[16+r14]
+ mov rbp,r9
+ sbb r9,QWORD[24+r14]
+ sbb r10,0
+
+ cmovc r12,rsi ; borrow set: restore the unsubtracted qwords
+ cmovc r13,r11
+ cmovc r8,rcx
+ cmovc r9,rbp
+
+ mov QWORD[rdi],r12 ; store the four result qwords
+ mov QWORD[8+rdi],r13
+ mov QWORD[16+rdi],r8
+ mov QWORD[24+rdi],r9
+
+ mov r15,QWORD[rsp] ; reload the pushed registers with mov, then release the 48 bytes with lea
+
+ mov r14,QWORD[8+rsp]
+
+ mov r13,QWORD[16+rsp]
+
+ mov r12,QWORD[24+rsp]
+
+ mov rbx,QWORD[32+rsp]
+
+ mov rbp,QWORD[40+rsp]
+
+ lea rsp,[48+rsp]
+
+$L$ord_mul_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ecp_nistz256_ord_mul_mont:
+
+
+
+
+
+
+
+global ecp_nistz256_ord_sqr_mont
+ ; Repeated Montgomery-style squaring of a 4-qword value with the modulus at $L$ord; arg3 is the repeat count (Win64 entry)
+ALIGN 32
+ecp_nistz256_ord_sqr_mont:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_ecp_nistz256_ord_sqr_mont:
+ mov rdi,rcx ; rdi = arg1: result pointer
+ mov rsi,rdx ; rsi = arg2: input x, four qwords
+ mov rdx,r8 ; rdx = arg3: number of squarings (moved to rbx below)
+
+
+
+ mov ecx,0x80100 ; take the *_montx variant when both bits of mask 0x80100 are set in the
+ and ecx,DWORD[((OPENSSL_ia32cap_P+8))] ; capability dword at OPENSSL_ia32cap_P+8 (presumably BMI2 and ADX - confirm)
+ cmp ecx,0x80100
+ je NEAR $L$ecp_nistz256_ord_sqr_montx
+ push rbp ; save the six callee-saved GPRs used below
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+$L$ord_sqr_body:
+
+ mov r8,QWORD[rsi] ; working value x[0..3] lives in r8, rax, r14, r15 at the top of each iteration
+ mov rax,QWORD[8+rsi]
+ mov r14,QWORD[16+rsi]
+ mov r15,QWORD[24+rsi]
+ lea rsi,[$L$ord] ; rsi now addresses the modulus; [32+rsi] is the qword that follows it ($L$ordK)
+ mov rbx,rdx ; rbx = remaining iteration count
+ jmp NEAR $L$oop_ord_sqr
+
+ALIGN 32
+$L$oop_ord_sqr: ; one iteration = one squaring followed by four reduction steps
+ ; Cross products x[i]*x[j] for i != j, accumulated in r9..r14
+ mov rbp,rax ; rbp = x[1]
+ mul r8 ; x[1]*x[0]
+ mov r9,rax
+DB 102,72,15,110,205 ; encoded movq xmm1,rbp: stash x[1]
+ mov rax,r14
+ mov r10,rdx
+
+ mul r8 ; x[2]*x[0]
+ add r10,rax
+ mov rax,r15
+DB 102,73,15,110,214 ; encoded movq xmm2,r14: stash x[2]
+ adc rdx,0
+ mov r11,rdx
+
+ mul r8 ; x[3]*x[0]
+ add r11,rax
+ mov rax,r15
+DB 102,73,15,110,223 ; encoded movq xmm3,r15: stash x[3]
+ adc rdx,0
+ mov r12,rdx
+
+
+ mul r14 ; x[3]*x[2]
+ mov r13,rax
+ mov rax,r14
+ mov r14,rdx
+
+
+ mul rbp ; x[2]*x[1]
+ add r11,rax
+ mov rax,r15
+ adc rdx,0
+ mov r15,rdx
+
+ mul rbp ; x[3]*x[1]
+ add r12,rax
+ adc rdx,0
+
+ add r12,r15
+ adc r13,rdx
+ adc r14,0
+
+ ; Double the cross products (r9..r14, carry out into r15)
+ xor r15,r15
+ mov rax,r8
+ add r9,r9
+ adc r10,r10
+ adc r11,r11
+ adc r12,r12
+ adc r13,r13
+ adc r14,r14
+ adc r15,0
+
+ ; Add the squares x[i]*x[i]; x[1..3] come back from xmm1..xmm3
+ mul rax ; x[0]*x[0]
+ mov r8,rax
+DB 102,72,15,126,200 ; encoded movq rax,xmm1
+ mov rbp,rdx
+
+ mul rax ; x[1]*x[1]
+ add r9,rbp
+ adc r10,rax
+DB 102,72,15,126,208 ; encoded movq rax,xmm2
+ adc rdx,0
+ mov rbp,rdx
+
+ mul rax ; x[2]*x[2]
+ add r11,rbp
+ adc r12,rax
+DB 102,72,15,126,216 ; encoded movq rax,xmm3
+ adc rdx,0
+ mov rbp,rdx
+
+ mov rcx,r8
+ imul r8,QWORD[32+rsi] ; r8 = m = lowest qword * ordK (low 64 bits)
+
+ mul rax ; x[3]*x[3]
+ add r13,rbp
+ adc r14,rax
+ mov rax,QWORD[rsi]
+ adc r15,rdx
+
+ ; Reduction step 1: modulus qwords 0 and 1 via mul, qwords 2 and 3 via the sub/shift sequences
+ mul r8
+ mov rbp,r8
+ add rcx,rax
+ mov rax,QWORD[8+rsi]
+ adc rcx,rdx
+
+ sub r10,r8
+ sbb rbp,0
+
+ mul r8
+ add r9,rcx
+ adc rdx,0
+ add r9,rax
+ mov rax,r8
+ adc r10,rdx
+ mov rdx,r8
+ adc rbp,0
+
+ mov rcx,r9
+ imul r9,QWORD[32+rsi] ; m for step 2
+
+ shl rax,32
+ shr rdx,32
+ sub r11,rax
+ mov rax,QWORD[rsi]
+ sbb r8,rdx
+
+ add r11,rbp
+ adc r8,0
+
+ ; Reduction step 2
+ mul r9
+ mov rbp,r9
+ add rcx,rax
+ mov rax,QWORD[8+rsi]
+ adc rcx,rdx
+
+ sub r11,r9
+ sbb rbp,0
+
+ mul r9
+ add r10,rcx
+ adc rdx,0
+ add r10,rax
+ mov rax,r9
+ adc r11,rdx
+ mov rdx,r9
+ adc rbp,0
+
+ mov rcx,r10
+ imul r10,QWORD[32+rsi] ; m for step 3
+
+ shl rax,32
+ shr rdx,32
+ sub r8,rax
+ mov rax,QWORD[rsi]
+ sbb r9,rdx
+
+ add r8,rbp
+ adc r9,0
+
+ ; Reduction step 3
+ mul r10
+ mov rbp,r10
+ add rcx,rax
+ mov rax,QWORD[8+rsi]
+ adc rcx,rdx
+
+ sub r8,r10
+ sbb rbp,0
+
+ mul r10
+ add r11,rcx
+ adc rdx,0
+ add r11,rax
+ mov rax,r10
+ adc r8,rdx
+ mov rdx,r10
+ adc rbp,0
+
+ mov rcx,r11
+ imul r11,QWORD[32+rsi] ; m for step 4
+
+ shl rax,32
+ shr rdx,32
+ sub r9,rax
+ mov rax,QWORD[rsi]
+ sbb r10,rdx
+
+ add r9,rbp
+ adc r10,0
+
+ ; Reduction step 4
+ mul r11
+ mov rbp,r11
+ add rcx,rax
+ mov rax,QWORD[8+rsi]
+ adc rcx,rdx
+
+ sub r9,r11
+ sbb rbp,0
+
+ mul r11
+ add r8,rcx
+ adc rdx,0
+ add r8,rax
+ mov rax,r11
+ adc r9,rdx
+ mov rdx,r11
+ adc rbp,0
+
+ shl rax,32
+ shr rdx,32
+ sub r10,rax
+ sbb r11,rdx
+
+ add r10,rbp
+ adc r11,0
+
+ ; Add the upper half of the square (r12..r15) to the reduced lower half; carry qword in rdx
+ xor rdx,rdx
+ add r8,r12
+ adc r9,r13
+ mov r12,r8 ; keep unsubtracted copies in r12, rax, r14, r15
+ adc r10,r14
+ adc r11,r15
+ mov rax,r9
+ adc rdx,0
+
+ ; Subtract the modulus; on borrow keep the unsubtracted value
+ sub r8,QWORD[rsi]
+ mov r14,r10
+ sbb r9,QWORD[8+rsi]
+ sbb r10,QWORD[16+rsi]
+ mov r15,r11
+ sbb r11,QWORD[24+rsi]
+ sbb rdx,0
+
+ cmovc r8,r12 ; result ends up in r8, rax, r14, r15: the layout the loop top expects
+ cmovnc rax,r9
+ cmovnc r14,r10
+ cmovnc r15,r11
+
+ dec rbx
+ jnz NEAR $L$oop_ord_sqr
+
+ mov QWORD[rdi],r8 ; store the result; the interleaved pxor clear the xmm copies of the operand qwords
+ mov QWORD[8+rdi],rax
+ pxor xmm1,xmm1
+ mov QWORD[16+rdi],r14
+ pxor xmm2,xmm2
+ mov QWORD[24+rdi],r15
+ pxor xmm3,xmm3
+
+ mov r15,QWORD[rsp] ; reload the pushed registers with mov, then release the 48 bytes with lea
+
+ mov r14,QWORD[8+rsp]
+
+ mov r13,QWORD[16+rsp]
+
+ mov r12,QWORD[24+rsp]
+
+ mov rbx,QWORD[32+rsp]
+
+ mov rbp,QWORD[40+rsp]
+
+ lea rsp,[48+rsp]
+
+$L$ord_sqr_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ecp_nistz256_ord_sqr_mont:
+
+
+ALIGN 32
+ecp_nistz256_ord_mul_montx: ; 4x4-word multiply interleaved with four reduction steps against the four words at $L$ord (mulx/adcx/adox); the 4-word result is stored through the 1st argument
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ; rdi/rsi are parked in the two stack slots just above the return address
+ mov rax,rsp ; rax = rsp at entry; full_handler reads it back from the saved context if a fault lands before $L$ord_mulx_body
+$L$SEH_begin_ecp_nistz256_ord_mul_montx:
+ mov rdi,rcx ; rdi = 1st argument (result pointer, written at the end)
+ mov rsi,rdx ; rsi = 2nd argument (first operand, called a[] in the comments below)
+ mov rdx,r8 ; rdx = 3rd argument (second operand, called b[] in the comments below)
+
+
+
+$L$ecp_nistz256_ord_mul_montx:
+ push rbp ; six pushes = 48 bytes, released by lea rsp,[48+rsp] at the end
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+$L$ord_mulx_body:
+
+ mov rbx,rdx ; rbx = b (rdx itself is needed as the implicit mulx multiplicand)
+ mov rdx,QWORD[rdx] ; rdx = b[0]
+ mov r9,QWORD[rsi] ; r9..r12 = a[0..3]
+ mov r10,QWORD[8+rsi]
+ mov r11,QWORD[16+rsi]
+ mov r12,QWORD[24+rsi]
+ lea rsi,[((-128))+rsi] ; bias rsi by -128; a[i] is addressed as [8*i+128+rsi] from here on
+ lea r14,[(($L$ord-128))] ; same -128 bias for the $L$ord table pointer
+ mov r15,QWORD[$L$ordK] ; r15 = per-step reduction constant
+
+ ; ---- multiply step 0: accumulator = a[] * b[0] ----
+ mulx r9,r8,r9 ; r9:r8 = b[0]*a[0]
+ mulx r10,rcx,r10 ; r10:rcx = b[0]*a[1]
+ mulx r11,rbp,r11 ; r11:rbp = b[0]*a[2]
+ add r9,rcx ; mulx leaves flags untouched, so this add/adc chain continues across the mulx instructions
+ mulx r12,rcx,r12 ; r12:rcx = b[0]*a[3]
+ mov rdx,r8
+ mulx rax,rdx,r15 ; rdx = low 64 bits of r8*[$L$ordK] = multiplier for reduction step 0; the high half in rax is never read
+ adc r10,rbp
+ adc r11,rcx
+ adc r12,0
+
+ ; ---- reduction step 0: add multiplier * ord[0..3] into r8..r12 ----
+ xor r13,r13 ; r13 = 0; xor also clears CF and OF so both carry chains start clean
+ mulx rbp,rcx,QWORD[((0+128))+r14] ; rbp:rcx = multiplier * ord[0]
+ adcx r8,rcx ; low halves are added on the CF chain ...
+ adox r9,rbp ; ... high halves on the independent OF chain
+
+ mulx rbp,rcx,QWORD[((8+128))+r14]
+ adcx r9,rcx
+ adox r10,rbp
+
+ mulx rbp,rcx,QWORD[((16+128))+r14]
+ adcx r10,rcx
+ adox r11,rbp
+
+ mulx rbp,rcx,QWORD[((24+128))+r14]
+ mov rdx,QWORD[8+rbx] ; rdx = b[1] for the next multiply step
+ adcx r11,rcx
+ adox r12,rbp
+ adcx r12,r8 ; NOTE(review): r8 is presumably 0 here (low word cancelled by the reduction), making this a pure CF fold - confirm against $L$ordK
+ adox r13,r8 ; likewise folds the OF chain into r13
+ adc r13,0
+
+ ; ---- multiply step 1: add a[] * b[1] into r9..r13 ----
+ mulx rbp,rcx,QWORD[((0+128))+rsi]
+ adcx r9,rcx
+ adox r10,rbp
+
+ mulx rbp,rcx,QWORD[((8+128))+rsi]
+ adcx r10,rcx
+ adox r11,rbp
+
+ mulx rbp,rcx,QWORD[((16+128))+rsi]
+ adcx r11,rcx
+ adox r12,rbp
+
+ mulx rbp,rcx,QWORD[((24+128))+rsi]
+ mov rdx,r9
+ mulx rax,rdx,r15 ; rdx = low 64 bits of r9*[$L$ordK] = multiplier for reduction step 1
+ adcx r12,rcx
+ adox r13,rbp
+
+ adcx r13,r8
+ adox r8,r8 ; r8 is recycled as the new top accumulator word, collecting the final carries
+ adc r8,0
+
+ ; ---- reduction step 1: add multiplier * ord[0..3] into r9..r13,r8 ----
+ mulx rbp,rcx,QWORD[((0+128))+r14]
+ adcx r9,rcx
+ adox r10,rbp
+
+ mulx rbp,rcx,QWORD[((8+128))+r14]
+ adcx r10,rcx
+ adox r11,rbp
+
+ mulx rbp,rcx,QWORD[((16+128))+r14]
+ adcx r11,rcx
+ adox r12,rbp
+
+ mulx rbp,rcx,QWORD[((24+128))+r14]
+ mov rdx,QWORD[16+rbx] ; rdx = b[2]
+ adcx r12,rcx
+ adox r13,rbp
+ adcx r13,r9
+ adox r8,r9
+ adc r8,0
+
+ ; ---- multiply step 2: add a[] * b[2] into r10..r13,r8 ----
+ mulx rbp,rcx,QWORD[((0+128))+rsi]
+ adcx r10,rcx
+ adox r11,rbp
+
+ mulx rbp,rcx,QWORD[((8+128))+rsi]
+ adcx r11,rcx
+ adox r12,rbp
+
+ mulx rbp,rcx,QWORD[((16+128))+rsi]
+ adcx r12,rcx
+ adox r13,rbp
+
+ mulx rbp,rcx,QWORD[((24+128))+rsi]
+ mov rdx,r10
+ mulx rax,rdx,r15 ; rdx = low 64 bits of r10*[$L$ordK] = multiplier for reduction step 2
+ adcx r13,rcx
+ adox r8,rbp
+
+ adcx r8,r9
+ adox r9,r9 ; r9 becomes the new top accumulator word
+ adc r9,0
+
+ ; ---- reduction step 2 ----
+ mulx rbp,rcx,QWORD[((0+128))+r14]
+ adcx r10,rcx
+ adox r11,rbp
+
+ mulx rbp,rcx,QWORD[((8+128))+r14]
+ adcx r11,rcx
+ adox r12,rbp
+
+ mulx rbp,rcx,QWORD[((16+128))+r14]
+ adcx r12,rcx
+ adox r13,rbp
+
+ mulx rbp,rcx,QWORD[((24+128))+r14]
+ mov rdx,QWORD[24+rbx] ; rdx = b[3]
+ adcx r13,rcx
+ adox r8,rbp
+ adcx r8,r10
+ adox r9,r10
+ adc r9,0
+
+ ; ---- multiply step 3: add a[] * b[3] into r11..r13,r8,r9 ----
+ mulx rbp,rcx,QWORD[((0+128))+rsi]
+ adcx r11,rcx
+ adox r12,rbp
+
+ mulx rbp,rcx,QWORD[((8+128))+rsi]
+ adcx r12,rcx
+ adox r13,rbp
+
+ mulx rbp,rcx,QWORD[((16+128))+rsi]
+ adcx r13,rcx
+ adox r8,rbp
+
+ mulx rbp,rcx,QWORD[((24+128))+rsi]
+ mov rdx,r11
+ mulx rax,rdx,r15 ; rdx = low 64 bits of r11*[$L$ordK] = multiplier for reduction step 3
+ adcx r8,rcx
+ adox r9,rbp
+
+ adcx r9,r10
+ adox r10,r10 ; r10 becomes the top (carry) word of the final value
+ adc r10,0
+
+ ; ---- reduction step 3 ----
+ mulx rbp,rcx,QWORD[((0+128))+r14]
+ adcx r11,rcx
+ adox r12,rbp
+
+ mulx rbp,rcx,QWORD[((8+128))+r14]
+ adcx r12,rcx
+ adox r13,rbp
+
+ mulx rbp,rcx,QWORD[((16+128))+r14]
+ adcx r13,rcx
+ adox r8,rbp
+
+ mulx rbp,rcx,QWORD[((24+128))+r14]
+ lea r14,[128+r14] ; remove the -128 bias: r14 = $L$ord
+ mov rbx,r12 ; keep unsubtracted copies (rbx, rdx, rcx, rbp) for the select below
+ adcx r8,rcx
+ adox r9,rbp
+ mov rdx,r13
+ adcx r9,r11
+ adox r10,r11
+ adc r10,0
+
+
+ ; ---- final conditional subtraction; value is r12,r13,r8,r9 with carry word r10 ----
+ mov rcx,r8
+ sub r12,QWORD[r14] ; trial subtraction of the four $L$ord words ...
+ sbb r13,QWORD[8+r14]
+ sbb r8,QWORD[16+r14]
+ mov rbp,r9
+ sbb r9,QWORD[24+r14]
+ sbb r10,0 ; ... with the borrow propagated through the carry word
+
+ cmovc r12,rbx ; borrow set => the subtraction underflowed, so keep the unsubtracted words (branch-free select)
+ cmovc r13,rdx
+ cmovc r8,rcx
+ cmovc r9,rbp
+
+ mov QWORD[rdi],r12 ; store the 4-word result through the 1st argument
+ mov QWORD[8+rdi],r13
+ mov QWORD[16+rdi],r8
+ mov QWORD[24+rdi],r9
+
+ mov r15,QWORD[rsp] ; reload the pushed registers with plain loads instead of pops (reverse push order)
+
+ mov r14,QWORD[8+rsp]
+
+ mov r13,QWORD[16+rsp]
+
+ mov r12,QWORD[24+rsp]
+
+ mov rbx,QWORD[32+rsp]
+
+ mov rbp,QWORD[40+rsp]
+
+ lea rsp,[48+rsp] ; release the 48 bytes of pushes; rsp is back at its entry value
+
+$L$ord_mulx_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ecp_nistz256_ord_mul_montx:
+
+
+ALIGN 32
+ecp_nistz256_ord_sqr_montx: ; repeated 4-word squaring with reduction against the words at $L$ord (mulx/adcx/adox); the 3rd argument is the repetition count, the final value is stored through the 1st argument
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ; rdi/rsi are parked in the two stack slots just above the return address
+ mov rax,rsp ; rax = rsp at entry; full_handler reads it back from the saved context if a fault lands before $L$ord_sqrx_body
+$L$SEH_begin_ecp_nistz256_ord_sqr_montx:
+ mov rdi,rcx ; rdi = 1st argument (result pointer)
+ mov rsi,rdx ; rsi = 2nd argument (input operand, called a[] below)
+ mov rdx,r8 ; rdx = 3rd argument (repetition count)
+
+
+
+$L$ecp_nistz256_ord_sqr_montx: ; also reached by the je in ecp_nistz256_ord_sqr_mont when (OPENSSL_ia32cap_P+8 & 0x80100) == 0x80100
+ push rbp ; six pushes = 48 bytes, released by lea rsp,[48+rsp] at the end
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+$L$ord_sqrx_body:
+
+ mov rbx,rdx ; rbx = loop counter (dec rbx / jnz at the bottom of the loop)
+ mov rdx,QWORD[rsi] ; rdx = a[0], r14 = a[1], r15 = a[2], r8 = a[3]
+ mov r14,QWORD[8+rsi]
+ mov r15,QWORD[16+rsi]
+ mov r8,QWORD[24+rsi]
+ lea rsi,[$L$ord] ; rsi now points at the constant table; the input pointer is no longer needed
+ jmp NEAR $L$oop_ord_sqrx
+
+ALIGN 32
+$L$oop_ord_sqrx: ; loop invariant on entry: rdx,r14,r15,r8 = current a[0..3]
+ mulx r10,r9,r14 ; r10:r9 = a[0]*a[1]
+ mulx r11,rcx,r15 ; r11:rcx = a[0]*a[2]
+ mov rax,rdx ; rax = a[0], kept for the squaring pass
+DB 102,73,15,110,206 ; hand-encoded movq xmm1,r14 (stash a[1])
+ mulx r12,rbp,r8 ; r12:rbp = a[0]*a[3]
+ mov rdx,r14 ; rdx = a[1]
+ add r10,rcx
+DB 102,73,15,110,215 ; hand-encoded movq xmm2,r15 (stash a[2])
+ adc r11,rbp
+ adc r12,0
+ xor r13,r13 ; r13 = 0; also clears CF and OF for the two carry chains
+
+ mulx rbp,rcx,r15 ; rbp:rcx = a[1]*a[2]
+ adcx r11,rcx
+ adox r12,rbp
+
+ mulx rbp,rcx,r8 ; rbp:rcx = a[1]*a[3]
+ mov rdx,r15 ; rdx = a[2]
+ adcx r12,rcx
+ adox r13,rbp
+ adc r13,0
+
+ mulx r14,rcx,r8 ; r14:rcx = a[2]*a[3]
+ mov rdx,rax ; rdx = a[0] again
+DB 102,73,15,110,216 ; hand-encoded movq xmm3,r8 (stash a[3])
+ xor r15,r15 ; r15 = 0; clears CF and OF again
+ adcx r9,r9 ; CF chain: double the cross-product words r9..r14, top carry lands in r15
+ adox r13,rcx ; OF chain: first finish adding a[2]*a[3] ...
+ adcx r10,r10
+ adox r14,r15 ; ... (r15 is 0 here, this only propagates OF)
+
+ ; ---- add the four squares a[i]^2; the low word of a[0]^2 becomes r8 ----
+ mulx rbp,r8,rdx ; rbp:r8 = a[0]^2
+DB 102,72,15,126,202 ; hand-encoded movq rdx,xmm1 (rdx = a[1])
+ adcx r11,r11
+ adox r9,rbp
+ adcx r12,r12
+ mulx rax,rcx,rdx ; rax:rcx = a[1]^2
+DB 102,72,15,126,210 ; hand-encoded movq rdx,xmm2 (rdx = a[2])
+ adcx r13,r13
+ adox r10,rcx
+ adcx r14,r14
+ mulx rbp,rcx,rdx ; rbp:rcx = a[2]^2
+DB 0x67 ; lone 0x67 prefix byte attached to the following movq - NOTE(review): presumably padding for instruction alignment, confirm
+DB 102,72,15,126,218 ; hand-encoded movq rdx,xmm3 (rdx = a[3])
+ adox r11,rax
+ adcx r15,r15
+ adox r12,rcx
+ adox r13,rbp
+ mulx rax,rcx,rdx ; rax:rcx = a[3]^2
+ adox r14,rcx
+ adox r15,rax ; full 8-word square is now r8..r15
+
+ ; ---- reduction step 0 on r8 ----
+ mov rdx,r8
+ mulx rcx,rdx,QWORD[32+rsi] ; rdx = low 64 bits of r8*[32+rsi]; NOTE(review): [32+rsi] is presumably $L$ordK sitting right after the four $L$ord words - table not visible here, confirm
+
+ xor rax,rax ; rax = 0; clears CF and OF
+ mulx rbp,rcx,QWORD[rsi] ; rbp:rcx = multiplier * ord[0]
+ adcx r8,rcx
+ adox r9,rbp
+ mulx rbp,rcx,QWORD[8+rsi]
+ adcx r9,rcx
+ adox r10,rbp
+ mulx rbp,rcx,QWORD[16+rsi]
+ adcx r10,rcx
+ adox r11,rbp
+ mulx rbp,rcx,QWORD[24+rsi]
+ adcx r11,rcx
+ adox r8,rbp ; r8 is recycled as the word above r11
+ adcx r8,rax ; rax is 0: folds the CF chain into r8
+
+ ; ---- reduction step 1 on r9 (CF/OF roles swapped relative to step 0) ----
+ mov rdx,r9
+ mulx rcx,rdx,QWORD[32+rsi]
+
+ mulx rbp,rcx,QWORD[rsi]
+ adox r9,rcx
+ adcx r10,rbp
+ mulx rbp,rcx,QWORD[8+rsi]
+ adox r10,rcx
+ adcx r11,rbp
+ mulx rbp,rcx,QWORD[16+rsi]
+ adox r11,rcx
+ adcx r8,rbp
+ mulx rbp,rcx,QWORD[24+rsi]
+ adox r8,rcx
+ adcx r9,rbp
+ adox r9,rax
+
+ ; ---- reduction step 2 on r10 ----
+ mov rdx,r10
+ mulx rcx,rdx,QWORD[32+rsi]
+
+ mulx rbp,rcx,QWORD[rsi]
+ adcx r10,rcx
+ adox r11,rbp
+ mulx rbp,rcx,QWORD[8+rsi]
+ adcx r11,rcx
+ adox r8,rbp
+ mulx rbp,rcx,QWORD[16+rsi]
+ adcx r8,rcx
+ adox r9,rbp
+ mulx rbp,rcx,QWORD[24+rsi]
+ adcx r9,rcx
+ adox r10,rbp
+ adcx r10,rax
+
+ ; ---- reduction step 3 on r11 ----
+ mov rdx,r11
+ mulx rcx,rdx,QWORD[32+rsi]
+
+ mulx rbp,rcx,QWORD[rsi]
+ adox r11,rcx
+ adcx r8,rbp
+ mulx rbp,rcx,QWORD[8+rsi]
+ adox r8,rcx
+ adcx r9,rbp
+ mulx rbp,rcx,QWORD[16+rsi]
+ adox r9,rcx
+ adcx r10,rbp
+ mulx rbp,rcx,QWORD[24+rsi]
+ adox r10,rcx
+ adcx r11,rbp
+ adox r11,rax
+
+ ; ---- add the reduced low half (r8..r11) to the high half (r12..r15); sum lands in r12,r9,r10,r11 ----
+ add r12,r8
+ adc r9,r13
+ mov rdx,r12 ; unsubtracted copies go to rdx,r14,r15,r8 (the registers the loop expects)
+ adc r10,r14
+ adc r11,r15
+ mov r14,r9
+ adc rax,0 ; rax = carry out of the 4-word sum
+
+ ; ---- conditional subtraction of the four $L$ord words ----
+ sub r12,QWORD[rsi]
+ mov r15,r10
+ sbb r9,QWORD[8+rsi]
+ sbb r10,QWORD[16+rsi]
+ mov r8,r11
+ sbb r11,QWORD[24+rsi]
+ sbb rax,0
+
+ cmovnc rdx,r12 ; no borrow => take the subtracted words (branch-free select)
+ cmovnc r14,r9
+ cmovnc r15,r10
+ cmovnc r8,r11
+
+ dec rbx ; one squaring done; rdx,r14,r15,r8 feed the next iteration
+ jnz NEAR $L$oop_ord_sqrx
+
+ mov QWORD[rdi],rdx ; store the final 4-word value through the 1st argument
+ mov QWORD[8+rdi],r14
+ pxor xmm1,xmm1 ; zero the xmm registers that held copies of the operand words
+ mov QWORD[16+rdi],r15
+ pxor xmm2,xmm2
+ mov QWORD[24+rdi],r8
+ pxor xmm3,xmm3
+
+ mov r15,QWORD[rsp] ; reload the pushed registers with plain loads instead of pops (reverse push order)
+
+ mov r14,QWORD[8+rsp]
+
+ mov r13,QWORD[16+rsp]
+
+ mov r12,QWORD[24+rsp]
+
+ mov rbx,QWORD[32+rsp]
+
+ mov rbp,QWORD[40+rsp]
+
+ lea rsp,[48+rsp] ; release the 48 bytes of pushes; rsp is back at its entry value
+
+$L$ord_sqrx_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ecp_nistz256_ord_sqr_montx:
+
+
+
+
global ecp_nistz256_to_mont
ALIGN 32
@@ -2808,15 +3983,23 @@ $L$SEH_begin_ecp_nistz256_mul_mont:
mov rdx,r8
+
mov ecx,0x80100
and ecx,DWORD[((OPENSSL_ia32cap_P+8))]
$L$mul_mont:
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
+$L$mul_body:
cmp ecx,0x80100
je NEAR $L$mul_montx
mov rbx,rdx
@@ -2841,15 +4024,25 @@ $L$mul_montx:
call __ecp_nistz256_mul_montx
$L$mul_mont_done:
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbx
- pop rbp
+ mov r15,QWORD[rsp]
+
+ mov r14,QWORD[8+rsp]
+
+ mov r13,QWORD[16+rsp]
+
+ mov r12,QWORD[24+rsp]
+
+ mov rbx,QWORD[32+rsp]
+
+ mov rbp,QWORD[40+rsp]
+
+ lea rsp,[48+rsp]
+
+$L$mul_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_mul_mont:
@@ -3087,14 +4280,22 @@ $L$SEH_begin_ecp_nistz256_sqr_mont:
mov rsi,rdx
+
mov ecx,0x80100
and ecx,DWORD[((OPENSSL_ia32cap_P+8))]
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
+$L$sqr_body:
cmp ecx,0x80100
je NEAR $L$sqr_montx
mov rax,QWORD[rsi]
@@ -3115,15 +4316,25 @@ $L$sqr_montx:
call __ecp_nistz256_sqr_montx
$L$sqr_mont_done:
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbx
- pop rbp
+ mov r15,QWORD[rsp]
+
+ mov r14,QWORD[8+rsp]
+
+ mov r13,QWORD[16+rsp]
+
+ mov r12,QWORD[24+rsp]
+
+ mov rbx,QWORD[32+rsp]
+
+ mov rbp,QWORD[40+rsp]
+
+ lea rsp,[48+rsp]
+
+$L$sqr_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_sqr_mont:
@@ -3599,9 +4810,13 @@ $L$SEH_begin_ecp_nistz256_from_mont:
mov rsi,rdx
+
push r12
+
push r13
+$L$from_body:
+
mov rax,QWORD[rsi]
mov r13,QWORD[(($L$poly+24))]
mov r9,QWORD[8+rsi]
@@ -3681,11 +4896,17 @@ $L$SEH_begin_ecp_nistz256_from_mont:
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
- pop r13
- pop r12
+ mov r13,QWORD[rsp]
+
+ mov r12,QWORD[8+rsp]
+
+ lea rsp,[16+rsp]
+
+$L$from_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_from_mont:
@@ -3794,8 +5015,8 @@ $L$select_loop_sse_w5:
movaps xmm14,XMMWORD[128+rsp]
movaps xmm15,XMMWORD[144+rsp]
lea rsp,[168+rsp]
-$L$SEH_end_ecp_nistz256_gather_w5:
DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_gather_w5:
@@ -3889,8 +5110,8 @@ $L$select_loop_sse_w7:
movaps xmm14,XMMWORD[128+rsp]
movaps xmm15,XMMWORD[144+rsp]
lea rsp,[168+rsp]
-$L$SEH_end_ecp_nistz256_gather_w7:
DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_gather_w7:
@@ -3900,6 +5121,7 @@ ecp_nistz256_avx2_gather_w5:
$L$avx2_gather_w5:
vzeroupper
lea rax,[((-136))+rsp]
+ mov r11,rsp
$L$SEH_begin_ecp_nistz256_avx2_gather_w5:
DB 0x48,0x8d,0x60,0xe0
DB 0xc5,0xf8,0x29,0x70,0xe0
@@ -3973,9 +5195,9 @@ $L$select_loop_avx2_w5:
movaps xmm13,XMMWORD[112+rsp]
movaps xmm14,XMMWORD[128+rsp]
movaps xmm15,XMMWORD[144+rsp]
- lea rsp,[168+rsp]
-$L$SEH_end_ecp_nistz256_avx2_gather_w5:
+ lea rsp,[r11]
DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_avx2_gather_w5:
@@ -3986,6 +5208,7 @@ ALIGN 32
ecp_nistz256_avx2_gather_w7:
$L$avx2_gather_w7:
vzeroupper
+ mov r11,rsp
lea rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_avx2_gather_w7:
DB 0x48,0x8d,0x60,0xe0
@@ -4075,9 +5298,9 @@ $L$select_loop_avx2_w7:
movaps xmm13,XMMWORD[112+rsp]
movaps xmm14,XMMWORD[128+rsp]
movaps xmm15,XMMWORD[144+rsp]
- lea rsp,[168+rsp]
-$L$SEH_end_ecp_nistz256_avx2_gather_w7:
+ lea rsp,[r11]
DB 0F3h,0C3h ;repret
+$L$SEH_end_ecp_nistz256_avx2_gather_w7:
ALIGN 32
@@ -4212,18 +5435,27 @@ $L$SEH_begin_ecp_nistz256_point_double:
mov rsi,rdx
+
mov ecx,0x80100
and ecx,DWORD[((OPENSSL_ia32cap_P+8))]
cmp ecx,0x80100
je NEAR $L$point_doublex
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,32*5+8
+$L$point_doubleq_body:
+
$L$point_double_shortcutq:
movdqu xmm0,XMMWORD[rsi]
mov rbx,rsi
@@ -4405,16 +5637,27 @@ DB 102,72,15,126,203
DB 102,72,15,126,207
call __ecp_nistz256_sub_fromq
- add rsp,32*5+8
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbx
- pop rbp
+ lea rsi,[((160+56))+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbx,QWORD[((-16))+rsi]
+
+ mov rbp,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$point_doubleq_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_point_double:
global ecp_nistz256_point_add
@@ -4429,18 +5672,27 @@ $L$SEH_begin_ecp_nistz256_point_add:
mov rdx,r8
+
mov ecx,0x80100
and ecx,DWORD[((OPENSSL_ia32cap_P+8))]
cmp ecx,0x80100
je NEAR $L$point_addx
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,32*18+8
+$L$point_addq_body:
+
movdqu xmm0,XMMWORD[rsi]
movdqu xmm1,XMMWORD[16+rsi]
movdqu xmm2,XMMWORD[32+rsi]
@@ -4816,16 +6068,27 @@ DB 102,72,15,126,199
movdqu XMMWORD[48+rdi],xmm3
$L$add_doneq:
- add rsp,32*18+8
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbx
- pop rbp
+ lea rsi,[((576+56))+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbx,QWORD[((-16))+rsi]
+
+ mov rbp,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$point_addq_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_point_add:
global ecp_nistz256_point_add_affine
@@ -4840,18 +6103,27 @@ $L$SEH_begin_ecp_nistz256_point_add_affine:
mov rdx,r8
+
mov ecx,0x80100
and ecx,DWORD[((OPENSSL_ia32cap_P+8))]
cmp ecx,0x80100
je NEAR $L$point_add_affinex
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,32*15+8
+$L$add_affineq_body:
+
movdqu xmm0,XMMWORD[rsi]
mov rbx,rdx
movdqu xmm1,XMMWORD[16+rsi]
@@ -5133,16 +6405,27 @@ DB 102,72,15,126,199
movdqu XMMWORD[32+rdi],xmm2
movdqu XMMWORD[48+rdi],xmm3
- add rsp,32*15+8
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbx
- pop rbp
+ lea rsi,[((480+56))+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbx,QWORD[((-16))+rsi]
+
+ mov rbp,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$add_affineq_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_point_add_affine:
ALIGN 32
@@ -5282,15 +6565,24 @@ $L$SEH_begin_ecp_nistz256_point_doublex:
mov rsi,rdx
+
$L$point_doublex:
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,32*5+8
+$L$point_doublex_body:
+
$L$point_double_shortcutx:
movdqu xmm0,XMMWORD[rsi]
mov rbx,rsi
@@ -5472,16 +6764,27 @@ DB 102,72,15,126,203
DB 102,72,15,126,207
call __ecp_nistz256_sub_fromx
- add rsp,32*5+8
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbx
- pop rbp
+ lea rsi,[((160+56))+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbx,QWORD[((-16))+rsi]
+
+ mov rbp,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$point_doublex_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_point_doublex:
ALIGN 32
@@ -5495,15 +6798,24 @@ $L$SEH_begin_ecp_nistz256_point_addx:
mov rdx,r8
+
$L$point_addx:
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,32*18+8
+$L$point_addx_body:
+
movdqu xmm0,XMMWORD[rsi]
movdqu xmm1,XMMWORD[16+rsi]
movdqu xmm2,XMMWORD[32+rsi]
@@ -5879,16 +7191,27 @@ DB 102,72,15,126,199
movdqu XMMWORD[48+rdi],xmm3
$L$add_donex:
- add rsp,32*18+8
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbx
- pop rbp
+ lea rsi,[((576+56))+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbx,QWORD[((-16))+rsi]
+
+ mov rbp,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$point_addx_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_ecp_nistz256_point_addx:
ALIGN 32
@@ -5902,15 +7225,24 @@ $L$SEH_begin_ecp_nistz256_point_add_affinex:
mov rdx,r8
+
$L$point_add_affinex:
push rbp
+
push rbx
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,32*15+8
+$L$add_affinex_body:
+
movdqu xmm0,XMMWORD[rsi]
mov rbx,rdx
movdqu xmm1,XMMWORD[16+rsi]
@@ -6192,14 +7524,375 @@ DB 102,72,15,126,199
movdqu XMMWORD[32+rdi],xmm2
movdqu XMMWORD[48+rdi],xmm3
- add rsp,32*15+8
+ lea rsi,[((480+56))+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbx,QWORD[((-16))+rsi]
+
+ mov rbp,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$add_affinex_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_ecp_nistz256_point_add_affinex:
+EXTERN __imp_RtlVirtualUnwind
+
+
+ALIGN 16
+short_handler: ; Win64 language-specific exception handler for functions whose prologue pushes only r12 and r13; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (offsets below follow those Windows structures)
+ push rsi ; preserve every nonvolatile GPR plus flags; popped again at the end of $L$common_seh_tail
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; 32 bytes of shadow space + four stack-argument slots for the RtlVirtualUnwind call in the shared tail
+
+ mov rax,QWORD[120+r8] ; rax = context->Rax (the covered functions copy their entry rsp into rax in the prologue)
+ mov rbx,QWORD[248+r8] ; rbx = context->Rip (faulting address)
+
+ mov rsi,QWORD[8+r9] ; rsi = disp->ImageBase
+ mov r11,QWORD[56+r9] ; r11 = disp->HandlerData (the DD pair following the handler RVA in .xdata)
+
+ mov r10d,DWORD[r11] ; HandlerData[0] = RVA of the function's *_body label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail ; fault before the body label: nothing to restore, rax already holds the frame
+
+ mov rax,QWORD[152+r8] ; rax = context->Rsp
+
+ mov r10d,DWORD[4+r11] ; HandlerData[1] = RVA of the function's *_epilogue label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail ; fault at or after the epilogue label: registers and rsp are already restored
+
+ lea rax,[16+rax] ; in the body: step over the two pushed registers
+
+ mov r12,QWORD[((-8))+rax] ; r12 was pushed first, r13 second
+ mov r13,QWORD[((-16))+rax]
+ mov QWORD[216+r8],r12 ; context->R12
+ mov QWORD[224+r8],r13 ; context->R13
+
+ jmp NEAR $L$common_seh_tail ; shared tail lives inside full_handler
+
+
+
+ALIGN 16
+full_handler: ; Win64 language-specific exception handler for functions that push rbp,rbx,r12-r15; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (offsets below follow those Windows structures)
+ push rsi ; preserve every nonvolatile GPR plus flags; popped again at the end of the shared tail
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; 32 bytes of shadow space + four stack-argument slots for the RtlVirtualUnwind call below
+
+ mov rax,QWORD[120+r8] ; rax = context->Rax (the covered functions copy their entry rsp into rax in the prologue)
+ mov rbx,QWORD[248+r8] ; rbx = context->Rip (faulting address)
+
+ mov rsi,QWORD[8+r9] ; rsi = disp->ImageBase
+ mov r11,QWORD[56+r9] ; r11 = disp->HandlerData (body RVA, epilogue RVA, frame size from .xdata)
+
+ mov r10d,DWORD[r11] ; HandlerData[0] = RVA of the function's *_body label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail ; fault before the body label: nothing to restore
+
+ mov rax,QWORD[152+r8] ; rax = context->Rsp
+
+ mov r10d,DWORD[4+r11] ; HandlerData[1] = RVA of the function's *_epilogue label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail ; fault at or after the epilogue label: registers and rsp are already restored
+
+ mov r10d,DWORD[8+r11] ; HandlerData[2] = bytes from the body's rsp up to the entry rsp (48, or locals+56)
+ lea rax,[r10*1+rax] ; rax = rsp value at function entry
+
+ mov rbp,QWORD[((-8))+rax] ; reload the six pushed registers from just below the entry rsp
+ mov rbx,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx ; context->Rbx
+ mov QWORD[160+r8],rbp ; context->Rbp
+ mov QWORD[216+r8],r12 ; context->R12
+ mov QWORD[224+r8],r13 ; context->R13
+ mov QWORD[232+r8],r14 ; context->R14
+ mov QWORD[240+r8],r15 ; context->R15
+
+$L$common_seh_tail: ; shared by short_handler and full_handler; rax = rsp value at function entry
+ mov rdi,QWORD[8+rax] ; rdi/rsi as saved by the ";WIN64 prologue" stores
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax ; context->Rsp
+ mov QWORD[168+r8],rsi ; context->Rsi
+ mov QWORD[176+r8],rdi ; context->Rdi
+
+ mov rdi,QWORD[40+r9] ; rdi = disp->ContextRecord (copy destination)
+ mov rsi,r8 ; rsi = patched context (copy source)
+ mov ecx,154 ; 154 qwords = 1232 bytes copied
+ DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
+
+ mov rsi,r9 ; rsi = DISPATCHER_CONTEXT*
+ xor rcx,rcx ; arg1 = 0 (handler type)
+ mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
+ mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
+ mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
+ mov r10,QWORD[40+rsi] ; disp->ContextRecord
+ lea r11,[56+rsi] ; &disp->HandlerData
+ lea r12,[24+rsi] ; &disp->EstablisherFrame
+ mov QWORD[32+rsp],r10 ; arg5
+ mov QWORD[40+rsp],r11 ; arg6
+ mov QWORD[48+rsp],r12 ; arg7
+ mov QWORD[56+rsp],rcx ; arg8 = 0
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1 ; return value 1 - NOTE(review): presumably ExceptionContinueSearch, confirm against the Windows headers
+ add rsp,64
+ popfq
pop r15
pop r14
pop r13
pop r12
- pop rbx
pop rbp
- mov rdi,QWORD[8+rsp] ;WIN64 epilogue
- mov rsi,QWORD[16+rsp]
+ pop rbx ; rbx, rdi, rsi complete the reverse of the push sequence at the handler's entry
+ pop rdi
+ pop rsi
DB 0F3h,0C3h ;repret
-$L$SEH_end_ecp_nistz256_point_add_affinex:
+
+
+section .pdata rdata align=4 ; function table: one triple of image-relative addresses per covered function
+ALIGN 4
+ DD $L$SEH_begin_ecp_nistz256_mul_by_2 wrt ..imagebase ; start of the covered code range
+ DD $L$SEH_end_ecp_nistz256_mul_by_2 wrt ..imagebase ; end of the covered code range
+ DD $L$SEH_info_ecp_nistz256_mul_by_2 wrt ..imagebase ; matching unwind record in .xdata
+
+ DD $L$SEH_begin_ecp_nistz256_div_by_2 wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_div_by_2 wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_div_by_2 wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_mul_by_3 wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_mul_by_3 wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_mul_by_3 wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_add wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_add wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_add wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_sub wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_sub wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_sub wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_neg wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_neg wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_neg wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_ord_mul_mont wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_ord_mul_mont wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_ord_mul_mont wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_ord_sqr_mont wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_ord_sqr_mont wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_ord_sqr_mont wrt ..imagebase
+ DD $L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase
+ DD $L$SEH_begin_ecp_nistz256_to_mont wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_to_mont wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_to_mont wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_mul_mont wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_mul_mont wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_mul_mont wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_sqr_mont wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_sqr_mont wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_sqr_mont wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_from_mont wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_from_mont wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_from_mont wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_gather_w5 wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_gather_w5 wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_gather_wX wrt ..imagebase ; gather_w5 and gather_w7 share one unwind record
+
+ DD $L$SEH_begin_ecp_nistz256_gather_w7 wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_gather_w7 wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_gather_wX wrt ..imagebase
+ DD $L$SEH_begin_ecp_nistz256_avx2_gather_w5 wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_avx2_gather_w5 wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_avx2_gather_wX wrt ..imagebase ; likewise shared by both avx2 gather variants
+
+ DD $L$SEH_begin_ecp_nistz256_avx2_gather_w7 wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_avx2_gather_w7 wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_avx2_gather_wX wrt ..imagebase
+ DD $L$SEH_begin_ecp_nistz256_point_double wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_point_double wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_point_double wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_point_add wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_point_add wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_point_add wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_point_add_affine wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_point_add_affine wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_point_add_affine wrt ..imagebase
+ DD $L$SEH_begin_ecp_nistz256_point_doublex wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_point_doublex wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_point_doublex wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_point_addx wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_point_addx wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_point_addx wrt ..imagebase
+
+ DD $L$SEH_begin_ecp_nistz256_point_add_affinex wrt ..imagebase
+ DD $L$SEH_end_ecp_nistz256_point_add_affinex wrt ..imagebase
+ DD $L$SEH_info_ecp_nistz256_point_add_affinex wrt ..imagebase
+
+section .xdata rdata align=8 ; unwind records referenced from .pdata
+ALIGN 8
+$L$SEH_info_ecp_nistz256_mul_by_2:
+DB 9,0,0,0 ; header byte 9 = version 1 + exception-handler flag (1<<3); prolog size 0, no unwind codes, no frame register
+ DD short_handler wrt ..imagebase ; handler: short_handler restores only r12/r13
+ DD $L$mul_by_2_body wrt ..imagebase,$L$mul_by_2_epilogue wrt ..imagebase ; handler data: body and epilogue label RVAs compared against the faulting Rip
+$L$SEH_info_ecp_nistz256_div_by_2:
+DB 9,0,0,0
+ DD short_handler wrt ..imagebase
+ DD $L$div_by_2_body wrt ..imagebase,$L$div_by_2_epilogue wrt ..imagebase
+$L$SEH_info_ecp_nistz256_mul_by_3:
+DB 9,0,0,0
+ DD short_handler wrt ..imagebase
+ DD $L$mul_by_3_body wrt ..imagebase,$L$mul_by_3_epilogue wrt ..imagebase
+$L$SEH_info_ecp_nistz256_add:
+DB 9,0,0,0
+ DD short_handler wrt ..imagebase
+ DD $L$add_body wrt ..imagebase,$L$add_epilogue wrt ..imagebase
+$L$SEH_info_ecp_nistz256_sub:
+DB 9,0,0,0
+ DD short_handler wrt ..imagebase
+ DD $L$sub_body wrt ..imagebase,$L$sub_epilogue wrt ..imagebase
+$L$SEH_info_ecp_nistz256_neg:
+DB 9,0,0,0
+ DD short_handler wrt ..imagebase
+ DD $L$neg_body wrt ..imagebase,$L$neg_epilogue wrt ..imagebase
+$L$SEH_info_ecp_nistz256_ord_mul_mont:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase ; handler: full_handler restores rbp,rbx,r12-r15
+ DD $L$ord_mul_body wrt ..imagebase,$L$ord_mul_epilogue wrt ..imagebase
+ DD 48,0 ; third handler-data dword: bytes between the body's rsp and the entry rsp (six pushes)
+$L$SEH_info_ecp_nistz256_ord_sqr_mont:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase
+ DD 48,0
+$L$SEH_info_ecp_nistz256_ord_mul_montx:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase
+ DD 48,0
+$L$SEH_info_ecp_nistz256_ord_sqr_montx:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase
+ DD 48,0
+$L$SEH_info_ecp_nistz256_to_mont:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase ; same labels as the mul_mont record - NOTE(review): presumably to_mont branches into mul_mont's body; its code is not visible here, confirm
+ DD 48,0
+$L$SEH_info_ecp_nistz256_mul_mont:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
+ DD 48,0
+$L$SEH_info_ecp_nistz256_sqr_mont:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
+ DD 48,0
+$L$SEH_info_ecp_nistz256_from_mont:
+DB 9,0,0,0
+ DD short_handler wrt ..imagebase
+ DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase
+$L$SEH_info_ecp_nistz256_gather_wX:
+DB 0x01,0x33,0x16,0x00 ; table-driven record (no handler): version 1, prolog size 0x33, 0x16 unwind-code slots, no frame register
+DB 0x33,0xf8,0x09,0x00 ; at prolog offset 0x33: op 8 (save xmm128) for xmm15 at slot 9*16 = 144 bytes
+DB 0x2e,0xe8,0x08,0x00
+DB 0x29,0xd8,0x07,0x00
+DB 0x24,0xc8,0x06,0x00
+DB 0x1f,0xb8,0x05,0x00
+DB 0x1a,0xa8,0x04,0x00
+DB 0x15,0x98,0x03,0x00
+DB 0x10,0x88,0x02,0x00
+DB 0x0c,0x78,0x01,0x00
+DB 0x08,0x68,0x00,0x00
+DB 0x04,0x01,0x15,0x00 ; at prolog offset 4: op 1 (large stack allocation) of 0x15*8 = 168 bytes
+ALIGN 8
+$L$SEH_info_ecp_nistz256_avx2_gather_wX:
+DB 0x01,0x36,0x17,0x0b ; version 1, prolog size 0x36, 0x17 unwind-code slots, frame register 0x0b = r11 with offset 0
+DB 0x36,0xf8,0x09,0x00
+DB 0x31,0xe8,0x08,0x00
+DB 0x2c,0xd8,0x07,0x00
+DB 0x27,0xc8,0x06,0x00
+DB 0x22,0xb8,0x05,0x00
+DB 0x1d,0xa8,0x04,0x00
+DB 0x18,0x98,0x03,0x00
+DB 0x13,0x88,0x02,0x00
+DB 0x0e,0x78,0x01,0x00
+DB 0x09,0x68,0x00,0x00
+DB 0x04,0x01,0x15,0x00
+DB 0x00,0xb3,0x00,0x00 ; NOTE(review): presumably op 3 (set frame pointer) pairing with the r11 frame register in the header - confirm against the UNWIND_CODE encoding
+ALIGN 8
+$L$SEH_info_ecp_nistz256_point_double:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$point_doubleq_body wrt ..imagebase,$L$point_doubleq_epilogue wrt ..imagebase
+ DD 32*5+56,0 ; 32*5+8 bytes of locals + 48 bytes of pushes
+$L$SEH_info_ecp_nistz256_point_add:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$point_addq_body wrt ..imagebase,$L$point_addq_epilogue wrt ..imagebase
+ DD 32*18+56,0 ; 32*18+8 bytes of locals + 48 bytes of pushes
+$L$SEH_info_ecp_nistz256_point_add_affine:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase
+ DD 32*15+56,0 ; 32*15+8 bytes of locals + 48 bytes of pushes
+ALIGN 8
+$L$SEH_info_ecp_nistz256_point_doublex:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase
+ DD 32*5+56,0
+$L$SEH_info_ecp_nistz256_point_addx:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase
+ DD 32*18+56,0
+$L$SEH_info_ecp_nistz256_point_add_affinex:
+DB 9,0,0,0
+ DD full_handler wrt ..imagebase
+ DD $L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase
+ DD 32*15+56,0
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/x25519-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/x25519-x86_64.asm
new file mode 100644
index 0000000000..84d55134ac
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/ec/x25519-x86_64.asm
@@ -0,0 +1,1054 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+
+global x25519_fe51_mul
+
+ALIGN 32
+x25519_fe51_mul: ;Win64 entry: rcx = h (5-qword result), rdx = f, r8 = g (5-qword inputs); computes the five 128-bit column sums of f*g and tail-jumps to $L$reduce51
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (full_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe51_mul:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx,r8) onto rdi,rsi,rdx used by the body
+ mov rsi,rdx
+ mov rdx,r8
+
+
+
+ push rbp ;six pushes = 48 bytes; full_handler reloads them at -8..-48 from the entry rsp in this order
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ lea rsp,[((-40))+rsp] ;40 bytes of locals: g[0..2] at 0/8/16, result pointer at 32 (48+40 = 88 = frame size in the SEH info)
+
+$L$fe51_mul_body:
+
+ mov rax,QWORD[rsi] ;rax = f[0]
+ mov r11,QWORD[rdx] ;r11 = g[0]
+ mov r12,QWORD[8+rdx] ;r12 = g[1]
+ mov r13,QWORD[16+rdx] ;r13 = g[2]
+ mov rbp,QWORD[24+rdx] ;rbp = g[3]
+ mov r14,QWORD[32+rdx] ;r14 = g[4]
+
+ mov QWORD[32+rsp],rdi ;spill result pointer; rdi is reused as a multiplier
+ mov rdi,rax ;rdi = f[0]
+ mul r11 ;f[0]*g[0]
+ mov QWORD[rsp],r11 ;spill g[0]; r11 becomes part of an accumulator
+ mov rbx,rax
+ mov rax,rdi
+ mov rcx,rdx ;h0 = rcx:rbx
+ mul r12 ;f[0]*g[1]
+ mov QWORD[8+rsp],r12 ;spill g[1]
+ mov r8,rax
+ mov rax,rdi
+ lea r15,[r14*8+r14] ;r15 = 9*g[4]
+ mov r9,rdx ;h1 = r9:r8
+ mul r13 ;f[0]*g[2]
+ mov QWORD[16+rsp],r13 ;spill g[2]
+ mov r10,rax
+ mov rax,rdi
+ lea rdi,[r15*2+r14] ;rdi = 19*g[4]
+ mov r11,rdx ;h2 = r11:r10
+ mul rbp ;f[0]*g[3]
+ mov r12,rax
+ mov rax,QWORD[rsi]
+ mov r13,rdx ;h3 = r13:r12
+ mul r14 ;f[0]*g[4]
+ mov r14,rax
+ mov rax,QWORD[8+rsi]
+ mov r15,rdx ;h4 = r15:r14
+
+ mul rdi ;h0 += f[1]*19*g[4]
+ add rbx,rax
+ mov rax,QWORD[16+rsi]
+ adc rcx,rdx
+ mul rdi ;h1 += f[2]*19*g[4]
+ add r8,rax
+ mov rax,QWORD[24+rsi]
+ adc r9,rdx
+ mul rdi ;h2 += f[3]*19*g[4]
+ add r10,rax
+ mov rax,QWORD[32+rsi]
+ adc r11,rdx
+ mul rdi ;h3 += f[4]*19*g[4]
+ imul rdi,rbp,19 ;rdi = 19*g[3]
+ add r12,rax
+ mov rax,QWORD[8+rsi]
+ adc r13,rdx
+ mul rbp ;h4 += f[1]*g[3]
+ mov rbp,QWORD[16+rsp] ;rbp = g[2]
+ add r14,rax
+ mov rax,QWORD[16+rsi]
+ adc r15,rdx
+
+ mul rdi ;h0 += f[2]*19*g[3]
+ add rbx,rax
+ mov rax,QWORD[24+rsi]
+ adc rcx,rdx
+ mul rdi ;h1 += f[3]*19*g[3]
+ add r8,rax
+ mov rax,QWORD[32+rsi]
+ adc r9,rdx
+ mul rdi ;h2 += f[4]*19*g[3]
+ imul rdi,rbp,19 ;rdi = 19*g[2]
+ add r10,rax
+ mov rax,QWORD[8+rsi]
+ adc r11,rdx
+ mul rbp ;h3 += f[1]*g[2]
+ add r12,rax
+ mov rax,QWORD[16+rsi]
+ adc r13,rdx
+ mul rbp ;h4 += f[2]*g[2]
+ mov rbp,QWORD[8+rsp] ;rbp = g[1]
+ add r14,rax
+ mov rax,QWORD[24+rsi]
+ adc r15,rdx
+
+ mul rdi ;h0 += f[3]*19*g[2]
+ add rbx,rax
+ mov rax,QWORD[32+rsi]
+ adc rcx,rdx
+ mul rdi ;h1 += f[4]*19*g[2]
+ add r8,rax
+ mov rax,QWORD[8+rsi]
+ adc r9,rdx
+ mul rbp ;h2 += f[1]*g[1]
+ imul rdi,rbp,19 ;rdi = 19*g[1]
+ add r10,rax
+ mov rax,QWORD[16+rsi]
+ adc r11,rdx
+ mul rbp ;h3 += f[2]*g[1]
+ add r12,rax
+ mov rax,QWORD[24+rsi]
+ adc r13,rdx
+ mul rbp ;h4 += f[3]*g[1]
+ mov rbp,QWORD[rsp] ;rbp = g[0]
+ add r14,rax
+ mov rax,QWORD[32+rsi]
+ adc r15,rdx
+
+ mul rdi ;h0 += f[4]*19*g[1]
+ add rbx,rax
+ mov rax,QWORD[8+rsi]
+ adc rcx,rdx
+ mul rbp ;h1 += f[1]*g[0]
+ add r8,rax
+ mov rax,QWORD[16+rsi]
+ adc r9,rdx
+ mul rbp ;h2 += f[2]*g[0]
+ add r10,rax
+ mov rax,QWORD[24+rsi]
+ adc r11,rdx
+ mul rbp ;h3 += f[3]*g[0]
+ add r12,rax
+ mov rax,QWORD[32+rsi]
+ adc r13,rdx
+ mul rbp ;h4 += f[4]*g[0]
+ add r14,rax
+ adc r15,rdx
+
+ mov rdi,QWORD[32+rsp] ;rdi = result pointer again, as $L$reduce51 expects
+ jmp NEAR $L$reduce51 ;shared tail: carries, stores h, restores registers and returns
+$L$fe51_mul_epilogue:
+
+$L$SEH_end_x25519_fe51_mul:
+
+global x25519_fe51_sqr
+
+ALIGN 32
+x25519_fe51_sqr: ;Win64 entry: rcx = h (5-qword result), rdx = f (5-qword input); computes the column sums of f*f, then falls into the shared $L$reduce51 tail
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (full_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe51_sqr:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx) onto rdi,rsi used by the body
+ mov rsi,rdx
+
+
+
+ push rbp ;six pushes = 48 bytes; the tail reloads them from 40..80+rsp
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ lea rsp,[((-40))+rsp] ;40 bytes of locals: f[2] at 0, result pointer at 32
+
+$L$fe51_sqr_body:
+
+ mov rax,QWORD[rsi] ;rax = f[0]
+ mov r15,QWORD[16+rsi] ;r15 = f[2]
+ mov rbp,QWORD[32+rsi] ;rbp = f[4]
+
+ mov QWORD[32+rsp],rdi ;spill result pointer; rdi is reused as a multiplier
+ lea r14,[rax*1+rax] ;r14 = 2*f[0]
+ mul rax ;f[0]^2
+ mov rbx,rax
+ mov rax,QWORD[8+rsi]
+ mov rcx,rdx ;h0 = rcx:rbx
+ mul r14 ;2*f[0]*f[1]
+ mov r8,rax
+ mov rax,r15
+ mov QWORD[rsp],r15 ;spill f[2]
+ mov r9,rdx ;h1 = r9:r8
+ mul r14 ;2*f[0]*f[2]
+ mov r10,rax
+ mov rax,QWORD[24+rsi]
+ mov r11,rdx ;h2 = r11:r10
+ imul rdi,rbp,19 ;rdi = 19*f[4]
+ mul r14 ;2*f[0]*f[3]
+ mov r12,rax
+ mov rax,rbp
+ mov r13,rdx ;h3 = r13:r12
+ mul r14 ;2*f[0]*f[4]
+ mov r14,rax
+ mov rax,rbp
+ mov r15,rdx ;h4 = r15:r14
+
+ mul rdi ;h3 += 19*f[4]^2
+ add r12,rax
+ mov rax,QWORD[8+rsi]
+ adc r13,rdx
+
+ mov rsi,QWORD[24+rsi] ;rsi = f[3]; the input pointer is no longer needed
+ lea rbp,[rax*1+rax] ;rbp = 2*f[1]
+ mul rax ;h2 += f[1]^2
+ add r10,rax
+ mov rax,QWORD[rsp]
+ adc r11,rdx
+ mul rbp ;h3 += 2*f[1]*f[2]
+ add r12,rax
+ mov rax,rbp
+ adc r13,rdx
+ mul rsi ;h4 += 2*f[1]*f[3]
+ add r14,rax
+ mov rax,rbp
+ adc r15,rdx
+ imul rbp,rsi,19 ;rbp = 19*f[3]
+ mul rdi ;h0 += 2*f[1]*19*f[4]
+ add rbx,rax
+ lea rax,[rsi*1+rsi] ;rax = 2*f[3]
+ adc rcx,rdx
+
+ mul rdi ;h2 += 2*f[3]*19*f[4]
+ add r10,rax
+ mov rax,rsi
+ adc r11,rdx
+ mul rbp ;h1 += 19*f[3]^2
+ add r8,rax
+ mov rax,QWORD[rsp]
+ adc r9,rdx
+
+ lea rsi,[rax*1+rax] ;rsi = 2*f[2]
+ mul rax ;h4 += f[2]^2
+ add r14,rax
+ mov rax,rbp
+ adc r15,rdx
+ mul rsi ;h0 += 2*f[2]*19*f[3]
+ add rbx,rax
+ mov rax,rsi
+ adc rcx,rdx
+ mul rdi ;h1 += 2*f[2]*19*f[4]
+ add r8,rax
+ adc r9,rdx
+
+ mov rdi,QWORD[32+rsp] ;rdi = result pointer again
+ jmp NEAR $L$reduce51
+
+ALIGN 32
+$L$reduce51: ;shared tail, also reached from fe51_mul and fe51_mul121666. In: h0=rcx:rbx h1=r9:r8 h2=r11:r10 h3=r13:r12 h4=r15:r14, rdi = result pointer, 88-byte frame on the stack
+ mov rbp,0x7ffffffffffff ;rbp = 2^51-1 limb mask
+
+ mov rdx,r10
+ shr r10,51
+ shl r11,13
+ and rdx,rbp ;rdx = low 51 bits of h2
+ or r11,r10 ;r11 = h2 >> 51
+ add r12,r11 ;carry h2 -> h3
+ adc r13,0
+
+ mov rax,rbx
+ shr rbx,51
+ shl rcx,13
+ and rax,rbp ;rax = low 51 bits of h0
+ or rcx,rbx ;rcx = h0 >> 51
+ add r8,rcx ;carry h0 -> h1
+ adc r9,0
+
+ mov rbx,r12
+ shr r12,51
+ shl r13,13
+ and rbx,rbp ;rbx = low 51 bits of h3
+ or r13,r12 ;r13 = h3 >> 51
+ add r14,r13 ;carry h3 -> h4
+ adc r15,0
+
+ mov rcx,r8
+ shr r8,51
+ shl r9,13
+ and rcx,rbp ;rcx = low 51 bits of h1
+ or r9,r8 ;r9 = h1 >> 51
+ add rdx,r9 ;carry h1 -> h2
+
+ mov r10,r14
+ shr r14,51
+ shl r15,13
+ and r10,rbp ;r10 = low 51 bits of h4
+ or r15,r14 ;r15 = h4 >> 51
+
+ lea r14,[r15*8+r15] ;r14 = 9*carry
+ lea r15,[r14*2+r15] ;r15 = 19*carry
+ add rax,r15 ;the carry out of the top limb wraps into limb 0 multiplied by 19
+
+ mov r8,rdx
+ and rdx,rbp
+ shr r8,51
+ add rbx,r8 ;second, single-word carry h2 -> h3
+
+ mov r9,rax
+ and rax,rbp
+ shr r9,51
+ add rcx,r9 ;second, single-word carry h0 -> h1
+
+ mov QWORD[rdi],rax ;store h[0..4]
+ mov QWORD[8+rdi],rcx
+ mov QWORD[16+rdi],rdx
+ mov QWORD[24+rdi],rbx
+ mov QWORD[32+rdi],r10
+
+ mov r15,QWORD[40+rsp] ;reload callee-saved registers from above the 40 bytes of locals
+
+ mov r14,QWORD[48+rsp]
+
+ mov r13,QWORD[56+rsp]
+
+ mov r12,QWORD[64+rsp]
+
+ mov rbx,QWORD[72+rsp]
+
+ mov rbp,QWORD[80+rsp]
+
+ lea rsp,[88+rsp] ;drop the whole 88-byte frame; rsp is back at its entry value
+
+$L$fe51_sqr_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_x25519_fe51_sqr:
+
+global x25519_fe51_mul121666
+
+ALIGN 32
+x25519_fe51_mul121666: ;Win64 entry: rcx = h (5-qword result), rdx = f (5-qword input); multiplies each limb by 121666 and tail-jumps to $L$reduce51
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (full_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe51_mul121666:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx) onto rdi,rsi; rdi stays untouched until $L$reduce51 stores through it
+ mov rsi,rdx
+
+
+
+ push rbp ;same 88-byte frame as fe51_mul/fe51_sqr so the shared tail can unwind it
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ lea rsp,[((-40))+rsp] ;locals are not used here; the space only keeps the frame shape identical
+
+$L$fe51_mul121666_body:
+ mov eax,121666 ;32-bit write zero-extends into rax
+
+ mul QWORD[rsi] ;121666*f[0]
+ mov rbx,rax
+ mov eax,121666
+ mov rcx,rdx ;h0 = rcx:rbx
+ mul QWORD[8+rsi] ;121666*f[1]
+ mov r8,rax
+ mov eax,121666
+ mov r9,rdx ;h1 = r9:r8
+ mul QWORD[16+rsi] ;121666*f[2]
+ mov r10,rax
+ mov eax,121666
+ mov r11,rdx ;h2 = r11:r10
+ mul QWORD[24+rsi] ;121666*f[3]
+ mov r12,rax
+ mov eax,121666
+ mov r13,rdx ;h3 = r13:r12
+ mul QWORD[32+rsi] ;121666*f[4]
+ mov r14,rax
+ mov r15,rdx ;h4 = r15:r14
+
+ jmp NEAR $L$reduce51 ;shared tail: carries, stores h, restores registers and returns
+$L$fe51_mul121666_epilogue:
+
+$L$SEH_end_x25519_fe51_mul121666:
+EXTERN OPENSSL_ia32cap_P
+global x25519_fe64_eligible
+
+ALIGN 32
+x25519_fe64_eligible: ;no arguments; returns nonzero in eax only when both capability bits tested below are set
+ mov ecx,DWORD[((OPENSSL_ia32cap_P+8))] ;third 32-bit word of the OpenSSL capability vector
+ xor eax,eax ;default return value 0
+ and ecx,0x80100 ;keep bits 8 and 19 (presumably BMI2 and ADX, matching the mulx/adcx/adox used by the fe64 routines - confirm against the OPENSSL_ia32cap documentation)
+ cmp ecx,0x80100 ;both bits must be set
+ cmove eax,ecx ;eax = 0x80100 when eligible, else stays 0
+ DB 0F3h,0C3h ;repret
+
+
+global x25519_fe64_mul
+
+ALIGN 32
+x25519_fe64_mul: ;Win64 entry: rcx = h (4-qword result), rdx = f, r8 = g (4-qword inputs); builds the 8-limb product in r8..r15 with mulx/adcx/adox and tail-jumps to $L$reduce64
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (full_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe64_mul:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx,r8) onto rdi,rsi,rdx used by the body
+ mov rsi,rdx
+ mov rdx,r8
+
+
+
+ push rbp ;six register pushes = 48 bytes
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ push rdi ;result pointer; $L$reduce64 reloads it from 16+rsp
+
+ lea rsp,[((-16))+rsp] ;16 bytes of scratch (g[2] at 0); 48+8+16 = 72 = frame size in the SEH info
+
+$L$fe64_mul_body:
+
+ mov rax,rdx ;rax = g pointer (rdx is mulx's implicit multiplicand)
+ mov rbp,QWORD[rdx] ;rbp = g[0]
+ mov rdx,QWORD[rsi] ;rdx = f[0]
+ mov rcx,QWORD[8+rax] ;rcx = g[1]
+ mov r14,QWORD[16+rax] ;r14 = g[2]
+ mov r15,QWORD[24+rax] ;r15 = g[3]
+
+ mulx rax,r8,rbp ;row f[0]*g: r8 = lowest product limb
+ xor edi,edi ;rdi = 0 and CF = OF = 0, starting both carry chains
+ mulx rbx,r9,rcx
+ adcx r9,rax
+ mulx rax,r10,r14
+ adcx r10,rbx
+ mulx r12,r11,r15
+ mov rdx,QWORD[8+rsi] ;rdx = f[1] for the next row
+ adcx r11,rax
+ mov QWORD[rsp],r14 ;spill g[2]: r14 becomes a product limb in the f[2] row
+ adcx r12,rdi
+
+ mulx rbx,rax,rbp ;row f[1]*g: low halves added with adox (OF chain), high halves with adcx (CF chain)
+ adox r9,rax
+ adcx r10,rbx
+ mulx rbx,rax,rcx
+ adox r10,rax
+ adcx r11,rbx
+ mulx rbx,rax,r14
+ adox r11,rax
+ adcx r12,rbx
+ mulx r13,rax,r15
+ mov rdx,QWORD[16+rsi] ;rdx = f[2]
+ adox r12,rax
+ adcx r13,rdi ;absorb both pending carries into the new top limb
+ adox r13,rdi
+
+ mulx rbx,rax,rbp ;row f[2]*g: the chains swap roles (adcx for low halves, adox for high halves)
+ adcx r10,rax
+ adox r11,rbx
+ mulx rbx,rax,rcx
+ adcx r11,rax
+ adox r12,rbx
+ mulx rbx,rax,r14
+ adcx r12,rax
+ adox r13,rbx
+ mulx r14,rax,r15 ;r14 is overwritten here; g[2] survives at [rsp]
+ mov rdx,QWORD[24+rsi] ;rdx = f[3]
+ adcx r13,rax
+ adox r14,rdi
+ adcx r14,rdi
+
+ mulx rbx,rax,rbp ;row f[3]*g
+ adox r11,rax
+ adcx r12,rbx
+ mulx rbx,rax,rcx
+ adox r12,rax
+ adcx r13,rbx
+ mulx rbx,rax,QWORD[rsp] ;g[2] taken from its stack spill
+ adox r13,rax
+ adcx r14,rbx
+ mulx r15,rax,r15
+ mov edx,38 ;multiplier that $L$reduce64 applies to the upper four limbs (mov leaves the flags intact)
+ adox r14,rax
+ adcx r15,rdi
+ adox r15,rdi
+
+ jmp NEAR $L$reduce64 ;shared tail: folds r12..r15 into r8..r11, stores h, restores registers and returns
+$L$fe64_mul_epilogue:
+
+$L$SEH_end_x25519_fe64_mul:
+
+global x25519_fe64_sqr
+
+ALIGN 32
+x25519_fe64_sqr: ;Win64 entry: rcx = h (4-qword result), rdx = f (4-qword input); builds the 8-limb square in r8..r15, then falls into the shared $L$reduce64 tail
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (full_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe64_sqr:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx) onto rdi,rsi used by the body
+ mov rsi,rdx
+
+
+
+ push rbp ;six register pushes = 48 bytes
+
+ push rbx
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ push rdi ;result pointer; $L$reduce64 reloads it from 16+rsp
+
+ lea rsp,[((-16))+rsp] ;keeps the 72-byte frame shape shared with fe64_mul
+
+$L$fe64_sqr_body:
+
+ mov rdx,QWORD[rsi] ;rdx = f[0]
+ mov rcx,QWORD[8+rsi] ;rcx = f[1]
+ mov rbp,QWORD[16+rsi] ;rbp = f[2]
+ mov rsi,QWORD[24+rsi] ;rsi = f[3]; the input pointer is no longer needed
+
+
+ mulx r15,r8,rdx ;f[0]^2: r8 = lowest limb, r15 holds the high half for later
+ mulx rax,r9,rcx ;cross products f[0]*f[1..3]
+ xor edi,edi ;rdi = 0 and CF = OF = 0, starting both carry chains
+ mulx rbx,r10,rbp
+ adcx r10,rax
+ mulx r12,r11,rsi
+ mov rdx,rcx ;rdx = f[1]
+ adcx r11,rbx
+ adcx r12,rdi
+
+
+ mulx rbx,rax,rbp ;cross products f[1]*f[2..3]
+ adox r11,rax
+ adcx r12,rbx
+ mulx r13,rax,rsi
+ mov rdx,rbp ;rdx = f[2]
+ adox r12,rax
+ adcx r13,rdi
+
+
+ mulx r14,rax,rsi ;cross product f[2]*f[3]
+ mov rdx,rcx ;rdx = f[1]
+ adox r13,rax
+ adcx r14,rdi
+ adox r14,rdi
+
+ adcx r9,r9 ;double the cross products on the CF chain while the squares are added on the OF chain
+ adox r9,r15 ;+ high half of f[0]^2
+ adcx r10,r10
+ mulx rbx,rax,rdx ;f[1]^2
+ mov rdx,rbp ;rdx = f[2]
+ adcx r11,r11
+ adox r10,rax
+ adcx r12,r12
+ adox r11,rbx
+ mulx rbx,rax,rdx ;f[2]^2
+ mov rdx,rsi ;rdx = f[3]
+ adcx r13,r13
+ adox r12,rax
+ adcx r14,r14
+ adox r13,rbx
+ mulx r15,rax,rdx ;f[3]^2
+ mov edx,38 ;multiplier that $L$reduce64 applies to the upper four limbs (mov leaves the flags intact)
+ adox r14,rax
+ adcx r15,rdi
+ adox r15,rdi
+ jmp NEAR $L$reduce64
+
+ALIGN 32
+$L$reduce64: ;shared tail, also reached from fe64_mul. In: 8-limb value in r8..r15, rdx = 38, rdi = 0, result pointer at 16+rsp, 72-byte frame on the stack
+ mulx rbx,rax,r12 ;r8..r11 += 38 * r12..r15, one limb per mulx, again using both carry chains
+ adcx r8,rax
+ adox r9,rbx
+ mulx rbx,rax,r13
+ adcx r9,rax
+ adox r10,rbx
+ mulx rbx,rax,r14
+ adcx r10,rax
+ adox r11,rbx
+ mulx r12,rax,r15
+ adcx r11,rax
+ adox r12,rdi ;r12 collects whatever spilled past the fourth limb
+ adcx r12,rdi
+
+ mov rdi,QWORD[16+rsp] ;rdi = result pointer pushed by the prologue
+ imul r12,rdx ;r12 = 38 * overflow limb
+
+ add r8,r12 ;fold the overflow back into the bottom
+ adc r9,0
+ adc r10,0
+ adc r11,0
+
+ sbb rax,rax ;rax = all-ones if that addition carried out, else 0
+ and rax,38
+
+ add r8,rax ;one last +38 for a carry out of the top limb
+ mov QWORD[8+rdi],r9 ;store h[0..3]
+ mov QWORD[16+rdi],r10
+ mov QWORD[24+rdi],r11
+ mov QWORD[rdi],r8
+
+ mov r15,QWORD[24+rsp] ;reload callee-saved registers from above the scratch area and the saved rdi
+
+ mov r14,QWORD[32+rsp]
+
+ mov r13,QWORD[40+rsp]
+
+ mov r12,QWORD[48+rsp]
+
+ mov rbx,QWORD[56+rsp]
+
+ mov rbp,QWORD[64+rsp]
+
+ lea rsp,[72+rsp] ;drop the whole 72-byte frame; rsp is back at its entry value
+
+$L$fe64_sqr_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_x25519_fe64_sqr:
+
+global x25519_fe64_mul121666
+
+ALIGN 32
+x25519_fe64_mul121666: ;Win64 entry: rcx = h (4-qword result), rdx = f (4-qword input); h = 121666*f with the overflow folded back in via the multiplier 38
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (short_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe64_mul121666:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx) onto rdi,rsi; nothing is pushed, only volatile registers are used below
+ mov rsi,rdx
+
+
+$L$fe64_mul121666_body:
+ mov edx,121666 ;mulx's implicit multiplicand
+ mulx rcx,r8,QWORD[rsi] ;rcx:r8 = 121666*f[0]
+ mulx rax,r9,QWORD[8+rsi]
+ add r9,rcx ;chain each high half into the next limb
+ mulx rcx,r10,QWORD[16+rsi]
+ adc r10,rax
+ mulx rax,r11,QWORD[24+rsi]
+ adc r11,rcx
+ adc rax,0 ;rax = fifth (overflow) limb
+
+ imul rax,rax,38 ;overflow limb re-enters at the bottom multiplied by 38
+
+ add r8,rax
+ adc r9,0
+ adc r10,0
+ adc r11,0
+
+ sbb rax,rax ;rax = all-ones if that addition carried out, else 0
+ and rax,38
+
+ add r8,rax ;one last +38 for a carry out of the top limb
+ mov QWORD[8+rdi],r9 ;store h[0..3]
+ mov QWORD[16+rdi],r10
+ mov QWORD[24+rdi],r11
+ mov QWORD[rdi],r8
+
+$L$fe64_mul121666_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_x25519_fe64_mul121666:
+
+global x25519_fe64_add
+
+ALIGN 32
+x25519_fe64_add: ;Win64 entry: rcx = h (4-qword result), rdx = f, r8 = g (4-qword inputs); h = f + g, adding 38 for each carry out of the 256-bit sum
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (short_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe64_add:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx,r8) onto rdi,rsi,rdx; nothing is pushed
+ mov rsi,rdx
+ mov rdx,r8
+
+
+$L$fe64_add_body:
+ mov r8,QWORD[rsi] ;r8..r11 = f[0..3]
+ mov r9,QWORD[8+rsi]
+ mov r10,QWORD[16+rsi]
+ mov r11,QWORD[24+rsi]
+
+ add r8,QWORD[rdx] ;256-bit add of g
+ adc r9,QWORD[8+rdx]
+ adc r10,QWORD[16+rdx]
+ adc r11,QWORD[24+rdx]
+
+ sbb rax,rax ;rax = all-ones if the add carried out, else 0
+ and rax,38
+
+ add r8,rax ;carry out re-enters at the bottom as +38
+ adc r9,0
+ adc r10,0
+ mov QWORD[8+rdi],r9 ;stores are interleaved with the carry chain (mov does not touch flags)
+ adc r11,0
+ mov QWORD[16+rdi],r10
+ sbb rax,rax ;the +38 may itself carry out: build a second mask
+ mov QWORD[24+rdi],r11
+ and rax,38
+
+ add r8,rax ;second, final +38 correction on the low limb
+ mov QWORD[rdi],r8
+
+$L$fe64_add_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_x25519_fe64_add:
+
+global x25519_fe64_sub
+
+ALIGN 32
+x25519_fe64_sub: ;Win64 entry: rcx = h (4-qword result), rdx = f, r8 = g (4-qword inputs); h = f - g, subtracting 38 for each borrow out of the 256-bit difference
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (short_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe64_sub:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx,r8) onto rdi,rsi,rdx; nothing is pushed
+ mov rsi,rdx
+ mov rdx,r8
+
+
+$L$fe64_sub_body:
+ mov r8,QWORD[rsi] ;r8..r11 = f[0..3]
+ mov r9,QWORD[8+rsi]
+ mov r10,QWORD[16+rsi]
+ mov r11,QWORD[24+rsi]
+
+ sub r8,QWORD[rdx] ;256-bit subtract of g
+ sbb r9,QWORD[8+rdx]
+ sbb r10,QWORD[16+rdx]
+ sbb r11,QWORD[24+rdx]
+
+ sbb rax,rax ;rax = all-ones if the subtract borrowed, else 0
+ and rax,38
+
+ sub r8,rax ;borrow re-enters at the bottom as -38
+ sbb r9,0
+ sbb r10,0
+ mov QWORD[8+rdi],r9 ;stores are interleaved with the borrow chain (mov does not touch flags)
+ sbb r11,0
+ mov QWORD[16+rdi],r10
+ sbb rax,rax ;the -38 may itself borrow: build a second mask
+ mov QWORD[24+rdi],r11
+ and rax,38
+
+ sub r8,rax ;second, final -38 correction on the low limb
+ mov QWORD[rdi],r8
+
+$L$fe64_sub_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_x25519_fe64_sub:
+
+global x25519_fe64_tobytes
+
+ALIGN 32
+x25519_fe64_tobytes: ;Win64 entry: rcx = 32-byte output, rdx = f (4-qword input); writes f with bit 255 folded away (looks like the final canonical reduction - confirm against the C caller)
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ;rdi/rsi are callee-saved on Win64: parked in the caller-provided home slots
+ mov rax,rsp ;rax = rsp at entry (short_handler falls back to this value while still before the body label)
+$L$SEH_begin_x25519_fe64_tobytes:
+ mov rdi,rcx ;remap Win64 arguments (rcx,rdx) onto rdi,rsi; nothing is pushed
+ mov rsi,rdx
+
+
+$L$fe64_to_body:
+ mov r8,QWORD[rsi] ;r8..r11 = f[0..3]
+ mov r9,QWORD[8+rsi]
+ mov r10,QWORD[16+rsi]
+ mov r11,QWORD[24+rsi]
+
+
+ lea rax,[r11*1+r11] ;shift bit 63 of the top limb out ...
+ sar r11,63 ;r11 = all-ones if that bit was set, else 0
+ shr rax,1 ;... rax = top limb with bit 63 cleared
+ and r11,19
+ add r11,19 ;r11 = 38 if the top bit was set, else 19
+
+ add r8,r11 ;add it across the four limbs
+ adc r9,0
+ adc r10,0
+ adc rax,0
+
+ lea r11,[rax*1+rax] ;again strip bit 63, this time of the sum
+ sar rax,63 ;rax = all-ones if the sum reached bit 255
+ shr r11,1 ;r11 = final top limb, bit 63 clear
+ not rax ;mask is set when bit 255 was NOT reached
+ and rax,19
+
+ sub r8,rax ;in that case take the provisional 19 back out
+ sbb r9,0
+ sbb r10,0
+ sbb r11,0
+
+ mov QWORD[rdi],r8 ;store the four result qwords
+ mov QWORD[8+rdi],r9
+ mov QWORD[16+rdi],r10
+ mov QWORD[24+rdi],r11
+
+$L$fe64_to_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_x25519_fe64_tobytes:
+DB 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101 ;NUL-terminated ASCII tag embedded in .text: "X25519 primitives for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB 115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
+DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+EXTERN __imp_RtlVirtualUnwind
+
+
+ALIGN 16
+short_handler: ;SEH language handler for the frameless fe64 routines (only rdi/rsi need recovering). r8 = context record, r9 = dispatcher context; field names in the comments below follow the Win64 CONTEXT/DISPATCHER_CONTEXT layouts - confirm offsets against winnt.h
+ push rsi ;save every callee-saved GPR plus flags; the matching pops are at the end of $L$common_seh_tail
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ;outgoing-argument area for the RtlVirtualUnwind call in the shared tail
+
+ mov rax,QWORD[120+r8] ;rax = context->Rax (holds the entry rsp while the faulting code is still in its prologue)
+ mov rbx,QWORD[248+r8] ;rbx = context->Rip
+
+ mov rsi,QWORD[8+r9] ;rsi = disp->ImageBase
+ mov r11,QWORD[56+r9] ;r11 = disp->HandlerData (the DWORDs after the handler RVA in .xdata)
+
+ mov r10d,DWORD[r11] ;HandlerData[0] = RVA of the body label
+ lea r10,[r10*1+rsi] ;-> absolute address
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail ;Rip before the body label: keep rax = context->Rax
+
+ mov rax,QWORD[152+r8] ;otherwise rax = context->Rsp (these routines never move rsp)
+ jmp NEAR $L$common_seh_tail
+
+
+
+ALIGN 16
+full_handler: ;SEH language handler for the routines that push rbp,rbx,r12-r15. r8 = context record, r9 = dispatcher context; field names in the comments below follow the Win64 CONTEXT/DISPATCHER_CONTEXT layouts - confirm offsets against winnt.h
+ push rsi ;save every callee-saved GPR plus flags; popped again before returning
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ;outgoing-argument area for the RtlVirtualUnwind call below
+
+ mov rax,QWORD[120+r8] ;rax = context->Rax (holds the entry rsp while the faulting code is still in its prologue)
+ mov rbx,QWORD[248+r8] ;rbx = context->Rip
+
+ mov rsi,QWORD[8+r9] ;rsi = disp->ImageBase
+ mov r11,QWORD[56+r9] ;r11 = disp->HandlerData (the DWORDs after the handler RVA in .xdata)
+
+ mov r10d,DWORD[r11] ;HandlerData[0] = RVA of the body label
+ lea r10,[r10*1+rsi] ;-> absolute address
+ cmp rbx,r10
+ jb NEAR $L$common_seh_tail ;Rip before the body label: keep rax = context->Rax
+
+ mov rax,QWORD[152+r8] ;rax = context->Rsp
+
+ mov r10d,DWORD[4+r11] ;HandlerData[1] = RVA of the epilogue label
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$common_seh_tail ;at or past the epilogue label: frame already popped, Rsp is the entry rsp
+
+ mov r10d,DWORD[8+r11] ;HandlerData[2] = frame size in bytes
+ lea rax,[r10*1+rax] ;rax = entry rsp
+
+ mov rbp,QWORD[((-8))+rax] ;saved registers sit just below the entry rsp, in the prologues' push order
+ mov rbx,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx ;write them back: context->Rbx
+ mov QWORD[160+r8],rbp ;context->Rbp
+ mov QWORD[216+r8],r12 ;context->R12
+ mov QWORD[224+r8],r13 ;context->R13
+ mov QWORD[232+r8],r14 ;context->R14
+ mov QWORD[240+r8],r15 ;context->R15
+
+$L$common_seh_tail: ;shared with short_handler. In: rax = the interrupted routine's entry rsp, r8 = context record, r9 = dispatcher context
+ mov rdi,QWORD[8+rax] ;rdi/rsi as parked in the home slots by the ";WIN64 prologue" stores
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax ;context->Rsp = entry rsp
+ mov QWORD[168+r8],rsi ;context->Rsi
+ mov QWORD[176+r8],rdi ;context->Rdi
+
+ mov rdi,QWORD[40+r9] ;rdi = disp->ContextRecord (copy destination)
+ mov rsi,r8 ;rsi = patched context record (copy source)
+ mov ecx,154 ;154 qwords
+ DD 0xa548f3fc ;raw bytes fc f3 48 a5 = cld ; rep movsq
+
+ mov rsi,r9 ;rsi = dispatcher context; marshal RtlVirtualUnwind arguments
+ xor rcx,rcx ;arg1 = 0 (handler type)
+ mov rdx,QWORD[8+rsi] ;arg2 = disp->ImageBase
+ mov r8,QWORD[rsi] ;arg3 = disp->ControlPc
+ mov r9,QWORD[16+rsi] ;arg4 = disp->FunctionEntry
+ mov r10,QWORD[40+rsi] ;disp->ContextRecord
+ lea r11,[56+rsi] ;&disp->HandlerData
+ lea r12,[24+rsi] ;&disp->EstablisherFrame
+ mov QWORD[32+rsp],r10 ;arg5 (stack arguments start above the 32-byte shadow space)
+ mov QWORD[40+rsp],r11 ;arg6
+ mov QWORD[48+rsp],r12 ;arg7
+ mov QWORD[56+rsp],rcx ;arg8 = 0
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1 ;return value 1 (ExceptionContinueSearch in the Win64 disposition enum)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+section .pdata rdata align=4 ;Win64 function table: one {begin RVA, end RVA, unwind-info RVA} triple per routine
+ALIGN 4
+ DD $L$SEH_begin_x25519_fe51_mul wrt ..imagebase ;range ends at the jmp into $L$reduce51
+ DD $L$SEH_end_x25519_fe51_mul wrt ..imagebase
+ DD $L$SEH_info_x25519_fe51_mul wrt ..imagebase
+
+ DD $L$SEH_begin_x25519_fe51_sqr wrt ..imagebase ;this range also contains the shared $L$reduce51 tail
+ DD $L$SEH_end_x25519_fe51_sqr wrt ..imagebase
+ DD $L$SEH_info_x25519_fe51_sqr wrt ..imagebase
+
+ DD $L$SEH_begin_x25519_fe51_mul121666 wrt ..imagebase
+ DD $L$SEH_end_x25519_fe51_mul121666 wrt ..imagebase
+ DD $L$SEH_info_x25519_fe51_mul121666 wrt ..imagebase
+ DD $L$SEH_begin_x25519_fe64_mul wrt ..imagebase ;range ends at the jmp into $L$reduce64
+ DD $L$SEH_end_x25519_fe64_mul wrt ..imagebase
+ DD $L$SEH_info_x25519_fe64_mul wrt ..imagebase
+
+ DD $L$SEH_begin_x25519_fe64_sqr wrt ..imagebase ;this range also contains the shared $L$reduce64 tail
+ DD $L$SEH_end_x25519_fe64_sqr wrt ..imagebase
+ DD $L$SEH_info_x25519_fe64_sqr wrt ..imagebase
+
+ DD $L$SEH_begin_x25519_fe64_mul121666 wrt ..imagebase
+ DD $L$SEH_end_x25519_fe64_mul121666 wrt ..imagebase
+ DD $L$SEH_info_x25519_fe64_mul121666 wrt ..imagebase
+
+ DD $L$SEH_begin_x25519_fe64_add wrt ..imagebase
+ DD $L$SEH_end_x25519_fe64_add wrt ..imagebase
+ DD $L$SEH_info_x25519_fe64_add wrt ..imagebase
+
+ DD $L$SEH_begin_x25519_fe64_sub wrt ..imagebase
+ DD $L$SEH_end_x25519_fe64_sub wrt ..imagebase
+ DD $L$SEH_info_x25519_fe64_sub wrt ..imagebase
+
+ DD $L$SEH_begin_x25519_fe64_tobytes wrt ..imagebase
+ DD $L$SEH_end_x25519_fe64_tobytes wrt ..imagebase
+ DD $L$SEH_info_x25519_fe64_tobytes wrt ..imagebase
+section .xdata rdata align=8 ;unwind info referenced from .pdata: header, handler RVA, then the handler data that full_handler/short_handler read
+ALIGN 8
+$L$SEH_info_x25519_fe51_mul:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER (8|1), no prolog bytes, no unwind codes, no frame register
+ DD full_handler wrt ..imagebase ;language-specific handler
+ DD $L$fe51_mul_body wrt ..imagebase,$L$fe51_mul_epilogue wrt ..imagebase ;HandlerData[0],[1]: RVAs bracketing the region where the full frame exists
+ DD 88,0 ;HandlerData[2]: frame size = 6 pushes (48) + 40 bytes of locals; the trailing 0 is not read by full_handler
+$L$SEH_info_x25519_fe51_sqr:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER
+ DD full_handler wrt ..imagebase
+ DD $L$fe51_sqr_body wrt ..imagebase,$L$fe51_sqr_epilogue wrt ..imagebase ;body..epilogue spans the shared $L$reduce51 tail
+ DD 88,0 ;frame size 88
+$L$SEH_info_x25519_fe51_mul121666:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER
+ DD full_handler wrt ..imagebase
+ DD $L$fe51_mul121666_body wrt ..imagebase,$L$fe51_mul121666_epilogue wrt ..imagebase
+ DD 88,0 ;frame size 88
+$L$SEH_info_x25519_fe64_mul:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER
+ DD full_handler wrt ..imagebase
+ DD $L$fe64_mul_body wrt ..imagebase,$L$fe64_mul_epilogue wrt ..imagebase
+ DD 72,0 ;frame size = 6 pushes (48) + pushed rdi (8) + 16 bytes of scratch
+$L$SEH_info_x25519_fe64_sqr:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER
+ DD full_handler wrt ..imagebase
+ DD $L$fe64_sqr_body wrt ..imagebase,$L$fe64_sqr_epilogue wrt ..imagebase ;body..epilogue spans the shared $L$reduce64 tail
+ DD 72,0 ;frame size 72
+$L$SEH_info_x25519_fe64_mul121666:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER
+ DD short_handler wrt ..imagebase ;frameless routine: only rdi/rsi need recovering
+ DD $L$fe64_mul121666_body wrt ..imagebase,$L$fe64_mul121666_epilogue wrt ..imagebase ;short_handler reads only the first RVA; there is no frame-size entry
+$L$SEH_info_x25519_fe64_add:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER
+ DD short_handler wrt ..imagebase
+ DD $L$fe64_add_body wrt ..imagebase,$L$fe64_add_epilogue wrt ..imagebase
+$L$SEH_info_x25519_fe64_sub:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER
+ DD short_handler wrt ..imagebase
+ DD $L$fe64_sub_body wrt ..imagebase,$L$fe64_sub_epilogue wrt ..imagebase
+$L$SEH_info_x25519_fe64_tobytes:
+DB 9,0,0,0 ;version 1 + UNW_FLAG_EHANDLER
+ DD short_handler wrt ..imagebase
+ DD $L$fe64_to_body wrt ..imagebase,$L$fe64_to_epilogue wrt ..imagebase
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/include/internal/dso_conf.h b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/include/internal/dso_conf.h
index 289768d956..dc8306eda3 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/include/internal/dso_conf.h
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/include/internal/dso_conf.h
@@ -1,7 +1,7 @@
/* WARNING: do not edit! */
/* Generated by makefile from crypto/include/internal/dso_conf.h.in */
/*
- * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -12,5 +12,7 @@
#ifndef HEADER_DSO_CONF_H
# define HEADER_DSO_CONF_H
+# define DSO_WIN32
# define DSO_EXTENSION ".dll"
+
#endif
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/md5/md5-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/md5/md5-x86_64.asm
index 8bb2cfb77f..3fd339153b 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/md5/md5-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/md5/md5-x86_64.asm
@@ -18,11 +18,17 @@ $L$SEH_begin_md5_block_asm_data_order:
mov rdx,r8
+
push rbp
+
push rbx
+
push r12
+
push r14
+
push r15
+
$L$prologue:
@@ -669,15 +675,22 @@ $L$end:
mov DWORD[12+rbp],edx
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r12,QWORD[16+rsp]
+
mov rbx,QWORD[24+rsp]
+
mov rbp,QWORD[32+rsp]
+
add rsp,40
+
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_md5_block_asm_data_order:
EXTERN __imp_RtlVirtualUnwind
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/aesni-gcm-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/aesni-gcm-x86_64.asm
index 741a9e4f3a..b1d8332457 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/aesni-gcm-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/aesni-gcm-x86_64.asm
@@ -36,23 +36,6 @@ $L$resume_ctr32:
vpxor xmm12,xmm12,xmm15
vmovups xmm2,XMMWORD[((16-128))+rcx]
vpclmulqdq xmm6,xmm7,xmm3,0x01
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
xor r12,r12
cmp r15,r14
@@ -349,20 +332,25 @@ $L$SEH_begin_aesni_gcm_decrypt:
mov r9,QWORD[48+rsp]
- xor r10,r10
-
-
+ xor r10,r10
cmp rdx,0x60
jb NEAR $L$gcm_dec_abort
lea rax,[rsp]
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[(-216)+rax],xmm6
movaps XMMWORD[(-200)+rax],xmm7
@@ -403,15 +391,7 @@ $L$dec_no_key_aliasing:
vmovdqu xmm7,XMMWORD[80+rdi]
lea r14,[rdi]
vmovdqu xmm4,XMMWORD[64+rdi]
-
-
-
-
-
-
-
lea r15,[((-192))+rdx*1+rdi]
-
vmovdqu xmm5,XMMWORD[48+rdi]
shr rdx,4
xor r10,r10
@@ -454,17 +434,25 @@ $L$dec_no_key_aliasing:
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$gcm_dec_abort:
mov rax,r10
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_gcm_decrypt:
ALIGN 32
@@ -573,21 +561,25 @@ $L$SEH_begin_aesni_gcm_encrypt:
mov r9,QWORD[48+rsp]
- xor r10,r10
-
-
-
+ xor r10,r10
cmp rdx,0x60*3
jb NEAR $L$gcm_enc_abort
lea rax,[rsp]
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[(-216)+rax],xmm6
movaps XMMWORD[(-200)+rax],xmm7
@@ -623,16 +615,7 @@ $L$gcm_enc_body:
$L$enc_no_key_aliasing:
lea r14,[rsi]
-
-
-
-
-
-
-
-
lea r15,[((-192))+rdx*1+rsi]
-
shr rdx,4
call _aesni_ctr32_6x
@@ -844,17 +827,25 @@ $L$enc_no_key_aliasing:
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$gcm_enc_abort:
mov rax,r10
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_aesni_gcm_encrypt:
ALIGN 64
$L$bswap_mask:
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/ghash-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/ghash-x86_64.asm
index e5204bf81d..b227e2400e 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/ghash-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/modes/ghash-x86_64.asm
@@ -18,9 +18,21 @@ $L$SEH_begin_gcm_gmult_4bit:
mov rsi,rdx
+
push rbx
+
push rbp
+
push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ sub rsp,280
+
$L$gmult_prologue:
movzx r8,BYTE[15+rdi]
@@ -97,12 +109,17 @@ $L$break1:
mov QWORD[8+rdi],r8
mov QWORD[rdi],r9
- mov rbx,QWORD[16+rsp]
- lea rsp,[24+rsp]
+ lea rsi,[((280+48))+rsp]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$gmult_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_gcm_gmult_4bit:
global gcm_ghash_4bit
@@ -118,13 +135,21 @@ $L$SEH_begin_gcm_ghash_4bit:
mov rcx,r9
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,280
+
$L$ghash_prologue:
mov r14,rdx
mov r15,rcx
@@ -669,18 +694,27 @@ $L$outer_loop:
mov QWORD[8+rdi],r8
mov QWORD[rdi],r9
- lea rsi,[280+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ lea rsi,[((280+48))+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$ghash_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_gcm_ghash_4bit:
global gcm_init_clmul
@@ -1916,14 +1950,20 @@ se_handler:
cmp rbx,r10
jae NEAR $L$in_prologue
- lea rax,[24+rax]
+ lea rax,[((48+280))+rax]
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
mov QWORD[144+r8],rbx
mov QWORD[160+r8],rbp
mov QWORD[216+r8],r12
+ mov QWORD[224+r8],r13
+ mov QWORD[232+r8],r14
+ mov QWORD[240+r8],r15
$L$in_prologue:
mov rdi,QWORD[8+rax]
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/poly1305/poly1305-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/poly1305/poly1305-x86_64.asm
index 15fde3cba6..5717654508 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/poly1305/poly1305-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/poly1305/poly1305-x86_64.asm
@@ -45,6 +45,11 @@ $L$SEH_begin_poly1305_init:
lea rax,[poly1305_blocks_avx2]
bt r9,37
cmovc r10,rax
+ mov rax,2149646336
+ shr r9,32
+ and r9,rax
+ cmp r9,rax
+ je NEAR $L$init_base2_44
mov rax,0x0ffffffc0fffffff
mov rcx,0x0ffffffc0ffffffc
and rax,QWORD[rsi]
@@ -73,16 +78,23 @@ $L$SEH_begin_poly1305_blocks:
mov rcx,r9
+
$L$blocks:
shr rdx,4
jz NEAR $L$no_data
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$blocks_body:
mov r15,rdx
@@ -153,17 +165,25 @@ $L$oop:
mov QWORD[16+rdi],rbp
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r13,QWORD[16+rsp]
+
mov r12,QWORD[24+rsp]
+
mov rbp,QWORD[32+rsp]
+
mov rbx,QWORD[40+rsp]
+
lea rsp,[48+rsp]
+
$L$no_data:
$L$blocks_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_poly1305_blocks:
@@ -420,6 +440,7 @@ $L$SEH_begin_poly1305_blocks_avx:
mov rcx,r9
+
mov r8d,DWORD[20+rdi]
cmp rdx,128
jae NEAR $L$blocks_avx
@@ -439,11 +460,17 @@ $L$blocks_avx:
jz NEAR $L$even_avx
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$blocks_avx_body:
mov r15,rdx
@@ -546,26 +573,41 @@ $L$store_base2_26_avx:
ALIGN 16
$L$done_avx:
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r13,QWORD[16+rsp]
+
mov r12,QWORD[24+rsp]
+
mov rbp,QWORD[32+rsp]
+
mov rbx,QWORD[40+rsp]
+
lea rsp,[48+rsp]
+
$L$no_data_avx:
$L$blocks_avx_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
ALIGN 32
$L$base2_64_avx:
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$base2_64_avx_body:
mov r15,rdx
@@ -625,18 +667,27 @@ $L$proceed_avx:
mov rdx,r15
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r13,QWORD[16+rsp]
+
mov r12,QWORD[24+rsp]
+
mov rbp,QWORD[32+rsp]
+
mov rbx,QWORD[40+rsp]
+
lea rax,[48+rsp]
lea rsp,[48+rsp]
+
$L$base2_64_avx_epilogue:
jmp NEAR $L$do_avx
+
ALIGN 32
$L$even_avx:
+
vmovd xmm0,DWORD[rdi]
vmovd xmm1,DWORD[4+rdi]
vmovd xmm2,DWORD[8+rdi]
@@ -1230,6 +1281,7 @@ $L$do_avx_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_poly1305_blocks_avx:
@@ -1310,6 +1362,7 @@ $L$SEH_begin_poly1305_blocks_avx2:
mov rcx,r9
+
mov r8d,DWORD[20+rdi]
cmp rdx,128
jae NEAR $L$blocks_avx2
@@ -1329,11 +1382,17 @@ $L$blocks_avx2:
jz NEAR $L$even_avx2
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$blocks_avx2_body:
mov r15,rdx
@@ -1442,26 +1501,41 @@ $L$store_base2_26_avx2:
ALIGN 16
$L$done_avx2:
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r13,QWORD[16+rsp]
+
mov r12,QWORD[24+rsp]
+
mov rbp,QWORD[32+rsp]
+
mov rbx,QWORD[40+rsp]
+
lea rsp,[48+rsp]
+
$L$no_data_avx2:
$L$blocks_avx2_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
ALIGN 32
$L$base2_64_avx2:
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
$L$base2_64_avx2_body:
mov r15,rdx
@@ -1524,20 +1598,32 @@ $L$init_avx2:
$L$proceed_avx2:
mov rdx,r15
+ mov r10d,DWORD[((OPENSSL_ia32cap_P+8))]
+ mov r11d,3221291008
mov r15,QWORD[rsp]
+
mov r14,QWORD[8+rsp]
+
mov r13,QWORD[16+rsp]
+
mov r12,QWORD[24+rsp]
+
mov rbp,QWORD[32+rsp]
+
mov rbx,QWORD[40+rsp]
+
lea rax,[48+rsp]
lea rsp,[48+rsp]
+
$L$base2_64_avx2_epilogue:
jmp NEAR $L$do_avx2
+
ALIGN 32
$L$even_avx2:
+
+ mov r10d,DWORD[((OPENSSL_ia32cap_P+8))]
vmovd xmm0,DWORD[rdi]
vmovd xmm1,DWORD[4+rdi]
vmovd xmm2,DWORD[8+rdi]
@@ -1545,6 +1631,12 @@ $L$even_avx2:
vmovd xmm4,DWORD[16+rdi]
$L$do_avx2:
+ cmp rdx,512
+ jb NEAR $L$skip_avx512
+ and r10d,r11d
+ test r10d,65536
+ jnz NEAR $L$blocks_avx512
+$L$skip_avx512:
lea r11,[((-248))+rsp]
sub rsp,0x1c8
vmovdqa XMMWORD[80+r11],xmm6
@@ -1558,8 +1650,9 @@ $L$do_avx2:
vmovdqa XMMWORD[208+r11],xmm14
vmovdqa XMMWORD[224+r11],xmm15
$L$do_avx2_body:
- lea rdi,[((48+64))+rdi]
lea rcx,[$L$const]
+ lea rdi,[((48+64))+rdi]
+ vmovdqa ymm7,YMMWORD[96+rcx]
vmovdqu xmm9,XMMWORD[((-64))+rdi]
@@ -1569,36 +1662,28 @@ $L$do_avx2_body:
vmovdqu xmm11,XMMWORD[((-16))+rdi]
vmovdqu xmm12,XMMWORD[rdi]
vmovdqu xmm13,XMMWORD[16+rdi]
+ lea rax,[144+rsp]
vmovdqu xmm14,XMMWORD[32+rdi]
- vpermq ymm9,ymm9,0x15
+ vpermd ymm9,ymm7,ymm9
vmovdqu xmm15,XMMWORD[48+rdi]
- vpermq ymm10,ymm10,0x15
- vpshufd ymm9,ymm9,0xc8
+ vpermd ymm10,ymm7,ymm10
vmovdqu xmm5,XMMWORD[64+rdi]
- vpermq ymm6,ymm6,0x15
- vpshufd ymm10,ymm10,0xc8
+ vpermd ymm6,ymm7,ymm6
vmovdqa YMMWORD[rsp],ymm9
- vpermq ymm11,ymm11,0x15
- vpshufd ymm6,ymm6,0xc8
- vmovdqa YMMWORD[32+rsp],ymm10
- vpermq ymm12,ymm12,0x15
- vpshufd ymm11,ymm11,0xc8
- vmovdqa YMMWORD[64+rsp],ymm6
- vpermq ymm13,ymm13,0x15
- vpshufd ymm12,ymm12,0xc8
- vmovdqa YMMWORD[96+rsp],ymm11
- vpermq ymm14,ymm14,0x15
- vpshufd ymm13,ymm13,0xc8
- vmovdqa YMMWORD[128+rsp],ymm12
- vpermq ymm15,ymm15,0x15
- vpshufd ymm14,ymm14,0xc8
- vmovdqa YMMWORD[160+rsp],ymm13
- vpermq ymm5,ymm5,0x15
- vpshufd ymm15,ymm15,0xc8
- vmovdqa YMMWORD[192+rsp],ymm14
- vpshufd ymm5,ymm5,0xc8
- vmovdqa YMMWORD[224+rsp],ymm15
- vmovdqa YMMWORD[256+rsp],ymm5
+ vpermd ymm11,ymm7,ymm11
+ vmovdqa YMMWORD[(32-144)+rax],ymm10
+ vpermd ymm12,ymm7,ymm12
+ vmovdqa YMMWORD[(64-144)+rax],ymm6
+ vpermd ymm13,ymm7,ymm13
+ vmovdqa YMMWORD[(96-144)+rax],ymm11
+ vpermd ymm14,ymm7,ymm14
+ vmovdqa YMMWORD[(128-144)+rax],ymm12
+ vpermd ymm15,ymm7,ymm15
+ vmovdqa YMMWORD[(160-144)+rax],ymm13
+ vpermd ymm5,ymm7,ymm5
+ vmovdqa YMMWORD[(192-144)+rax],ymm14
+ vmovdqa YMMWORD[(224-144)+rax],ymm15
+ vmovdqa YMMWORD[(256-144)+rax],ymm5
vmovdqa ymm5,YMMWORD[64+rcx]
@@ -1625,7 +1710,6 @@ $L$do_avx2_body:
vpand ymm10,ymm10,ymm5
vpor ymm6,ymm6,YMMWORD[32+rcx]
- lea rax,[144+rsp]
vpaddq ymm2,ymm9,ymm2
sub rdx,64
jz NEAR $L$tail_avx2
@@ -1935,7 +2019,1593 @@ $L$do_avx2_epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_poly1305_blocks_avx2:
+
+ALIGN 32
+poly1305_blocks_avx512:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ; rdi/rsi are callee-saved on Win64: park them above the return address
+ mov rax,rsp
+$L$SEH_begin_poly1305_blocks_avx512:
+ mov rdi,rcx ; move the four Win64 register arguments (rcx,rdx,r8,r9)
+ mov rsi,rdx ; into rdi,rsi,rdx,rcx, which the body below uses:
+ mov rdx,r8 ; rdi = context, rsi = input pointer, rdx = byte count,
+ mov rcx,r9 ; rcx = 4th argument (overwritten with the constants pointer below)
+ ; AVX-512 Poly1305 block routine. The hash lives in zmm0-zmm4 across the
+ ; loop; zmm16-zmm24 hold the key-derived multiplier tables.
+
+$L$blocks_avx512:
+ mov eax,15
+ kmovw k2,eax ; k2 = 0x000f, write-mask for the table stores to the stack
+ lea r11,[((-248))+rsp] ; r11 = base of the xmm save area, also used to restore rsp on exit
+ sub rsp,0x1c8
+ vmovdqa XMMWORD[80+r11],xmm6 ; xmm6-xmm15 are callee-saved on Win64
+ vmovdqa XMMWORD[96+r11],xmm7
+ vmovdqa XMMWORD[112+r11],xmm8
+ vmovdqa XMMWORD[128+r11],xmm9
+ vmovdqa XMMWORD[144+r11],xmm10
+ vmovdqa XMMWORD[160+r11],xmm11
+ vmovdqa XMMWORD[176+r11],xmm12
+ vmovdqa XMMWORD[192+r11],xmm13
+ vmovdqa XMMWORD[208+r11],xmm14
+ vmovdqa XMMWORD[224+r11],xmm15
+$L$do_avx512_body:
+ lea rcx,[$L$const] ; rcx = constants table from here on
+ lea rdi,[((48+64))+rdi] ; bias the context pointer by 112 so the table offsets below are small
+ vmovdqa ymm9,YMMWORD[96+rcx] ; permutation indices at $L$const+96 (presumably $L$permd_avx2)
+ ; Load nine 16-byte table entries from the context (biased offsets -64..64).
+
+ vmovdqu xmm11,XMMWORD[((-64))+rdi]
+ and rsp,-512 ; align the scratch area down to 512 bytes
+ vmovdqu xmm12,XMMWORD[((-48))+rdi]
+ mov rax,0x20 ; rax = 32, used as an index in the odd-slot stores below
+ vmovdqu xmm7,XMMWORD[((-32))+rdi]
+ vmovdqu xmm13,XMMWORD[((-16))+rdi]
+ vmovdqu xmm8,XMMWORD[rdi]
+ vmovdqu xmm14,XMMWORD[16+rdi]
+ vmovdqu xmm10,XMMWORD[32+rdi]
+ vmovdqu xmm15,XMMWORD[48+rdi]
+ vmovdqu xmm6,XMMWORD[64+rdi]
+ vpermd zmm16,zmm9,zmm11 ; spread each table entry's dwords according to the indices in zmm9
+ vpbroadcastq zmm5,QWORD[64+rcx] ; zmm5 = limb mask from $L$const+64 (presumably $L$mask26)
+ vpermd zmm17,zmm9,zmm12
+ vpermd zmm21,zmm9,zmm7
+ vpermd zmm18,zmm9,zmm13
+ vmovdqa64 ZMMWORD[rsp]{k2},zmm16 ; masked by k2: only the low 32 bytes are written per table
+ vpsrlq zmm7,zmm16,32
+ vpermd zmm22,zmm9,zmm8
+ vmovdqu64 ZMMWORD[rax*1+rsp]{k2},zmm17
+ vpsrlq zmm8,zmm17,32
+ vpermd zmm19,zmm9,zmm14
+ vmovdqa64 ZMMWORD[64+rsp]{k2},zmm21
+ vpermd zmm23,zmm9,zmm10
+ vpermd zmm20,zmm9,zmm15
+ vmovdqu64 ZMMWORD[64+rax*1+rsp]{k2},zmm18
+ vpermd zmm24,zmm9,zmm6
+ vmovdqa64 ZMMWORD[128+rsp]{k2},zmm22
+ vmovdqu64 ZMMWORD[128+rax*1+rsp]{k2},zmm19
+ vmovdqa64 ZMMWORD[192+rsp]{k2},zmm23
+ vmovdqu64 ZMMWORD[192+rax*1+rsp]{k2},zmm20
+ vmovdqa64 ZMMWORD[256+rsp]{k2},zmm24
+ ; Multiply the tables by their own upper dwords (zmm7,zmm8,zmm9,zmm10,zmm6),
+ ; accumulating five limb sums in zmm11-zmm15. This presumably derives the
+ ; additional key powers needed for 8-way processing.
+
+
+
+
+
+
+
+
+ vpmuludq zmm11,zmm16,zmm7
+ vpmuludq zmm12,zmm17,zmm7
+ vpmuludq zmm13,zmm18,zmm7
+ vpmuludq zmm14,zmm19,zmm7
+ vpmuludq zmm15,zmm20,zmm7
+ vpsrlq zmm9,zmm18,32
+
+ vpmuludq zmm25,zmm24,zmm8
+ vpmuludq zmm26,zmm16,zmm8
+ vpmuludq zmm27,zmm17,zmm8
+ vpmuludq zmm28,zmm18,zmm8
+ vpmuludq zmm29,zmm19,zmm8
+ vpsrlq zmm10,zmm19,32
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm12,zmm12,zmm26
+ vpaddq zmm13,zmm13,zmm27
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+
+ vpmuludq zmm25,zmm23,zmm9
+ vpmuludq zmm26,zmm24,zmm9
+ vpmuludq zmm28,zmm17,zmm9
+ vpmuludq zmm29,zmm18,zmm9
+ vpmuludq zmm27,zmm16,zmm9
+ vpsrlq zmm6,zmm20,32
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm12,zmm12,zmm26
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm13,zmm13,zmm27
+
+ vpmuludq zmm25,zmm22,zmm10
+ vpmuludq zmm28,zmm16,zmm10
+ vpmuludq zmm29,zmm17,zmm10
+ vpmuludq zmm26,zmm23,zmm10
+ vpmuludq zmm27,zmm24,zmm10
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm12,zmm12,zmm26
+ vpaddq zmm13,zmm13,zmm27
+
+ vpmuludq zmm28,zmm24,zmm6
+ vpmuludq zmm29,zmm16,zmm6
+ vpmuludq zmm25,zmm21,zmm6
+ vpmuludq zmm26,zmm22,zmm6
+ vpmuludq zmm27,zmm23,zmm6
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm12,zmm12,zmm26
+ vpaddq zmm13,zmm13,zmm27
+ ; Fetch the first 128 input bytes while the products settle.
+
+
+ vmovdqu64 zmm10,ZMMWORD[rsi]
+ vmovdqu64 zmm6,ZMMWORD[64+rsi]
+ lea rsi,[128+rsi]
+ ; Carry propagation: each limb keeps its low 26 bits (mask zmm5) and
+ ; passes the rest to the next limb.
+
+
+ vpsrlq zmm28,zmm14,26
+ vpandq zmm14,zmm14,zmm5
+ vpaddq zmm15,zmm15,zmm28
+
+ vpsrlq zmm25,zmm11,26
+ vpandq zmm11,zmm11,zmm5
+ vpaddq zmm12,zmm12,zmm25
+
+ vpsrlq zmm29,zmm15,26
+ vpandq zmm15,zmm15,zmm5
+
+ vpsrlq zmm26,zmm12,26
+ vpandq zmm12,zmm12,zmm5
+ vpaddq zmm13,zmm13,zmm26
+
+ vpaddq zmm11,zmm11,zmm29 ; carry out of the top limb is added back to the
+ vpsllq zmm29,zmm29,2 ; bottom limb once plus four times,
+ vpaddq zmm11,zmm11,zmm29 ; i.e. multiplied by 5
+
+ vpsrlq zmm27,zmm13,26
+ vpandq zmm13,zmm13,zmm5
+ vpaddq zmm14,zmm14,zmm27
+
+ vpsrlq zmm25,zmm11,26
+ vpandq zmm11,zmm11,zmm5
+ vpaddq zmm12,zmm12,zmm25
+
+ vpsrlq zmm28,zmm14,26
+ vpandq zmm14,zmm14,zmm5
+ vpaddq zmm15,zmm15,zmm28
+ ; Separate the low and high 64-bit halves of each 16-byte input block.
+
+
+
+
+ vpunpcklqdq zmm7,zmm10,zmm6
+ vpunpckhqdq zmm6,zmm10,zmm6
+ ; Merge the freshly computed limbs (zmm11-zmm15) into the multiplier
+ ; tables zmm16-zmm20 under mask k1.
+
+
+
+
+ vmovdqa32 zmm25,ZMMWORD[128+rcx] ; permutation indices at $L$const+128 (presumably $L$permd_avx512)
+ mov eax,0x7777
+ kmovw k1,eax ; k1 selects dwords 0-2 of every 128-bit lane
+
+ vpermd zmm16,zmm25,zmm16
+ vpermd zmm17,zmm25,zmm17
+ vpermd zmm18,zmm25,zmm18
+ vpermd zmm19,zmm25,zmm19
+ vpermd zmm20,zmm25,zmm20
+
+ vpermd zmm16{k1},zmm25,zmm11
+ vpermd zmm17{k1},zmm25,zmm12
+ vpermd zmm18{k1},zmm25,zmm13
+ vpermd zmm19{k1},zmm25,zmm14
+ vpermd zmm20{k1},zmm25,zmm15
+ ; zmm21-zmm24 = 5 * zmm17-zmm20, computed as (x << 2) + x
+ vpslld zmm21,zmm17,2
+ vpslld zmm22,zmm18,2
+ vpslld zmm23,zmm19,2
+ vpslld zmm24,zmm20,2
+ vpaddd zmm21,zmm21,zmm17
+ vpaddd zmm22,zmm22,zmm18
+ vpaddd zmm23,zmm23,zmm19
+ vpaddd zmm24,zmm24,zmm20
+
+ vpbroadcastq zmm30,QWORD[32+rcx] ; constant at $L$const+32 (presumably $L$129), OR-ed into the top input limb
+ ; Split the input into 26-bit limbs: zmm7, zmm8, zmm9, zmm10, zmm6.
+ vpsrlq zmm9,zmm7,52
+ vpsllq zmm10,zmm6,12
+ vporq zmm9,zmm9,zmm10
+ vpsrlq zmm8,zmm7,26
+ vpsrlq zmm10,zmm6,14
+ vpsrlq zmm6,zmm6,40
+ vpandq zmm9,zmm9,zmm5
+ vpandq zmm7,zmm7,zmm5
+ ; Masking of zmm8/zmm10 and the OR into zmm6 are deferred to the top of the
+ ; loop or tail; only limb 2 is accumulated into the hash here.
+
+
+ vpaddq zmm2,zmm9,zmm2
+ sub rdx,192
+ jbe NEAR $L$tail_avx512 ; not enough input left for another full 128-byte iteration
+ jmp NEAR $L$oop_avx512
+
+ALIGN 32
+$L$oop_avx512:
+ ; Main loop: one iteration multiplies the accumulated hash by the
+ ; multiplier tables while loading and splitting the next 128 bytes.
+ ; Products are summed into zmm11-zmm15 and carried back into zmm0-zmm4.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ vpmuludq zmm14,zmm17,zmm2
+ vpaddq zmm0,zmm7,zmm0 ; accumulate input limb 0 into the hash
+ vpmuludq zmm15,zmm18,zmm2
+ vpandq zmm8,zmm8,zmm5
+ vpmuludq zmm11,zmm23,zmm2
+ vpandq zmm10,zmm10,zmm5
+ vpmuludq zmm12,zmm24,zmm2
+ vporq zmm6,zmm6,zmm30
+ vpmuludq zmm13,zmm16,zmm2
+ vpaddq zmm1,zmm8,zmm1
+ vpaddq zmm3,zmm10,zmm3
+ vpaddq zmm4,zmm6,zmm4
+
+ vmovdqu64 zmm10,ZMMWORD[rsi] ; next 128 bytes of input
+ vmovdqu64 zmm6,ZMMWORD[64+rsi]
+ lea rsi,[128+rsi]
+ vpmuludq zmm28,zmm19,zmm0
+ vpmuludq zmm29,zmm20,zmm0
+ vpmuludq zmm25,zmm16,zmm0
+ vpmuludq zmm26,zmm17,zmm0
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm12,zmm12,zmm26
+
+ vpmuludq zmm28,zmm18,zmm1
+ vpmuludq zmm29,zmm19,zmm1
+ vpmuludq zmm25,zmm24,zmm1
+ vpmuludq zmm27,zmm18,zmm0
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm13,zmm13,zmm27
+
+ vpunpcklqdq zmm7,zmm10,zmm6
+ vpunpckhqdq zmm6,zmm10,zmm6
+
+ vpmuludq zmm28,zmm16,zmm3
+ vpmuludq zmm29,zmm17,zmm3
+ vpmuludq zmm26,zmm16,zmm1
+ vpmuludq zmm27,zmm17,zmm1
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm12,zmm12,zmm26
+ vpaddq zmm13,zmm13,zmm27
+
+ vpmuludq zmm28,zmm24,zmm4
+ vpmuludq zmm29,zmm16,zmm4
+ vpmuludq zmm25,zmm22,zmm3
+ vpmuludq zmm26,zmm23,zmm3
+ vpaddq zmm14,zmm14,zmm28
+ vpmuludq zmm27,zmm24,zmm3
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm12,zmm12,zmm26
+ vpaddq zmm13,zmm13,zmm27
+
+ vpmuludq zmm25,zmm21,zmm4
+ vpmuludq zmm26,zmm22,zmm4
+ vpmuludq zmm27,zmm23,zmm4
+ vpaddq zmm0,zmm11,zmm25
+ vpaddq zmm1,zmm12,zmm26
+ vpaddq zmm2,zmm13,zmm27
+ ; Carry propagation, interleaved with splitting the next input block.
+
+
+
+ vpsrlq zmm9,zmm7,52
+ vpsllq zmm10,zmm6,12
+
+ vpsrlq zmm3,zmm14,26
+ vpandq zmm14,zmm14,zmm5
+ vpaddq zmm4,zmm15,zmm3
+
+ vporq zmm9,zmm9,zmm10
+
+ vpsrlq zmm11,zmm0,26
+ vpandq zmm0,zmm0,zmm5
+ vpaddq zmm1,zmm1,zmm11
+
+ vpandq zmm9,zmm9,zmm5
+
+ vpsrlq zmm15,zmm4,26
+ vpandq zmm4,zmm4,zmm5
+
+ vpsrlq zmm12,zmm1,26
+ vpandq zmm1,zmm1,zmm5
+ vpaddq zmm2,zmm2,zmm12
+
+ vpaddq zmm0,zmm0,zmm15 ; top-limb carry folded into limb 0,
+ vpsllq zmm15,zmm15,2
+ vpaddq zmm0,zmm0,zmm15 ; multiplied by 5 in total
+
+ vpaddq zmm2,zmm2,zmm9 ; accumulate next input limb 2 early
+ vpsrlq zmm8,zmm7,26
+
+ vpsrlq zmm13,zmm2,26
+ vpandq zmm2,zmm2,zmm5
+ vpaddq zmm3,zmm14,zmm13
+
+ vpsrlq zmm10,zmm6,14
+
+ vpsrlq zmm11,zmm0,26
+ vpandq zmm0,zmm0,zmm5
+ vpaddq zmm1,zmm1,zmm11
+
+ vpsrlq zmm6,zmm6,40
+
+ vpsrlq zmm14,zmm3,26
+ vpandq zmm3,zmm3,zmm5
+ vpaddq zmm4,zmm4,zmm14
+
+ vpandq zmm7,zmm7,zmm5
+
+
+
+
+ sub rdx,128
+ ja NEAR $L$oop_avx512
+
+$L$tail_avx512:
+ ; Tail: switch to the values held in the upper dword of each table qword,
+ ; process the last 128-byte group, then reduce the eight lanes to one.
+
+
+
+ vpsrlq zmm16,zmm16,32
+ vpsrlq zmm17,zmm17,32
+ vpsrlq zmm18,zmm18,32
+ vpsrlq zmm23,zmm23,32
+ vpsrlq zmm24,zmm24,32
+ vpsrlq zmm19,zmm19,32
+ vpsrlq zmm20,zmm20,32
+ vpsrlq zmm21,zmm21,32
+ vpsrlq zmm22,zmm22,32
+
+
+
+ lea rsi,[rdx*1+rsi] ; rdx is zero or negative here, so this steps rsi back by the over-count
+
+
+ vpaddq zmm0,zmm7,zmm0
+
+ vpmuludq zmm14,zmm17,zmm2
+ vpmuludq zmm15,zmm18,zmm2
+ vpmuludq zmm11,zmm23,zmm2
+ vpandq zmm8,zmm8,zmm5
+ vpmuludq zmm12,zmm24,zmm2
+ vpandq zmm10,zmm10,zmm5
+ vpmuludq zmm13,zmm16,zmm2
+ vporq zmm6,zmm6,zmm30
+ vpaddq zmm1,zmm8,zmm1
+ vpaddq zmm3,zmm10,zmm3
+ vpaddq zmm4,zmm6,zmm4
+
+ vmovdqu xmm7,XMMWORD[rsi] ; pre-load up to 64 further input bytes for the AVX2 tail
+ vpmuludq zmm28,zmm19,zmm0
+ vpmuludq zmm29,zmm20,zmm0
+ vpmuludq zmm25,zmm16,zmm0
+ vpmuludq zmm26,zmm17,zmm0
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm12,zmm12,zmm26
+
+ vmovdqu xmm8,XMMWORD[16+rsi]
+ vpmuludq zmm28,zmm18,zmm1
+ vpmuludq zmm29,zmm19,zmm1
+ vpmuludq zmm25,zmm24,zmm1
+ vpmuludq zmm27,zmm18,zmm0
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm13,zmm13,zmm27
+
+ vinserti128 ymm7,ymm7,XMMWORD[32+rsi],1
+ vpmuludq zmm28,zmm16,zmm3
+ vpmuludq zmm29,zmm17,zmm3
+ vpmuludq zmm26,zmm16,zmm1
+ vpmuludq zmm27,zmm17,zmm1
+ vpaddq zmm14,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm12,zmm12,zmm26
+ vpaddq zmm13,zmm13,zmm27
+
+ vinserti128 ymm8,ymm8,XMMWORD[48+rsi],1
+ vpmuludq zmm28,zmm24,zmm4
+ vpmuludq zmm29,zmm16,zmm4
+ vpmuludq zmm25,zmm22,zmm3
+ vpmuludq zmm26,zmm23,zmm3
+ vpmuludq zmm27,zmm24,zmm3
+ vpaddq zmm3,zmm14,zmm28
+ vpaddq zmm15,zmm15,zmm29
+ vpaddq zmm11,zmm11,zmm25
+ vpaddq zmm12,zmm12,zmm26
+ vpaddq zmm13,zmm13,zmm27
+
+ vpmuludq zmm25,zmm21,zmm4
+ vpmuludq zmm26,zmm22,zmm4
+ vpmuludq zmm27,zmm23,zmm4
+ vpaddq zmm0,zmm11,zmm25
+ vpaddq zmm1,zmm12,zmm26
+ vpaddq zmm2,zmm13,zmm27
+ ; Horizontal sum of the eight qword lanes of each limb into lane 0.
+
+
+
+ mov eax,1
+ vpermq zmm14,zmm3,0xb1 ; 0xb1 swaps adjacent qwords
+ vpermq zmm4,zmm15,0xb1
+ vpermq zmm11,zmm0,0xb1
+ vpermq zmm12,zmm1,0xb1
+ vpermq zmm13,zmm2,0xb1
+ vpaddq zmm3,zmm3,zmm14
+ vpaddq zmm4,zmm4,zmm15
+ vpaddq zmm0,zmm0,zmm11
+ vpaddq zmm1,zmm1,zmm12
+ vpaddq zmm2,zmm2,zmm13
+
+ kmovw k3,eax ; k3 = 1: keep only qword lane 0 in the final masked adds
+ vpermq zmm14,zmm3,0x2 ; 0x2 brings qword 2 of each 256-bit half into qword 0
+ vpermq zmm15,zmm4,0x2
+ vpermq zmm11,zmm0,0x2
+ vpermq zmm12,zmm1,0x2
+ vpermq zmm13,zmm2,0x2
+ vpaddq zmm3,zmm3,zmm14
+ vpaddq zmm4,zmm4,zmm15
+ vpaddq zmm0,zmm0,zmm11
+ vpaddq zmm1,zmm1,zmm12
+ vpaddq zmm2,zmm2,zmm13
+
+ vextracti64x4 ymm14,zmm3,0x1 ; upper 256 bits
+ vextracti64x4 ymm15,zmm4,0x1
+ vextracti64x4 ymm11,zmm0,0x1
+ vextracti64x4 ymm12,zmm1,0x1
+ vextracti64x4 ymm13,zmm2,0x1
+ vpaddq zmm3{k3}{z},zmm3,zmm14 ; zero-masking clears every lane except lane 0
+ vpaddq zmm4{k3}{z},zmm4,zmm15
+ vpaddq zmm0{k3}{z},zmm0,zmm11
+ vpaddq zmm1{k3}{z},zmm1,zmm12
+ vpaddq zmm2{k3}{z},zmm2,zmm13
+ ; Final carry propagation, interleaved with splitting the pre-loaded
+ ; 64 bytes (ymm7/ymm8) into 26-bit limbs for the AVX2 tail.
+
+ vpsrlq ymm14,ymm3,26
+ vpand ymm3,ymm3,ymm5
+ vpsrldq ymm9,ymm7,6
+ vpsrldq ymm10,ymm8,6
+ vpunpckhqdq ymm6,ymm7,ymm8
+ vpaddq ymm4,ymm4,ymm14
+
+ vpsrlq ymm11,ymm0,26
+ vpand ymm0,ymm0,ymm5
+ vpunpcklqdq ymm9,ymm9,ymm10
+ vpunpcklqdq ymm7,ymm7,ymm8
+ vpaddq ymm1,ymm1,ymm11
+
+ vpsrlq ymm15,ymm4,26
+ vpand ymm4,ymm4,ymm5
+
+ vpsrlq ymm12,ymm1,26
+ vpand ymm1,ymm1,ymm5
+ vpsrlq ymm10,ymm9,30
+ vpsrlq ymm9,ymm9,4
+ vpaddq ymm2,ymm2,ymm12
+
+ vpaddq ymm0,ymm0,ymm15
+ vpsllq ymm15,ymm15,2
+ vpsrlq ymm8,ymm7,26
+ vpsrlq ymm6,ymm6,40
+ vpaddq ymm0,ymm0,ymm15
+
+ vpsrlq ymm13,ymm2,26
+ vpand ymm2,ymm2,ymm5
+ vpand ymm9,ymm9,ymm5
+ vpand ymm7,ymm7,ymm5
+ vpaddq ymm3,ymm3,ymm13
+
+ vpsrlq ymm11,ymm0,26
+ vpand ymm0,ymm0,ymm5
+ vpaddq ymm2,ymm9,ymm2 ; speculatively add input limb 2, as the AVX2 tail expects
+ vpand ymm8,ymm8,ymm5
+ vpaddq ymm1,ymm1,ymm11
+
+ vpsrlq ymm14,ymm3,26
+ vpand ymm3,ymm3,ymm5
+ vpand ymm10,ymm10,ymm5
+ vpor ymm6,ymm6,YMMWORD[32+rcx]
+ vpaddq ymm4,ymm4,ymm14
+
+ lea rax,[144+rsp] ; rax = biased stack-table pointer for the AVX2 tail
+ add rdx,64
+ jnz NEAR $L$tail_avx2 ; input remains: finish in the AVX2 routine's tail (outside this block)
+
+ vpsubq ymm2,ymm2,ymm9 ; no input left: undo the speculative limb-2 add
+ vmovd DWORD[(-112)+rdi],xmm0 ; store the five hash limbs at original context offsets 0..16
+ vmovd DWORD[(-108)+rdi],xmm1
+ vmovd DWORD[(-104)+rdi],xmm2
+ vmovd DWORD[(-100)+rdi],xmm3
+ vmovd DWORD[(-96)+rdi],xmm4
+ vzeroall
+ movdqa xmm6,XMMWORD[80+r11] ; restore the callee-saved xmm registers
+ movdqa xmm7,XMMWORD[96+r11]
+ movdqa xmm8,XMMWORD[112+r11]
+ movdqa xmm9,XMMWORD[128+r11]
+ movdqa xmm10,XMMWORD[144+r11]
+ movdqa xmm11,XMMWORD[160+r11]
+ movdqa xmm12,XMMWORD[176+r11]
+ movdqa xmm13,XMMWORD[192+r11]
+ movdqa xmm14,XMMWORD[208+r11]
+ movdqa xmm15,XMMWORD[224+r11]
+ lea rsp,[248+r11] ; r11 was entry rsp - 248, so this restores the entry stack pointer
+$L$do_avx512_epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_poly1305_blocks_avx512:
+
+ALIGN 32
+poly1305_init_base2_44:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ; rdi/rsi are callee-saved on Win64
+ mov rax,rsp
+$L$SEH_begin_poly1305_init_base2_44:
+ mov rdi,rcx ; rdi = context
+ mov rsi,rdx ; rsi = 16-byte key
+ mov rdx,r8 ; rdx = two-entry function-pointer table to fill in
+ ; Initialise the context for the base 2^44 (44/44/42-bit limb) code path.
+
+ xor rax,rax
+ mov QWORD[rdi],rax ; zero the three hash limbs
+ mov QWORD[8+rdi],rax
+ mov QWORD[16+rdi],rax
+
+$L$init_base2_44:
+ lea r10,[poly1305_blocks_vpmadd52] ; block and emit routines that match this limb layout
+ lea r11,[poly1305_emit_base2_44]
+
+ mov rax,0x0ffffffc0fffffff ; clamp masks applied to the two key qwords
+ mov rcx,0x0ffffffc0ffffffc
+ and rax,QWORD[rsi]
+ mov r8,0x00000fffffffffff ; 44-bit limb mask
+ and rcx,QWORD[8+rsi]
+ mov r9,0x00000fffffffffff
+ and r8,rax ; r0 = key bits 0..43
+ shrd rax,rcx,44 ; bring in key bits 44 and up
+ mov QWORD[40+rdi],r8
+ and rax,r9 ; r1 = key bits 44..87
+ shr rcx,24 ; r2 = key bits 88 and up
+ mov QWORD[48+rdi],rax
+ lea rax,[rax*4+rax] ; rax = 5*r1
+ mov QWORD[56+rdi],rcx
+ shl rax,2 ; rax = 20*r1
+ lea rcx,[rcx*4+rcx] ; rcx = 5*r2
+ shl rcx,2 ; rcx = 20*r2
+ mov QWORD[24+rdi],rax
+ mov QWORD[32+rdi],rcx
+ mov QWORD[64+rdi],-1 ; negative marker; the vpmadd52 routines test its sign to detect a fresh context
+ mov QWORD[rdx],r10 ; publish the function pointers
+ mov QWORD[8+rdx],r11
+ mov eax,1 ; return 1
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_poly1305_init_base2_44:
+
+ALIGN 32
+poly1305_blocks_vpmadd52:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ; rdi/rsi are callee-saved on Win64
+ mov rax,rsp
+$L$SEH_begin_poly1305_blocks_vpmadd52:
+ mov rdi,rcx ; rdi = context
+ mov rsi,rdx ; rsi = input pointer
+ mov rdx,r8 ; rdx = input length in bytes
+ mov rcx,r9 ; rcx = 4th argument, shifted and OR-ed into the top limb below
+ ; One-block-at-a-time base 2^44 routine; hands larger inputs to the 4x code.
+
+ shr rdx,4 ; rdx = number of 16-byte blocks
+ jz NEAR $L$no_data_vpmadd52
+
+ shl rcx,40
+ mov r8,QWORD[64+rdi] ; flag qword; init_base2_44 stores -1 here
+ ; rax = number of blocks to process singly here: (blocks & 3) when the
+ ; flag is negative and fewer than 4 blocks remain, otherwise (blocks & 1).
+
+
+
+
+ mov rax,3
+ mov r10,1
+ cmp rdx,4
+ cmovae rax,r10
+ test r8,r8
+ cmovns rax,r10
+
+ and rax,rdx
+ jz NEAR $L$blocks_vpmadd52_4x ; nothing to do singly: go straight to the 4x code
+
+ sub rdx,rax
+ mov r10d,7
+ mov r11d,1
+ kmovw k7,r10d ; k7 = 0b111: the three limb lanes
+ lea r10,[$L$2_44_inp_permd] ; r10 = start of five consecutive 32-byte tables
+ kmovw k1,r11d ; k1 = lane 0 only
+
+ vmovq xmm21,rcx
+ vmovdqa64 ymm19,YMMWORD[r10] ; $L$2_44_inp_permd
+ vmovdqa64 ymm20,YMMWORD[32+r10] ; $L$2_44_inp_shift
+ vpermq ymm21,ymm21,0xcf ; move the shifted 4th argument into lane 2 (the top limb)
+ vmovdqa64 ymm22,YMMWORD[64+r10] ; $L$2_44_mask
+
+ vmovdqu64 ymm16{k7}{z},[rdi] ; hash limbs
+ vmovdqu64 ymm3{k7}{z},[40+rdi] ; three qwords starting at each of
+ vmovdqu64 ymm4{k7}{z},[32+rdi] ; context offsets 40, 32 and 24
+ vmovdqu64 ymm5{k7}{z},[24+rdi] ; (the area filled by init_base2_44)
+
+ vmovdqa64 ymm23,YMMWORD[96+r10] ; $L$2_44_shift_rgt
+ vmovdqa64 ymm24,YMMWORD[128+r10] ; $L$2_44_shift_lft
+
+ jmp NEAR $L$oop_vpmadd52
+
+ALIGN 32
+$L$oop_vpmadd52:
+ vmovdqu32 xmm18,XMMWORD[rsi] ; next 16-byte block
+ lea rsi,[16+rsi]
+ ; split the block into three limbs and OR in the top-limb bit
+ vpermd ymm18,ymm19,ymm18
+ vpsrlvq ymm18,ymm18,ymm20
+ vpandq ymm18,ymm18,ymm22
+ vporq ymm18,ymm18,ymm21
+
+ vpaddq ymm16,ymm16,ymm18 ; hash += block
+ ; broadcast each hash limb across lanes 0-2
+ vpermq ymm0{k7}{z},ymm16,0
+ vpermq ymm1{k7}{z},ymm16,85
+ vpermq ymm2{k7}{z},ymm16,170
+
+ vpxord ymm16,ymm16,ymm16 ; ymm16 = low product halves, ymm17 = high halves
+ vpxord ymm17,ymm17,ymm17
+
+ vpmadd52luq ymm16,ymm0,ymm3
+ vpmadd52huq ymm17,ymm0,ymm3
+
+ vpmadd52luq ymm16,ymm1,ymm4
+ vpmadd52huq ymm17,ymm1,ymm4
+
+ vpmadd52luq ymm16,ymm2,ymm5
+ vpmadd52huq ymm17,ymm2,ymm5
+ ; carry propagation using the per-lane shift counts and masks
+ vpsrlvq ymm18,ymm16,ymm23
+ vpsllvq ymm17,ymm17,ymm24
+ vpandq ymm16,ymm16,ymm22
+
+ vpaddq ymm17,ymm17,ymm18
+
+ vpermq ymm17,ymm17,147 ; 147 rotates the qwords up by one lane
+
+ vpaddq ymm16,ymm16,ymm17
+
+ vpsrlvq ymm18,ymm16,ymm23
+ vpandq ymm16,ymm16,ymm22
+
+ vpermq ymm18,ymm18,147
+
+ vpaddq ymm16,ymm16,ymm18
+
+ vpermq ymm18{k1}{z},ymm16,147 ; lane 3 (overflow past the top limb) moved to lane 0
+
+ vpaddq ymm16,ymm16,ymm18 ; added once plus four times,
+ vpsllq ymm18,ymm18,2
+ ; i.e. multiplied by 5
+ vpaddq ymm16,ymm16,ymm18
+
+ dec rax
+ jnz NEAR $L$oop_vpmadd52
+
+ vmovdqu64 YMMWORD[rdi]{k7},ymm16 ; write the three hash limbs back
+
+ test rdx,rdx
+ jnz NEAR $L$blocks_vpmadd52_4x ; remaining blocks are handled by the 4x code
+
+$L$no_data_vpmadd52:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_poly1305_blocks_vpmadd52:
+
+ALIGN 32
+poly1305_blocks_vpmadd52_4x:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ; rdi/rsi are callee-saved on Win64
+ mov rax,rsp
+$L$SEH_begin_poly1305_blocks_vpmadd52_4x:
+ mov rdi,rcx ; rdi = context
+ mov rsi,rdx ; rsi = input pointer
+ mov rdx,r8 ; rdx = input length in bytes
+ mov rcx,r9 ; rcx = 4th argument, shifted and OR-ed into the top limb below
+ ; Base 2^44 routine handling four (or two) blocks per step in ymm registers.
+
+ shr rdx,4 ; rdx = number of 16-byte blocks
+ jz NEAR $L$no_data_vpmadd52_4x
+
+ shl rcx,40
+ mov r8,QWORD[64+rdi] ; flag qword; negative means the key tables are not built yet
+
+$L$blocks_vpmadd52_4x:
+ vpbroadcastq ymm31,rcx ; ymm31 = shifted 4th argument in every lane
+
+ vmovdqa64 ymm28,YMMWORD[$L$x_mask44] ; 44-bit limb mask
+ mov eax,5
+ vmovdqa64 ymm29,YMMWORD[$L$x_mask42] ; 42-bit top-limb mask
+ kmovw k1,eax ; k1 = 0b0101, used by the 2x table loads
+
+ test r8,r8
+ js NEAR $L$init_vpmadd52 ; first use: build the key tables
+
+ vmovq xmm0,QWORD[rdi] ; hash limbs h0,h1,h2
+ vmovq xmm1,QWORD[8+rdi]
+ vmovq xmm2,QWORD[16+rdi]
+
+ test rdx,3
+ jnz NEAR $L$blocks_vpmadd52_2x_do ; block count is not a multiple of 4: do two blocks first
+
+$L$blocks_vpmadd52_4x_do:
+ vpbroadcastq ymm3,QWORD[64+rdi] ; broadcast the first qword of each stored key table
+ vpbroadcastq ymm4,QWORD[96+rdi]
+ vpbroadcastq ymm5,QWORD[128+rdi]
+ vpbroadcastq ymm16,QWORD[160+rdi]
+
+$L$blocks_vpmadd52_4x_key_loaded:
+ vpsllq ymm17,ymm5,2 ; ymm17 = 20*ymm5, built as ((x << 2) + x) << 2
+ vpaddq ymm17,ymm17,ymm5
+ vpsllq ymm17,ymm17,2
+
+ test rdx,7
+ jz NEAR $L$blocks_vpmadd52_8x ; block count is a multiple of 8: use the zmm code
+
+ vmovdqu64 ymm26,YMMWORD[rsi] ; load four blocks
+ vmovdqu64 ymm27,YMMWORD[32+rsi]
+ lea rsi,[64+rsi]
+
+ vpunpcklqdq ymm25,ymm26,ymm27 ; low qwords of the blocks
+ vpunpckhqdq ymm27,ymm26,ymm27 ; high qwords of the blocks
+ ; split into limbs: ymm24 = bits 0..43, ymm25 = bits 44..87, ymm26 = top
+
+
+ vpsrlq ymm26,ymm27,24
+ vporq ymm26,ymm26,ymm31
+ vpaddq ymm2,ymm2,ymm26 ; the top limb is accumulated into h2 right away
+ vpandq ymm24,ymm25,ymm28
+ vpsrlq ymm25,ymm25,44
+ vpsllq ymm27,ymm27,20
+ vporq ymm25,ymm25,ymm27
+ vpandq ymm25,ymm25,ymm28
+
+ sub rdx,4
+ jz NEAR $L$tail_vpmadd52_4x
+ jmp NEAR $L$oop_vpmadd52_4x
+ ud2
+
+ALIGN 32
+$L$init_vpmadd52:
+ vmovq xmm16,QWORD[24+rdi] ; the five values init_base2_44 stored: 20*r1
+ vmovq xmm2,QWORD[56+rdi] ; r2
+ vmovq xmm17,QWORD[32+rdi] ; 20*r2
+ vmovq xmm3,QWORD[40+rdi] ; r0
+ vmovq xmm4,QWORD[48+rdi] ; r1
+
+ vmovdqa ymm0,ymm3
+ vmovdqa ymm1,ymm4
+ vmovdqa ymm5,ymm2
+
+ mov eax,2 ; two passes through the multiply below
+
+$L$mul_init_vpmadd52:
+ vpxorq ymm18,ymm18,ymm18 ; ymm18/20/22 = low product halves, ymm19/21/23 = high halves
+ vpmadd52luq ymm18,ymm16,ymm2
+ vpxorq ymm19,ymm19,ymm19
+ vpmadd52huq ymm19,ymm16,ymm2
+ vpxorq ymm20,ymm20,ymm20
+ vpmadd52luq ymm20,ymm17,ymm2
+ vpxorq ymm21,ymm21,ymm21
+ vpmadd52huq ymm21,ymm17,ymm2
+ vpxorq ymm22,ymm22,ymm22
+ vpmadd52luq ymm22,ymm3,ymm2
+ vpxorq ymm23,ymm23,ymm23
+ vpmadd52huq ymm23,ymm3,ymm2
+
+ vpmadd52luq ymm18,ymm3,ymm0
+ vpmadd52huq ymm19,ymm3,ymm0
+ vpmadd52luq ymm20,ymm4,ymm0
+ vpmadd52huq ymm21,ymm4,ymm0
+ vpmadd52luq ymm22,ymm5,ymm0
+ vpmadd52huq ymm23,ymm5,ymm0
+
+ vpmadd52luq ymm18,ymm17,ymm1
+ vpmadd52huq ymm19,ymm17,ymm1
+ vpmadd52luq ymm20,ymm3,ymm1
+ vpmadd52huq ymm21,ymm3,ymm1
+ vpmadd52luq ymm22,ymm4,ymm1
+ vpmadd52huq ymm23,ymm4,ymm1
+ ; carry propagation back into 44/44/42-bit limbs
+
+
+ vpsrlq ymm30,ymm18,44
+ vpsllq ymm19,ymm19,8
+ vpandq ymm0,ymm18,ymm28
+ vpaddq ymm19,ymm19,ymm30
+
+ vpaddq ymm20,ymm20,ymm19
+
+ vpsrlq ymm30,ymm20,44
+ vpsllq ymm21,ymm21,8
+ vpandq ymm1,ymm20,ymm28
+ vpaddq ymm21,ymm21,ymm30
+
+ vpaddq ymm22,ymm22,ymm21
+
+ vpsrlq ymm30,ymm22,42
+ vpsllq ymm23,ymm23,10
+ vpandq ymm2,ymm22,ymm29
+ vpaddq ymm23,ymm23,ymm30
+
+ vpaddq ymm0,ymm0,ymm23 ; top carry folded into limb 0,
+ vpsllq ymm23,ymm23,2
+ ; multiplied by 5 in total
+ vpaddq ymm0,ymm0,ymm23
+
+ vpsrlq ymm30,ymm0,44
+ vpandq ymm0,ymm0,ymm28
+
+ vpaddq ymm1,ymm1,ymm30
+
+ dec eax
+ jz NEAR $L$done_init_vpmadd52
+ ; first pass done: pair the new result with the previous values and go again
+ vpunpcklqdq ymm4,ymm1,ymm4
+ vpbroadcastq xmm1,xmm1
+ vpunpcklqdq ymm5,ymm2,ymm5
+ vpbroadcastq xmm2,xmm2
+ vpunpcklqdq ymm3,ymm0,ymm3
+ vpbroadcastq xmm0,xmm0
+
+ vpsllq ymm16,ymm4,2 ; ymm16 = 20*ymm4, ymm17 = 20*ymm5
+ vpsllq ymm17,ymm5,2
+ vpaddq ymm16,ymm16,ymm4
+ vpaddq ymm17,ymm17,ymm5
+ vpsllq ymm16,ymm16,2
+ vpsllq ymm17,ymm17,2
+
+ jmp NEAR $L$mul_init_vpmadd52
+ ud2
+
+ALIGN 32
+$L$done_init_vpmadd52:
+ vinserti128 ymm4,ymm1,xmm4,1 ; assemble the four-entry tables
+ vinserti128 ymm5,ymm2,xmm5,1
+ vinserti128 ymm3,ymm0,xmm3,1
+
+ vpermq ymm4,ymm4,216 ; 216 swaps the two middle qwords
+ vpermq ymm5,ymm5,216
+ vpermq ymm3,ymm3,216
+
+ vpsllq ymm16,ymm4,2 ; ymm16 = 20*ymm4
+ vpaddq ymm16,ymm16,ymm4
+ vpsllq ymm16,ymm16,2
+
+ vmovq xmm0,QWORD[rdi] ; reload the hash limbs
+ vmovq xmm1,QWORD[8+rdi]
+ vmovq xmm2,QWORD[16+rdi]
+
+ test rdx,3
+ jnz NEAR $L$done_init_vpmadd52_2x
+ ; store the tables in the context and broadcast their first entries
+ vmovdqu64 YMMWORD[64+rdi],ymm3 ; this also overwrites the negative flag at offset 64
+ vpbroadcastq ymm3,xmm3
+ vmovdqu64 YMMWORD[96+rdi],ymm4
+ vpbroadcastq ymm4,xmm4
+ vmovdqu64 YMMWORD[128+rdi],ymm5
+ vpbroadcastq ymm5,xmm5
+ vmovdqu64 YMMWORD[160+rdi],ymm16
+ vpbroadcastq ymm16,xmm16
+
+ jmp NEAR $L$blocks_vpmadd52_4x_key_loaded
+ ud2
+
+ALIGN 32
+$L$done_init_vpmadd52_2x:
+ vmovdqu64 YMMWORD[64+rdi],ymm3 ; store the tables, then drop the lowest qword of each 128-bit lane
+ vpsrldq ymm3,ymm3,8
+ vmovdqu64 YMMWORD[96+rdi],ymm4
+ vpsrldq ymm4,ymm4,8
+ vmovdqu64 YMMWORD[128+rdi],ymm5
+ vpsrldq ymm5,ymm5,8
+ vmovdqu64 YMMWORD[160+rdi],ymm16
+ vpsrldq ymm16,ymm16,8
+ jmp NEAR $L$blocks_vpmadd52_2x_key_loaded
+ ud2
+
+ALIGN 32
+$L$blocks_vpmadd52_2x_do:
+ vmovdqu64 ymm5{k1}{z},[((128+8))+rdi] ; k1 = 0b0101: load table entries 1 and 3 into lanes 0 and 2
+ vmovdqu64 ymm16{k1}{z},[((160+8))+rdi]
+ vmovdqu64 ymm3{k1}{z},[((64+8))+rdi]
+ vmovdqu64 ymm4{k1}{z},[((96+8))+rdi]
+
+$L$blocks_vpmadd52_2x_key_loaded:
+ vmovdqu64 ymm26,YMMWORD[rsi] ; load two blocks; the other two lanes stay zero
+ vpxorq ymm27,ymm27,ymm27
+ lea rsi,[32+rsi]
+
+ vpunpcklqdq ymm25,ymm26,ymm27
+ vpunpckhqdq ymm27,ymm26,ymm27
+ ; split into limbs, same scheme as the 4x path
+
+
+ vpsrlq ymm26,ymm27,24
+ vporq ymm26,ymm26,ymm31
+ vpaddq ymm2,ymm2,ymm26
+ vpandq ymm24,ymm25,ymm28
+ vpsrlq ymm25,ymm25,44
+ vpsllq ymm27,ymm27,20
+ vporq ymm25,ymm25,ymm27
+ vpandq ymm25,ymm25,ymm28
+
+ jmp NEAR $L$tail_vpmadd52_2x
+ ud2
+
+ALIGN 32
+$L$oop_vpmadd52_4x:
+ ; main loop: multiply the hash by the broadcast key values while loading and splitting the next four blocks
+ vpaddq ymm0,ymm0,ymm24 ; add the two low limbs of the pending blocks
+ vpaddq ymm1,ymm1,ymm25
+
+ vpxorq ymm18,ymm18,ymm18
+ vpmadd52luq ymm18,ymm16,ymm2
+ vpxorq ymm19,ymm19,ymm19
+ vpmadd52huq ymm19,ymm16,ymm2
+ vpxorq ymm20,ymm20,ymm20
+ vpmadd52luq ymm20,ymm17,ymm2
+ vpxorq ymm21,ymm21,ymm21
+ vpmadd52huq ymm21,ymm17,ymm2
+ vpxorq ymm22,ymm22,ymm22
+ vpmadd52luq ymm22,ymm3,ymm2
+ vpxorq ymm23,ymm23,ymm23
+ vpmadd52huq ymm23,ymm3,ymm2
+
+ vmovdqu64 ymm26,YMMWORD[rsi] ; next four blocks
+ vmovdqu64 ymm27,YMMWORD[32+rsi]
+ lea rsi,[64+rsi]
+ vpmadd52luq ymm18,ymm3,ymm0
+ vpmadd52huq ymm19,ymm3,ymm0
+ vpmadd52luq ymm20,ymm4,ymm0
+ vpmadd52huq ymm21,ymm4,ymm0
+ vpmadd52luq ymm22,ymm5,ymm0
+ vpmadd52huq ymm23,ymm5,ymm0
+
+ vpunpcklqdq ymm25,ymm26,ymm27
+ vpunpckhqdq ymm27,ymm26,ymm27
+ vpmadd52luq ymm18,ymm17,ymm1
+ vpmadd52huq ymm19,ymm17,ymm1
+ vpmadd52luq ymm20,ymm3,ymm1
+ vpmadd52huq ymm21,ymm3,ymm1
+ vpmadd52luq ymm22,ymm4,ymm1
+ vpmadd52huq ymm23,ymm4,ymm1
+ ; carry propagation interleaved with input splitting
+
+
+ vpsrlq ymm30,ymm18,44
+ vpsllq ymm19,ymm19,8
+ vpandq ymm0,ymm18,ymm28
+ vpaddq ymm19,ymm19,ymm30
+
+ vpsrlq ymm26,ymm27,24
+ vporq ymm26,ymm26,ymm31
+ vpaddq ymm20,ymm20,ymm19
+
+ vpsrlq ymm30,ymm20,44
+ vpsllq ymm21,ymm21,8
+ vpandq ymm1,ymm20,ymm28
+ vpaddq ymm21,ymm21,ymm30
+
+ vpandq ymm24,ymm25,ymm28
+ vpsrlq ymm25,ymm25,44
+ vpsllq ymm27,ymm27,20
+ vpaddq ymm22,ymm22,ymm21
+
+ vpsrlq ymm30,ymm22,42
+ vpsllq ymm23,ymm23,10
+ vpandq ymm2,ymm22,ymm29
+ vpaddq ymm23,ymm23,ymm30
+
+ vpaddq ymm2,ymm2,ymm26 ; top limb of the new blocks accumulated early
+ vpaddq ymm0,ymm0,ymm23
+ vpsllq ymm23,ymm23,2
+
+ vpaddq ymm0,ymm0,ymm23
+ vporq ymm25,ymm25,ymm27
+ vpandq ymm25,ymm25,ymm28
+
+ vpsrlq ymm30,ymm0,44
+ vpandq ymm0,ymm0,ymm28
+
+ vpaddq ymm1,ymm1,ymm30
+
+ sub rdx,4
+ jnz NEAR $L$oop_vpmadd52_4x
+
+$L$tail_vpmadd52_4x:
+ vmovdqu64 ymm5,YMMWORD[128+rdi] ; last group: use the full per-lane tables instead of broadcasts
+ vmovdqu64 ymm16,YMMWORD[160+rdi]
+ vmovdqu64 ymm3,YMMWORD[64+rdi]
+ vmovdqu64 ymm4,YMMWORD[96+rdi]
+
+$L$tail_vpmadd52_2x:
+ vpsllq ymm17,ymm5,2 ; ymm17 = 20*ymm5
+ vpaddq ymm17,ymm17,ymm5
+ vpsllq ymm17,ymm17,2
+
+
+ vpaddq ymm0,ymm0,ymm24
+ vpaddq ymm1,ymm1,ymm25
+
+ vpxorq ymm18,ymm18,ymm18
+ vpmadd52luq ymm18,ymm16,ymm2
+ vpxorq ymm19,ymm19,ymm19
+ vpmadd52huq ymm19,ymm16,ymm2
+ vpxorq ymm20,ymm20,ymm20
+ vpmadd52luq ymm20,ymm17,ymm2
+ vpxorq ymm21,ymm21,ymm21
+ vpmadd52huq ymm21,ymm17,ymm2
+ vpxorq ymm22,ymm22,ymm22
+ vpmadd52luq ymm22,ymm3,ymm2
+ vpxorq ymm23,ymm23,ymm23
+ vpmadd52huq ymm23,ymm3,ymm2
+
+ vpmadd52luq ymm18,ymm3,ymm0
+ vpmadd52huq ymm19,ymm3,ymm0
+ vpmadd52luq ymm20,ymm4,ymm0
+ vpmadd52huq ymm21,ymm4,ymm0
+ vpmadd52luq ymm22,ymm5,ymm0
+ vpmadd52huq ymm23,ymm5,ymm0
+
+ vpmadd52luq ymm18,ymm17,ymm1
+ vpmadd52huq ymm19,ymm17,ymm1
+ vpmadd52luq ymm20,ymm3,ymm1
+ vpmadd52huq ymm21,ymm3,ymm1
+ vpmadd52luq ymm22,ymm4,ymm1
+ vpmadd52huq ymm23,ymm4,ymm1
+ ; horizontal sum of the four lanes into lane 0
+
+
+
+ mov eax,1
+ kmovw k1,eax ; k1 = lane 0 only
+ vpsrldq ymm24,ymm18,8
+ vpsrldq ymm0,ymm19,8
+ vpsrldq ymm25,ymm20,8
+ vpsrldq ymm1,ymm21,8
+ vpaddq ymm18,ymm18,ymm24
+ vpaddq ymm19,ymm19,ymm0
+ vpsrldq ymm26,ymm22,8
+ vpsrldq ymm2,ymm23,8
+ vpaddq ymm20,ymm20,ymm25
+ vpaddq ymm21,ymm21,ymm1
+ vpermq ymm24,ymm18,0x2 ; 0x2 brings qword 2 into qword 0
+ vpermq ymm0,ymm19,0x2
+ vpaddq ymm22,ymm22,ymm26
+ vpaddq ymm23,ymm23,ymm2
+
+ vpermq ymm25,ymm20,0x2
+ vpermq ymm1,ymm21,0x2
+ vpaddq ymm18{k1}{z},ymm18,ymm24 ; zero-masking clears every lane except lane 0
+ vpaddq ymm19{k1}{z},ymm19,ymm0
+ vpermq ymm26,ymm22,0x2
+ vpermq ymm2,ymm23,0x2
+ vpaddq ymm20{k1}{z},ymm20,ymm25
+ vpaddq ymm21{k1}{z},ymm21,ymm1
+ vpaddq ymm22{k1}{z},ymm22,ymm26
+ vpaddq ymm23{k1}{z},ymm23,ymm2
+ ; final carry propagation
+
+
+ vpsrlq ymm30,ymm18,44
+ vpsllq ymm19,ymm19,8
+ vpandq ymm0,ymm18,ymm28
+ vpaddq ymm19,ymm19,ymm30
+
+ vpaddq ymm20,ymm20,ymm19
+
+ vpsrlq ymm30,ymm20,44
+ vpsllq ymm21,ymm21,8
+ vpandq ymm1,ymm20,ymm28
+ vpaddq ymm21,ymm21,ymm30
+
+ vpaddq ymm22,ymm22,ymm21
+
+ vpsrlq ymm30,ymm22,42
+ vpsllq ymm23,ymm23,10
+ vpandq ymm2,ymm22,ymm29
+ vpaddq ymm23,ymm23,ymm30
+
+ vpaddq ymm0,ymm0,ymm23
+ vpsllq ymm23,ymm23,2
+
+ vpaddq ymm0,ymm0,ymm23
+
+ vpsrlq ymm30,ymm0,44
+ vpandq ymm0,ymm0,ymm28
+
+ vpaddq ymm1,ymm1,ymm30
+
+
+ sub rdx,2
+ ja NEAR $L$blocks_vpmadd52_4x_do ; only positive after the 2x path; continue with groups of four
+
+ vmovq QWORD[rdi],xmm0 ; store the hash limbs
+ vmovq QWORD[8+rdi],xmm1
+ vmovq QWORD[16+rdi],xmm2
+ vzeroall ; NOTE(review): also zeroes xmm6-xmm15, callee-saved on Win64 and not saved in this routine - confirm
+
+$L$no_data_vpmadd52_4x:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_poly1305_blocks_vpmadd52_4x:
+
+ALIGN 32
+poly1305_blocks_vpmadd52_8x:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ; rdi/rsi are callee-saved on Win64
+ mov rax,rsp
+$L$SEH_begin_poly1305_blocks_vpmadd52_8x:
+ mov rdi,rcx ; rdi = context
+ mov rsi,rdx ; rsi = input pointer
+ mov rdx,r8 ; rdx = input length in bytes
+ mov rcx,r9 ; rcx = 4th argument, shifted and OR-ed into the top limb below
+ ; Base 2^44 routine handling eight blocks per step in zmm registers.
+
+ shr rdx,4 ; rdx = number of 16-byte blocks
+ jz NEAR $L$no_data_vpmadd52_8x
+
+ shl rcx,40
+ mov r8,QWORD[64+rdi] ; flag qword; negative means the key tables are not built yet
+
+ vmovdqa64 ymm28,YMMWORD[$L$x_mask44] ; 44-bit limb mask
+ vmovdqa64 ymm29,YMMWORD[$L$x_mask42] ; 42-bit top-limb mask
+
+ test r8,r8
+ js NEAR $L$init_vpmadd52 ; table construction lives in the 4x routine
+
+ vmovq xmm0,QWORD[rdi] ; hash limbs h0,h1,h2
+ vmovq xmm1,QWORD[8+rdi]
+ vmovq xmm2,QWORD[16+rdi]
+
+$L$blocks_vpmadd52_8x:
+ ; Extend the four-entry key tables to eight entries: multiply each stored
+ ; entry by the broadcast first entry of its table.
+
+ vmovdqu64 ymm5,YMMWORD[128+rdi]
+ vmovdqu64 ymm16,YMMWORD[160+rdi]
+ vmovdqu64 ymm3,YMMWORD[64+rdi]
+ vmovdqu64 ymm4,YMMWORD[96+rdi]
+
+ vpsllq ymm17,ymm5,2 ; ymm17 = 20*ymm5, built as ((x << 2) + x) << 2
+ vpaddq ymm17,ymm17,ymm5
+ vpsllq ymm17,ymm17,2
+
+ vpbroadcastq ymm8,xmm5 ; NOTE(review): xmm6-xmm10 are written below with no save/restore in this routine - confirm for Win64
+ vpbroadcastq ymm6,xmm3
+ vpbroadcastq ymm7,xmm4
+
+ vpxorq ymm18,ymm18,ymm18
+ vpmadd52luq ymm18,ymm16,ymm8
+ vpxorq ymm19,ymm19,ymm19
+ vpmadd52huq ymm19,ymm16,ymm8
+ vpxorq ymm20,ymm20,ymm20
+ vpmadd52luq ymm20,ymm17,ymm8
+ vpxorq ymm21,ymm21,ymm21
+ vpmadd52huq ymm21,ymm17,ymm8
+ vpxorq ymm22,ymm22,ymm22
+ vpmadd52luq ymm22,ymm3,ymm8
+ vpxorq ymm23,ymm23,ymm23
+ vpmadd52huq ymm23,ymm3,ymm8
+
+ vpmadd52luq ymm18,ymm3,ymm6
+ vpmadd52huq ymm19,ymm3,ymm6
+ vpmadd52luq ymm20,ymm4,ymm6
+ vpmadd52huq ymm21,ymm4,ymm6
+ vpmadd52luq ymm22,ymm5,ymm6
+ vpmadd52huq ymm23,ymm5,ymm6
+
+ vpmadd52luq ymm18,ymm17,ymm7
+ vpmadd52huq ymm19,ymm17,ymm7
+ vpmadd52luq ymm20,ymm3,ymm7
+ vpmadd52huq ymm21,ymm3,ymm7
+ vpmadd52luq ymm22,ymm4,ymm7
+ vpmadd52huq ymm23,ymm4,ymm7
+ ; carry propagation back into 44/44/42-bit limbs
+
+
+ vpsrlq ymm30,ymm18,44
+ vpsllq ymm19,ymm19,8
+ vpandq ymm6,ymm18,ymm28
+ vpaddq ymm19,ymm19,ymm30
+
+ vpaddq ymm20,ymm20,ymm19
+
+ vpsrlq ymm30,ymm20,44
+ vpsllq ymm21,ymm21,8
+ vpandq ymm7,ymm20,ymm28
+ vpaddq ymm21,ymm21,ymm30
+
+ vpaddq ymm22,ymm22,ymm21
+
+ vpsrlq ymm30,ymm22,42
+ vpsllq ymm23,ymm23,10
+ vpandq ymm8,ymm22,ymm29
+ vpaddq ymm23,ymm23,ymm30
+
+ vpaddq ymm6,ymm6,ymm23 ; top carry folded into limb 0,
+ vpsllq ymm23,ymm23,2
+ ; multiplied by 5 in total
+ vpaddq ymm6,ymm6,ymm23
+
+ vpsrlq ymm30,ymm6,44
+ vpandq ymm6,ymm6,ymm28
+
+ vpaddq ymm7,ymm7,ymm30
+ ; interleave the new and stored entries into eight-entry zmm tables (zmm6, zmm7, zmm8)
+
+
+
+
+ vpunpcklqdq ymm26,ymm8,ymm5
+ vpunpckhqdq ymm5,ymm8,ymm5
+ vpunpcklqdq ymm24,ymm6,ymm3
+ vpunpckhqdq ymm3,ymm6,ymm3
+ vpunpcklqdq ymm25,ymm7,ymm4
+ vpunpckhqdq ymm4,ymm7,ymm4
+ vshufi64x2 zmm8,zmm26,zmm5,0x44
+ vshufi64x2 zmm6,zmm24,zmm3,0x44
+ vshufi64x2 zmm7,zmm25,zmm4,0x44
+
+ vmovdqu64 zmm26,ZMMWORD[rsi] ; load eight blocks
+ vmovdqu64 zmm27,ZMMWORD[64+rsi]
+ lea rsi,[128+rsi]
+ ; zmm10 = 20*zmm8, zmm9 = 20*zmm7
+ vpsllq zmm10,zmm8,2
+ vpsllq zmm9,zmm7,2
+ vpaddq zmm10,zmm10,zmm8
+ vpaddq zmm9,zmm9,zmm7
+ vpsllq zmm10,zmm10,2
+ vpsllq zmm9,zmm9,2
+
+ vpbroadcastq zmm31,rcx ; shifted 4th argument in every lane
+ vpbroadcastq zmm28,xmm28 ; widen the limb masks to 512 bits
+ vpbroadcastq zmm29,xmm29
+ ; broadcast the first entry of each table for use inside the loop
+ vpbroadcastq zmm16,xmm9
+ vpbroadcastq zmm17,xmm10
+ vpbroadcastq zmm3,xmm6
+ vpbroadcastq zmm4,xmm7
+ vpbroadcastq zmm5,xmm8
+
+ vpunpcklqdq zmm25,zmm26,zmm27 ; low qwords of the blocks
+ vpunpckhqdq zmm27,zmm26,zmm27 ; high qwords of the blocks
+ ; split into limbs: zmm24 = bits 0..43, zmm25 = bits 44..87, zmm26 = top
+
+
+ vpsrlq zmm26,zmm27,24
+ vporq zmm26,zmm26,zmm31
+ vpaddq zmm2,zmm2,zmm26 ; the top limb is accumulated into h2 right away
+ vpandq zmm24,zmm25,zmm28
+ vpsrlq zmm25,zmm25,44
+ vpsllq zmm27,zmm27,20
+ vporq zmm25,zmm25,zmm27
+ vpandq zmm25,zmm25,zmm28
+
+ sub rdx,8
+ jz NEAR $L$tail_vpmadd52_8x
+ jmp NEAR $L$oop_vpmadd52_8x
+
+ALIGN 32
+$L$oop_vpmadd52_8x:
+ ; main loop: multiply the hash by the broadcast key values while loading and splitting the next eight blocks
+ vpaddq zmm0,zmm0,zmm24
+ vpaddq zmm1,zmm1,zmm25
+
+ vpxorq zmm18,zmm18,zmm18
+ vpmadd52luq zmm18,zmm16,zmm2
+ vpxorq zmm19,zmm19,zmm19
+ vpmadd52huq zmm19,zmm16,zmm2
+ vpxorq zmm20,zmm20,zmm20
+ vpmadd52luq zmm20,zmm17,zmm2
+ vpxorq zmm21,zmm21,zmm21
+ vpmadd52huq zmm21,zmm17,zmm2
+ vpxorq zmm22,zmm22,zmm22
+ vpmadd52luq zmm22,zmm3,zmm2
+ vpxorq zmm23,zmm23,zmm23
+ vpmadd52huq zmm23,zmm3,zmm2
+
+ vmovdqu64 zmm26,ZMMWORD[rsi] ; next eight blocks
+ vmovdqu64 zmm27,ZMMWORD[64+rsi]
+ lea rsi,[128+rsi]
+ vpmadd52luq zmm18,zmm3,zmm0
+ vpmadd52huq zmm19,zmm3,zmm0
+ vpmadd52luq zmm20,zmm4,zmm0
+ vpmadd52huq zmm21,zmm4,zmm0
+ vpmadd52luq zmm22,zmm5,zmm0
+ vpmadd52huq zmm23,zmm5,zmm0
+
+ vpunpcklqdq zmm25,zmm26,zmm27
+ vpunpckhqdq zmm27,zmm26,zmm27
+ vpmadd52luq zmm18,zmm17,zmm1
+ vpmadd52huq zmm19,zmm17,zmm1
+ vpmadd52luq zmm20,zmm3,zmm1
+ vpmadd52huq zmm21,zmm3,zmm1
+ vpmadd52luq zmm22,zmm4,zmm1
+ vpmadd52huq zmm23,zmm4,zmm1
+ ; carry propagation interleaved with input splitting
+
+
+ vpsrlq zmm30,zmm18,44
+ vpsllq zmm19,zmm19,8
+ vpandq zmm0,zmm18,zmm28
+ vpaddq zmm19,zmm19,zmm30
+
+ vpsrlq zmm26,zmm27,24
+ vporq zmm26,zmm26,zmm31
+ vpaddq zmm20,zmm20,zmm19
+
+ vpsrlq zmm30,zmm20,44
+ vpsllq zmm21,zmm21,8
+ vpandq zmm1,zmm20,zmm28
+ vpaddq zmm21,zmm21,zmm30
+
+ vpandq zmm24,zmm25,zmm28
+ vpsrlq zmm25,zmm25,44
+ vpsllq zmm27,zmm27,20
+ vpaddq zmm22,zmm22,zmm21
+
+ vpsrlq zmm30,zmm22,42
+ vpsllq zmm23,zmm23,10
+ vpandq zmm2,zmm22,zmm29
+ vpaddq zmm23,zmm23,zmm30
+
+ vpaddq zmm2,zmm2,zmm26 ; top limb of the new blocks accumulated early
+ vpaddq zmm0,zmm0,zmm23
+ vpsllq zmm23,zmm23,2
+
+ vpaddq zmm0,zmm0,zmm23
+ vporq zmm25,zmm25,zmm27
+ vpandq zmm25,zmm25,zmm28
+
+ vpsrlq zmm30,zmm0,44
+ vpandq zmm0,zmm0,zmm28
+
+ vpaddq zmm1,zmm1,zmm30
+
+ sub rdx,8
+ jnz NEAR $L$oop_vpmadd52_8x
+
+$L$tail_vpmadd52_8x:
+ ; last group: multiply by the per-lane tables (zmm6-zmm10) instead of the broadcasts
+ vpaddq zmm0,zmm0,zmm24
+ vpaddq zmm1,zmm1,zmm25
+
+ vpxorq zmm18,zmm18,zmm18
+ vpmadd52luq zmm18,zmm9,zmm2
+ vpxorq zmm19,zmm19,zmm19
+ vpmadd52huq zmm19,zmm9,zmm2
+ vpxorq zmm20,zmm20,zmm20
+ vpmadd52luq zmm20,zmm10,zmm2
+ vpxorq zmm21,zmm21,zmm21
+ vpmadd52huq zmm21,zmm10,zmm2
+ vpxorq zmm22,zmm22,zmm22
+ vpmadd52luq zmm22,zmm6,zmm2
+ vpxorq zmm23,zmm23,zmm23
+ vpmadd52huq zmm23,zmm6,zmm2
+
+ vpmadd52luq zmm18,zmm6,zmm0
+ vpmadd52huq zmm19,zmm6,zmm0
+ vpmadd52luq zmm20,zmm7,zmm0
+ vpmadd52huq zmm21,zmm7,zmm0
+ vpmadd52luq zmm22,zmm8,zmm0
+ vpmadd52huq zmm23,zmm8,zmm0
+
+ vpmadd52luq zmm18,zmm10,zmm1
+ vpmadd52huq zmm19,zmm10,zmm1
+ vpmadd52luq zmm20,zmm6,zmm1
+ vpmadd52huq zmm21,zmm6,zmm1
+ vpmadd52luq zmm22,zmm7,zmm1
+ vpmadd52huq zmm23,zmm7,zmm1
+ ; horizontal sum of the eight lanes into lane 0
+
+
+
+ mov eax,1
+ kmovw k1,eax ; k1 = lane 0 only
+ vpsrldq zmm24,zmm18,8
+ vpsrldq zmm0,zmm19,8
+ vpsrldq zmm25,zmm20,8
+ vpsrldq zmm1,zmm21,8
+ vpaddq zmm18,zmm18,zmm24
+ vpaddq zmm19,zmm19,zmm0
+ vpsrldq zmm26,zmm22,8
+ vpsrldq zmm2,zmm23,8
+ vpaddq zmm20,zmm20,zmm25
+ vpaddq zmm21,zmm21,zmm1
+ vpermq zmm24,zmm18,0x2 ; 0x2 brings qword 2 of each 256-bit half into qword 0
+ vpermq zmm0,zmm19,0x2
+ vpaddq zmm22,zmm22,zmm26
+ vpaddq zmm23,zmm23,zmm2
+
+ vpermq zmm25,zmm20,0x2
+ vpermq zmm1,zmm21,0x2
+ vpaddq zmm18,zmm18,zmm24
+ vpaddq zmm19,zmm19,zmm0
+ vpermq zmm26,zmm22,0x2
+ vpermq zmm2,zmm23,0x2
+ vpaddq zmm20,zmm20,zmm25
+ vpaddq zmm21,zmm21,zmm1
+ vextracti64x4 ymm24,zmm18,1 ; upper 256 bits
+ vextracti64x4 ymm0,zmm19,1
+ vpaddq zmm22,zmm22,zmm26
+ vpaddq zmm23,zmm23,zmm2
+
+ vextracti64x4 ymm25,zmm20,1
+ vextracti64x4 ymm1,zmm21,1
+ vextracti64x4 ymm26,zmm22,1
+ vextracti64x4 ymm2,zmm23,1
+ vpaddq ymm18{k1}{z},ymm18,ymm24 ; zero-masking clears every lane except lane 0
+ vpaddq ymm19{k1}{z},ymm19,ymm0
+ vpaddq ymm20{k1}{z},ymm20,ymm25
+ vpaddq ymm21{k1}{z},ymm21,ymm1
+ vpaddq ymm22{k1}{z},ymm22,ymm26
+ vpaddq ymm23{k1}{z},ymm23,ymm2
+ ; final carry propagation
+
+
+ vpsrlq ymm30,ymm18,44
+ vpsllq ymm19,ymm19,8
+ vpandq ymm0,ymm18,ymm28
+ vpaddq ymm19,ymm19,ymm30
+
+ vpaddq ymm20,ymm20,ymm19
+
+ vpsrlq ymm30,ymm20,44
+ vpsllq ymm21,ymm21,8
+ vpandq ymm1,ymm20,ymm28
+ vpaddq ymm21,ymm21,ymm30
+
+ vpaddq ymm22,ymm22,ymm21
+
+ vpsrlq ymm30,ymm22,42
+ vpsllq ymm23,ymm23,10
+ vpandq ymm2,ymm22,ymm29
+ vpaddq ymm23,ymm23,ymm30
+
+ vpaddq ymm0,ymm0,ymm23
+ vpsllq ymm23,ymm23,2
+
+ vpaddq ymm0,ymm0,ymm23
+
+ vpsrlq ymm30,ymm0,44
+ vpandq ymm0,ymm0,ymm28
+
+ vpaddq ymm1,ymm1,ymm30
+
+
+
+ vmovq QWORD[rdi],xmm0 ; store the hash limbs
+ vmovq QWORD[8+rdi],xmm1
+ vmovq QWORD[16+rdi],xmm2
+ vzeroall ; NOTE(review): also zeroes xmm6-xmm15, callee-saved on Win64 and not saved in this routine - confirm
+
+$L$no_data_vpmadd52_8x:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_poly1305_blocks_vpmadd52_8x:
+
+ALIGN 32
+poly1305_emit_base2_44:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi ; rdi/rsi are callee-saved on Win64
+ mov rax,rsp
+$L$SEH_begin_poly1305_emit_base2_44:
+ mov rdi,rcx ; rdi = context holding the three hash limbs
+ mov rsi,rdx ; rsi = 16-byte output buffer
+ mov rdx,r8 ; rdx = two qwords added to the result before it is stored
+ ; Convert the 44/44/42-bit limb hash to binary, reduce it, add [rdx], store 16 bytes.
+
+ mov r8,QWORD[rdi] ; h0
+ mov r9,QWORD[8+rdi] ; h1
+ mov r10,QWORD[16+rdi] ; h2
+ ; repack the limbs into r8 (bits 0..63), r9 (bits 64..127), r10 (bits 128 and up)
+ mov rax,r9
+ shr r9,20
+ shl rax,44
+ mov rcx,r10
+ shr r10,40
+ shl rcx,24
+
+ add r8,rax
+ adc r9,rcx
+ adc r10,0
+ ; compute h+5; if that reaches bit 130 then h >= 2^130-5 and h+5 is the reduced value
+ mov rax,r8
+ add r8,5
+ mov rcx,r9
+ adc r9,0
+ adc r10,0
+ shr r10,2 ; non-zero iff h+5 has bit 130 or higher set
+ cmovnz rax,r8
+ cmovnz rcx,r9
+
+ add rax,QWORD[rdx] ; add the 128-bit value at [rdx]; the carry out of bit 127 is discarded
+ adc rcx,QWORD[8+rdx]
+ mov QWORD[rsi],rax
+ mov QWORD[8+rsi],rcx
+
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_poly1305_emit_base2_44:
ALIGN 64
$L$const:
$L$mask24:
@@ -1944,13 +3614,131 @@ $L$129:
DD 16777216,0,16777216,0,16777216,0,16777216,0
$L$mask26:
DD 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
-$L$five:
- DD 5,0,5,0,5,0,5,0
+$L$permd_avx2:
+ DD 2,2,2,3,2,0,2,1
+$L$permd_avx512:
+ DD 0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,7
+
+$L$2_44_inp_permd: ; eight dword indices (looks like a dword-permute control; the consumer is not in this part of the file)
+ DD 0,1,1,2,2,3,7,7
+$L$2_44_inp_shift: ; four per-qword shift counts
+ DQ 0,12,24,64
+$L$2_44_mask: ; 2^44-1, 2^44-1, 2^42-1, all ones
+ DQ 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
+$L$2_44_shift_rgt: ; 44/44/42 equal the bit widths of the first three masks in $L$2_44_mask
+ DQ 44,44,42,64
+$L$2_44_shift_lft: ; 8/8/10: together with shift_rgt, the same 44/8, 44/8, 42/10 pairs the vpsrlq/vpsllq carry chain of the vpmadd52 code uses as immediates
+ DQ 8,8,10,64
+; NOTE(review): presumably loaded by the base 2^44 (vpmadd52) block routines; confirm against the loads, which are outside this excerpt
+ALIGN 64
+$L$x_mask44: ; eight qwords (64 bytes), each 2^44-1
+ DQ 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+ DQ 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
+$L$x_mask42: ; eight qwords (64 bytes), each 2^42-1
+ DQ 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+ DQ 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
DB 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
DB 98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
DB 108,46,111,114,103,62,0
ALIGN 16
+global xor128_encrypt_n_pad ; XOR r9 bytes at [rdx] with the buffer at [r8]; write the result to [rcx] and back into [r8], then zero-fill [r8] up to a 16-byte multiple
+; Win64 in: rcx = destination, rdx = source, r8 = in/out buffer, r9 = byte count. Out: rax = r8 advanced past the last byte written. Only volatile registers are used, so there is no prologue.
+ALIGN 16
+xor128_encrypt_n_pad: ; NOTE: the count must be non-zero -- with r9 = 0 control reaches $L$oop_enc_byte with r10 = 0 and `dec r10` wraps around
+ sub rdx,r8 ; make source and destination relative to r8 so one moving pointer indexes all three buffers
+ sub rcx,r8
+ mov r10,r9 ; keep the full byte count
+ shr r9,4 ; r9 = number of whole 16-byte blocks
+ jz NEAR $L$tail_enc ; fewer than 16 bytes: r10 already holds the tail length
+ nop
+$L$oop_enc_xmm:
+ movdqu xmm0,XMMWORD[r8*1+rdx] ; source block, unaligned load
+ pxor xmm0,XMMWORD[r8] ; memory operand here and the movdqa below require r8 to be 16-byte aligned
+ movdqu XMMWORD[r8*1+rcx],xmm0 ; XOR result to the destination
+ movdqa XMMWORD[r8],xmm0 ; the same XOR result replaces the block in the r8 buffer
+ lea r8,[16+r8]
+ dec r9
+ jnz NEAR $L$oop_enc_xmm
+
+ and r10,15 ; leftover byte count
+ jz NEAR $L$done_enc ; count was a multiple of 16: nothing to pad
+
+$L$tail_enc:
+ mov r9,16
+ sub r9,r10 ; r9 = 16 - tail length = number of zero bytes to append
+ xor eax,eax
+$L$oop_enc_byte:
+ mov al,BYTE[r8*1+rdx]
+ xor al,BYTE[r8]
+ mov BYTE[r8*1+rcx],al
+ mov BYTE[r8],al
+ lea r8,[1+r8]
+ dec r10
+ jnz NEAR $L$oop_enc_byte
+
+ xor eax,eax ; al = 0 for the padding stores
+$L$oop_enc_pad:
+ mov BYTE[r8],al ; padding goes only to the r8 buffer, not to the destination
+ lea r8,[1+r8]
+ dec r9
+ jnz NEAR $L$oop_enc_pad
+
+$L$done_enc:
+ mov rax,r8 ; return the advanced r8 pointer
+ DB 0F3h,0C3h ;repret
+
+
+global xor128_decrypt_n_pad ; XOR r9 bytes at [rdx] with the buffer at [r8]; write the result to [rcx], copy the unmodified input into [r8], then zero-fill [r8] up to a 16-byte multiple
+; Win64 in: rcx = destination, rdx = source, r8 = in/out buffer, r9 = byte count. Out: rax = r8 advanced past the last byte written. Differs from xor128_encrypt_n_pad only in what is stored back to [r8] (the input rather than the XOR result).
+ALIGN 16
+xor128_decrypt_n_pad: ; NOTE: the count must be non-zero -- with r9 = 0 control reaches $L$oop_dec_byte with r10 = 0 and `dec r10` wraps around
+ sub rdx,r8 ; make source and destination relative to r8 so one moving pointer indexes all three buffers
+ sub rcx,r8
+ mov r10,r9 ; keep the full byte count
+ shr r9,4 ; r9 = number of whole 16-byte blocks
+ jz NEAR $L$tail_dec ; fewer than 16 bytes: r10 already holds the tail length
+ nop
+$L$oop_dec_xmm:
+ movdqu xmm0,XMMWORD[r8*1+rdx] ; source block, unaligned load
+ movdqa xmm1,XMMWORD[r8] ; aligned access: r8 must be 16-byte aligned
+ pxor xmm1,xmm0
+ movdqu XMMWORD[r8*1+rcx],xmm1 ; XOR result to the destination
+ movdqa XMMWORD[r8],xmm0 ; the input block (not the XOR result) replaces the block in the r8 buffer
+ lea r8,[16+r8]
+ dec r9
+ jnz NEAR $L$oop_dec_xmm
+
+ pxor xmm1,xmm1 ; clear xmm1, which still held the last XOR result (presumably so decrypted data does not linger in a register)
+ and r10,15 ; leftover byte count
+ jz NEAR $L$done_dec ; count was a multiple of 16: nothing to pad
+
+$L$tail_dec:
+ mov r9,16
+ sub r9,r10 ; r9 = 16 - tail length = number of zero bytes to append
+ xor eax,eax
+ xor r11,r11
+$L$oop_dec_byte:
+ mov r11b,BYTE[r8*1+rdx] ; input byte
+ mov al,BYTE[r8]
+ xor al,r11b
+ mov BYTE[r8*1+rcx],al ; XOR result to the destination
+ mov BYTE[r8],r11b ; input byte into the r8 buffer
+ lea r8,[1+r8]
+ dec r10
+ jnz NEAR $L$oop_dec_byte
+
+ xor eax,eax ; al = 0 for the padding stores
+$L$oop_dec_pad:
+ mov BYTE[r8],al ; padding goes only to the r8 buffer, not to the destination
+ lea r8,[1+r8]
+ dec r9
+ jnz NEAR $L$oop_dec_pad
+
+$L$done_dec:
+ mov rax,r8 ; return the advanced r8 pointer
+ DB 0F3h,0C3h ;repret
+
EXTERN __imp_RtlVirtualUnwind
ALIGN 16
@@ -2121,6 +3909,9 @@ ALIGN 4
DD $L$even_avx2 wrt ..imagebase
DD $L$SEH_end_poly1305_blocks_avx2 wrt ..imagebase
DD $L$SEH_info_poly1305_blocks_avx2_3 wrt ..imagebase
+ DD $L$SEH_begin_poly1305_blocks_avx512 wrt ..imagebase
+ DD $L$SEH_end_poly1305_blocks_avx512 wrt ..imagebase
+ DD $L$SEH_info_poly1305_blocks_avx512 wrt ..imagebase
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_poly1305_init:
@@ -2170,3 +3961,7 @@ $L$SEH_info_poly1305_blocks_avx2_3:
DB 9,0,0,0
DD avx_handler wrt ..imagebase
DD $L$do_avx2_body wrt ..imagebase,$L$do_avx2_epilogue wrt ..imagebase
+$L$SEH_info_poly1305_blocks_avx512:
+DB 9,0,0,0
+ DD avx_handler wrt ..imagebase
+ DD $L$do_avx512_body wrt ..imagebase,$L$do_avx512_epilogue wrt ..imagebase
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-md5-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-md5-x86_64.asm
index f1ea9652d9..5e42fe63df 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-md5-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-md5-x86_64.asm
@@ -21,15 +21,23 @@ $L$SEH_begin_rc4_md5_enc:
mov r9,QWORD[48+rsp]
+
cmp r9,0
je NEAR $L$abort
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
sub rsp,40
+
$L$body:
mov r11,rcx
mov r12,r9
@@ -1264,17 +1272,25 @@ $L$oop:
mov DWORD[((-4))+rdi],ecx
mov r15,QWORD[40+rsp]
+
mov r14,QWORD[48+rsp]
+
mov r13,QWORD[56+rsp]
+
mov r12,QWORD[64+rsp]
+
mov rbp,QWORD[72+rsp]
+
mov rbx,QWORD[80+rsp]
+
lea rsp,[88+rsp]
+
$L$epilogue:
$L$abort:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_rc4_md5_enc:
EXTERN __imp_RtlVirtualUnwind
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-x86_64.asm
index 9c042ec082..5732b40ed6 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/rc4/rc4-x86_64.asm
@@ -25,9 +25,13 @@ $L$SEH_begin_RC4:
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$entry:
+
push rbx
+
push r12
+
push r13
+
$L$prologue:
mov r11,rsi
mov r12,rdx
@@ -528,13 +532,18 @@ $L$exit:
mov DWORD[((-4))+rdi],ecx
mov r13,QWORD[rsp]
+
mov r12,QWORD[8+rsp]
+
mov rbx,QWORD[16+rsp]
+
add rsp,24
+
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_RC4:
global RC4_set_key
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/keccak1600-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/keccak1600-x86_64.asm
new file mode 100644
index 0000000000..d0471cb3b3
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/keccak1600-x86_64.asm
@@ -0,0 +1,525 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+
+
+ALIGN 32 ; __KeccakF1600: internal Keccak-f[1600] round loop, reached by `call` from KeccakF1600 and SHA3_absorb under a private register contract (no ABI prologue)
+__KeccakF1600: ; in: rdi = state+100, rsi = 200-byte scratch+100, r15 -> iotas; lane i sits at byte offset 8*i-100. Overwrites rax,rbx,rcx,rdx,rbp,r8-r14
+ mov rax,QWORD[60+rdi] ; lanes 20..24 are carried in rax,rbx,rcx,rdx,rbp from one round to the next
+ mov rbx,QWORD[68+rdi]
+ mov rcx,QWORD[76+rdi]
+ mov rdx,QWORD[84+rdi]
+ mov rbp,QWORD[92+rdi]
+ jmp NEAR $L$oop
+; one pass of $L$oop = one round: lanes are read from [rdi], results written to [rsi], and the xchg near the end swaps the two buffers
+ALIGN 32
+$L$oop:
+ mov r8,QWORD[((-100))+rdi] ; lanes 0, 6, 12, 18: kept in registers because the first output group needs them
+ mov r9,QWORD[((-52))+rdi]
+ mov r10,QWORD[((-4))+rdi]
+ mov r11,QWORD[44+rdi]
+; theta, part 1: XOR the five lanes of each column -> parities C0..C4 in rax,rbx,rcx,rdx,rbp
+ xor rcx,QWORD[((-84))+rdi]
+ xor rdx,QWORD[((-76))+rdi]
+ xor rax,r8
+ xor rbx,QWORD[((-92))+rdi]
+ xor rcx,QWORD[((-44))+rdi]
+ xor rax,QWORD[((-60))+rdi]
+ mov r12,rbp ; keep lane 24 itself before rbp turns into a column parity
+ xor rbp,QWORD[((-68))+rdi]
+
+ xor rcx,r10
+ xor rax,QWORD[((-20))+rdi]
+ xor rdx,QWORD[((-36))+rdi]
+ xor rbx,r9
+ xor rbp,QWORD[((-28))+rdi]
+
+ xor rcx,QWORD[36+rdi]
+ xor rax,QWORD[20+rdi]
+ xor rdx,QWORD[4+rdi]
+ xor rbx,QWORD[((-12))+rdi]
+ xor rbp,QWORD[12+rdi]
+; theta, part 2: D[x] = C[x-1] ^ rol(C[x+1],1); afterwards rbx=D0, rcx=D1, rdx=D2, rbp=D3, rax=D4
+ mov r13,rcx ; save C2, needed last for D3
+ rol rcx,1
+ xor rcx,rax ; D1 = rol(C2,1) ^ C0
+ xor rdx,r11 ; finish C3 (lane 18)
+
+ rol rax,1
+ xor rax,rdx ; D4 = rol(C0,1) ^ C3
+ xor rbx,QWORD[28+rdi] ; finish C1 (lane 16)
+
+ rol rdx,1
+ xor rdx,rbx ; D2 = rol(C3,1) ^ C1
+ xor rbp,QWORD[52+rdi] ; finish C4 (lane 19)
+
+ rol rbx,1
+ xor rbx,rbp ; D0 = rol(C1,1) ^ C4
+
+ rol rbp,1
+ xor rbp,r13 ; D3 = rol(C4,1) ^ C2
+ xor r9,rcx ; output lanes 0..4 from input lanes 0,6,12,18,24: apply D, rotate, then the and/or/not combining step
+ xor r10,rdx
+ rol r9,44
+ xor r11,rbp
+ xor r12,rax
+ rol r10,43
+ xor r8,rbx
+ mov r13,r9
+ rol r11,21
+ or r9,r10
+ xor r9,r8
+ rol r12,14
+; iota: fold this round's constant into output lane 0 and step r15 to the next constant
+ xor r9,QWORD[r15]
+ lea r15,[8+r15]
+
+ mov r14,r12
+ and r12,r11
+ mov QWORD[((-100))+rsi],r9
+ xor r12,r10
+ not r10
+ mov QWORD[((-84))+rsi],r12
+
+ or r10,r11
+ mov r12,QWORD[76+rdi] ; loads for the next group are interleaved with this group's stores
+ xor r10,r13
+ mov QWORD[((-92))+rsi],r10
+
+ and r13,r8
+ mov r9,QWORD[((-28))+rdi]
+ xor r13,r14
+ mov r10,QWORD[((-20))+rdi]
+ mov QWORD[((-68))+rsi],r13
+
+ or r14,r8
+ mov r8,QWORD[((-76))+rdi]
+ xor r14,r11
+ mov r11,QWORD[28+rdi]
+ mov QWORD[((-76))+rsi],r14
+
+; output lanes 5..9, built from input lanes 3, 9, 10, 16, 22
+ xor r8,rbp
+ xor r12,rdx
+ rol r8,28
+ xor r11,rcx
+ xor r9,rax
+ rol r12,61
+ rol r11,45
+ xor r10,rbx
+ rol r9,20
+ mov r13,r8
+ or r8,r12
+ rol r10,3
+
+ xor r8,r11
+ mov QWORD[((-36))+rsi],r8
+
+ mov r14,r9
+ and r9,r13
+ mov r8,QWORD[((-92))+rdi]
+ xor r9,r12
+ not r12
+ mov QWORD[((-28))+rsi],r9
+
+ or r12,r11
+ mov r9,QWORD[((-44))+rdi]
+ xor r12,r10
+ mov QWORD[((-44))+rsi],r12
+
+ and r11,r10
+ mov r12,QWORD[60+rdi]
+ xor r11,r14
+ mov QWORD[((-52))+rsi],r11
+
+ or r14,r10
+ mov r10,QWORD[4+rdi]
+ xor r14,r13
+ mov r11,QWORD[52+rdi]
+ mov QWORD[((-60))+rsi],r14
+
+; output lanes 10..14, built from input lanes 1, 7, 13, 19, 20
+ xor r10,rbp
+ xor r11,rax
+ rol r10,25
+ xor r9,rdx
+ rol r11,8
+ xor r12,rbx
+ rol r9,6
+ xor r8,rcx
+ rol r12,18
+ mov r13,r10
+ and r10,r11
+ rol r8,1
+
+ not r11
+ xor r10,r9
+ mov QWORD[((-12))+rsi],r10
+
+ mov r14,r12
+ and r12,r11
+ mov r10,QWORD[((-12))+rdi]
+ xor r12,r13
+ mov QWORD[((-4))+rsi],r12
+
+ or r13,r9
+ mov r12,QWORD[84+rdi]
+ xor r13,r8
+ mov QWORD[((-20))+rsi],r13
+
+ and r9,r8
+ xor r9,r14
+ mov QWORD[12+rsi],r9
+
+ or r14,r8
+ mov r9,QWORD[((-60))+rdi]
+ xor r14,r11
+ mov r11,QWORD[36+rdi]
+ mov QWORD[4+rsi],r14
+
+; output lanes 15..19, built from input lanes 4, 5, 11, 17, 23
+ mov r8,QWORD[((-68))+rdi]
+
+ xor r10,rcx
+ xor r11,rdx
+ rol r10,10
+ xor r9,rbx
+ rol r11,15
+ xor r12,rbp
+ rol r9,36
+ xor r8,rax
+ rol r12,56
+ mov r13,r10
+ or r10,r11
+ rol r8,27
+
+ not r11
+ xor r10,r9
+ mov QWORD[28+rsi],r10
+
+ mov r14,r12
+ or r12,r11
+ xor r12,r13
+ mov QWORD[36+rsi],r12
+
+ and r13,r9
+ xor r13,r8
+ mov QWORD[20+rsi],r13
+
+ or r9,r8
+ xor r9,r14
+ mov QWORD[52+rsi],r9
+
+ and r8,r14
+ xor r8,r11
+ mov QWORD[44+rsi],r8
+
+; output lanes 20..24, built from input lanes 2, 8, 14, 15, 21; the D registers themselves are consumed here
+ xor rdx,QWORD[((-84))+rdi]
+ xor rbp,QWORD[((-36))+rdi]
+ rol rdx,62
+ xor rcx,QWORD[68+rdi]
+ rol rbp,55
+ xor rax,QWORD[12+rdi]
+ rol rcx,2
+ xor rbx,QWORD[20+rdi]
+ xchg rdi,rsi ; swap buffers: from here on rdi is the buffer just written, i.e. the next round's input
+ rol rax,39
+ rol rbx,41
+ mov r13,rdx
+ and rdx,rbp
+ not rbp
+ xor rdx,rcx
+ mov QWORD[92+rdi],rdx
+
+ mov r14,rax
+ and rax,rbp
+ xor rax,r13
+ mov QWORD[60+rdi],rax
+
+ or r13,rcx
+ xor r13,rbx
+ mov QWORD[84+rdi],r13
+
+ and rcx,rbx
+ xor rcx,r14
+ mov QWORD[76+rdi],rcx
+
+ or rbx,r14
+ xor rbx,rbp
+ mov QWORD[68+rdi],rbx
+; put lanes 24 and 23 back into rbp and rdx so rax,rbx,rcx,rdx,rbp again hold lanes 20..24 for the next round
+ mov rbp,rdx
+ mov rdx,r13
+; keep looping until r15 reaches a 256-byte boundary; iotas starts 64 bytes after an ALIGN 256, so its end (64 + 24*8 bytes) is that boundary
+ test r15,255
+ jnz NEAR $L$oop
+; NOTE(review): the and/or/not mix above is not plain chi; it appears to rely on the callers keeping lanes 1,2,8,12,17,20 complemented (their `not` sequences) -- confirm against the Keccak lane-complementing description
+ lea r15,[((-192))+r15] ; rewind r15 by the 24 constants consumed, back to its entry value
+ DB 0F3h,0C3h ;repret
+
+
+
+ALIGN 32 ; KeccakF1600: apply the permutation in place to the 200-byte state whose address is in rdi
+KeccakF1600: ; not declared global here and has no WIN64 argument shuffle: the pointer arrives in rdi (SHA3_squeeze calls it that way). rdi is preserved; rax,rcx,rdx,rsi,r8-r11 are not
+; save the callee-saved registers that __KeccakF1600 overwrites
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+
+ lea rdi,[100+rdi] ; bias the state pointer by 100, the addressing form __KeccakF1600 uses (offsets -100..+92)
+ sub rsp,200 ; 200-byte scratch state on the stack; assuming an ABI-aligned caller, 6 pushes + 200 leave rsp 16-byte aligned at the call below
+
+; complement lanes 1, 2, 8, 12, 17 and 20 before the round loop
+ not QWORD[((-92))+rdi]
+ not QWORD[((-84))+rdi]
+ not QWORD[((-36))+rdi]
+ not QWORD[((-4))+rdi]
+ not QWORD[36+rdi]
+ not QWORD[60+rdi]
+
+ lea r15,[iotas] ; r15 = first round constant
+ lea rsi,[100+rsp] ; rsi = scratch buffer, biased by 100 like rdi
+
+ call __KeccakF1600
+; undo the lane complementing so the caller sees the plain state
+ not QWORD[((-92))+rdi]
+ not QWORD[((-84))+rdi]
+ not QWORD[((-36))+rdi]
+ not QWORD[((-4))+rdi]
+ not QWORD[36+rdi]
+ not QWORD[60+rdi]
+ lea rdi,[((-100))+rdi] ; restore the caller's unbiased state pointer
+
+ add rsp,200
+
+
+ pop r15
+
+ pop r14
+
+ pop r13
+
+ pop r12
+
+ pop rbp
+
+ pop rbx
+
+ DB 0F3h,0C3h ;repret
+
+
+global SHA3_absorb ; XOR whole blocks of input into the state and permute after each block; returns the number of input bytes left over
+; Win64 in: rcx -> 200-byte state, rdx -> input, r8 = input length in bytes, r9 = block size in bytes. Out: rax = bytes not absorbed (fewer than one block).
+ALIGN 32
+SHA3_absorb:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue: rdi/rsi are callee-saved on Win64, park them in the caller-provided slots above the return address
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_SHA3_absorb:
+ mov rdi,rcx ; rdi = state
+ mov rsi,rdx ; rsi = input
+ mov rdx,r8 ; rdx = length
+ mov rcx,r9 ; rcx = block size
+
+
+; save the callee-saved registers that __KeccakF1600 overwrites
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+
+ lea rdi,[100+rdi] ; bias the state pointer by 100 for __KeccakF1600
+ sub rsp,232 ; rsp+0..199 scratch state, +200 input pointer, +208 remaining length, +216 block size
+
+
+ mov r9,rsi ; r9 = input cursor (rsi is about to become the scratch pointer)
+ lea rsi,[100+rsp] ; rsi = scratch buffer, biased by 100
+; complement lanes 1, 2, 8, 12, 17 and 20; the state stays in this form until $L$done_absorb
+ not QWORD[((-92))+rdi]
+ not QWORD[((-84))+rdi]
+ not QWORD[((-36))+rdi]
+ not QWORD[((-4))+rdi]
+ not QWORD[36+rdi]
+ not QWORD[60+rdi]
+ lea r15,[iotas] ; r15 = first round constant
+
+ mov QWORD[((216-100))+rsi],rcx ; keep the block size at rsp+216; rcx is reused as a counter below
+
+$L$oop_absorb:
+ cmp rdx,rcx
+ jc NEAR $L$done_absorb ; unsigned: fewer than one block of input left
+
+ shr rcx,3 ; rcx = 8-byte lanes per block; decremented before it is tested, so the block size must be at least 8
+ lea r8,[((-100))+rdi] ; r8 = cursor over the state, starting at lane 0
+
+$L$block_absorb:
+ mov rax,QWORD[r9]
+ lea r9,[8+r9]
+ xor rax,QWORD[r8]
+ lea r8,[8+r8]
+ sub rdx,8
+ mov QWORD[((-8))+r8],rax ; state lane ^= 8 bytes of input
+ sub rcx,1
+ jnz NEAR $L$block_absorb
+
+ mov QWORD[((200-100))+rsi],r9 ; spill input cursor and remaining length: __KeccakF1600 overwrites r9 and rdx
+ mov QWORD[((208-100))+rsi],rdx
+ call __KeccakF1600
+ mov r9,QWORD[((200-100))+rsi]
+ mov rdx,QWORD[((208-100))+rsi]
+ mov rcx,QWORD[((216-100))+rsi] ; reload the block size
+ jmp NEAR $L$oop_absorb
+
+ALIGN 32
+$L$done_absorb:
+ mov rax,rdx ; return value: bytes left unprocessed
+; undo the lane complementing before handing the state back
+ not QWORD[((-92))+rdi]
+ not QWORD[((-84))+rdi]
+ not QWORD[((-36))+rdi]
+ not QWORD[((-4))+rdi]
+ not QWORD[36+rdi]
+ not QWORD[60+rdi]
+
+ add rsp,232
+
+
+ pop r15
+
+ pop r14
+
+ pop r13
+
+ pop r12
+
+ pop rbp
+
+ pop rbx
+
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue: restore the rdi/rsi saved on entry
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_SHA3_absorb:
+global SHA3_squeeze ; copy output bytes out of the state, calling KeccakF1600 each time a full block has been emitted and more output is still wanted
+; Win64 in: rcx -> 200-byte state, rdx -> output buffer, r8 = output length in bytes, r9 = block size in bytes. No return value is set.
+ALIGN 32
+SHA3_squeeze:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue: rdi/rsi are callee-saved on Win64, park them in the caller-provided slots above the return address
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_SHA3_squeeze:
+ mov rdi,rcx ; rdi = state
+ mov rsi,rdx ; rsi = output
+ mov rdx,r8 ; rdx = length
+ mov rcx,r9 ; rcx = block size
+
+
+; r12-r14 hold the loop state because they survive the call to KeccakF1600, which saves and restores them
+ push r12
+
+ push r13
+
+ push r14
+
+
+ shr rcx,3 ; rcx = 8-byte lanes per block (countdown within the current block)
+ mov r8,rdi ; r8 = read cursor in the state
+ mov r12,rsi ; r12 = write cursor in the output
+ mov r13,rdx ; r13 = bytes still to produce
+ mov r14,rcx ; r14 = lanes per block, used to reload rcx after each permutation
+ jmp NEAR $L$oop_squeeze
+
+ALIGN 32
+$L$oop_squeeze:
+ cmp r13,8
+ jb NEAR $L$tail_squeeze ; fewer than 8 bytes wanted: finish with a byte copy
+
+ mov rax,QWORD[r8]
+ lea r8,[8+r8]
+ mov QWORD[r12],rax ; copy one lane to the output
+ lea r12,[8+r12]
+ sub r13,8
+ jz NEAR $L$done_squeeze ; exactly done: return without another permutation
+
+ sub rcx,1
+ jnz NEAR $L$oop_squeeze ; more lanes left in the current block
+
+ call KeccakF1600 ; block exhausted: permute the state (rdi still points at it)
+ mov r8,rdi ; restart reading at lane 0
+ mov rcx,r14
+ jmp NEAR $L$oop_squeeze
+
+$L$tail_squeeze:
+ mov rsi,r8 ; source = current position in the state
+ mov rdi,r12 ; destination = output cursor
+ mov rcx,r13 ; remaining 0..7 bytes
+DB 0xf3,0xa4 ; rep movsb
+
+$L$done_squeeze:
+ pop r14
+
+ pop r13
+
+ pop r12
+
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue: restore the rdi/rsi saved on entry
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_SHA3_squeeze:
+ALIGN 256 ; the placement matters: __KeccakF1600 ends its round loop when its constant pointer hits a 256-byte boundary (`test r15,255`)
+ DQ 0,0,0,0,0,0,0,0 ; 64 bytes of padding so that the 24 constants below (192 bytes) end exactly on that boundary
+; 24 eight-byte round constants, one consumed per round by `xor r9,QWORD[r15]` in __KeccakF1600
+iotas:
+ DQ 0x0000000000000001
+ DQ 0x0000000000008082
+ DQ 0x800000000000808a
+ DQ 0x8000000080008000
+ DQ 0x000000000000808b
+ DQ 0x0000000080000001
+ DQ 0x8000000080008081
+ DQ 0x8000000000008009
+ DQ 0x000000000000008a
+ DQ 0x0000000000000088
+ DQ 0x0000000080008009
+ DQ 0x000000008000000a
+ DQ 0x000000008000808b
+ DQ 0x800000000000008b
+ DQ 0x8000000000008089
+ DQ 0x8000000000008003
+ DQ 0x8000000000008002
+ DQ 0x8000000000000080
+ DQ 0x000000000000800a
+ DQ 0x800000008000000a
+ DQ 0x8000000080008081
+ DQ 0x8000000000008080
+ DQ 0x0000000080000001
+ DQ 0x8000000080008008
+; NUL-terminated ASCII: "Keccak-1600 absorb and squeeze for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111
+DB 114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102
+DB 111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84
+DB 79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64
+DB 111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-mb-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-mb-x86_64.asm
index cfc4874e52..725bf4e796 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-mb-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-mb-x86_64.asm
@@ -20,14 +20,18 @@ $L$SEH_begin_sha1_multi_block:
mov rdx,r8
+
mov rcx,QWORD[((OPENSSL_ia32cap_P+4))]
bt rcx,61
jc NEAR _shaext_shortcut
test ecx,268435456
jnz NEAR _avx_shortcut
mov rax,rsp
+
push rbx
+
push rbp
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -42,6 +46,7 @@ $L$SEH_begin_sha1_multi_block:
sub rsp,288
and rsp,-256
mov QWORD[272+rsp],rax
+
$L$body:
lea rbp,[K_XX_XX]
lea rbx,[256+rsp]
@@ -2571,6 +2576,7 @@ DB 102,15,56,0,197
$L$done:
mov rax,QWORD[272+rsp]
+
movaps xmm6,XMMWORD[((-184))+rax]
movaps xmm7,XMMWORD[((-168))+rax]
movaps xmm8,XMMWORD[((-152))+rax]
@@ -2582,12 +2588,16 @@ $L$done:
movaps xmm14,XMMWORD[((-56))+rax]
movaps xmm15,XMMWORD[((-40))+rax]
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha1_multi_block:
ALIGN 32
@@ -2601,10 +2611,14 @@ $L$SEH_begin_sha1_multi_block_shaext:
mov rdx,r8
+
_shaext_shortcut:
mov rax,rsp
+
push rbx
+
push rbp
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -2981,12 +2995,16 @@ $L$done_shaext:
movaps xmm14,XMMWORD[((-56))+rax]
movaps xmm15,XMMWORD[((-40))+rax]
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$epilogue_shaext:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha1_multi_block_shaext:
ALIGN 32
@@ -3000,6 +3018,7 @@ $L$SEH_begin_sha1_multi_block_avx:
mov rdx,r8
+
_avx_shortcut:
shr rcx,32
cmp edx,2
@@ -3010,8 +3029,11 @@ _avx_shortcut:
ALIGN 32
$L$avx:
mov rax,rsp
+
push rbx
+
push rbp
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -3026,6 +3048,7 @@ $L$avx:
sub rsp,288
and rsp,-256
mov QWORD[272+rsp],rax
+
$L$body_avx:
lea rbp,[K_XX_XX]
lea rbx,[256+rsp]
@@ -5075,6 +5098,7 @@ $L$oop_avx:
$L$done_avx:
mov rax,QWORD[272+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((-184))+rax]
movaps xmm7,XMMWORD[((-168))+rax]
@@ -5087,12 +5111,16 @@ $L$done_avx:
movaps xmm14,XMMWORD[((-56))+rax]
movaps xmm15,XMMWORD[((-40))+rax]
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$epilogue_avx:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha1_multi_block_avx:
ALIGN 32
@@ -5106,14 +5134,22 @@ $L$SEH_begin_sha1_multi_block_avx2:
mov rdx,r8
+
_avx2_shortcut:
mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -5128,6 +5164,7 @@ _avx2_shortcut:
sub rsp,576
and rsp,-256
mov QWORD[544+rsp],rax
+
$L$body_avx2:
lea rbp,[K_XX_XX]
shr edx,1
@@ -7314,6 +7351,7 @@ $L$oop_avx2:
$L$done_avx2:
mov rax,QWORD[544+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((-216))+rax]
movaps xmm7,XMMWORD[((-200))+rax]
@@ -7326,16 +7364,24 @@ $L$done_avx2:
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$epilogue_avx2:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha1_multi_block_avx2:
ALIGN 256
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-x86_64.asm
index 6282079ede..d796380ae8 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha1-x86_64.asm
@@ -19,6 +19,7 @@ $L$SEH_begin_sha1_block_data_order:
mov rdx,r8
+
mov r9d,DWORD[((OPENSSL_ia32cap_P+0))]
mov r8d,DWORD[((OPENSSL_ia32cap_P+4))]
mov r10d,DWORD[((OPENSSL_ia32cap_P+8))]
@@ -39,17 +40,24 @@ $L$SEH_begin_sha1_block_data_order:
ALIGN 16
$L$ialu:
mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
mov r8,rdi
sub rsp,72
mov r9,rsi
and rsp,-64
mov r10,rdx
mov QWORD[64+rsp],rax
+
$L$prologue:
mov esi,DWORD[r8]
@@ -1244,16 +1252,24 @@ $L$loop:
jnz NEAR $L$loop
mov rsi,QWORD[64+rsp]
+
mov r14,QWORD[((-40))+rsi]
+
mov r13,QWORD[((-32))+rsi]
+
mov r12,QWORD[((-24))+rsi]
+
mov rbp,QWORD[((-16))+rsi]
+
mov rbx,QWORD[((-8))+rsi]
+
lea rsp,[rsi]
+
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha1_block_data_order:
ALIGN 32
@@ -1268,6 +1284,7 @@ $L$SEH_begin_sha1_block_data_order_shaext:
_shaext_shortcut:
+
lea rsp,[((-72))+rsp]
movaps XMMWORD[(-8-64)+rax],xmm6
movaps XMMWORD[(-8-48)+rax],xmm7
@@ -1441,6 +1458,7 @@ DB 102,15,56,0,251
movaps xmm9,XMMWORD[((-8-16))+rax]
mov rsp,rax
$L$epilogue_shaext:
+
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
@@ -1458,21 +1476,27 @@ $L$SEH_begin_sha1_block_data_order_ssse3:
_ssse3_shortcut:
- mov rax,rsp
+
+ mov r11,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
lea rsp,[((-160))+rsp]
- movaps XMMWORD[(-40-96)+rax],xmm6
- movaps XMMWORD[(-40-80)+rax],xmm7
- movaps XMMWORD[(-40-64)+rax],xmm8
- movaps XMMWORD[(-40-48)+rax],xmm9
- movaps XMMWORD[(-40-32)+rax],xmm10
- movaps XMMWORD[(-40-16)+rax],xmm11
+ movaps XMMWORD[(-40-96)+r11],xmm6
+ movaps XMMWORD[(-40-80)+r11],xmm7
+ movaps XMMWORD[(-40-64)+r11],xmm8
+ movaps XMMWORD[(-40-48)+r11],xmm9
+ movaps XMMWORD[(-40-32)+r11],xmm10
+ movaps XMMWORD[(-40-16)+r11],xmm11
$L$prologue_ssse3:
- mov r14,rax
and rsp,-64
mov r8,rdi
mov r9,rsi
@@ -1480,7 +1504,7 @@ $L$prologue_ssse3:
shl r10,6
add r10,r9
- lea r11,[((K_XX_XX+64))]
+ lea r14,[((K_XX_XX+64))]
mov eax,DWORD[r8]
mov ebx,DWORD[4+r8]
@@ -1492,8 +1516,8 @@ $L$prologue_ssse3:
xor edi,edx
and esi,edi
- movdqa xmm6,XMMWORD[64+r11]
- movdqa xmm9,XMMWORD[((-64))+r11]
+ movdqa xmm6,XMMWORD[64+r14]
+ movdqa xmm9,XMMWORD[((-64))+r14]
movdqu xmm0,XMMWORD[r9]
movdqu xmm1,XMMWORD[16+r9]
movdqu xmm2,XMMWORD[32+r9]
@@ -1569,7 +1593,7 @@ $L$oop_ssse3:
pslld xmm9,2
pxor xmm4,xmm10
xor edx,ebp
- movdqa xmm10,XMMWORD[((-64))+r11]
+ movdqa xmm10,XMMWORD[((-64))+r14]
rol ecx,5
add ebx,edi
and esi,edx
@@ -1630,7 +1654,7 @@ $L$oop_ssse3:
pslld xmm10,2
pxor xmm5,xmm8
xor ebp,eax
- movdqa xmm8,XMMWORD[((-32))+r11]
+ movdqa xmm8,XMMWORD[((-32))+r14]
rol edx,5
add ecx,edi
and esi,ebp
@@ -1691,7 +1715,7 @@ $L$oop_ssse3:
pslld xmm8,2
pxor xmm6,xmm9
xor eax,ebx
- movdqa xmm9,XMMWORD[((-32))+r11]
+ movdqa xmm9,XMMWORD[((-32))+r14]
rol ebp,5
add edx,edi
and esi,eax
@@ -1752,7 +1776,7 @@ $L$oop_ssse3:
pslld xmm9,2
pxor xmm7,xmm10
xor ebx,ecx
- movdqa xmm10,XMMWORD[((-32))+r11]
+ movdqa xmm10,XMMWORD[((-32))+r14]
rol eax,5
add ebp,edi
and esi,ebx
@@ -1863,7 +1887,7 @@ $L$oop_ssse3:
pxor xmm2,xmm3
add eax,esi
xor edi,edx
- movdqa xmm10,XMMWORD[r11]
+ movdqa xmm10,XMMWORD[r14]
ror ecx,7
paddd xmm9,xmm1
add eax,ebx
@@ -2098,7 +2122,7 @@ $L$oop_ssse3:
pxor xmm7,xmm0
rol ebx,5
add eax,esi
- movdqa xmm9,XMMWORD[32+r11]
+ movdqa xmm9,XMMWORD[32+r14]
xor edi,ecx
paddd xmm8,xmm6
xor ecx,edx
@@ -2389,8 +2413,8 @@ $L$oop_ssse3:
add ecx,edx
cmp r9,r10
je NEAR $L$done_ssse3
- movdqa xmm6,XMMWORD[64+r11]
- movdqa xmm9,XMMWORD[((-64))+r11]
+ movdqa xmm6,XMMWORD[64+r14]
+ movdqa xmm9,XMMWORD[((-64))+r14]
movdqu xmm0,XMMWORD[r9]
movdqu xmm1,XMMWORD[16+r9]
movdqu xmm2,XMMWORD[32+r9]
@@ -2627,23 +2651,29 @@ $L$done_ssse3:
mov DWORD[8+r8],ecx
mov DWORD[12+r8],edx
mov DWORD[16+r8],ebp
- movaps xmm6,XMMWORD[((-40-96))+r14]
- movaps xmm7,XMMWORD[((-40-80))+r14]
- movaps xmm8,XMMWORD[((-40-64))+r14]
- movaps xmm9,XMMWORD[((-40-48))+r14]
- movaps xmm10,XMMWORD[((-40-32))+r14]
- movaps xmm11,XMMWORD[((-40-16))+r14]
- lea rsi,[r14]
- mov r14,QWORD[((-40))+rsi]
- mov r13,QWORD[((-32))+rsi]
- mov r12,QWORD[((-24))+rsi]
- mov rbp,QWORD[((-16))+rsi]
- mov rbx,QWORD[((-8))+rsi]
- lea rsp,[rsi]
+ movaps xmm6,XMMWORD[((-40-96))+r11]
+ movaps xmm7,XMMWORD[((-40-80))+r11]
+ movaps xmm8,XMMWORD[((-40-64))+r11]
+ movaps xmm9,XMMWORD[((-40-48))+r11]
+ movaps xmm10,XMMWORD[((-40-32))+r11]
+ movaps xmm11,XMMWORD[((-40-16))+r11]
+ mov r14,QWORD[((-40))+r11]
+
+ mov r13,QWORD[((-32))+r11]
+
+ mov r12,QWORD[((-24))+r11]
+
+ mov rbp,QWORD[((-16))+r11]
+
+ mov rbx,QWORD[((-8))+r11]
+
+ lea rsp,[r11]
+
$L$epilogue_ssse3:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha1_block_data_order_ssse3:
ALIGN 16
@@ -2658,22 +2688,28 @@ $L$SEH_begin_sha1_block_data_order_avx:
_avx_shortcut:
- mov rax,rsp
+
+ mov r11,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
lea rsp,[((-160))+rsp]
vzeroupper
- vmovaps XMMWORD[(-40-96)+rax],xmm6
- vmovaps XMMWORD[(-40-80)+rax],xmm7
- vmovaps XMMWORD[(-40-64)+rax],xmm8
- vmovaps XMMWORD[(-40-48)+rax],xmm9
- vmovaps XMMWORD[(-40-32)+rax],xmm10
- vmovaps XMMWORD[(-40-16)+rax],xmm11
+ vmovaps XMMWORD[(-40-96)+r11],xmm6
+ vmovaps XMMWORD[(-40-80)+r11],xmm7
+ vmovaps XMMWORD[(-40-64)+r11],xmm8
+ vmovaps XMMWORD[(-40-48)+r11],xmm9
+ vmovaps XMMWORD[(-40-32)+r11],xmm10
+ vmovaps XMMWORD[(-40-16)+r11],xmm11
$L$prologue_avx:
- mov r14,rax
and rsp,-64
mov r8,rdi
mov r9,rsi
@@ -2681,7 +2717,7 @@ $L$prologue_avx:
shl r10,6
add r10,r9
- lea r11,[((K_XX_XX+64))]
+ lea r14,[((K_XX_XX+64))]
mov eax,DWORD[r8]
mov ebx,DWORD[4+r8]
@@ -2693,8 +2729,8 @@ $L$prologue_avx:
xor edi,edx
and esi,edi
- vmovdqa xmm6,XMMWORD[64+r11]
- vmovdqa xmm11,XMMWORD[((-64))+r11]
+ vmovdqa xmm6,XMMWORD[64+r14]
+ vmovdqa xmm11,XMMWORD[((-64))+r14]
vmovdqu xmm0,XMMWORD[r9]
vmovdqu xmm1,XMMWORD[16+r9]
vmovdqu xmm2,XMMWORD[32+r9]
@@ -2819,7 +2855,7 @@ $L$oop_avx:
vpxor xmm5,xmm5,xmm10
xor ebp,eax
shld edx,edx,5
- vmovdqa xmm11,XMMWORD[((-32))+r11]
+ vmovdqa xmm11,XMMWORD[((-32))+r14]
add ecx,edi
and esi,ebp
xor ebp,eax
@@ -3032,7 +3068,7 @@ $L$oop_avx:
add eax,esi
xor edi,edx
vpaddd xmm9,xmm11,xmm1
- vmovdqa xmm11,XMMWORD[r11]
+ vmovdqa xmm11,XMMWORD[r14]
shrd ecx,ecx,7
add eax,ebx
vpxor xmm2,xmm2,xmm8
@@ -3251,7 +3287,7 @@ $L$oop_avx:
mov edi,ebx
xor esi,edx
vpaddd xmm9,xmm11,xmm6
- vmovdqa xmm11,XMMWORD[32+r11]
+ vmovdqa xmm11,XMMWORD[32+r14]
shld ebx,ebx,5
add eax,esi
vpxor xmm7,xmm7,xmm8
@@ -3530,8 +3566,8 @@ $L$oop_avx:
add ecx,edx
cmp r9,r10
je NEAR $L$done_avx
- vmovdqa xmm6,XMMWORD[64+r11]
- vmovdqa xmm11,XMMWORD[((-64))+r11]
+ vmovdqa xmm6,XMMWORD[64+r14]
+ vmovdqa xmm11,XMMWORD[((-64))+r14]
vmovdqu xmm0,XMMWORD[r9]
vmovdqu xmm1,XMMWORD[16+r9]
vmovdqu xmm2,XMMWORD[32+r9]
@@ -3767,23 +3803,29 @@ $L$done_avx:
mov DWORD[8+r8],ecx
mov DWORD[12+r8],edx
mov DWORD[16+r8],ebp
- movaps xmm6,XMMWORD[((-40-96))+r14]
- movaps xmm7,XMMWORD[((-40-80))+r14]
- movaps xmm8,XMMWORD[((-40-64))+r14]
- movaps xmm9,XMMWORD[((-40-48))+r14]
- movaps xmm10,XMMWORD[((-40-32))+r14]
- movaps xmm11,XMMWORD[((-40-16))+r14]
- lea rsi,[r14]
- mov r14,QWORD[((-40))+rsi]
- mov r13,QWORD[((-32))+rsi]
- mov r12,QWORD[((-24))+rsi]
- mov rbp,QWORD[((-16))+rsi]
- mov rbx,QWORD[((-8))+rsi]
- lea rsp,[rsi]
+ movaps xmm6,XMMWORD[((-40-96))+r11]
+ movaps xmm7,XMMWORD[((-40-80))+r11]
+ movaps xmm8,XMMWORD[((-40-64))+r11]
+ movaps xmm9,XMMWORD[((-40-48))+r11]
+ movaps xmm10,XMMWORD[((-40-32))+r11]
+ movaps xmm11,XMMWORD[((-40-16))+r11]
+ mov r14,QWORD[((-40))+r11]
+
+ mov r13,QWORD[((-32))+r11]
+
+ mov r12,QWORD[((-24))+r11]
+
+ mov rbp,QWORD[((-16))+r11]
+
+ mov rbx,QWORD[((-8))+r11]
+
+ lea rsp,[r11]
+
$L$epilogue_avx:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha1_block_data_order_avx:
ALIGN 16
@@ -3798,22 +3840,28 @@ $L$SEH_begin_sha1_block_data_order_avx2:
_avx2_shortcut:
- mov rax,rsp
+
+ mov r11,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
vzeroupper
lea rsp,[((-96))+rsp]
- vmovaps XMMWORD[(-40-96)+rax],xmm6
- vmovaps XMMWORD[(-40-80)+rax],xmm7
- vmovaps XMMWORD[(-40-64)+rax],xmm8
- vmovaps XMMWORD[(-40-48)+rax],xmm9
- vmovaps XMMWORD[(-40-32)+rax],xmm10
- vmovaps XMMWORD[(-40-16)+rax],xmm11
+ vmovaps XMMWORD[(-40-96)+r11],xmm6
+ vmovaps XMMWORD[(-40-80)+r11],xmm7
+ vmovaps XMMWORD[(-40-64)+r11],xmm8
+ vmovaps XMMWORD[(-40-48)+r11],xmm9
+ vmovaps XMMWORD[(-40-32)+r11],xmm10
+ vmovaps XMMWORD[(-40-16)+r11],xmm11
$L$prologue_avx2:
- mov r14,rax
mov r8,rdi
mov r9,rsi
mov r10,rdx
@@ -3823,7 +3871,7 @@ $L$prologue_avx2:
lea r13,[64+r9]
and rsp,-128
add r10,r9
- lea r11,[((K_XX_XX+64))]
+ lea r14,[((K_XX_XX+64))]
mov eax,DWORD[r8]
cmp r13,r10
@@ -3832,7 +3880,7 @@ $L$prologue_avx2:
mov ecx,DWORD[8+r8]
mov edx,DWORD[12+r8]
mov esi,DWORD[16+r8]
- vmovdqu ymm6,YMMWORD[64+r11]
+ vmovdqu ymm6,YMMWORD[64+r14]
vmovdqu xmm0,XMMWORD[r9]
vmovdqu xmm1,XMMWORD[16+r9]
@@ -3846,7 +3894,7 @@ $L$prologue_avx2:
vpshufb ymm1,ymm1,ymm6
vinserti128 ymm3,ymm3,XMMWORD[48+r13],1
vpshufb ymm2,ymm2,ymm6
- vmovdqu ymm11,YMMWORD[((-64))+r11]
+ vmovdqu ymm11,YMMWORD[((-64))+r14]
vpshufb ymm3,ymm3,ymm6
vpaddd ymm4,ymm0,ymm11
@@ -3878,7 +3926,7 @@ $L$prologue_avx2:
vpxor ymm8,ymm8,ymm3
vpxor ymm5,ymm5,ymm8
vpsrld ymm8,ymm5,31
- vmovdqu ymm11,YMMWORD[((-32))+r11]
+ vmovdqu ymm11,YMMWORD[((-32))+r14]
vpslldq ymm10,ymm5,12
vpaddd ymm5,ymm5,ymm5
vpsrld ymm9,ymm10,30
@@ -4032,7 +4080,7 @@ $L$align32_1:
add ebp,DWORD[((-56))+r13]
andn edi,ebx,esi
vpxor ymm2,ymm2,ymm3
- vmovdqu ymm11,YMMWORD[r11]
+ vmovdqu ymm11,YMMWORD[r14]
add ebp,ecx
rorx r12d,ebx,27
rorx ecx,ebx,2
@@ -4263,7 +4311,7 @@ $L$align32_1:
add eax,DWORD[((-116))+r13]
lea eax,[rbx*1+rax]
vpxor ymm7,ymm7,ymm0
- vmovdqu ymm11,YMMWORD[32+r11]
+ vmovdqu ymm11,YMMWORD[32+r14]
rorx r12d,ebp,27
rorx ebx,ebp,2
xor ebp,ecx
@@ -4708,7 +4756,7 @@ $L$align32_2:
cmp r9,r10
je NEAR $L$done_avx2
- vmovdqu ymm6,YMMWORD[64+r11]
+ vmovdqu ymm6,YMMWORD[64+r14]
cmp rdi,r10
ja NEAR $L$ast_avx2
@@ -4924,7 +4972,7 @@ $L$ast_avx2:
xor eax,ebx
add esi,r12d
xor eax,ecx
- vmovdqu ymm11,YMMWORD[((-64))+r11]
+ vmovdqu ymm11,YMMWORD[((-64))+r14]
vpshufb ymm0,ymm0,ymm6
add edx,DWORD[68+r13]
lea edx,[rax*1+rdx]
@@ -5280,7 +5328,7 @@ $L$align32_3:
xor esi,ebp
add edx,r12d
vpsrld ymm8,ymm5,31
- vmovdqu ymm11,YMMWORD[((-32))+r11]
+ vmovdqu ymm11,YMMWORD[((-32))+r14]
xor esi,ebx
add ecx,DWORD[104+r13]
lea ecx,[rsi*1+rcx]
@@ -5473,23 +5521,29 @@ $L$align32_3:
$L$done_avx2:
vzeroupper
- movaps xmm6,XMMWORD[((-40-96))+r14]
- movaps xmm7,XMMWORD[((-40-80))+r14]
- movaps xmm8,XMMWORD[((-40-64))+r14]
- movaps xmm9,XMMWORD[((-40-48))+r14]
- movaps xmm10,XMMWORD[((-40-32))+r14]
- movaps xmm11,XMMWORD[((-40-16))+r14]
- lea rsi,[r14]
- mov r14,QWORD[((-40))+rsi]
- mov r13,QWORD[((-32))+rsi]
- mov r12,QWORD[((-24))+rsi]
- mov rbp,QWORD[((-16))+rsi]
- mov rbx,QWORD[((-8))+rsi]
- lea rsp,[rsi]
+ movaps xmm6,XMMWORD[((-40-96))+r11]
+ movaps xmm7,XMMWORD[((-40-80))+r11]
+ movaps xmm8,XMMWORD[((-40-64))+r11]
+ movaps xmm9,XMMWORD[((-40-48))+r11]
+ movaps xmm10,XMMWORD[((-40-32))+r11]
+ movaps xmm11,XMMWORD[((-40-16))+r11]
+ mov r14,QWORD[((-40))+r11]
+
+ mov r13,QWORD[((-32))+r11]
+
+ mov r12,QWORD[((-24))+r11]
+
+ mov rbp,QWORD[((-16))+r11]
+
+ mov rbx,QWORD[((-8))+r11]
+
+ lea rsp,[r11]
+
$L$epilogue_avx2:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha1_block_data_order_avx2:
ALIGN 64
K_XX_XX:
@@ -5610,15 +5664,13 @@ ssse3_handler:
cmp rbx,r10
jb NEAR $L$common_seh_tail
- mov rax,QWORD[152+r8]
+ mov rax,QWORD[208+r8]
mov r10d,DWORD[4+r11]
lea r10,[r10*1+rsi]
cmp rbx,r10
jae NEAR $L$common_seh_tail
- mov rax,QWORD[232+r8]
-
lea rsi,[((-40-96))+rax]
lea rdi,[512+r8]
mov ecx,12
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-mb-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-mb-x86_64.asm
index 9efc2ad7f0..f0754004b2 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-mb-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-mb-x86_64.asm
@@ -20,14 +20,18 @@ $L$SEH_begin_sha256_multi_block:
mov rdx,r8
+
mov rcx,QWORD[((OPENSSL_ia32cap_P+4))]
bt rcx,61
jc NEAR _shaext_shortcut
test ecx,268435456
jnz NEAR _avx_shortcut
mov rax,rsp
+
push rbx
+
push rbp
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -42,6 +46,7 @@ $L$SEH_begin_sha256_multi_block:
sub rsp,288
and rsp,-256
mov QWORD[272+rsp],rax
+
$L$body:
lea rbp,[((K256+128))]
lea rbx,[256+rsp]
@@ -2640,6 +2645,7 @@ $L$oop_16_xx:
$L$done:
mov rax,QWORD[272+rsp]
+
movaps xmm6,XMMWORD[((-184))+rax]
movaps xmm7,XMMWORD[((-168))+rax]
movaps xmm8,XMMWORD[((-152))+rax]
@@ -2651,12 +2657,16 @@ $L$done:
movaps xmm14,XMMWORD[((-56))+rax]
movaps xmm15,XMMWORD[((-40))+rax]
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha256_multi_block:
ALIGN 32
@@ -2670,10 +2680,14 @@ $L$SEH_begin_sha256_multi_block_shaext:
mov rdx,r8
+
_shaext_shortcut:
mov rax,rsp
+
push rbx
+
push rbp
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -3169,12 +3183,16 @@ $L$done_shaext:
movaps xmm14,XMMWORD[((-56))+rax]
movaps xmm15,XMMWORD[((-40))+rax]
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$epilogue_shaext:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha256_multi_block_shaext:
ALIGN 32
@@ -3188,6 +3206,7 @@ $L$SEH_begin_sha256_multi_block_avx:
mov rdx,r8
+
_avx_shortcut:
shr rcx,32
cmp edx,2
@@ -3198,8 +3217,11 @@ _avx_shortcut:
ALIGN 32
$L$avx:
mov rax,rsp
+
push rbx
+
push rbp
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -3214,6 +3236,7 @@ $L$avx:
sub rsp,288
and rsp,-256
mov QWORD[272+rsp],rax
+
$L$body_avx:
lea rbp,[((K256+128))]
lea rbx,[256+rsp]
@@ -5442,6 +5465,7 @@ $L$oop_16_xx_avx:
$L$done_avx:
mov rax,QWORD[272+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((-184))+rax]
movaps xmm7,XMMWORD[((-168))+rax]
@@ -5454,12 +5478,16 @@ $L$done_avx:
movaps xmm14,XMMWORD[((-56))+rax]
movaps xmm15,XMMWORD[((-40))+rax]
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$epilogue_avx:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha256_multi_block_avx:
ALIGN 32
@@ -5473,14 +5501,22 @@ $L$SEH_begin_sha256_multi_block_avx2:
mov rdx,r8
+
_avx2_shortcut:
mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
+
lea rsp,[((-168))+rsp]
movaps XMMWORD[rsp],xmm6
movaps XMMWORD[16+rsp],xmm7
@@ -5495,6 +5531,7 @@ _avx2_shortcut:
sub rsp,576
and rsp,-256
mov QWORD[544+rsp],rax
+
$L$body_avx2:
lea rbp,[((K256+128))]
lea rdi,[128+rdi]
@@ -7859,6 +7896,7 @@ $L$oop_16_xx_avx2:
$L$done_avx2:
mov rax,QWORD[544+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((-216))+rax]
movaps xmm7,XMMWORD[((-200))+rax]
@@ -7871,16 +7909,24 @@ $L$done_avx2:
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
+
mov r14,QWORD[((-40))+rax]
+
mov r13,QWORD[((-32))+rax]
+
mov r12,QWORD[((-24))+rax]
+
mov rbp,QWORD[((-16))+rax]
+
mov rbx,QWORD[((-8))+rax]
+
lea rsp,[rax]
+
$L$epilogue_avx2:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha256_multi_block_avx2:
ALIGN 256
K256:
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-x86_64.asm
index 31a5279fc3..fc102444ff 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha256-x86_64.asm
@@ -19,6 +19,7 @@ $L$SEH_begin_sha256_block_data_order:
mov rdx,r8
+
lea r11,[OPENSSL_ia32cap_P]
mov r9d,DWORD[r11]
mov r10d,DWORD[4+r11]
@@ -35,13 +36,20 @@ $L$SEH_begin_sha256_block_data_order:
je NEAR $L$avx_shortcut
test r10d,512
jnz NEAR $L$ssse3_shortcut
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
shl rdx,4
sub rsp,16*4+4*8
lea rdx,[rdx*4+rsi]
@@ -49,7 +57,8 @@ $L$SEH_begin_sha256_block_data_order:
mov QWORD[((64+0))+rsp],rdi
mov QWORD[((64+8))+rsp],rsi
mov QWORD[((64+16))+rsp],rdx
- mov QWORD[((64+24))+rsp],r11
+ mov QWORD[88+rsp],rax
+
$L$prologue:
mov eax,DWORD[rdi]
@@ -1713,18 +1722,27 @@ $L$rounds_16_xx:
mov DWORD[28+rdi],r11d
jb NEAR $L$loop
- mov rsi,QWORD[((64+24))+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov rsi,QWORD[88+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha256_block_data_order:
ALIGN 64
@@ -2017,14 +2035,22 @@ $L$SEH_begin_sha256_block_data_order_ssse3:
mov rdx,r8
+
$L$ssse3_shortcut:
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
shl rdx,4
sub rsp,160
lea rdx,[rdx*4+rsi]
@@ -2032,7 +2058,8 @@ $L$ssse3_shortcut:
mov QWORD[((64+0))+rsp],rdi
mov QWORD[((64+8))+rsp],rsi
mov QWORD[((64+16))+rsp],rdx
- mov QWORD[((64+24))+rsp],r11
+ mov QWORD[88+rsp],rax
+
movaps XMMWORD[(64+32)+rsp],xmm6
movaps XMMWORD[(64+48)+rsp],xmm7
movaps XMMWORD[(64+64)+rsp],xmm8
@@ -3102,22 +3129,31 @@ DB 102,15,58,15,249,4
mov DWORD[28+rdi],r11d
jb NEAR $L$loop_ssse3
- mov rsi,QWORD[((64+24))+rsp]
+ mov rsi,QWORD[88+rsp]
+
movaps xmm6,XMMWORD[((64+32))+rsp]
movaps xmm7,XMMWORD[((64+48))+rsp]
movaps xmm8,XMMWORD[((64+64))+rsp]
movaps xmm9,XMMWORD[((64+80))+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_ssse3:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha256_block_data_order_ssse3:
ALIGN 64
@@ -3131,14 +3167,22 @@ $L$SEH_begin_sha256_block_data_order_avx:
mov rdx,r8
+
$L$avx_shortcut:
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
shl rdx,4
sub rsp,160
lea rdx,[rdx*4+rsi]
@@ -3146,7 +3190,8 @@ $L$avx_shortcut:
mov QWORD[((64+0))+rsp],rdi
mov QWORD[((64+8))+rsp],rsi
mov QWORD[((64+16))+rsp],rdx
- mov QWORD[((64+24))+rsp],r11
+ mov QWORD[88+rsp],rax
+
movaps XMMWORD[(64+32)+rsp],xmm6
movaps XMMWORD[(64+48)+rsp],xmm7
movaps XMMWORD[(64+64)+rsp],xmm8
@@ -4177,23 +4222,32 @@ $L$avx_00_47:
mov DWORD[28+rdi],r11d
jb NEAR $L$loop_avx
- mov rsi,QWORD[((64+24))+rsp]
+ mov rsi,QWORD[88+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((64+32))+rsp]
movaps xmm7,XMMWORD[((64+48))+rsp]
movaps xmm8,XMMWORD[((64+64))+rsp]
movaps xmm9,XMMWORD[((64+80))+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_avx:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha256_block_data_order_avx:
ALIGN 64
@@ -4207,14 +4261,22 @@ $L$SEH_begin_sha256_block_data_order_avx2:
mov rdx,r8
+
$L$avx2_shortcut:
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
sub rsp,608
shl rdx,4
and rsp,-256*4
@@ -4223,7 +4285,8 @@ $L$avx2_shortcut:
mov QWORD[((64+0))+rsp],rdi
mov QWORD[((64+8))+rsp],rsi
mov QWORD[((64+16))+rsp],rdx
- mov QWORD[((64+24))+rsp],r11
+ mov QWORD[88+rsp],rax
+
movaps XMMWORD[(64+32)+rsp],xmm6
movaps XMMWORD[(64+48)+rsp],xmm7
movaps XMMWORD[(64+64)+rsp],xmm8
@@ -5440,23 +5503,32 @@ $L$ower_avx2:
$L$done_avx2:
lea rsp,[rbp]
- mov rsi,QWORD[((64+24))+rsp]
+ mov rsi,QWORD[88+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((64+32))+rsp]
movaps xmm7,XMMWORD[((64+48))+rsp]
movaps xmm8,XMMWORD[((64+64))+rsp]
movaps xmm9,XMMWORD[((64+80))+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_avx2:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha256_block_data_order_avx2:
EXTERN __imp_RtlVirtualUnwind
@@ -5499,7 +5571,6 @@ se_handler:
$L$not_in_avx2:
mov rsi,rax
mov rax,QWORD[((64+24))+rax]
- lea rax,[48+rax]
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha512-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha512-x86_64.asm
index 0e99bed5a7..1a9935d7b6 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha512-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/sha/sha512-x86_64.asm
@@ -19,6 +19,7 @@ $L$SEH_begin_sha512_block_data_order:
mov rdx,r8
+
lea r11,[OPENSSL_ia32cap_P]
mov r9d,DWORD[r11]
mov r10d,DWORD[4+r11]
@@ -33,13 +34,20 @@ $L$SEH_begin_sha512_block_data_order:
or r10d,r9d
cmp r10d,1342177792
je NEAR $L$avx_shortcut
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
shl rdx,4
sub rsp,16*8+4*8
lea rdx,[rdx*8+rsi]
@@ -47,7 +55,8 @@ $L$SEH_begin_sha512_block_data_order:
mov QWORD[((128+0))+rsp],rdi
mov QWORD[((128+8))+rsp],rsi
mov QWORD[((128+16))+rsp],rdx
- mov QWORD[((128+24))+rsp],r11
+ mov QWORD[152+rsp],rax
+
$L$prologue:
mov rax,QWORD[rdi]
@@ -1711,18 +1720,27 @@ $L$rounds_16_xx:
mov QWORD[56+rdi],r11
jb NEAR $L$loop
- mov rsi,QWORD[((128+24))+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov rsi,QWORD[152+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha512_block_data_order:
ALIGN 64
@@ -1827,14 +1845,22 @@ $L$SEH_begin_sha512_block_data_order_xop:
mov rdx,r8
+
$L$xop_shortcut:
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
shl rdx,4
sub rsp,256
lea rdx,[rdx*8+rsi]
@@ -1842,7 +1868,8 @@ $L$xop_shortcut:
mov QWORD[((128+0))+rsp],rdi
mov QWORD[((128+8))+rsp],rsi
mov QWORD[((128+16))+rsp],rdx
- mov QWORD[((128+24))+rsp],r11
+ mov QWORD[152+rsp],rax
+
movaps XMMWORD[(128+32)+rsp],xmm6
movaps XMMWORD[(128+48)+rsp],xmm7
movaps XMMWORD[(128+64)+rsp],xmm8
@@ -2901,7 +2928,8 @@ DB 143,72,120,195,203,42
mov QWORD[56+rdi],r11
jb NEAR $L$loop_xop
- mov rsi,QWORD[((128+24))+rsp]
+ mov rsi,QWORD[152+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((128+32))+rsp]
movaps xmm7,XMMWORD[((128+48))+rsp]
@@ -2909,17 +2937,25 @@ DB 143,72,120,195,203,42
movaps xmm9,XMMWORD[((128+80))+rsp]
movaps xmm10,XMMWORD[((128+96))+rsp]
movaps xmm11,XMMWORD[((128+112))+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_xop:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha512_block_data_order_xop:
ALIGN 64
@@ -2933,14 +2969,22 @@ $L$SEH_begin_sha512_block_data_order_avx:
mov rdx,r8
+
$L$avx_shortcut:
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
shl rdx,4
sub rsp,256
lea rdx,[rdx*8+rsi]
@@ -2948,7 +2992,8 @@ $L$avx_shortcut:
mov QWORD[((128+0))+rsp],rdi
mov QWORD[((128+8))+rsp],rsi
mov QWORD[((128+16))+rsp],rdx
- mov QWORD[((128+24))+rsp],r11
+ mov QWORD[152+rsp],rax
+
movaps XMMWORD[(128+32)+rsp],xmm6
movaps XMMWORD[(128+48)+rsp],xmm7
movaps XMMWORD[(128+64)+rsp],xmm8
@@ -4071,7 +4116,8 @@ $L$avx_00_47:
mov QWORD[56+rdi],r11
jb NEAR $L$loop_avx
- mov rsi,QWORD[((128+24))+rsp]
+ mov rsi,QWORD[152+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((128+32))+rsp]
movaps xmm7,XMMWORD[((128+48))+rsp]
@@ -4079,17 +4125,25 @@ $L$avx_00_47:
movaps xmm9,XMMWORD[((128+80))+rsp]
movaps xmm10,XMMWORD[((128+96))+rsp]
movaps xmm11,XMMWORD[((128+112))+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_avx:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha512_block_data_order_avx:
ALIGN 64
@@ -4103,14 +4157,22 @@ $L$SEH_begin_sha512_block_data_order_avx2:
mov rdx,r8
+
$L$avx2_shortcut:
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
sub rsp,1408
shl rdx,4
and rsp,-256*8
@@ -4119,7 +4181,8 @@ $L$avx2_shortcut:
mov QWORD[((128+0))+rsp],rdi
mov QWORD[((128+8))+rsp],rsi
mov QWORD[((128+16))+rsp],rdx
- mov QWORD[((128+24))+rsp],r11
+ mov QWORD[152+rsp],rax
+
movaps XMMWORD[(128+32)+rsp],xmm6
movaps XMMWORD[(128+48)+rsp],xmm7
movaps XMMWORD[(128+64)+rsp],xmm8
@@ -5432,7 +5495,8 @@ $L$ower_avx2:
$L$done_avx2:
lea rsp,[rbp]
- mov rsi,QWORD[((128+24))+rsp]
+ mov rsi,QWORD[152+rsp]
+
vzeroupper
movaps xmm6,XMMWORD[((128+32))+rsp]
movaps xmm7,XMMWORD[((128+48))+rsp]
@@ -5440,17 +5504,25 @@ $L$done_avx2:
movaps xmm9,XMMWORD[((128+80))+rsp]
movaps xmm10,XMMWORD[((128+96))+rsp]
movaps xmm11,XMMWORD[((128+112))+rsp]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue_avx2:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_sha512_block_data_order_avx2:
EXTERN __imp_RtlVirtualUnwind
@@ -5493,7 +5565,6 @@ se_handler:
$L$not_in_avx2:
mov rsi,rax
mov rax,QWORD[((128+24))+rax]
- lea rax,[48+rax]
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/whrlpool/wp-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/whrlpool/wp-x86_64.asm
index 065697e58d..76f7b07678 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/whrlpool/wp-x86_64.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/whrlpool/wp-x86_64.asm
@@ -18,14 +18,22 @@ $L$SEH_begin_whirlpool_block:
mov rdx,r8
+
+ mov rax,rsp
+
push rbx
+
push rbp
+
push r12
+
push r13
+
push r14
+
push r15
- mov r11,rsp
+
sub rsp,128+40
and rsp,-64
@@ -33,7 +41,8 @@ $L$SEH_begin_whirlpool_block:
mov QWORD[r10],rdi
mov QWORD[8+r10],rsi
mov QWORD[16+r10],rdx
- mov QWORD[32+r10],r11
+ mov QWORD[32+r10],rax
+
$L$prologue:
mov rbx,r10
@@ -593,17 +602,26 @@ $L$roundsdone:
jmp NEAR $L$outerloop
$L$alldone:
mov rsi,QWORD[32+rbx]
- mov r15,QWORD[rsi]
- mov r14,QWORD[8+rsi]
- mov r13,QWORD[16+rsi]
- mov r12,QWORD[24+rsi]
- mov rbp,QWORD[32+rsi]
- mov rbx,QWORD[40+rsi]
- lea rsp,[48+rsi]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_whirlpool_block:
ALIGN 64
@@ -904,7 +922,6 @@ se_handler:
jae NEAR $L$in_prologue
mov rax,QWORD[((128+32))+rax]
- lea rax,[48+rax]
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/x86_64cpuid.asm b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/x86_64cpuid.asm
index 2aede40d9e..e2fec12d22 100644
--- a/deps/openssl/config/archs/VC-WIN64A/asm/crypto/x86_64cpuid.asm
+++ b/deps/openssl/config/archs/VC-WIN64A/asm/crypto/x86_64cpuid.asm
@@ -48,10 +48,12 @@ $L$SEH_begin_OPENSSL_ia32_cpuid:
mov rdi,rcx
+
mov r8,rbx
+
xor eax,eax
- mov DWORD[8+rdi],eax
+ mov QWORD[8+rdi],rax
cpuid
mov r11d,eax
@@ -122,6 +124,7 @@ $L$intel:
$L$nocacheinfo:
mov eax,1
cpuid
+ movd xmm0,eax
and edx,0xbfefffff
cmp r9d,0
jne NEAR $L$notintel
@@ -169,28 +172,47 @@ $L$generic:
jc NEAR $L$notknights
and ebx,0xfff7ffff
$L$notknights:
+ movd eax,xmm0
+ and eax,0x0fff0ff0
+ cmp eax,0x00050650
+ jne NEAR $L$notskylakex
+ and ebx,0xfffeffff
+
+$L$notskylakex:
mov DWORD[8+rdi],ebx
+ mov DWORD[12+rdi],ecx
$L$no_extended_info:
bt r9d,27
jnc NEAR $L$clear_avx
xor ecx,ecx
DB 0x0f,0x01,0xd0
+ and eax,0xe6
+ cmp eax,0xe6
+ je NEAR $L$done
+ and DWORD[8+rdi],0x3fdeffff
+
+
+
+
and eax,6
cmp eax,6
je NEAR $L$done
$L$clear_avx:
mov eax,0xefffe7ff
and r9d,eax
- and DWORD[8+rdi],0xffffffdf
+ mov eax,0x3fdeffdf
+ and DWORD[8+rdi],eax
$L$done:
shl r9,32
mov eax,r10d
mov rbx,r8
+
or rax,r9
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
+
$L$SEH_end_OPENSSL_ia32_cpuid:
global OPENSSL_cleanse
@@ -236,6 +258,18 @@ CRYPTO_memcmp:
xor r10,r10
cmp r8,0
je NEAR $L$no_data
+ cmp r8,16
+ jne NEAR $L$oop_cmp
+ mov r10,QWORD[rcx]
+ mov r11,QWORD[8+rcx]
+ mov r8,1
+ xor r10,QWORD[rdx]
+ xor r11,QWORD[8+rdx]
+ or r10,r11
+ cmovnz rax,r8
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
$L$oop_cmp:
mov r10b,BYTE[rcx]
lea rcx,[1+rcx]
@@ -347,21 +381,6 @@ $L$done2:
sub rax,rcx
DB 0F3h,0C3h ;repret
-global OPENSSL_ia32_rdrand
-
-ALIGN 16
-OPENSSL_ia32_rdrand:
- mov ecx,8
-$L$oop_rdrand:
-DB 72,15,199,240
- jc NEAR $L$break_rdrand
- loop $L$oop_rdrand
-$L$break_rdrand:
- cmp rax,0
- cmove rax,rcx
- DB 0F3h,0C3h ;repret
-
-
global OPENSSL_ia32_rdrand_bytes
ALIGN 16
@@ -395,28 +414,14 @@ $L$tail_rdrand_bytes:
mov BYTE[rcx],r10b
lea rcx,[1+rcx]
inc rax
- shr r8,8
+ shr r10,8
dec rdx
jnz NEAR $L$tail_rdrand_bytes
$L$done_rdrand_bytes:
+ xor r10,r10
DB 0F3h,0C3h ;repret
-global OPENSSL_ia32_rdseed
-
-ALIGN 16
-OPENSSL_ia32_rdseed:
- mov ecx,8
-$L$oop_rdseed:
-DB 72,15,199,248
- jc NEAR $L$break_rdseed
- loop $L$oop_rdseed
-$L$break_rdseed:
- cmp rax,0
- cmove rax,rcx
- DB 0F3h,0C3h ;repret
-
-
global OPENSSL_ia32_rdseed_bytes
ALIGN 16
@@ -450,10 +455,11 @@ $L$tail_rdseed_bytes:
mov BYTE[rcx],r10b
lea rcx,[1+rcx]
inc rax
- shr r8,8
+ shr r10,8
dec rdx
jnz NEAR $L$tail_rdseed_bytes
$L$done_rdseed_bytes:
+ xor r10,r10
DB 0F3h,0C3h ;repret