summaryrefslogtreecommitdiff
path: root/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/sha/sha256-x86_64.asm
diff options
context:
space:
mode:
authorSam Roberts <vieuxtech@gmail.com>2018-11-22 11:47:07 -0800
committerSam Roberts <vieuxtech@gmail.com>2019-01-22 13:33:54 -0800
commit807ed7883a12423270450776f015a7c2348c0913 (patch)
tree00ec21dd290b29c782680ffc2f97e6d59fd2ab2f /deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/sha/sha256-x86_64.asm
parent57119fbdb200702d6e2cf23428de4c458ae86bbc (diff)
downloadandroid-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.gz
android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.bz2
android-node-v8-807ed7883a12423270450776f015a7c2348c0913.zip
deps: update archs files for OpenSSL-1.1.1a
`cd deps/openssl/config; make` updates all archs dependant files. PR-URL: https://github.com/nodejs/node/pull/25381 Reviewed-By: Daniel Bevenius <daniel.bevenius@gmail.com> Reviewed-By: Shigeki Ohtsu <ohtsu@ohtsu.org>
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/sha/sha256-x86_64.asm')
-rw-r--r--deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/sha/sha256-x86_64.asm5705
1 files changed, 5705 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/sha/sha256-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/sha/sha256-x86_64.asm
new file mode 100644
index 0000000000..fc102444ff
--- /dev/null
+++ b/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/sha/sha256-x86_64.asm
@@ -0,0 +1,5705 @@
+default rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+section .text code align=64
+
+
+EXTERN OPENSSL_ia32cap_P
+global sha256_block_data_order
+
+ALIGN 16
+sha256_block_data_order:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+
+ lea r11,[OPENSSL_ia32cap_P]
+ mov r9d,DWORD[r11]
+ mov r10d,DWORD[4+r11]
+ mov r11d,DWORD[8+r11]
+ test r11d,536870912
+ jnz NEAR _shaext_shortcut
+ and r11d,296
+ cmp r11d,296
+ je NEAR $L$avx2_shortcut
+ and r9d,1073741824
+ and r10d,268435968
+ or r10d,r9d
+ cmp r10d,1342177792
+ je NEAR $L$avx_shortcut
+ test r10d,512
+ jnz NEAR $L$ssse3_shortcut
+ mov rax,rsp
+
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ shl rdx,4
+ sub rsp,16*4+4*8
+ lea rdx,[rdx*4+rsi]
+ and rsp,-64
+ mov QWORD[((64+0))+rsp],rdi
+ mov QWORD[((64+8))+rsp],rsi
+ mov QWORD[((64+16))+rsp],rdx
+ mov QWORD[88+rsp],rax
+
+$L$prologue:
+
+ mov eax,DWORD[rdi]
+ mov ebx,DWORD[4+rdi]
+ mov ecx,DWORD[8+rdi]
+ mov edx,DWORD[12+rdi]
+ mov r8d,DWORD[16+rdi]
+ mov r9d,DWORD[20+rdi]
+ mov r10d,DWORD[24+rdi]
+ mov r11d,DWORD[28+rdi]
+ jmp NEAR $L$loop
+
+ALIGN 16
+$L$loop:
+ mov edi,ebx
+ lea rbp,[K256]
+ xor edi,ecx
+ mov r12d,DWORD[rsi]
+ mov r13d,r8d
+ mov r14d,eax
+ bswap r12d
+ ror r13d,14
+ mov r15d,r9d
+
+ xor r13d,r8d
+ ror r14d,9
+ xor r15d,r10d
+
+ mov DWORD[rsp],r12d
+ xor r14d,eax
+ and r15d,r8d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r15d,r10d
+
+ ror r14d,11
+ xor r13d,r8d
+ add r12d,r15d
+
+ mov r15d,eax
+ add r12d,DWORD[rbp]
+ xor r14d,eax
+
+ xor r15d,ebx
+ ror r13d,6
+ mov r11d,ebx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r11d,edi
+ add edx,r12d
+ add r11d,r12d
+
+ lea rbp,[4+rbp]
+ add r11d,r14d
+ mov r12d,DWORD[4+rsi]
+ mov r13d,edx
+ mov r14d,r11d
+ bswap r12d
+ ror r13d,14
+ mov edi,r8d
+
+ xor r13d,edx
+ ror r14d,9
+ xor edi,r9d
+
+ mov DWORD[4+rsp],r12d
+ xor r14d,r11d
+ and edi,edx
+
+ ror r13d,5
+ add r12d,r10d
+ xor edi,r9d
+
+ ror r14d,11
+ xor r13d,edx
+ add r12d,edi
+
+ mov edi,r11d
+ add r12d,DWORD[rbp]
+ xor r14d,r11d
+
+ xor edi,eax
+ ror r13d,6
+ mov r10d,eax
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r10d,r15d
+ add ecx,r12d
+ add r10d,r12d
+
+ lea rbp,[4+rbp]
+ add r10d,r14d
+ mov r12d,DWORD[8+rsi]
+ mov r13d,ecx
+ mov r14d,r10d
+ bswap r12d
+ ror r13d,14
+ mov r15d,edx
+
+ xor r13d,ecx
+ ror r14d,9
+ xor r15d,r8d
+
+ mov DWORD[8+rsp],r12d
+ xor r14d,r10d
+ and r15d,ecx
+
+ ror r13d,5
+ add r12d,r9d
+ xor r15d,r8d
+
+ ror r14d,11
+ xor r13d,ecx
+ add r12d,r15d
+
+ mov r15d,r10d
+ add r12d,DWORD[rbp]
+ xor r14d,r10d
+
+ xor r15d,r11d
+ ror r13d,6
+ mov r9d,r11d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r9d,edi
+ add ebx,r12d
+ add r9d,r12d
+
+ lea rbp,[4+rbp]
+ add r9d,r14d
+ mov r12d,DWORD[12+rsi]
+ mov r13d,ebx
+ mov r14d,r9d
+ bswap r12d
+ ror r13d,14
+ mov edi,ecx
+
+ xor r13d,ebx
+ ror r14d,9
+ xor edi,edx
+
+ mov DWORD[12+rsp],r12d
+ xor r14d,r9d
+ and edi,ebx
+
+ ror r13d,5
+ add r12d,r8d
+ xor edi,edx
+
+ ror r14d,11
+ xor r13d,ebx
+ add r12d,edi
+
+ mov edi,r9d
+ add r12d,DWORD[rbp]
+ xor r14d,r9d
+
+ xor edi,r10d
+ ror r13d,6
+ mov r8d,r10d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r8d,r15d
+ add eax,r12d
+ add r8d,r12d
+
+ lea rbp,[20+rbp]
+ add r8d,r14d
+ mov r12d,DWORD[16+rsi]
+ mov r13d,eax
+ mov r14d,r8d
+ bswap r12d
+ ror r13d,14
+ mov r15d,ebx
+
+ xor r13d,eax
+ ror r14d,9
+ xor r15d,ecx
+
+ mov DWORD[16+rsp],r12d
+ xor r14d,r8d
+ and r15d,eax
+
+ ror r13d,5
+ add r12d,edx
+ xor r15d,ecx
+
+ ror r14d,11
+ xor r13d,eax
+ add r12d,r15d
+
+ mov r15d,r8d
+ add r12d,DWORD[rbp]
+ xor r14d,r8d
+
+ xor r15d,r9d
+ ror r13d,6
+ mov edx,r9d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor edx,edi
+ add r11d,r12d
+ add edx,r12d
+
+ lea rbp,[4+rbp]
+ add edx,r14d
+ mov r12d,DWORD[20+rsi]
+ mov r13d,r11d
+ mov r14d,edx
+ bswap r12d
+ ror r13d,14
+ mov edi,eax
+
+ xor r13d,r11d
+ ror r14d,9
+ xor edi,ebx
+
+ mov DWORD[20+rsp],r12d
+ xor r14d,edx
+ and edi,r11d
+
+ ror r13d,5
+ add r12d,ecx
+ xor edi,ebx
+
+ ror r14d,11
+ xor r13d,r11d
+ add r12d,edi
+
+ mov edi,edx
+ add r12d,DWORD[rbp]
+ xor r14d,edx
+
+ xor edi,r8d
+ ror r13d,6
+ mov ecx,r8d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor ecx,r15d
+ add r10d,r12d
+ add ecx,r12d
+
+ lea rbp,[4+rbp]
+ add ecx,r14d
+ mov r12d,DWORD[24+rsi]
+ mov r13d,r10d
+ mov r14d,ecx
+ bswap r12d
+ ror r13d,14
+ mov r15d,r11d
+
+ xor r13d,r10d
+ ror r14d,9
+ xor r15d,eax
+
+ mov DWORD[24+rsp],r12d
+ xor r14d,ecx
+ and r15d,r10d
+
+ ror r13d,5
+ add r12d,ebx
+ xor r15d,eax
+
+ ror r14d,11
+ xor r13d,r10d
+ add r12d,r15d
+
+ mov r15d,ecx
+ add r12d,DWORD[rbp]
+ xor r14d,ecx
+
+ xor r15d,edx
+ ror r13d,6
+ mov ebx,edx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor ebx,edi
+ add r9d,r12d
+ add ebx,r12d
+
+ lea rbp,[4+rbp]
+ add ebx,r14d
+ mov r12d,DWORD[28+rsi]
+ mov r13d,r9d
+ mov r14d,ebx
+ bswap r12d
+ ror r13d,14
+ mov edi,r10d
+
+ xor r13d,r9d
+ ror r14d,9
+ xor edi,r11d
+
+ mov DWORD[28+rsp],r12d
+ xor r14d,ebx
+ and edi,r9d
+
+ ror r13d,5
+ add r12d,eax
+ xor edi,r11d
+
+ ror r14d,11
+ xor r13d,r9d
+ add r12d,edi
+
+ mov edi,ebx
+ add r12d,DWORD[rbp]
+ xor r14d,ebx
+
+ xor edi,ecx
+ ror r13d,6
+ mov eax,ecx
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor eax,r15d
+ add r8d,r12d
+ add eax,r12d
+
+ lea rbp,[20+rbp]
+ add eax,r14d
+ mov r12d,DWORD[32+rsi]
+ mov r13d,r8d
+ mov r14d,eax
+ bswap r12d
+ ror r13d,14
+ mov r15d,r9d
+
+ xor r13d,r8d
+ ror r14d,9
+ xor r15d,r10d
+
+ mov DWORD[32+rsp],r12d
+ xor r14d,eax
+ and r15d,r8d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r15d,r10d
+
+ ror r14d,11
+ xor r13d,r8d
+ add r12d,r15d
+
+ mov r15d,eax
+ add r12d,DWORD[rbp]
+ xor r14d,eax
+
+ xor r15d,ebx
+ ror r13d,6
+ mov r11d,ebx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r11d,edi
+ add edx,r12d
+ add r11d,r12d
+
+ lea rbp,[4+rbp]
+ add r11d,r14d
+ mov r12d,DWORD[36+rsi]
+ mov r13d,edx
+ mov r14d,r11d
+ bswap r12d
+ ror r13d,14
+ mov edi,r8d
+
+ xor r13d,edx
+ ror r14d,9
+ xor edi,r9d
+
+ mov DWORD[36+rsp],r12d
+ xor r14d,r11d
+ and edi,edx
+
+ ror r13d,5
+ add r12d,r10d
+ xor edi,r9d
+
+ ror r14d,11
+ xor r13d,edx
+ add r12d,edi
+
+ mov edi,r11d
+ add r12d,DWORD[rbp]
+ xor r14d,r11d
+
+ xor edi,eax
+ ror r13d,6
+ mov r10d,eax
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r10d,r15d
+ add ecx,r12d
+ add r10d,r12d
+
+ lea rbp,[4+rbp]
+ add r10d,r14d
+ mov r12d,DWORD[40+rsi]
+ mov r13d,ecx
+ mov r14d,r10d
+ bswap r12d
+ ror r13d,14
+ mov r15d,edx
+
+ xor r13d,ecx
+ ror r14d,9
+ xor r15d,r8d
+
+ mov DWORD[40+rsp],r12d
+ xor r14d,r10d
+ and r15d,ecx
+
+ ror r13d,5
+ add r12d,r9d
+ xor r15d,r8d
+
+ ror r14d,11
+ xor r13d,ecx
+ add r12d,r15d
+
+ mov r15d,r10d
+ add r12d,DWORD[rbp]
+ xor r14d,r10d
+
+ xor r15d,r11d
+ ror r13d,6
+ mov r9d,r11d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r9d,edi
+ add ebx,r12d
+ add r9d,r12d
+
+ lea rbp,[4+rbp]
+ add r9d,r14d
+ mov r12d,DWORD[44+rsi]
+ mov r13d,ebx
+ mov r14d,r9d
+ bswap r12d
+ ror r13d,14
+ mov edi,ecx
+
+ xor r13d,ebx
+ ror r14d,9
+ xor edi,edx
+
+ mov DWORD[44+rsp],r12d
+ xor r14d,r9d
+ and edi,ebx
+
+ ror r13d,5
+ add r12d,r8d
+ xor edi,edx
+
+ ror r14d,11
+ xor r13d,ebx
+ add r12d,edi
+
+ mov edi,r9d
+ add r12d,DWORD[rbp]
+ xor r14d,r9d
+
+ xor edi,r10d
+ ror r13d,6
+ mov r8d,r10d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r8d,r15d
+ add eax,r12d
+ add r8d,r12d
+
+ lea rbp,[20+rbp]
+ add r8d,r14d
+ mov r12d,DWORD[48+rsi]
+ mov r13d,eax
+ mov r14d,r8d
+ bswap r12d
+ ror r13d,14
+ mov r15d,ebx
+
+ xor r13d,eax
+ ror r14d,9
+ xor r15d,ecx
+
+ mov DWORD[48+rsp],r12d
+ xor r14d,r8d
+ and r15d,eax
+
+ ror r13d,5
+ add r12d,edx
+ xor r15d,ecx
+
+ ror r14d,11
+ xor r13d,eax
+ add r12d,r15d
+
+ mov r15d,r8d
+ add r12d,DWORD[rbp]
+ xor r14d,r8d
+
+ xor r15d,r9d
+ ror r13d,6
+ mov edx,r9d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor edx,edi
+ add r11d,r12d
+ add edx,r12d
+
+ lea rbp,[4+rbp]
+ add edx,r14d
+ mov r12d,DWORD[52+rsi]
+ mov r13d,r11d
+ mov r14d,edx
+ bswap r12d
+ ror r13d,14
+ mov edi,eax
+
+ xor r13d,r11d
+ ror r14d,9
+ xor edi,ebx
+
+ mov DWORD[52+rsp],r12d
+ xor r14d,edx
+ and edi,r11d
+
+ ror r13d,5
+ add r12d,ecx
+ xor edi,ebx
+
+ ror r14d,11
+ xor r13d,r11d
+ add r12d,edi
+
+ mov edi,edx
+ add r12d,DWORD[rbp]
+ xor r14d,edx
+
+ xor edi,r8d
+ ror r13d,6
+ mov ecx,r8d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor ecx,r15d
+ add r10d,r12d
+ add ecx,r12d
+
+ lea rbp,[4+rbp]
+ add ecx,r14d
+ mov r12d,DWORD[56+rsi]
+ mov r13d,r10d
+ mov r14d,ecx
+ bswap r12d
+ ror r13d,14
+ mov r15d,r11d
+
+ xor r13d,r10d
+ ror r14d,9
+ xor r15d,eax
+
+ mov DWORD[56+rsp],r12d
+ xor r14d,ecx
+ and r15d,r10d
+
+ ror r13d,5
+ add r12d,ebx
+ xor r15d,eax
+
+ ror r14d,11
+ xor r13d,r10d
+ add r12d,r15d
+
+ mov r15d,ecx
+ add r12d,DWORD[rbp]
+ xor r14d,ecx
+
+ xor r15d,edx
+ ror r13d,6
+ mov ebx,edx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor ebx,edi
+ add r9d,r12d
+ add ebx,r12d
+
+ lea rbp,[4+rbp]
+ add ebx,r14d
+ mov r12d,DWORD[60+rsi]
+ mov r13d,r9d
+ mov r14d,ebx
+ bswap r12d
+ ror r13d,14
+ mov edi,r10d
+
+ xor r13d,r9d
+ ror r14d,9
+ xor edi,r11d
+
+ mov DWORD[60+rsp],r12d
+ xor r14d,ebx
+ and edi,r9d
+
+ ror r13d,5
+ add r12d,eax
+ xor edi,r11d
+
+ ror r14d,11
+ xor r13d,r9d
+ add r12d,edi
+
+ mov edi,ebx
+ add r12d,DWORD[rbp]
+ xor r14d,ebx
+
+ xor edi,ecx
+ ror r13d,6
+ mov eax,ecx
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor eax,r15d
+ add r8d,r12d
+ add eax,r12d
+
+ lea rbp,[20+rbp]
+ jmp NEAR $L$rounds_16_xx
+ALIGN 16
+$L$rounds_16_xx:
+ mov r13d,DWORD[4+rsp]
+ mov r15d,DWORD[56+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add eax,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD[36+rsp]
+
+ add r12d,DWORD[rsp]
+ mov r13d,r8d
+ add r12d,r15d
+ mov r14d,eax
+ ror r13d,14
+ mov r15d,r9d
+
+ xor r13d,r8d
+ ror r14d,9
+ xor r15d,r10d
+
+ mov DWORD[rsp],r12d
+ xor r14d,eax
+ and r15d,r8d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r15d,r10d
+
+ ror r14d,11
+ xor r13d,r8d
+ add r12d,r15d
+
+ mov r15d,eax
+ add r12d,DWORD[rbp]
+ xor r14d,eax
+
+ xor r15d,ebx
+ ror r13d,6
+ mov r11d,ebx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r11d,edi
+ add edx,r12d
+ add r11d,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[8+rsp]
+ mov edi,DWORD[60+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r11d,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD[40+rsp]
+
+ add r12d,DWORD[4+rsp]
+ mov r13d,edx
+ add r12d,edi
+ mov r14d,r11d
+ ror r13d,14
+ mov edi,r8d
+
+ xor r13d,edx
+ ror r14d,9
+ xor edi,r9d
+
+ mov DWORD[4+rsp],r12d
+ xor r14d,r11d
+ and edi,edx
+
+ ror r13d,5
+ add r12d,r10d
+ xor edi,r9d
+
+ ror r14d,11
+ xor r13d,edx
+ add r12d,edi
+
+ mov edi,r11d
+ add r12d,DWORD[rbp]
+ xor r14d,r11d
+
+ xor edi,eax
+ ror r13d,6
+ mov r10d,eax
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r10d,r15d
+ add ecx,r12d
+ add r10d,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[12+rsp]
+ mov r15d,DWORD[rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r10d,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD[44+rsp]
+
+ add r12d,DWORD[8+rsp]
+ mov r13d,ecx
+ add r12d,r15d
+ mov r14d,r10d
+ ror r13d,14
+ mov r15d,edx
+
+ xor r13d,ecx
+ ror r14d,9
+ xor r15d,r8d
+
+ mov DWORD[8+rsp],r12d
+ xor r14d,r10d
+ and r15d,ecx
+
+ ror r13d,5
+ add r12d,r9d
+ xor r15d,r8d
+
+ ror r14d,11
+ xor r13d,ecx
+ add r12d,r15d
+
+ mov r15d,r10d
+ add r12d,DWORD[rbp]
+ xor r14d,r10d
+
+ xor r15d,r11d
+ ror r13d,6
+ mov r9d,r11d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r9d,edi
+ add ebx,r12d
+ add r9d,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[16+rsp]
+ mov edi,DWORD[4+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r9d,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD[48+rsp]
+
+ add r12d,DWORD[12+rsp]
+ mov r13d,ebx
+ add r12d,edi
+ mov r14d,r9d
+ ror r13d,14
+ mov edi,ecx
+
+ xor r13d,ebx
+ ror r14d,9
+ xor edi,edx
+
+ mov DWORD[12+rsp],r12d
+ xor r14d,r9d
+ and edi,ebx
+
+ ror r13d,5
+ add r12d,r8d
+ xor edi,edx
+
+ ror r14d,11
+ xor r13d,ebx
+ add r12d,edi
+
+ mov edi,r9d
+ add r12d,DWORD[rbp]
+ xor r14d,r9d
+
+ xor edi,r10d
+ ror r13d,6
+ mov r8d,r10d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r8d,r15d
+ add eax,r12d
+ add r8d,r12d
+
+ lea rbp,[20+rbp]
+ mov r13d,DWORD[20+rsp]
+ mov r15d,DWORD[8+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r8d,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD[52+rsp]
+
+ add r12d,DWORD[16+rsp]
+ mov r13d,eax
+ add r12d,r15d
+ mov r14d,r8d
+ ror r13d,14
+ mov r15d,ebx
+
+ xor r13d,eax
+ ror r14d,9
+ xor r15d,ecx
+
+ mov DWORD[16+rsp],r12d
+ xor r14d,r8d
+ and r15d,eax
+
+ ror r13d,5
+ add r12d,edx
+ xor r15d,ecx
+
+ ror r14d,11
+ xor r13d,eax
+ add r12d,r15d
+
+ mov r15d,r8d
+ add r12d,DWORD[rbp]
+ xor r14d,r8d
+
+ xor r15d,r9d
+ ror r13d,6
+ mov edx,r9d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor edx,edi
+ add r11d,r12d
+ add edx,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[24+rsp]
+ mov edi,DWORD[12+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add edx,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD[56+rsp]
+
+ add r12d,DWORD[20+rsp]
+ mov r13d,r11d
+ add r12d,edi
+ mov r14d,edx
+ ror r13d,14
+ mov edi,eax
+
+ xor r13d,r11d
+ ror r14d,9
+ xor edi,ebx
+
+ mov DWORD[20+rsp],r12d
+ xor r14d,edx
+ and edi,r11d
+
+ ror r13d,5
+ add r12d,ecx
+ xor edi,ebx
+
+ ror r14d,11
+ xor r13d,r11d
+ add r12d,edi
+
+ mov edi,edx
+ add r12d,DWORD[rbp]
+ xor r14d,edx
+
+ xor edi,r8d
+ ror r13d,6
+ mov ecx,r8d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor ecx,r15d
+ add r10d,r12d
+ add ecx,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[28+rsp]
+ mov r15d,DWORD[16+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add ecx,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD[60+rsp]
+
+ add r12d,DWORD[24+rsp]
+ mov r13d,r10d
+ add r12d,r15d
+ mov r14d,ecx
+ ror r13d,14
+ mov r15d,r11d
+
+ xor r13d,r10d
+ ror r14d,9
+ xor r15d,eax
+
+ mov DWORD[24+rsp],r12d
+ xor r14d,ecx
+ and r15d,r10d
+
+ ror r13d,5
+ add r12d,ebx
+ xor r15d,eax
+
+ ror r14d,11
+ xor r13d,r10d
+ add r12d,r15d
+
+ mov r15d,ecx
+ add r12d,DWORD[rbp]
+ xor r14d,ecx
+
+ xor r15d,edx
+ ror r13d,6
+ mov ebx,edx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor ebx,edi
+ add r9d,r12d
+ add ebx,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[32+rsp]
+ mov edi,DWORD[20+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add ebx,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD[rsp]
+
+ add r12d,DWORD[28+rsp]
+ mov r13d,r9d
+ add r12d,edi
+ mov r14d,ebx
+ ror r13d,14
+ mov edi,r10d
+
+ xor r13d,r9d
+ ror r14d,9
+ xor edi,r11d
+
+ mov DWORD[28+rsp],r12d
+ xor r14d,ebx
+ and edi,r9d
+
+ ror r13d,5
+ add r12d,eax
+ xor edi,r11d
+
+ ror r14d,11
+ xor r13d,r9d
+ add r12d,edi
+
+ mov edi,ebx
+ add r12d,DWORD[rbp]
+ xor r14d,ebx
+
+ xor edi,ecx
+ ror r13d,6
+ mov eax,ecx
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor eax,r15d
+ add r8d,r12d
+ add eax,r12d
+
+ lea rbp,[20+rbp]
+ mov r13d,DWORD[36+rsp]
+ mov r15d,DWORD[24+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add eax,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD[4+rsp]
+
+ add r12d,DWORD[32+rsp]
+ mov r13d,r8d
+ add r12d,r15d
+ mov r14d,eax
+ ror r13d,14
+ mov r15d,r9d
+
+ xor r13d,r8d
+ ror r14d,9
+ xor r15d,r10d
+
+ mov DWORD[32+rsp],r12d
+ xor r14d,eax
+ and r15d,r8d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r15d,r10d
+
+ ror r14d,11
+ xor r13d,r8d
+ add r12d,r15d
+
+ mov r15d,eax
+ add r12d,DWORD[rbp]
+ xor r14d,eax
+
+ xor r15d,ebx
+ ror r13d,6
+ mov r11d,ebx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r11d,edi
+ add edx,r12d
+ add r11d,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[40+rsp]
+ mov edi,DWORD[28+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r11d,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD[8+rsp]
+
+ add r12d,DWORD[36+rsp]
+ mov r13d,edx
+ add r12d,edi
+ mov r14d,r11d
+ ror r13d,14
+ mov edi,r8d
+
+ xor r13d,edx
+ ror r14d,9
+ xor edi,r9d
+
+ mov DWORD[36+rsp],r12d
+ xor r14d,r11d
+ and edi,edx
+
+ ror r13d,5
+ add r12d,r10d
+ xor edi,r9d
+
+ ror r14d,11
+ xor r13d,edx
+ add r12d,edi
+
+ mov edi,r11d
+ add r12d,DWORD[rbp]
+ xor r14d,r11d
+
+ xor edi,eax
+ ror r13d,6
+ mov r10d,eax
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r10d,r15d
+ add ecx,r12d
+ add r10d,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[44+rsp]
+ mov r15d,DWORD[32+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r10d,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD[12+rsp]
+
+ add r12d,DWORD[40+rsp]
+ mov r13d,ecx
+ add r12d,r15d
+ mov r14d,r10d
+ ror r13d,14
+ mov r15d,edx
+
+ xor r13d,ecx
+ ror r14d,9
+ xor r15d,r8d
+
+ mov DWORD[40+rsp],r12d
+ xor r14d,r10d
+ and r15d,ecx
+
+ ror r13d,5
+ add r12d,r9d
+ xor r15d,r8d
+
+ ror r14d,11
+ xor r13d,ecx
+ add r12d,r15d
+
+ mov r15d,r10d
+ add r12d,DWORD[rbp]
+ xor r14d,r10d
+
+ xor r15d,r11d
+ ror r13d,6
+ mov r9d,r11d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor r9d,edi
+ add ebx,r12d
+ add r9d,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[48+rsp]
+ mov edi,DWORD[36+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r9d,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD[16+rsp]
+
+ add r12d,DWORD[44+rsp]
+ mov r13d,ebx
+ add r12d,edi
+ mov r14d,r9d
+ ror r13d,14
+ mov edi,ecx
+
+ xor r13d,ebx
+ ror r14d,9
+ xor edi,edx
+
+ mov DWORD[44+rsp],r12d
+ xor r14d,r9d
+ and edi,ebx
+
+ ror r13d,5
+ add r12d,r8d
+ xor edi,edx
+
+ ror r14d,11
+ xor r13d,ebx
+ add r12d,edi
+
+ mov edi,r9d
+ add r12d,DWORD[rbp]
+ xor r14d,r9d
+
+ xor edi,r10d
+ ror r13d,6
+ mov r8d,r10d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor r8d,r15d
+ add eax,r12d
+ add r8d,r12d
+
+ lea rbp,[20+rbp]
+ mov r13d,DWORD[52+rsp]
+ mov r15d,DWORD[40+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add r8d,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD[20+rsp]
+
+ add r12d,DWORD[48+rsp]
+ mov r13d,eax
+ add r12d,r15d
+ mov r14d,r8d
+ ror r13d,14
+ mov r15d,ebx
+
+ xor r13d,eax
+ ror r14d,9
+ xor r15d,ecx
+
+ mov DWORD[48+rsp],r12d
+ xor r14d,r8d
+ and r15d,eax
+
+ ror r13d,5
+ add r12d,edx
+ xor r15d,ecx
+
+ ror r14d,11
+ xor r13d,eax
+ add r12d,r15d
+
+ mov r15d,r8d
+ add r12d,DWORD[rbp]
+ xor r14d,r8d
+
+ xor r15d,r9d
+ ror r13d,6
+ mov edx,r9d
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor edx,edi
+ add r11d,r12d
+ add edx,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[56+rsp]
+ mov edi,DWORD[44+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add edx,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD[24+rsp]
+
+ add r12d,DWORD[52+rsp]
+ mov r13d,r11d
+ add r12d,edi
+ mov r14d,edx
+ ror r13d,14
+ mov edi,eax
+
+ xor r13d,r11d
+ ror r14d,9
+ xor edi,ebx
+
+ mov DWORD[52+rsp],r12d
+ xor r14d,edx
+ and edi,r11d
+
+ ror r13d,5
+ add r12d,ecx
+ xor edi,ebx
+
+ ror r14d,11
+ xor r13d,r11d
+ add r12d,edi
+
+ mov edi,edx
+ add r12d,DWORD[rbp]
+ xor r14d,edx
+
+ xor edi,r8d
+ ror r13d,6
+ mov ecx,r8d
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor ecx,r15d
+ add r10d,r12d
+ add ecx,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[60+rsp]
+ mov r15d,DWORD[48+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add ecx,r14d
+ mov r14d,r15d
+ ror r15d,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ xor r12d,r13d
+ xor r15d,r14d
+ add r12d,DWORD[28+rsp]
+
+ add r12d,DWORD[56+rsp]
+ mov r13d,r10d
+ add r12d,r15d
+ mov r14d,ecx
+ ror r13d,14
+ mov r15d,r11d
+
+ xor r13d,r10d
+ ror r14d,9
+ xor r15d,eax
+
+ mov DWORD[56+rsp],r12d
+ xor r14d,ecx
+ and r15d,r10d
+
+ ror r13d,5
+ add r12d,ebx
+ xor r15d,eax
+
+ ror r14d,11
+ xor r13d,r10d
+ add r12d,r15d
+
+ mov r15d,ecx
+ add r12d,DWORD[rbp]
+ xor r14d,ecx
+
+ xor r15d,edx
+ ror r13d,6
+ mov ebx,edx
+
+ and edi,r15d
+ ror r14d,2
+ add r12d,r13d
+
+ xor ebx,edi
+ add r9d,r12d
+ add ebx,r12d
+
+ lea rbp,[4+rbp]
+ mov r13d,DWORD[rsp]
+ mov edi,DWORD[52+rsp]
+
+ mov r12d,r13d
+ ror r13d,11
+ add ebx,r14d
+ mov r14d,edi
+ ror edi,2
+
+ xor r13d,r12d
+ shr r12d,3
+ ror r13d,7
+ xor edi,r14d
+ shr r14d,10
+
+ ror edi,17
+ xor r12d,r13d
+ xor edi,r14d
+ add r12d,DWORD[32+rsp]
+
+ add r12d,DWORD[60+rsp]
+ mov r13d,r9d
+ add r12d,edi
+ mov r14d,ebx
+ ror r13d,14
+ mov edi,r10d
+
+ xor r13d,r9d
+ ror r14d,9
+ xor edi,r11d
+
+ mov DWORD[60+rsp],r12d
+ xor r14d,ebx
+ and edi,r9d
+
+ ror r13d,5
+ add r12d,eax
+ xor edi,r11d
+
+ ror r14d,11
+ xor r13d,r9d
+ add r12d,edi
+
+ mov edi,ebx
+ add r12d,DWORD[rbp]
+ xor r14d,ebx
+
+ xor edi,ecx
+ ror r13d,6
+ mov eax,ecx
+
+ and r15d,edi
+ ror r14d,2
+ add r12d,r13d
+
+ xor eax,r15d
+ add r8d,r12d
+ add eax,r12d
+
+ lea rbp,[20+rbp]
+ cmp BYTE[3+rbp],0
+ jnz NEAR $L$rounds_16_xx
+
+ mov rdi,QWORD[((64+0))+rsp]
+ add eax,r14d
+ lea rsi,[64+rsi]
+
+ add eax,DWORD[rdi]
+ add ebx,DWORD[4+rdi]
+ add ecx,DWORD[8+rdi]
+ add edx,DWORD[12+rdi]
+ add r8d,DWORD[16+rdi]
+ add r9d,DWORD[20+rdi]
+ add r10d,DWORD[24+rdi]
+ add r11d,DWORD[28+rdi]
+
+ cmp rsi,QWORD[((64+16))+rsp]
+
+ mov DWORD[rdi],eax
+ mov DWORD[4+rdi],ebx
+ mov DWORD[8+rdi],ecx
+ mov DWORD[12+rdi],edx
+ mov DWORD[16+rdi],r8d
+ mov DWORD[20+rdi],r9d
+ mov DWORD[24+rdi],r10d
+ mov DWORD[28+rdi],r11d
+ jb NEAR $L$loop
+
+ mov rsi,QWORD[88+rsp]
+
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$epilogue:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha256_block_data_order:
+ALIGN 64
+
+K256:
+ DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+ DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+ DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+ DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+ DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+ DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+ DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+ DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
+DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+DB 111,114,103,62,0
+
+ALIGN 64
+sha256_block_data_order_shaext:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order_shaext:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+_shaext_shortcut:
+ lea rsp,[((-88))+rsp]
+ movaps XMMWORD[(-8-80)+rax],xmm6
+ movaps XMMWORD[(-8-64)+rax],xmm7
+ movaps XMMWORD[(-8-48)+rax],xmm8
+ movaps XMMWORD[(-8-32)+rax],xmm9
+ movaps XMMWORD[(-8-16)+rax],xmm10
+$L$prologue_shaext:
+ lea rcx,[((K256+128))]
+ movdqu xmm1,XMMWORD[rdi]
+ movdqu xmm2,XMMWORD[16+rdi]
+ movdqa xmm7,XMMWORD[((512-128))+rcx]
+
+ pshufd xmm0,xmm1,0x1b
+ pshufd xmm1,xmm1,0xb1
+ pshufd xmm2,xmm2,0x1b
+ movdqa xmm8,xmm7
+DB 102,15,58,15,202,8
+ punpcklqdq xmm2,xmm0
+ jmp NEAR $L$oop_shaext
+
+ALIGN 16
+$L$oop_shaext:
+ movdqu xmm3,XMMWORD[rsi]
+ movdqu xmm4,XMMWORD[16+rsi]
+ movdqu xmm5,XMMWORD[32+rsi]
+DB 102,15,56,0,223
+ movdqu xmm6,XMMWORD[48+rsi]
+
+ movdqa xmm0,XMMWORD[((0-128))+rcx]
+ paddd xmm0,xmm3
+DB 102,15,56,0,231
+ movdqa xmm10,xmm2
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ nop
+ movdqa xmm9,xmm1
+DB 15,56,203,202
+
+ movdqa xmm0,XMMWORD[((32-128))+rcx]
+ paddd xmm0,xmm4
+DB 102,15,56,0,239
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ lea rsi,[64+rsi]
+DB 15,56,204,220
+DB 15,56,203,202
+
+ movdqa xmm0,XMMWORD[((64-128))+rcx]
+ paddd xmm0,xmm5
+DB 102,15,56,0,247
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm6
+DB 102,15,58,15,253,4
+ nop
+ paddd xmm3,xmm7
+DB 15,56,204,229
+DB 15,56,203,202
+
+ movdqa xmm0,XMMWORD[((96-128))+rcx]
+ paddd xmm0,xmm6
+DB 15,56,205,222
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm3
+DB 102,15,58,15,254,4
+ nop
+ paddd xmm4,xmm7
+DB 15,56,204,238
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((128-128))+rcx]
+ paddd xmm0,xmm3
+DB 15,56,205,227
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm4
+DB 102,15,58,15,251,4
+ nop
+ paddd xmm5,xmm7
+DB 15,56,204,243
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((160-128))+rcx]
+ paddd xmm0,xmm4
+DB 15,56,205,236
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm5
+DB 102,15,58,15,252,4
+ nop
+ paddd xmm6,xmm7
+DB 15,56,204,220
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((192-128))+rcx]
+ paddd xmm0,xmm5
+DB 15,56,205,245
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm6
+DB 102,15,58,15,253,4
+ nop
+ paddd xmm3,xmm7
+DB 15,56,204,229
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((224-128))+rcx]
+ paddd xmm0,xmm6
+DB 15,56,205,222
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm3
+DB 102,15,58,15,254,4
+ nop
+ paddd xmm4,xmm7
+DB 15,56,204,238
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((256-128))+rcx]
+ paddd xmm0,xmm3
+DB 15,56,205,227
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm4
+DB 102,15,58,15,251,4
+ nop
+ paddd xmm5,xmm7
+DB 15,56,204,243
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((288-128))+rcx]
+ paddd xmm0,xmm4
+DB 15,56,205,236
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm5
+DB 102,15,58,15,252,4
+ nop
+ paddd xmm6,xmm7
+DB 15,56,204,220
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((320-128))+rcx]
+ paddd xmm0,xmm5
+DB 15,56,205,245
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm6
+DB 102,15,58,15,253,4
+ nop
+ paddd xmm3,xmm7
+DB 15,56,204,229
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((352-128))+rcx]
+ paddd xmm0,xmm6
+DB 15,56,205,222
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm3
+DB 102,15,58,15,254,4
+ nop
+ paddd xmm4,xmm7
+DB 15,56,204,238
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((384-128))+rcx]
+ paddd xmm0,xmm3
+DB 15,56,205,227
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm4
+DB 102,15,58,15,251,4
+ nop
+ paddd xmm5,xmm7
+DB 15,56,204,243
+DB 15,56,203,202
+ movdqa xmm0,XMMWORD[((416-128))+rcx]
+ paddd xmm0,xmm4
+DB 15,56,205,236
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ movdqa xmm7,xmm5
+DB 102,15,58,15,252,4
+DB 15,56,203,202
+ paddd xmm6,xmm7
+
+ movdqa xmm0,XMMWORD[((448-128))+rcx]
+ paddd xmm0,xmm5
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+DB 15,56,205,245
+ movdqa xmm7,xmm8
+DB 15,56,203,202
+
+ movdqa xmm0,XMMWORD[((480-128))+rcx]
+ paddd xmm0,xmm6
+ nop
+DB 15,56,203,209
+ pshufd xmm0,xmm0,0x0e
+ dec rdx
+ nop
+DB 15,56,203,202
+
+ paddd xmm2,xmm10
+ paddd xmm1,xmm9
+ jnz NEAR $L$oop_shaext
+
+ pshufd xmm2,xmm2,0xb1
+ pshufd xmm7,xmm1,0x1b
+ pshufd xmm1,xmm1,0xb1
+ punpckhqdq xmm1,xmm2
+DB 102,15,58,15,215,8
+
+ movdqu XMMWORD[rdi],xmm1
+ movdqu XMMWORD[16+rdi],xmm2
+ movaps xmm6,XMMWORD[((-8-80))+rax]
+ movaps xmm7,XMMWORD[((-8-64))+rax]
+ movaps xmm8,XMMWORD[((-8-48))+rax]
+ movaps xmm9,XMMWORD[((-8-32))+rax]
+ movaps xmm10,XMMWORD[((-8-16))+rax]
+ mov rsp,rax
+$L$epilogue_shaext:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha256_block_data_order_shaext:
+
+ALIGN 64
+sha256_block_data_order_ssse3:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order_ssse3:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+
+$L$ssse3_shortcut:
+ mov rax,rsp
+
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ shl rdx,4
+ sub rsp,160
+ lea rdx,[rdx*4+rsi]
+ and rsp,-64
+ mov QWORD[((64+0))+rsp],rdi
+ mov QWORD[((64+8))+rsp],rsi
+ mov QWORD[((64+16))+rsp],rdx
+ mov QWORD[88+rsp],rax
+
+ movaps XMMWORD[(64+32)+rsp],xmm6
+ movaps XMMWORD[(64+48)+rsp],xmm7
+ movaps XMMWORD[(64+64)+rsp],xmm8
+ movaps XMMWORD[(64+80)+rsp],xmm9
+$L$prologue_ssse3:
+
+ mov eax,DWORD[rdi]
+ mov ebx,DWORD[4+rdi]
+ mov ecx,DWORD[8+rdi]
+ mov edx,DWORD[12+rdi]
+ mov r8d,DWORD[16+rdi]
+ mov r9d,DWORD[20+rdi]
+ mov r10d,DWORD[24+rdi]
+ mov r11d,DWORD[28+rdi]
+
+
+ jmp NEAR $L$loop_ssse3
+ALIGN 16
+$L$loop_ssse3:
+ movdqa xmm7,XMMWORD[((K256+512))]
+ movdqu xmm0,XMMWORD[rsi]
+ movdqu xmm1,XMMWORD[16+rsi]
+ movdqu xmm2,XMMWORD[32+rsi]
+DB 102,15,56,0,199
+ movdqu xmm3,XMMWORD[48+rsi]
+ lea rbp,[K256]
+DB 102,15,56,0,207
+ movdqa xmm4,XMMWORD[rbp]
+ movdqa xmm5,XMMWORD[32+rbp]
+DB 102,15,56,0,215
+ paddd xmm4,xmm0
+ movdqa xmm6,XMMWORD[64+rbp]
+DB 102,15,56,0,223
+ movdqa xmm7,XMMWORD[96+rbp]
+ paddd xmm5,xmm1
+ paddd xmm6,xmm2
+ paddd xmm7,xmm3
+ movdqa XMMWORD[rsp],xmm4
+ mov r14d,eax
+ movdqa XMMWORD[16+rsp],xmm5
+ mov edi,ebx
+ movdqa XMMWORD[32+rsp],xmm6
+ xor edi,ecx
+ movdqa XMMWORD[48+rsp],xmm7
+ mov r13d,r8d
+ jmp NEAR $L$ssse3_00_47
+
+ALIGN 16
+$L$ssse3_00_47:
+ sub rbp,-128
+ ror r13d,14
+ movdqa xmm4,xmm1
+ mov eax,r14d
+ mov r12d,r9d
+ movdqa xmm7,xmm3
+ ror r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+DB 102,15,58,15,224,4
+ and r12d,r8d
+ xor r13d,r8d
+DB 102,15,58,15,250,4
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ ror r14d,11
+ movdqa xmm5,xmm4
+ xor r15d,ebx
+ add r11d,r12d
+ movdqa xmm6,xmm4
+ ror r13d,6
+ and edi,r15d
+ psrld xmm4,3
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ paddd xmm0,xmm7
+ ror r14d,2
+ add edx,r11d
+ psrld xmm6,7
+ add r11d,edi
+ mov r13d,edx
+ pshufd xmm7,xmm3,250
+ add r14d,r11d
+ ror r13d,14
+ pslld xmm5,14
+ mov r11d,r14d
+ mov r12d,r8d
+ pxor xmm4,xmm6
+ ror r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ ror r13d,5
+ psrld xmm6,11
+ xor r14d,r11d
+ pxor xmm4,xmm5
+ and r12d,edx
+ xor r13d,edx
+ pslld xmm5,11
+ add r10d,DWORD[4+rsp]
+ mov edi,r11d
+ pxor xmm4,xmm6
+ xor r12d,r9d
+ ror r14d,11
+ movdqa xmm6,xmm7
+ xor edi,eax
+ add r10d,r12d
+ pxor xmm4,xmm5
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ psrld xmm7,10
+ add r10d,r13d
+ xor r15d,eax
+ paddd xmm0,xmm4
+ ror r14d,2
+ add ecx,r10d
+ psrlq xmm6,17
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ pxor xmm7,xmm6
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ ror r14d,9
+ psrlq xmm6,2
+ xor r13d,ecx
+ xor r12d,r8d
+ pxor xmm7,xmm6
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ pshufd xmm7,xmm7,128
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ mov r15d,r10d
+ psrldq xmm7,8
+ xor r12d,r8d
+ ror r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ ror r13d,6
+ paddd xmm0,xmm7
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ pshufd xmm7,xmm0,80
+ xor edi,r11d
+ ror r14d,2
+ add ebx,r9d
+ movdqa xmm6,xmm7
+ add r9d,edi
+ mov r13d,ebx
+ psrld xmm7,10
+ add r14d,r9d
+ ror r13d,14
+ psrlq xmm6,17
+ mov r9d,r14d
+ mov r12d,ecx
+ pxor xmm7,xmm6
+ ror r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ psrlq xmm6,2
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ pxor xmm7,xmm6
+ mov edi,r9d
+ xor r12d,edx
+ ror r14d,11
+ pshufd xmm7,xmm7,8
+ xor edi,r10d
+ add r8d,r12d
+ movdqa xmm6,XMMWORD[rbp]
+ ror r13d,6
+ and r15d,edi
+ pslldq xmm7,8
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ paddd xmm0,xmm7
+ ror r14d,2
+ add eax,r8d
+ add r8d,r15d
+ paddd xmm6,xmm0
+ mov r13d,eax
+ add r14d,r8d
+ movdqa XMMWORD[rsp],xmm6
+ ror r13d,14
+ movdqa xmm4,xmm2
+ mov r8d,r14d
+ mov r12d,ebx
+ movdqa xmm7,xmm0
+ ror r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+DB 102,15,58,15,225,4
+ and r12d,eax
+ xor r13d,eax
+DB 102,15,58,15,251,4
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ ror r14d,11
+ movdqa xmm5,xmm4
+ xor r15d,r9d
+ add edx,r12d
+ movdqa xmm6,xmm4
+ ror r13d,6
+ and edi,r15d
+ psrld xmm4,3
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ paddd xmm1,xmm7
+ ror r14d,2
+ add r11d,edx
+ psrld xmm6,7
+ add edx,edi
+ mov r13d,r11d
+ pshufd xmm7,xmm0,250
+ add r14d,edx
+ ror r13d,14
+ pslld xmm5,14
+ mov edx,r14d
+ mov r12d,eax
+ pxor xmm4,xmm6
+ ror r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ ror r13d,5
+ psrld xmm6,11
+ xor r14d,edx
+ pxor xmm4,xmm5
+ and r12d,r11d
+ xor r13d,r11d
+ pslld xmm5,11
+ add ecx,DWORD[20+rsp]
+ mov edi,edx
+ pxor xmm4,xmm6
+ xor r12d,ebx
+ ror r14d,11
+ movdqa xmm6,xmm7
+ xor edi,r8d
+ add ecx,r12d
+ pxor xmm4,xmm5
+ ror r13d,6
+ and r15d,edi
+ xor r14d,edx
+ psrld xmm7,10
+ add ecx,r13d
+ xor r15d,r8d
+ paddd xmm1,xmm4
+ ror r14d,2
+ add r10d,ecx
+ psrlq xmm6,17
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ pxor xmm7,xmm6
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ ror r14d,9
+ psrlq xmm6,2
+ xor r13d,r10d
+ xor r12d,eax
+ pxor xmm7,xmm6
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ pshufd xmm7,xmm7,128
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ mov r15d,ecx
+ psrldq xmm7,8
+ xor r12d,eax
+ ror r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ ror r13d,6
+ paddd xmm1,xmm7
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ pshufd xmm7,xmm1,80
+ xor edi,edx
+ ror r14d,2
+ add r9d,ebx
+ movdqa xmm6,xmm7
+ add ebx,edi
+ mov r13d,r9d
+ psrld xmm7,10
+ add r14d,ebx
+ ror r13d,14
+ psrlq xmm6,17
+ mov ebx,r14d
+ mov r12d,r10d
+ pxor xmm7,xmm6
+ ror r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ psrlq xmm6,2
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ pxor xmm7,xmm6
+ mov edi,ebx
+ xor r12d,r11d
+ ror r14d,11
+ pshufd xmm7,xmm7,8
+ xor edi,ecx
+ add eax,r12d
+ movdqa xmm6,XMMWORD[32+rbp]
+ ror r13d,6
+ and r15d,edi
+ pslldq xmm7,8
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ paddd xmm1,xmm7
+ ror r14d,2
+ add r8d,eax
+ add eax,r15d
+ paddd xmm6,xmm1
+ mov r13d,r8d
+ add r14d,eax
+ movdqa XMMWORD[16+rsp],xmm6
+ ror r13d,14
+ movdqa xmm4,xmm3
+ mov eax,r14d
+ mov r12d,r9d
+ movdqa xmm7,xmm1
+ ror r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+DB 102,15,58,15,226,4
+ and r12d,r8d
+ xor r13d,r8d
+DB 102,15,58,15,248,4
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ ror r14d,11
+ movdqa xmm5,xmm4
+ xor r15d,ebx
+ add r11d,r12d
+ movdqa xmm6,xmm4
+ ror r13d,6
+ and edi,r15d
+ psrld xmm4,3
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ paddd xmm2,xmm7
+ ror r14d,2
+ add edx,r11d
+ psrld xmm6,7
+ add r11d,edi
+ mov r13d,edx
+ pshufd xmm7,xmm1,250
+ add r14d,r11d
+ ror r13d,14
+ pslld xmm5,14
+ mov r11d,r14d
+ mov r12d,r8d
+ pxor xmm4,xmm6
+ ror r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ ror r13d,5
+ psrld xmm6,11
+ xor r14d,r11d
+ pxor xmm4,xmm5
+ and r12d,edx
+ xor r13d,edx
+ pslld xmm5,11
+ add r10d,DWORD[36+rsp]
+ mov edi,r11d
+ pxor xmm4,xmm6
+ xor r12d,r9d
+ ror r14d,11
+ movdqa xmm6,xmm7
+ xor edi,eax
+ add r10d,r12d
+ pxor xmm4,xmm5
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ psrld xmm7,10
+ add r10d,r13d
+ xor r15d,eax
+ paddd xmm2,xmm4
+ ror r14d,2
+ add ecx,r10d
+ psrlq xmm6,17
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ pxor xmm7,xmm6
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ ror r14d,9
+ psrlq xmm6,2
+ xor r13d,ecx
+ xor r12d,r8d
+ pxor xmm7,xmm6
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ pshufd xmm7,xmm7,128
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ mov r15d,r10d
+ psrldq xmm7,8
+ xor r12d,r8d
+ ror r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ ror r13d,6
+ paddd xmm2,xmm7
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ pshufd xmm7,xmm2,80
+ xor edi,r11d
+ ror r14d,2
+ add ebx,r9d
+ movdqa xmm6,xmm7
+ add r9d,edi
+ mov r13d,ebx
+ psrld xmm7,10
+ add r14d,r9d
+ ror r13d,14
+ psrlq xmm6,17
+ mov r9d,r14d
+ mov r12d,ecx
+ pxor xmm7,xmm6
+ ror r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ psrlq xmm6,2
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ pxor xmm7,xmm6
+ mov edi,r9d
+ xor r12d,edx
+ ror r14d,11
+ pshufd xmm7,xmm7,8
+ xor edi,r10d
+ add r8d,r12d
+ movdqa xmm6,XMMWORD[64+rbp]
+ ror r13d,6
+ and r15d,edi
+ pslldq xmm7,8
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ paddd xmm2,xmm7
+ ror r14d,2
+ add eax,r8d
+ add r8d,r15d
+ paddd xmm6,xmm2
+ mov r13d,eax
+ add r14d,r8d
+ movdqa XMMWORD[32+rsp],xmm6
+ ror r13d,14
+ movdqa xmm4,xmm0
+ mov r8d,r14d
+ mov r12d,ebx
+ movdqa xmm7,xmm2
+ ror r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+DB 102,15,58,15,227,4
+ and r12d,eax
+ xor r13d,eax
+DB 102,15,58,15,249,4
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ ror r14d,11
+ movdqa xmm5,xmm4
+ xor r15d,r9d
+ add edx,r12d
+ movdqa xmm6,xmm4
+ ror r13d,6
+ and edi,r15d
+ psrld xmm4,3
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ paddd xmm3,xmm7
+ ror r14d,2
+ add r11d,edx
+ psrld xmm6,7
+ add edx,edi
+ mov r13d,r11d
+ pshufd xmm7,xmm2,250
+ add r14d,edx
+ ror r13d,14
+ pslld xmm5,14
+ mov edx,r14d
+ mov r12d,eax
+ pxor xmm4,xmm6
+ ror r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ ror r13d,5
+ psrld xmm6,11
+ xor r14d,edx
+ pxor xmm4,xmm5
+ and r12d,r11d
+ xor r13d,r11d
+ pslld xmm5,11
+ add ecx,DWORD[52+rsp]
+ mov edi,edx
+ pxor xmm4,xmm6
+ xor r12d,ebx
+ ror r14d,11
+ movdqa xmm6,xmm7
+ xor edi,r8d
+ add ecx,r12d
+ pxor xmm4,xmm5
+ ror r13d,6
+ and r15d,edi
+ xor r14d,edx
+ psrld xmm7,10
+ add ecx,r13d
+ xor r15d,r8d
+ paddd xmm3,xmm4
+ ror r14d,2
+ add r10d,ecx
+ psrlq xmm6,17
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ pxor xmm7,xmm6
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ ror r14d,9
+ psrlq xmm6,2
+ xor r13d,r10d
+ xor r12d,eax
+ pxor xmm7,xmm6
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ pshufd xmm7,xmm7,128
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ mov r15d,ecx
+ psrldq xmm7,8
+ xor r12d,eax
+ ror r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ ror r13d,6
+ paddd xmm3,xmm7
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ pshufd xmm7,xmm3,80
+ xor edi,edx
+ ror r14d,2
+ add r9d,ebx
+ movdqa xmm6,xmm7
+ add ebx,edi
+ mov r13d,r9d
+ psrld xmm7,10
+ add r14d,ebx
+ ror r13d,14
+ psrlq xmm6,17
+ mov ebx,r14d
+ mov r12d,r10d
+ pxor xmm7,xmm6
+ ror r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ psrlq xmm6,2
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ pxor xmm7,xmm6
+ mov edi,ebx
+ xor r12d,r11d
+ ror r14d,11
+ pshufd xmm7,xmm7,8
+ xor edi,ecx
+ add eax,r12d
+ movdqa xmm6,XMMWORD[96+rbp]
+ ror r13d,6
+ and r15d,edi
+ pslldq xmm7,8
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ paddd xmm3,xmm7
+ ror r14d,2
+ add r8d,eax
+ add eax,r15d
+ paddd xmm6,xmm3
+ mov r13d,r8d
+ add r14d,eax
+ movdqa XMMWORD[48+rsp],xmm6
+ cmp BYTE[131+rbp],0
+ jne NEAR $L$ssse3_00_47
+ ror r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ ror r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ xor r13d,r8d
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ ror r14d,11
+ xor r15d,ebx
+ add r11d,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ ror r14d,2
+ add edx,r11d
+ add r11d,edi
+ mov r13d,edx
+ add r14d,r11d
+ ror r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ ror r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ ror r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ add r10d,DWORD[4+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ ror r14d,11
+ xor edi,eax
+ add r10d,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ ror r14d,2
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ ror r14d,9
+ xor r13d,ecx
+ xor r12d,r8d
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ mov r15d,r10d
+ xor r12d,r8d
+ ror r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor edi,r11d
+ ror r14d,2
+ add ebx,r9d
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ ror r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ ror r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ ror r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ ror r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ ror r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ ror r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ ror r14d,11
+ xor r15d,r9d
+ add edx,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ ror r14d,2
+ add r11d,edx
+ add edx,edi
+ mov r13d,r11d
+ add r14d,edx
+ ror r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ ror r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ ror r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ add ecx,DWORD[20+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ ror r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ ror r14d,2
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ ror r14d,9
+ xor r13d,r10d
+ xor r12d,eax
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ mov r15d,ecx
+ xor r12d,eax
+ ror r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor edi,edx
+ ror r14d,2
+ add r9d,ebx
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ ror r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ ror r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ ror r14d,11
+ xor edi,ecx
+ add eax,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ ror r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ ror r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ ror r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ ror r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ ror r14d,11
+ xor r15d,ebx
+ add r11d,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ ror r14d,2
+ add edx,r11d
+ add r11d,edi
+ mov r13d,edx
+ add r14d,r11d
+ ror r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ ror r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ ror r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ add r10d,DWORD[36+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ ror r14d,11
+ xor edi,eax
+ add r10d,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ ror r14d,2
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ ror r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ ror r14d,9
+ xor r13d,ecx
+ xor r12d,r8d
+ ror r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ mov r15d,r10d
+ xor r12d,r8d
+ ror r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor edi,r11d
+ ror r14d,2
+ add ebx,r9d
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ ror r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ ror r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ ror r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ ror r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ ror r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ ror r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ ror r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ ror r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ ror r14d,11
+ xor r15d,r9d
+ add edx,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ ror r14d,2
+ add r11d,edx
+ add edx,edi
+ mov r13d,r11d
+ add r14d,edx
+ ror r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ ror r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ ror r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ add ecx,DWORD[52+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ ror r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ ror r14d,2
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ ror r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ ror r14d,9
+ xor r13d,r10d
+ xor r12d,eax
+ ror r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ mov r15d,ecx
+ xor r12d,eax
+ ror r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ ror r13d,6
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor edi,edx
+ ror r14d,2
+ add r9d,ebx
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ ror r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ ror r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ ror r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ ror r14d,11
+ xor edi,ecx
+ add eax,r12d
+ ror r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ ror r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ mov rdi,QWORD[((64+0))+rsp]
+ mov eax,r14d
+
+ add eax,DWORD[rdi]
+ lea rsi,[64+rsi]
+ add ebx,DWORD[4+rdi]
+ add ecx,DWORD[8+rdi]
+ add edx,DWORD[12+rdi]
+ add r8d,DWORD[16+rdi]
+ add r9d,DWORD[20+rdi]
+ add r10d,DWORD[24+rdi]
+ add r11d,DWORD[28+rdi]
+
+ cmp rsi,QWORD[((64+16))+rsp]
+
+ mov DWORD[rdi],eax
+ mov DWORD[4+rdi],ebx
+ mov DWORD[8+rdi],ecx
+ mov DWORD[12+rdi],edx
+ mov DWORD[16+rdi],r8d
+ mov DWORD[20+rdi],r9d
+ mov DWORD[24+rdi],r10d
+ mov DWORD[28+rdi],r11d
+ jb NEAR $L$loop_ssse3
+
+ mov rsi,QWORD[88+rsp]
+
+ movaps xmm6,XMMWORD[((64+32))+rsp]
+ movaps xmm7,XMMWORD[((64+48))+rsp]
+ movaps xmm8,XMMWORD[((64+64))+rsp]
+ movaps xmm9,XMMWORD[((64+80))+rsp]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$epilogue_ssse3:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha256_block_data_order_ssse3:
+
+ALIGN 64
+sha256_block_data_order_avx:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order_avx:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+
+$L$avx_shortcut:
+ mov rax,rsp
+
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ shl rdx,4
+ sub rsp,160
+ lea rdx,[rdx*4+rsi]
+ and rsp,-64
+ mov QWORD[((64+0))+rsp],rdi
+ mov QWORD[((64+8))+rsp],rsi
+ mov QWORD[((64+16))+rsp],rdx
+ mov QWORD[88+rsp],rax
+
+ movaps XMMWORD[(64+32)+rsp],xmm6
+ movaps XMMWORD[(64+48)+rsp],xmm7
+ movaps XMMWORD[(64+64)+rsp],xmm8
+ movaps XMMWORD[(64+80)+rsp],xmm9
+$L$prologue_avx:
+
+ vzeroupper
+ mov eax,DWORD[rdi]
+ mov ebx,DWORD[4+rdi]
+ mov ecx,DWORD[8+rdi]
+ mov edx,DWORD[12+rdi]
+ mov r8d,DWORD[16+rdi]
+ mov r9d,DWORD[20+rdi]
+ mov r10d,DWORD[24+rdi]
+ mov r11d,DWORD[28+rdi]
+ vmovdqa xmm8,XMMWORD[((K256+512+32))]
+ vmovdqa xmm9,XMMWORD[((K256+512+64))]
+ jmp NEAR $L$loop_avx
+ALIGN 16
+$L$loop_avx:
+ vmovdqa xmm7,XMMWORD[((K256+512))]
+ vmovdqu xmm0,XMMWORD[rsi]
+ vmovdqu xmm1,XMMWORD[16+rsi]
+ vmovdqu xmm2,XMMWORD[32+rsi]
+ vmovdqu xmm3,XMMWORD[48+rsi]
+ vpshufb xmm0,xmm0,xmm7
+ lea rbp,[K256]
+ vpshufb xmm1,xmm1,xmm7
+ vpshufb xmm2,xmm2,xmm7
+ vpaddd xmm4,xmm0,XMMWORD[rbp]
+ vpshufb xmm3,xmm3,xmm7
+ vpaddd xmm5,xmm1,XMMWORD[32+rbp]
+ vpaddd xmm6,xmm2,XMMWORD[64+rbp]
+ vpaddd xmm7,xmm3,XMMWORD[96+rbp]
+ vmovdqa XMMWORD[rsp],xmm4
+ mov r14d,eax
+ vmovdqa XMMWORD[16+rsp],xmm5
+ mov edi,ebx
+ vmovdqa XMMWORD[32+rsp],xmm6
+ xor edi,ecx
+ vmovdqa XMMWORD[48+rsp],xmm7
+ mov r13d,r8d
+ jmp NEAR $L$avx_00_47
+
+ALIGN 16
+$L$avx_00_47:
+ sub rbp,-128
+ vpalignr xmm4,xmm1,xmm0,4
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ vpalignr xmm7,xmm3,xmm2,4
+ shrd r14d,r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vpaddd xmm0,xmm0,xmm7
+ xor r13d,r8d
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ vpsrld xmm7,xmm4,3
+ xor r12d,r10d
+ shrd r14d,r14d,11
+ xor r15d,ebx
+ vpslld xmm5,xmm4,14
+ add r11d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ vpshufd xmm7,xmm3,250
+ shrd r14d,r14d,2
+ add edx,r11d
+ add r11d,edi
+ vpsrld xmm6,xmm6,11
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov r11d,r14d
+ mov r12d,r8d
+ shrd r14d,r14d,9
+ vpslld xmm5,xmm5,11
+ xor r13d,edx
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ vpsrld xmm6,xmm7,10
+ add r10d,DWORD[4+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ vpxor xmm4,xmm4,xmm5
+ shrd r14d,r14d,11
+ xor edi,eax
+ add r10d,r12d
+ vpsrlq xmm7,xmm7,17
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ vpaddd xmm0,xmm0,xmm4
+ add r10d,r13d
+ xor r15d,eax
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ vpsrlq xmm7,xmm7,2
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,edx
+ shrd r14d,r14d,9
+ xor r13d,ecx
+ vpshufb xmm6,xmm6,xmm8
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ vpaddd xmm0,xmm0,xmm6
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ vpshufd xmm7,xmm0,80
+ mov r15d,r10d
+ xor r12d,r8d
+ shrd r14d,r14d,11
+ vpsrld xmm6,xmm7,10
+ xor r15d,r11d
+ add r9d,r12d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ vpxor xmm6,xmm6,xmm7
+ xor edi,r11d
+ shrd r14d,r14d,2
+ add ebx,r9d
+ vpsrlq xmm7,xmm7,2
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ vpxor xmm6,xmm6,xmm7
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ vpshufb xmm6,xmm6,xmm9
+ shrd r14d,r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ vpaddd xmm0,xmm0,xmm6
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vpaddd xmm6,xmm0,XMMWORD[rbp]
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ shrd r14d,r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ shrd r14d,r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ vmovdqa XMMWORD[rsp],xmm6
+ vpalignr xmm4,xmm2,xmm1,4
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ vpalignr xmm7,xmm0,xmm3,4
+ shrd r14d,r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vpaddd xmm1,xmm1,xmm7
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ vpsrld xmm7,xmm4,3
+ xor r12d,ecx
+ shrd r14d,r14d,11
+ xor r15d,r9d
+ vpslld xmm5,xmm4,14
+ add edx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ vpshufd xmm7,xmm0,250
+ shrd r14d,r14d,2
+ add r11d,edx
+ add edx,edi
+ vpsrld xmm6,xmm6,11
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov edx,r14d
+ mov r12d,eax
+ shrd r14d,r14d,9
+ vpslld xmm5,xmm5,11
+ xor r13d,r11d
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ vpsrld xmm6,xmm7,10
+ add ecx,DWORD[20+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ vpxor xmm4,xmm4,xmm5
+ shrd r14d,r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ vpsrlq xmm7,xmm7,17
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,edx
+ vpaddd xmm1,xmm1,xmm4
+ add ecx,r13d
+ xor r15d,r8d
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ vpsrlq xmm7,xmm7,2
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,r11d
+ shrd r14d,r14d,9
+ xor r13d,r10d
+ vpshufb xmm6,xmm6,xmm8
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ vpaddd xmm1,xmm1,xmm6
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ vpshufd xmm7,xmm1,80
+ mov r15d,ecx
+ xor r12d,eax
+ shrd r14d,r14d,11
+ vpsrld xmm6,xmm7,10
+ xor r15d,edx
+ add ebx,r12d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ vpxor xmm6,xmm6,xmm7
+ xor edi,edx
+ shrd r14d,r14d,2
+ add r9d,ebx
+ vpsrlq xmm7,xmm7,2
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ vpxor xmm6,xmm6,xmm7
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ vpshufb xmm6,xmm6,xmm9
+ shrd r14d,r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ vpaddd xmm1,xmm1,xmm6
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpaddd xmm6,xmm1,XMMWORD[32+rbp]
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ shrd r14d,r14d,11
+ xor edi,ecx
+ add eax,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ shrd r14d,r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ vmovdqa XMMWORD[16+rsp],xmm6
+ vpalignr xmm4,xmm3,xmm2,4
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ vpalignr xmm7,xmm1,xmm0,4
+ shrd r14d,r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ vpaddd xmm2,xmm2,xmm7
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ vpsrld xmm7,xmm4,3
+ xor r12d,r10d
+ shrd r14d,r14d,11
+ xor r15d,ebx
+ vpslld xmm5,xmm4,14
+ add r11d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ vpshufd xmm7,xmm1,250
+ shrd r14d,r14d,2
+ add edx,r11d
+ add r11d,edi
+ vpsrld xmm6,xmm6,11
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov r11d,r14d
+ mov r12d,r8d
+ shrd r14d,r14d,9
+ vpslld xmm5,xmm5,11
+ xor r13d,edx
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ vpsrld xmm6,xmm7,10
+ add r10d,DWORD[36+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ vpxor xmm4,xmm4,xmm5
+ shrd r14d,r14d,11
+ xor edi,eax
+ add r10d,r12d
+ vpsrlq xmm7,xmm7,17
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ vpaddd xmm2,xmm2,xmm4
+ add r10d,r13d
+ xor r15d,eax
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ vpsrlq xmm7,xmm7,2
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,edx
+ shrd r14d,r14d,9
+ xor r13d,ecx
+ vpshufb xmm6,xmm6,xmm8
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ vpaddd xmm2,xmm2,xmm6
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ vpshufd xmm7,xmm2,80
+ mov r15d,r10d
+ xor r12d,r8d
+ shrd r14d,r14d,11
+ vpsrld xmm6,xmm7,10
+ xor r15d,r11d
+ add r9d,r12d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ vpxor xmm6,xmm6,xmm7
+ xor edi,r11d
+ shrd r14d,r14d,2
+ add ebx,r9d
+ vpsrlq xmm7,xmm7,2
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ vpxor xmm6,xmm6,xmm7
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ vpshufb xmm6,xmm6,xmm9
+ shrd r14d,r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ vpaddd xmm2,xmm2,xmm6
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ vpaddd xmm6,xmm2,XMMWORD[64+rbp]
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ shrd r14d,r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ shrd r14d,r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ vmovdqa XMMWORD[32+rsp],xmm6
+ vpalignr xmm4,xmm0,xmm3,4
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ vpalignr xmm7,xmm2,xmm1,4
+ shrd r14d,r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ vpsrld xmm6,xmm4,7
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ vpaddd xmm3,xmm3,xmm7
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ vpsrld xmm7,xmm4,3
+ xor r12d,ecx
+ shrd r14d,r14d,11
+ xor r15d,r9d
+ vpslld xmm5,xmm4,14
+ add edx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ vpxor xmm4,xmm7,xmm6
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ vpshufd xmm7,xmm2,250
+ shrd r14d,r14d,2
+ add r11d,edx
+ add edx,edi
+ vpsrld xmm6,xmm6,11
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ vpxor xmm4,xmm4,xmm5
+ mov edx,r14d
+ mov r12d,eax
+ shrd r14d,r14d,9
+ vpslld xmm5,xmm5,11
+ xor r13d,r11d
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ vpxor xmm4,xmm4,xmm6
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ vpsrld xmm6,xmm7,10
+ add ecx,DWORD[52+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ vpxor xmm4,xmm4,xmm5
+ shrd r14d,r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ vpsrlq xmm7,xmm7,17
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,edx
+ vpaddd xmm3,xmm3,xmm4
+ add ecx,r13d
+ xor r15d,r8d
+ shrd r14d,r14d,2
+ vpxor xmm6,xmm6,xmm7
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ vpsrlq xmm7,xmm7,2
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ vpxor xmm6,xmm6,xmm7
+ mov r12d,r11d
+ shrd r14d,r14d,9
+ xor r13d,r10d
+ vpshufb xmm6,xmm6,xmm8
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ vpaddd xmm3,xmm3,xmm6
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ vpshufd xmm7,xmm3,80
+ mov r15d,ecx
+ xor r12d,eax
+ shrd r14d,r14d,11
+ vpsrld xmm6,xmm7,10
+ xor r15d,edx
+ add ebx,r12d
+ shrd r13d,r13d,6
+ vpsrlq xmm7,xmm7,17
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ vpxor xmm6,xmm6,xmm7
+ xor edi,edx
+ shrd r14d,r14d,2
+ add r9d,ebx
+ vpsrlq xmm7,xmm7,2
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ vpxor xmm6,xmm6,xmm7
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ vpshufb xmm6,xmm6,xmm9
+ shrd r14d,r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ vpaddd xmm3,xmm3,xmm6
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ vpaddd xmm6,xmm3,XMMWORD[96+rbp]
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ shrd r14d,r14d,11
+ xor edi,ecx
+ add eax,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ shrd r14d,r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ vmovdqa XMMWORD[48+rsp],xmm6
+ cmp BYTE[131+rbp],0
+ jne NEAR $L$avx_00_47
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ shrd r14d,r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ xor r13d,r8d
+ add r11d,DWORD[rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ shrd r14d,r14d,11
+ xor r15d,ebx
+ add r11d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ shrd r14d,r14d,2
+ add edx,r11d
+ add r11d,edi
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ shrd r14d,r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ add r10d,DWORD[4+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ shrd r14d,r14d,11
+ xor edi,eax
+ add r10d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ shrd r14d,r14d,2
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ shrd r14d,r14d,9
+ xor r13d,ecx
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[8+rsp]
+ mov r15d,r10d
+ xor r12d,r8d
+ shrd r14d,r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor edi,r11d
+ shrd r14d,r14d,2
+ add ebx,r9d
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ shrd r14d,r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD[12+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ shrd r14d,r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ shrd r14d,r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ shrd r14d,r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ xor r13d,eax
+ add edx,DWORD[16+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ shrd r14d,r14d,11
+ xor r15d,r9d
+ add edx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ shrd r14d,r14d,2
+ add r11d,edx
+ add edx,edi
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ shrd r14d,r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ add ecx,DWORD[20+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ shrd r14d,r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ shrd r14d,r14d,2
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ shrd r14d,r14d,9
+ xor r13d,r10d
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[24+rsp]
+ mov r15d,ecx
+ xor r12d,eax
+ shrd r14d,r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor edi,edx
+ shrd r14d,r14d,2
+ add r9d,ebx
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ shrd r14d,r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD[28+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ shrd r14d,r14d,11
+ xor edi,ecx
+ add eax,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ shrd r14d,r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ shrd r13d,r13d,14
+ mov eax,r14d
+ mov r12d,r9d
+ shrd r14d,r14d,9
+ xor r13d,r8d
+ xor r12d,r10d
+ shrd r13d,r13d,5
+ xor r14d,eax
+ and r12d,r8d
+ xor r13d,r8d
+ add r11d,DWORD[32+rsp]
+ mov r15d,eax
+ xor r12d,r10d
+ shrd r14d,r14d,11
+ xor r15d,ebx
+ add r11d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,eax
+ add r11d,r13d
+ xor edi,ebx
+ shrd r14d,r14d,2
+ add edx,r11d
+ add r11d,edi
+ mov r13d,edx
+ add r14d,r11d
+ shrd r13d,r13d,14
+ mov r11d,r14d
+ mov r12d,r8d
+ shrd r14d,r14d,9
+ xor r13d,edx
+ xor r12d,r9d
+ shrd r13d,r13d,5
+ xor r14d,r11d
+ and r12d,edx
+ xor r13d,edx
+ add r10d,DWORD[36+rsp]
+ mov edi,r11d
+ xor r12d,r9d
+ shrd r14d,r14d,11
+ xor edi,eax
+ add r10d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r11d
+ add r10d,r13d
+ xor r15d,eax
+ shrd r14d,r14d,2
+ add ecx,r10d
+ add r10d,r15d
+ mov r13d,ecx
+ add r14d,r10d
+ shrd r13d,r13d,14
+ mov r10d,r14d
+ mov r12d,edx
+ shrd r14d,r14d,9
+ xor r13d,ecx
+ xor r12d,r8d
+ shrd r13d,r13d,5
+ xor r14d,r10d
+ and r12d,ecx
+ xor r13d,ecx
+ add r9d,DWORD[40+rsp]
+ mov r15d,r10d
+ xor r12d,r8d
+ shrd r14d,r14d,11
+ xor r15d,r11d
+ add r9d,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,r10d
+ add r9d,r13d
+ xor edi,r11d
+ shrd r14d,r14d,2
+ add ebx,r9d
+ add r9d,edi
+ mov r13d,ebx
+ add r14d,r9d
+ shrd r13d,r13d,14
+ mov r9d,r14d
+ mov r12d,ecx
+ shrd r14d,r14d,9
+ xor r13d,ebx
+ xor r12d,edx
+ shrd r13d,r13d,5
+ xor r14d,r9d
+ and r12d,ebx
+ xor r13d,ebx
+ add r8d,DWORD[44+rsp]
+ mov edi,r9d
+ xor r12d,edx
+ shrd r14d,r14d,11
+ xor edi,r10d
+ add r8d,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,r9d
+ add r8d,r13d
+ xor r15d,r10d
+ shrd r14d,r14d,2
+ add eax,r8d
+ add r8d,r15d
+ mov r13d,eax
+ add r14d,r8d
+ shrd r13d,r13d,14
+ mov r8d,r14d
+ mov r12d,ebx
+ shrd r14d,r14d,9
+ xor r13d,eax
+ xor r12d,ecx
+ shrd r13d,r13d,5
+ xor r14d,r8d
+ and r12d,eax
+ xor r13d,eax
+ add edx,DWORD[48+rsp]
+ mov r15d,r8d
+ xor r12d,ecx
+ shrd r14d,r14d,11
+ xor r15d,r9d
+ add edx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,r8d
+ add edx,r13d
+ xor edi,r9d
+ shrd r14d,r14d,2
+ add r11d,edx
+ add edx,edi
+ mov r13d,r11d
+ add r14d,edx
+ shrd r13d,r13d,14
+ mov edx,r14d
+ mov r12d,eax
+ shrd r14d,r14d,9
+ xor r13d,r11d
+ xor r12d,ebx
+ shrd r13d,r13d,5
+ xor r14d,edx
+ and r12d,r11d
+ xor r13d,r11d
+ add ecx,DWORD[52+rsp]
+ mov edi,edx
+ xor r12d,ebx
+ shrd r14d,r14d,11
+ xor edi,r8d
+ add ecx,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,edx
+ add ecx,r13d
+ xor r15d,r8d
+ shrd r14d,r14d,2
+ add r10d,ecx
+ add ecx,r15d
+ mov r13d,r10d
+ add r14d,ecx
+ shrd r13d,r13d,14
+ mov ecx,r14d
+ mov r12d,r11d
+ shrd r14d,r14d,9
+ xor r13d,r10d
+ xor r12d,eax
+ shrd r13d,r13d,5
+ xor r14d,ecx
+ and r12d,r10d
+ xor r13d,r10d
+ add ebx,DWORD[56+rsp]
+ mov r15d,ecx
+ xor r12d,eax
+ shrd r14d,r14d,11
+ xor r15d,edx
+ add ebx,r12d
+ shrd r13d,r13d,6
+ and edi,r15d
+ xor r14d,ecx
+ add ebx,r13d
+ xor edi,edx
+ shrd r14d,r14d,2
+ add r9d,ebx
+ add ebx,edi
+ mov r13d,r9d
+ add r14d,ebx
+ shrd r13d,r13d,14
+ mov ebx,r14d
+ mov r12d,r10d
+ shrd r14d,r14d,9
+ xor r13d,r9d
+ xor r12d,r11d
+ shrd r13d,r13d,5
+ xor r14d,ebx
+ and r12d,r9d
+ xor r13d,r9d
+ add eax,DWORD[60+rsp]
+ mov edi,ebx
+ xor r12d,r11d
+ shrd r14d,r14d,11
+ xor edi,ecx
+ add eax,r12d
+ shrd r13d,r13d,6
+ and r15d,edi
+ xor r14d,ebx
+ add eax,r13d
+ xor r15d,ecx
+ shrd r14d,r14d,2
+ add r8d,eax
+ add eax,r15d
+ mov r13d,r8d
+ add r14d,eax
+ mov rdi,QWORD[((64+0))+rsp]
+ mov eax,r14d
+
+ add eax,DWORD[rdi]
+ lea rsi,[64+rsi]
+ add ebx,DWORD[4+rdi]
+ add ecx,DWORD[8+rdi]
+ add edx,DWORD[12+rdi]
+ add r8d,DWORD[16+rdi]
+ add r9d,DWORD[20+rdi]
+ add r10d,DWORD[24+rdi]
+ add r11d,DWORD[28+rdi]
+
+ cmp rsi,QWORD[((64+16))+rsp]
+
+ mov DWORD[rdi],eax
+ mov DWORD[4+rdi],ebx
+ mov DWORD[8+rdi],ecx
+ mov DWORD[12+rdi],edx
+ mov DWORD[16+rdi],r8d
+ mov DWORD[20+rdi],r9d
+ mov DWORD[24+rdi],r10d
+ mov DWORD[28+rdi],r11d
+ jb NEAR $L$loop_avx
+
+ mov rsi,QWORD[88+rsp]
+
+ vzeroupper
+ movaps xmm6,XMMWORD[((64+32))+rsp]
+ movaps xmm7,XMMWORD[((64+48))+rsp]
+ movaps xmm8,XMMWORD[((64+64))+rsp]
+ movaps xmm9,XMMWORD[((64+80))+rsp]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$epilogue_avx:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha256_block_data_order_avx:
+
+ALIGN 64
+sha256_block_data_order_avx2:
+ mov QWORD[8+rsp],rdi ;WIN64 prologue
+ mov QWORD[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order_avx2:
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+
+$L$avx2_shortcut:
+ mov rax,rsp
+
+ push rbx
+
+ push rbp
+
+ push r12
+
+ push r13
+
+ push r14
+
+ push r15
+
+ sub rsp,608
+ shl rdx,4
+ and rsp,-256*4
+ lea rdx,[rdx*4+rsi]
+ add rsp,448
+ mov QWORD[((64+0))+rsp],rdi
+ mov QWORD[((64+8))+rsp],rsi
+ mov QWORD[((64+16))+rsp],rdx
+ mov QWORD[88+rsp],rax
+
+ movaps XMMWORD[(64+32)+rsp],xmm6
+ movaps XMMWORD[(64+48)+rsp],xmm7
+ movaps XMMWORD[(64+64)+rsp],xmm8
+ movaps XMMWORD[(64+80)+rsp],xmm9
+$L$prologue_avx2:
+
+ vzeroupper
+ sub rsi,-16*4
+ mov eax,DWORD[rdi]
+ mov r12,rsi
+ mov ebx,DWORD[4+rdi]
+ cmp rsi,rdx
+ mov ecx,DWORD[8+rdi]
+ cmove r12,rsp
+ mov edx,DWORD[12+rdi]
+ mov r8d,DWORD[16+rdi]
+ mov r9d,DWORD[20+rdi]
+ mov r10d,DWORD[24+rdi]
+ mov r11d,DWORD[28+rdi]
+ vmovdqa ymm8,YMMWORD[((K256+512+32))]
+ vmovdqa ymm9,YMMWORD[((K256+512+64))]
+ jmp NEAR $L$oop_avx2
+ALIGN 16
+$L$oop_avx2:
+ vmovdqa ymm7,YMMWORD[((K256+512))]
+ vmovdqu xmm0,XMMWORD[((-64+0))+rsi]
+ vmovdqu xmm1,XMMWORD[((-64+16))+rsi]
+ vmovdqu xmm2,XMMWORD[((-64+32))+rsi]
+ vmovdqu xmm3,XMMWORD[((-64+48))+rsi]
+
+ vinserti128 ymm0,ymm0,XMMWORD[r12],1
+ vinserti128 ymm1,ymm1,XMMWORD[16+r12],1
+ vpshufb ymm0,ymm0,ymm7
+ vinserti128 ymm2,ymm2,XMMWORD[32+r12],1
+ vpshufb ymm1,ymm1,ymm7
+ vinserti128 ymm3,ymm3,XMMWORD[48+r12],1
+
+ lea rbp,[K256]
+ vpshufb ymm2,ymm2,ymm7
+ vpaddd ymm4,ymm0,YMMWORD[rbp]
+ vpshufb ymm3,ymm3,ymm7
+ vpaddd ymm5,ymm1,YMMWORD[32+rbp]
+ vpaddd ymm6,ymm2,YMMWORD[64+rbp]
+ vpaddd ymm7,ymm3,YMMWORD[96+rbp]
+ vmovdqa YMMWORD[rsp],ymm4
+ xor r14d,r14d
+ vmovdqa YMMWORD[32+rsp],ymm5
+ lea rsp,[((-64))+rsp]
+ mov edi,ebx
+ vmovdqa YMMWORD[rsp],ymm6
+ xor edi,ecx
+ vmovdqa YMMWORD[32+rsp],ymm7
+ mov r12d,r9d
+ sub rbp,-16*2*4
+ jmp NEAR $L$avx2_00_47
+
+ALIGN 16
+$L$avx2_00_47:
+ lea rsp,[((-64))+rsp]
+ vpalignr ymm4,ymm1,ymm0,4
+ add r11d,DWORD[((0+128))+rsp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ vpalignr ymm7,ymm3,ymm2,4
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ vpsrld ymm6,ymm4,7
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ vpaddd ymm0,ymm0,ymm7
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ vpsrld ymm7,ymm4,3
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ vpslld ymm5,ymm4,14
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ vpxor ymm4,ymm7,ymm6
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,ebx
+ vpshufd ymm7,ymm3,250
+ xor r14d,r13d
+ lea r11d,[rdi*1+r11]
+ mov r12d,r8d
+ vpsrld ymm6,ymm6,11
+ add r10d,DWORD[((4+128))+rsp]
+ and r12d,edx
+ rorx r13d,edx,25
+ vpxor ymm4,ymm4,ymm5
+ rorx edi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ vpslld ymm5,ymm5,11
+ andn r12d,edx,r9d
+ xor r13d,edi
+ rorx r14d,edx,6
+ vpxor ymm4,ymm4,ymm6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov edi,r11d
+ vpsrld ymm6,ymm7,10
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor edi,eax
+ vpxor ymm4,ymm4,ymm5
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ vpsrlq ymm7,ymm7,17
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,eax
+ vpaddd ymm0,ymm0,ymm4
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ vpxor ymm6,ymm6,ymm7
+ add r9d,DWORD[((8+128))+rsp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ vpsrlq ymm7,ymm7,2
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ vpshufb ymm6,ymm6,ymm8
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ vpaddd ymm0,ymm0,ymm6
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ vpshufd ymm7,ymm0,80
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ vpsrld ymm6,ymm7,10
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r11d
+ vpsrlq ymm7,ymm7,17
+ xor r14d,r13d
+ lea r9d,[rdi*1+r9]
+ mov r12d,ecx
+ vpxor ymm6,ymm6,ymm7
+ add r8d,DWORD[((12+128))+rsp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ vpsrlq ymm7,ymm7,2
+ rorx edi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,ebx,edx
+ xor r13d,edi
+ rorx r14d,ebx,6
+ vpshufb ymm6,ymm6,ymm9
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov edi,r9d
+ vpaddd ymm0,ymm0,ymm6
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor edi,r10d
+ vpaddd ymm6,ymm0,YMMWORD[rbp]
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ vmovdqa YMMWORD[rsp],ymm6
+ vpalignr ymm4,ymm2,ymm1,4
+ add edx,DWORD[((32+128))+rsp]
+ and r12d,eax
+ rorx r13d,eax,25
+ vpalignr ymm7,ymm0,ymm3,4
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ vpsrld ymm6,ymm4,7
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ vpaddd ymm1,ymm1,ymm7
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ vpsrld ymm7,ymm4,3
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ vpslld ymm5,ymm4,14
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ vpxor ymm4,ymm7,ymm6
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r9d
+ vpshufd ymm7,ymm0,250
+ xor r14d,r13d
+ lea edx,[rdi*1+rdx]
+ mov r12d,eax
+ vpsrld ymm6,ymm6,11
+ add ecx,DWORD[((36+128))+rsp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ vpxor ymm4,ymm4,ymm5
+ rorx edi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ vpslld ymm5,ymm5,11
+ andn r12d,r11d,ebx
+ xor r13d,edi
+ rorx r14d,r11d,6
+ vpxor ymm4,ymm4,ymm6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov edi,edx
+ vpsrld ymm6,ymm7,10
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor edi,r8d
+ vpxor ymm4,ymm4,ymm5
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ vpsrlq ymm7,ymm7,17
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r8d
+ vpaddd ymm1,ymm1,ymm4
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ vpxor ymm6,ymm6,ymm7
+ add ebx,DWORD[((40+128))+rsp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ vpsrlq ymm7,ymm7,2
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ vpshufb ymm6,ymm6,ymm8
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ vpaddd ymm1,ymm1,ymm6
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ vpshufd ymm7,ymm1,80
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ vpsrld ymm6,ymm7,10
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,edx
+ vpsrlq ymm7,ymm7,17
+ xor r14d,r13d
+ lea ebx,[rdi*1+rbx]
+ mov r12d,r10d
+ vpxor ymm6,ymm6,ymm7
+ add eax,DWORD[((44+128))+rsp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ vpsrlq ymm7,ymm7,2
+ rorx edi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,r9d,r11d
+ xor r13d,edi
+ rorx r14d,r9d,6
+ vpshufb ymm6,ymm6,ymm9
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov edi,ebx
+ vpaddd ymm1,ymm1,ymm6
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor edi,ecx
+ vpaddd ymm6,ymm1,YMMWORD[32+rbp]
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ vmovdqa YMMWORD[32+rsp],ymm6
+ lea rsp,[((-64))+rsp]
+ vpalignr ymm4,ymm3,ymm2,4
+ add r11d,DWORD[((0+128))+rsp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ vpalignr ymm7,ymm1,ymm0,4
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ vpsrld ymm6,ymm4,7
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ vpaddd ymm2,ymm2,ymm7
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ vpsrld ymm7,ymm4,3
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ vpslld ymm5,ymm4,14
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ vpxor ymm4,ymm7,ymm6
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,ebx
+ vpshufd ymm7,ymm1,250
+ xor r14d,r13d
+ lea r11d,[rdi*1+r11]
+ mov r12d,r8d
+ vpsrld ymm6,ymm6,11
+ add r10d,DWORD[((4+128))+rsp]
+ and r12d,edx
+ rorx r13d,edx,25
+ vpxor ymm4,ymm4,ymm5
+ rorx edi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ vpslld ymm5,ymm5,11
+ andn r12d,edx,r9d
+ xor r13d,edi
+ rorx r14d,edx,6
+ vpxor ymm4,ymm4,ymm6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov edi,r11d
+ vpsrld ymm6,ymm7,10
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor edi,eax
+ vpxor ymm4,ymm4,ymm5
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ vpsrlq ymm7,ymm7,17
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,eax
+ vpaddd ymm2,ymm2,ymm4
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ vpxor ymm6,ymm6,ymm7
+ add r9d,DWORD[((8+128))+rsp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ vpsrlq ymm7,ymm7,2
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ vpshufb ymm6,ymm6,ymm8
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ vpaddd ymm2,ymm2,ymm6
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ vpshufd ymm7,ymm2,80
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ vpsrld ymm6,ymm7,10
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r11d
+ vpsrlq ymm7,ymm7,17
+ xor r14d,r13d
+ lea r9d,[rdi*1+r9]
+ mov r12d,ecx
+ vpxor ymm6,ymm6,ymm7
+ add r8d,DWORD[((12+128))+rsp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ vpsrlq ymm7,ymm7,2
+ rorx edi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,ebx,edx
+ xor r13d,edi
+ rorx r14d,ebx,6
+ vpshufb ymm6,ymm6,ymm9
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov edi,r9d
+ vpaddd ymm2,ymm2,ymm6
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor edi,r10d
+ vpaddd ymm6,ymm2,YMMWORD[64+rbp]
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ vmovdqa YMMWORD[rsp],ymm6
+ vpalignr ymm4,ymm0,ymm3,4
+ add edx,DWORD[((32+128))+rsp]
+ and r12d,eax
+ rorx r13d,eax,25
+ vpalignr ymm7,ymm2,ymm1,4
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ vpsrld ymm6,ymm4,7
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ vpaddd ymm3,ymm3,ymm7
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ vpsrld ymm7,ymm4,3
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ vpslld ymm5,ymm4,14
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ vpxor ymm4,ymm7,ymm6
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r9d
+ vpshufd ymm7,ymm2,250
+ xor r14d,r13d
+ lea edx,[rdi*1+rdx]
+ mov r12d,eax
+ vpsrld ymm6,ymm6,11
+ add ecx,DWORD[((36+128))+rsp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ vpxor ymm4,ymm4,ymm5
+ rorx edi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ vpslld ymm5,ymm5,11
+ andn r12d,r11d,ebx
+ xor r13d,edi
+ rorx r14d,r11d,6
+ vpxor ymm4,ymm4,ymm6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov edi,edx
+ vpsrld ymm6,ymm7,10
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor edi,r8d
+ vpxor ymm4,ymm4,ymm5
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ vpsrlq ymm7,ymm7,17
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r8d
+ vpaddd ymm3,ymm3,ymm4
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ vpxor ymm6,ymm6,ymm7
+ add ebx,DWORD[((40+128))+rsp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ vpsrlq ymm7,ymm7,2
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ vpshufb ymm6,ymm6,ymm8
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ vpaddd ymm3,ymm3,ymm6
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ vpshufd ymm7,ymm3,80
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ vpsrld ymm6,ymm7,10
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,edx
+ vpsrlq ymm7,ymm7,17
+ xor r14d,r13d
+ lea ebx,[rdi*1+rbx]
+ mov r12d,r10d
+ vpxor ymm6,ymm6,ymm7
+ add eax,DWORD[((44+128))+rsp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ vpsrlq ymm7,ymm7,2
+ rorx edi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ vpxor ymm6,ymm6,ymm7
+ andn r12d,r9d,r11d
+ xor r13d,edi
+ rorx r14d,r9d,6
+ vpshufb ymm6,ymm6,ymm9
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov edi,ebx
+ vpaddd ymm3,ymm3,ymm6
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor edi,ecx
+ vpaddd ymm6,ymm3,YMMWORD[96+rbp]
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ vmovdqa YMMWORD[32+rsp],ymm6
+ lea rbp,[128+rbp]
+ cmp BYTE[3+rbp],0
+ jne NEAR $L$avx2_00_47
+ add r11d,DWORD[((0+64))+rsp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,ebx
+ xor r14d,r13d
+ lea r11d,[rdi*1+r11]
+ mov r12d,r8d
+ add r10d,DWORD[((4+64))+rsp]
+ and r12d,edx
+ rorx r13d,edx,25
+ rorx edi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ andn r12d,edx,r9d
+ xor r13d,edi
+ rorx r14d,edx,6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov edi,r11d
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor edi,eax
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,eax
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ add r9d,DWORD[((8+64))+rsp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r11d
+ xor r14d,r13d
+ lea r9d,[rdi*1+r9]
+ mov r12d,ecx
+ add r8d,DWORD[((12+64))+rsp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ rorx edi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ andn r12d,ebx,edx
+ xor r13d,edi
+ rorx r14d,ebx,6
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov edi,r9d
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor edi,r10d
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ add edx,DWORD[((32+64))+rsp]
+ and r12d,eax
+ rorx r13d,eax,25
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r9d
+ xor r14d,r13d
+ lea edx,[rdi*1+rdx]
+ mov r12d,eax
+ add ecx,DWORD[((36+64))+rsp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ rorx edi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ andn r12d,r11d,ebx
+ xor r13d,edi
+ rorx r14d,r11d,6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov edi,edx
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor edi,r8d
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r8d
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ add ebx,DWORD[((40+64))+rsp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,edx
+ xor r14d,r13d
+ lea ebx,[rdi*1+rbx]
+ mov r12d,r10d
+ add eax,DWORD[((44+64))+rsp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ rorx edi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ andn r12d,r9d,r11d
+ xor r13d,edi
+ rorx r14d,r9d,6
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov edi,ebx
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor edi,ecx
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ add r11d,DWORD[rsp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,ebx
+ xor r14d,r13d
+ lea r11d,[rdi*1+r11]
+ mov r12d,r8d
+ add r10d,DWORD[4+rsp]
+ and r12d,edx
+ rorx r13d,edx,25
+ rorx edi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ andn r12d,edx,r9d
+ xor r13d,edi
+ rorx r14d,edx,6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov edi,r11d
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor edi,eax
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,eax
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ add r9d,DWORD[8+rsp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r11d
+ xor r14d,r13d
+ lea r9d,[rdi*1+r9]
+ mov r12d,ecx
+ add r8d,DWORD[12+rsp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ rorx edi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ andn r12d,ebx,edx
+ xor r13d,edi
+ rorx r14d,ebx,6
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov edi,r9d
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor edi,r10d
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ add edx,DWORD[32+rsp]
+ and r12d,eax
+ rorx r13d,eax,25
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r9d
+ xor r14d,r13d
+ lea edx,[rdi*1+rdx]
+ mov r12d,eax
+ add ecx,DWORD[36+rsp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ rorx edi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ andn r12d,r11d,ebx
+ xor r13d,edi
+ rorx r14d,r11d,6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov edi,edx
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor edi,r8d
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r8d
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ add ebx,DWORD[40+rsp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,edx
+ xor r14d,r13d
+ lea ebx,[rdi*1+rbx]
+ mov r12d,r10d
+ add eax,DWORD[44+rsp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ rorx edi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ andn r12d,r9d,r11d
+ xor r13d,edi
+ rorx r14d,r9d,6
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov edi,ebx
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor edi,ecx
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ mov rdi,QWORD[512+rsp]
+ add eax,r14d
+
+ lea rbp,[448+rsp]
+
+ add eax,DWORD[rdi]
+ add ebx,DWORD[4+rdi]
+ add ecx,DWORD[8+rdi]
+ add edx,DWORD[12+rdi]
+ add r8d,DWORD[16+rdi]
+ add r9d,DWORD[20+rdi]
+ add r10d,DWORD[24+rdi]
+ add r11d,DWORD[28+rdi]
+
+ mov DWORD[rdi],eax
+ mov DWORD[4+rdi],ebx
+ mov DWORD[8+rdi],ecx
+ mov DWORD[12+rdi],edx
+ mov DWORD[16+rdi],r8d
+ mov DWORD[20+rdi],r9d
+ mov DWORD[24+rdi],r10d
+ mov DWORD[28+rdi],r11d
+
+ cmp rsi,QWORD[80+rbp]
+ je NEAR $L$done_avx2
+
+ xor r14d,r14d
+ mov edi,ebx
+ xor edi,ecx
+ mov r12d,r9d
+ jmp NEAR $L$ower_avx2
+ALIGN 16
+$L$ower_avx2:
+ add r11d,DWORD[((0+16))+rbp]
+ and r12d,r8d
+ rorx r13d,r8d,25
+ rorx r15d,r8d,11
+ lea eax,[r14*1+rax]
+ lea r11d,[r12*1+r11]
+ andn r12d,r8d,r10d
+ xor r13d,r15d
+ rorx r14d,r8d,6
+ lea r11d,[r12*1+r11]
+ xor r13d,r14d
+ mov r15d,eax
+ rorx r12d,eax,22
+ lea r11d,[r13*1+r11]
+ xor r15d,ebx
+ rorx r14d,eax,13
+ rorx r13d,eax,2
+ lea edx,[r11*1+rdx]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,ebx
+ xor r14d,r13d
+ lea r11d,[rdi*1+r11]
+ mov r12d,r8d
+ add r10d,DWORD[((4+16))+rbp]
+ and r12d,edx
+ rorx r13d,edx,25
+ rorx edi,edx,11
+ lea r11d,[r14*1+r11]
+ lea r10d,[r12*1+r10]
+ andn r12d,edx,r9d
+ xor r13d,edi
+ rorx r14d,edx,6
+ lea r10d,[r12*1+r10]
+ xor r13d,r14d
+ mov edi,r11d
+ rorx r12d,r11d,22
+ lea r10d,[r13*1+r10]
+ xor edi,eax
+ rorx r14d,r11d,13
+ rorx r13d,r11d,2
+ lea ecx,[r10*1+rcx]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,eax
+ xor r14d,r13d
+ lea r10d,[r15*1+r10]
+ mov r12d,edx
+ add r9d,DWORD[((8+16))+rbp]
+ and r12d,ecx
+ rorx r13d,ecx,25
+ rorx r15d,ecx,11
+ lea r10d,[r14*1+r10]
+ lea r9d,[r12*1+r9]
+ andn r12d,ecx,r8d
+ xor r13d,r15d
+ rorx r14d,ecx,6
+ lea r9d,[r12*1+r9]
+ xor r13d,r14d
+ mov r15d,r10d
+ rorx r12d,r10d,22
+ lea r9d,[r13*1+r9]
+ xor r15d,r11d
+ rorx r14d,r10d,13
+ rorx r13d,r10d,2
+ lea ebx,[r9*1+rbx]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r11d
+ xor r14d,r13d
+ lea r9d,[rdi*1+r9]
+ mov r12d,ecx
+ add r8d,DWORD[((12+16))+rbp]
+ and r12d,ebx
+ rorx r13d,ebx,25
+ rorx edi,ebx,11
+ lea r9d,[r14*1+r9]
+ lea r8d,[r12*1+r8]
+ andn r12d,ebx,edx
+ xor r13d,edi
+ rorx r14d,ebx,6
+ lea r8d,[r12*1+r8]
+ xor r13d,r14d
+ mov edi,r9d
+ rorx r12d,r9d,22
+ lea r8d,[r13*1+r8]
+ xor edi,r10d
+ rorx r14d,r9d,13
+ rorx r13d,r9d,2
+ lea eax,[r8*1+rax]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r10d
+ xor r14d,r13d
+ lea r8d,[r15*1+r8]
+ mov r12d,ebx
+ add edx,DWORD[((32+16))+rbp]
+ and r12d,eax
+ rorx r13d,eax,25
+ rorx r15d,eax,11
+ lea r8d,[r14*1+r8]
+ lea edx,[r12*1+rdx]
+ andn r12d,eax,ecx
+ xor r13d,r15d
+ rorx r14d,eax,6
+ lea edx,[r12*1+rdx]
+ xor r13d,r14d
+ mov r15d,r8d
+ rorx r12d,r8d,22
+ lea edx,[r13*1+rdx]
+ xor r15d,r9d
+ rorx r14d,r8d,13
+ rorx r13d,r8d,2
+ lea r11d,[rdx*1+r11]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,r9d
+ xor r14d,r13d
+ lea edx,[rdi*1+rdx]
+ mov r12d,eax
+ add ecx,DWORD[((36+16))+rbp]
+ and r12d,r11d
+ rorx r13d,r11d,25
+ rorx edi,r11d,11
+ lea edx,[r14*1+rdx]
+ lea ecx,[r12*1+rcx]
+ andn r12d,r11d,ebx
+ xor r13d,edi
+ rorx r14d,r11d,6
+ lea ecx,[r12*1+rcx]
+ xor r13d,r14d
+ mov edi,edx
+ rorx r12d,edx,22
+ lea ecx,[r13*1+rcx]
+ xor edi,r8d
+ rorx r14d,edx,13
+ rorx r13d,edx,2
+ lea r10d,[rcx*1+r10]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,r8d
+ xor r14d,r13d
+ lea ecx,[r15*1+rcx]
+ mov r12d,r11d
+ add ebx,DWORD[((40+16))+rbp]
+ and r12d,r10d
+ rorx r13d,r10d,25
+ rorx r15d,r10d,11
+ lea ecx,[r14*1+rcx]
+ lea ebx,[r12*1+rbx]
+ andn r12d,r10d,eax
+ xor r13d,r15d
+ rorx r14d,r10d,6
+ lea ebx,[r12*1+rbx]
+ xor r13d,r14d
+ mov r15d,ecx
+ rorx r12d,ecx,22
+ lea ebx,[r13*1+rbx]
+ xor r15d,edx
+ rorx r14d,ecx,13
+ rorx r13d,ecx,2
+ lea r9d,[rbx*1+r9]
+ and edi,r15d
+ xor r14d,r12d
+ xor edi,edx
+ xor r14d,r13d
+ lea ebx,[rdi*1+rbx]
+ mov r12d,r10d
+ add eax,DWORD[((44+16))+rbp]
+ and r12d,r9d
+ rorx r13d,r9d,25
+ rorx edi,r9d,11
+ lea ebx,[r14*1+rbx]
+ lea eax,[r12*1+rax]
+ andn r12d,r9d,r11d
+ xor r13d,edi
+ rorx r14d,r9d,6
+ lea eax,[r12*1+rax]
+ xor r13d,r14d
+ mov edi,ebx
+ rorx r12d,ebx,22
+ lea eax,[r13*1+rax]
+ xor edi,ecx
+ rorx r14d,ebx,13
+ rorx r13d,ebx,2
+ lea r8d,[rax*1+r8]
+ and r15d,edi
+ xor r14d,r12d
+ xor r15d,ecx
+ xor r14d,r13d
+ lea eax,[r15*1+rax]
+ mov r12d,r9d
+ lea rbp,[((-64))+rbp]
+ cmp rbp,rsp
+ jae NEAR $L$ower_avx2
+
+ mov rdi,QWORD[512+rsp]
+ add eax,r14d
+
+ lea rsp,[448+rsp]
+
+ add eax,DWORD[rdi]
+ add ebx,DWORD[4+rdi]
+ add ecx,DWORD[8+rdi]
+ add edx,DWORD[12+rdi]
+ add r8d,DWORD[16+rdi]
+ add r9d,DWORD[20+rdi]
+ lea rsi,[128+rsi]
+ add r10d,DWORD[24+rdi]
+ mov r12,rsi
+ add r11d,DWORD[28+rdi]
+ cmp rsi,QWORD[((64+16))+rsp]
+
+ mov DWORD[rdi],eax
+ cmove r12,rsp
+ mov DWORD[4+rdi],ebx
+ mov DWORD[8+rdi],ecx
+ mov DWORD[12+rdi],edx
+ mov DWORD[16+rdi],r8d
+ mov DWORD[20+rdi],r9d
+ mov DWORD[24+rdi],r10d
+ mov DWORD[28+rdi],r11d
+
+ jbe NEAR $L$oop_avx2
+ lea rbp,[rsp]
+
+$L$done_avx2:
+ lea rsp,[rbp]
+ mov rsi,QWORD[88+rsp]
+
+ vzeroupper
+ movaps xmm6,XMMWORD[((64+32))+rsp]
+ movaps xmm7,XMMWORD[((64+48))+rsp]
+ movaps xmm8,XMMWORD[((64+64))+rsp]
+ movaps xmm9,XMMWORD[((64+80))+rsp]
+ mov r15,QWORD[((-48))+rsi]
+
+ mov r14,QWORD[((-40))+rsi]
+
+ mov r13,QWORD[((-32))+rsi]
+
+ mov r12,QWORD[((-24))+rsi]
+
+ mov rbp,QWORD[((-16))+rsi]
+
+ mov rbx,QWORD[((-8))+rsi]
+
+ lea rsp,[rsi]
+
+$L$epilogue_avx2:
+ mov rdi,QWORD[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD[16+rsp]
+ DB 0F3h,0C3h ;repret
+
+$L$SEH_end_sha256_block_data_order_avx2:
+EXTERN __imp_RtlVirtualUnwind
+
+ALIGN 16
+se_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8]
+ mov rbx,QWORD[248+r8]
+
+ mov rsi,QWORD[8+r9]
+ mov r11,QWORD[56+r9]
+
+ mov r10d,DWORD[r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jb NEAR $L$in_prologue
+
+ mov rax,QWORD[152+r8]
+
+ mov r10d,DWORD[4+r11]
+ lea r10,[r10*1+rsi]
+ cmp rbx,r10
+ jae NEAR $L$in_prologue
+ lea r10,[$L$avx2_shortcut]
+ cmp rbx,r10
+ jb NEAR $L$not_in_avx2
+
+ and rax,-256*4
+ add rax,448
+$L$not_in_avx2:
+ mov rsi,rax
+ mov rax,QWORD[((64+24))+rax]
+
+ mov rbx,QWORD[((-8))+rax]
+ mov rbp,QWORD[((-16))+rax]
+ mov r12,QWORD[((-24))+rax]
+ mov r13,QWORD[((-32))+rax]
+ mov r14,QWORD[((-40))+rax]
+ mov r15,QWORD[((-48))+rax]
+ mov QWORD[144+r8],rbx
+ mov QWORD[160+r8],rbp
+ mov QWORD[216+r8],r12
+ mov QWORD[224+r8],r13
+ mov QWORD[232+r8],r14
+ mov QWORD[240+r8],r15
+
+ lea r10,[$L$epilogue]
+ cmp rbx,r10
+ jb NEAR $L$in_prologue
+
+ lea rsi,[((64+32))+rsi]
+ lea rdi,[512+r8]
+ mov ecx,8
+ DD 0xa548f3fc
+
+$L$in_prologue:
+ mov rdi,QWORD[8+rax]
+ mov rsi,QWORD[16+rax]
+ mov QWORD[152+r8],rax
+ mov QWORD[168+r8],rsi
+ mov QWORD[176+r8],rdi
+
+ mov rdi,QWORD[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0xa548f3fc
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD[8+rsi]
+ mov r8,QWORD[rsi]
+ mov r9,QWORD[16+rsi]
+ mov r10,QWORD[40+rsi]
+ lea r11,[56+rsi]
+ lea r12,[24+rsi]
+ mov QWORD[32+rsp],r10
+ mov QWORD[40+rsp],r11
+ mov QWORD[48+rsp],r12
+ mov QWORD[56+rsp],rcx
+ call QWORD[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+
+
+ALIGN 16
+shaext_handler:
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD[120+r8]
+ mov rbx,QWORD[248+r8]
+
+ lea r10,[$L$prologue_shaext]
+ cmp rbx,r10
+ jb NEAR $L$in_prologue
+
+ lea r10,[$L$epilogue_shaext]
+ cmp rbx,r10
+ jae NEAR $L$in_prologue
+
+ lea rsi,[((-8-80))+rax]
+ lea rdi,[512+r8]
+ mov ecx,10
+ DD 0xa548f3fc
+
+ jmp NEAR $L$in_prologue
+
+section .pdata rdata align=4
+ALIGN 4
+ DD $L$SEH_begin_sha256_block_data_order wrt ..imagebase
+ DD $L$SEH_end_sha256_block_data_order wrt ..imagebase
+ DD $L$SEH_info_sha256_block_data_order wrt ..imagebase
+ DD $L$SEH_begin_sha256_block_data_order_shaext wrt ..imagebase
+ DD $L$SEH_end_sha256_block_data_order_shaext wrt ..imagebase
+ DD $L$SEH_info_sha256_block_data_order_shaext wrt ..imagebase
+ DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase
+ DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase
+ DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase
+ DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase
+ DD $L$SEH_end_sha256_block_data_order_avx wrt ..imagebase
+ DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase
+ DD $L$SEH_begin_sha256_block_data_order_avx2 wrt ..imagebase
+ DD $L$SEH_end_sha256_block_data_order_avx2 wrt ..imagebase
+ DD $L$SEH_info_sha256_block_data_order_avx2 wrt ..imagebase
+section .xdata rdata align=8
+ALIGN 8
+$L$SEH_info_sha256_block_data_order:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase
+$L$SEH_info_sha256_block_data_order_shaext:
+DB 9,0,0,0
+ DD shaext_handler wrt ..imagebase
+$L$SEH_info_sha256_block_data_order_ssse3:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase
+$L$SEH_info_sha256_block_data_order_avx:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
+$L$SEH_info_sha256_block_data_order_avx2:
+DB 9,0,0,0
+ DD se_handler wrt ..imagebase
+ DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase