; Syntax: NASM (Intel operand order), x86-64.
; The entry points in this file take their first four arguments in
; rcx, rdx, r8, r9 and carry ";WIN64 prologue" markers.
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64

EXTERN OPENSSL_ia32cap_P
global aesni_cbc_sha256_enc

;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc -- capability probe and dispatcher.
;
; In:    rcx = first argument; it is only compared against zero here.
;        All other argument registers and the stack are left untouched,
;        so a selected variant sees the caller's arguments unchanged.
; Out:   rcx == 0 : returns with eax = 1 (probe only, nothing processed).
;        rcx != 0 : tail-jumps to the first variant whose capability
;                   test passes, in the order shaext, xop, avx2, avx;
;                   executes ud2 if none matches.
; Clobb: eax, r10, r11, flags.
;
; Capability words are read from OPENSSL_ia32cap_P:
;        eax       = dword at +0 (loaded, not tested here)
;        r10       = qword at +4
;        r11d      = upper half of r10 (i.e. the dword at +8)
; NOTE(review): the bit meanings given below follow OpenSSL's documented
; OPENSSL_ia32cap layout -- confirm against that documentation.
;-----------------------------------------------------------------------
ALIGN 16
aesni_cbc_sha256_enc:
        lea r11,[OPENSSL_ia32cap_P]
        mov eax,1                       ; return value for the probe path
        test rcx,rcx                    ; null first argument => probe call
        je NEAR $L$probe
        mov eax,DWORD[r11]
        mov r10,QWORD[4+r11]
        bt r10,61                       ; bit 29 of the dword at +8 (presumably SHA extensions)
        jc NEAR aesni_cbc_sha256_enc_shaext
        mov r11,r10
        shr r11,32                      ; r11d = dword at +8
        test r10d,2048                  ; bit 11 of the dword at +4 (presumably XOP)
        jnz NEAR aesni_cbc_sha256_enc_xop
        and r11d,296                    ; 296 = bits 3, 5 and 8 (presumably BMI1, AVX2, BMI2)
        cmp r11d,296                    ; all three must be set
        je NEAR aesni_cbc_sha256_enc_avx2
        and r10d,268435456              ; 268435456 = 1<<28 (presumably AVX)
        jnz NEAR aesni_cbc_sha256_enc_avx
        ud2                             ; no usable variant: trap
; No label sits between the ud2 above and $L$probe, so nothing visible in
; this view branches to the four instructions below; they are kept as-is.
        xor eax,eax
        test rcx,rcx
        je NEAR $L$probe
        ud2
$L$probe:
        DB 0F3h,0C3h ;repret

;-----------------------------------------------------------------------
; K256 -- constant table, 64-byte aligned.
; Every row of four dwords is emitted twice, so consecutive distinct rows
; are 32 bytes apart. The values are the SHA-256 round constants.
;-----------------------------------------------------------------------
ALIGN 64
K256:
DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
; NOTE: the operands of the DD below are on the next source line; the
; line break in this file falls in the middle of the statement.
DD
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
; K256+512: byte-shuffle mask used with vpshufb on each loaded message block
; (reverses the bytes within every dword). Its first dword, 0x00010203, has a
; zero top byte; the code below uses that as an end-of-table sentinel.
DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
; K256+544: 80 bytes of zeros with one all-ones 16-byte window at +32.
; Three 16-byte selector masks are read from here at a round-count dependent
; offset (see the vmovdqa xmm14/xmm13/xmm12 loads in the function below).
DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1
DD 0,0,0,0,0,0,0,0
; ASCII: "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL
DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54
DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95
DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98
DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108
DB 46,111,114,103,62,0
ALIGN 64

;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc_xop -- AES-CBC encryption interleaved with SHA-256
; block processing; the message schedule uses XOP-encoded instructions.
;
; Win64 entry: the seven arguments arrive in rcx, rdx, r8, r9, [rsp+40],
; [rsp+48], [rsp+56] and are moved to rdi, rsi, rdx, rcx, r8, r9, r10.
; Roles as used by the code below:
;   arg1 (rdi -> r12) : AES input pointer, read 16 bytes at a time
;   arg2 (rsi)        : AES output pointer (kept as the offset arg2-arg1)
;   arg3 (rdx)        : count of 64-byte blocks (shifted left by 6)
;   arg4 (rcx)        : AES round keys; dword at +240 is the round count
;   arg5 (r8)         : 16-byte chaining value, loaded at entry and written
;                       back at exit
;   arg6 (r9 -> r15)  : eight 32-bit hash state words, updated per block
;   arg7 (r10)        : hashed-data pointer (kept as the offset arg7-arg1)
;
; Frame after "and rsp,-64":
;   [rsp+0..63]     16 dwords of (message word + constant) for the rounds
;   [rsp+64]        current AES input pointer
;   [rsp+72]        arg2 - arg1
;   [rsp+80]        end pointer = arg1 + 64*arg3
;   [rsp+96]        arg5
;   [rsp+104]       arg6
;   [rsp+112]       arg7 - arg1
;   [rsp+120]       rsp value on entry (used by the epilogue)
;   [rsp+128..287]  saved xmm6..xmm15
;
; The main loop is do-while shaped: one 64-byte block is always processed
; before the end pointer is checked.
;-----------------------------------------------------------------------
ALIGN 64
aesni_cbc_sha256_enc_xop:
        mov QWORD[8+rsp],rdi ;WIN64 prologue
        mov QWORD[16+rsp],rsi
        mov rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_xop:
        mov rdi,rcx
        mov rsi,rdx
        mov rdx,r8
        mov rcx,r9
        mov r8,QWORD[40+rsp]
        mov r9,QWORD[48+rsp]
; Secondary label; nothing in this view branches to it.
$L$xop_shortcut:
        mov r10,QWORD[56+rsp]
        mov rax,rsp
        push rbx
        push rbp
        push r12
        push r13
        push r14
        push r15
        sub rsp,288
        and rsp,-64
        shl rdx,6
        sub rsi,rdi
        sub r10,rdi
        add rdx,rdi
        mov QWORD[((64+8))+rsp],rsi
        mov QWORD[((64+16))+rsp],rdx
        mov QWORD[((64+32))+rsp],r8
        mov QWORD[((64+40))+rsp],r9
        mov QWORD[((64+48))+rsp],r10
        mov QWORD[120+rsp],rax
        movaps XMMWORD[128+rsp],xmm6
        movaps XMMWORD[144+rsp],xmm7
        movaps XMMWORD[160+rsp],xmm8
        movaps XMMWORD[176+rsp],xmm9
        movaps XMMWORD[192+rsp],xmm10
        movaps XMMWORD[208+rsp],xmm11
        movaps XMMWORD[224+rsp],xmm12
        movaps XMMWORD[240+rsp],xmm13
        movaps XMMWORD[256+rsp],xmm14
        movaps XMMWORD[272+rsp],xmm15
$L$prologue_xop:
        vzeroall
        mov r12,rdi
; rdi = arg4+128, so round key n is addressed as [(16*n-128)+rdi].
        lea rdi,[128+rcx]
        lea r13,[((K256+544))]
        mov r14d,DWORD[((240-128))+rdi]
        mov r15,r9
        mov rsi,r10
        vmovdqu xmm8,XMMWORD[r8]
        sub r14,9
; eax,ebx,ecx,edx,r8d..r11d = the eight hash state words.
        mov eax,DWORD[r15]
        mov ebx,DWORD[4+r15]
        mov ecx,DWORD[8+r15]
        mov edx,DWORD[12+r15]
        mov r8d,DWORD[16+r15]
        mov r9d,DWORD[20+r15]
        mov r10d,DWORD[24+r15]
        mov r11d,DWORD[28+r15]
; xmm14/xmm13/xmm12 = selector masks at (round count - 9)*8 + {0,16,32}
; into the mask table. They are ANDed with the three vaesenclast results
; below (xmm12 after key 10, xmm13 after key 12, xmm14 after key 14).
        vmovdqa xmm14,XMMWORD[r14*8+r13]
        vmovdqa xmm13,XMMWORD[16+r14*8+r13]
        vmovdqa xmm12,XMMWORD[32+r14*8+r13]
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        jmp NEAR $L$loop_xop
ALIGN 16
; ---- per 64-byte block: load 16 message dwords, byte-shuffle them, and
; ---- pre-add the first four constant rows into [rsp+0..63].
$L$loop_xop:
        vmovdqa xmm7,XMMWORD[((K256+512))]
        vmovdqu xmm0,XMMWORD[r12*1+rsi]
        vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
        vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
        vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
        vpshufb xmm0,xmm0,xmm7
        lea rbp,[K256]
        vpshufb xmm1,xmm1,xmm7
        vpshufb xmm2,xmm2,xmm7
        vpaddd xmm4,xmm0,XMMWORD[rbp]
        vpshufb xmm3,xmm3,xmm7
        vpaddd xmm5,xmm1,XMMWORD[32+rbp]
        vpaddd xmm6,xmm2,XMMWORD[64+rbp]
        vpaddd xmm7,xmm3,XMMWORD[96+rbp]
        vmovdqa XMMWORD[rsp],xmm4
        mov r14d,eax
        vmovdqa XMMWORD[16+rsp],xmm5
        mov esi,ebx
        vmovdqa XMMWORD[32+rsp],xmm6
        xor esi,ecx
        vmovdqa XMMWORD[48+rsp],xmm7
        mov r13d,r8d
        jmp NEAR $L$xop_00_47
ALIGN 16
; ---- one pass = 16 scalar rounds + schedule update of xmm0..xmm3 + one
; ---- 16-byte AES-CBC block. rbp advances 128 bytes (four constant rows).
;
; The "DB 143,232,120,194,<modrm>,<imm>" lines are instructions emitted as
; raw bytes (8F E8 78 C2 /r ib). 0x8F is the XOP escape; this form
; presumably decodes as vprotd (rotate packed dwords by immediate), e.g.
; 236,14 -> vprotd xmm5,xmm4,14 -- TODO confirm against the AMD manual.
$L$xop_00_47:
        sub rbp,-16*2*4
        vmovdqu xmm9,XMMWORD[r12]
        mov QWORD[((64+0))+rsp],r12
; -- rounds consuming [rsp+0..12]; schedule update of xmm0
        vpalignr xmm4,xmm1,xmm0,4
        ror r13d,14
        mov eax,r14d
        vpalignr xmm7,xmm3,xmm2,4
        mov r12d,r9d
        xor r13d,r8d
        DB 143,232,120,194,236,14
        ror r14d,9
        xor r12d,r10d
        vpsrld xmm4,xmm4,3
        ror r13d,5
        xor r14d,eax
        vpaddd xmm0,xmm0,xmm7
        and r12d,r8d
; AES: xor the input block with round key 0
        vpxor xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor r13d,r8d
        add r11d,DWORD[rsp]
        mov r15d,eax
        DB 143,232,120,194,245,11
        ror r14d,11
        xor r12d,r10d
        vpxor xmm4,xmm4,xmm5
        xor r15d,ebx
        ror r13d,6
        add r11d,r12d
        and esi,r15d
        DB 143,232,120,194,251,13
        xor r14d,eax
        add r11d,r13d
        vpxor xmm4,xmm4,xmm6
        xor esi,ebx
        add edx,r11d
        vpsrld xmm6,xmm3,10
        ror r14d,2
        add r11d,esi
        vpaddd xmm0,xmm0,xmm4
        mov r13d,edx
        add r14d,r11d
        DB 143,232,120,194,239,2
        ror r13d,14
        mov r11d,r14d
        vpxor xmm7,xmm7,xmm6
        mov r12d,r8d
        xor r13d,edx
        ror r14d,9
        xor r12d,r9d
        vpxor xmm7,xmm7,xmm5
        ror r13d,5
        xor r14d,r11d
        and r12d,edx
; AES-CBC chaining: xor with the previous output block held in xmm8
        vpxor xmm9,xmm9,xmm8
        xor r13d,edx
        vpsrldq xmm7,xmm7,8
        add r10d,DWORD[4+rsp]
        mov esi,r11d
        ror r14d,11
        xor r12d,r9d
        vpaddd xmm0,xmm0,xmm7
        xor esi,eax
        ror r13d,6
        add r10d,r12d
        and r15d,esi
        DB 143,232,120,194,248,13
        xor r14d,r11d
        add r10d,r13d
        vpsrld xmm6,xmm0,10
        xor r15d,eax
        add ecx,r10d
        DB 143,232,120,194,239,2
        ror r14d,2
        add r10d,r15d
        vpxor xmm7,xmm7,xmm6
        mov r13d,ecx
        add r14d,r10d
        ror r13d,14
        mov r10d,r14d
        vpxor xmm7,xmm7,xmm5
        mov r12d,edx
        xor r13d,ecx
        ror r14d,9
        xor r12d,r8d
        vpslldq xmm7,xmm7,8
        ror r13d,5
        xor r14d,r10d
        and r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor r13d,ecx
        vpaddd xmm0,xmm0,xmm7
        add r9d,DWORD[8+rsp]
        mov r15d,r10d
        ror r14d,11
        xor r12d,r8d
        vpaddd xmm6,xmm0,XMMWORD[rbp]
        xor r15d,r11d
        ror r13d,6
        add r9d,r12d
        and esi,r15d
        xor r14d,r10d
        add r9d,r13d
        xor esi,r11d
        add ebx,r9d
        ror r14d,2
        add r9d,esi
        mov r13d,ebx
        add r14d,r9d
        ror r13d,14
        mov r9d,r14d
        mov r12d,ecx
        xor r13d,ebx
        ror r14d,9
        xor r12d,edx
        ror r13d,5
        xor r14d,r9d
        and r12d,ebx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor r13d,ebx
        add r8d,DWORD[12+rsp]
        mov esi,r9d
        ror r14d,11
        xor r12d,edx
        xor esi,r10d
        ror r13d,6
        add r8d,r12d
        and r15d,esi
        xor r14d,r9d
        add r8d,r13d
        xor r15d,r10d
        add eax,r8d
        ror r14d,2
        add r8d,r15d
        mov r13d,eax
        add r14d,r8d
; -- store refreshed [rsp+0..15]; rounds consuming [rsp+16..28]; schedule update of xmm1
        vmovdqa XMMWORD[rsp],xmm6
        vpalignr xmm4,xmm2,xmm1,4
        ror r13d,14
        mov r8d,r14d
        vpalignr xmm7,xmm0,xmm3,4
        mov r12d,ebx
        xor r13d,eax
        DB 143,232,120,194,236,14
        ror r14d,9
        xor r12d,ecx
        vpsrld xmm4,xmm4,3
        ror r13d,5
        xor r14d,r8d
        vpaddd xmm1,xmm1,xmm7
        and r12d,eax
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor r13d,eax
        add edx,DWORD[16+rsp]
        mov r15d,r8d
        DB 143,232,120,194,245,11
        ror r14d,11
        xor r12d,ecx
        vpxor xmm4,xmm4,xmm5
        xor r15d,r9d
        ror r13d,6
        add edx,r12d
        and esi,r15d
        DB 143,232,120,194,248,13
        xor r14d,r8d
        add edx,r13d
        vpxor xmm4,xmm4,xmm6
        xor esi,r9d
        add r11d,edx
        vpsrld xmm6,xmm0,10
        ror r14d,2
        add edx,esi
        vpaddd xmm1,xmm1,xmm4
        mov r13d,r11d
        add r14d,edx
        DB 143,232,120,194,239,2
        ror r13d,14
        mov edx,r14d
        vpxor xmm7,xmm7,xmm6
        mov r12d,eax
        xor r13d,r11d
        ror r14d,9
        xor r12d,ebx
        vpxor xmm7,xmm7,xmm5
        ror r13d,5
        xor r14d,edx
        and r12d,r11d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor r13d,r11d
        vpsrldq xmm7,xmm7,8
        add ecx,DWORD[20+rsp]
        mov esi,edx
        ror r14d,11
        xor r12d,ebx
        vpaddd xmm1,xmm1,xmm7
        xor esi,r8d
        ror r13d,6
        add ecx,r12d
        and r15d,esi
        DB 143,232,120,194,249,13
        xor r14d,edx
        add ecx,r13d
        vpsrld xmm6,xmm1,10
        xor r15d,r8d
        add r10d,ecx
        DB 143,232,120,194,239,2
        ror r14d,2
        add ecx,r15d
        vpxor xmm7,xmm7,xmm6
        mov r13d,r10d
        add r14d,ecx
        ror r13d,14
        mov ecx,r14d
        vpxor xmm7,xmm7,xmm5
        mov r12d,r11d
        xor r13d,r10d
        ror r14d,9
        xor r12d,eax
        vpslldq xmm7,xmm7,8
        ror r13d,5
        xor r14d,ecx
        and r12d,r10d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor r13d,r10d
        vpaddd xmm1,xmm1,xmm7
        add ebx,DWORD[24+rsp]
        mov r15d,ecx
        ror r14d,11
        xor r12d,eax
        vpaddd xmm6,xmm1,XMMWORD[32+rbp]
        xor r15d,edx
        ror r13d,6
        add ebx,r12d
        and esi,r15d
        xor r14d,ecx
        add ebx,r13d
        xor esi,edx
        add r9d,ebx
        ror r14d,2
        add ebx,esi
        mov r13d,r9d
        add r14d,ebx
        ror r13d,14
        mov ebx,r14d
        mov r12d,r10d
        xor r13d,r9d
        ror r14d,9
        xor r12d,r11d
        ror r13d,5
        xor r14d,ebx
        and r12d,r9d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor r13d,r9d
        add eax,DWORD[28+rsp]
        mov esi,ebx
        ror r14d,11
        xor r12d,r11d
        xor esi,ecx
        ror r13d,6
        add eax,r12d
        and r15d,esi
        xor r14d,ebx
        add eax,r13d
        xor r15d,ecx
        add r8d,eax
        ror r14d,2
        add eax,r15d
        mov r13d,r8d
        add r14d,eax
; -- store refreshed [rsp+16..31]; rounds consuming [rsp+32..44]; schedule update of xmm2
        vmovdqa XMMWORD[16+rsp],xmm6
        vpalignr xmm4,xmm3,xmm2,4
        ror r13d,14
        mov eax,r14d
        vpalignr xmm7,xmm1,xmm0,4
        mov r12d,r9d
        xor r13d,r8d
        DB 143,232,120,194,236,14
        ror r14d,9
        xor r12d,r10d
        vpsrld xmm4,xmm4,3
        ror r13d,5
        xor r14d,eax
        vpaddd xmm2,xmm2,xmm7
        and r12d,r8d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor r13d,r8d
        add r11d,DWORD[32+rsp]
        mov r15d,eax
        DB 143,232,120,194,245,11
        ror r14d,11
        xor r12d,r10d
        vpxor xmm4,xmm4,xmm5
        xor r15d,ebx
        ror r13d,6
        add r11d,r12d
        and esi,r15d
        DB 143,232,120,194,249,13
        xor r14d,eax
        add r11d,r13d
        vpxor xmm4,xmm4,xmm6
        xor esi,ebx
        add edx,r11d
        vpsrld xmm6,xmm1,10
        ror r14d,2
        add r11d,esi
        vpaddd xmm2,xmm2,xmm4
        mov r13d,edx
        add r14d,r11d
        DB 143,232,120,194,239,2
        ror r13d,14
        mov r11d,r14d
        vpxor xmm7,xmm7,xmm6
        mov r12d,r8d
        xor r13d,edx
        ror r14d,9
        xor r12d,r9d
        vpxor xmm7,xmm7,xmm5
        ror r13d,5
        xor r14d,r11d
        and r12d,edx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor r13d,edx
        vpsrldq xmm7,xmm7,8
        add r10d,DWORD[36+rsp]
        mov esi,r11d
        ror r14d,11
        xor r12d,r9d
        vpaddd xmm2,xmm2,xmm7
        xor esi,eax
        ror r13d,6
        add r10d,r12d
        and r15d,esi
        DB 143,232,120,194,250,13
        xor r14d,r11d
        add r10d,r13d
        vpsrld xmm6,xmm2,10
        xor r15d,eax
        add ecx,r10d
        DB 143,232,120,194,239,2
        ror r14d,2
        add r10d,r15d
        vpxor xmm7,xmm7,xmm6
        mov r13d,ecx
        add r14d,r10d
        ror r13d,14
        mov r10d,r14d
        vpxor xmm7,xmm7,xmm5
        mov r12d,edx
        xor r13d,ecx
        ror r14d,9
        xor r12d,r8d
        vpslldq xmm7,xmm7,8
        ror r13d,5
        xor r14d,r10d
        and r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor r13d,ecx
        vpaddd xmm2,xmm2,xmm7
        add r9d,DWORD[40+rsp]
        mov r15d,r10d
        ror r14d,11
        xor r12d,r8d
        vpaddd xmm6,xmm2,XMMWORD[64+rbp]
        xor r15d,r11d
        ror r13d,6
        add r9d,r12d
        and esi,r15d
        xor r14d,r10d
        add r9d,r13d
        xor esi,r11d
        add ebx,r9d
        ror r14d,2
        add r9d,esi
        mov r13d,ebx
        add r14d,r9d
        ror r13d,14
        mov r9d,r14d
        mov r12d,ecx
        xor r13d,ebx
        ror r14d,9
        xor r12d,edx
        ror r13d,5
        xor r14d,r9d
        and r12d,ebx
; AES: candidate final round with key 10 (kept in xmm11), then continue
        vaesenclast xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor r13d,ebx
        add r8d,DWORD[44+rsp]
        mov esi,r9d
        ror r14d,11
        xor r12d,edx
        xor esi,r10d
        ror r13d,6
        add r8d,r12d
        and r15d,esi
        xor r14d,r9d
        add r8d,r13d
        xor r15d,r10d
        add eax,r8d
        ror r14d,2
        add r8d,r15d
        mov r13d,eax
        add r14d,r8d
; -- store refreshed [rsp+32..47]; rounds consuming [rsp+48..60]; schedule update of xmm3
        vmovdqa XMMWORD[32+rsp],xmm6
        vpalignr xmm4,xmm0,xmm3,4
        ror r13d,14
        mov r8d,r14d
        vpalignr xmm7,xmm2,xmm1,4
        mov r12d,ebx
        xor r13d,eax
        DB 143,232,120,194,236,14
        ror r14d,9
        xor r12d,ecx
        vpsrld xmm4,xmm4,3
        ror r13d,5
        xor r14d,r8d
        vpaddd xmm3,xmm3,xmm7
        and r12d,eax
; keep the key-10 result only where mask xmm12 is set
        vpand xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor r13d,eax
        add edx,DWORD[48+rsp]
        mov r15d,r8d
        DB 143,232,120,194,245,11
        ror r14d,11
        xor r12d,ecx
        vpxor xmm4,xmm4,xmm5
        xor r15d,r9d
        ror r13d,6
        add edx,r12d
        and esi,r15d
        DB 143,232,120,194,250,13
        xor r14d,r8d
        add edx,r13d
        vpxor xmm4,xmm4,xmm6
        xor esi,r9d
        add r11d,edx
        vpsrld xmm6,xmm2,10
        ror r14d,2
        add edx,esi
        vpaddd xmm3,xmm3,xmm4
        mov r13d,r11d
        add r14d,edx
        DB 143,232,120,194,239,2
        ror r13d,14
        mov edx,r14d
        vpxor xmm7,xmm7,xmm6
        mov r12d,eax
        xor r13d,r11d
        ror r14d,9
        xor r12d,ebx
        vpxor xmm7,xmm7,xmm5
        ror r13d,5
        xor r14d,edx
        and r12d,r11d
; AES: candidate final round with key 12
        vaesenclast xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor r13d,r11d
        vpsrldq xmm7,xmm7,8
        add ecx,DWORD[52+rsp]
        mov esi,edx
        ror r14d,11
        xor r12d,ebx
        vpaddd xmm3,xmm3,xmm7
        xor esi,r8d
        ror r13d,6
        add ecx,r12d
        and r15d,esi
        DB 143,232,120,194,251,13
        xor r14d,edx
        add ecx,r13d
        vpsrld xmm6,xmm3,10
        xor r15d,r8d
        add r10d,ecx
        DB 143,232,120,194,239,2
        ror r14d,2
        add ecx,r15d
        vpxor xmm7,xmm7,xmm6
        mov r13d,r10d
        add r14d,ecx
        ror r13d,14
        mov ecx,r14d
        vpxor xmm7,xmm7,xmm5
        mov r12d,r11d
        xor r13d,r10d
        ror r14d,9
        xor r12d,eax
        vpslldq xmm7,xmm7,8
        ror r13d,5
        xor r14d,ecx
        and r12d,r10d
; keep the key-12 result only where mask xmm13 is set
        vpand xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor r13d,r10d
        vpaddd xmm3,xmm3,xmm7
        add ebx,DWORD[56+rsp]
        mov r15d,ecx
        ror r14d,11
        xor r12d,eax
        vpaddd xmm6,xmm3,XMMWORD[96+rbp]
        xor r15d,edx
        ror r13d,6
        add ebx,r12d
        and esi,r15d
        xor r14d,ecx
        add ebx,r13d
        xor esi,edx
        add r9d,ebx
        ror r14d,2
        add ebx,esi
        mov r13d,r9d
        add r14d,ebx
        ror r13d,14
        mov ebx,r14d
        mov r12d,r10d
        xor r13d,r9d
        ror r14d,9
        xor r12d,r11d
        ror r13d,5
        xor r14d,ebx
        and r12d,r9d
        vpor xmm8,xmm8,xmm11
; AES: candidate final round with key 14; reload round key 0 for the next block
        vaesenclast xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor r13d,r9d
        add eax,DWORD[60+rsp]
        mov esi,ebx
        ror r14d,11
        xor r12d,r11d
        xor esi,ecx
        ror r13d,6
        add eax,r12d
        and r15d,esi
        xor r14d,ebx
        add eax,r13d
        xor r15d,ecx
        add r8d,eax
        ror r14d,2
        add eax,r15d
        mov r13d,r8d
        add r14d,eax
        vmovdqa XMMWORD[48+rsp],xmm6
; Combine the three masked candidates into xmm8 and store the 16-byte
; output block at (AES input pointer) + (arg2 - arg1).
        mov r12,QWORD[((64+0))+rsp]
        vpand xmm11,xmm11,xmm14
        mov r15,QWORD[((64+8))+rsp]
        vpor xmm8,xmm8,xmm11
        vmovdqu XMMWORD[r12*1+r15],xmm8
        lea r12,[16+r12]
; Repeat while the byte at rbp+131 is non-zero. That byte is the top byte
; of the first dword of the next 128-byte group; it becomes zero once that
; dword is the 0x00010203 shuffle-mask row that follows the constants.
        cmp BYTE[131+rbp],0
        jne NEAR $L$xop_00_47
; ---- final 16 rounds (no schedule update) + one more AES-CBC block ----
        vmovdqu xmm9,XMMWORD[r12]
        mov QWORD[((64+0))+rsp],r12
; -- rounds consuming [rsp+0..12]
        ror r13d,14
        mov eax,r14d
        mov r12d,r9d
        xor r13d,r8d
        ror r14d,9
        xor r12d,r10d
        ror r13d,5
        xor r14d,eax
        and r12d,r8d
        vpxor xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((16-128))+rdi]
        xor r13d,r8d
        add r11d,DWORD[rsp]
        mov r15d,eax
        ror r14d,11
        xor r12d,r10d
        xor r15d,ebx
        ror r13d,6
        add r11d,r12d
        and esi,r15d
        xor r14d,eax
        add r11d,r13d
        xor esi,ebx
        add edx,r11d
        ror r14d,2
        add r11d,esi
        mov r13d,edx
        add r14d,r11d
        ror r13d,14
        mov r11d,r14d
        mov r12d,r8d
        xor r13d,edx
        ror r14d,9
        xor r12d,r9d
        ror r13d,5
        xor r14d,r11d
        and r12d,edx
        vpxor xmm9,xmm9,xmm8
        xor r13d,edx
        add r10d,DWORD[4+rsp]
        mov esi,r11d
        ror r14d,11
        xor r12d,r9d
        xor esi,eax
        ror r13d,6
        add r10d,r12d
        and r15d,esi
        xor r14d,r11d
        add r10d,r13d
        xor r15d,eax
        add ecx,r10d
        ror r14d,2
        add r10d,r15d
        mov r13d,ecx
        add r14d,r10d
        ror r13d,14
        mov r10d,r14d
        mov r12d,edx
        xor r13d,ecx
        ror r14d,9
        xor r12d,r8d
        ror r13d,5
        xor r14d,r10d
        and r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((32-128))+rdi]
        xor r13d,ecx
        add r9d,DWORD[8+rsp]
        mov r15d,r10d
        ror r14d,11
        xor r12d,r8d
        xor r15d,r11d
        ror r13d,6
        add r9d,r12d
        and esi,r15d
        xor r14d,r10d
        add r9d,r13d
        xor esi,r11d
        add ebx,r9d
        ror r14d,2
        add r9d,esi
        mov r13d,ebx
        add r14d,r9d
        ror r13d,14
        mov r9d,r14d
        mov r12d,ecx
        xor r13d,ebx
        ror r14d,9
        xor r12d,edx
        ror r13d,5
        xor r14d,r9d
        and r12d,ebx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((48-128))+rdi]
        xor r13d,ebx
        add r8d,DWORD[12+rsp]
        mov esi,r9d
        ror r14d,11
        xor r12d,edx
        xor esi,r10d
        ror r13d,6
        add r8d,r12d
        and r15d,esi
        xor r14d,r9d
        add r8d,r13d
        xor r15d,r10d
        add eax,r8d
        ror r14d,2
        add r8d,r15d
        mov r13d,eax
        add r14d,r8d
; -- rounds consuming [rsp+16..28]
        ror r13d,14
        mov r8d,r14d
        mov r12d,ebx
        xor r13d,eax
        ror r14d,9
        xor r12d,ecx
        ror r13d,5
        xor r14d,r8d
        and r12d,eax
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((64-128))+rdi]
        xor r13d,eax
        add edx,DWORD[16+rsp]
        mov r15d,r8d
        ror r14d,11
        xor r12d,ecx
        xor r15d,r9d
        ror r13d,6
        add edx,r12d
        and esi,r15d
        xor r14d,r8d
        add edx,r13d
        xor esi,r9d
        add r11d,edx
        ror r14d,2
        add edx,esi
        mov r13d,r11d
        add r14d,edx
        ror r13d,14
        mov edx,r14d
        mov r12d,eax
        xor r13d,r11d
        ror r14d,9
        xor r12d,ebx
        ror r13d,5
        xor r14d,edx
        and r12d,r11d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((80-128))+rdi]
        xor r13d,r11d
        add ecx,DWORD[20+rsp]
        mov esi,edx
        ror r14d,11
        xor r12d,ebx
        xor esi,r8d
        ror r13d,6
        add ecx,r12d
        and r15d,esi
        xor r14d,edx
        add ecx,r13d
        xor r15d,r8d
        add r10d,ecx
        ror r14d,2
        add ecx,r15d
        mov r13d,r10d
        add r14d,ecx
        ror r13d,14
        mov ecx,r14d
        mov r12d,r11d
        xor r13d,r10d
        ror r14d,9
        xor r12d,eax
        ror r13d,5
        xor r14d,ecx
        and r12d,r10d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((96-128))+rdi]
        xor r13d,r10d
        add ebx,DWORD[24+rsp]
        mov r15d,ecx
        ror r14d,11
        xor r12d,eax
        xor r15d,edx
        ror r13d,6
        add ebx,r12d
        and esi,r15d
        xor r14d,ecx
        add ebx,r13d
        xor esi,edx
        add r9d,ebx
        ror r14d,2
        add ebx,esi
        mov r13d,r9d
        add r14d,ebx
        ror r13d,14
        mov ebx,r14d
        mov r12d,r10d
        xor r13d,r9d
        ror r14d,9
        xor r12d,r11d
        ror r13d,5
        xor r14d,ebx
        and r12d,r9d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((112-128))+rdi]
        xor r13d,r9d
        add eax,DWORD[28+rsp]
        mov esi,ebx
        ror r14d,11
        xor r12d,r11d
        xor esi,ecx
        ror r13d,6
        add eax,r12d
        and r15d,esi
        xor r14d,ebx
        add eax,r13d
        xor r15d,ecx
        add r8d,eax
        ror r14d,2
        add eax,r15d
        mov r13d,r8d
        add r14d,eax
; -- rounds consuming [rsp+32..44]
        ror r13d,14
        mov eax,r14d
        mov r12d,r9d
        xor r13d,r8d
        ror r14d,9
        xor r12d,r10d
        ror r13d,5
        xor r14d,eax
        and r12d,r8d
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((128-128))+rdi]
        xor r13d,r8d
        add r11d,DWORD[32+rsp]
        mov r15d,eax
        ror r14d,11
        xor r12d,r10d
        xor r15d,ebx
        ror r13d,6
        add r11d,r12d
        and esi,r15d
        xor r14d,eax
        add r11d,r13d
        xor esi,ebx
        add edx,r11d
        ror r14d,2
        add r11d,esi
        mov r13d,edx
        add r14d,r11d
        ror r13d,14
        mov r11d,r14d
        mov r12d,r8d
        xor r13d,edx
        ror r14d,9
        xor r12d,r9d
        ror r13d,5
        xor r14d,r11d
        and r12d,edx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((144-128))+rdi]
        xor r13d,edx
        add r10d,DWORD[36+rsp]
        mov esi,r11d
        ror r14d,11
        xor r12d,r9d
        xor esi,eax
        ror r13d,6
        add r10d,r12d
        and r15d,esi
        xor r14d,r11d
        add r10d,r13d
        xor r15d,eax
        add ecx,r10d
        ror r14d,2
        add r10d,r15d
        mov r13d,ecx
        add r14d,r10d
        ror r13d,14
        mov r10d,r14d
        mov r12d,edx
        xor r13d,ecx
        ror r14d,9
        xor r12d,r8d
        ror r13d,5
        xor r14d,r10d
        and r12d,ecx
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((160-128))+rdi]
        xor r13d,ecx
        add r9d,DWORD[40+rsp]
        mov r15d,r10d
        ror r14d,11
        xor r12d,r8d
        xor r15d,r11d
        ror r13d,6
        add r9d,r12d
        and esi,r15d
        xor r14d,r10d
        add r9d,r13d
        xor esi,r11d
        add ebx,r9d
        ror r14d,2
        add r9d,esi
        mov r13d,ebx
        add r14d,r9d
        ror r13d,14
        mov r9d,r14d
        mov r12d,ecx
        xor r13d,ebx
        ror r14d,9
        xor r12d,edx
        ror r13d,5
        xor r14d,r9d
        and r12d,ebx
; AES: candidate final round with key 10
        vaesenclast xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((176-128))+rdi]
        xor r13d,ebx
        add r8d,DWORD[44+rsp]
        mov esi,r9d
        ror r14d,11
        xor r12d,edx
        xor esi,r10d
        ror r13d,6
        add r8d,r12d
        and r15d,esi
        xor r14d,r9d
        add r8d,r13d
        xor r15d,r10d
        add eax,r8d
        ror r14d,2
        add r8d,r15d
        mov r13d,eax
        add r14d,r8d
; -- rounds consuming [rsp+48..60]
        ror r13d,14
        mov r8d,r14d
        mov r12d,ebx
        xor r13d,eax
        ror r14d,9
        xor r12d,ecx
        ror r13d,5
        xor r14d,r8d
        and r12d,eax
        vpand xmm8,xmm11,xmm12
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((192-128))+rdi]
        xor r13d,eax
        add edx,DWORD[48+rsp]
        mov r15d,r8d
        ror r14d,11
        xor r12d,ecx
        xor r15d,r9d
        ror r13d,6
        add edx,r12d
        and esi,r15d
        xor r14d,r8d
        add edx,r13d
        xor esi,r9d
        add r11d,edx
        ror r14d,2
        add edx,esi
        mov r13d,r11d
        add r14d,edx
        ror r13d,14
        mov edx,r14d
        mov r12d,eax
        xor r13d,r11d
        ror r14d,9
        xor r12d,ebx
        ror r13d,5
        xor r14d,edx
        and r12d,r11d
; AES: candidate final round with key 12
        vaesenclast xmm11,xmm9,xmm10
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((208-128))+rdi]
        xor r13d,r11d
        add ecx,DWORD[52+rsp]
        mov esi,edx
        ror r14d,11
        xor r12d,ebx
        xor esi,r8d
        ror r13d,6
        add ecx,r12d
        and r15d,esi
        xor r14d,edx
        add ecx,r13d
        xor r15d,r8d
        add r10d,ecx
        ror r14d,2
        add ecx,r15d
        mov r13d,r10d
        add r14d,ecx
        ror r13d,14
        mov ecx,r14d
        mov r12d,r11d
        xor r13d,r10d
        ror r14d,9
        xor r12d,eax
        ror r13d,5
        xor r14d,ecx
        and r12d,r10d
        vpand xmm11,xmm11,xmm13
        vaesenc xmm9,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((224-128))+rdi]
        xor r13d,r10d
        add ebx,DWORD[56+rsp]
        mov r15d,ecx
        ror r14d,11
        xor r12d,eax
        xor r15d,edx
        ror r13d,6
        add ebx,r12d
        and esi,r15d
        xor r14d,ecx
        add ebx,r13d
        xor esi,edx
        add r9d,ebx
        ror r14d,2
        add ebx,esi
        mov r13d,r9d
        add r14d,ebx
        ror r13d,14
        mov ebx,r14d
        mov r12d,r10d
        xor r13d,r9d
        ror r14d,9
        xor r12d,r11d
        ror r13d,5
        xor r14d,ebx
        and r12d,r9d
        vpor xmm8,xmm8,xmm11
; AES: candidate final round with key 14; reload round key 0
        vaesenclast xmm11,xmm9,xmm10
        vmovdqu xmm10,XMMWORD[((0-128))+rdi]
        xor r13d,r9d
        add eax,DWORD[60+rsp]
        mov esi,ebx
        ror r14d,11
        xor r12d,r11d
        xor esi,ecx
        ror r13d,6
        add eax,r12d
        and r15d,esi
        xor r14d,ebx
        add eax,r13d
        xor r15d,ecx
        add r8d,eax
        ror r14d,2
        add eax,r15d
        mov r13d,r8d
        add r14d,eax
; ---- end of block: reload pointers (r12, r13, r15 and esi were used as
; ---- scratch by the rounds), store the last AES block, fold the working
; ---- variables into the hash state.
        mov r12,QWORD[((64+0))+rsp]
        mov r13,QWORD[((64+8))+rsp]
        mov r15,QWORD[((64+40))+rsp]
        mov rsi,QWORD[((64+48))+rsp]
        vpand xmm11,xmm11,xmm14
        mov eax,r14d
        vpor xmm8,xmm8,xmm11
        vmovdqu XMMWORD[r13*1+r12],xmm8
        lea r12,[16+r12]
        add eax,DWORD[r15]
        add ebx,DWORD[4+r15]
        add ecx,DWORD[8+r15]
        add edx,DWORD[12+r15]
        add r8d,DWORD[16+r15]
        add r9d,DWORD[20+r15]
        add r10d,DWORD[24+r15]
        add r11d,DWORD[28+r15]
; Flags from this compare survive the mov stores below and feed the jb.
        cmp r12,QWORD[((64+16))+rsp]
        mov DWORD[r15],eax
        mov DWORD[4+r15],ebx
        mov DWORD[8+r15],ecx
        mov DWORD[12+r15],edx
        mov DWORD[16+r15],r8d
        mov DWORD[20+r15],r9d
        mov DWORD[24+r15],r10d
        mov DWORD[28+r15],r11d
        jb NEAR $L$loop_xop
; ---- exit: write the last output block back through arg5, clear the
; ---- vector registers, restore xmm6-15 and the callee-saved GPRs.
        mov r8,QWORD[((64+32))+rsp]
        mov rsi,QWORD[120+rsp]
        vmovdqu XMMWORD[r8],xmm8
        vzeroall
        movaps xmm6,XMMWORD[128+rsp]
        movaps xmm7,XMMWORD[144+rsp]
        movaps xmm8,XMMWORD[160+rsp]
        movaps xmm9,XMMWORD[176+rsp]
        movaps xmm10,XMMWORD[192+rsp]
        movaps xmm11,XMMWORD[208+rsp]
        movaps xmm12,XMMWORD[224+rsp]
        movaps xmm13,XMMWORD[240+rsp]
        movaps xmm14,XMMWORD[256+rsp]
        movaps xmm15,XMMWORD[272+rsp]
; rsi = rsp value on entry; the six pushes sit just below it.
        mov r15,QWORD[((-48))+rsi]
        mov r14,QWORD[((-40))+rsi]
        mov r13,QWORD[((-32))+rsi]
        mov r12,QWORD[((-24))+rsi]
        mov rbp,QWORD[((-16))+rsi]
        mov rbx,QWORD[((-8))+rsi]
        lea rsp,[rsi]
$L$epilogue_xop:
        mov rdi,QWORD[8+rsp] ;WIN64 epilogue
        mov rsi,QWORD[16+rsp]
        DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_cbc_sha256_enc_xop:

; aesni_cbc_sha256_enc_avx: only the start of this function falls inside
; this block; it continues on the following source lines. Reproduced
; unchanged. The prologue below is instruction-for-instruction the same as
; the xop variant's prologue above.
ALIGN 64
aesni_cbc_sha256_enc_avx:
        mov QWORD[8+rsp],rdi ;WIN64 prologue
        mov QWORD[16+rsp],rsi
        mov rax,rsp
$L$SEH_begin_aesni_cbc_sha256_enc_avx:
        mov rdi,rcx
        mov rsi,rdx
        mov rdx,r8
        mov rcx,r9
        mov r8,QWORD[40+rsp]
        mov r9,QWORD[48+rsp]
$L$avx_shortcut:
        mov r10,QWORD[56+rsp]
        mov rax,rsp
        push rbx
        push rbp
        push r12
        push r13
        push r14
        push r15
        sub rsp,288
        and rsp,-64
        shl rdx,6
        sub rsi,rdi
        sub r10,rdi
        add rdx,rdi
        mov QWORD[((64+8))+rsp],rsi
; NOTE: the operands of the mov below are on the next source line; the
; line break in this file falls in the middle of the statement.
        mov
QWORD[((64+16))+rsp],rdx mov QWORD[((64+32))+rsp],r8 mov QWORD[((64+40))+rsp],r9 mov QWORD[((64+48))+rsp],r10 mov QWORD[120+rsp],rax movaps XMMWORD[128+rsp],xmm6 movaps XMMWORD[144+rsp],xmm7 movaps XMMWORD[160+rsp],xmm8 movaps XMMWORD[176+rsp],xmm9 movaps XMMWORD[192+rsp],xmm10 movaps XMMWORD[208+rsp],xmm11 movaps XMMWORD[224+rsp],xmm12 movaps XMMWORD[240+rsp],xmm13 movaps XMMWORD[256+rsp],xmm14 movaps XMMWORD[272+rsp],xmm15 $L$prologue_avx: vzeroall mov r12,rdi lea rdi,[128+rcx] lea r13,[((K256+544))] mov r14d,DWORD[((240-128))+rdi] mov r15,r9 mov rsi,r10 vmovdqu xmm8,XMMWORD[r8] sub r14,9 mov eax,DWORD[r15] mov ebx,DWORD[4+r15] mov ecx,DWORD[8+r15] mov edx,DWORD[12+r15] mov r8d,DWORD[16+r15] mov r9d,DWORD[20+r15] mov r10d,DWORD[24+r15] mov r11d,DWORD[28+r15] vmovdqa xmm14,XMMWORD[r14*8+r13] vmovdqa xmm13,XMMWORD[16+r14*8+r13] vmovdqa xmm12,XMMWORD[32+r14*8+r13] vmovdqu xmm10,XMMWORD[((0-128))+rdi] jmp NEAR $L$loop_avx ALIGN 16 $L$loop_avx: vmovdqa xmm7,XMMWORD[((K256+512))] vmovdqu xmm0,XMMWORD[r12*1+rsi] vmovdqu xmm1,XMMWORD[16+r12*1+rsi] vmovdqu xmm2,XMMWORD[32+r12*1+rsi] vmovdqu xmm3,XMMWORD[48+r12*1+rsi] vpshufb xmm0,xmm0,xmm7 lea rbp,[K256] vpshufb xmm1,xmm1,xmm7 vpshufb xmm2,xmm2,xmm7 vpaddd xmm4,xmm0,XMMWORD[rbp] vpshufb xmm3,xmm3,xmm7 vpaddd xmm5,xmm1,XMMWORD[32+rbp] vpaddd xmm6,xmm2,XMMWORD[64+rbp] vpaddd xmm7,xmm3,XMMWORD[96+rbp] vmovdqa XMMWORD[rsp],xmm4 mov r14d,eax vmovdqa XMMWORD[16+rsp],xmm5 mov esi,ebx vmovdqa XMMWORD[32+rsp],xmm6 xor esi,ecx vmovdqa XMMWORD[48+rsp],xmm7 mov r13d,r8d jmp NEAR $L$avx_00_47 ALIGN 16 $L$avx_00_47: sub rbp,-16*2*4 vmovdqu xmm9,XMMWORD[r12] mov QWORD[((64+0))+rsp],r12 vpalignr xmm4,xmm1,xmm0,4 shrd r13d,r13d,14 mov eax,r14d mov r12d,r9d vpalignr xmm7,xmm3,xmm2,4 xor r13d,r8d shrd r14d,r14d,9 xor r12d,r10d vpsrld xmm6,xmm4,7 shrd r13d,r13d,5 xor r14d,eax and r12d,r8d vpaddd xmm0,xmm0,xmm7 vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((16-128))+rdi] xor r13d,r8d add r11d,DWORD[rsp] mov r15d,eax vpsrld xmm7,xmm4,3 shrd 
r14d,r14d,11 xor r12d,r10d xor r15d,ebx vpslld xmm5,xmm4,14 shrd r13d,r13d,6 add r11d,r12d and esi,r15d vpxor xmm4,xmm7,xmm6 xor r14d,eax add r11d,r13d xor esi,ebx vpshufd xmm7,xmm3,250 add edx,r11d shrd r14d,r14d,2 add r11d,esi vpsrld xmm6,xmm6,11 mov r13d,edx add r14d,r11d shrd r13d,r13d,14 vpxor xmm4,xmm4,xmm5 mov r11d,r14d mov r12d,r8d xor r13d,edx vpslld xmm5,xmm5,11 shrd r14d,r14d,9 xor r12d,r9d shrd r13d,r13d,5 vpxor xmm4,xmm4,xmm6 xor r14d,r11d and r12d,edx vpxor xmm9,xmm9,xmm8 xor r13d,edx vpsrld xmm6,xmm7,10 add r10d,DWORD[4+rsp] mov esi,r11d shrd r14d,r14d,11 vpxor xmm4,xmm4,xmm5 xor r12d,r9d xor esi,eax shrd r13d,r13d,6 vpsrlq xmm7,xmm7,17 add r10d,r12d and r15d,esi xor r14d,r11d vpaddd xmm0,xmm0,xmm4 add r10d,r13d xor r15d,eax add ecx,r10d vpxor xmm6,xmm6,xmm7 shrd r14d,r14d,2 add r10d,r15d mov r13d,ecx vpsrlq xmm7,xmm7,2 add r14d,r10d shrd r13d,r13d,14 mov r10d,r14d vpxor xmm6,xmm6,xmm7 mov r12d,edx xor r13d,ecx shrd r14d,r14d,9 vpshufd xmm6,xmm6,132 xor r12d,r8d shrd r13d,r13d,5 xor r14d,r10d vpsrldq xmm6,xmm6,8 and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((32-128))+rdi] xor r13d,ecx add r9d,DWORD[8+rsp] vpaddd xmm0,xmm0,xmm6 mov r15d,r10d shrd r14d,r14d,11 xor r12d,r8d vpshufd xmm7,xmm0,80 xor r15d,r11d shrd r13d,r13d,6 add r9d,r12d vpsrld xmm6,xmm7,10 and esi,r15d xor r14d,r10d add r9d,r13d vpsrlq xmm7,xmm7,17 xor esi,r11d add ebx,r9d shrd r14d,r14d,2 vpxor xmm6,xmm6,xmm7 add r9d,esi mov r13d,ebx add r14d,r9d vpsrlq xmm7,xmm7,2 shrd r13d,r13d,14 mov r9d,r14d mov r12d,ecx vpxor xmm6,xmm6,xmm7 xor r13d,ebx shrd r14d,r14d,9 xor r12d,edx vpshufd xmm6,xmm6,232 shrd r13d,r13d,5 xor r14d,r9d and r12d,ebx vpslldq xmm6,xmm6,8 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((48-128))+rdi] xor r13d,ebx add r8d,DWORD[12+rsp] mov esi,r9d vpaddd xmm0,xmm0,xmm6 shrd r14d,r14d,11 xor r12d,edx xor esi,r10d vpaddd xmm6,xmm0,XMMWORD[rbp] shrd r13d,r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d shrd r14d,r14d,2 add 
r8d,r15d mov r13d,eax add r14d,r8d vmovdqa XMMWORD[rsp],xmm6 vpalignr xmm4,xmm2,xmm1,4 shrd r13d,r13d,14 mov r8d,r14d mov r12d,ebx vpalignr xmm7,xmm0,xmm3,4 xor r13d,eax shrd r14d,r14d,9 xor r12d,ecx vpsrld xmm6,xmm4,7 shrd r13d,r13d,5 xor r14d,r8d and r12d,eax vpaddd xmm1,xmm1,xmm7 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((64-128))+rdi] xor r13d,eax add edx,DWORD[16+rsp] mov r15d,r8d vpsrld xmm7,xmm4,3 shrd r14d,r14d,11 xor r12d,ecx xor r15d,r9d vpslld xmm5,xmm4,14 shrd r13d,r13d,6 add edx,r12d and esi,r15d vpxor xmm4,xmm7,xmm6 xor r14d,r8d add edx,r13d xor esi,r9d vpshufd xmm7,xmm0,250 add r11d,edx shrd r14d,r14d,2 add edx,esi vpsrld xmm6,xmm6,11 mov r13d,r11d add r14d,edx shrd r13d,r13d,14 vpxor xmm4,xmm4,xmm5 mov edx,r14d mov r12d,eax xor r13d,r11d vpslld xmm5,xmm5,11 shrd r14d,r14d,9 xor r12d,ebx shrd r13d,r13d,5 vpxor xmm4,xmm4,xmm6 xor r14d,edx and r12d,r11d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((80-128))+rdi] xor r13d,r11d vpsrld xmm6,xmm7,10 add ecx,DWORD[20+rsp] mov esi,edx shrd r14d,r14d,11 vpxor xmm4,xmm4,xmm5 xor r12d,ebx xor esi,r8d shrd r13d,r13d,6 vpsrlq xmm7,xmm7,17 add ecx,r12d and r15d,esi xor r14d,edx vpaddd xmm1,xmm1,xmm4 add ecx,r13d xor r15d,r8d add r10d,ecx vpxor xmm6,xmm6,xmm7 shrd r14d,r14d,2 add ecx,r15d mov r13d,r10d vpsrlq xmm7,xmm7,2 add r14d,ecx shrd r13d,r13d,14 mov ecx,r14d vpxor xmm6,xmm6,xmm7 mov r12d,r11d xor r13d,r10d shrd r14d,r14d,9 vpshufd xmm6,xmm6,132 xor r12d,eax shrd r13d,r13d,5 xor r14d,ecx vpsrldq xmm6,xmm6,8 and r12d,r10d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((96-128))+rdi] xor r13d,r10d add ebx,DWORD[24+rsp] vpaddd xmm1,xmm1,xmm6 mov r15d,ecx shrd r14d,r14d,11 xor r12d,eax vpshufd xmm7,xmm1,80 xor r15d,edx shrd r13d,r13d,6 add ebx,r12d vpsrld xmm6,xmm7,10 and esi,r15d xor r14d,ecx add ebx,r13d vpsrlq xmm7,xmm7,17 xor esi,edx add r9d,ebx shrd r14d,r14d,2 vpxor xmm6,xmm6,xmm7 add ebx,esi mov r13d,r9d add r14d,ebx vpsrlq xmm7,xmm7,2 shrd r13d,r13d,14 mov ebx,r14d mov r12d,r10d vpxor xmm6,xmm6,xmm7 
xor r13d,r9d shrd r14d,r14d,9 xor r12d,r11d vpshufd xmm6,xmm6,232 shrd r13d,r13d,5 xor r14d,ebx and r12d,r9d vpslldq xmm6,xmm6,8 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((112-128))+rdi] xor r13d,r9d add eax,DWORD[28+rsp] mov esi,ebx vpaddd xmm1,xmm1,xmm6 shrd r14d,r14d,11 xor r12d,r11d xor esi,ecx vpaddd xmm6,xmm1,XMMWORD[32+rbp] shrd r13d,r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax shrd r14d,r14d,2 add eax,r15d mov r13d,r8d add r14d,eax vmovdqa XMMWORD[16+rsp],xmm6 vpalignr xmm4,xmm3,xmm2,4 shrd r13d,r13d,14 mov eax,r14d mov r12d,r9d vpalignr xmm7,xmm1,xmm0,4 xor r13d,r8d shrd r14d,r14d,9 xor r12d,r10d vpsrld xmm6,xmm4,7 shrd r13d,r13d,5 xor r14d,eax and r12d,r8d vpaddd xmm2,xmm2,xmm7 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((128-128))+rdi] xor r13d,r8d add r11d,DWORD[32+rsp] mov r15d,eax vpsrld xmm7,xmm4,3 shrd r14d,r14d,11 xor r12d,r10d xor r15d,ebx vpslld xmm5,xmm4,14 shrd r13d,r13d,6 add r11d,r12d and esi,r15d vpxor xmm4,xmm7,xmm6 xor r14d,eax add r11d,r13d xor esi,ebx vpshufd xmm7,xmm1,250 add edx,r11d shrd r14d,r14d,2 add r11d,esi vpsrld xmm6,xmm6,11 mov r13d,edx add r14d,r11d shrd r13d,r13d,14 vpxor xmm4,xmm4,xmm5 mov r11d,r14d mov r12d,r8d xor r13d,edx vpslld xmm5,xmm5,11 shrd r14d,r14d,9 xor r12d,r9d shrd r13d,r13d,5 vpxor xmm4,xmm4,xmm6 xor r14d,r11d and r12d,edx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((144-128))+rdi] xor r13d,edx vpsrld xmm6,xmm7,10 add r10d,DWORD[36+rsp] mov esi,r11d shrd r14d,r14d,11 vpxor xmm4,xmm4,xmm5 xor r12d,r9d xor esi,eax shrd r13d,r13d,6 vpsrlq xmm7,xmm7,17 add r10d,r12d and r15d,esi xor r14d,r11d vpaddd xmm2,xmm2,xmm4 add r10d,r13d xor r15d,eax add ecx,r10d vpxor xmm6,xmm6,xmm7 shrd r14d,r14d,2 add r10d,r15d mov r13d,ecx vpsrlq xmm7,xmm7,2 add r14d,r10d shrd r13d,r13d,14 mov r10d,r14d vpxor xmm6,xmm6,xmm7 mov r12d,edx xor r13d,ecx shrd r14d,r14d,9 vpshufd xmm6,xmm6,132 xor r12d,r8d shrd r13d,r13d,5 xor r14d,r10d vpsrldq xmm6,xmm6,8 and r12d,ecx vaesenc xmm9,xmm9,xmm10 
vmovdqu xmm10,XMMWORD[((160-128))+rdi] xor r13d,ecx add r9d,DWORD[40+rsp] vpaddd xmm2,xmm2,xmm6 mov r15d,r10d shrd r14d,r14d,11 xor r12d,r8d vpshufd xmm7,xmm2,80 xor r15d,r11d shrd r13d,r13d,6 add r9d,r12d vpsrld xmm6,xmm7,10 and esi,r15d xor r14d,r10d add r9d,r13d vpsrlq xmm7,xmm7,17 xor esi,r11d add ebx,r9d shrd r14d,r14d,2 vpxor xmm6,xmm6,xmm7 add r9d,esi mov r13d,ebx add r14d,r9d vpsrlq xmm7,xmm7,2 shrd r13d,r13d,14 mov r9d,r14d mov r12d,ecx vpxor xmm6,xmm6,xmm7 xor r13d,ebx shrd r14d,r14d,9 xor r12d,edx vpshufd xmm6,xmm6,232 shrd r13d,r13d,5 xor r14d,r9d and r12d,ebx vpslldq xmm6,xmm6,8 vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((176-128))+rdi] xor r13d,ebx add r8d,DWORD[44+rsp] mov esi,r9d vpaddd xmm2,xmm2,xmm6 shrd r14d,r14d,11 xor r12d,edx xor esi,r10d vpaddd xmm6,xmm2,XMMWORD[64+rbp] shrd r13d,r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d shrd r14d,r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d vmovdqa XMMWORD[32+rsp],xmm6 vpalignr xmm4,xmm0,xmm3,4 shrd r13d,r13d,14 mov r8d,r14d mov r12d,ebx vpalignr xmm7,xmm2,xmm1,4 xor r13d,eax shrd r14d,r14d,9 xor r12d,ecx vpsrld xmm6,xmm4,7 shrd r13d,r13d,5 xor r14d,r8d and r12d,eax vpaddd xmm3,xmm3,xmm7 vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((192-128))+rdi] xor r13d,eax add edx,DWORD[48+rsp] mov r15d,r8d vpsrld xmm7,xmm4,3 shrd r14d,r14d,11 xor r12d,ecx xor r15d,r9d vpslld xmm5,xmm4,14 shrd r13d,r13d,6 add edx,r12d and esi,r15d vpxor xmm4,xmm7,xmm6 xor r14d,r8d add edx,r13d xor esi,r9d vpshufd xmm7,xmm2,250 add r11d,edx shrd r14d,r14d,2 add edx,esi vpsrld xmm6,xmm6,11 mov r13d,r11d add r14d,edx shrd r13d,r13d,14 vpxor xmm4,xmm4,xmm5 mov edx,r14d mov r12d,eax xor r13d,r11d vpslld xmm5,xmm5,11 shrd r14d,r14d,9 xor r12d,ebx shrd r13d,r13d,5 vpxor xmm4,xmm4,xmm6 xor r14d,edx and r12d,r11d vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((208-128))+rdi] xor r13d,r11d vpsrld xmm6,xmm7,10 add 
ecx,DWORD[52+rsp] mov esi,edx shrd r14d,r14d,11 vpxor xmm4,xmm4,xmm5 xor r12d,ebx xor esi,r8d shrd r13d,r13d,6 vpsrlq xmm7,xmm7,17 add ecx,r12d and r15d,esi xor r14d,edx vpaddd xmm3,xmm3,xmm4 add ecx,r13d xor r15d,r8d add r10d,ecx vpxor xmm6,xmm6,xmm7 shrd r14d,r14d,2 add ecx,r15d mov r13d,r10d vpsrlq xmm7,xmm7,2 add r14d,ecx shrd r13d,r13d,14 mov ecx,r14d vpxor xmm6,xmm6,xmm7 mov r12d,r11d xor r13d,r10d shrd r14d,r14d,9 vpshufd xmm6,xmm6,132 xor r12d,eax shrd r13d,r13d,5 xor r14d,ecx vpsrldq xmm6,xmm6,8 and r12d,r10d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((224-128))+rdi] xor r13d,r10d add ebx,DWORD[56+rsp] vpaddd xmm3,xmm3,xmm6 mov r15d,ecx shrd r14d,r14d,11 xor r12d,eax vpshufd xmm7,xmm3,80 xor r15d,edx shrd r13d,r13d,6 add ebx,r12d vpsrld xmm6,xmm7,10 and esi,r15d xor r14d,ecx add ebx,r13d vpsrlq xmm7,xmm7,17 xor esi,edx add r9d,ebx shrd r14d,r14d,2 vpxor xmm6,xmm6,xmm7 add ebx,esi mov r13d,r9d add r14d,ebx vpsrlq xmm7,xmm7,2 shrd r13d,r13d,14 mov ebx,r14d mov r12d,r10d vpxor xmm6,xmm6,xmm7 xor r13d,r9d shrd r14d,r14d,9 xor r12d,r11d vpshufd xmm6,xmm6,232 shrd r13d,r13d,5 xor r14d,ebx and r12d,r9d vpslldq xmm6,xmm6,8 vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((0-128))+rdi] xor r13d,r9d add eax,DWORD[60+rsp] mov esi,ebx vpaddd xmm3,xmm3,xmm6 shrd r14d,r14d,11 xor r12d,r11d xor esi,ecx vpaddd xmm6,xmm3,XMMWORD[96+rbp] shrd r13d,r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax shrd r14d,r14d,2 add eax,r15d mov r13d,r8d add r14d,eax vmovdqa XMMWORD[48+rsp],xmm6 mov r12,QWORD[((64+0))+rsp] vpand xmm11,xmm11,xmm14 mov r15,QWORD[((64+8))+rsp] vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD[r12*1+r15],xmm8 lea r12,[16+r12] cmp BYTE[131+rbp],0 jne NEAR $L$avx_00_47 vmovdqu xmm9,XMMWORD[r12] mov QWORD[((64+0))+rsp],r12 shrd r13d,r13d,14 mov eax,r14d mov r12d,r9d xor r13d,r8d shrd r14d,r14d,9 xor r12d,r10d shrd r13d,r13d,5 xor r14d,eax and r12d,r8d vpxor xmm9,xmm9,xmm10 vmovdqu 
xmm10,XMMWORD[((16-128))+rdi] xor r13d,r8d add r11d,DWORD[rsp] mov r15d,eax shrd r14d,r14d,11 xor r12d,r10d xor r15d,ebx shrd r13d,r13d,6 add r11d,r12d and esi,r15d xor r14d,eax add r11d,r13d xor esi,ebx add edx,r11d shrd r14d,r14d,2 add r11d,esi mov r13d,edx add r14d,r11d shrd r13d,r13d,14 mov r11d,r14d mov r12d,r8d xor r13d,edx shrd r14d,r14d,9 xor r12d,r9d shrd r13d,r13d,5 xor r14d,r11d and r12d,edx vpxor xmm9,xmm9,xmm8 xor r13d,edx add r10d,DWORD[4+rsp] mov esi,r11d shrd r14d,r14d,11 xor r12d,r9d xor esi,eax shrd r13d,r13d,6 add r10d,r12d and r15d,esi xor r14d,r11d add r10d,r13d xor r15d,eax add ecx,r10d shrd r14d,r14d,2 add r10d,r15d mov r13d,ecx add r14d,r10d shrd r13d,r13d,14 mov r10d,r14d mov r12d,edx xor r13d,ecx shrd r14d,r14d,9 xor r12d,r8d shrd r13d,r13d,5 xor r14d,r10d and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((32-128))+rdi] xor r13d,ecx add r9d,DWORD[8+rsp] mov r15d,r10d shrd r14d,r14d,11 xor r12d,r8d xor r15d,r11d shrd r13d,r13d,6 add r9d,r12d and esi,r15d xor r14d,r10d add r9d,r13d xor esi,r11d add ebx,r9d shrd r14d,r14d,2 add r9d,esi mov r13d,ebx add r14d,r9d shrd r13d,r13d,14 mov r9d,r14d mov r12d,ecx xor r13d,ebx shrd r14d,r14d,9 xor r12d,edx shrd r13d,r13d,5 xor r14d,r9d and r12d,ebx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((48-128))+rdi] xor r13d,ebx add r8d,DWORD[12+rsp] mov esi,r9d shrd r14d,r14d,11 xor r12d,edx xor esi,r10d shrd r13d,r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d shrd r14d,r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d shrd r13d,r13d,14 mov r8d,r14d mov r12d,ebx xor r13d,eax shrd r14d,r14d,9 xor r12d,ecx shrd r13d,r13d,5 xor r14d,r8d and r12d,eax vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((64-128))+rdi] xor r13d,eax add edx,DWORD[16+rsp] mov r15d,r8d shrd r14d,r14d,11 xor r12d,ecx xor r15d,r9d shrd r13d,r13d,6 add edx,r12d and esi,r15d xor r14d,r8d add edx,r13d xor esi,r9d add r11d,edx shrd r14d,r14d,2 add edx,esi mov r13d,r11d add r14d,edx shrd r13d,r13d,14 
mov edx,r14d mov r12d,eax xor r13d,r11d shrd r14d,r14d,9 xor r12d,ebx shrd r13d,r13d,5 xor r14d,edx and r12d,r11d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((80-128))+rdi] xor r13d,r11d add ecx,DWORD[20+rsp] mov esi,edx shrd r14d,r14d,11 xor r12d,ebx xor esi,r8d shrd r13d,r13d,6 add ecx,r12d and r15d,esi xor r14d,edx add ecx,r13d xor r15d,r8d add r10d,ecx shrd r14d,r14d,2 add ecx,r15d mov r13d,r10d add r14d,ecx shrd r13d,r13d,14 mov ecx,r14d mov r12d,r11d xor r13d,r10d shrd r14d,r14d,9 xor r12d,eax shrd r13d,r13d,5 xor r14d,ecx and r12d,r10d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((96-128))+rdi] xor r13d,r10d add ebx,DWORD[24+rsp] mov r15d,ecx shrd r14d,r14d,11 xor r12d,eax xor r15d,edx shrd r13d,r13d,6 add ebx,r12d and esi,r15d xor r14d,ecx add ebx,r13d xor esi,edx add r9d,ebx shrd r14d,r14d,2 add ebx,esi mov r13d,r9d add r14d,ebx shrd r13d,r13d,14 mov ebx,r14d mov r12d,r10d xor r13d,r9d shrd r14d,r14d,9 xor r12d,r11d shrd r13d,r13d,5 xor r14d,ebx and r12d,r9d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((112-128))+rdi] xor r13d,r9d add eax,DWORD[28+rsp] mov esi,ebx shrd r14d,r14d,11 xor r12d,r11d xor esi,ecx shrd r13d,r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax shrd r14d,r14d,2 add eax,r15d mov r13d,r8d add r14d,eax shrd r13d,r13d,14 mov eax,r14d mov r12d,r9d xor r13d,r8d shrd r14d,r14d,9 xor r12d,r10d shrd r13d,r13d,5 xor r14d,eax and r12d,r8d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((128-128))+rdi] xor r13d,r8d add r11d,DWORD[32+rsp] mov r15d,eax shrd r14d,r14d,11 xor r12d,r10d xor r15d,ebx shrd r13d,r13d,6 add r11d,r12d and esi,r15d xor r14d,eax add r11d,r13d xor esi,ebx add edx,r11d shrd r14d,r14d,2 add r11d,esi mov r13d,edx add r14d,r11d shrd r13d,r13d,14 mov r11d,r14d mov r12d,r8d xor r13d,edx shrd r14d,r14d,9 xor r12d,r9d shrd r13d,r13d,5 xor r14d,r11d and r12d,edx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((144-128))+rdi] xor r13d,edx add r10d,DWORD[36+rsp] mov esi,r11d shrd r14d,r14d,11 xor 
r12d,r9d xor esi,eax shrd r13d,r13d,6 add r10d,r12d and r15d,esi xor r14d,r11d add r10d,r13d xor r15d,eax add ecx,r10d shrd r14d,r14d,2 add r10d,r15d mov r13d,ecx add r14d,r10d shrd r13d,r13d,14 mov r10d,r14d mov r12d,edx xor r13d,ecx shrd r14d,r14d,9 xor r12d,r8d shrd r13d,r13d,5 xor r14d,r10d and r12d,ecx vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((160-128))+rdi] xor r13d,ecx add r9d,DWORD[40+rsp] mov r15d,r10d shrd r14d,r14d,11 xor r12d,r8d xor r15d,r11d shrd r13d,r13d,6 add r9d,r12d and esi,r15d xor r14d,r10d add r9d,r13d xor esi,r11d add ebx,r9d shrd r14d,r14d,2 add r9d,esi mov r13d,ebx add r14d,r9d shrd r13d,r13d,14 mov r9d,r14d mov r12d,ecx xor r13d,ebx shrd r14d,r14d,9 xor r12d,edx shrd r13d,r13d,5 xor r14d,r9d and r12d,ebx vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((176-128))+rdi] xor r13d,ebx add r8d,DWORD[44+rsp] mov esi,r9d shrd r14d,r14d,11 xor r12d,edx xor esi,r10d shrd r13d,r13d,6 add r8d,r12d and r15d,esi xor r14d,r9d add r8d,r13d xor r15d,r10d add eax,r8d shrd r14d,r14d,2 add r8d,r15d mov r13d,eax add r14d,r8d shrd r13d,r13d,14 mov r8d,r14d mov r12d,ebx xor r13d,eax shrd r14d,r14d,9 xor r12d,ecx shrd r13d,r13d,5 xor r14d,r8d and r12d,eax vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((192-128))+rdi] xor r13d,eax add edx,DWORD[48+rsp] mov r15d,r8d shrd r14d,r14d,11 xor r12d,ecx xor r15d,r9d shrd r13d,r13d,6 add edx,r12d and esi,r15d xor r14d,r8d add edx,r13d xor esi,r9d add r11d,edx shrd r14d,r14d,2 add edx,esi mov r13d,r11d add r14d,edx shrd r13d,r13d,14 mov edx,r14d mov r12d,eax xor r13d,r11d shrd r14d,r14d,9 xor r12d,ebx shrd r13d,r13d,5 xor r14d,edx and r12d,r11d vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((208-128))+rdi] xor r13d,r11d add ecx,DWORD[52+rsp] mov esi,edx shrd r14d,r14d,11 xor r12d,ebx xor esi,r8d shrd r13d,r13d,6 add ecx,r12d and r15d,esi xor r14d,edx add ecx,r13d xor r15d,r8d add r10d,ecx shrd r14d,r14d,2 add ecx,r15d mov r13d,r10d add 
r14d,ecx shrd r13d,r13d,14 mov ecx,r14d mov r12d,r11d xor r13d,r10d shrd r14d,r14d,9 xor r12d,eax shrd r13d,r13d,5 xor r14d,ecx and r12d,r10d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((224-128))+rdi] xor r13d,r10d add ebx,DWORD[56+rsp] mov r15d,ecx shrd r14d,r14d,11 xor r12d,eax xor r15d,edx shrd r13d,r13d,6 add ebx,r12d and esi,r15d xor r14d,ecx add ebx,r13d xor esi,edx add r9d,ebx shrd r14d,r14d,2 add ebx,esi mov r13d,r9d add r14d,ebx shrd r13d,r13d,14 mov ebx,r14d mov r12d,r10d xor r13d,r9d shrd r14d,r14d,9 xor r12d,r11d shrd r13d,r13d,5 xor r14d,ebx and r12d,r9d vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((0-128))+rdi] xor r13d,r9d add eax,DWORD[60+rsp] mov esi,ebx shrd r14d,r14d,11 xor r12d,r11d xor esi,ecx shrd r13d,r13d,6 add eax,r12d and r15d,esi xor r14d,ebx add eax,r13d xor r15d,ecx add r8d,eax shrd r14d,r14d,2 add eax,r15d mov r13d,r8d add r14d,eax mov r12,QWORD[((64+0))+rsp] mov r13,QWORD[((64+8))+rsp] mov r15,QWORD[((64+40))+rsp] mov rsi,QWORD[((64+48))+rsp] vpand xmm11,xmm11,xmm14 mov eax,r14d vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD[r13*1+r12],xmm8 lea r12,[16+r12] add eax,DWORD[r15] add ebx,DWORD[4+r15] add ecx,DWORD[8+r15] add edx,DWORD[12+r15] add r8d,DWORD[16+r15] add r9d,DWORD[20+r15] add r10d,DWORD[24+r15] add r11d,DWORD[28+r15] cmp r12,QWORD[((64+16))+rsp] mov DWORD[r15],eax mov DWORD[4+r15],ebx mov DWORD[8+r15],ecx mov DWORD[12+r15],edx mov DWORD[16+r15],r8d mov DWORD[20+r15],r9d mov DWORD[24+r15],r10d mov DWORD[28+r15],r11d jb NEAR $L$loop_avx mov r8,QWORD[((64+32))+rsp] mov rsi,QWORD[120+rsp] vmovdqu XMMWORD[r8],xmm8 vzeroall movaps xmm6,XMMWORD[128+rsp] movaps xmm7,XMMWORD[144+rsp] movaps xmm8,XMMWORD[160+rsp] movaps xmm9,XMMWORD[176+rsp] movaps xmm10,XMMWORD[192+rsp] movaps xmm11,XMMWORD[208+rsp] movaps xmm12,XMMWORD[224+rsp] movaps xmm13,XMMWORD[240+rsp] movaps xmm14,XMMWORD[256+rsp] movaps xmm15,XMMWORD[272+rsp] mov r15,QWORD[((-48))+rsi] mov r14,QWORD[((-40))+rsi] mov 
r13,QWORD[((-32))+rsi] mov r12,QWORD[((-24))+rsi] mov rbp,QWORD[((-16))+rsi] mov rbx,QWORD[((-8))+rsi] lea rsp,[rsi] $L$epilogue_avx: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_aesni_cbc_sha256_enc_avx: ALIGN 64 aesni_cbc_sha256_enc_avx2: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_aesni_cbc_sha256_enc_avx2: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,r9 mov r8,QWORD[40+rsp] mov r9,QWORD[48+rsp] $L$avx2_shortcut: mov r10,QWORD[56+rsp] mov rax,rsp push rbx push rbp push r12 push r13 push r14 push r15 sub rsp,736 and rsp,-256*4 add rsp,448 shl rdx,6 sub rsi,rdi sub r10,rdi add rdx,rdi mov QWORD[((64+16))+rsp],rdx mov QWORD[((64+32))+rsp],r8 mov QWORD[((64+40))+rsp],r9 mov QWORD[((64+48))+rsp],r10 mov QWORD[120+rsp],rax movaps XMMWORD[128+rsp],xmm6 movaps XMMWORD[144+rsp],xmm7 movaps XMMWORD[160+rsp],xmm8 movaps XMMWORD[176+rsp],xmm9 movaps XMMWORD[192+rsp],xmm10 movaps XMMWORD[208+rsp],xmm11 movaps XMMWORD[224+rsp],xmm12 movaps XMMWORD[240+rsp],xmm13 movaps XMMWORD[256+rsp],xmm14 movaps XMMWORD[272+rsp],xmm15 $L$prologue_avx2: vzeroall mov r13,rdi vpinsrq xmm15,xmm15,rsi,1 lea rdi,[128+rcx] lea r12,[((K256+544))] mov r14d,DWORD[((240-128))+rdi] mov r15,r9 mov rsi,r10 vmovdqu xmm8,XMMWORD[r8] lea r14,[((-9))+r14] vmovdqa xmm14,XMMWORD[r14*8+r12] vmovdqa xmm13,XMMWORD[16+r14*8+r12] vmovdqa xmm12,XMMWORD[32+r14*8+r12] sub r13,-16*4 mov eax,DWORD[r15] lea r12,[r13*1+rsi] mov ebx,DWORD[4+r15] cmp r13,rdx mov ecx,DWORD[8+r15] cmove r12,rsp mov edx,DWORD[12+r15] mov r8d,DWORD[16+r15] mov r9d,DWORD[20+r15] mov r10d,DWORD[24+r15] mov r11d,DWORD[28+r15] vmovdqu xmm10,XMMWORD[((0-128))+rdi] jmp NEAR $L$oop_avx2 ALIGN 16 $L$oop_avx2: vmovdqa ymm7,YMMWORD[((K256+512))] vmovdqu xmm0,XMMWORD[((-64+0))+r13*1+rsi] vmovdqu xmm1,XMMWORD[((-64+16))+r13*1+rsi] vmovdqu xmm2,XMMWORD[((-64+32))+r13*1+rsi] vmovdqu xmm3,XMMWORD[((-64+48))+r13*1+rsi] vinserti128 ymm0,ymm0,XMMWORD[r12],1 vinserti128 
ymm1,ymm1,XMMWORD[16+r12],1 vpshufb ymm0,ymm0,ymm7 vinserti128 ymm2,ymm2,XMMWORD[32+r12],1 vpshufb ymm1,ymm1,ymm7 vinserti128 ymm3,ymm3,XMMWORD[48+r12],1 lea rbp,[K256] vpshufb ymm2,ymm2,ymm7 lea r13,[((-64))+r13] vpaddd ymm4,ymm0,YMMWORD[rbp] vpshufb ymm3,ymm3,ymm7 vpaddd ymm5,ymm1,YMMWORD[32+rbp] vpaddd ymm6,ymm2,YMMWORD[64+rbp] vpaddd ymm7,ymm3,YMMWORD[96+rbp] vmovdqa YMMWORD[rsp],ymm4 xor r14d,r14d vmovdqa YMMWORD[32+rsp],ymm5 lea rsp,[((-64))+rsp] mov esi,ebx vmovdqa YMMWORD[rsp],ymm6 xor esi,ecx vmovdqa YMMWORD[32+rsp],ymm7 mov r12d,r9d sub rbp,-16*2*4 jmp NEAR $L$avx2_00_47 ALIGN 16 $L$avx2_00_47: vmovdqu xmm9,XMMWORD[r13] vpinsrq xmm15,xmm15,r13,0 lea rsp,[((-64))+rsp] vpalignr ymm4,ymm1,ymm0,4 add r11d,DWORD[((0+128))+rsp] and r12d,r8d rorx r13d,r8d,25 vpalignr ymm7,ymm3,ymm2,4 rorx r15d,r8d,11 lea eax,[r14*1+rax] lea r11d,[r12*1+r11] vpsrld ymm6,ymm4,7 andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 vpaddd ymm0,ymm0,ymm7 lea r11d,[r12*1+r11] xor r13d,r14d mov r15d,eax vpsrld ymm7,ymm4,3 rorx r12d,eax,22 lea r11d,[r13*1+r11] xor r15d,ebx vpslld ymm5,ymm4,14 rorx r14d,eax,13 rorx r13d,eax,2 lea edx,[r11*1+rdx] vpxor ymm4,ymm7,ymm6 and esi,r15d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((16-128))+rdi] xor r14d,r12d xor esi,ebx vpshufd ymm7,ymm3,250 xor r14d,r13d lea r11d,[rsi*1+r11] mov r12d,r8d vpsrld ymm6,ymm6,11 add r10d,DWORD[((4+128))+rsp] and r12d,edx rorx r13d,edx,25 vpxor ymm4,ymm4,ymm5 rorx esi,edx,11 lea r11d,[r14*1+r11] lea r10d,[r12*1+r10] vpslld ymm5,ymm5,11 andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 vpxor ymm4,ymm4,ymm6 lea r10d,[r12*1+r10] xor r13d,r14d mov esi,r11d vpsrld ymm6,ymm7,10 rorx r12d,r11d,22 lea r10d,[r13*1+r10] xor esi,eax vpxor ymm4,ymm4,ymm5 rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,[r10*1+rcx] vpsrlq ymm7,ymm7,17 and r15d,esi vpxor xmm9,xmm9,xmm8 xor r14d,r12d xor r15d,eax vpaddd ymm0,ymm0,ymm4 xor r14d,r13d lea r10d,[r15*1+r10] mov r12d,edx vpxor ymm6,ymm6,ymm7 add r9d,DWORD[((8+128))+rsp] and r12d,ecx rorx r13d,ecx,25 
vpsrlq ymm7,ymm7,2 rorx r15d,ecx,11 lea r10d,[r14*1+r10] lea r9d,[r12*1+r9] vpxor ymm6,ymm6,ymm7 andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 vpshufd ymm6,ymm6,132 lea r9d,[r12*1+r9] xor r13d,r14d mov r15d,r10d vpsrldq ymm6,ymm6,8 rorx r12d,r10d,22 lea r9d,[r13*1+r9] xor r15d,r11d vpaddd ymm0,ymm0,ymm6 rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,[r9*1+rbx] vpshufd ymm7,ymm0,80 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((32-128))+rdi] xor r14d,r12d xor esi,r11d vpsrld ymm6,ymm7,10 xor r14d,r13d lea r9d,[rsi*1+r9] mov r12d,ecx vpsrlq ymm7,ymm7,17 add r8d,DWORD[((12+128))+rsp] and r12d,ebx rorx r13d,ebx,25 vpxor ymm6,ymm6,ymm7 rorx esi,ebx,11 lea r9d,[r14*1+r9] lea r8d,[r12*1+r8] vpsrlq ymm7,ymm7,2 andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 vpxor ymm6,ymm6,ymm7 lea r8d,[r12*1+r8] xor r13d,r14d mov esi,r9d vpshufd ymm6,ymm6,232 rorx r12d,r9d,22 lea r8d,[r13*1+r8] xor esi,r10d vpslldq ymm6,ymm6,8 rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,[r8*1+rax] vpaddd ymm0,ymm0,ymm6 and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((48-128))+rdi] xor r14d,r12d xor r15d,r10d vpaddd ymm6,ymm0,YMMWORD[rbp] xor r14d,r13d lea r8d,[r15*1+r8] mov r12d,ebx vmovdqa YMMWORD[rsp],ymm6 vpalignr ymm4,ymm2,ymm1,4 add edx,DWORD[((32+128))+rsp] and r12d,eax rorx r13d,eax,25 vpalignr ymm7,ymm0,ymm3,4 rorx r15d,eax,11 lea r8d,[r14*1+r8] lea edx,[r12*1+rdx] vpsrld ymm6,ymm4,7 andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 vpaddd ymm1,ymm1,ymm7 lea edx,[r12*1+rdx] xor r13d,r14d mov r15d,r8d vpsrld ymm7,ymm4,3 rorx r12d,r8d,22 lea edx,[r13*1+rdx] xor r15d,r9d vpslld ymm5,ymm4,14 rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,[rdx*1+r11] vpxor ymm4,ymm7,ymm6 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((64-128))+rdi] xor r14d,r12d xor esi,r9d vpshufd ymm7,ymm0,250 xor r14d,r13d lea edx,[rsi*1+rdx] mov r12d,eax vpsrld ymm6,ymm6,11 add ecx,DWORD[((36+128))+rsp] and r12d,r11d rorx r13d,r11d,25 vpxor ymm4,ymm4,ymm5 rorx esi,r11d,11 lea edx,[r14*1+rdx] lea 
ecx,[r12*1+rcx] vpslld ymm5,ymm5,11 andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 vpxor ymm4,ymm4,ymm6 lea ecx,[r12*1+rcx] xor r13d,r14d mov esi,edx vpsrld ymm6,ymm7,10 rorx r12d,edx,22 lea ecx,[r13*1+rcx] xor esi,r8d vpxor ymm4,ymm4,ymm5 rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,[rcx*1+r10] vpsrlq ymm7,ymm7,17 and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((80-128))+rdi] xor r14d,r12d xor r15d,r8d vpaddd ymm1,ymm1,ymm4 xor r14d,r13d lea ecx,[r15*1+rcx] mov r12d,r11d vpxor ymm6,ymm6,ymm7 add ebx,DWORD[((40+128))+rsp] and r12d,r10d rorx r13d,r10d,25 vpsrlq ymm7,ymm7,2 rorx r15d,r10d,11 lea ecx,[r14*1+rcx] lea ebx,[r12*1+rbx] vpxor ymm6,ymm6,ymm7 andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 vpshufd ymm6,ymm6,132 lea ebx,[r12*1+rbx] xor r13d,r14d mov r15d,ecx vpsrldq ymm6,ymm6,8 rorx r12d,ecx,22 lea ebx,[r13*1+rbx] xor r15d,edx vpaddd ymm1,ymm1,ymm6 rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,[rbx*1+r9] vpshufd ymm7,ymm1,80 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((96-128))+rdi] xor r14d,r12d xor esi,edx vpsrld ymm6,ymm7,10 xor r14d,r13d lea ebx,[rsi*1+rbx] mov r12d,r10d vpsrlq ymm7,ymm7,17 add eax,DWORD[((44+128))+rsp] and r12d,r9d rorx r13d,r9d,25 vpxor ymm6,ymm6,ymm7 rorx esi,r9d,11 lea ebx,[r14*1+rbx] lea eax,[r12*1+rax] vpsrlq ymm7,ymm7,2 andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 vpxor ymm6,ymm6,ymm7 lea eax,[r12*1+rax] xor r13d,r14d mov esi,ebx vpshufd ymm6,ymm6,232 rorx r12d,ebx,22 lea eax,[r13*1+rax] xor esi,ecx vpslldq ymm6,ymm6,8 rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,[rax*1+r8] vpaddd ymm1,ymm1,ymm6 and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((112-128))+rdi] xor r14d,r12d xor r15d,ecx vpaddd ymm6,ymm1,YMMWORD[32+rbp] xor r14d,r13d lea eax,[r15*1+rax] mov r12d,r9d vmovdqa YMMWORD[32+rsp],ymm6 lea rsp,[((-64))+rsp] vpalignr ymm4,ymm3,ymm2,4 add r11d,DWORD[((0+128))+rsp] and r12d,r8d rorx r13d,r8d,25 vpalignr ymm7,ymm1,ymm0,4 rorx r15d,r8d,11 lea eax,[r14*1+rax] lea r11d,[r12*1+r11] vpsrld 
ymm6,ymm4,7 andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 vpaddd ymm2,ymm2,ymm7 lea r11d,[r12*1+r11] xor r13d,r14d mov r15d,eax vpsrld ymm7,ymm4,3 rorx r12d,eax,22 lea r11d,[r13*1+r11] xor r15d,ebx vpslld ymm5,ymm4,14 rorx r14d,eax,13 rorx r13d,eax,2 lea edx,[r11*1+rdx] vpxor ymm4,ymm7,ymm6 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((128-128))+rdi] xor r14d,r12d xor esi,ebx vpshufd ymm7,ymm1,250 xor r14d,r13d lea r11d,[rsi*1+r11] mov r12d,r8d vpsrld ymm6,ymm6,11 add r10d,DWORD[((4+128))+rsp] and r12d,edx rorx r13d,edx,25 vpxor ymm4,ymm4,ymm5 rorx esi,edx,11 lea r11d,[r14*1+r11] lea r10d,[r12*1+r10] vpslld ymm5,ymm5,11 andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 vpxor ymm4,ymm4,ymm6 lea r10d,[r12*1+r10] xor r13d,r14d mov esi,r11d vpsrld ymm6,ymm7,10 rorx r12d,r11d,22 lea r10d,[r13*1+r10] xor esi,eax vpxor ymm4,ymm4,ymm5 rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,[r10*1+rcx] vpsrlq ymm7,ymm7,17 and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((144-128))+rdi] xor r14d,r12d xor r15d,eax vpaddd ymm2,ymm2,ymm4 xor r14d,r13d lea r10d,[r15*1+r10] mov r12d,edx vpxor ymm6,ymm6,ymm7 add r9d,DWORD[((8+128))+rsp] and r12d,ecx rorx r13d,ecx,25 vpsrlq ymm7,ymm7,2 rorx r15d,ecx,11 lea r10d,[r14*1+r10] lea r9d,[r12*1+r9] vpxor ymm6,ymm6,ymm7 andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 vpshufd ymm6,ymm6,132 lea r9d,[r12*1+r9] xor r13d,r14d mov r15d,r10d vpsrldq ymm6,ymm6,8 rorx r12d,r10d,22 lea r9d,[r13*1+r9] xor r15d,r11d vpaddd ymm2,ymm2,ymm6 rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,[r9*1+rbx] vpshufd ymm7,ymm2,80 and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((160-128))+rdi] xor r14d,r12d xor esi,r11d vpsrld ymm6,ymm7,10 xor r14d,r13d lea r9d,[rsi*1+r9] mov r12d,ecx vpsrlq ymm7,ymm7,17 add r8d,DWORD[((12+128))+rsp] and r12d,ebx rorx r13d,ebx,25 vpxor ymm6,ymm6,ymm7 rorx esi,ebx,11 lea r9d,[r14*1+r9] lea r8d,[r12*1+r8] vpsrlq ymm7,ymm7,2 andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 vpxor ymm6,ymm6,ymm7 lea r8d,[r12*1+r8] xor 
r13d,r14d mov esi,r9d vpshufd ymm6,ymm6,232 rorx r12d,r9d,22 lea r8d,[r13*1+r8] xor esi,r10d vpslldq ymm6,ymm6,8 rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,[r8*1+rax] vpaddd ymm2,ymm2,ymm6 and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((176-128))+rdi] xor r14d,r12d xor r15d,r10d vpaddd ymm6,ymm2,YMMWORD[64+rbp] xor r14d,r13d lea r8d,[r15*1+r8] mov r12d,ebx vmovdqa YMMWORD[rsp],ymm6 vpalignr ymm4,ymm0,ymm3,4 add edx,DWORD[((32+128))+rsp] and r12d,eax rorx r13d,eax,25 vpalignr ymm7,ymm2,ymm1,4 rorx r15d,eax,11 lea r8d,[r14*1+r8] lea edx,[r12*1+rdx] vpsrld ymm6,ymm4,7 andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 vpaddd ymm3,ymm3,ymm7 lea edx,[r12*1+rdx] xor r13d,r14d mov r15d,r8d vpsrld ymm7,ymm4,3 rorx r12d,r8d,22 lea edx,[r13*1+rdx] xor r15d,r9d vpslld ymm5,ymm4,14 rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,[rdx*1+r11] vpxor ymm4,ymm7,ymm6 and esi,r15d vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((192-128))+rdi] xor r14d,r12d xor esi,r9d vpshufd ymm7,ymm2,250 xor r14d,r13d lea edx,[rsi*1+rdx] mov r12d,eax vpsrld ymm6,ymm6,11 add ecx,DWORD[((36+128))+rsp] and r12d,r11d rorx r13d,r11d,25 vpxor ymm4,ymm4,ymm5 rorx esi,r11d,11 lea edx,[r14*1+rdx] lea ecx,[r12*1+rcx] vpslld ymm5,ymm5,11 andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 vpxor ymm4,ymm4,ymm6 lea ecx,[r12*1+rcx] xor r13d,r14d mov esi,edx vpsrld ymm6,ymm7,10 rorx r12d,edx,22 lea ecx,[r13*1+rcx] xor esi,r8d vpxor ymm4,ymm4,ymm5 rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,[rcx*1+r10] vpsrlq ymm7,ymm7,17 and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((208-128))+rdi] xor r14d,r12d xor r15d,r8d vpaddd ymm3,ymm3,ymm4 xor r14d,r13d lea ecx,[r15*1+rcx] mov r12d,r11d vpxor ymm6,ymm6,ymm7 add ebx,DWORD[((40+128))+rsp] and r12d,r10d rorx r13d,r10d,25 vpsrlq ymm7,ymm7,2 rorx r15d,r10d,11 lea ecx,[r14*1+rcx] lea ebx,[r12*1+rbx] vpxor ymm6,ymm6,ymm7 andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 vpshufd 
ymm6,ymm6,132 lea ebx,[r12*1+rbx] xor r13d,r14d mov r15d,ecx vpsrldq ymm6,ymm6,8 rorx r12d,ecx,22 lea ebx,[r13*1+rbx] xor r15d,edx vpaddd ymm3,ymm3,ymm6 rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,[rbx*1+r9] vpshufd ymm7,ymm3,80 and esi,r15d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((224-128))+rdi] xor r14d,r12d xor esi,edx vpsrld ymm6,ymm7,10 xor r14d,r13d lea ebx,[rsi*1+rbx] mov r12d,r10d vpsrlq ymm7,ymm7,17 add eax,DWORD[((44+128))+rsp] and r12d,r9d rorx r13d,r9d,25 vpxor ymm6,ymm6,ymm7 rorx esi,r9d,11 lea ebx,[r14*1+rbx] lea eax,[r12*1+rax] vpsrlq ymm7,ymm7,2 andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 vpxor ymm6,ymm6,ymm7 lea eax,[r12*1+rax] xor r13d,r14d mov esi,ebx vpshufd ymm6,ymm6,232 rorx r12d,ebx,22 lea eax,[r13*1+rax] xor esi,ecx vpslldq ymm6,ymm6,8 rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,[rax*1+r8] vpaddd ymm3,ymm3,ymm6 and r15d,esi vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((0-128))+rdi] xor r14d,r12d xor r15d,ecx vpaddd ymm6,ymm3,YMMWORD[96+rbp] xor r14d,r13d lea eax,[r15*1+rax] mov r12d,r9d vmovdqa YMMWORD[32+rsp],ymm6 vmovq r13,xmm15 vpextrq r15,xmm15,1 vpand xmm11,xmm11,xmm14 vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD[r13*1+r15],xmm8 lea r13,[16+r13] lea rbp,[128+rbp] cmp BYTE[3+rbp],0 jne NEAR $L$avx2_00_47 vmovdqu xmm9,XMMWORD[r13] vpinsrq xmm15,xmm15,r13,0 add r11d,DWORD[((0+64))+rsp] and r12d,r8d rorx r13d,r8d,25 rorx r15d,r8d,11 lea eax,[r14*1+rax] lea r11d,[r12*1+r11] andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 lea r11d,[r12*1+r11] xor r13d,r14d mov r15d,eax rorx r12d,eax,22 lea r11d,[r13*1+r11] xor r15d,ebx rorx r14d,eax,13 rorx r13d,eax,2 lea edx,[r11*1+rdx] and esi,r15d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((16-128))+rdi] xor r14d,r12d xor esi,ebx xor r14d,r13d lea r11d,[rsi*1+r11] mov r12d,r8d add r10d,DWORD[((4+64))+rsp] and r12d,edx rorx r13d,edx,25 rorx esi,edx,11 lea r11d,[r14*1+r11] lea r10d,[r12*1+r10] andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 lea 
r10d,[r12*1+r10] xor r13d,r14d mov esi,r11d rorx r12d,r11d,22 lea r10d,[r13*1+r10] xor esi,eax rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,[r10*1+rcx] and r15d,esi vpxor xmm9,xmm9,xmm8 xor r14d,r12d xor r15d,eax xor r14d,r13d lea r10d,[r15*1+r10] mov r12d,edx add r9d,DWORD[((8+64))+rsp] and r12d,ecx rorx r13d,ecx,25 rorx r15d,ecx,11 lea r10d,[r14*1+r10] lea r9d,[r12*1+r9] andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 lea r9d,[r12*1+r9] xor r13d,r14d mov r15d,r10d rorx r12d,r10d,22 lea r9d,[r13*1+r9] xor r15d,r11d rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,[r9*1+rbx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((32-128))+rdi] xor r14d,r12d xor esi,r11d xor r14d,r13d lea r9d,[rsi*1+r9] mov r12d,ecx add r8d,DWORD[((12+64))+rsp] and r12d,ebx rorx r13d,ebx,25 rorx esi,ebx,11 lea r9d,[r14*1+r9] lea r8d,[r12*1+r8] andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 lea r8d,[r12*1+r8] xor r13d,r14d mov esi,r9d rorx r12d,r9d,22 lea r8d,[r13*1+r8] xor esi,r10d rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,[r8*1+rax] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((48-128))+rdi] xor r14d,r12d xor r15d,r10d xor r14d,r13d lea r8d,[r15*1+r8] mov r12d,ebx add edx,DWORD[((32+64))+rsp] and r12d,eax rorx r13d,eax,25 rorx r15d,eax,11 lea r8d,[r14*1+r8] lea edx,[r12*1+rdx] andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 lea edx,[r12*1+rdx] xor r13d,r14d mov r15d,r8d rorx r12d,r8d,22 lea edx,[r13*1+rdx] xor r15d,r9d rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,[rdx*1+r11] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((64-128))+rdi] xor r14d,r12d xor esi,r9d xor r14d,r13d lea edx,[rsi*1+rdx] mov r12d,eax add ecx,DWORD[((36+64))+rsp] and r12d,r11d rorx r13d,r11d,25 rorx esi,r11d,11 lea edx,[r14*1+rdx] lea ecx,[r12*1+rcx] andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 lea ecx,[r12*1+rcx] xor r13d,r14d mov esi,edx rorx r12d,edx,22 lea ecx,[r13*1+rcx] xor esi,r8d rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,[rcx*1+r10] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu 
xmm10,XMMWORD[((80-128))+rdi] xor r14d,r12d xor r15d,r8d xor r14d,r13d lea ecx,[r15*1+rcx] mov r12d,r11d add ebx,DWORD[((40+64))+rsp] and r12d,r10d rorx r13d,r10d,25 rorx r15d,r10d,11 lea ecx,[r14*1+rcx] lea ebx,[r12*1+rbx] andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 lea ebx,[r12*1+rbx] xor r13d,r14d mov r15d,ecx rorx r12d,ecx,22 lea ebx,[r13*1+rbx] xor r15d,edx rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,[rbx*1+r9] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((96-128))+rdi] xor r14d,r12d xor esi,edx xor r14d,r13d lea ebx,[rsi*1+rbx] mov r12d,r10d add eax,DWORD[((44+64))+rsp] and r12d,r9d rorx r13d,r9d,25 rorx esi,r9d,11 lea ebx,[r14*1+rbx] lea eax,[r12*1+rax] andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 lea eax,[r12*1+rax] xor r13d,r14d mov esi,ebx rorx r12d,ebx,22 lea eax,[r13*1+rax] xor esi,ecx rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,[rax*1+r8] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((112-128))+rdi] xor r14d,r12d xor r15d,ecx xor r14d,r13d lea eax,[r15*1+rax] mov r12d,r9d add r11d,DWORD[rsp] and r12d,r8d rorx r13d,r8d,25 rorx r15d,r8d,11 lea eax,[r14*1+rax] lea r11d,[r12*1+r11] andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 lea r11d,[r12*1+r11] xor r13d,r14d mov r15d,eax rorx r12d,eax,22 lea r11d,[r13*1+r11] xor r15d,ebx rorx r14d,eax,13 rorx r13d,eax,2 lea edx,[r11*1+rdx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((128-128))+rdi] xor r14d,r12d xor esi,ebx xor r14d,r13d lea r11d,[rsi*1+r11] mov r12d,r8d add r10d,DWORD[4+rsp] and r12d,edx rorx r13d,edx,25 rorx esi,edx,11 lea r11d,[r14*1+r11] lea r10d,[r12*1+r10] andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 lea r10d,[r12*1+r10] xor r13d,r14d mov esi,r11d rorx r12d,r11d,22 lea r10d,[r13*1+r10] xor esi,eax rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,[r10*1+rcx] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((144-128))+rdi] xor r14d,r12d xor r15d,eax xor r14d,r13d lea r10d,[r15*1+r10] mov r12d,edx add r9d,DWORD[8+rsp] and r12d,ecx rorx 
r13d,ecx,25 rorx r15d,ecx,11 lea r10d,[r14*1+r10] lea r9d,[r12*1+r9] andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 lea r9d,[r12*1+r9] xor r13d,r14d mov r15d,r10d rorx r12d,r10d,22 lea r9d,[r13*1+r9] xor r15d,r11d rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,[r9*1+rbx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((160-128))+rdi] xor r14d,r12d xor esi,r11d xor r14d,r13d lea r9d,[rsi*1+r9] mov r12d,ecx add r8d,DWORD[12+rsp] and r12d,ebx rorx r13d,ebx,25 rorx esi,ebx,11 lea r9d,[r14*1+r9] lea r8d,[r12*1+r8] andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 lea r8d,[r12*1+r8] xor r13d,r14d mov esi,r9d rorx r12d,r9d,22 lea r8d,[r13*1+r8] xor esi,r10d rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,[r8*1+rax] and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((176-128))+rdi] xor r14d,r12d xor r15d,r10d xor r14d,r13d lea r8d,[r15*1+r8] mov r12d,ebx add edx,DWORD[32+rsp] and r12d,eax rorx r13d,eax,25 rorx r15d,eax,11 lea r8d,[r14*1+r8] lea edx,[r12*1+rdx] andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 lea edx,[r12*1+rdx] xor r13d,r14d mov r15d,r8d rorx r12d,r8d,22 lea edx,[r13*1+rdx] xor r15d,r9d rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,[rdx*1+r11] and esi,r15d vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((192-128))+rdi] xor r14d,r12d xor esi,r9d xor r14d,r13d lea edx,[rsi*1+rdx] mov r12d,eax add ecx,DWORD[36+rsp] and r12d,r11d rorx r13d,r11d,25 rorx esi,r11d,11 lea edx,[r14*1+rdx] lea ecx,[r12*1+rcx] andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 lea ecx,[r12*1+rcx] xor r13d,r14d mov esi,edx rorx r12d,edx,22 lea ecx,[r13*1+rcx] xor esi,r8d rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,[rcx*1+r10] and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((208-128))+rdi] xor r14d,r12d xor r15d,r8d xor r14d,r13d lea ecx,[r15*1+rcx] mov r12d,r11d add ebx,DWORD[40+rsp] and r12d,r10d rorx r13d,r10d,25 rorx r15d,r10d,11 lea ecx,[r14*1+rcx] lea ebx,[r12*1+rbx] andn r12d,r10d,eax xor 
r13d,r15d rorx r14d,r10d,6 lea ebx,[r12*1+rbx] xor r13d,r14d mov r15d,ecx rorx r12d,ecx,22 lea ebx,[r13*1+rbx] xor r15d,edx rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,[rbx*1+r9] and esi,r15d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((224-128))+rdi] xor r14d,r12d xor esi,edx xor r14d,r13d lea ebx,[rsi*1+rbx] mov r12d,r10d add eax,DWORD[44+rsp] and r12d,r9d rorx r13d,r9d,25 rorx esi,r9d,11 lea ebx,[r14*1+rbx] lea eax,[r12*1+rax] andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 lea eax,[r12*1+rax] xor r13d,r14d mov esi,ebx rorx r12d,ebx,22 lea eax,[r13*1+rax] xor esi,ecx rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,[rax*1+r8] and r15d,esi vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((0-128))+rdi] xor r14d,r12d xor r15d,ecx xor r14d,r13d lea eax,[r15*1+rax] mov r12d,r9d vpextrq r12,xmm15,1 vmovq r13,xmm15 mov r15,QWORD[552+rsp] add eax,r14d lea rbp,[448+rsp] vpand xmm11,xmm11,xmm14 vpor xmm8,xmm8,xmm11 vmovdqu XMMWORD[r13*1+r12],xmm8 lea r13,[16+r13] add eax,DWORD[r15] add ebx,DWORD[4+r15] add ecx,DWORD[8+r15] add edx,DWORD[12+r15] add r8d,DWORD[16+r15] add r9d,DWORD[20+r15] add r10d,DWORD[24+r15] add r11d,DWORD[28+r15] mov DWORD[r15],eax mov DWORD[4+r15],ebx mov DWORD[8+r15],ecx mov DWORD[12+r15],edx mov DWORD[16+r15],r8d mov DWORD[20+r15],r9d mov DWORD[24+r15],r10d mov DWORD[28+r15],r11d cmp r13,QWORD[80+rbp] je NEAR $L$done_avx2 xor r14d,r14d mov esi,ebx mov r12d,r9d xor esi,ecx jmp NEAR $L$ower_avx2 ALIGN 16 $L$ower_avx2: vmovdqu xmm9,XMMWORD[r13] vpinsrq xmm15,xmm15,r13,0 add r11d,DWORD[((0+16))+rbp] and r12d,r8d rorx r13d,r8d,25 rorx r15d,r8d,11 lea eax,[r14*1+rax] lea r11d,[r12*1+r11] andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 lea r11d,[r12*1+r11] xor r13d,r14d mov r15d,eax rorx r12d,eax,22 lea r11d,[r13*1+r11] xor r15d,ebx rorx r14d,eax,13 rorx r13d,eax,2 lea edx,[r11*1+rdx] and esi,r15d vpxor xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((16-128))+rdi] xor r14d,r12d xor esi,ebx xor r14d,r13d lea r11d,[rsi*1+r11] 
mov r12d,r8d add r10d,DWORD[((4+16))+rbp] and r12d,edx rorx r13d,edx,25 rorx esi,edx,11 lea r11d,[r14*1+r11] lea r10d,[r12*1+r10] andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 lea r10d,[r12*1+r10] xor r13d,r14d mov esi,r11d rorx r12d,r11d,22 lea r10d,[r13*1+r10] xor esi,eax rorx r14d,r11d,13 rorx r13d,r11d,2 lea ecx,[r10*1+rcx] and r15d,esi vpxor xmm9,xmm9,xmm8 xor r14d,r12d xor r15d,eax xor r14d,r13d lea r10d,[r15*1+r10] mov r12d,edx add r9d,DWORD[((8+16))+rbp] and r12d,ecx rorx r13d,ecx,25 rorx r15d,ecx,11 lea r10d,[r14*1+r10] lea r9d,[r12*1+r9] andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 lea r9d,[r12*1+r9] xor r13d,r14d mov r15d,r10d rorx r12d,r10d,22 lea r9d,[r13*1+r9] xor r15d,r11d rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,[r9*1+rbx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((32-128))+rdi] xor r14d,r12d xor esi,r11d xor r14d,r13d lea r9d,[rsi*1+r9] mov r12d,ecx add r8d,DWORD[((12+16))+rbp] and r12d,ebx rorx r13d,ebx,25 rorx esi,ebx,11 lea r9d,[r14*1+r9] lea r8d,[r12*1+r8] andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 lea r8d,[r12*1+r8] xor r13d,r14d mov esi,r9d rorx r12d,r9d,22 lea r8d,[r13*1+r8] xor esi,r10d rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,[r8*1+rax] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((48-128))+rdi] xor r14d,r12d xor r15d,r10d xor r14d,r13d lea r8d,[r15*1+r8] mov r12d,ebx add edx,DWORD[((32+16))+rbp] and r12d,eax rorx r13d,eax,25 rorx r15d,eax,11 lea r8d,[r14*1+r8] lea edx,[r12*1+rdx] andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 lea edx,[r12*1+rdx] xor r13d,r14d mov r15d,r8d rorx r12d,r8d,22 lea edx,[r13*1+rdx] xor r15d,r9d rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,[rdx*1+r11] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((64-128))+rdi] xor r14d,r12d xor esi,r9d xor r14d,r13d lea edx,[rsi*1+rdx] mov r12d,eax add ecx,DWORD[((36+16))+rbp] and r12d,r11d rorx r13d,r11d,25 rorx esi,r11d,11 lea edx,[r14*1+rdx] lea ecx,[r12*1+rcx] andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 lea 
ecx,[r12*1+rcx] xor r13d,r14d mov esi,edx rorx r12d,edx,22 lea ecx,[r13*1+rcx] xor esi,r8d rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,[rcx*1+r10] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((80-128))+rdi] xor r14d,r12d xor r15d,r8d xor r14d,r13d lea ecx,[r15*1+rcx] mov r12d,r11d add ebx,DWORD[((40+16))+rbp] and r12d,r10d rorx r13d,r10d,25 rorx r15d,r10d,11 lea ecx,[r14*1+rcx] lea ebx,[r12*1+rbx] andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 lea ebx,[r12*1+rbx] xor r13d,r14d mov r15d,ecx rorx r12d,ecx,22 lea ebx,[r13*1+rbx] xor r15d,edx rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,[rbx*1+r9] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((96-128))+rdi] xor r14d,r12d xor esi,edx xor r14d,r13d lea ebx,[rsi*1+rbx] mov r12d,r10d add eax,DWORD[((44+16))+rbp] and r12d,r9d rorx r13d,r9d,25 rorx esi,r9d,11 lea ebx,[r14*1+rbx] lea eax,[r12*1+rax] andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 lea eax,[r12*1+rax] xor r13d,r14d mov esi,ebx rorx r12d,ebx,22 lea eax,[r13*1+rax] xor esi,ecx rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,[rax*1+r8] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((112-128))+rdi] xor r14d,r12d xor r15d,ecx xor r14d,r13d lea eax,[r15*1+rax] mov r12d,r9d lea rbp,[((-64))+rbp] add r11d,DWORD[((0+16))+rbp] and r12d,r8d rorx r13d,r8d,25 rorx r15d,r8d,11 lea eax,[r14*1+rax] lea r11d,[r12*1+r11] andn r12d,r8d,r10d xor r13d,r15d rorx r14d,r8d,6 lea r11d,[r12*1+r11] xor r13d,r14d mov r15d,eax rorx r12d,eax,22 lea r11d,[r13*1+r11] xor r15d,ebx rorx r14d,eax,13 rorx r13d,eax,2 lea edx,[r11*1+rdx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((128-128))+rdi] xor r14d,r12d xor esi,ebx xor r14d,r13d lea r11d,[rsi*1+r11] mov r12d,r8d add r10d,DWORD[((4+16))+rbp] and r12d,edx rorx r13d,edx,25 rorx esi,edx,11 lea r11d,[r14*1+r11] lea r10d,[r12*1+r10] andn r12d,edx,r9d xor r13d,esi rorx r14d,edx,6 lea r10d,[r12*1+r10] xor r13d,r14d mov esi,r11d rorx r12d,r11d,22 lea r10d,[r13*1+r10] xor esi,eax rorx r14d,r11d,13 rorx 
r13d,r11d,2 lea ecx,[r10*1+rcx] and r15d,esi vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((144-128))+rdi] xor r14d,r12d xor r15d,eax xor r14d,r13d lea r10d,[r15*1+r10] mov r12d,edx add r9d,DWORD[((8+16))+rbp] and r12d,ecx rorx r13d,ecx,25 rorx r15d,ecx,11 lea r10d,[r14*1+r10] lea r9d,[r12*1+r9] andn r12d,ecx,r8d xor r13d,r15d rorx r14d,ecx,6 lea r9d,[r12*1+r9] xor r13d,r14d mov r15d,r10d rorx r12d,r10d,22 lea r9d,[r13*1+r9] xor r15d,r11d rorx r14d,r10d,13 rorx r13d,r10d,2 lea ebx,[r9*1+rbx] and esi,r15d vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((160-128))+rdi] xor r14d,r12d xor esi,r11d xor r14d,r13d lea r9d,[rsi*1+r9] mov r12d,ecx add r8d,DWORD[((12+16))+rbp] and r12d,ebx rorx r13d,ebx,25 rorx esi,ebx,11 lea r9d,[r14*1+r9] lea r8d,[r12*1+r8] andn r12d,ebx,edx xor r13d,esi rorx r14d,ebx,6 lea r8d,[r12*1+r8] xor r13d,r14d mov esi,r9d rorx r12d,r9d,22 lea r8d,[r13*1+r8] xor esi,r10d rorx r14d,r9d,13 rorx r13d,r9d,2 lea eax,[r8*1+rax] and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((176-128))+rdi] xor r14d,r12d xor r15d,r10d xor r14d,r13d lea r8d,[r15*1+r8] mov r12d,ebx add edx,DWORD[((32+16))+rbp] and r12d,eax rorx r13d,eax,25 rorx r15d,eax,11 lea r8d,[r14*1+r8] lea edx,[r12*1+rdx] andn r12d,eax,ecx xor r13d,r15d rorx r14d,eax,6 lea edx,[r12*1+rdx] xor r13d,r14d mov r15d,r8d rorx r12d,r8d,22 lea edx,[r13*1+rdx] xor r15d,r9d rorx r14d,r8d,13 rorx r13d,r8d,2 lea r11d,[rdx*1+r11] and esi,r15d vpand xmm8,xmm11,xmm12 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((192-128))+rdi] xor r14d,r12d xor esi,r9d xor r14d,r13d lea edx,[rsi*1+rdx] mov r12d,eax add ecx,DWORD[((36+16))+rbp] and r12d,r11d rorx r13d,r11d,25 rorx esi,r11d,11 lea edx,[r14*1+rdx] lea ecx,[r12*1+rcx] andn r12d,r11d,ebx xor r13d,esi rorx r14d,r11d,6 lea ecx,[r12*1+rcx] xor r13d,r14d mov esi,edx rorx r12d,edx,22 lea ecx,[r13*1+rcx] xor esi,r8d rorx r14d,edx,13 rorx r13d,edx,2 lea r10d,[rcx*1+r10] and r15d,esi vaesenclast xmm11,xmm9,xmm10 vaesenc xmm9,xmm9,xmm10 
vmovdqu xmm10,XMMWORD[((208-128))+rdi] xor r14d,r12d xor r15d,r8d xor r14d,r13d lea ecx,[r15*1+rcx] mov r12d,r11d add ebx,DWORD[((40+16))+rbp] and r12d,r10d rorx r13d,r10d,25 rorx r15d,r10d,11 lea ecx,[r14*1+rcx] lea ebx,[r12*1+rbx] andn r12d,r10d,eax xor r13d,r15d rorx r14d,r10d,6 lea ebx,[r12*1+rbx] xor r13d,r14d mov r15d,ecx rorx r12d,ecx,22 lea ebx,[r13*1+rbx] xor r15d,edx rorx r14d,ecx,13 rorx r13d,ecx,2 lea r9d,[rbx*1+r9] and esi,r15d vpand xmm11,xmm11,xmm13 vaesenc xmm9,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((224-128))+rdi] xor r14d,r12d xor esi,edx xor r14d,r13d lea ebx,[rsi*1+rbx] mov r12d,r10d add eax,DWORD[((44+16))+rbp] and r12d,r9d rorx r13d,r9d,25 rorx esi,r9d,11 lea ebx,[r14*1+rbx] lea eax,[r12*1+rax] andn r12d,r9d,r11d xor r13d,esi rorx r14d,r9d,6 lea eax,[r12*1+rax] xor r13d,r14d mov esi,ebx rorx r12d,ebx,22 lea eax,[r13*1+rax] xor esi,ecx rorx r14d,ebx,13 rorx r13d,ebx,2 lea r8d,[rax*1+r8] and r15d,esi vpor xmm8,xmm8,xmm11 vaesenclast xmm11,xmm9,xmm10 vmovdqu xmm10,XMMWORD[((0-128))+rdi] xor r14d,r12d xor r15d,ecx xor r14d,r13d lea eax,[r15*1+rax] mov r12d,r9d vmovq r13,xmm15 vpextrq r15,xmm15,1 vpand xmm11,xmm11,xmm14 vpor xmm8,xmm8,xmm11 lea rbp,[((-64))+rbp] vmovdqu XMMWORD[r13*1+r15],xmm8 lea r13,[16+r13] cmp rbp,rsp jae NEAR $L$ower_avx2 mov r15,QWORD[552+rsp] lea r13,[64+r13] mov rsi,QWORD[560+rsp] add eax,r14d lea rsp,[448+rsp] add eax,DWORD[r15] add ebx,DWORD[4+r15] add ecx,DWORD[8+r15] add edx,DWORD[12+r15] add r8d,DWORD[16+r15] add r9d,DWORD[20+r15] add r10d,DWORD[24+r15] lea r12,[r13*1+rsi] add r11d,DWORD[28+r15] cmp r13,QWORD[((64+16))+rsp] mov DWORD[r15],eax cmove r12,rsp mov DWORD[4+r15],ebx mov DWORD[8+r15],ecx mov DWORD[12+r15],edx mov DWORD[16+r15],r8d mov DWORD[20+r15],r9d mov DWORD[24+r15],r10d mov DWORD[28+r15],r11d jbe NEAR $L$oop_avx2 lea rbp,[rsp] $L$done_avx2: lea rsp,[rbp] mov r8,QWORD[((64+32))+rsp] mov rsi,QWORD[120+rsp] vmovdqu XMMWORD[r8],xmm8 vzeroall movaps xmm6,XMMWORD[128+rsp] movaps xmm7,XMMWORD[144+rsp] movaps 
xmm8,XMMWORD[160+rsp]
; ^ operands of the `movaps` that ends the preceding source line.
;
; ---- tail of the AVX2 routine's epilogue ------------------------------
; Reload the remaining XMM registers from the save area at [128..287+rsp]
; and the six integer registers from just below the address held in rsi
; (rsi was loaded from [120+rsp] a few instructions earlier).
        movaps xmm9,XMMWORD[176+rsp]
        movaps xmm10,XMMWORD[192+rsp]
        movaps xmm11,XMMWORD[208+rsp]
        movaps xmm12,XMMWORD[224+rsp]
        movaps xmm13,XMMWORD[240+rsp]
        movaps xmm14,XMMWORD[256+rsp]
        movaps xmm15,XMMWORD[272+rsp]
        mov r15,QWORD[((-48))+rsi]
        mov r14,QWORD[((-40))+rsi]
        mov r13,QWORD[((-32))+rsi]
        mov r12,QWORD[((-24))+rsi]
        mov rbp,QWORD[((-16))+rsi]
        mov rbx,QWORD[((-8))+rsi]
        lea rsp,[rsi]                   ; switch rsp to the value held in rsi
$L$epilogue_avx2:
        mov rdi,QWORD[8+rsp] ;WIN64 epilogue
        mov rsi,QWORD[16+rsp]
        DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_cbc_sha256_enc_avx2:

;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc_shaext
;
; Reached from the aesni_cbc_sha256_enc dispatcher when bit 61 of the
; qword at OPENSSL_ia32cap_P+4 is set.  Each loop iteration CBC-encrypts
; four 16-byte blocks with aesenc/aesenclast (chaining value in xmm6)
; while running SHA-256 rounds, emitted as raw opcode bytes, over 64
; bytes read through r10.
;
; ABI: Microsoft x64 on entry.  The prologue moves the arguments into the
; registers the body uses:
;   rcx      -> rdi  input pointer; read 16 bytes at a time, +64 per pass
;   rdx      -> rsi  output pointer; turned into (out - in) so stores use
;                    [rdi+rsi]
;   r8       -> rdx  iteration count (one iteration = 64 bytes)
;   r9       -> rcx  AES round keys; the dword at +240 is kept in r11d
;   [40+rsp] -> r8   16-byte chaining value: loaded into xmm6 and written
;                    back on exit
;   [48+rsp] -> r9   32-byte hash state: loaded into xmm1/xmm2 and stored
;                    back on exit
;   [56+rsp] -> r10  data fed to the hash, 64 bytes per iteration
;
; Preserved: rdi/rsi (spilled to the caller's home slots) and xmm6-xmm15
; (spilled to a 160-byte area carved from the stack).
;
; NOTE(review): the loop is bottom-tested (dec rdx / jnz) and nothing in
; this routine checks rdx first, so a zero count is not handled here -
; callers must pass at least one iteration.
;
; Raw opcode legend (the final ModRM byte selects the registers, an extra
; leading 68/65/69 is a REX prefix reaching xmm8-xmm15):
;   DB 15,56,203,mm            0F 38 CB /r     sha256rnds2 (implicit xmm0)
;   DB 69,15,56,204,mm         0F 38 CC /r     sha256msg1
;   DB 69,15,56,205,mm         0F 38 CD /r     sha256msg2
;   DB 102,[rex],15,56,0,mm    66 0F 38 00 /r  pshufb
;   DB 102,[rex],15,58,15,mm,i 66 0F 3A 0F /r  palignr (i = byte shift)
;-----------------------------------------------------------------------
ALIGN 32
aesni_cbc_sha256_enc_shaext:
        mov QWORD[8+rsp],rdi ;WIN64 prologue
        mov QWORD[16+rsp],rsi
        mov rax,rsp                     ; rax = rsp at entry; base for the XMM spills below
$L$SEH_begin_aesni_cbc_sha256_enc_shaext:
        mov rdi,rcx
        mov rsi,rdx
        mov rdx,r8
        mov rcx,r9
        mov r8,QWORD[40+rsp]
        mov r9,QWORD[48+rsp]
        mov r10,QWORD[56+rsp]
        ; 160 bytes for ten XMM registers plus 8 bytes of padding; the
        ; slots are addressed from rax and land at [rsp+0 .. rsp+159].
        lea rsp,[((-168))+rsp]
        movaps XMMWORD[(-8-160)+rax],xmm6
        movaps XMMWORD[(-8-144)+rax],xmm7
        movaps XMMWORD[(-8-128)+rax],xmm8
        movaps XMMWORD[(-8-112)+rax],xmm9
        movaps XMMWORD[(-8-96)+rax],xmm10
        movaps XMMWORD[(-8-80)+rax],xmm11
        movaps XMMWORD[(-8-64)+rax],xmm12
        movaps XMMWORD[(-8-48)+rax],xmm13
        movaps XMMWORD[(-8-32)+rax],xmm14
        movaps XMMWORD[(-8-16)+rax],xmm15
$L$prologue_shaext:
        lea rax,[((K256+128))]          ; table base biased by +128; entries are read as [(n-128)+rax]
        movdqu xmm1,XMMWORD[r9]         ; hash state, first 16 bytes
        movdqu xmm2,XMMWORD[16+r9]      ; hash state, second 16 bytes
        movdqa xmm3,XMMWORD[((512-128))+rax] ; K256+512: the 0x00010203.. row, used as the pshufb mask
        mov r11d,DWORD[240+rcx]         ; selects the AES round count (compared with 11 below)
        sub rsi,rdi                     ; rsi = out - in
        movups xmm15,XMMWORD[rcx]       ; first round key, kept in xmm15 for the whole loop
        movups xmm6,XMMWORD[r8]         ; chaining value
        movups xmm4,XMMWORD[16+rcx]     ; second round key
        lea rcx,[112+rcx]               ; bias the key pointer; later keys are read at [-80..112+rcx]
        ; Rearrange the two state vectors into the layout the SHA
        ; instructions below operate on (undone after the loop).
        pshufd xmm0,xmm1,0x1b
        pshufd xmm1,xmm1,0xb1
        pshufd xmm2,xmm2,0x1b
        movdqa xmm7,xmm3                ; keep the shuffle mask; xmm3 doubles as scratch in the loop
        DB 102,15,58,15,202,8           ; palignr xmm1,xmm2,8
        punpcklqdq xmm2,xmm0
        jmp NEAR $L$oop_shaext

ALIGN 16
$L$oop_shaext:
        ; Load 64 bytes for the hash and byte-shuffle them with the mask in xmm3.
        movdqu xmm10,XMMWORD[r10]
        movdqu xmm11,XMMWORD[16+r10]
        movdqu xmm12,XMMWORD[32+r10]
        DB 102,68,15,56,0,211           ; pshufb xmm10,xmm3
        movdqu xmm13,XMMWORD[48+r10]
        movdqa xmm0,XMMWORD[((0-128))+rax]
        paddd xmm0,xmm10
        DB 102,68,15,56,0,219           ; pshufb xmm11,xmm3
        movdqa xmm9,xmm2                ; snapshot the incoming state; added back at the loop bottom
        movdqa xmm8,xmm1
        ; ---- AES block 1: input [rdi] --------------------------------
        movups xmm14,XMMWORD[rdi]
        xorps xmm14,xmm15               ; input ^ first round key
        xorps xmm6,xmm14                ; ^ chaining value
        movups xmm5,XMMWORD[((-80))+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,209                ; sha256rnds2 xmm2,xmm1
        pshufd xmm0,xmm0,0x0e           ; bring the upper qword of xmm0 down for the next rnds2
        movups xmm4,XMMWORD[((-64))+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,202                ; sha256rnds2 xmm1,xmm2
        movdqa xmm0,XMMWORD[((32-128))+rax]
        paddd xmm0,xmm11
        DB 102,68,15,56,0,227           ; pshufb xmm12,xmm3
        lea r10,[64+r10]                ; advance the hash input pointer
        movups xmm5,XMMWORD[((-48))+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movups xmm4,XMMWORD[((-32))+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((64-128))+rax]
        paddd xmm0,xmm12
        DB 102,68,15,56,0,235           ; pshufb xmm13,xmm3
        DB 69,15,56,204,211             ; sha256msg1 xmm10,xmm11
        movups xmm5,XMMWORD[((-16))+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm13
        DB 102,65,15,58,15,220,4        ; palignr xmm3,xmm12,4
        paddd xmm10,xmm3
        movups xmm4,XMMWORD[rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((96-128))+rax]
        paddd xmm0,xmm13
        DB 69,15,56,205,213             ; sha256msg2 xmm10,xmm13
        DB 69,15,56,204,220
        movups xmm5,XMMWORD[16+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movups xmm4,XMMWORD[32+rcx]
        aesenc xmm6,xmm5
        movdqa xmm3,xmm10
        DB 102,65,15,58,15,221,4
        paddd xmm11,xmm3
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((128-128))+rax]
        paddd xmm0,xmm10
        DB 69,15,56,205,218
        DB 69,15,56,204,229
        movups xmm5,XMMWORD[48+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm11
        DB 102,65,15,58,15,218,4
        paddd xmm12,xmm3
        ; Three-way split on r11d vs 11: below skips all four extra
        ; rounds, equal skips the last two.  The `je` reuses the flags of
        ; this cmp - movups/aesenc in between do not modify them.
        cmp r11d,11
        jb NEAR $L$aesenclast1
        movups xmm4,XMMWORD[64+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[80+rcx]
        aesenc xmm6,xmm4
        je NEAR $L$aesenclast1
        movups xmm4,XMMWORD[96+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[112+rcx]
        aesenc xmm6,xmm4
$L$aesenclast1:
        aesenclast xmm6,xmm5
        movups xmm4,XMMWORD[((16-112))+rcx] ; reload the second round key for the next block
        nop
        DB 15,56,203,202
        ; ---- AES block 2: input [16+rdi]; block 1 result stored ------
        movups xmm14,XMMWORD[16+rdi]
        xorps xmm14,xmm15
        movups XMMWORD[rdi*1+rsi],xmm6  ; ciphertext block 1 -> out
        xorps xmm6,xmm14                ; chain: previous ciphertext ^ next input ^ first round key
        movups xmm5,XMMWORD[((-80))+rcx]
        aesenc xmm6,xmm4
        movdqa xmm0,XMMWORD[((160-128))+rax]
        paddd xmm0,xmm11
        DB 69,15,56,205,227
        DB 69,15,56,204,234
        movups xmm4,XMMWORD[((-64))+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm12
        DB 102,65,15,58,15,219,4
        paddd xmm13,xmm3
        movups xmm5,XMMWORD[((-48))+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((192-128))+rax]
        paddd xmm0,xmm12
        DB 69,15,56,205,236
        DB 69,15,56,204,211
        movups xmm4,XMMWORD[((-32))+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm13
        DB 102,65,15,58,15,220,4
        paddd xmm10,xmm3
        movups xmm5,XMMWORD[((-16))+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((224-128))+rax]
        paddd xmm0,xmm13
        DB 69,15,56,205,213
        DB 69,15,56,204,220
        movups xmm4,XMMWORD[rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm10
        DB 102,65,15,58,15,221,4
        paddd xmm11,xmm3
        movups xmm5,XMMWORD[16+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((256-128))+rax]
        paddd xmm0,xmm10
        DB 69,15,56,205,218
        DB 69,15,56,204,229
        movups xmm4,XMMWORD[32+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm11
        DB 102,65,15,58,15,218,4
        paddd xmm12,xmm3
        movups xmm5,XMMWORD[48+rcx]
        aesenc xmm6,xmm4
        cmp r11d,11                     ; same three-way round-count split as for block 1
        jb NEAR $L$aesenclast2
        movups xmm4,XMMWORD[64+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[80+rcx]
        aesenc xmm6,xmm4
        je NEAR $L$aesenclast2
        movups xmm4,XMMWORD[96+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[112+rcx]
        aesenc xmm6,xmm4
$L$aesenclast2:
        aesenclast xmm6,xmm5
        movups xmm4,XMMWORD[((16-112))+rcx]
        nop
        DB 15,56,203,202
        ; ---- AES block 3: input [32+rdi]; block 2 result stored ------
        movups xmm14,XMMWORD[32+rdi]
        xorps xmm14,xmm15
        movups XMMWORD[16+rdi*1+rsi],xmm6 ; ciphertext block 2 -> out+16
        xorps xmm6,xmm14
        movups xmm5,XMMWORD[((-80))+rcx]
        aesenc xmm6,xmm4
        movdqa xmm0,XMMWORD[((288-128))+rax]
        paddd xmm0,xmm11
        DB 69,15,56,205,227
        DB 69,15,56,204,234
        movups xmm4,XMMWORD[((-64))+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm12
        DB 102,65,15,58,15,219,4
        paddd xmm13,xmm3
        movups xmm5,XMMWORD[((-48))+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((320-128))+rax]
        paddd xmm0,xmm12
        DB 69,15,56,205,236
        DB 69,15,56,204,211
        movups xmm4,XMMWORD[((-32))+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm13
        DB 102,65,15,58,15,220,4
        paddd xmm10,xmm3
        movups xmm5,XMMWORD[((-16))+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((352-128))+rax]
        paddd xmm0,xmm13
        DB 69,15,56,205,213
        DB 69,15,56,204,220
        movups xmm4,XMMWORD[rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm10
        DB 102,65,15,58,15,221,4
        paddd xmm11,xmm3
        movups xmm5,XMMWORD[16+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((384-128))+rax]
        paddd xmm0,xmm10
        DB 69,15,56,205,218
        DB 69,15,56,204,229
        movups xmm4,XMMWORD[32+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm11
        DB 102,65,15,58,15,218,4
        paddd xmm12,xmm3
        movups xmm5,XMMWORD[48+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((416-128))+rax]
        paddd xmm0,xmm11
        DB 69,15,56,205,227
        DB 69,15,56,204,234
        cmp r11d,11                     ; same three-way round-count split
        jb NEAR $L$aesenclast3
        movups xmm4,XMMWORD[64+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[80+rcx]
        aesenc xmm6,xmm4
        je NEAR $L$aesenclast3
        movups xmm4,XMMWORD[96+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[112+rcx]
        aesenc xmm6,xmm4
$L$aesenclast3:
        aesenclast xmm6,xmm5
        movups xmm4,XMMWORD[((16-112))+rcx]
        nop
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movdqa xmm3,xmm12
        DB 102,65,15,58,15,219,4
        paddd xmm13,xmm3
        ; ---- AES block 4: input [48+rdi]; block 3 result stored ------
        movups xmm14,XMMWORD[48+rdi]
        xorps xmm14,xmm15
        movups XMMWORD[32+rdi*1+rsi],xmm6 ; ciphertext block 3 -> out+32
        xorps xmm6,xmm14
        movups xmm5,XMMWORD[((-80))+rcx]
        aesenc xmm6,xmm4
        movups xmm4,XMMWORD[((-64))+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((448-128))+rax]
        paddd xmm0,xmm12
        DB 69,15,56,205,236
        movdqa xmm3,xmm7                ; restore the shuffle mask for the next iteration
        movups xmm5,XMMWORD[((-48))+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movups xmm4,XMMWORD[((-32))+rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,202
        movdqa xmm0,XMMWORD[((480-128))+rax]
        paddd xmm0,xmm13
        movups xmm5,XMMWORD[((-16))+rcx]
        aesenc xmm6,xmm4
        movups xmm4,XMMWORD[rcx]
        aesenc xmm6,xmm5
        DB 15,56,203,209
        pshufd xmm0,xmm0,0x0e
        movups xmm5,XMMWORD[16+rcx]
        aesenc xmm6,xmm4
        DB 15,56,203,202
        movups xmm4,XMMWORD[32+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[48+rcx]
        aesenc xmm6,xmm4
        cmp r11d,11                     ; same three-way round-count split
        jb NEAR $L$aesenclast4
        movups xmm4,XMMWORD[64+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[80+rcx]
        aesenc xmm6,xmm4
        je NEAR $L$aesenclast4
        movups xmm4,XMMWORD[96+rcx]
        aesenc xmm6,xmm5
        movups xmm5,XMMWORD[112+rcx]
        aesenc xmm6,xmm4
$L$aesenclast4:
        aesenclast xmm6,xmm5
        movups xmm4,XMMWORD[((16-112))+rcx]
        nop
        paddd xmm2,xmm9                 ; add the state snapshot taken at the top of the loop
        paddd xmm1,xmm8
        dec rdx                         ; sets ZF for the jnz below; movups and lea leave flags alone
        movups XMMWORD[48+rdi*1+rsi],xmm6 ; ciphertext block 4 -> out+48
        lea rdi,[64+rdi]
        jnz NEAR $L$oop_shaext

        ; Undo the state rearrangement done before the loop, then write
        ; back the chaining value and the hash state.
        pshufd xmm2,xmm2,0xb1
        pshufd xmm3,xmm1,0x1b
        pshufd xmm1,xmm1,0xb1
        punpckhqdq xmm1,xmm2
        DB 102,15,58,15,211,8           ; palignr xmm2,xmm3,8
        movups XMMWORD[r8],xmm6
        movdqu XMMWORD[r9],xmm1
        movdqu XMMWORD[16+r9],xmm2
        ; Restore xmm6-xmm15 from the spill area and release the frame.
        movaps xmm6,XMMWORD[rsp]
        movaps xmm7,XMMWORD[16+rsp]
        movaps xmm8,XMMWORD[32+rsp]
        movaps xmm9,XMMWORD[48+rsp]
        movaps xmm10,XMMWORD[64+rsp]
        movaps xmm11,XMMWORD[80+rsp]
        movaps xmm12,XMMWORD[96+rsp]
        movaps xmm13,XMMWORD[112+rsp]
        movaps xmm14,XMMWORD[128+rsp]
        movaps xmm15,XMMWORD[144+rsp]
        lea rsp,[((8+160))+rsp]
$L$epilogue_shaext:
        mov rdi,QWORD[8+rsp] ;WIN64 epilogue
        mov rsi,QWORD[16+rsp]
        DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_cbc_sha256_enc_shaext:

;-----------------------------------------------------------------------
; se_handler
;
; Handler named by every .xdata record at the end of this file.  It
; patches the register-context structure passed in r8 so that it
; describes the interrupted routine's caller, copies that structure to
; the buffer at [40+r9], calls RtlVirtualUnwind and returns 1 in eax.
;
; In:  r8 = register-context structure, r9 = dispatcher structure.
; NOTE(review): the hard-coded offsets look like the Win64 CONTEXT layout
; (Rax 120, Rbx 144, Rsp 152, Rbp 160, Rsi 168, Rdi 176, R12-R15 216-240,
; Rip 248, Xmm6 512, total size 154 qwords) and DISPATCHER_CONTEXT
; (ControlPc 0, ImageBase 8, FunctionEntry 16, EstablisherFrame 24,
; ContextRecord 40, HandlerData 56) - confirm against winnt.h.
;
; `DD 0xa548f3fc` assembles to the bytes FC F3 48 A5, i.e. `cld` followed
; by `rep movsq`: copy ecx qwords from [rsi] to [rdi].
;-----------------------------------------------------------------------
EXTERN __imp_RtlVirtualUnwind

ALIGN 16
se_handler:
        push rsi
        push rdi
        push rbx
        push rbp
        push r12
        push r13
        push r14
        push r15
        pushfq
        sub rsp,64                      ; room for the stack arguments at [32..63+rsp] used by the call below

        mov rax,QWORD[120+r8]           ; presumably context->Rax
        mov rbx,QWORD[248+r8]           ; presumably context->Rip

        mov rsi,QWORD[8+r9]             ; base added to the two 32-bit label offsets below
        mov r11,QWORD[56+r9]            ; -> the two DDs that follow the handler entry in .xdata

        mov r10d,DWORD[r11]             ; first DD: the routine's $L$prologue_* label
        lea r10,[r10*1+rsi]
        cmp rbx,r10
        jb NEAR $L$in_prologue          ; before that label: use the rax loaded above as the frame

        mov rax,QWORD[152+r8]           ; presumably context->Rsp

        mov r10d,DWORD[4+r11]           ; second DD: the routine's $L$epilogue_* label
        lea r10,[r10*1+rsi]
        cmp rbx,r10
        jae NEAR $L$in_prologue         ; at or past that label: rsp is used as the frame

        lea r10,[aesni_cbc_sha256_enc_shaext]
        cmp rbx,r10
        jb NEAR $L$not_in_shaext

        ; Fault inside the shaext body: its ten XMM saves start at rsp
        ; and its frame is 168 bytes.
        lea rsi,[rax]
        lea rdi,[512+r8]
        mov ecx,20                      ; 20 qwords = ten 16-byte XMM registers
        DD 0xa548f3fc
        lea rax,[168+rax]               ; step over the 168-byte frame allocated in the shaext prologue
        jmp NEAR $L$in_prologue
$L$not_in_shaext:
        lea r10,[$L$avx2_shortcut]
        cmp rbx,r10
        jb NEAR $L$not_in_avx2

        ; At or after $L$avx2_shortcut the frame base is recomputed:
        ; round rsp down to a 1024-byte boundary, then add 448.
        ; NOTE(review): presumably mirrors how the AVX2 routine moves rsp
        ; while it runs - confirm against its prologue (not in this part).
        and rax,-256*4
        add rax,448
$L$not_in_avx2:
        mov rsi,rax                     ; rsi = frame base
        mov rax,QWORD[((64+56))+rax]    ; value the routine stored at frame+120

        ; Registers found just below that address go back into the context.
        mov rbx,QWORD[((-8))+rax]
        mov rbp,QWORD[((-16))+rax]
        mov r12,QWORD[((-24))+rax]
        mov r13,QWORD[((-32))+rax]
        mov r14,QWORD[((-40))+rax]
        mov r15,QWORD[((-48))+rax]
        mov QWORD[144+r8],rbx
        mov QWORD[160+r8],rbp
        mov QWORD[216+r8],r12
        mov QWORD[224+r8],r13
        mov QWORD[232+r8],r14
        mov QWORD[240+r8],r15

        lea rsi,[((64+64))+rsi]         ; XMM save area at frame+128
        lea rdi,[512+r8]
        mov ecx,20                      ; 20 qwords = ten 16-byte XMM registers
        DD 0xa548f3fc

$L$in_prologue:
        ; rax is now the interrupted routine's entry rsp; its prologue
        ; spilled rdi/rsi at [8+rax] and [16+rax].
        mov rdi,QWORD[8+rax]
        mov rsi,QWORD[16+rax]
        mov QWORD[152+r8],rax
        mov QWORD[168+r8],rsi
        mov QWORD[176+r8],rdi

        mov rdi,QWORD[40+r9]            ; destination: buffer named by the dispatcher structure
        mov rsi,r8                      ; source: the patched context
        mov ecx,154                     ; 154 qwords copied
        DD 0xa548f3fc

        ; RtlVirtualUnwind: four register arguments, four more on the stack.
        mov rsi,r9
        xor rcx,rcx                     ; first argument = 0
        mov rdx,QWORD[8+rsi]
        mov r8,QWORD[rsi]
        mov r9,QWORD[16+rsi]
        mov r10,QWORD[40+rsi]
        lea r11,[56+rsi]
        lea r12,[24+rsi]
        mov QWORD[32+rsp],r10
        mov QWORD[40+rsp],r11
        mov QWORD[48+rsp],r12
        mov QWORD[56+rsp],rcx           ; last stack argument = 0
        call QWORD[__imp_RtlVirtualUnwind]

        mov eax,1                       ; NOTE(review): presumably ExceptionContinueSearch - confirm
        add rsp,64
        popfq
        pop r15
        pop r14
        pop r13
        pop r12
        pop rbp
        pop rbx
        pop rdi
        pop rsi
        DB 0F3h,0C3h ;repret

; Function table: one (begin, end, unwind-info) triple of image-relative
; addresses per routine.
section .pdata rdata align=4
        DD $L$SEH_begin_aesni_cbc_sha256_enc_xop wrt ..imagebase
        DD $L$SEH_end_aesni_cbc_sha256_enc_xop wrt ..imagebase
        DD $L$SEH_info_aesni_cbc_sha256_enc_xop wrt ..imagebase

        DD $L$SEH_begin_aesni_cbc_sha256_enc_avx wrt ..imagebase
        DD $L$SEH_end_aesni_cbc_sha256_enc_avx wrt ..imagebase
        DD $L$SEH_info_aesni_cbc_sha256_enc_avx wrt ..imagebase
        DD $L$SEH_begin_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
        DD $L$SEH_end_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
        DD $L$SEH_info_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
        DD $L$SEH_begin_aesni_cbc_sha256_enc_shaext wrt ..imagebase
        DD $L$SEH_end_aesni_cbc_sha256_enc_shaext wrt ..imagebase
        DD $L$SEH_info_aesni_cbc_sha256_enc_shaext wrt ..imagebase

; Unwind-info records.  Each is a 4-byte header, the address of
; se_handler, and the two label addresses se_handler reads through
; [56+r9] to decide whether the fault hit the prologue, body or epilogue.
; NOTE(review): header byte 9 presumably encodes UNWIND_INFO version 1
; with the exception-handler flag set - confirm against the Win64 unwind
; data documentation.
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_aesni_cbc_sha256_enc_xop:
        DB 9,0,0,0
        DD se_handler wrt ..imagebase
        DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase

$L$SEH_info_aesni_cbc_sha256_enc_avx:
        DB 9,0,0,0
        DD se_handler wrt ..imagebase
        DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_avx2:
        DB 9,0,0,0
        DD se_handler wrt ..imagebase
        DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
$L$SEH_info_aesni_cbc_sha256_enc_shaext:
        DB 9,0,0,0
        DD se_handler wrt ..imagebase
        DD $L$prologue_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase