default rel %define XMMWORD %define YMMWORD %define ZMMWORD section .text code align=64 EXTERN OPENSSL_ia32cap_P global sha1_multi_block ALIGN 32 sha1_multi_block: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_sha1_multi_block: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,QWORD[((OPENSSL_ia32cap_P+4))] bt rcx,61 jc NEAR _shaext_shortcut test ecx,268435456 jnz NEAR _avx_shortcut mov rax,rsp push rbx push rbp lea rsp,[((-168))+rsp] movaps XMMWORD[rsp],xmm6 movaps XMMWORD[16+rsp],xmm7 movaps XMMWORD[32+rsp],xmm8 movaps XMMWORD[48+rsp],xmm9 movaps XMMWORD[(-120)+rax],xmm10 movaps XMMWORD[(-104)+rax],xmm11 movaps XMMWORD[(-88)+rax],xmm12 movaps XMMWORD[(-72)+rax],xmm13 movaps XMMWORD[(-56)+rax],xmm14 movaps XMMWORD[(-40)+rax],xmm15 sub rsp,288 and rsp,-256 mov QWORD[272+rsp],rax $L$body: lea rbp,[K_XX_XX] lea rbx,[256+rsp] $L$oop_grande: mov DWORD[280+rsp],edx xor edx,edx mov r8,QWORD[rsi] mov ecx,DWORD[8+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[rbx],ecx cmovle r8,rbp mov r9,QWORD[16+rsi] mov ecx,DWORD[24+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[4+rbx],ecx cmovle r9,rbp mov r10,QWORD[32+rsi] mov ecx,DWORD[40+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[8+rbx],ecx cmovle r10,rbp mov r11,QWORD[48+rsi] mov ecx,DWORD[56+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[12+rbx],ecx cmovle r11,rbp test edx,edx jz NEAR $L$done movdqu xmm10,XMMWORD[rdi] lea rax,[128+rsp] movdqu xmm11,XMMWORD[32+rdi] movdqu xmm12,XMMWORD[64+rdi] movdqu xmm13,XMMWORD[96+rdi] movdqu xmm14,XMMWORD[128+rdi] movdqa xmm5,XMMWORD[96+rbp] movdqa xmm15,XMMWORD[((-32))+rbp] jmp NEAR $L$oop ALIGN 32 $L$oop: movd xmm0,DWORD[r8] lea r8,[64+r8] movd xmm2,DWORD[r9] lea r9,[64+r9] movd xmm3,DWORD[r10] lea r10,[64+r10] movd xmm4,DWORD[r11] lea r11,[64+r11] punpckldq xmm0,xmm3 movd xmm1,DWORD[((-60))+r8] punpckldq xmm2,xmm4 movd xmm9,DWORD[((-60))+r9] punpckldq xmm0,xmm2 movd xmm8,DWORD[((-60))+r10] DB 102,15,56,0,197 movd 
xmm7,DWORD[((-60))+r11] punpckldq xmm1,xmm8 movdqa xmm8,xmm10 paddd xmm14,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm11 movdqa xmm6,xmm11 pslld xmm8,5 pandn xmm7,xmm13 pand xmm6,xmm12 punpckldq xmm1,xmm9 movdqa xmm9,xmm10 movdqa XMMWORD[(0-128)+rax],xmm0 paddd xmm14,xmm0 movd xmm2,DWORD[((-56))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm11 por xmm8,xmm9 movd xmm9,DWORD[((-56))+r9] pslld xmm7,30 paddd xmm14,xmm6 psrld xmm11,2 paddd xmm14,xmm8 DB 102,15,56,0,205 movd xmm8,DWORD[((-56))+r10] por xmm11,xmm7 movd xmm7,DWORD[((-56))+r11] punpckldq xmm2,xmm8 movdqa xmm8,xmm14 paddd xmm13,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm10 movdqa xmm6,xmm10 pslld xmm8,5 pandn xmm7,xmm12 pand xmm6,xmm11 punpckldq xmm2,xmm9 movdqa xmm9,xmm14 movdqa XMMWORD[(16-128)+rax],xmm1 paddd xmm13,xmm1 movd xmm3,DWORD[((-52))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm10 por xmm8,xmm9 movd xmm9,DWORD[((-52))+r9] pslld xmm7,30 paddd xmm13,xmm6 psrld xmm10,2 paddd xmm13,xmm8 DB 102,15,56,0,213 movd xmm8,DWORD[((-52))+r10] por xmm10,xmm7 movd xmm7,DWORD[((-52))+r11] punpckldq xmm3,xmm8 movdqa xmm8,xmm13 paddd xmm12,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm14 movdqa xmm6,xmm14 pslld xmm8,5 pandn xmm7,xmm11 pand xmm6,xmm10 punpckldq xmm3,xmm9 movdqa xmm9,xmm13 movdqa XMMWORD[(32-128)+rax],xmm2 paddd xmm12,xmm2 movd xmm4,DWORD[((-48))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm14 por xmm8,xmm9 movd xmm9,DWORD[((-48))+r9] pslld xmm7,30 paddd xmm12,xmm6 psrld xmm14,2 paddd xmm12,xmm8 DB 102,15,56,0,221 movd xmm8,DWORD[((-48))+r10] por xmm14,xmm7 movd xmm7,DWORD[((-48))+r11] punpckldq xmm4,xmm8 movdqa xmm8,xmm12 paddd xmm11,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm13 movdqa xmm6,xmm13 pslld xmm8,5 pandn xmm7,xmm10 pand xmm6,xmm14 punpckldq xmm4,xmm9 movdqa xmm9,xmm12 movdqa XMMWORD[(48-128)+rax],xmm3 paddd xmm11,xmm3 movd xmm0,DWORD[((-44))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm13 por xmm8,xmm9 movd xmm9,DWORD[((-44))+r9] pslld xmm7,30 paddd xmm11,xmm6 psrld xmm13,2 paddd 
xmm11,xmm8 DB 102,15,56,0,229 movd xmm8,DWORD[((-44))+r10] por xmm13,xmm7 movd xmm7,DWORD[((-44))+r11] punpckldq xmm0,xmm8 movdqa xmm8,xmm11 paddd xmm10,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm12 movdqa xmm6,xmm12 pslld xmm8,5 pandn xmm7,xmm14 pand xmm6,xmm13 punpckldq xmm0,xmm9 movdqa xmm9,xmm11 movdqa XMMWORD[(64-128)+rax],xmm4 paddd xmm10,xmm4 movd xmm1,DWORD[((-40))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm12 por xmm8,xmm9 movd xmm9,DWORD[((-40))+r9] pslld xmm7,30 paddd xmm10,xmm6 psrld xmm12,2 paddd xmm10,xmm8 DB 102,15,56,0,197 movd xmm8,DWORD[((-40))+r10] por xmm12,xmm7 movd xmm7,DWORD[((-40))+r11] punpckldq xmm1,xmm8 movdqa xmm8,xmm10 paddd xmm14,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm11 movdqa xmm6,xmm11 pslld xmm8,5 pandn xmm7,xmm13 pand xmm6,xmm12 punpckldq xmm1,xmm9 movdqa xmm9,xmm10 movdqa XMMWORD[(80-128)+rax],xmm0 paddd xmm14,xmm0 movd xmm2,DWORD[((-36))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm11 por xmm8,xmm9 movd xmm9,DWORD[((-36))+r9] pslld xmm7,30 paddd xmm14,xmm6 psrld xmm11,2 paddd xmm14,xmm8 DB 102,15,56,0,205 movd xmm8,DWORD[((-36))+r10] por xmm11,xmm7 movd xmm7,DWORD[((-36))+r11] punpckldq xmm2,xmm8 movdqa xmm8,xmm14 paddd xmm13,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm10 movdqa xmm6,xmm10 pslld xmm8,5 pandn xmm7,xmm12 pand xmm6,xmm11 punpckldq xmm2,xmm9 movdqa xmm9,xmm14 movdqa XMMWORD[(96-128)+rax],xmm1 paddd xmm13,xmm1 movd xmm3,DWORD[((-32))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm10 por xmm8,xmm9 movd xmm9,DWORD[((-32))+r9] pslld xmm7,30 paddd xmm13,xmm6 psrld xmm10,2 paddd xmm13,xmm8 DB 102,15,56,0,213 movd xmm8,DWORD[((-32))+r10] por xmm10,xmm7 movd xmm7,DWORD[((-32))+r11] punpckldq xmm3,xmm8 movdqa xmm8,xmm13 paddd xmm12,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm14 movdqa xmm6,xmm14 pslld xmm8,5 pandn xmm7,xmm11 pand xmm6,xmm10 punpckldq xmm3,xmm9 movdqa xmm9,xmm13 movdqa XMMWORD[(112-128)+rax],xmm2 paddd xmm12,xmm2 movd xmm4,DWORD[((-28))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm14 por xmm8,xmm9 movd 
xmm9,DWORD[((-28))+r9] pslld xmm7,30 paddd xmm12,xmm6 psrld xmm14,2 paddd xmm12,xmm8 DB 102,15,56,0,221 movd xmm8,DWORD[((-28))+r10] por xmm14,xmm7 movd xmm7,DWORD[((-28))+r11] punpckldq xmm4,xmm8 movdqa xmm8,xmm12 paddd xmm11,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm13 movdqa xmm6,xmm13 pslld xmm8,5 pandn xmm7,xmm10 pand xmm6,xmm14 punpckldq xmm4,xmm9 movdqa xmm9,xmm12 movdqa XMMWORD[(128-128)+rax],xmm3 paddd xmm11,xmm3 movd xmm0,DWORD[((-24))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm13 por xmm8,xmm9 movd xmm9,DWORD[((-24))+r9] pslld xmm7,30 paddd xmm11,xmm6 psrld xmm13,2 paddd xmm11,xmm8 DB 102,15,56,0,229 movd xmm8,DWORD[((-24))+r10] por xmm13,xmm7 movd xmm7,DWORD[((-24))+r11] punpckldq xmm0,xmm8 movdqa xmm8,xmm11 paddd xmm10,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm12 movdqa xmm6,xmm12 pslld xmm8,5 pandn xmm7,xmm14 pand xmm6,xmm13 punpckldq xmm0,xmm9 movdqa xmm9,xmm11 movdqa XMMWORD[(144-128)+rax],xmm4 paddd xmm10,xmm4 movd xmm1,DWORD[((-20))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm12 por xmm8,xmm9 movd xmm9,DWORD[((-20))+r9] pslld xmm7,30 paddd xmm10,xmm6 psrld xmm12,2 paddd xmm10,xmm8 DB 102,15,56,0,197 movd xmm8,DWORD[((-20))+r10] por xmm12,xmm7 movd xmm7,DWORD[((-20))+r11] punpckldq xmm1,xmm8 movdqa xmm8,xmm10 paddd xmm14,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm11 movdqa xmm6,xmm11 pslld xmm8,5 pandn xmm7,xmm13 pand xmm6,xmm12 punpckldq xmm1,xmm9 movdqa xmm9,xmm10 movdqa XMMWORD[(160-128)+rax],xmm0 paddd xmm14,xmm0 movd xmm2,DWORD[((-16))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm11 por xmm8,xmm9 movd xmm9,DWORD[((-16))+r9] pslld xmm7,30 paddd xmm14,xmm6 psrld xmm11,2 paddd xmm14,xmm8 DB 102,15,56,0,205 movd xmm8,DWORD[((-16))+r10] por xmm11,xmm7 movd xmm7,DWORD[((-16))+r11] punpckldq xmm2,xmm8 movdqa xmm8,xmm14 paddd xmm13,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm10 movdqa xmm6,xmm10 pslld xmm8,5 pandn xmm7,xmm12 pand xmm6,xmm11 punpckldq xmm2,xmm9 movdqa xmm9,xmm14 movdqa XMMWORD[(176-128)+rax],xmm1 paddd xmm13,xmm1 movd 
xmm3,DWORD[((-12))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm10 por xmm8,xmm9 movd xmm9,DWORD[((-12))+r9] pslld xmm7,30 paddd xmm13,xmm6 psrld xmm10,2 paddd xmm13,xmm8 DB 102,15,56,0,213 movd xmm8,DWORD[((-12))+r10] por xmm10,xmm7 movd xmm7,DWORD[((-12))+r11] punpckldq xmm3,xmm8 movdqa xmm8,xmm13 paddd xmm12,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm14 movdqa xmm6,xmm14 pslld xmm8,5 pandn xmm7,xmm11 pand xmm6,xmm10 punpckldq xmm3,xmm9 movdqa xmm9,xmm13 movdqa XMMWORD[(192-128)+rax],xmm2 paddd xmm12,xmm2 movd xmm4,DWORD[((-8))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm14 por xmm8,xmm9 movd xmm9,DWORD[((-8))+r9] pslld xmm7,30 paddd xmm12,xmm6 psrld xmm14,2 paddd xmm12,xmm8 DB 102,15,56,0,221 movd xmm8,DWORD[((-8))+r10] por xmm14,xmm7 movd xmm7,DWORD[((-8))+r11] punpckldq xmm4,xmm8 movdqa xmm8,xmm12 paddd xmm11,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm13 movdqa xmm6,xmm13 pslld xmm8,5 pandn xmm7,xmm10 pand xmm6,xmm14 punpckldq xmm4,xmm9 movdqa xmm9,xmm12 movdqa XMMWORD[(208-128)+rax],xmm3 paddd xmm11,xmm3 movd xmm0,DWORD[((-4))+r8] psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm13 por xmm8,xmm9 movd xmm9,DWORD[((-4))+r9] pslld xmm7,30 paddd xmm11,xmm6 psrld xmm13,2 paddd xmm11,xmm8 DB 102,15,56,0,229 movd xmm8,DWORD[((-4))+r10] por xmm13,xmm7 movdqa xmm1,XMMWORD[((0-128))+rax] movd xmm7,DWORD[((-4))+r11] punpckldq xmm0,xmm8 movdqa xmm8,xmm11 paddd xmm10,xmm15 punpckldq xmm9,xmm7 movdqa xmm7,xmm12 movdqa xmm6,xmm12 pslld xmm8,5 prefetcht0 [63+r8] pandn xmm7,xmm14 pand xmm6,xmm13 punpckldq xmm0,xmm9 movdqa xmm9,xmm11 movdqa XMMWORD[(224-128)+rax],xmm4 paddd xmm10,xmm4 psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm12 prefetcht0 [63+r9] por xmm8,xmm9 pslld xmm7,30 paddd xmm10,xmm6 prefetcht0 [63+r10] psrld xmm12,2 paddd xmm10,xmm8 DB 102,15,56,0,197 prefetcht0 [63+r11] por xmm12,xmm7 movdqa xmm2,XMMWORD[((16-128))+rax] pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((32-128))+rax] movdqa xmm8,xmm10 pxor xmm1,XMMWORD[((128-128))+rax] paddd xmm14,xmm15 movdqa xmm7,xmm11 pslld 
xmm8,5 pxor xmm1,xmm3 movdqa xmm6,xmm11 pandn xmm7,xmm13 movdqa xmm5,xmm1 pand xmm6,xmm12 movdqa xmm9,xmm10 psrld xmm5,31 paddd xmm1,xmm1 movdqa XMMWORD[(240-128)+rax],xmm0 paddd xmm14,xmm0 psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm11 por xmm8,xmm9 pslld xmm7,30 paddd xmm14,xmm6 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((48-128))+rax] movdqa xmm8,xmm14 pxor xmm2,XMMWORD[((144-128))+rax] paddd xmm13,xmm15 movdqa xmm7,xmm10 pslld xmm8,5 pxor xmm2,xmm4 movdqa xmm6,xmm10 pandn xmm7,xmm12 movdqa xmm5,xmm2 pand xmm6,xmm11 movdqa xmm9,xmm14 psrld xmm5,31 paddd xmm2,xmm2 movdqa XMMWORD[(0-128)+rax],xmm1 paddd xmm13,xmm1 psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm10 por xmm8,xmm9 pslld xmm7,30 paddd xmm13,xmm6 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((64-128))+rax] movdqa xmm8,xmm13 pxor xmm3,XMMWORD[((160-128))+rax] paddd xmm12,xmm15 movdqa xmm7,xmm14 pslld xmm8,5 pxor xmm3,xmm0 movdqa xmm6,xmm14 pandn xmm7,xmm11 movdqa xmm5,xmm3 pand xmm6,xmm10 movdqa xmm9,xmm13 psrld xmm5,31 paddd xmm3,xmm3 movdqa XMMWORD[(16-128)+rax],xmm2 paddd xmm12,xmm2 psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm14 por xmm8,xmm9 pslld xmm7,30 paddd xmm12,xmm6 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((80-128))+rax] movdqa xmm8,xmm12 pxor xmm4,XMMWORD[((176-128))+rax] paddd xmm11,xmm15 movdqa xmm7,xmm13 pslld xmm8,5 pxor xmm4,xmm1 movdqa xmm6,xmm13 pandn xmm7,xmm10 movdqa xmm5,xmm4 pand xmm6,xmm14 movdqa xmm9,xmm12 psrld xmm5,31 paddd xmm4,xmm4 movdqa XMMWORD[(32-128)+rax],xmm3 paddd xmm11,xmm3 psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm13 por xmm8,xmm9 pslld xmm7,30 paddd xmm11,xmm6 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((96-128))+rax] movdqa xmm8,xmm11 pxor xmm0,XMMWORD[((192-128))+rax] paddd xmm10,xmm15 movdqa xmm7,xmm12 pslld xmm8,5 pxor xmm0,xmm2 movdqa xmm6,xmm12 pandn 
xmm7,xmm14 movdqa xmm5,xmm0 pand xmm6,xmm13 movdqa xmm9,xmm11 psrld xmm5,31 paddd xmm0,xmm0 movdqa XMMWORD[(48-128)+rax],xmm4 paddd xmm10,xmm4 psrld xmm9,27 pxor xmm6,xmm7 movdqa xmm7,xmm12 por xmm8,xmm9 pslld xmm7,30 paddd xmm10,xmm6 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 movdqa xmm15,XMMWORD[rbp] pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((112-128))+rax] movdqa xmm8,xmm10 movdqa xmm6,xmm13 pxor xmm1,XMMWORD[((208-128))+rax] paddd xmm14,xmm15 pslld xmm8,5 pxor xmm6,xmm11 movdqa xmm9,xmm10 movdqa XMMWORD[(64-128)+rax],xmm0 paddd xmm14,xmm0 pxor xmm1,xmm3 psrld xmm9,27 pxor xmm6,xmm12 movdqa xmm7,xmm11 pslld xmm7,30 movdqa xmm5,xmm1 por xmm8,xmm9 psrld xmm5,31 paddd xmm14,xmm6 paddd xmm1,xmm1 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((128-128))+rax] movdqa xmm8,xmm14 movdqa xmm6,xmm12 pxor xmm2,XMMWORD[((224-128))+rax] paddd xmm13,xmm15 pslld xmm8,5 pxor xmm6,xmm10 movdqa xmm9,xmm14 movdqa XMMWORD[(80-128)+rax],xmm1 paddd xmm13,xmm1 pxor xmm2,xmm4 psrld xmm9,27 pxor xmm6,xmm11 movdqa xmm7,xmm10 pslld xmm7,30 movdqa xmm5,xmm2 por xmm8,xmm9 psrld xmm5,31 paddd xmm13,xmm6 paddd xmm2,xmm2 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((144-128))+rax] movdqa xmm8,xmm13 movdqa xmm6,xmm11 pxor xmm3,XMMWORD[((240-128))+rax] paddd xmm12,xmm15 pslld xmm8,5 pxor xmm6,xmm14 movdqa xmm9,xmm13 movdqa XMMWORD[(96-128)+rax],xmm2 paddd xmm12,xmm2 pxor xmm3,xmm0 psrld xmm9,27 pxor xmm6,xmm10 movdqa xmm7,xmm14 pslld xmm7,30 movdqa xmm5,xmm3 por xmm8,xmm9 psrld xmm5,31 paddd xmm12,xmm6 paddd xmm3,xmm3 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((160-128))+rax] movdqa xmm8,xmm12 movdqa xmm6,xmm10 pxor xmm4,XMMWORD[((0-128))+rax] paddd xmm11,xmm15 pslld xmm8,5 pxor xmm6,xmm13 movdqa xmm9,xmm12 movdqa XMMWORD[(112-128)+rax],xmm3 paddd xmm11,xmm3 pxor xmm4,xmm1 psrld xmm9,27 pxor xmm6,xmm14 movdqa xmm7,xmm13 pslld xmm7,30 
movdqa xmm5,xmm4 por xmm8,xmm9 psrld xmm5,31 paddd xmm11,xmm6 paddd xmm4,xmm4 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((176-128))+rax] movdqa xmm8,xmm11 movdqa xmm6,xmm14 pxor xmm0,XMMWORD[((16-128))+rax] paddd xmm10,xmm15 pslld xmm8,5 pxor xmm6,xmm12 movdqa xmm9,xmm11 movdqa XMMWORD[(128-128)+rax],xmm4 paddd xmm10,xmm4 pxor xmm0,xmm2 psrld xmm9,27 pxor xmm6,xmm13 movdqa xmm7,xmm12 pslld xmm7,30 movdqa xmm5,xmm0 por xmm8,xmm9 psrld xmm5,31 paddd xmm10,xmm6 paddd xmm0,xmm0 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((192-128))+rax] movdqa xmm8,xmm10 movdqa xmm6,xmm13 pxor xmm1,XMMWORD[((32-128))+rax] paddd xmm14,xmm15 pslld xmm8,5 pxor xmm6,xmm11 movdqa xmm9,xmm10 movdqa XMMWORD[(144-128)+rax],xmm0 paddd xmm14,xmm0 pxor xmm1,xmm3 psrld xmm9,27 pxor xmm6,xmm12 movdqa xmm7,xmm11 pslld xmm7,30 movdqa xmm5,xmm1 por xmm8,xmm9 psrld xmm5,31 paddd xmm14,xmm6 paddd xmm1,xmm1 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((208-128))+rax] movdqa xmm8,xmm14 movdqa xmm6,xmm12 pxor xmm2,XMMWORD[((48-128))+rax] paddd xmm13,xmm15 pslld xmm8,5 pxor xmm6,xmm10 movdqa xmm9,xmm14 movdqa XMMWORD[(160-128)+rax],xmm1 paddd xmm13,xmm1 pxor xmm2,xmm4 psrld xmm9,27 pxor xmm6,xmm11 movdqa xmm7,xmm10 pslld xmm7,30 movdqa xmm5,xmm2 por xmm8,xmm9 psrld xmm5,31 paddd xmm13,xmm6 paddd xmm2,xmm2 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((224-128))+rax] movdqa xmm8,xmm13 movdqa xmm6,xmm11 pxor xmm3,XMMWORD[((64-128))+rax] paddd xmm12,xmm15 pslld xmm8,5 pxor xmm6,xmm14 movdqa xmm9,xmm13 movdqa XMMWORD[(176-128)+rax],xmm2 paddd xmm12,xmm2 pxor xmm3,xmm0 psrld xmm9,27 pxor xmm6,xmm10 movdqa xmm7,xmm14 pslld xmm7,30 movdqa xmm5,xmm3 por xmm8,xmm9 psrld xmm5,31 paddd xmm12,xmm6 paddd xmm3,xmm3 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa 
xmm1,XMMWORD[((240-128))+rax] movdqa xmm8,xmm12 movdqa xmm6,xmm10 pxor xmm4,XMMWORD[((80-128))+rax] paddd xmm11,xmm15 pslld xmm8,5 pxor xmm6,xmm13 movdqa xmm9,xmm12 movdqa XMMWORD[(192-128)+rax],xmm3 paddd xmm11,xmm3 pxor xmm4,xmm1 psrld xmm9,27 pxor xmm6,xmm14 movdqa xmm7,xmm13 pslld xmm7,30 movdqa xmm5,xmm4 por xmm8,xmm9 psrld xmm5,31 paddd xmm11,xmm6 paddd xmm4,xmm4 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((0-128))+rax] movdqa xmm8,xmm11 movdqa xmm6,xmm14 pxor xmm0,XMMWORD[((96-128))+rax] paddd xmm10,xmm15 pslld xmm8,5 pxor xmm6,xmm12 movdqa xmm9,xmm11 movdqa XMMWORD[(208-128)+rax],xmm4 paddd xmm10,xmm4 pxor xmm0,xmm2 psrld xmm9,27 pxor xmm6,xmm13 movdqa xmm7,xmm12 pslld xmm7,30 movdqa xmm5,xmm0 por xmm8,xmm9 psrld xmm5,31 paddd xmm10,xmm6 paddd xmm0,xmm0 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((16-128))+rax] movdqa xmm8,xmm10 movdqa xmm6,xmm13 pxor xmm1,XMMWORD[((112-128))+rax] paddd xmm14,xmm15 pslld xmm8,5 pxor xmm6,xmm11 movdqa xmm9,xmm10 movdqa XMMWORD[(224-128)+rax],xmm0 paddd xmm14,xmm0 pxor xmm1,xmm3 psrld xmm9,27 pxor xmm6,xmm12 movdqa xmm7,xmm11 pslld xmm7,30 movdqa xmm5,xmm1 por xmm8,xmm9 psrld xmm5,31 paddd xmm14,xmm6 paddd xmm1,xmm1 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((32-128))+rax] movdqa xmm8,xmm14 movdqa xmm6,xmm12 pxor xmm2,XMMWORD[((128-128))+rax] paddd xmm13,xmm15 pslld xmm8,5 pxor xmm6,xmm10 movdqa xmm9,xmm14 movdqa XMMWORD[(240-128)+rax],xmm1 paddd xmm13,xmm1 pxor xmm2,xmm4 psrld xmm9,27 pxor xmm6,xmm11 movdqa xmm7,xmm10 pslld xmm7,30 movdqa xmm5,xmm2 por xmm8,xmm9 psrld xmm5,31 paddd xmm13,xmm6 paddd xmm2,xmm2 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((48-128))+rax] movdqa xmm8,xmm13 movdqa xmm6,xmm11 pxor xmm3,XMMWORD[((144-128))+rax] paddd xmm12,xmm15 pslld xmm8,5 pxor xmm6,xmm14 movdqa xmm9,xmm13 movdqa 
XMMWORD[(0-128)+rax],xmm2 paddd xmm12,xmm2 pxor xmm3,xmm0 psrld xmm9,27 pxor xmm6,xmm10 movdqa xmm7,xmm14 pslld xmm7,30 movdqa xmm5,xmm3 por xmm8,xmm9 psrld xmm5,31 paddd xmm12,xmm6 paddd xmm3,xmm3 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((64-128))+rax] movdqa xmm8,xmm12 movdqa xmm6,xmm10 pxor xmm4,XMMWORD[((160-128))+rax] paddd xmm11,xmm15 pslld xmm8,5 pxor xmm6,xmm13 movdqa xmm9,xmm12 movdqa XMMWORD[(16-128)+rax],xmm3 paddd xmm11,xmm3 pxor xmm4,xmm1 psrld xmm9,27 pxor xmm6,xmm14 movdqa xmm7,xmm13 pslld xmm7,30 movdqa xmm5,xmm4 por xmm8,xmm9 psrld xmm5,31 paddd xmm11,xmm6 paddd xmm4,xmm4 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((80-128))+rax] movdqa xmm8,xmm11 movdqa xmm6,xmm14 pxor xmm0,XMMWORD[((176-128))+rax] paddd xmm10,xmm15 pslld xmm8,5 pxor xmm6,xmm12 movdqa xmm9,xmm11 movdqa XMMWORD[(32-128)+rax],xmm4 paddd xmm10,xmm4 pxor xmm0,xmm2 psrld xmm9,27 pxor xmm6,xmm13 movdqa xmm7,xmm12 pslld xmm7,30 movdqa xmm5,xmm0 por xmm8,xmm9 psrld xmm5,31 paddd xmm10,xmm6 paddd xmm0,xmm0 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((96-128))+rax] movdqa xmm8,xmm10 movdqa xmm6,xmm13 pxor xmm1,XMMWORD[((192-128))+rax] paddd xmm14,xmm15 pslld xmm8,5 pxor xmm6,xmm11 movdqa xmm9,xmm10 movdqa XMMWORD[(48-128)+rax],xmm0 paddd xmm14,xmm0 pxor xmm1,xmm3 psrld xmm9,27 pxor xmm6,xmm12 movdqa xmm7,xmm11 pslld xmm7,30 movdqa xmm5,xmm1 por xmm8,xmm9 psrld xmm5,31 paddd xmm14,xmm6 paddd xmm1,xmm1 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((112-128))+rax] movdqa xmm8,xmm14 movdqa xmm6,xmm12 pxor xmm2,XMMWORD[((208-128))+rax] paddd xmm13,xmm15 pslld xmm8,5 pxor xmm6,xmm10 movdqa xmm9,xmm14 movdqa XMMWORD[(64-128)+rax],xmm1 paddd xmm13,xmm1 pxor xmm2,xmm4 psrld xmm9,27 pxor xmm6,xmm11 movdqa xmm7,xmm10 pslld xmm7,30 movdqa xmm5,xmm2 por xmm8,xmm9 psrld xmm5,31 paddd xmm13,xmm6 
paddd xmm2,xmm2 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((128-128))+rax] movdqa xmm8,xmm13 movdqa xmm6,xmm11 pxor xmm3,XMMWORD[((224-128))+rax] paddd xmm12,xmm15 pslld xmm8,5 pxor xmm6,xmm14 movdqa xmm9,xmm13 movdqa XMMWORD[(80-128)+rax],xmm2 paddd xmm12,xmm2 pxor xmm3,xmm0 psrld xmm9,27 pxor xmm6,xmm10 movdqa xmm7,xmm14 pslld xmm7,30 movdqa xmm5,xmm3 por xmm8,xmm9 psrld xmm5,31 paddd xmm12,xmm6 paddd xmm3,xmm3 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((144-128))+rax] movdqa xmm8,xmm12 movdqa xmm6,xmm10 pxor xmm4,XMMWORD[((240-128))+rax] paddd xmm11,xmm15 pslld xmm8,5 pxor xmm6,xmm13 movdqa xmm9,xmm12 movdqa XMMWORD[(96-128)+rax],xmm3 paddd xmm11,xmm3 pxor xmm4,xmm1 psrld xmm9,27 pxor xmm6,xmm14 movdqa xmm7,xmm13 pslld xmm7,30 movdqa xmm5,xmm4 por xmm8,xmm9 psrld xmm5,31 paddd xmm11,xmm6 paddd xmm4,xmm4 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((160-128))+rax] movdqa xmm8,xmm11 movdqa xmm6,xmm14 pxor xmm0,XMMWORD[((0-128))+rax] paddd xmm10,xmm15 pslld xmm8,5 pxor xmm6,xmm12 movdqa xmm9,xmm11 movdqa XMMWORD[(112-128)+rax],xmm4 paddd xmm10,xmm4 pxor xmm0,xmm2 psrld xmm9,27 pxor xmm6,xmm13 movdqa xmm7,xmm12 pslld xmm7,30 movdqa xmm5,xmm0 por xmm8,xmm9 psrld xmm5,31 paddd xmm10,xmm6 paddd xmm0,xmm0 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 movdqa xmm15,XMMWORD[32+rbp] pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((176-128))+rax] movdqa xmm8,xmm10 movdqa xmm7,xmm13 pxor xmm1,XMMWORD[((16-128))+rax] pxor xmm1,xmm3 paddd xmm14,xmm15 pslld xmm8,5 movdqa xmm9,xmm10 pand xmm7,xmm12 movdqa xmm6,xmm13 movdqa xmm5,xmm1 psrld xmm9,27 paddd xmm14,xmm7 pxor xmm6,xmm12 movdqa XMMWORD[(128-128)+rax],xmm0 paddd xmm14,xmm0 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm11 movdqa xmm7,xmm11 pslld xmm7,30 paddd xmm1,xmm1 paddd xmm14,xmm6 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa 
xmm4,XMMWORD[((192-128))+rax] movdqa xmm8,xmm14 movdqa xmm7,xmm12 pxor xmm2,XMMWORD[((32-128))+rax] pxor xmm2,xmm4 paddd xmm13,xmm15 pslld xmm8,5 movdqa xmm9,xmm14 pand xmm7,xmm11 movdqa xmm6,xmm12 movdqa xmm5,xmm2 psrld xmm9,27 paddd xmm13,xmm7 pxor xmm6,xmm11 movdqa XMMWORD[(144-128)+rax],xmm1 paddd xmm13,xmm1 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm10 movdqa xmm7,xmm10 pslld xmm7,30 paddd xmm2,xmm2 paddd xmm13,xmm6 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((208-128))+rax] movdqa xmm8,xmm13 movdqa xmm7,xmm11 pxor xmm3,XMMWORD[((48-128))+rax] pxor xmm3,xmm0 paddd xmm12,xmm15 pslld xmm8,5 movdqa xmm9,xmm13 pand xmm7,xmm10 movdqa xmm6,xmm11 movdqa xmm5,xmm3 psrld xmm9,27 paddd xmm12,xmm7 pxor xmm6,xmm10 movdqa XMMWORD[(160-128)+rax],xmm2 paddd xmm12,xmm2 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm14 movdqa xmm7,xmm14 pslld xmm7,30 paddd xmm3,xmm3 paddd xmm12,xmm6 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((224-128))+rax] movdqa xmm8,xmm12 movdqa xmm7,xmm10 pxor xmm4,XMMWORD[((64-128))+rax] pxor xmm4,xmm1 paddd xmm11,xmm15 pslld xmm8,5 movdqa xmm9,xmm12 pand xmm7,xmm14 movdqa xmm6,xmm10 movdqa xmm5,xmm4 psrld xmm9,27 paddd xmm11,xmm7 pxor xmm6,xmm14 movdqa XMMWORD[(176-128)+rax],xmm3 paddd xmm11,xmm3 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm13 movdqa xmm7,xmm13 pslld xmm7,30 paddd xmm4,xmm4 paddd xmm11,xmm6 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((240-128))+rax] movdqa xmm8,xmm11 movdqa xmm7,xmm14 pxor xmm0,XMMWORD[((80-128))+rax] pxor xmm0,xmm2 paddd xmm10,xmm15 pslld xmm8,5 movdqa xmm9,xmm11 pand xmm7,xmm13 movdqa xmm6,xmm14 movdqa xmm5,xmm0 psrld xmm9,27 paddd xmm10,xmm7 pxor xmm6,xmm13 movdqa XMMWORD[(192-128)+rax],xmm4 paddd xmm10,xmm4 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm12 movdqa xmm7,xmm12 pslld xmm7,30 paddd xmm0,xmm0 paddd xmm10,xmm6 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 
pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((0-128))+rax] movdqa xmm8,xmm10 movdqa xmm7,xmm13 pxor xmm1,XMMWORD[((96-128))+rax] pxor xmm1,xmm3 paddd xmm14,xmm15 pslld xmm8,5 movdqa xmm9,xmm10 pand xmm7,xmm12 movdqa xmm6,xmm13 movdqa xmm5,xmm1 psrld xmm9,27 paddd xmm14,xmm7 pxor xmm6,xmm12 movdqa XMMWORD[(208-128)+rax],xmm0 paddd xmm14,xmm0 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm11 movdqa xmm7,xmm11 pslld xmm7,30 paddd xmm1,xmm1 paddd xmm14,xmm6 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((16-128))+rax] movdqa xmm8,xmm14 movdqa xmm7,xmm12 pxor xmm2,XMMWORD[((112-128))+rax] pxor xmm2,xmm4 paddd xmm13,xmm15 pslld xmm8,5 movdqa xmm9,xmm14 pand xmm7,xmm11 movdqa xmm6,xmm12 movdqa xmm5,xmm2 psrld xmm9,27 paddd xmm13,xmm7 pxor xmm6,xmm11 movdqa XMMWORD[(224-128)+rax],xmm1 paddd xmm13,xmm1 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm10 movdqa xmm7,xmm10 pslld xmm7,30 paddd xmm2,xmm2 paddd xmm13,xmm6 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((32-128))+rax] movdqa xmm8,xmm13 movdqa xmm7,xmm11 pxor xmm3,XMMWORD[((128-128))+rax] pxor xmm3,xmm0 paddd xmm12,xmm15 pslld xmm8,5 movdqa xmm9,xmm13 pand xmm7,xmm10 movdqa xmm6,xmm11 movdqa xmm5,xmm3 psrld xmm9,27 paddd xmm12,xmm7 pxor xmm6,xmm10 movdqa XMMWORD[(240-128)+rax],xmm2 paddd xmm12,xmm2 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm14 movdqa xmm7,xmm14 pslld xmm7,30 paddd xmm3,xmm3 paddd xmm12,xmm6 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((48-128))+rax] movdqa xmm8,xmm12 movdqa xmm7,xmm10 pxor xmm4,XMMWORD[((144-128))+rax] pxor xmm4,xmm1 paddd xmm11,xmm15 pslld xmm8,5 movdqa xmm9,xmm12 pand xmm7,xmm14 movdqa xmm6,xmm10 movdqa xmm5,xmm4 psrld xmm9,27 paddd xmm11,xmm7 pxor xmm6,xmm14 movdqa XMMWORD[(0-128)+rax],xmm3 paddd xmm11,xmm3 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm13 movdqa xmm7,xmm13 pslld xmm7,30 paddd xmm4,xmm4 paddd xmm11,xmm6 psrld xmm13,2 paddd xmm11,xmm8 por 
xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((64-128))+rax] movdqa xmm8,xmm11 movdqa xmm7,xmm14 pxor xmm0,XMMWORD[((160-128))+rax] pxor xmm0,xmm2 paddd xmm10,xmm15 pslld xmm8,5 movdqa xmm9,xmm11 pand xmm7,xmm13 movdqa xmm6,xmm14 movdqa xmm5,xmm0 psrld xmm9,27 paddd xmm10,xmm7 pxor xmm6,xmm13 movdqa XMMWORD[(16-128)+rax],xmm4 paddd xmm10,xmm4 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm12 movdqa xmm7,xmm12 pslld xmm7,30 paddd xmm0,xmm0 paddd xmm10,xmm6 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((80-128))+rax] movdqa xmm8,xmm10 movdqa xmm7,xmm13 pxor xmm1,XMMWORD[((176-128))+rax] pxor xmm1,xmm3 paddd xmm14,xmm15 pslld xmm8,5 movdqa xmm9,xmm10 pand xmm7,xmm12 movdqa xmm6,xmm13 movdqa xmm5,xmm1 psrld xmm9,27 paddd xmm14,xmm7 pxor xmm6,xmm12 movdqa XMMWORD[(32-128)+rax],xmm0 paddd xmm14,xmm0 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm11 movdqa xmm7,xmm11 pslld xmm7,30 paddd xmm1,xmm1 paddd xmm14,xmm6 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((96-128))+rax] movdqa xmm8,xmm14 movdqa xmm7,xmm12 pxor xmm2,XMMWORD[((192-128))+rax] pxor xmm2,xmm4 paddd xmm13,xmm15 pslld xmm8,5 movdqa xmm9,xmm14 pand xmm7,xmm11 movdqa xmm6,xmm12 movdqa xmm5,xmm2 psrld xmm9,27 paddd xmm13,xmm7 pxor xmm6,xmm11 movdqa XMMWORD[(48-128)+rax],xmm1 paddd xmm13,xmm1 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm10 movdqa xmm7,xmm10 pslld xmm7,30 paddd xmm2,xmm2 paddd xmm13,xmm6 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((112-128))+rax] movdqa xmm8,xmm13 movdqa xmm7,xmm11 pxor xmm3,XMMWORD[((208-128))+rax] pxor xmm3,xmm0 paddd xmm12,xmm15 pslld xmm8,5 movdqa xmm9,xmm13 pand xmm7,xmm10 movdqa xmm6,xmm11 movdqa xmm5,xmm3 psrld xmm9,27 paddd xmm12,xmm7 pxor xmm6,xmm10 movdqa XMMWORD[(64-128)+rax],xmm2 paddd xmm12,xmm2 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm14 movdqa xmm7,xmm14 pslld xmm7,30 paddd xmm3,xmm3 paddd xmm12,xmm6 psrld xmm14,2 
paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((128-128))+rax] movdqa xmm8,xmm12 movdqa xmm7,xmm10 pxor xmm4,XMMWORD[((224-128))+rax] pxor xmm4,xmm1 paddd xmm11,xmm15 pslld xmm8,5 movdqa xmm9,xmm12 pand xmm7,xmm14 movdqa xmm6,xmm10 movdqa xmm5,xmm4 psrld xmm9,27 paddd xmm11,xmm7 pxor xmm6,xmm14 movdqa XMMWORD[(80-128)+rax],xmm3 paddd xmm11,xmm3 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm13 movdqa xmm7,xmm13 pslld xmm7,30 paddd xmm4,xmm4 paddd xmm11,xmm6 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((144-128))+rax] movdqa xmm8,xmm11 movdqa xmm7,xmm14 pxor xmm0,XMMWORD[((240-128))+rax] pxor xmm0,xmm2 paddd xmm10,xmm15 pslld xmm8,5 movdqa xmm9,xmm11 pand xmm7,xmm13 movdqa xmm6,xmm14 movdqa xmm5,xmm0 psrld xmm9,27 paddd xmm10,xmm7 pxor xmm6,xmm13 movdqa XMMWORD[(96-128)+rax],xmm4 paddd xmm10,xmm4 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm12 movdqa xmm7,xmm12 pslld xmm7,30 paddd xmm0,xmm0 paddd xmm10,xmm6 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((160-128))+rax] movdqa xmm8,xmm10 movdqa xmm7,xmm13 pxor xmm1,XMMWORD[((0-128))+rax] pxor xmm1,xmm3 paddd xmm14,xmm15 pslld xmm8,5 movdqa xmm9,xmm10 pand xmm7,xmm12 movdqa xmm6,xmm13 movdqa xmm5,xmm1 psrld xmm9,27 paddd xmm14,xmm7 pxor xmm6,xmm12 movdqa XMMWORD[(112-128)+rax],xmm0 paddd xmm14,xmm0 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm11 movdqa xmm7,xmm11 pslld xmm7,30 paddd xmm1,xmm1 paddd xmm14,xmm6 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((176-128))+rax] movdqa xmm8,xmm14 movdqa xmm7,xmm12 pxor xmm2,XMMWORD[((16-128))+rax] pxor xmm2,xmm4 paddd xmm13,xmm15 pslld xmm8,5 movdqa xmm9,xmm14 pand xmm7,xmm11 movdqa xmm6,xmm12 movdqa xmm5,xmm2 psrld xmm9,27 paddd xmm13,xmm7 pxor xmm6,xmm11 movdqa XMMWORD[(128-128)+rax],xmm1 paddd xmm13,xmm1 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm10 movdqa xmm7,xmm10 pslld xmm7,30 paddd xmm2,xmm2 paddd 
xmm13,xmm6 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((192-128))+rax] movdqa xmm8,xmm13 movdqa xmm7,xmm11 pxor xmm3,XMMWORD[((32-128))+rax] pxor xmm3,xmm0 paddd xmm12,xmm15 pslld xmm8,5 movdqa xmm9,xmm13 pand xmm7,xmm10 movdqa xmm6,xmm11 movdqa xmm5,xmm3 psrld xmm9,27 paddd xmm12,xmm7 pxor xmm6,xmm10 movdqa XMMWORD[(144-128)+rax],xmm2 paddd xmm12,xmm2 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm14 movdqa xmm7,xmm14 pslld xmm7,30 paddd xmm3,xmm3 paddd xmm12,xmm6 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((208-128))+rax] movdqa xmm8,xmm12 movdqa xmm7,xmm10 pxor xmm4,XMMWORD[((48-128))+rax] pxor xmm4,xmm1 paddd xmm11,xmm15 pslld xmm8,5 movdqa xmm9,xmm12 pand xmm7,xmm14 movdqa xmm6,xmm10 movdqa xmm5,xmm4 psrld xmm9,27 paddd xmm11,xmm7 pxor xmm6,xmm14 movdqa XMMWORD[(160-128)+rax],xmm3 paddd xmm11,xmm3 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm13 movdqa xmm7,xmm13 pslld xmm7,30 paddd xmm4,xmm4 paddd xmm11,xmm6 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((224-128))+rax] movdqa xmm8,xmm11 movdqa xmm7,xmm14 pxor xmm0,XMMWORD[((64-128))+rax] pxor xmm0,xmm2 paddd xmm10,xmm15 pslld xmm8,5 movdqa xmm9,xmm11 pand xmm7,xmm13 movdqa xmm6,xmm14 movdqa xmm5,xmm0 psrld xmm9,27 paddd xmm10,xmm7 pxor xmm6,xmm13 movdqa XMMWORD[(176-128)+rax],xmm4 paddd xmm10,xmm4 por xmm8,xmm9 psrld xmm5,31 pand xmm6,xmm12 movdqa xmm7,xmm12 pslld xmm7,30 paddd xmm0,xmm0 paddd xmm10,xmm6 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 movdqa xmm15,XMMWORD[64+rbp] pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((240-128))+rax] movdqa xmm8,xmm10 movdqa xmm6,xmm13 pxor xmm1,XMMWORD[((80-128))+rax] paddd xmm14,xmm15 pslld xmm8,5 pxor xmm6,xmm11 movdqa xmm9,xmm10 movdqa XMMWORD[(192-128)+rax],xmm0 paddd xmm14,xmm0 pxor xmm1,xmm3 psrld xmm9,27 pxor xmm6,xmm12 movdqa xmm7,xmm11 pslld xmm7,30 movdqa xmm5,xmm1 por xmm8,xmm9 psrld xmm5,31 paddd xmm14,xmm6 paddd 
xmm1,xmm1 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((0-128))+rax] movdqa xmm8,xmm14 movdqa xmm6,xmm12 pxor xmm2,XMMWORD[((96-128))+rax] paddd xmm13,xmm15 pslld xmm8,5 pxor xmm6,xmm10 movdqa xmm9,xmm14 movdqa XMMWORD[(208-128)+rax],xmm1 paddd xmm13,xmm1 pxor xmm2,xmm4 psrld xmm9,27 pxor xmm6,xmm11 movdqa xmm7,xmm10 pslld xmm7,30 movdqa xmm5,xmm2 por xmm8,xmm9 psrld xmm5,31 paddd xmm13,xmm6 paddd xmm2,xmm2 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((16-128))+rax] movdqa xmm8,xmm13 movdqa xmm6,xmm11 pxor xmm3,XMMWORD[((112-128))+rax] paddd xmm12,xmm15 pslld xmm8,5 pxor xmm6,xmm14 movdqa xmm9,xmm13 movdqa XMMWORD[(224-128)+rax],xmm2 paddd xmm12,xmm2 pxor xmm3,xmm0 psrld xmm9,27 pxor xmm6,xmm10 movdqa xmm7,xmm14 pslld xmm7,30 movdqa xmm5,xmm3 por xmm8,xmm9 psrld xmm5,31 paddd xmm12,xmm6 paddd xmm3,xmm3 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((32-128))+rax] movdqa xmm8,xmm12 movdqa xmm6,xmm10 pxor xmm4,XMMWORD[((128-128))+rax] paddd xmm11,xmm15 pslld xmm8,5 pxor xmm6,xmm13 movdqa xmm9,xmm12 movdqa XMMWORD[(240-128)+rax],xmm3 paddd xmm11,xmm3 pxor xmm4,xmm1 psrld xmm9,27 pxor xmm6,xmm14 movdqa xmm7,xmm13 pslld xmm7,30 movdqa xmm5,xmm4 por xmm8,xmm9 psrld xmm5,31 paddd xmm11,xmm6 paddd xmm4,xmm4 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((48-128))+rax] movdqa xmm8,xmm11 movdqa xmm6,xmm14 pxor xmm0,XMMWORD[((144-128))+rax] paddd xmm10,xmm15 pslld xmm8,5 pxor xmm6,xmm12 movdqa xmm9,xmm11 movdqa XMMWORD[(0-128)+rax],xmm4 paddd xmm10,xmm4 pxor xmm0,xmm2 psrld xmm9,27 pxor xmm6,xmm13 movdqa xmm7,xmm12 pslld xmm7,30 movdqa xmm5,xmm0 por xmm8,xmm9 psrld xmm5,31 paddd xmm10,xmm6 paddd xmm0,xmm0 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((64-128))+rax] movdqa xmm8,xmm10 movdqa xmm6,xmm13 pxor 
xmm1,XMMWORD[((160-128))+rax] paddd xmm14,xmm15 pslld xmm8,5 pxor xmm6,xmm11 movdqa xmm9,xmm10 movdqa XMMWORD[(16-128)+rax],xmm0 paddd xmm14,xmm0 pxor xmm1,xmm3 psrld xmm9,27 pxor xmm6,xmm12 movdqa xmm7,xmm11 pslld xmm7,30 movdqa xmm5,xmm1 por xmm8,xmm9 psrld xmm5,31 paddd xmm14,xmm6 paddd xmm1,xmm1 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((80-128))+rax] movdqa xmm8,xmm14 movdqa xmm6,xmm12 pxor xmm2,XMMWORD[((176-128))+rax] paddd xmm13,xmm15 pslld xmm8,5 pxor xmm6,xmm10 movdqa xmm9,xmm14 movdqa XMMWORD[(32-128)+rax],xmm1 paddd xmm13,xmm1 pxor xmm2,xmm4 psrld xmm9,27 pxor xmm6,xmm11 movdqa xmm7,xmm10 pslld xmm7,30 movdqa xmm5,xmm2 por xmm8,xmm9 psrld xmm5,31 paddd xmm13,xmm6 paddd xmm2,xmm2 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((96-128))+rax] movdqa xmm8,xmm13 movdqa xmm6,xmm11 pxor xmm3,XMMWORD[((192-128))+rax] paddd xmm12,xmm15 pslld xmm8,5 pxor xmm6,xmm14 movdqa xmm9,xmm13 movdqa XMMWORD[(48-128)+rax],xmm2 paddd xmm12,xmm2 pxor xmm3,xmm0 psrld xmm9,27 pxor xmm6,xmm10 movdqa xmm7,xmm14 pslld xmm7,30 movdqa xmm5,xmm3 por xmm8,xmm9 psrld xmm5,31 paddd xmm12,xmm6 paddd xmm3,xmm3 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((112-128))+rax] movdqa xmm8,xmm12 movdqa xmm6,xmm10 pxor xmm4,XMMWORD[((208-128))+rax] paddd xmm11,xmm15 pslld xmm8,5 pxor xmm6,xmm13 movdqa xmm9,xmm12 movdqa XMMWORD[(64-128)+rax],xmm3 paddd xmm11,xmm3 pxor xmm4,xmm1 psrld xmm9,27 pxor xmm6,xmm14 movdqa xmm7,xmm13 pslld xmm7,30 movdqa xmm5,xmm4 por xmm8,xmm9 psrld xmm5,31 paddd xmm11,xmm6 paddd xmm4,xmm4 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((128-128))+rax] movdqa xmm8,xmm11 movdqa xmm6,xmm14 pxor xmm0,XMMWORD[((224-128))+rax] paddd xmm10,xmm15 pslld xmm8,5 pxor xmm6,xmm12 movdqa xmm9,xmm11 movdqa XMMWORD[(80-128)+rax],xmm4 paddd xmm10,xmm4 pxor xmm0,xmm2 psrld xmm9,27 pxor 
xmm6,xmm13 movdqa xmm7,xmm12 pslld xmm7,30 movdqa xmm5,xmm0 por xmm8,xmm9 psrld xmm5,31 paddd xmm10,xmm6 paddd xmm0,xmm0 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((144-128))+rax] movdqa xmm8,xmm10 movdqa xmm6,xmm13 pxor xmm1,XMMWORD[((240-128))+rax] paddd xmm14,xmm15 pslld xmm8,5 pxor xmm6,xmm11 movdqa xmm9,xmm10 movdqa XMMWORD[(96-128)+rax],xmm0 paddd xmm14,xmm0 pxor xmm1,xmm3 psrld xmm9,27 pxor xmm6,xmm12 movdqa xmm7,xmm11 pslld xmm7,30 movdqa xmm5,xmm1 por xmm8,xmm9 psrld xmm5,31 paddd xmm14,xmm6 paddd xmm1,xmm1 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((160-128))+rax] movdqa xmm8,xmm14 movdqa xmm6,xmm12 pxor xmm2,XMMWORD[((0-128))+rax] paddd xmm13,xmm15 pslld xmm8,5 pxor xmm6,xmm10 movdqa xmm9,xmm14 movdqa XMMWORD[(112-128)+rax],xmm1 paddd xmm13,xmm1 pxor xmm2,xmm4 psrld xmm9,27 pxor xmm6,xmm11 movdqa xmm7,xmm10 pslld xmm7,30 movdqa xmm5,xmm2 por xmm8,xmm9 psrld xmm5,31 paddd xmm13,xmm6 paddd xmm2,xmm2 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((176-128))+rax] movdqa xmm8,xmm13 movdqa xmm6,xmm11 pxor xmm3,XMMWORD[((16-128))+rax] paddd xmm12,xmm15 pslld xmm8,5 pxor xmm6,xmm14 movdqa xmm9,xmm13 paddd xmm12,xmm2 pxor xmm3,xmm0 psrld xmm9,27 pxor xmm6,xmm10 movdqa xmm7,xmm14 pslld xmm7,30 movdqa xmm5,xmm3 por xmm8,xmm9 psrld xmm5,31 paddd xmm12,xmm6 paddd xmm3,xmm3 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((192-128))+rax] movdqa xmm8,xmm12 movdqa xmm6,xmm10 pxor xmm4,XMMWORD[((32-128))+rax] paddd xmm11,xmm15 pslld xmm8,5 pxor xmm6,xmm13 movdqa xmm9,xmm12 paddd xmm11,xmm3 pxor xmm4,xmm1 psrld xmm9,27 pxor xmm6,xmm14 movdqa xmm7,xmm13 pslld xmm7,30 movdqa xmm5,xmm4 por xmm8,xmm9 psrld xmm5,31 paddd xmm11,xmm6 paddd xmm4,xmm4 psrld xmm13,2 paddd xmm11,xmm8 por xmm4,xmm5 por xmm13,xmm7 pxor xmm0,xmm2 movdqa xmm2,XMMWORD[((208-128))+rax] movdqa xmm8,xmm11 
movdqa xmm6,xmm14 pxor xmm0,XMMWORD[((48-128))+rax] paddd xmm10,xmm15 pslld xmm8,5 pxor xmm6,xmm12 movdqa xmm9,xmm11 paddd xmm10,xmm4 pxor xmm0,xmm2 psrld xmm9,27 pxor xmm6,xmm13 movdqa xmm7,xmm12 pslld xmm7,30 movdqa xmm5,xmm0 por xmm8,xmm9 psrld xmm5,31 paddd xmm10,xmm6 paddd xmm0,xmm0 psrld xmm12,2 paddd xmm10,xmm8 por xmm0,xmm5 por xmm12,xmm7 pxor xmm1,xmm3 movdqa xmm3,XMMWORD[((224-128))+rax] movdqa xmm8,xmm10 movdqa xmm6,xmm13 pxor xmm1,XMMWORD[((64-128))+rax] paddd xmm14,xmm15 pslld xmm8,5 pxor xmm6,xmm11 movdqa xmm9,xmm10 paddd xmm14,xmm0 pxor xmm1,xmm3 psrld xmm9,27 pxor xmm6,xmm12 movdqa xmm7,xmm11 pslld xmm7,30 movdqa xmm5,xmm1 por xmm8,xmm9 psrld xmm5,31 paddd xmm14,xmm6 paddd xmm1,xmm1 psrld xmm11,2 paddd xmm14,xmm8 por xmm1,xmm5 por xmm11,xmm7 pxor xmm2,xmm4 movdqa xmm4,XMMWORD[((240-128))+rax] movdqa xmm8,xmm14 movdqa xmm6,xmm12 pxor xmm2,XMMWORD[((80-128))+rax] paddd xmm13,xmm15 pslld xmm8,5 pxor xmm6,xmm10 movdqa xmm9,xmm14 paddd xmm13,xmm1 pxor xmm2,xmm4 psrld xmm9,27 pxor xmm6,xmm11 movdqa xmm7,xmm10 pslld xmm7,30 movdqa xmm5,xmm2 por xmm8,xmm9 psrld xmm5,31 paddd xmm13,xmm6 paddd xmm2,xmm2 psrld xmm10,2 paddd xmm13,xmm8 por xmm2,xmm5 por xmm10,xmm7 pxor xmm3,xmm0 movdqa xmm0,XMMWORD[((0-128))+rax] movdqa xmm8,xmm13 movdqa xmm6,xmm11 pxor xmm3,XMMWORD[((96-128))+rax] paddd xmm12,xmm15 pslld xmm8,5 pxor xmm6,xmm14 movdqa xmm9,xmm13 paddd xmm12,xmm2 pxor xmm3,xmm0 psrld xmm9,27 pxor xmm6,xmm10 movdqa xmm7,xmm14 pslld xmm7,30 movdqa xmm5,xmm3 por xmm8,xmm9 psrld xmm5,31 paddd xmm12,xmm6 paddd xmm3,xmm3 psrld xmm14,2 paddd xmm12,xmm8 por xmm3,xmm5 por xmm14,xmm7 pxor xmm4,xmm1 movdqa xmm1,XMMWORD[((16-128))+rax] movdqa xmm8,xmm12 movdqa xmm6,xmm10 pxor xmm4,XMMWORD[((112-128))+rax] paddd xmm11,xmm15 pslld xmm8,5 pxor xmm6,xmm13 movdqa xmm9,xmm12 paddd xmm11,xmm3 pxor xmm4,xmm1 psrld xmm9,27 pxor xmm6,xmm14 movdqa xmm7,xmm13 pslld xmm7,30 movdqa xmm5,xmm4 por xmm8,xmm9 psrld xmm5,31 paddd xmm11,xmm6 paddd xmm4,xmm4 psrld xmm13,2 paddd xmm11,xmm8 por 
;-----------------------------------------------------------------------
; NOTE: the text of this file is wrapped at arbitrary points.  The text
; line that follows this comment starts in the middle of an instruction
; and first finishes the tail of sha1_multi_block.  This comment documents
; the routine that begins further along that line, at the label
; sha1_multi_block_shaext:, and ends at
; $L$SEH_end_sha1_multi_block_shaext:.
;
; sha1_multi_block_shaext -- SHA-1 block function that processes two
; input streams side by side using the SHA extension opcodes (emitted as
; raw DB bytes, see the legend below).
;
; Entry points
;   sha1_multi_block_shaext:  Win64 stub: stores rdi/rsi at [8+rsp] and
;                             [16+rsp], then copies rcx, rdx, r8 into
;                             rdi, rsi, rdx.
;   _shaext_shortcut:         entered by a jump with rdi/rsi/rdx already
;                             loaded (sha1_multi_block branches here after
;                             its `bt rcx,61` test).
;
; Inputs (after the stub)
;   rdi  state area; 32-bit state word w (0..4) of stream s is read from
;        and written back to  rdi + 32*w + 4*s.
;   rsi  array of 16-byte descriptors: qword data pointer at +0, dword
;        count of 64-byte blocks at +8.
;   edx  pass count.  It is doubled on entry (shl edx,1) because every
;        outer pass consumes two descriptors (rsi += 32) and two state
;        columns (rdi += 8).
;        NOTE(review): the caller-side meaning of this argument is not
;        visible in this chunk -- confirm against the C prototype.
;
; Stack frame (rax = rsp at _shaext_shortcut; rax is not written again,
; which is why $L$done_shaext uses it without reloading [272+rsp])
;   [rax-8], [rax-16]       pushed rbx, rbp
;   [rax-184] .. [rax-40]   saved xmm6 .. xmm15
;   rsp is then lowered by 288 and aligned down to a multiple of 256:
;   [rsp+64],  [rsp+80]     xmm0, xmm1 of stream 0 saved per iteration
;   [rsp+96],  [rsp+112]    xmm8, xmm9 of stream 1 saved per iteration
;   [rsp+256], [rsp+260]    remaining-block counters (rbx points here)
;   [rsp+272]               copy of rax
;   [rsp+280]               outer pass counter while edx is reused
;
; Register roles inside the loops
;   r8 / r9                 input pointers of stream 0 / 1
;   xmm0 / xmm8             four packed state words (sha1rnds4 destination)
;   xmm1,xmm2 / xmm9,xmm10  alternating registers for the fifth state word
;   xmm4-xmm7 / xmm11-xmm14 message words
;   xmm3                    pshufb mask loaded from K_XX_XX+128 (table not
;                           visible in this chunk; presumably a byte
;                           reversal mask -- confirm)
;   rdi is biased by +64 on entry, hence the (k-64)+rdi operands.
;
; Flow
;   $L$oop_grande_shaext:
;     edx = max(count0, count1, 0).  A stream whose count is <= 0 gets its
;     pointer replaced by rsp, so the loop reads 64 bytes of this stack
;     frame instead of caller memory.  If the maximum is 0 the routine
;     jumps to $L$done_shaext.  Otherwise the five state words of both
;     streams are loaded and de-interleaved into xmm0/xmm1 and xmm8/xmm9.
;   $L$oop_shaext (one pass per 64-byte block):
;     load 64 bytes per stream, pshufb each vector by xmm3, save the
;     current state on the stack, run 20 sha1rnds4 per stream (immediates
;     0,1,2,3 -- five each) interleaved with sha1nexte/sha1msg1/sha1msg2.
;     Then, per stream: pointer = rsp once 1 >= counter;
;     mask = (counter > 0); state = (new & mask) + saved state;
;     counter += mask (i.e. -1 while positive).  Repeats edx times.
;   After the inner loop the state is re-interleaved and stored back,
;   rdi += 8, rsi += 32, and the outer pass counter is decremented.
;   $L$done_shaext / $L$epilogue_shaext:
;     restore xmm6-xmm15, rbp, rbx and rsp from rax, reload rdi/rsi from
;     [8+rsp]/[16+rsp], return via DB 0F3h,0C3h (rep ret).
;
; DB legend (decimal bytes as written in the source)
;   102,15,56,0,modrm        pshufb   xmm(reg),xmm(rm)   (66 0F 38 00 /r)
;   15,58,204,modrm,imm      sha1rnds4 xmm(reg),xmm(rm),imm
;   15,56,200,modrm          sha1nexte xmm(reg),xmm(rm)
;   15,56,201,modrm          sha1msg1  xmm(reg),xmm(rm)
;   15,56,202,modrm          sha1msg2  xmm(reg),xmm(rm)
;   a 69 or 68 placed before the 15 is a REX prefix (45h / 44h) that
;   selects xmm8-xmm15 for both operands / for the reg operand only.
;   The last opcode of the loop, DB 68,15,56,200,204, therefore decodes
;   to sha1nexte xmm9,xmm4; xmm4 was cleared by `pxor xmm4,xmm4` earlier
;   in the same iteration.
;-----------------------------------------------------------------------
xmm4,xmm5 por xmm13,xmm7 movdqa xmm8,xmm11 paddd xmm10,xmm15 movdqa xmm6,xmm14 pslld xmm8,5 pxor xmm6,xmm12 movdqa xmm9,xmm11 paddd xmm10,xmm4 psrld xmm9,27 movdqa xmm7,xmm12 pxor xmm6,xmm13 pslld xmm7,30 por xmm8,xmm9 paddd xmm10,xmm6 psrld xmm12,2 paddd xmm10,xmm8 por xmm12,xmm7 movdqa xmm0,XMMWORD[rbx] mov ecx,1 cmp ecx,DWORD[rbx] pxor xmm8,xmm8 cmovge r8,rbp cmp ecx,DWORD[4+rbx] movdqa xmm1,xmm0 cmovge r9,rbp cmp ecx,DWORD[8+rbx] pcmpgtd xmm1,xmm8 cmovge r10,rbp cmp ecx,DWORD[12+rbx] paddd xmm0,xmm1 cmovge r11,rbp movdqu xmm6,XMMWORD[rdi] pand xmm10,xmm1 movdqu xmm7,XMMWORD[32+rdi] pand xmm11,xmm1 paddd xmm10,xmm6 movdqu xmm8,XMMWORD[64+rdi] pand xmm12,xmm1 paddd xmm11,xmm7 movdqu xmm9,XMMWORD[96+rdi] pand xmm13,xmm1 paddd xmm12,xmm8 movdqu xmm5,XMMWORD[128+rdi] pand xmm14,xmm1 movdqu XMMWORD[rdi],xmm10 paddd xmm13,xmm9 movdqu XMMWORD[32+rdi],xmm11 paddd xmm14,xmm5 movdqu XMMWORD[64+rdi],xmm12 movdqu XMMWORD[96+rdi],xmm13 movdqu XMMWORD[128+rdi],xmm14 movdqa XMMWORD[rbx],xmm0 movdqa xmm5,XMMWORD[96+rbp] movdqa xmm15,XMMWORD[((-32))+rbp] dec edx jnz NEAR $L$oop mov edx,DWORD[280+rsp] lea rdi,[16+rdi] lea rsi,[64+rsi] dec edx jnz NEAR $L$oop_grande $L$done: mov rax,QWORD[272+rsp] movaps xmm6,XMMWORD[((-184))+rax] movaps xmm7,XMMWORD[((-168))+rax] movaps xmm8,XMMWORD[((-152))+rax] movaps xmm9,XMMWORD[((-136))+rax] movaps xmm10,XMMWORD[((-120))+rax] movaps xmm11,XMMWORD[((-104))+rax] movaps xmm12,XMMWORD[((-88))+rax] movaps xmm13,XMMWORD[((-72))+rax] movaps xmm14,XMMWORD[((-56))+rax] movaps xmm15,XMMWORD[((-40))+rax] mov rbp,QWORD[((-16))+rax] mov rbx,QWORD[((-8))+rax] lea rsp,[rax] $L$epilogue: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_sha1_multi_block: ALIGN 32 sha1_multi_block_shaext: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_sha1_multi_block_shaext: mov rdi,rcx mov rsi,rdx mov rdx,r8 _shaext_shortcut: mov rax,rsp push rbx push rbp lea rsp,[((-168))+rsp] movaps 
XMMWORD[rsp],xmm6 movaps XMMWORD[16+rsp],xmm7 movaps XMMWORD[32+rsp],xmm8 movaps XMMWORD[48+rsp],xmm9 movaps XMMWORD[(-120)+rax],xmm10 movaps XMMWORD[(-104)+rax],xmm11 movaps XMMWORD[(-88)+rax],xmm12 movaps XMMWORD[(-72)+rax],xmm13 movaps XMMWORD[(-56)+rax],xmm14 movaps XMMWORD[(-40)+rax],xmm15 sub rsp,288 shl edx,1 and rsp,-256 lea rdi,[64+rdi] mov QWORD[272+rsp],rax $L$body_shaext: lea rbx,[256+rsp] movdqa xmm3,XMMWORD[((K_XX_XX+128))] $L$oop_grande_shaext: mov DWORD[280+rsp],edx xor edx,edx mov r8,QWORD[rsi] mov ecx,DWORD[8+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[rbx],ecx cmovle r8,rsp mov r9,QWORD[16+rsi] mov ecx,DWORD[24+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[4+rbx],ecx cmovle r9,rsp test edx,edx jz NEAR $L$done_shaext movq xmm0,QWORD[((0-64))+rdi] movq xmm4,QWORD[((32-64))+rdi] movq xmm5,QWORD[((64-64))+rdi] movq xmm6,QWORD[((96-64))+rdi] movq xmm7,QWORD[((128-64))+rdi] punpckldq xmm0,xmm4 punpckldq xmm5,xmm6 movdqa xmm8,xmm0 punpcklqdq xmm0,xmm5 punpckhqdq xmm8,xmm5 pshufd xmm1,xmm7,63 pshufd xmm9,xmm7,127 pshufd xmm0,xmm0,27 pshufd xmm8,xmm8,27 jmp NEAR $L$oop_shaext ALIGN 32 $L$oop_shaext: movdqu xmm4,XMMWORD[r8] movdqu xmm11,XMMWORD[r9] movdqu xmm5,XMMWORD[16+r8] movdqu xmm12,XMMWORD[16+r9] movdqu xmm6,XMMWORD[32+r8] DB 102,15,56,0,227 movdqu xmm13,XMMWORD[32+r9] DB 102,68,15,56,0,219 movdqu xmm7,XMMWORD[48+r8] lea r8,[64+r8] DB 102,15,56,0,235 movdqu xmm14,XMMWORD[48+r9] lea r9,[64+r9] DB 102,68,15,56,0,227 movdqa XMMWORD[80+rsp],xmm1 paddd xmm1,xmm4 movdqa XMMWORD[112+rsp],xmm9 paddd xmm9,xmm11 movdqa XMMWORD[64+rsp],xmm0 movdqa xmm2,xmm0 movdqa XMMWORD[96+rsp],xmm8 movdqa xmm10,xmm8 DB 15,58,204,193,0 DB 15,56,200,213 DB 69,15,58,204,193,0 DB 69,15,56,200,212 DB 102,15,56,0,243 prefetcht0 [127+r8] DB 15,56,201,229 DB 102,68,15,56,0,235 prefetcht0 [127+r9] DB 69,15,56,201,220 DB 102,15,56,0,251 movdqa xmm1,xmm0 DB 102,68,15,56,0,243 movdqa xmm9,xmm8 DB 15,58,204,194,0 DB 15,56,200,206 DB 69,15,58,204,194,0 DB 
69,15,56,200,205 pxor xmm4,xmm6 DB 15,56,201,238 pxor xmm11,xmm13 DB 69,15,56,201,229 movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,0 DB 15,56,200,215 DB 69,15,58,204,193,0 DB 69,15,56,200,214 DB 15,56,202,231 DB 69,15,56,202,222 pxor xmm5,xmm7 DB 15,56,201,247 pxor xmm12,xmm14 DB 69,15,56,201,238 movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,0 DB 15,56,200,204 DB 69,15,58,204,194,0 DB 69,15,56,200,203 DB 15,56,202,236 DB 69,15,56,202,227 pxor xmm6,xmm4 DB 15,56,201,252 pxor xmm13,xmm11 DB 69,15,56,201,243 movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,0 DB 15,56,200,213 DB 69,15,58,204,193,0 DB 69,15,56,200,212 DB 15,56,202,245 DB 69,15,56,202,236 pxor xmm7,xmm5 DB 15,56,201,229 pxor xmm14,xmm12 DB 69,15,56,201,220 movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,1 DB 15,56,200,206 DB 69,15,58,204,194,1 DB 69,15,56,200,205 DB 15,56,202,254 DB 69,15,56,202,245 pxor xmm4,xmm6 DB 15,56,201,238 pxor xmm11,xmm13 DB 69,15,56,201,229 movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,1 DB 15,56,200,215 DB 69,15,58,204,193,1 DB 69,15,56,200,214 DB 15,56,202,231 DB 69,15,56,202,222 pxor xmm5,xmm7 DB 15,56,201,247 pxor xmm12,xmm14 DB 69,15,56,201,238 movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,1 DB 15,56,200,204 DB 69,15,58,204,194,1 DB 69,15,56,200,203 DB 15,56,202,236 DB 69,15,56,202,227 pxor xmm6,xmm4 DB 15,56,201,252 pxor xmm13,xmm11 DB 69,15,56,201,243 movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,1 DB 15,56,200,213 DB 69,15,58,204,193,1 DB 69,15,56,200,212 DB 15,56,202,245 DB 69,15,56,202,236 pxor xmm7,xmm5 DB 15,56,201,229 pxor xmm14,xmm12 DB 69,15,56,201,220 movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,1 DB 15,56,200,206 DB 69,15,58,204,194,1 DB 69,15,56,200,205 DB 15,56,202,254 DB 69,15,56,202,245 pxor xmm4,xmm6 DB 15,56,201,238 pxor xmm11,xmm13 DB 69,15,56,201,229 movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,2 DB 15,56,200,215 DB 69,15,58,204,193,2 DB 69,15,56,200,214 DB 15,56,202,231 DB 69,15,56,202,222 pxor xmm5,xmm7 
; $L$oop_shaext continues on the next text line: the remaining sha1rnds4
; groups with immediate 2, then the five groups with immediate 3, followed
; by the start of the per-stream counter and mask update.
DB 15,56,201,247 pxor xmm12,xmm14 DB 69,15,56,201,238 movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,2 DB 15,56,200,204 DB 69,15,58,204,194,2 DB 69,15,56,200,203 DB 15,56,202,236 DB 69,15,56,202,227 pxor xmm6,xmm4 DB 15,56,201,252 pxor xmm13,xmm11 DB 69,15,56,201,243 movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,2 DB 15,56,200,213 DB 69,15,58,204,193,2 DB 69,15,56,200,212 DB 15,56,202,245 DB 69,15,56,202,236 pxor xmm7,xmm5 DB 15,56,201,229 pxor xmm14,xmm12 DB 69,15,56,201,220 movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,2 DB 15,56,200,206 DB 69,15,58,204,194,2 DB 69,15,56,200,205 DB 15,56,202,254 DB 69,15,56,202,245 pxor xmm4,xmm6 DB 15,56,201,238 pxor xmm11,xmm13 DB 69,15,56,201,229 movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,2 DB 15,56,200,215 DB 69,15,58,204,193,2 DB 69,15,56,200,214 DB 15,56,202,231 DB 69,15,56,202,222 pxor xmm5,xmm7 DB 15,56,201,247 pxor xmm12,xmm14 DB 69,15,56,201,238 movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,3 DB 15,56,200,204 DB 69,15,58,204,194,3 DB 69,15,56,200,203 DB 15,56,202,236 DB 69,15,56,202,227 pxor xmm6,xmm4 DB 15,56,201,252 pxor xmm13,xmm11 DB 69,15,56,201,243 movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,3 DB 15,56,200,213 DB 69,15,58,204,193,3 DB 69,15,56,200,212 DB 15,56,202,245 DB 69,15,56,202,236 pxor xmm7,xmm5 pxor xmm14,xmm12 mov ecx,1 pxor xmm4,xmm4 cmp ecx,DWORD[rbx] cmovge r8,rsp movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,3 DB 15,56,200,206 DB 69,15,58,204,194,3 DB 69,15,56,200,205 DB 15,56,202,254 DB 69,15,56,202,245 cmp ecx,DWORD[4+rbx] cmovge r9,rsp movq xmm6,QWORD[rbx] movdqa xmm2,xmm0 movdqa xmm10,xmm8 DB 15,58,204,193,3 DB 15,56,200,215 DB 69,15,58,204,193,3 DB 69,15,56,200,214 pshufd xmm11,xmm6,0x00 pshufd xmm12,xmm6,0x55 movdqa xmm7,xmm6 pcmpgtd xmm11,xmm4 pcmpgtd xmm12,xmm4 movdqa xmm1,xmm0 movdqa xmm9,xmm8 DB 15,58,204,194,3 DB 15,56,200,204 DB 69,15,58,204,194,3 DB 68,15,56,200,204 pcmpgtd xmm7,xmm4 pand xmm0,xmm11 pand xmm1,xmm11 pand xmm8,xmm12 pand xmm9,xmm12 
; The next text line finishes the $L$oop_shaext iteration (counter update,
; add-back of the state saved at [64..112+rsp]), writes the state back,
; and holds the $L$done_shaext / $L$epilogue_shaext exit path.  Everything
; after $L$SEH_end_sha1_multi_block_shaext: on that line already belongs
; to sha1_multi_block_avx, which is not documented here.
paddd xmm6,xmm7 paddd xmm0,XMMWORD[64+rsp] paddd xmm1,XMMWORD[80+rsp] paddd xmm8,XMMWORD[96+rsp] paddd xmm9,XMMWORD[112+rsp] movq QWORD[rbx],xmm6 dec edx jnz NEAR $L$oop_shaext mov edx,DWORD[280+rsp] pshufd xmm0,xmm0,27 pshufd xmm8,xmm8,27 movdqa xmm6,xmm0 punpckldq xmm0,xmm8 punpckhdq xmm6,xmm8 punpckhdq xmm1,xmm9 movq QWORD[(0-64)+rdi],xmm0 psrldq xmm0,8 movq QWORD[(64-64)+rdi],xmm6 psrldq xmm6,8 movq QWORD[(32-64)+rdi],xmm0 psrldq xmm1,8 movq QWORD[(96-64)+rdi],xmm6 movq QWORD[(128-64)+rdi],xmm1 lea rdi,[8+rdi] lea rsi,[32+rsi] dec edx jnz NEAR $L$oop_grande_shaext $L$done_shaext: movaps xmm6,XMMWORD[((-184))+rax] movaps xmm7,XMMWORD[((-168))+rax] movaps xmm8,XMMWORD[((-152))+rax] movaps xmm9,XMMWORD[((-136))+rax] movaps xmm10,XMMWORD[((-120))+rax] movaps xmm11,XMMWORD[((-104))+rax] movaps xmm12,XMMWORD[((-88))+rax] movaps xmm13,XMMWORD[((-72))+rax] movaps xmm14,XMMWORD[((-56))+rax] movaps xmm15,XMMWORD[((-40))+rax] mov rbp,QWORD[((-16))+rax] mov rbx,QWORD[((-8))+rax] lea rsp,[rax] $L$epilogue_shaext: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_sha1_multi_block_shaext: ALIGN 32 sha1_multi_block_avx: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_sha1_multi_block_avx: mov rdi,rcx mov rsi,rdx mov rdx,r8 _avx_shortcut: shr rcx,32 cmp edx,2 jb NEAR $L$avx test ecx,32 jnz NEAR _avx2_shortcut jmp NEAR $L$avx ALIGN 32 $L$avx: mov rax,rsp push rbx push rbp lea rsp,[((-168))+rsp] movaps XMMWORD[rsp],xmm6 movaps XMMWORD[16+rsp],xmm7 movaps XMMWORD[32+rsp],xmm8 movaps XMMWORD[48+rsp],xmm9 movaps XMMWORD[(-120)+rax],xmm10 movaps XMMWORD[(-104)+rax],xmm11 movaps XMMWORD[(-88)+rax],xmm12 movaps XMMWORD[(-72)+rax],xmm13 movaps XMMWORD[(-56)+rax],xmm14 movaps XMMWORD[(-40)+rax],xmm15 sub rsp,288 and rsp,-256 mov QWORD[272+rsp],rax $L$body_avx: lea rbp,[K_XX_XX] lea rbx,[256+rsp] vzeroupper $L$oop_grande_avx: mov DWORD[280+rsp],edx xor edx,edx mov r8,QWORD[rsi] mov ecx,DWORD[8+rsi] cmp ecx,edx 
cmovg edx,ecx test ecx,ecx mov DWORD[rbx],ecx cmovle r8,rbp mov r9,QWORD[16+rsi] mov ecx,DWORD[24+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[4+rbx],ecx cmovle r9,rbp mov r10,QWORD[32+rsi] mov ecx,DWORD[40+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[8+rbx],ecx cmovle r10,rbp mov r11,QWORD[48+rsi] mov ecx,DWORD[56+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[12+rbx],ecx cmovle r11,rbp test edx,edx jz NEAR $L$done_avx vmovdqu xmm10,XMMWORD[rdi] lea rax,[128+rsp] vmovdqu xmm11,XMMWORD[32+rdi] vmovdqu xmm12,XMMWORD[64+rdi] vmovdqu xmm13,XMMWORD[96+rdi] vmovdqu xmm14,XMMWORD[128+rdi] vmovdqu xmm5,XMMWORD[96+rbp] jmp NEAR $L$oop_avx ALIGN 32 $L$oop_avx: vmovdqa xmm15,XMMWORD[((-32))+rbp] vmovd xmm0,DWORD[r8] lea r8,[64+r8] vmovd xmm2,DWORD[r9] lea r9,[64+r9] vpinsrd xmm0,xmm0,DWORD[r10],1 lea r10,[64+r10] vpinsrd xmm2,xmm2,DWORD[r11],1 lea r11,[64+r11] vmovd xmm1,DWORD[((-60))+r8] vpunpckldq xmm0,xmm0,xmm2 vmovd xmm9,DWORD[((-60))+r9] vpshufb xmm0,xmm0,xmm5 vpinsrd xmm1,xmm1,DWORD[((-60))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-60))+r11],1 vpaddd xmm14,xmm14,xmm15 vpslld xmm8,xmm10,5 vpandn xmm7,xmm11,xmm13 vpand xmm6,xmm11,xmm12 vmovdqa XMMWORD[(0-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpunpckldq xmm1,xmm1,xmm9 vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm7 vmovd xmm2,DWORD[((-56))+r8] vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-56))+r9] vpaddd xmm14,xmm14,xmm6 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpshufb xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpinsrd xmm2,xmm2,DWORD[((-56))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-56))+r11],1 vpaddd xmm13,xmm13,xmm15 vpslld xmm8,xmm14,5 vpandn xmm7,xmm10,xmm12 vpand xmm6,xmm10,xmm11 vmovdqa XMMWORD[(16-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpunpckldq xmm2,xmm2,xmm9 vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm7 vmovd xmm3,DWORD[((-52))+r8] vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-52))+r9] vpaddd xmm13,xmm13,xmm6 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpshufb xmm2,xmm2,xmm5 vpor 
xmm10,xmm10,xmm7 vpinsrd xmm3,xmm3,DWORD[((-52))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-52))+r11],1 vpaddd xmm12,xmm12,xmm15 vpslld xmm8,xmm13,5 vpandn xmm7,xmm14,xmm11 vpand xmm6,xmm14,xmm10 vmovdqa XMMWORD[(32-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpunpckldq xmm3,xmm3,xmm9 vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm7 vmovd xmm4,DWORD[((-48))+r8] vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-48))+r9] vpaddd xmm12,xmm12,xmm6 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpshufb xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpinsrd xmm4,xmm4,DWORD[((-48))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-48))+r11],1 vpaddd xmm11,xmm11,xmm15 vpslld xmm8,xmm12,5 vpandn xmm7,xmm13,xmm10 vpand xmm6,xmm13,xmm14 vmovdqa XMMWORD[(48-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpunpckldq xmm4,xmm4,xmm9 vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm7 vmovd xmm0,DWORD[((-44))+r8] vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-44))+r9] vpaddd xmm11,xmm11,xmm6 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpshufb xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpinsrd xmm0,xmm0,DWORD[((-44))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-44))+r11],1 vpaddd xmm10,xmm10,xmm15 vpslld xmm8,xmm11,5 vpandn xmm7,xmm12,xmm14 vpand xmm6,xmm12,xmm13 vmovdqa XMMWORD[(64-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpunpckldq xmm0,xmm0,xmm9 vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm7 vmovd xmm1,DWORD[((-40))+r8] vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-40))+r9] vpaddd xmm10,xmm10,xmm6 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpshufb xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpinsrd xmm1,xmm1,DWORD[((-40))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-40))+r11],1 vpaddd xmm14,xmm14,xmm15 vpslld xmm8,xmm10,5 vpandn xmm7,xmm11,xmm13 vpand xmm6,xmm11,xmm12 vmovdqa XMMWORD[(80-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpunpckldq xmm1,xmm1,xmm9 vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm7 vmovd xmm2,DWORD[((-36))+r8] vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-36))+r9] vpaddd xmm14,xmm14,xmm6 vpsrld xmm11,xmm11,2 vpaddd 
xmm14,xmm14,xmm8 vpshufb xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpinsrd xmm2,xmm2,DWORD[((-36))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-36))+r11],1 vpaddd xmm13,xmm13,xmm15 vpslld xmm8,xmm14,5 vpandn xmm7,xmm10,xmm12 vpand xmm6,xmm10,xmm11 vmovdqa XMMWORD[(96-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpunpckldq xmm2,xmm2,xmm9 vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm7 vmovd xmm3,DWORD[((-32))+r8] vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-32))+r9] vpaddd xmm13,xmm13,xmm6 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpshufb xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpinsrd xmm3,xmm3,DWORD[((-32))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-32))+r11],1 vpaddd xmm12,xmm12,xmm15 vpslld xmm8,xmm13,5 vpandn xmm7,xmm14,xmm11 vpand xmm6,xmm14,xmm10 vmovdqa XMMWORD[(112-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpunpckldq xmm3,xmm3,xmm9 vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm7 vmovd xmm4,DWORD[((-28))+r8] vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-28))+r9] vpaddd xmm12,xmm12,xmm6 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpshufb xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpinsrd xmm4,xmm4,DWORD[((-28))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-28))+r11],1 vpaddd xmm11,xmm11,xmm15 vpslld xmm8,xmm12,5 vpandn xmm7,xmm13,xmm10 vpand xmm6,xmm13,xmm14 vmovdqa XMMWORD[(128-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpunpckldq xmm4,xmm4,xmm9 vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm7 vmovd xmm0,DWORD[((-24))+r8] vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-24))+r9] vpaddd xmm11,xmm11,xmm6 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpshufb xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpinsrd xmm0,xmm0,DWORD[((-24))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-24))+r11],1 vpaddd xmm10,xmm10,xmm15 vpslld xmm8,xmm11,5 vpandn xmm7,xmm12,xmm14 vpand xmm6,xmm12,xmm13 vmovdqa XMMWORD[(144-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpunpckldq xmm0,xmm0,xmm9 vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm7 vmovd xmm1,DWORD[((-20))+r8] vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-20))+r9] vpaddd 
xmm10,xmm10,xmm6 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpshufb xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpinsrd xmm1,xmm1,DWORD[((-20))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-20))+r11],1 vpaddd xmm14,xmm14,xmm15 vpslld xmm8,xmm10,5 vpandn xmm7,xmm11,xmm13 vpand xmm6,xmm11,xmm12 vmovdqa XMMWORD[(160-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpunpckldq xmm1,xmm1,xmm9 vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm7 vmovd xmm2,DWORD[((-16))+r8] vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-16))+r9] vpaddd xmm14,xmm14,xmm6 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpshufb xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpinsrd xmm2,xmm2,DWORD[((-16))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-16))+r11],1 vpaddd xmm13,xmm13,xmm15 vpslld xmm8,xmm14,5 vpandn xmm7,xmm10,xmm12 vpand xmm6,xmm10,xmm11 vmovdqa XMMWORD[(176-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpunpckldq xmm2,xmm2,xmm9 vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm7 vmovd xmm3,DWORD[((-12))+r8] vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-12))+r9] vpaddd xmm13,xmm13,xmm6 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpshufb xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpinsrd xmm3,xmm3,DWORD[((-12))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-12))+r11],1 vpaddd xmm12,xmm12,xmm15 vpslld xmm8,xmm13,5 vpandn xmm7,xmm14,xmm11 vpand xmm6,xmm14,xmm10 vmovdqa XMMWORD[(192-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpunpckldq xmm3,xmm3,xmm9 vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm7 vmovd xmm4,DWORD[((-8))+r8] vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-8))+r9] vpaddd xmm12,xmm12,xmm6 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpshufb xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpinsrd xmm4,xmm4,DWORD[((-8))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-8))+r11],1 vpaddd xmm11,xmm11,xmm15 vpslld xmm8,xmm12,5 vpandn xmm7,xmm13,xmm10 vpand xmm6,xmm13,xmm14 vmovdqa XMMWORD[(208-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpunpckldq xmm4,xmm4,xmm9 vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm7 vmovd xmm0,DWORD[((-4))+r8] vpslld xmm7,xmm13,30 vpor 
xmm8,xmm8,xmm9 vmovd xmm9,DWORD[((-4))+r9] vpaddd xmm11,xmm11,xmm6 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpshufb xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vmovdqa xmm1,XMMWORD[((0-128))+rax] vpinsrd xmm0,xmm0,DWORD[((-4))+r10],1 vpinsrd xmm9,xmm9,DWORD[((-4))+r11],1 vpaddd xmm10,xmm10,xmm15 prefetcht0 [63+r8] vpslld xmm8,xmm11,5 vpandn xmm7,xmm12,xmm14 vpand xmm6,xmm12,xmm13 vmovdqa XMMWORD[(224-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpunpckldq xmm0,xmm0,xmm9 vpsrld xmm9,xmm11,27 prefetcht0 [63+r9] vpxor xmm6,xmm6,xmm7 vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 prefetcht0 [63+r10] vpaddd xmm10,xmm10,xmm6 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 prefetcht0 [63+r11] vpshufb xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vmovdqa xmm2,XMMWORD[((16-128))+rax] vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((32-128))+rax] vpaddd xmm14,xmm14,xmm15 vpslld xmm8,xmm10,5 vpandn xmm7,xmm11,xmm13 vpand xmm6,xmm11,xmm12 vmovdqa XMMWORD[(240-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((128-128))+rax] vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm7 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((48-128))+rax] vpaddd xmm13,xmm13,xmm15 vpslld xmm8,xmm14,5 vpandn xmm7,xmm10,xmm12 vpand xmm6,xmm10,xmm11 vmovdqa XMMWORD[(0-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((144-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm7 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((64-128))+rax] vpaddd xmm12,xmm12,xmm15 vpslld xmm8,xmm13,5 vpandn xmm7,xmm14,xmm11 vpand xmm6,xmm14,xmm10 vmovdqa XMMWORD[(16-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpxor 
xmm3,xmm3,XMMWORD[((160-128))+rax] vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm7 vpxor xmm3,xmm3,xmm0 vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vpaddd xmm12,xmm12,xmm6 vpsrld xmm5,xmm3,31 vpaddd xmm3,xmm3,xmm3 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((80-128))+rax] vpaddd xmm11,xmm11,xmm15 vpslld xmm8,xmm12,5 vpandn xmm7,xmm13,xmm10 vpand xmm6,xmm13,xmm14 vmovdqa XMMWORD[(32-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpxor xmm4,xmm4,XMMWORD[((176-128))+rax] vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm7 vpxor xmm4,xmm4,xmm1 vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vpaddd xmm11,xmm11,xmm6 vpsrld xmm5,xmm4,31 vpaddd xmm4,xmm4,xmm4 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((96-128))+rax] vpaddd xmm10,xmm10,xmm15 vpslld xmm8,xmm11,5 vpandn xmm7,xmm12,xmm14 vpand xmm6,xmm12,xmm13 vmovdqa XMMWORD[(48-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpxor xmm0,xmm0,XMMWORD[((192-128))+rax] vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm7 vpxor xmm0,xmm0,xmm2 vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vpaddd xmm10,xmm10,xmm6 vpsrld xmm5,xmm0,31 vpaddd xmm0,xmm0,xmm0 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vmovdqa xmm15,XMMWORD[rbp] vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((112-128))+rax] vpslld xmm8,xmm10,5 vpaddd xmm14,xmm14,xmm15 vpxor xmm6,xmm13,xmm11 vmovdqa XMMWORD[(64-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((208-128))+rax] vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm12 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((128-128))+rax] vpslld xmm8,xmm14,5 vpaddd xmm13,xmm13,xmm15 vpxor xmm6,xmm12,xmm10 vmovdqa XMMWORD[(80-128)+rax],xmm1 vpaddd 
xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((224-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm11 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((144-128))+rax] vpslld xmm8,xmm13,5 vpaddd xmm12,xmm12,xmm15 vpxor xmm6,xmm11,xmm14 vmovdqa XMMWORD[(96-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpxor xmm3,xmm3,XMMWORD[((240-128))+rax] vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm10 vpxor xmm3,xmm3,xmm0 vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vpaddd xmm12,xmm12,xmm6 vpsrld xmm5,xmm3,31 vpaddd xmm3,xmm3,xmm3 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((160-128))+rax] vpslld xmm8,xmm12,5 vpaddd xmm11,xmm11,xmm15 vpxor xmm6,xmm10,xmm13 vmovdqa XMMWORD[(112-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpxor xmm4,xmm4,XMMWORD[((0-128))+rax] vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm14 vpxor xmm4,xmm4,xmm1 vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vpaddd xmm11,xmm11,xmm6 vpsrld xmm5,xmm4,31 vpaddd xmm4,xmm4,xmm4 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((176-128))+rax] vpslld xmm8,xmm11,5 vpaddd xmm10,xmm10,xmm15 vpxor xmm6,xmm14,xmm12 vmovdqa XMMWORD[(128-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpxor xmm0,xmm0,XMMWORD[((16-128))+rax] vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm13 vpxor xmm0,xmm0,xmm2 vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vpaddd xmm10,xmm10,xmm6 vpsrld xmm5,xmm0,31 vpaddd xmm0,xmm0,xmm0 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((192-128))+rax] vpslld xmm8,xmm10,5 vpaddd xmm14,xmm14,xmm15 vpxor xmm6,xmm13,xmm11 vmovdqa XMMWORD[(144-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((32-128))+rax] 
vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm12 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((208-128))+rax] vpslld xmm8,xmm14,5 vpaddd xmm13,xmm13,xmm15 vpxor xmm6,xmm12,xmm10 vmovdqa XMMWORD[(160-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((48-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm11 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((224-128))+rax] vpslld xmm8,xmm13,5 vpaddd xmm12,xmm12,xmm15 vpxor xmm6,xmm11,xmm14 vmovdqa XMMWORD[(176-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpxor xmm3,xmm3,XMMWORD[((64-128))+rax] vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm10 vpxor xmm3,xmm3,xmm0 vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vpaddd xmm12,xmm12,xmm6 vpsrld xmm5,xmm3,31 vpaddd xmm3,xmm3,xmm3 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((240-128))+rax] vpslld xmm8,xmm12,5 vpaddd xmm11,xmm11,xmm15 vpxor xmm6,xmm10,xmm13 vmovdqa XMMWORD[(192-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpxor xmm4,xmm4,XMMWORD[((80-128))+rax] vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm14 vpxor xmm4,xmm4,xmm1 vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vpaddd xmm11,xmm11,xmm6 vpsrld xmm5,xmm4,31 vpaddd xmm4,xmm4,xmm4 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((0-128))+rax] vpslld xmm8,xmm11,5 vpaddd xmm10,xmm10,xmm15 vpxor xmm6,xmm14,xmm12 vmovdqa XMMWORD[(208-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpxor xmm0,xmm0,XMMWORD[((96-128))+rax] vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm13 vpxor 
xmm0,xmm0,xmm2 vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vpaddd xmm10,xmm10,xmm6 vpsrld xmm5,xmm0,31 vpaddd xmm0,xmm0,xmm0 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((16-128))+rax] vpslld xmm8,xmm10,5 vpaddd xmm14,xmm14,xmm15 vpxor xmm6,xmm13,xmm11 vmovdqa XMMWORD[(224-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((112-128))+rax] vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm12 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((32-128))+rax] vpslld xmm8,xmm14,5 vpaddd xmm13,xmm13,xmm15 vpxor xmm6,xmm12,xmm10 vmovdqa XMMWORD[(240-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((128-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm11 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((48-128))+rax] vpslld xmm8,xmm13,5 vpaddd xmm12,xmm12,xmm15 vpxor xmm6,xmm11,xmm14 vmovdqa XMMWORD[(0-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpxor xmm3,xmm3,XMMWORD[((144-128))+rax] vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm10 vpxor xmm3,xmm3,xmm0 vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vpaddd xmm12,xmm12,xmm6 vpsrld xmm5,xmm3,31 vpaddd xmm3,xmm3,xmm3 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((64-128))+rax] vpslld xmm8,xmm12,5 vpaddd xmm11,xmm11,xmm15 vpxor xmm6,xmm10,xmm13 vmovdqa XMMWORD[(16-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpxor xmm4,xmm4,XMMWORD[((160-128))+rax] vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm14 vpxor xmm4,xmm4,xmm1 vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vpaddd 
xmm11,xmm11,xmm6 vpsrld xmm5,xmm4,31 vpaddd xmm4,xmm4,xmm4 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((80-128))+rax] vpslld xmm8,xmm11,5 vpaddd xmm10,xmm10,xmm15 vpxor xmm6,xmm14,xmm12 vmovdqa XMMWORD[(32-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpxor xmm0,xmm0,XMMWORD[((176-128))+rax] vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm13 vpxor xmm0,xmm0,xmm2 vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vpaddd xmm10,xmm10,xmm6 vpsrld xmm5,xmm0,31 vpaddd xmm0,xmm0,xmm0 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((96-128))+rax] vpslld xmm8,xmm10,5 vpaddd xmm14,xmm14,xmm15 vpxor xmm6,xmm13,xmm11 vmovdqa XMMWORD[(48-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((192-128))+rax] vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm12 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((112-128))+rax] vpslld xmm8,xmm14,5 vpaddd xmm13,xmm13,xmm15 vpxor xmm6,xmm12,xmm10 vmovdqa XMMWORD[(64-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((208-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm11 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((128-128))+rax] vpslld xmm8,xmm13,5 vpaddd xmm12,xmm12,xmm15 vpxor xmm6,xmm11,xmm14 vmovdqa XMMWORD[(80-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpxor xmm3,xmm3,XMMWORD[((224-128))+rax] vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm10 vpxor xmm3,xmm3,xmm0 vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vpaddd xmm12,xmm12,xmm6 vpsrld xmm5,xmm3,31 vpaddd xmm3,xmm3,xmm3 
vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((144-128))+rax] vpslld xmm8,xmm12,5 vpaddd xmm11,xmm11,xmm15 vpxor xmm6,xmm10,xmm13 vmovdqa XMMWORD[(96-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpxor xmm4,xmm4,XMMWORD[((240-128))+rax] vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm14 vpxor xmm4,xmm4,xmm1 vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vpaddd xmm11,xmm11,xmm6 vpsrld xmm5,xmm4,31 vpaddd xmm4,xmm4,xmm4 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((160-128))+rax] vpslld xmm8,xmm11,5 vpaddd xmm10,xmm10,xmm15 vpxor xmm6,xmm14,xmm12 vmovdqa XMMWORD[(112-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpxor xmm0,xmm0,XMMWORD[((0-128))+rax] vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm13 vpxor xmm0,xmm0,xmm2 vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vpaddd xmm10,xmm10,xmm6 vpsrld xmm5,xmm0,31 vpaddd xmm0,xmm0,xmm0 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vmovdqa xmm15,XMMWORD[32+rbp] vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((176-128))+rax] vpaddd xmm14,xmm14,xmm15 vpslld xmm8,xmm10,5 vpand xmm7,xmm13,xmm12 vpxor xmm1,xmm1,XMMWORD[((16-128))+rax] vpaddd xmm14,xmm14,xmm7 vpsrld xmm9,xmm10,27 vpxor xmm6,xmm13,xmm12 vpxor xmm1,xmm1,xmm3 vmovdqu XMMWORD[(128-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm1,31 vpand xmm6,xmm6,xmm11 vpaddd xmm1,xmm1,xmm1 vpslld xmm7,xmm11,30 vpaddd xmm14,xmm14,xmm6 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((192-128))+rax] vpaddd xmm13,xmm13,xmm15 vpslld xmm8,xmm14,5 vpand xmm7,xmm12,xmm11 vpxor xmm2,xmm2,XMMWORD[((32-128))+rax] vpaddd xmm13,xmm13,xmm7 vpsrld xmm9,xmm14,27 vpxor xmm6,xmm12,xmm11 vpxor xmm2,xmm2,xmm4 vmovdqu XMMWORD[(144-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm2,31 vpand xmm6,xmm6,xmm10 
vpaddd xmm2,xmm2,xmm2 vpslld xmm7,xmm10,30 vpaddd xmm13,xmm13,xmm6 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((208-128))+rax] vpaddd xmm12,xmm12,xmm15 vpslld xmm8,xmm13,5 vpand xmm7,xmm11,xmm10 vpxor xmm3,xmm3,XMMWORD[((48-128))+rax] vpaddd xmm12,xmm12,xmm7 vpsrld xmm9,xmm13,27 vpxor xmm6,xmm11,xmm10 vpxor xmm3,xmm3,xmm0 vmovdqu XMMWORD[(160-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm3,31 vpand xmm6,xmm6,xmm14 vpaddd xmm3,xmm3,xmm3 vpslld xmm7,xmm14,30 vpaddd xmm12,xmm12,xmm6 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((224-128))+rax] vpaddd xmm11,xmm11,xmm15 vpslld xmm8,xmm12,5 vpand xmm7,xmm10,xmm14 vpxor xmm4,xmm4,XMMWORD[((64-128))+rax] vpaddd xmm11,xmm11,xmm7 vpsrld xmm9,xmm12,27 vpxor xmm6,xmm10,xmm14 vpxor xmm4,xmm4,xmm1 vmovdqu XMMWORD[(176-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm4,31 vpand xmm6,xmm6,xmm13 vpaddd xmm4,xmm4,xmm4 vpslld xmm7,xmm13,30 vpaddd xmm11,xmm11,xmm6 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((240-128))+rax] vpaddd xmm10,xmm10,xmm15 vpslld xmm8,xmm11,5 vpand xmm7,xmm14,xmm13 vpxor xmm0,xmm0,XMMWORD[((80-128))+rax] vpaddd xmm10,xmm10,xmm7 vpsrld xmm9,xmm11,27 vpxor xmm6,xmm14,xmm13 vpxor xmm0,xmm0,xmm2 vmovdqu XMMWORD[(192-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm0,31 vpand xmm6,xmm6,xmm12 vpaddd xmm0,xmm0,xmm0 vpslld xmm7,xmm12,30 vpaddd xmm10,xmm10,xmm6 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((0-128))+rax] vpaddd xmm14,xmm14,xmm15 vpslld xmm8,xmm10,5 vpand xmm7,xmm13,xmm12 vpxor xmm1,xmm1,XMMWORD[((96-128))+rax] vpaddd xmm14,xmm14,xmm7 vpsrld xmm9,xmm10,27 vpxor xmm6,xmm13,xmm12 vpxor 
xmm1,xmm1,xmm3 vmovdqu XMMWORD[(208-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm1,31 vpand xmm6,xmm6,xmm11 vpaddd xmm1,xmm1,xmm1 vpslld xmm7,xmm11,30 vpaddd xmm14,xmm14,xmm6 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((16-128))+rax] vpaddd xmm13,xmm13,xmm15 vpslld xmm8,xmm14,5 vpand xmm7,xmm12,xmm11 vpxor xmm2,xmm2,XMMWORD[((112-128))+rax] vpaddd xmm13,xmm13,xmm7 vpsrld xmm9,xmm14,27 vpxor xmm6,xmm12,xmm11 vpxor xmm2,xmm2,xmm4 vmovdqu XMMWORD[(224-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm2,31 vpand xmm6,xmm6,xmm10 vpaddd xmm2,xmm2,xmm2 vpslld xmm7,xmm10,30 vpaddd xmm13,xmm13,xmm6 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((32-128))+rax] vpaddd xmm12,xmm12,xmm15 vpslld xmm8,xmm13,5 vpand xmm7,xmm11,xmm10 vpxor xmm3,xmm3,XMMWORD[((128-128))+rax] vpaddd xmm12,xmm12,xmm7 vpsrld xmm9,xmm13,27 vpxor xmm6,xmm11,xmm10 vpxor xmm3,xmm3,xmm0 vmovdqu XMMWORD[(240-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm3,31 vpand xmm6,xmm6,xmm14 vpaddd xmm3,xmm3,xmm3 vpslld xmm7,xmm14,30 vpaddd xmm12,xmm12,xmm6 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((48-128))+rax] vpaddd xmm11,xmm11,xmm15 vpslld xmm8,xmm12,5 vpand xmm7,xmm10,xmm14 vpxor xmm4,xmm4,XMMWORD[((144-128))+rax] vpaddd xmm11,xmm11,xmm7 vpsrld xmm9,xmm12,27 vpxor xmm6,xmm10,xmm14 vpxor xmm4,xmm4,xmm1 vmovdqu XMMWORD[(0-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm4,31 vpand xmm6,xmm6,xmm13 vpaddd xmm4,xmm4,xmm4 vpslld xmm7,xmm13,30 vpaddd xmm11,xmm11,xmm6 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((64-128))+rax] vpaddd xmm10,xmm10,xmm15 vpslld xmm8,xmm11,5 vpand 
xmm7,xmm14,xmm13 vpxor xmm0,xmm0,XMMWORD[((160-128))+rax] vpaddd xmm10,xmm10,xmm7 vpsrld xmm9,xmm11,27 vpxor xmm6,xmm14,xmm13 vpxor xmm0,xmm0,xmm2 vmovdqu XMMWORD[(16-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm0,31 vpand xmm6,xmm6,xmm12 vpaddd xmm0,xmm0,xmm0 vpslld xmm7,xmm12,30 vpaddd xmm10,xmm10,xmm6 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((80-128))+rax] vpaddd xmm14,xmm14,xmm15 vpslld xmm8,xmm10,5 vpand xmm7,xmm13,xmm12 vpxor xmm1,xmm1,XMMWORD[((176-128))+rax] vpaddd xmm14,xmm14,xmm7 vpsrld xmm9,xmm10,27 vpxor xmm6,xmm13,xmm12 vpxor xmm1,xmm1,xmm3 vmovdqu XMMWORD[(32-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm1,31 vpand xmm6,xmm6,xmm11 vpaddd xmm1,xmm1,xmm1 vpslld xmm7,xmm11,30 vpaddd xmm14,xmm14,xmm6 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((96-128))+rax] vpaddd xmm13,xmm13,xmm15 vpslld xmm8,xmm14,5 vpand xmm7,xmm12,xmm11 vpxor xmm2,xmm2,XMMWORD[((192-128))+rax] vpaddd xmm13,xmm13,xmm7 vpsrld xmm9,xmm14,27 vpxor xmm6,xmm12,xmm11 vpxor xmm2,xmm2,xmm4 vmovdqu XMMWORD[(48-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm2,31 vpand xmm6,xmm6,xmm10 vpaddd xmm2,xmm2,xmm2 vpslld xmm7,xmm10,30 vpaddd xmm13,xmm13,xmm6 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((112-128))+rax] vpaddd xmm12,xmm12,xmm15 vpslld xmm8,xmm13,5 vpand xmm7,xmm11,xmm10 vpxor xmm3,xmm3,XMMWORD[((208-128))+rax] vpaddd xmm12,xmm12,xmm7 vpsrld xmm9,xmm13,27 vpxor xmm6,xmm11,xmm10 vpxor xmm3,xmm3,xmm0 vmovdqu XMMWORD[(64-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm3,31 vpand xmm6,xmm6,xmm14 vpaddd xmm3,xmm3,xmm3 vpslld xmm7,xmm14,30 vpaddd xmm12,xmm12,xmm6 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor 
xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((128-128))+rax] vpaddd xmm11,xmm11,xmm15 vpslld xmm8,xmm12,5 vpand xmm7,xmm10,xmm14 vpxor xmm4,xmm4,XMMWORD[((224-128))+rax] vpaddd xmm11,xmm11,xmm7 vpsrld xmm9,xmm12,27 vpxor xmm6,xmm10,xmm14 vpxor xmm4,xmm4,xmm1 vmovdqu XMMWORD[(80-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm4,31 vpand xmm6,xmm6,xmm13 vpaddd xmm4,xmm4,xmm4 vpslld xmm7,xmm13,30 vpaddd xmm11,xmm11,xmm6 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((144-128))+rax] vpaddd xmm10,xmm10,xmm15 vpslld xmm8,xmm11,5 vpand xmm7,xmm14,xmm13 vpxor xmm0,xmm0,XMMWORD[((240-128))+rax] vpaddd xmm10,xmm10,xmm7 vpsrld xmm9,xmm11,27 vpxor xmm6,xmm14,xmm13 vpxor xmm0,xmm0,xmm2 vmovdqu XMMWORD[(96-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm0,31 vpand xmm6,xmm6,xmm12 vpaddd xmm0,xmm0,xmm0 vpslld xmm7,xmm12,30 vpaddd xmm10,xmm10,xmm6 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((160-128))+rax] vpaddd xmm14,xmm14,xmm15 vpslld xmm8,xmm10,5 vpand xmm7,xmm13,xmm12 vpxor xmm1,xmm1,XMMWORD[((0-128))+rax] vpaddd xmm14,xmm14,xmm7 vpsrld xmm9,xmm10,27 vpxor xmm6,xmm13,xmm12 vpxor xmm1,xmm1,xmm3 vmovdqu XMMWORD[(112-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm1,31 vpand xmm6,xmm6,xmm11 vpaddd xmm1,xmm1,xmm1 vpslld xmm7,xmm11,30 vpaddd xmm14,xmm14,xmm6 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((176-128))+rax] vpaddd xmm13,xmm13,xmm15 vpslld xmm8,xmm14,5 vpand xmm7,xmm12,xmm11 vpxor xmm2,xmm2,XMMWORD[((16-128))+rax] vpaddd xmm13,xmm13,xmm7 vpsrld xmm9,xmm14,27 vpxor xmm6,xmm12,xmm11 vpxor xmm2,xmm2,xmm4 vmovdqu XMMWORD[(128-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm2,31 vpand xmm6,xmm6,xmm10 vpaddd 
xmm2,xmm2,xmm2 vpslld xmm7,xmm10,30 vpaddd xmm13,xmm13,xmm6 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((192-128))+rax] vpaddd xmm12,xmm12,xmm15 vpslld xmm8,xmm13,5 vpand xmm7,xmm11,xmm10 vpxor xmm3,xmm3,XMMWORD[((32-128))+rax] vpaddd xmm12,xmm12,xmm7 vpsrld xmm9,xmm13,27 vpxor xmm6,xmm11,xmm10 vpxor xmm3,xmm3,xmm0 vmovdqu XMMWORD[(144-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm3,31 vpand xmm6,xmm6,xmm14 vpaddd xmm3,xmm3,xmm3 vpslld xmm7,xmm14,30 vpaddd xmm12,xmm12,xmm6 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((208-128))+rax] vpaddd xmm11,xmm11,xmm15 vpslld xmm8,xmm12,5 vpand xmm7,xmm10,xmm14 vpxor xmm4,xmm4,XMMWORD[((48-128))+rax] vpaddd xmm11,xmm11,xmm7 vpsrld xmm9,xmm12,27 vpxor xmm6,xmm10,xmm14 vpxor xmm4,xmm4,xmm1 vmovdqu XMMWORD[(160-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm4,31 vpand xmm6,xmm6,xmm13 vpaddd xmm4,xmm4,xmm4 vpslld xmm7,xmm13,30 vpaddd xmm11,xmm11,xmm6 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((224-128))+rax] vpaddd xmm10,xmm10,xmm15 vpslld xmm8,xmm11,5 vpand xmm7,xmm14,xmm13 vpxor xmm0,xmm0,XMMWORD[((64-128))+rax] vpaddd xmm10,xmm10,xmm7 vpsrld xmm9,xmm11,27 vpxor xmm6,xmm14,xmm13 vpxor xmm0,xmm0,xmm2 vmovdqu XMMWORD[(176-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpor xmm8,xmm8,xmm9 vpsrld xmm5,xmm0,31 vpand xmm6,xmm6,xmm12 vpaddd xmm0,xmm0,xmm0 vpslld xmm7,xmm12,30 vpaddd xmm10,xmm10,xmm6 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vmovdqa xmm15,XMMWORD[64+rbp] vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((240-128))+rax] vpslld xmm8,xmm10,5 vpaddd xmm14,xmm14,xmm15 vpxor xmm6,xmm13,xmm11 vmovdqa XMMWORD[(192-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((80-128))+rax] 
vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm12 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((0-128))+rax] vpslld xmm8,xmm14,5 vpaddd xmm13,xmm13,xmm15 vpxor xmm6,xmm12,xmm10 vmovdqa XMMWORD[(208-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((96-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm11 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((16-128))+rax] vpslld xmm8,xmm13,5 vpaddd xmm12,xmm12,xmm15 vpxor xmm6,xmm11,xmm14 vmovdqa XMMWORD[(224-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpxor xmm3,xmm3,XMMWORD[((112-128))+rax] vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm10 vpxor xmm3,xmm3,xmm0 vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vpaddd xmm12,xmm12,xmm6 vpsrld xmm5,xmm3,31 vpaddd xmm3,xmm3,xmm3 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((32-128))+rax] vpslld xmm8,xmm12,5 vpaddd xmm11,xmm11,xmm15 vpxor xmm6,xmm10,xmm13 vmovdqa XMMWORD[(240-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpxor xmm4,xmm4,XMMWORD[((128-128))+rax] vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm14 vpxor xmm4,xmm4,xmm1 vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vpaddd xmm11,xmm11,xmm6 vpsrld xmm5,xmm4,31 vpaddd xmm4,xmm4,xmm4 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((48-128))+rax] vpslld xmm8,xmm11,5 vpaddd xmm10,xmm10,xmm15 vpxor xmm6,xmm14,xmm12 vmovdqa XMMWORD[(0-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpxor xmm0,xmm0,XMMWORD[((144-128))+rax] vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm13 vpxor xmm0,xmm0,xmm2 
vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vpaddd xmm10,xmm10,xmm6 vpsrld xmm5,xmm0,31 vpaddd xmm0,xmm0,xmm0 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((64-128))+rax] vpslld xmm8,xmm10,5 vpaddd xmm14,xmm14,xmm15 vpxor xmm6,xmm13,xmm11 vmovdqa XMMWORD[(16-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((160-128))+rax] vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm12 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((80-128))+rax] vpslld xmm8,xmm14,5 vpaddd xmm13,xmm13,xmm15 vpxor xmm6,xmm12,xmm10 vmovdqa XMMWORD[(32-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((176-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm11 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((96-128))+rax] vpslld xmm8,xmm13,5 vpaddd xmm12,xmm12,xmm15 vpxor xmm6,xmm11,xmm14 vmovdqa XMMWORD[(48-128)+rax],xmm2 vpaddd xmm12,xmm12,xmm2 vpxor xmm3,xmm3,XMMWORD[((192-128))+rax] vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm10 vpxor xmm3,xmm3,xmm0 vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vpaddd xmm12,xmm12,xmm6 vpsrld xmm5,xmm3,31 vpaddd xmm3,xmm3,xmm3 vpsrld xmm14,xmm14,2 vpaddd xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((112-128))+rax] vpslld xmm8,xmm12,5 vpaddd xmm11,xmm11,xmm15 vpxor xmm6,xmm10,xmm13 vmovdqa XMMWORD[(64-128)+rax],xmm3 vpaddd xmm11,xmm11,xmm3 vpxor xmm4,xmm4,XMMWORD[((208-128))+rax] vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm14 vpxor xmm4,xmm4,xmm1 vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vpaddd 
xmm11,xmm11,xmm6 vpsrld xmm5,xmm4,31 vpaddd xmm4,xmm4,xmm4 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((128-128))+rax] vpslld xmm8,xmm11,5 vpaddd xmm10,xmm10,xmm15 vpxor xmm6,xmm14,xmm12 vmovdqa XMMWORD[(80-128)+rax],xmm4 vpaddd xmm10,xmm10,xmm4 vpxor xmm0,xmm0,XMMWORD[((224-128))+rax] vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm13 vpxor xmm0,xmm0,xmm2 vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vpaddd xmm10,xmm10,xmm6 vpsrld xmm5,xmm0,31 vpaddd xmm0,xmm0,xmm0 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((144-128))+rax] vpslld xmm8,xmm10,5 vpaddd xmm14,xmm14,xmm15 vpxor xmm6,xmm13,xmm11 vmovdqa XMMWORD[(96-128)+rax],xmm0 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((240-128))+rax] vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm12 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((160-128))+rax] vpslld xmm8,xmm14,5 vpaddd xmm13,xmm13,xmm15 vpxor xmm6,xmm12,xmm10 vmovdqa XMMWORD[(112-128)+rax],xmm1 vpaddd xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((0-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm11 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((176-128))+rax] vpslld xmm8,xmm13,5 vpaddd xmm12,xmm12,xmm15 vpxor xmm6,xmm11,xmm14 vpaddd xmm12,xmm12,xmm2 vpxor xmm3,xmm3,XMMWORD[((16-128))+rax] vpsrld xmm9,xmm13,27 vpxor xmm6,xmm6,xmm10 vpxor xmm3,xmm3,xmm0 vpslld xmm7,xmm14,30 vpor xmm8,xmm8,xmm9 vpaddd xmm12,xmm12,xmm6 vpsrld xmm5,xmm3,31 vpaddd xmm3,xmm3,xmm3 vpsrld xmm14,xmm14,2 vpaddd 
xmm12,xmm12,xmm8 vpor xmm3,xmm3,xmm5 vpor xmm14,xmm14,xmm7 vpxor xmm4,xmm4,xmm1 vmovdqa xmm1,XMMWORD[((192-128))+rax] vpslld xmm8,xmm12,5 vpaddd xmm11,xmm11,xmm15 vpxor xmm6,xmm10,xmm13 vpaddd xmm11,xmm11,xmm3 vpxor xmm4,xmm4,XMMWORD[((32-128))+rax] vpsrld xmm9,xmm12,27 vpxor xmm6,xmm6,xmm14 vpxor xmm4,xmm4,xmm1 vpslld xmm7,xmm13,30 vpor xmm8,xmm8,xmm9 vpaddd xmm11,xmm11,xmm6 vpsrld xmm5,xmm4,31 vpaddd xmm4,xmm4,xmm4 vpsrld xmm13,xmm13,2 vpaddd xmm11,xmm11,xmm8 vpor xmm4,xmm4,xmm5 vpor xmm13,xmm13,xmm7 vpxor xmm0,xmm0,xmm2 vmovdqa xmm2,XMMWORD[((208-128))+rax] vpslld xmm8,xmm11,5 vpaddd xmm10,xmm10,xmm15 vpxor xmm6,xmm14,xmm12 vpaddd xmm10,xmm10,xmm4 vpxor xmm0,xmm0,XMMWORD[((48-128))+rax] vpsrld xmm9,xmm11,27 vpxor xmm6,xmm6,xmm13 vpxor xmm0,xmm0,xmm2 vpslld xmm7,xmm12,30 vpor xmm8,xmm8,xmm9 vpaddd xmm10,xmm10,xmm6 vpsrld xmm5,xmm0,31 vpaddd xmm0,xmm0,xmm0 vpsrld xmm12,xmm12,2 vpaddd xmm10,xmm10,xmm8 vpor xmm0,xmm0,xmm5 vpor xmm12,xmm12,xmm7 vpxor xmm1,xmm1,xmm3 vmovdqa xmm3,XMMWORD[((224-128))+rax] vpslld xmm8,xmm10,5 vpaddd xmm14,xmm14,xmm15 vpxor xmm6,xmm13,xmm11 vpaddd xmm14,xmm14,xmm0 vpxor xmm1,xmm1,XMMWORD[((64-128))+rax] vpsrld xmm9,xmm10,27 vpxor xmm6,xmm6,xmm12 vpxor xmm1,xmm1,xmm3 vpslld xmm7,xmm11,30 vpor xmm8,xmm8,xmm9 vpaddd xmm14,xmm14,xmm6 vpsrld xmm5,xmm1,31 vpaddd xmm1,xmm1,xmm1 vpsrld xmm11,xmm11,2 vpaddd xmm14,xmm14,xmm8 vpor xmm1,xmm1,xmm5 vpor xmm11,xmm11,xmm7 vpxor xmm2,xmm2,xmm4 vmovdqa xmm4,XMMWORD[((240-128))+rax] vpslld xmm8,xmm14,5 vpaddd xmm13,xmm13,xmm15 vpxor xmm6,xmm12,xmm10 vpaddd xmm13,xmm13,xmm1 vpxor xmm2,xmm2,XMMWORD[((80-128))+rax] vpsrld xmm9,xmm14,27 vpxor xmm6,xmm6,xmm11 vpxor xmm2,xmm2,xmm4 vpslld xmm7,xmm10,30 vpor xmm8,xmm8,xmm9 vpaddd xmm13,xmm13,xmm6 vpsrld xmm5,xmm2,31 vpaddd xmm2,xmm2,xmm2 vpsrld xmm10,xmm10,2 vpaddd xmm13,xmm13,xmm8 vpor xmm2,xmm2,xmm5 vpor xmm10,xmm10,xmm7 vpxor xmm3,xmm3,xmm0 vmovdqa xmm0,XMMWORD[((0-128))+rax] vpslld xmm8,xmm13,5 vpaddd xmm12,xmm12,xmm15 vpxor xmm6,xmm11,xmm14 vpaddd 
xmm12,xmm12,xmm2	; operands of the vpaddd begun on the preceding line: xmm12 += xmm2
	; Remainder of a round with a=xmm13 b=xmm14 c=xmm10 d=xmm11 e=xmm12.
	; Schedule words live in xmm0-xmm4 and in a 16-slot buffer addressed as [(k-128)+rax].
	vpxor xmm3,xmm3,XMMWORD[((96-128))+rax]
	vpsrld xmm9,xmm13,27
	vpxor xmm6,xmm6,xmm10	; xmm6 now holds the three-way XOR of b, c and d
	vpxor xmm3,xmm3,xmm0
	vpslld xmm7,xmm14,30
	vpor xmm8,xmm8,xmm9	; xmm8 = a rotated left by 5 (shl 5 | shr 27)
	vpaddd xmm12,xmm12,xmm6
	vpsrld xmm5,xmm3,31
	vpaddd xmm3,xmm3,xmm3
	vpsrld xmm14,xmm14,2
	vpaddd xmm12,xmm12,xmm8
	vpor xmm3,xmm3,xmm5	; xmm3 rotated left by 1 (x+x | x>>31)
	vpor xmm14,xmm14,xmm7	; b rotated left by 30 (shl 30 | shr 2)

	; Next round: a=xmm12 b=xmm13 c=xmm14 d=xmm10 e=xmm11.
	;   schedule: xmm4 = rol32(xmm4 ^ xmm1 ^ [(112-128)+rax] ^ [(16-128)+rax], 1)
	;   round:    e += xmm15 + xmm3 + (b ^ c ^ d) + rol32(a,5) ; b = rol32(b,30)
	; xmm15 is the constant vector loaded earlier in this loop from [64+rbp].
	vpxor xmm4,xmm4,xmm1
	vmovdqa xmm1,XMMWORD[((16-128))+rax]
	vpslld xmm8,xmm12,5
	vpaddd xmm11,xmm11,xmm15
	vpxor xmm6,xmm10,xmm13
	vpaddd xmm11,xmm11,xmm3
	vpxor xmm4,xmm4,XMMWORD[((112-128))+rax]
	vpsrld xmm9,xmm12,27
	vpxor xmm6,xmm6,xmm14
	vpxor xmm4,xmm4,xmm1
	vpslld xmm7,xmm13,30
	vpor xmm8,xmm8,xmm9
	vpaddd xmm11,xmm11,xmm6
	vpsrld xmm5,xmm4,31
	vpaddd xmm4,xmm4,xmm4
	vpsrld xmm13,xmm13,2
	vpaddd xmm11,xmm11,xmm8
	vpor xmm4,xmm4,xmm5
	vpor xmm13,xmm13,xmm7

	; Last round before the loop tail: a=xmm11 b=xmm12 c=xmm13 d=xmm14 e=xmm10.
	; No schedule update here; it only consumes xmm4 computed just above.
	vpslld xmm8,xmm11,5
	vpaddd xmm10,xmm10,xmm15
	vpxor xmm6,xmm14,xmm12
	vpsrld xmm9,xmm11,27
	vpaddd xmm10,xmm10,xmm4
	vpxor xmm6,xmm6,xmm13
	vpslld xmm7,xmm12,30
	vpor xmm8,xmm8,xmm9
	vpaddd xmm10,xmm10,xmm6
	vpsrld xmm12,xmm12,2
	vpaddd xmm10,xmm10,xmm8
	vpor xmm12,xmm12,xmm7

	; ---- inner-loop tail: per-lane bookkeeping -------------------------------
	; [rbx] holds four signed dword counters, one per lane.
	; Any lane whose counter is <= 1 gets its input pointer replaced by rbp, so the
	; next pass does not read past that lane's data.
	; NOTE(review): rbp is presumably K_XX_XX here, as in the other entry points;
	; it is set outside this view - confirm.
	mov ecx,1
	cmp ecx,DWORD[rbx]
	cmovge r8,rbp
	cmp ecx,DWORD[4+rbx]
	cmovge r9,rbp
	cmp ecx,DWORD[8+rbx]
	cmovge r10,rbp
	cmp ecx,DWORD[12+rbx]
	cmovge r11,rbp
	vmovdqu xmm6,XMMWORD[rbx]	; xmm6 = the four lane counters
	vpxor xmm8,xmm8,xmm8
	vmovdqa xmm7,xmm6
	vpcmpgtd xmm7,xmm7,xmm8	; xmm7 = all-ones in lanes whose counter > 0, else zero
	vpaddd xmm6,xmm6,xmm7	; adding -1 decrements only the lanes that are still active

	; Mask the freshly computed working variables with xmm7 and add the stored
	; state rows at [rdi], [32+rdi] ... [128+rdi]. Lanes whose counter was already
	; <= 0 contribute zero, so their stored state is written back unchanged.
	vpand xmm10,xmm10,xmm7
	vpand xmm11,xmm11,xmm7
	vpaddd xmm10,xmm10,XMMWORD[rdi]
	vpand xmm12,xmm12,xmm7
	vpaddd xmm11,xmm11,XMMWORD[32+rdi]
	vpand xmm13,xmm13,xmm7
	vpaddd xmm12,xmm12,XMMWORD[64+rdi]
	vpand xmm14,xmm14,xmm7
	vpaddd xmm13,xmm13,XMMWORD[96+rdi]
	vpaddd xmm14,xmm14,XMMWORD[128+rdi]
	vmovdqu XMMWORD[rdi],xmm10
	vmovdqu XMMWORD[32+rdi],xmm11
	vmovdqu XMMWORD[64+rdi],xmm12
	vmovdqu XMMWORD[96+rdi],xmm13
	vmovdqu XMMWORD[128+rdi],xmm14
	vmovdqu XMMWORD[rbx],xmm6	; store the updated counters
	; xmm5 was used as scratch by the rounds above, so it is reloaded from [96+rbp].
	; NOTE(review): presumably the byte-shuffle mask used at the top of $L$oop_avx
	; (the AVX2 path feeds the same offset to vpshufb) - confirm.
	vmovdqu xmm5,XMMWORD[96+rbp]
	dec edx	; edx = inner-loop passes still to run
	jnz NEAR $L$oop_avx

	; ---- outer loop: step to the next group of four lanes --------------------
	mov edx,DWORD[280+rsp]	; outer counter kept in the stack frame
	lea rdi,[16+rdi]	; +16 bytes = four dwords further along each state row
	lea rsi,[64+rsi]	; +64 bytes = four 16-byte input descriptors
	dec edx
	jnz NEAR $L$oop_grande_avx

	; ---- epilogue of sha1_multi_block_avx ------------------------------------
$L$done_avx:
	mov rax,QWORD[272+rsp]	; rax = stack pointer value saved in the frame
	vzeroupper	; clear upper YMM halves before the legacy-SSE movaps below
	; Restore xmm6-xmm15 (callee-saved under the Microsoft x64 ABI) from the save
	; area located below the saved stack pointer.
	movaps xmm6,XMMWORD[((-184))+rax]
	movaps xmm7,XMMWORD[((-168))+rax]
	movaps xmm8,XMMWORD[((-152))+rax]
	movaps xmm9,XMMWORD[((-136))+rax]
	movaps xmm10,XMMWORD[((-120))+rax]
	movaps xmm11,XMMWORD[((-104))+rax]
	movaps xmm12,XMMWORD[((-88))+rax]
	movaps xmm13,XMMWORD[((-72))+rax]
	movaps xmm14,XMMWORD[((-56))+rax]
	movaps xmm15,XMMWORD[((-40))+rax]
	mov rbp,QWORD[((-16))+rax]
	mov rbx,QWORD[((-8))+rax]
	lea rsp,[rax]	; drop the whole frame in one step
$L$epilogue_avx:
	mov rdi,QWORD[8+rsp]	; WIN64 epilogue: reload rdi/rsi from the slots above the return address
	mov rsi,QWORD[16+rsp]
	DB 0F3h,0C3h	; "rep ret", emitted as raw bytes
$L$SEH_end_sha1_multi_block_avx:

;-----------------------------------------------------------------------
; sha1_multi_block_avx2 - variant that sets up eight input lanes per
; outer pass (r12-r15 and r8-r11).
; ABI:  Microsoft x64. Arguments arrive in rcx, rdx, r8 and are moved to
;       rdi, rsi, edx, which the body uses.
;         rdi = state area, read and written as rows 32 bytes apart
;         rsi = array of 16-byte descriptors: qword pointer at +0,
;               signed dword count at +8
;         edx = outer count (halved below)
; NOTE(review): the exact C prototype is not visible in this part of the
;       file - confirm against the caller.
; Saves rbx, rbp, r12-r15 and xmm6-xmm15; rdi/rsi are saved in the
; caller-provided slots at [8+rsp] and [16+rsp].
;-----------------------------------------------------------------------
ALIGN 32
sha1_multi_block_avx2:
	mov QWORD[8+rsp],rdi	; WIN64 prologue: rdi/rsi are callee-saved, stash them above the return address
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_sha1_multi_block_avx2:
	mov rdi,rcx	; move the three Win64 argument registers into rdi, rsi, rdx
	mov rsi,rdx
	mov rdx,r8
	; Secondary entry label; it bypasses the argument shuffling above.
	; NOTE(review): presumably reached by a jump from a dispatcher elsewhere
	; in the file that has already placed the arguments - confirm.
_avx2_shortcut:
	mov rax,rsp	; rax = stack pointer at this point; stored in the frame below
	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15
	lea rsp,[((-168))+rsp]	; room for the xmm6-xmm15 save area
	; The first six stores are rsp-relative and the last four rax-relative; together
	; they fill consecutive 16-byte slots from rax-216 up to rax-72.
	movaps XMMWORD[rsp],xmm6
	movaps XMMWORD[16+rsp],xmm7
	movaps XMMWORD[32+rsp],xmm8
	movaps XMMWORD[48+rsp],xmm9
	movaps XMMWORD[64+rsp],xmm10
	movaps XMMWORD[80+rsp],xmm11
	movaps XMMWORD[(-120)+rax],xmm12
	movaps XMMWORD[(-104)+rax],xmm13
	movaps XMMWORD[(-88)+rax],xmm14
	movaps XMMWORD[(-72)+rax],xmm15
	sub rsp,576
	and rsp,-256	; align the working frame to 256 bytes
	mov QWORD[544+rsp],rax	; remember the pre-push stack pointer
$L$body_avx2:
	lea rbp,[K_XX_XX]	; rbp = address of K_XX_XX; also substituted for idle lanes' pointers below
	; NOTE(review): the halving presumably reflects eight lanes per pass versus
	; four in the XMM code paths - confirm the unit of the count with the caller.
	shr edx,1
	vzeroupper

	; ---- outer loop: set up eight lanes --------------------------------------
	; For each descriptor i in 0..7:
	;   pointer = [rsi+16*i], count = signed dword at [rsi+16*i+8]
	;   edx     = running maximum of the counts (signed compare)
	;   [rbx+4*i] = count
	;   if count <= 0 the lane pointer is replaced by rbp
$L$oop_grande_avx2:
	mov DWORD[552+rsp],edx	; park the outer counter; edx is reused for the maximum
	xor edx,edx
	lea rbx,[512+rsp]	; rbx = base of the per-lane counter array in the frame
	mov r12,QWORD[rsi]	; lane 0
	mov ecx,DWORD[8+rsi]
	cmp ecx,edx
	cmovg edx,ecx
	test ecx,ecx
	mov DWORD[rbx],ecx
	cmovle r12,rbp
	mov r13,QWORD[16+rsi]	; lane 1
	mov ecx,DWORD[24+rsi]
	cmp ecx,edx
	cmovg edx,ecx
	test ecx,ecx
	mov DWORD[4+rbx],ecx
	cmovle r13,rbp
	mov r14,QWORD[32+rsi]	; lane 2
	mov ecx,DWORD[40+rsi]
	cmp ecx,edx
	cmovg edx,ecx
	test ecx,ecx
	mov DWORD[8+rbx],ecx
	cmovle r14,rbp
	mov r15,QWORD[48+rsi]	; lane 3
	mov ecx,DWORD[56+rsi]
	cmp ecx,edx
	cmovg edx,ecx
	test ecx,ecx
	mov DWORD[12+rbx],ecx
	cmovle r15,rbp
	mov r8,QWORD[64+rsi]	; lane 4
	mov ecx,DWORD[72+rsi]
	cmp ecx,edx
	cmovg edx,ecx
	test ecx,ecx
	mov DWORD[16+rbx],ecx
	cmovle r8,rbp
	mov r9,QWORD[80+rsi]	; lane 5
	mov ecx,DWORD[88+rsi]
	cmp ecx,edx
	cmovg edx,ecx
	test ecx,ecx
	mov DWORD[20+rbx],ecx
	cmovle r9,rbp
	mov r10,QWORD[96+rsi]	; lane 6 (its compare/store sequence continues on the following line)
	mov ecx,DWORD[104+rsi]
cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[24+rbx],ecx cmovle r10,rbp mov r11,QWORD[112+rsi] mov ecx,DWORD[120+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[28+rbx],ecx cmovle r11,rbp vmovdqu ymm0,YMMWORD[rdi] lea rax,[128+rsp] vmovdqu ymm1,YMMWORD[32+rdi] lea rbx,[((256+128))+rsp] vmovdqu ymm2,YMMWORD[64+rdi] vmovdqu ymm3,YMMWORD[96+rdi] vmovdqu ymm4,YMMWORD[128+rdi] vmovdqu ymm9,YMMWORD[96+rbp] jmp NEAR $L$oop_avx2 ALIGN 32 $L$oop_avx2: vmovdqa ymm15,YMMWORD[((-32))+rbp] vmovd xmm10,DWORD[r12] lea r12,[64+r12] vmovd xmm12,DWORD[r8] lea r8,[64+r8] vmovd xmm7,DWORD[r13] lea r13,[64+r13] vmovd xmm6,DWORD[r9] lea r9,[64+r9] vpinsrd xmm10,xmm10,DWORD[r14],1 lea r14,[64+r14] vpinsrd xmm12,xmm12,DWORD[r10],1 lea r10,[64+r10] vpinsrd xmm7,xmm7,DWORD[r15],1 lea r15,[64+r15] vpunpckldq ymm10,ymm10,ymm7 vpinsrd xmm6,xmm6,DWORD[r11],1 lea r11,[64+r11] vpunpckldq ymm12,ymm12,ymm6 vmovd xmm11,DWORD[((-60))+r12] vinserti128 ymm10,ymm10,xmm12,1 vmovd xmm8,DWORD[((-60))+r8] vpshufb ymm10,ymm10,ymm9 vmovd xmm7,DWORD[((-60))+r13] vmovd xmm6,DWORD[((-60))+r9] vpinsrd xmm11,xmm11,DWORD[((-60))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-60))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-60))+r15],1 vpunpckldq ymm11,ymm11,ymm7 vpinsrd xmm6,xmm6,DWORD[((-60))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm4,ymm4,ymm15 vpslld ymm7,ymm0,5 vpandn ymm6,ymm1,ymm3 vpand ymm5,ymm1,ymm2 vmovdqa YMMWORD[(0-128)+rax],ymm10 vpaddd ymm4,ymm4,ymm10 vinserti128 ymm11,ymm11,xmm8,1 vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm6 vmovd xmm12,DWORD[((-56))+r12] vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-56))+r8] vpaddd ymm4,ymm4,ymm5 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpshufb ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vmovd xmm7,DWORD[((-56))+r13] vmovd xmm6,DWORD[((-56))+r9] vpinsrd xmm12,xmm12,DWORD[((-56))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-56))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-56))+r15],1 vpunpckldq ymm12,ymm12,ymm7 vpinsrd xmm6,xmm6,DWORD[((-56))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd 
ymm3,ymm3,ymm15 vpslld ymm7,ymm4,5 vpandn ymm6,ymm0,ymm2 vpand ymm5,ymm0,ymm1 vmovdqa YMMWORD[(32-128)+rax],ymm11 vpaddd ymm3,ymm3,ymm11 vinserti128 ymm12,ymm12,xmm8,1 vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm6 vmovd xmm13,DWORD[((-52))+r12] vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-52))+r8] vpaddd ymm3,ymm3,ymm5 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpshufb ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vmovd xmm7,DWORD[((-52))+r13] vmovd xmm6,DWORD[((-52))+r9] vpinsrd xmm13,xmm13,DWORD[((-52))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-52))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-52))+r15],1 vpunpckldq ymm13,ymm13,ymm7 vpinsrd xmm6,xmm6,DWORD[((-52))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm2,ymm2,ymm15 vpslld ymm7,ymm3,5 vpandn ymm6,ymm4,ymm1 vpand ymm5,ymm4,ymm0 vmovdqa YMMWORD[(64-128)+rax],ymm12 vpaddd ymm2,ymm2,ymm12 vinserti128 ymm13,ymm13,xmm8,1 vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm6 vmovd xmm14,DWORD[((-48))+r12] vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-48))+r8] vpaddd ymm2,ymm2,ymm5 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpshufb ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vmovd xmm7,DWORD[((-48))+r13] vmovd xmm6,DWORD[((-48))+r9] vpinsrd xmm14,xmm14,DWORD[((-48))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-48))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-48))+r15],1 vpunpckldq ymm14,ymm14,ymm7 vpinsrd xmm6,xmm6,DWORD[((-48))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm1,ymm1,ymm15 vpslld ymm7,ymm2,5 vpandn ymm6,ymm3,ymm0 vpand ymm5,ymm3,ymm4 vmovdqa YMMWORD[(96-128)+rax],ymm13 vpaddd ymm1,ymm1,ymm13 vinserti128 ymm14,ymm14,xmm8,1 vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm6 vmovd xmm10,DWORD[((-44))+r12] vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-44))+r8] vpaddd ymm1,ymm1,ymm5 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpshufb ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vmovd xmm7,DWORD[((-44))+r13] vmovd xmm6,DWORD[((-44))+r9] vpinsrd xmm10,xmm10,DWORD[((-44))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-44))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-44))+r15],1 vpunpckldq 
ymm10,ymm10,ymm7 vpinsrd xmm6,xmm6,DWORD[((-44))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm0,ymm0,ymm15 vpslld ymm7,ymm1,5 vpandn ymm6,ymm2,ymm4 vpand ymm5,ymm2,ymm3 vmovdqa YMMWORD[(128-128)+rax],ymm14 vpaddd ymm0,ymm0,ymm14 vinserti128 ymm10,ymm10,xmm8,1 vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm6 vmovd xmm11,DWORD[((-40))+r12] vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-40))+r8] vpaddd ymm0,ymm0,ymm5 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpshufb ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vmovd xmm7,DWORD[((-40))+r13] vmovd xmm6,DWORD[((-40))+r9] vpinsrd xmm11,xmm11,DWORD[((-40))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-40))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-40))+r15],1 vpunpckldq ymm11,ymm11,ymm7 vpinsrd xmm6,xmm6,DWORD[((-40))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm4,ymm4,ymm15 vpslld ymm7,ymm0,5 vpandn ymm6,ymm1,ymm3 vpand ymm5,ymm1,ymm2 vmovdqa YMMWORD[(160-128)+rax],ymm10 vpaddd ymm4,ymm4,ymm10 vinserti128 ymm11,ymm11,xmm8,1 vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm6 vmovd xmm12,DWORD[((-36))+r12] vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-36))+r8] vpaddd ymm4,ymm4,ymm5 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpshufb ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vmovd xmm7,DWORD[((-36))+r13] vmovd xmm6,DWORD[((-36))+r9] vpinsrd xmm12,xmm12,DWORD[((-36))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-36))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-36))+r15],1 vpunpckldq ymm12,ymm12,ymm7 vpinsrd xmm6,xmm6,DWORD[((-36))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm3,ymm3,ymm15 vpslld ymm7,ymm4,5 vpandn ymm6,ymm0,ymm2 vpand ymm5,ymm0,ymm1 vmovdqa YMMWORD[(192-128)+rax],ymm11 vpaddd ymm3,ymm3,ymm11 vinserti128 ymm12,ymm12,xmm8,1 vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm6 vmovd xmm13,DWORD[((-32))+r12] vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-32))+r8] vpaddd ymm3,ymm3,ymm5 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpshufb ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vmovd xmm7,DWORD[((-32))+r13] vmovd xmm6,DWORD[((-32))+r9] vpinsrd xmm13,xmm13,DWORD[((-32))+r14],1 
vpinsrd xmm8,xmm8,DWORD[((-32))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-32))+r15],1 vpunpckldq ymm13,ymm13,ymm7 vpinsrd xmm6,xmm6,DWORD[((-32))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm2,ymm2,ymm15 vpslld ymm7,ymm3,5 vpandn ymm6,ymm4,ymm1 vpand ymm5,ymm4,ymm0 vmovdqa YMMWORD[(224-128)+rax],ymm12 vpaddd ymm2,ymm2,ymm12 vinserti128 ymm13,ymm13,xmm8,1 vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm6 vmovd xmm14,DWORD[((-28))+r12] vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-28))+r8] vpaddd ymm2,ymm2,ymm5 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpshufb ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vmovd xmm7,DWORD[((-28))+r13] vmovd xmm6,DWORD[((-28))+r9] vpinsrd xmm14,xmm14,DWORD[((-28))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-28))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-28))+r15],1 vpunpckldq ymm14,ymm14,ymm7 vpinsrd xmm6,xmm6,DWORD[((-28))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm1,ymm1,ymm15 vpslld ymm7,ymm2,5 vpandn ymm6,ymm3,ymm0 vpand ymm5,ymm3,ymm4 vmovdqa YMMWORD[(256-256-128)+rbx],ymm13 vpaddd ymm1,ymm1,ymm13 vinserti128 ymm14,ymm14,xmm8,1 vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm6 vmovd xmm10,DWORD[((-24))+r12] vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-24))+r8] vpaddd ymm1,ymm1,ymm5 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpshufb ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vmovd xmm7,DWORD[((-24))+r13] vmovd xmm6,DWORD[((-24))+r9] vpinsrd xmm10,xmm10,DWORD[((-24))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-24))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-24))+r15],1 vpunpckldq ymm10,ymm10,ymm7 vpinsrd xmm6,xmm6,DWORD[((-24))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm0,ymm0,ymm15 vpslld ymm7,ymm1,5 vpandn ymm6,ymm2,ymm4 vpand ymm5,ymm2,ymm3 vmovdqa YMMWORD[(288-256-128)+rbx],ymm14 vpaddd ymm0,ymm0,ymm14 vinserti128 ymm10,ymm10,xmm8,1 vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm6 vmovd xmm11,DWORD[((-20))+r12] vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-20))+r8] vpaddd ymm0,ymm0,ymm5 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpshufb ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 
vmovd xmm7,DWORD[((-20))+r13] vmovd xmm6,DWORD[((-20))+r9] vpinsrd xmm11,xmm11,DWORD[((-20))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-20))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-20))+r15],1 vpunpckldq ymm11,ymm11,ymm7 vpinsrd xmm6,xmm6,DWORD[((-20))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm4,ymm4,ymm15 vpslld ymm7,ymm0,5 vpandn ymm6,ymm1,ymm3 vpand ymm5,ymm1,ymm2 vmovdqa YMMWORD[(320-256-128)+rbx],ymm10 vpaddd ymm4,ymm4,ymm10 vinserti128 ymm11,ymm11,xmm8,1 vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm6 vmovd xmm12,DWORD[((-16))+r12] vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-16))+r8] vpaddd ymm4,ymm4,ymm5 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpshufb ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vmovd xmm7,DWORD[((-16))+r13] vmovd xmm6,DWORD[((-16))+r9] vpinsrd xmm12,xmm12,DWORD[((-16))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-16))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-16))+r15],1 vpunpckldq ymm12,ymm12,ymm7 vpinsrd xmm6,xmm6,DWORD[((-16))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm3,ymm3,ymm15 vpslld ymm7,ymm4,5 vpandn ymm6,ymm0,ymm2 vpand ymm5,ymm0,ymm1 vmovdqa YMMWORD[(352-256-128)+rbx],ymm11 vpaddd ymm3,ymm3,ymm11 vinserti128 ymm12,ymm12,xmm8,1 vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm6 vmovd xmm13,DWORD[((-12))+r12] vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-12))+r8] vpaddd ymm3,ymm3,ymm5 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpshufb ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vmovd xmm7,DWORD[((-12))+r13] vmovd xmm6,DWORD[((-12))+r9] vpinsrd xmm13,xmm13,DWORD[((-12))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-12))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-12))+r15],1 vpunpckldq ymm13,ymm13,ymm7 vpinsrd xmm6,xmm6,DWORD[((-12))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm2,ymm2,ymm15 vpslld ymm7,ymm3,5 vpandn ymm6,ymm4,ymm1 vpand ymm5,ymm4,ymm0 vmovdqa YMMWORD[(384-256-128)+rbx],ymm12 vpaddd ymm2,ymm2,ymm12 vinserti128 ymm13,ymm13,xmm8,1 vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm6 vmovd xmm14,DWORD[((-8))+r12] vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-8))+r8] vpaddd 
ymm2,ymm2,ymm5 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpshufb ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vmovd xmm7,DWORD[((-8))+r13] vmovd xmm6,DWORD[((-8))+r9] vpinsrd xmm14,xmm14,DWORD[((-8))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-8))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-8))+r15],1 vpunpckldq ymm14,ymm14,ymm7 vpinsrd xmm6,xmm6,DWORD[((-8))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm1,ymm1,ymm15 vpslld ymm7,ymm2,5 vpandn ymm6,ymm3,ymm0 vpand ymm5,ymm3,ymm4 vmovdqa YMMWORD[(416-256-128)+rbx],ymm13 vpaddd ymm1,ymm1,ymm13 vinserti128 ymm14,ymm14,xmm8,1 vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm6 vmovd xmm10,DWORD[((-4))+r12] vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vmovd xmm8,DWORD[((-4))+r8] vpaddd ymm1,ymm1,ymm5 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpshufb ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vmovdqa ymm11,YMMWORD[((0-128))+rax] vmovd xmm7,DWORD[((-4))+r13] vmovd xmm6,DWORD[((-4))+r9] vpinsrd xmm10,xmm10,DWORD[((-4))+r14],1 vpinsrd xmm8,xmm8,DWORD[((-4))+r10],1 vpinsrd xmm7,xmm7,DWORD[((-4))+r15],1 vpunpckldq ymm10,ymm10,ymm7 vpinsrd xmm6,xmm6,DWORD[((-4))+r11],1 vpunpckldq ymm8,ymm8,ymm6 vpaddd ymm0,ymm0,ymm15 prefetcht0 [63+r12] vpslld ymm7,ymm1,5 vpandn ymm6,ymm2,ymm4 vpand ymm5,ymm2,ymm3 vmovdqa YMMWORD[(448-256-128)+rbx],ymm14 vpaddd ymm0,ymm0,ymm14 vinserti128 ymm10,ymm10,xmm8,1 vpsrld ymm8,ymm1,27 prefetcht0 [63+r13] vpxor ymm5,ymm5,ymm6 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 prefetcht0 [63+r14] vpaddd ymm0,ymm0,ymm5 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 prefetcht0 [63+r15] vpshufb ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vmovdqa ymm12,YMMWORD[((32-128))+rax] vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((64-128))+rax] vpaddd ymm4,ymm4,ymm15 vpslld ymm7,ymm0,5 vpandn ymm6,ymm1,ymm3 prefetcht0 [63+r8] vpand ymm5,ymm1,ymm2 vmovdqa YMMWORD[(480-256-128)+rbx],ymm10 vpaddd ymm4,ymm4,ymm10 vpxor ymm11,ymm11,YMMWORD[((256-256-128))+rbx] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm6 vpxor ymm11,ymm11,ymm13 prefetcht0 [63+r9] vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd 
ymm4,ymm4,ymm5 prefetcht0 [63+r10] vpsrld ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 prefetcht0 [63+r11] vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((96-128))+rax] vpaddd ymm3,ymm3,ymm15 vpslld ymm7,ymm4,5 vpandn ymm6,ymm0,ymm2 vpand ymm5,ymm0,ymm1 vmovdqa YMMWORD[(0-128)+rax],ymm11 vpaddd ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((288-256-128))+rbx] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm6 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((128-128))+rax] vpaddd ymm2,ymm2,ymm15 vpslld ymm7,ymm3,5 vpandn ymm6,ymm4,ymm1 vpand ymm5,ymm4,ymm0 vmovdqa YMMWORD[(32-128)+rax],ymm12 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((320-256-128))+rbx] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm6 vpxor ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((160-128))+rax] vpaddd ymm1,ymm1,ymm15 vpslld ymm7,ymm2,5 vpandn ymm6,ymm3,ymm0 vpand ymm5,ymm3,ymm4 vmovdqa YMMWORD[(64-128)+rax],ymm13 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((352-256-128))+rbx] vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm6 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((192-128))+rax] vpaddd ymm0,ymm0,ymm15 vpslld ymm7,ymm1,5 vpandn ymm6,ymm2,ymm4 vpand ymm5,ymm2,ymm3 vmovdqa YMMWORD[(96-128)+rax],ymm14 vpaddd ymm0,ymm0,ymm14 vpxor ymm10,ymm10,YMMWORD[((384-256-128))+rbx] vpsrld ymm8,ymm1,27 vpxor 
ymm5,ymm5,ymm6 vpxor ymm10,ymm10,ymm12 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd ymm0,ymm0,ymm5 vpsrld ymm9,ymm10,31 vpaddd ymm10,ymm10,ymm10 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vmovdqa ymm15,YMMWORD[rbp] vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((224-128))+rax] vpslld ymm7,ymm0,5 vpaddd ymm4,ymm4,ymm15 vpxor ymm5,ymm3,ymm1 vmovdqa YMMWORD[(128-128)+rax],ymm10 vpaddd ymm4,ymm4,ymm10 vpxor ymm11,ymm11,YMMWORD[((416-256-128))+rbx] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm2 vpxor ymm11,ymm11,ymm13 vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd ymm4,ymm4,ymm5 vpsrld ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((256-256-128))+rbx] vpslld ymm7,ymm4,5 vpaddd ymm3,ymm3,ymm15 vpxor ymm5,ymm2,ymm0 vmovdqa YMMWORD[(160-128)+rax],ymm11 vpaddd ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((448-256-128))+rbx] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm1 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((288-256-128))+rbx] vpslld ymm7,ymm3,5 vpaddd ymm2,ymm2,ymm15 vpxor ymm5,ymm1,ymm4 vmovdqa YMMWORD[(192-128)+rax],ymm12 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((480-256-128))+rbx] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm0 vpxor ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((320-256-128))+rbx] vpslld ymm7,ymm2,5 vpaddd ymm1,ymm1,ymm15 vpxor ymm5,ymm0,ymm3 vmovdqa YMMWORD[(224-128)+rax],ymm13 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((0-128))+rax] vpsrld ymm8,ymm2,27 vpxor 
ymm5,ymm5,ymm4 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((352-256-128))+rbx] vpslld ymm7,ymm1,5 vpaddd ymm0,ymm0,ymm15 vpxor ymm5,ymm4,ymm2 vmovdqa YMMWORD[(256-256-128)+rbx],ymm14 vpaddd ymm0,ymm0,ymm14 vpxor ymm10,ymm10,YMMWORD[((32-128))+rax] vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm3 vpxor ymm10,ymm10,ymm12 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd ymm0,ymm0,ymm5 vpsrld ymm9,ymm10,31 vpaddd ymm10,ymm10,ymm10 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((384-256-128))+rbx] vpslld ymm7,ymm0,5 vpaddd ymm4,ymm4,ymm15 vpxor ymm5,ymm3,ymm1 vmovdqa YMMWORD[(288-256-128)+rbx],ymm10 vpaddd ymm4,ymm4,ymm10 vpxor ymm11,ymm11,YMMWORD[((64-128))+rax] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm2 vpxor ymm11,ymm11,ymm13 vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd ymm4,ymm4,ymm5 vpsrld ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((416-256-128))+rbx] vpslld ymm7,ymm4,5 vpaddd ymm3,ymm3,ymm15 vpxor ymm5,ymm2,ymm0 vmovdqa YMMWORD[(320-256-128)+rbx],ymm11 vpaddd ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((96-128))+rax] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm1 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((448-256-128))+rbx] vpslld ymm7,ymm3,5 vpaddd ymm2,ymm2,ymm15 vpxor ymm5,ymm1,ymm4 vmovdqa YMMWORD[(352-256-128)+rbx],ymm12 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((128-128))+rax] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm0 vpxor 
ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((480-256-128))+rbx] vpslld ymm7,ymm2,5 vpaddd ymm1,ymm1,ymm15 vpxor ymm5,ymm0,ymm3 vmovdqa YMMWORD[(384-256-128)+rbx],ymm13 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((160-128))+rax] vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm4 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((0-128))+rax] vpslld ymm7,ymm1,5 vpaddd ymm0,ymm0,ymm15 vpxor ymm5,ymm4,ymm2 vmovdqa YMMWORD[(416-256-128)+rbx],ymm14 vpaddd ymm0,ymm0,ymm14 vpxor ymm10,ymm10,YMMWORD[((192-128))+rax] vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm3 vpxor ymm10,ymm10,ymm12 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd ymm0,ymm0,ymm5 vpsrld ymm9,ymm10,31 vpaddd ymm10,ymm10,ymm10 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((32-128))+rax] vpslld ymm7,ymm0,5 vpaddd ymm4,ymm4,ymm15 vpxor ymm5,ymm3,ymm1 vmovdqa YMMWORD[(448-256-128)+rbx],ymm10 vpaddd ymm4,ymm4,ymm10 vpxor ymm11,ymm11,YMMWORD[((224-128))+rax] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm2 vpxor ymm11,ymm11,ymm13 vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd ymm4,ymm4,ymm5 vpsrld ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((64-128))+rax] vpslld ymm7,ymm4,5 vpaddd ymm3,ymm3,ymm15 vpxor ymm5,ymm2,ymm0 vmovdqa YMMWORD[(480-256-128)+rbx],ymm11 vpaddd ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((256-256-128))+rbx] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm1 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 
vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((96-128))+rax] vpslld ymm7,ymm3,5 vpaddd ymm2,ymm2,ymm15 vpxor ymm5,ymm1,ymm4 vmovdqa YMMWORD[(0-128)+rax],ymm12 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((288-256-128))+rbx] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm0 vpxor ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((128-128))+rax] vpslld ymm7,ymm2,5 vpaddd ymm1,ymm1,ymm15 vpxor ymm5,ymm0,ymm3 vmovdqa YMMWORD[(32-128)+rax],ymm13 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((320-256-128))+rbx] vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm4 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((160-128))+rax] vpslld ymm7,ymm1,5 vpaddd ymm0,ymm0,ymm15 vpxor ymm5,ymm4,ymm2 vmovdqa YMMWORD[(64-128)+rax],ymm14 vpaddd ymm0,ymm0,ymm14 vpxor ymm10,ymm10,YMMWORD[((352-256-128))+rbx] vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm3 vpxor ymm10,ymm10,ymm12 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd ymm0,ymm0,ymm5 vpsrld ymm9,ymm10,31 vpaddd ymm10,ymm10,ymm10 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((192-128))+rax] vpslld ymm7,ymm0,5 vpaddd ymm4,ymm4,ymm15 vpxor ymm5,ymm3,ymm1 vmovdqa YMMWORD[(96-128)+rax],ymm10 vpaddd ymm4,ymm4,ymm10 vpxor ymm11,ymm11,YMMWORD[((384-256-128))+rbx] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm2 vpxor ymm11,ymm11,ymm13 vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd ymm4,ymm4,ymm5 vpsrld 
ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((224-128))+rax] vpslld ymm7,ymm4,5 vpaddd ymm3,ymm3,ymm15 vpxor ymm5,ymm2,ymm0 vmovdqa YMMWORD[(128-128)+rax],ymm11 vpaddd ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((416-256-128))+rbx] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm1 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((256-256-128))+rbx] vpslld ymm7,ymm3,5 vpaddd ymm2,ymm2,ymm15 vpxor ymm5,ymm1,ymm4 vmovdqa YMMWORD[(160-128)+rax],ymm12 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((448-256-128))+rbx] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm0 vpxor ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((288-256-128))+rbx] vpslld ymm7,ymm2,5 vpaddd ymm1,ymm1,ymm15 vpxor ymm5,ymm0,ymm3 vmovdqa YMMWORD[(192-128)+rax],ymm13 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((480-256-128))+rbx] vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm4 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((320-256-128))+rbx] vpslld ymm7,ymm1,5 vpaddd ymm0,ymm0,ymm15 vpxor ymm5,ymm4,ymm2 vmovdqa YMMWORD[(224-128)+rax],ymm14 vpaddd ymm0,ymm0,ymm14 vpxor ymm10,ymm10,YMMWORD[((0-128))+rax] vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm3 vpxor ymm10,ymm10,ymm12 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd ymm0,ymm0,ymm5 vpsrld ymm9,ymm10,31 vpaddd 
ymm10,ymm10,ymm10 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vmovdqa ymm15,YMMWORD[32+rbp] vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((352-256-128))+rbx] vpaddd ymm4,ymm4,ymm15 vpslld ymm7,ymm0,5 vpand ymm6,ymm3,ymm2 vpxor ymm11,ymm11,YMMWORD[((32-128))+rax] vpaddd ymm4,ymm4,ymm6 vpsrld ymm8,ymm0,27 vpxor ymm5,ymm3,ymm2 vpxor ymm11,ymm11,ymm13 vmovdqu YMMWORD[(256-256-128)+rbx],ymm10 vpaddd ymm4,ymm4,ymm10 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm11,31 vpand ymm5,ymm5,ymm1 vpaddd ymm11,ymm11,ymm11 vpslld ymm6,ymm1,30 vpaddd ymm4,ymm4,ymm5 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((384-256-128))+rbx] vpaddd ymm3,ymm3,ymm15 vpslld ymm7,ymm4,5 vpand ymm6,ymm2,ymm1 vpxor ymm12,ymm12,YMMWORD[((64-128))+rax] vpaddd ymm3,ymm3,ymm6 vpsrld ymm8,ymm4,27 vpxor ymm5,ymm2,ymm1 vpxor ymm12,ymm12,ymm14 vmovdqu YMMWORD[(288-256-128)+rbx],ymm11 vpaddd ymm3,ymm3,ymm11 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm12,31 vpand ymm5,ymm5,ymm0 vpaddd ymm12,ymm12,ymm12 vpslld ymm6,ymm0,30 vpaddd ymm3,ymm3,ymm5 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((416-256-128))+rbx] vpaddd ymm2,ymm2,ymm15 vpslld ymm7,ymm3,5 vpand ymm6,ymm1,ymm0 vpxor ymm13,ymm13,YMMWORD[((96-128))+rax] vpaddd ymm2,ymm2,ymm6 vpsrld ymm8,ymm3,27 vpxor ymm5,ymm1,ymm0 vpxor ymm13,ymm13,ymm10 vmovdqu YMMWORD[(320-256-128)+rbx],ymm12 vpaddd ymm2,ymm2,ymm12 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm13,31 vpand ymm5,ymm5,ymm4 vpaddd ymm13,ymm13,ymm13 vpslld ymm6,ymm4,30 vpaddd ymm2,ymm2,ymm5 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((448-256-128))+rbx] vpaddd ymm1,ymm1,ymm15 vpslld ymm7,ymm2,5 vpand ymm6,ymm0,ymm4 vpxor ymm14,ymm14,YMMWORD[((128-128))+rax] vpaddd ymm1,ymm1,ymm6 vpsrld ymm8,ymm2,27 vpxor ymm5,ymm0,ymm4 vpxor ymm14,ymm14,ymm11 
vmovdqu YMMWORD[(352-256-128)+rbx],ymm13 vpaddd ymm1,ymm1,ymm13 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm14,31 vpand ymm5,ymm5,ymm3 vpaddd ymm14,ymm14,ymm14 vpslld ymm6,ymm3,30 vpaddd ymm1,ymm1,ymm5 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((480-256-128))+rbx] vpaddd ymm0,ymm0,ymm15 vpslld ymm7,ymm1,5 vpand ymm6,ymm4,ymm3 vpxor ymm10,ymm10,YMMWORD[((160-128))+rax] vpaddd ymm0,ymm0,ymm6 vpsrld ymm8,ymm1,27 vpxor ymm5,ymm4,ymm3 vpxor ymm10,ymm10,ymm12 vmovdqu YMMWORD[(384-256-128)+rbx],ymm14 vpaddd ymm0,ymm0,ymm14 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm10,31 vpand ymm5,ymm5,ymm2 vpaddd ymm10,ymm10,ymm10 vpslld ymm6,ymm2,30 vpaddd ymm0,ymm0,ymm5 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((0-128))+rax] vpaddd ymm4,ymm4,ymm15 vpslld ymm7,ymm0,5 vpand ymm6,ymm3,ymm2 vpxor ymm11,ymm11,YMMWORD[((192-128))+rax] vpaddd ymm4,ymm4,ymm6 vpsrld ymm8,ymm0,27 vpxor ymm5,ymm3,ymm2 vpxor ymm11,ymm11,ymm13 vmovdqu YMMWORD[(416-256-128)+rbx],ymm10 vpaddd ymm4,ymm4,ymm10 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm11,31 vpand ymm5,ymm5,ymm1 vpaddd ymm11,ymm11,ymm11 vpslld ymm6,ymm1,30 vpaddd ymm4,ymm4,ymm5 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((32-128))+rax] vpaddd ymm3,ymm3,ymm15 vpslld ymm7,ymm4,5 vpand ymm6,ymm2,ymm1 vpxor ymm12,ymm12,YMMWORD[((224-128))+rax] vpaddd ymm3,ymm3,ymm6 vpsrld ymm8,ymm4,27 vpxor ymm5,ymm2,ymm1 vpxor ymm12,ymm12,ymm14 vmovdqu YMMWORD[(448-256-128)+rbx],ymm11 vpaddd ymm3,ymm3,ymm11 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm12,31 vpand ymm5,ymm5,ymm0 vpaddd ymm12,ymm12,ymm12 vpslld ymm6,ymm0,30 vpaddd ymm3,ymm3,ymm5 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((64-128))+rax] vpaddd ymm2,ymm2,ymm15 vpslld ymm7,ymm3,5 vpand ymm6,ymm1,ymm0 
vpxor ymm13,ymm13,YMMWORD[((256-256-128))+rbx] vpaddd ymm2,ymm2,ymm6 vpsrld ymm8,ymm3,27 vpxor ymm5,ymm1,ymm0 vpxor ymm13,ymm13,ymm10 vmovdqu YMMWORD[(480-256-128)+rbx],ymm12 vpaddd ymm2,ymm2,ymm12 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm13,31 vpand ymm5,ymm5,ymm4 vpaddd ymm13,ymm13,ymm13 vpslld ymm6,ymm4,30 vpaddd ymm2,ymm2,ymm5 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((96-128))+rax] vpaddd ymm1,ymm1,ymm15 vpslld ymm7,ymm2,5 vpand ymm6,ymm0,ymm4 vpxor ymm14,ymm14,YMMWORD[((288-256-128))+rbx] vpaddd ymm1,ymm1,ymm6 vpsrld ymm8,ymm2,27 vpxor ymm5,ymm0,ymm4 vpxor ymm14,ymm14,ymm11 vmovdqu YMMWORD[(0-128)+rax],ymm13 vpaddd ymm1,ymm1,ymm13 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm14,31 vpand ymm5,ymm5,ymm3 vpaddd ymm14,ymm14,ymm14 vpslld ymm6,ymm3,30 vpaddd ymm1,ymm1,ymm5 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((128-128))+rax] vpaddd ymm0,ymm0,ymm15 vpslld ymm7,ymm1,5 vpand ymm6,ymm4,ymm3 vpxor ymm10,ymm10,YMMWORD[((320-256-128))+rbx] vpaddd ymm0,ymm0,ymm6 vpsrld ymm8,ymm1,27 vpxor ymm5,ymm4,ymm3 vpxor ymm10,ymm10,ymm12 vmovdqu YMMWORD[(32-128)+rax],ymm14 vpaddd ymm0,ymm0,ymm14 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm10,31 vpand ymm5,ymm5,ymm2 vpaddd ymm10,ymm10,ymm10 vpslld ymm6,ymm2,30 vpaddd ymm0,ymm0,ymm5 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((160-128))+rax] vpaddd ymm4,ymm4,ymm15 vpslld ymm7,ymm0,5 vpand ymm6,ymm3,ymm2 vpxor ymm11,ymm11,YMMWORD[((352-256-128))+rbx] vpaddd ymm4,ymm4,ymm6 vpsrld ymm8,ymm0,27 vpxor ymm5,ymm3,ymm2 vpxor ymm11,ymm11,ymm13 vmovdqu YMMWORD[(64-128)+rax],ymm10 vpaddd ymm4,ymm4,ymm10 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm11,31 vpand ymm5,ymm5,ymm1 vpaddd ymm11,ymm11,ymm11 vpslld ymm6,ymm1,30 vpaddd ymm4,ymm4,ymm5 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor 
ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((192-128))+rax] vpaddd ymm3,ymm3,ymm15 vpslld ymm7,ymm4,5 vpand ymm6,ymm2,ymm1 vpxor ymm12,ymm12,YMMWORD[((384-256-128))+rbx] vpaddd ymm3,ymm3,ymm6 vpsrld ymm8,ymm4,27 vpxor ymm5,ymm2,ymm1 vpxor ymm12,ymm12,ymm14 vmovdqu YMMWORD[(96-128)+rax],ymm11 vpaddd ymm3,ymm3,ymm11 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm12,31 vpand ymm5,ymm5,ymm0 vpaddd ymm12,ymm12,ymm12 vpslld ymm6,ymm0,30 vpaddd ymm3,ymm3,ymm5 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((224-128))+rax] vpaddd ymm2,ymm2,ymm15 vpslld ymm7,ymm3,5 vpand ymm6,ymm1,ymm0 vpxor ymm13,ymm13,YMMWORD[((416-256-128))+rbx] vpaddd ymm2,ymm2,ymm6 vpsrld ymm8,ymm3,27 vpxor ymm5,ymm1,ymm0 vpxor ymm13,ymm13,ymm10 vmovdqu YMMWORD[(128-128)+rax],ymm12 vpaddd ymm2,ymm2,ymm12 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm13,31 vpand ymm5,ymm5,ymm4 vpaddd ymm13,ymm13,ymm13 vpslld ymm6,ymm4,30 vpaddd ymm2,ymm2,ymm5 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((256-256-128))+rbx] vpaddd ymm1,ymm1,ymm15 vpslld ymm7,ymm2,5 vpand ymm6,ymm0,ymm4 vpxor ymm14,ymm14,YMMWORD[((448-256-128))+rbx] vpaddd ymm1,ymm1,ymm6 vpsrld ymm8,ymm2,27 vpxor ymm5,ymm0,ymm4 vpxor ymm14,ymm14,ymm11 vmovdqu YMMWORD[(160-128)+rax],ymm13 vpaddd ymm1,ymm1,ymm13 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm14,31 vpand ymm5,ymm5,ymm3 vpaddd ymm14,ymm14,ymm14 vpslld ymm6,ymm3,30 vpaddd ymm1,ymm1,ymm5 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((288-256-128))+rbx] vpaddd ymm0,ymm0,ymm15 vpslld ymm7,ymm1,5 vpand ymm6,ymm4,ymm3 vpxor ymm10,ymm10,YMMWORD[((480-256-128))+rbx] vpaddd ymm0,ymm0,ymm6 vpsrld ymm8,ymm1,27 vpxor ymm5,ymm4,ymm3 vpxor ymm10,ymm10,ymm12 vmovdqu YMMWORD[(192-128)+rax],ymm14 vpaddd ymm0,ymm0,ymm14 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm10,31 vpand ymm5,ymm5,ymm2 
vpaddd ymm10,ymm10,ymm10 vpslld ymm6,ymm2,30 vpaddd ymm0,ymm0,ymm5 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((320-256-128))+rbx] vpaddd ymm4,ymm4,ymm15 vpslld ymm7,ymm0,5 vpand ymm6,ymm3,ymm2 vpxor ymm11,ymm11,YMMWORD[((0-128))+rax] vpaddd ymm4,ymm4,ymm6 vpsrld ymm8,ymm0,27 vpxor ymm5,ymm3,ymm2 vpxor ymm11,ymm11,ymm13 vmovdqu YMMWORD[(224-128)+rax],ymm10 vpaddd ymm4,ymm4,ymm10 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm11,31 vpand ymm5,ymm5,ymm1 vpaddd ymm11,ymm11,ymm11 vpslld ymm6,ymm1,30 vpaddd ymm4,ymm4,ymm5 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((352-256-128))+rbx] vpaddd ymm3,ymm3,ymm15 vpslld ymm7,ymm4,5 vpand ymm6,ymm2,ymm1 vpxor ymm12,ymm12,YMMWORD[((32-128))+rax] vpaddd ymm3,ymm3,ymm6 vpsrld ymm8,ymm4,27 vpxor ymm5,ymm2,ymm1 vpxor ymm12,ymm12,ymm14 vmovdqu YMMWORD[(256-256-128)+rbx],ymm11 vpaddd ymm3,ymm3,ymm11 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm12,31 vpand ymm5,ymm5,ymm0 vpaddd ymm12,ymm12,ymm12 vpslld ymm6,ymm0,30 vpaddd ymm3,ymm3,ymm5 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((384-256-128))+rbx] vpaddd ymm2,ymm2,ymm15 vpslld ymm7,ymm3,5 vpand ymm6,ymm1,ymm0 vpxor ymm13,ymm13,YMMWORD[((64-128))+rax] vpaddd ymm2,ymm2,ymm6 vpsrld ymm8,ymm3,27 vpxor ymm5,ymm1,ymm0 vpxor ymm13,ymm13,ymm10 vmovdqu YMMWORD[(288-256-128)+rbx],ymm12 vpaddd ymm2,ymm2,ymm12 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm13,31 vpand ymm5,ymm5,ymm4 vpaddd ymm13,ymm13,ymm13 vpslld ymm6,ymm4,30 vpaddd ymm2,ymm2,ymm5 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((416-256-128))+rbx] vpaddd ymm1,ymm1,ymm15 vpslld ymm7,ymm2,5 vpand ymm6,ymm0,ymm4 vpxor ymm14,ymm14,YMMWORD[((96-128))+rax] vpaddd ymm1,ymm1,ymm6 vpsrld ymm8,ymm2,27 vpxor ymm5,ymm0,ymm4 vpxor 
ymm14,ymm14,ymm11 vmovdqu YMMWORD[(320-256-128)+rbx],ymm13 vpaddd ymm1,ymm1,ymm13 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm14,31 vpand ymm5,ymm5,ymm3 vpaddd ymm14,ymm14,ymm14 vpslld ymm6,ymm3,30 vpaddd ymm1,ymm1,ymm5 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((448-256-128))+rbx] vpaddd ymm0,ymm0,ymm15 vpslld ymm7,ymm1,5 vpand ymm6,ymm4,ymm3 vpxor ymm10,ymm10,YMMWORD[((128-128))+rax] vpaddd ymm0,ymm0,ymm6 vpsrld ymm8,ymm1,27 vpxor ymm5,ymm4,ymm3 vpxor ymm10,ymm10,ymm12 vmovdqu YMMWORD[(352-256-128)+rbx],ymm14 vpaddd ymm0,ymm0,ymm14 vpor ymm7,ymm7,ymm8 vpsrld ymm9,ymm10,31 vpand ymm5,ymm5,ymm2 vpaddd ymm10,ymm10,ymm10 vpslld ymm6,ymm2,30 vpaddd ymm0,ymm0,ymm5 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vmovdqa ymm15,YMMWORD[64+rbp] vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((480-256-128))+rbx] vpslld ymm7,ymm0,5 vpaddd ymm4,ymm4,ymm15 vpxor ymm5,ymm3,ymm1 vmovdqa YMMWORD[(384-256-128)+rbx],ymm10 vpaddd ymm4,ymm4,ymm10 vpxor ymm11,ymm11,YMMWORD[((160-128))+rax] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm2 vpxor ymm11,ymm11,ymm13 vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd ymm4,ymm4,ymm5 vpsrld ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((0-128))+rax] vpslld ymm7,ymm4,5 vpaddd ymm3,ymm3,ymm15 vpxor ymm5,ymm2,ymm0 vmovdqa YMMWORD[(416-256-128)+rbx],ymm11 vpaddd ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((192-128))+rax] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm1 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((32-128))+rax] vpslld ymm7,ymm3,5 vpaddd ymm2,ymm2,ymm15 vpxor ymm5,ymm1,ymm4 vmovdqa 
YMMWORD[(448-256-128)+rbx],ymm12 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((224-128))+rax] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm0 vpxor ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((64-128))+rax] vpslld ymm7,ymm2,5 vpaddd ymm1,ymm1,ymm15 vpxor ymm5,ymm0,ymm3 vmovdqa YMMWORD[(480-256-128)+rbx],ymm13 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((256-256-128))+rbx] vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm4 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((96-128))+rax] vpslld ymm7,ymm1,5 vpaddd ymm0,ymm0,ymm15 vpxor ymm5,ymm4,ymm2 vmovdqa YMMWORD[(0-128)+rax],ymm14 vpaddd ymm0,ymm0,ymm14 vpxor ymm10,ymm10,YMMWORD[((288-256-128))+rbx] vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm3 vpxor ymm10,ymm10,ymm12 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd ymm0,ymm0,ymm5 vpsrld ymm9,ymm10,31 vpaddd ymm10,ymm10,ymm10 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((128-128))+rax] vpslld ymm7,ymm0,5 vpaddd ymm4,ymm4,ymm15 vpxor ymm5,ymm3,ymm1 vmovdqa YMMWORD[(32-128)+rax],ymm10 vpaddd ymm4,ymm4,ymm10 vpxor ymm11,ymm11,YMMWORD[((320-256-128))+rbx] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm2 vpxor ymm11,ymm11,ymm13 vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd ymm4,ymm4,ymm5 vpsrld ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((160-128))+rax] vpslld ymm7,ymm4,5 vpaddd ymm3,ymm3,ymm15 vpxor ymm5,ymm2,ymm0 vmovdqa YMMWORD[(64-128)+rax],ymm11 vpaddd 
ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((352-256-128))+rbx] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm1 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((192-128))+rax] vpslld ymm7,ymm3,5 vpaddd ymm2,ymm2,ymm15 vpxor ymm5,ymm1,ymm4 vmovdqa YMMWORD[(96-128)+rax],ymm12 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((384-256-128))+rbx] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm0 vpxor ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((224-128))+rax] vpslld ymm7,ymm2,5 vpaddd ymm1,ymm1,ymm15 vpxor ymm5,ymm0,ymm3 vmovdqa YMMWORD[(128-128)+rax],ymm13 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((416-256-128))+rbx] vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm4 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((256-256-128))+rbx] vpslld ymm7,ymm1,5 vpaddd ymm0,ymm0,ymm15 vpxor ymm5,ymm4,ymm2 vmovdqa YMMWORD[(160-128)+rax],ymm14 vpaddd ymm0,ymm0,ymm14 vpxor ymm10,ymm10,YMMWORD[((448-256-128))+rbx] vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm3 vpxor ymm10,ymm10,ymm12 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd ymm0,ymm0,ymm5 vpsrld ymm9,ymm10,31 vpaddd ymm10,ymm10,ymm10 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((288-256-128))+rbx] vpslld ymm7,ymm0,5 vpaddd ymm4,ymm4,ymm15 vpxor ymm5,ymm3,ymm1 vmovdqa YMMWORD[(192-128)+rax],ymm10 vpaddd ymm4,ymm4,ymm10 vpxor 
ymm11,ymm11,YMMWORD[((480-256-128))+rbx] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm2 vpxor ymm11,ymm11,ymm13 vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd ymm4,ymm4,ymm5 vpsrld ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((320-256-128))+rbx] vpslld ymm7,ymm4,5 vpaddd ymm3,ymm3,ymm15 vpxor ymm5,ymm2,ymm0 vmovdqa YMMWORD[(224-128)+rax],ymm11 vpaddd ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((0-128))+rax] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm1 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((352-256-128))+rbx] vpslld ymm7,ymm3,5 vpaddd ymm2,ymm2,ymm15 vpxor ymm5,ymm1,ymm4 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((32-128))+rax] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm0 vpxor ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((384-256-128))+rbx] vpslld ymm7,ymm2,5 vpaddd ymm1,ymm1,ymm15 vpxor ymm5,ymm0,ymm3 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((64-128))+rax] vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm4 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpxor ymm10,ymm10,ymm12 vmovdqa ymm12,YMMWORD[((416-256-128))+rbx] vpslld ymm7,ymm1,5 vpaddd ymm0,ymm0,ymm15 vpxor ymm5,ymm4,ymm2 vpaddd ymm0,ymm0,ymm14 vpxor ymm10,ymm10,YMMWORD[((96-128))+rax] vpsrld ymm8,ymm1,27 vpxor ymm5,ymm5,ymm3 vpxor ymm10,ymm10,ymm12 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd 
ymm0,ymm0,ymm5 vpsrld ymm9,ymm10,31 vpaddd ymm10,ymm10,ymm10 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm10,ymm10,ymm9 vpor ymm2,ymm2,ymm6 vpxor ymm11,ymm11,ymm13 vmovdqa ymm13,YMMWORD[((448-256-128))+rbx] vpslld ymm7,ymm0,5 vpaddd ymm4,ymm4,ymm15 vpxor ymm5,ymm3,ymm1 vpaddd ymm4,ymm4,ymm10 vpxor ymm11,ymm11,YMMWORD[((128-128))+rax] vpsrld ymm8,ymm0,27 vpxor ymm5,ymm5,ymm2 vpxor ymm11,ymm11,ymm13 vpslld ymm6,ymm1,30 vpor ymm7,ymm7,ymm8 vpaddd ymm4,ymm4,ymm5 vpsrld ymm9,ymm11,31 vpaddd ymm11,ymm11,ymm11 vpsrld ymm1,ymm1,2 vpaddd ymm4,ymm4,ymm7 vpor ymm11,ymm11,ymm9 vpor ymm1,ymm1,ymm6 vpxor ymm12,ymm12,ymm14 vmovdqa ymm14,YMMWORD[((480-256-128))+rbx] vpslld ymm7,ymm4,5 vpaddd ymm3,ymm3,ymm15 vpxor ymm5,ymm2,ymm0 vpaddd ymm3,ymm3,ymm11 vpxor ymm12,ymm12,YMMWORD[((160-128))+rax] vpsrld ymm8,ymm4,27 vpxor ymm5,ymm5,ymm1 vpxor ymm12,ymm12,ymm14 vpslld ymm6,ymm0,30 vpor ymm7,ymm7,ymm8 vpaddd ymm3,ymm3,ymm5 vpsrld ymm9,ymm12,31 vpaddd ymm12,ymm12,ymm12 vpsrld ymm0,ymm0,2 vpaddd ymm3,ymm3,ymm7 vpor ymm12,ymm12,ymm9 vpor ymm0,ymm0,ymm6 vpxor ymm13,ymm13,ymm10 vmovdqa ymm10,YMMWORD[((0-128))+rax] vpslld ymm7,ymm3,5 vpaddd ymm2,ymm2,ymm15 vpxor ymm5,ymm1,ymm4 vpaddd ymm2,ymm2,ymm12 vpxor ymm13,ymm13,YMMWORD[((192-128))+rax] vpsrld ymm8,ymm3,27 vpxor ymm5,ymm5,ymm0 vpxor ymm13,ymm13,ymm10 vpslld ymm6,ymm4,30 vpor ymm7,ymm7,ymm8 vpaddd ymm2,ymm2,ymm5 vpsrld ymm9,ymm13,31 vpaddd ymm13,ymm13,ymm13 vpsrld ymm4,ymm4,2 vpaddd ymm2,ymm2,ymm7 vpor ymm13,ymm13,ymm9 vpor ymm4,ymm4,ymm6 vpxor ymm14,ymm14,ymm11 vmovdqa ymm11,YMMWORD[((32-128))+rax] vpslld ymm7,ymm2,5 vpaddd ymm1,ymm1,ymm15 vpxor ymm5,ymm0,ymm3 vpaddd ymm1,ymm1,ymm13 vpxor ymm14,ymm14,YMMWORD[((224-128))+rax] vpsrld ymm8,ymm2,27 vpxor ymm5,ymm5,ymm4 vpxor ymm14,ymm14,ymm11 vpslld ymm6,ymm3,30 vpor ymm7,ymm7,ymm8 vpaddd ymm1,ymm1,ymm5 vpsrld ymm9,ymm14,31 vpaddd ymm14,ymm14,ymm14 vpsrld ymm3,ymm3,2 vpaddd ymm1,ymm1,ymm7 vpor ymm14,ymm14,ymm9 vpor ymm3,ymm3,ymm6 vpslld ymm7,ymm1,5 vpaddd ymm0,ymm0,ymm15 vpxor 
ymm5,ymm4,ymm2 vpsrld ymm8,ymm1,27 vpaddd ymm0,ymm0,ymm14 vpxor ymm5,ymm5,ymm3 vpslld ymm6,ymm2,30 vpor ymm7,ymm7,ymm8 vpaddd ymm0,ymm0,ymm5 vpsrld ymm2,ymm2,2 vpaddd ymm0,ymm0,ymm7 vpor ymm2,ymm2,ymm6 mov ecx,1 lea rbx,[512+rsp] cmp ecx,DWORD[rbx] cmovge r12,rbp cmp ecx,DWORD[4+rbx] cmovge r13,rbp cmp ecx,DWORD[8+rbx] cmovge r14,rbp cmp ecx,DWORD[12+rbx] cmovge r15,rbp cmp ecx,DWORD[16+rbx] cmovge r8,rbp cmp ecx,DWORD[20+rbx] cmovge r9,rbp cmp ecx,DWORD[24+rbx] cmovge r10,rbp cmp ecx,DWORD[28+rbx] cmovge r11,rbp vmovdqu ymm5,YMMWORD[rbx] vpxor ymm7,ymm7,ymm7 vmovdqa ymm6,ymm5 vpcmpgtd ymm6,ymm6,ymm7 vpaddd ymm5,ymm5,ymm6 vpand ymm0,ymm0,ymm6 vpand ymm1,ymm1,ymm6 vpaddd ymm0,ymm0,YMMWORD[rdi] vpand ymm2,ymm2,ymm6 vpaddd ymm1,ymm1,YMMWORD[32+rdi] vpand ymm3,ymm3,ymm6 vpaddd ymm2,ymm2,YMMWORD[64+rdi] vpand ymm4,ymm4,ymm6 vpaddd ymm3,ymm3,YMMWORD[96+rdi] vpaddd ymm4,ymm4,YMMWORD[128+rdi] vmovdqu YMMWORD[rdi],ymm0 vmovdqu YMMWORD[32+rdi],ymm1 vmovdqu YMMWORD[64+rdi],ymm2 vmovdqu YMMWORD[96+rdi],ymm3 vmovdqu YMMWORD[128+rdi],ymm4 vmovdqu YMMWORD[rbx],ymm5 lea rbx,[((256+128))+rsp] vmovdqu ymm9,YMMWORD[96+rbp] dec edx jnz NEAR $L$oop_avx2 $L$done_avx2: mov rax,QWORD[544+rsp] vzeroupper movaps xmm6,XMMWORD[((-216))+rax] movaps xmm7,XMMWORD[((-200))+rax] movaps xmm8,XMMWORD[((-184))+rax] movaps xmm9,XMMWORD[((-168))+rax] movaps xmm10,XMMWORD[((-152))+rax] movaps xmm11,XMMWORD[((-136))+rax] movaps xmm12,XMMWORD[((-120))+rax] movaps xmm13,XMMWORD[((-104))+rax] movaps xmm14,XMMWORD[((-88))+rax] movaps xmm15,XMMWORD[((-72))+rax] mov r15,QWORD[((-48))+rax] mov r14,QWORD[((-40))+rax] mov r13,QWORD[((-32))+rax] mov r12,QWORD[((-24))+rax] mov rbp,QWORD[((-16))+rax] mov rbx,QWORD[((-8))+rax] lea rsp,[rax] $L$epilogue_avx2: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_sha1_multi_block_avx2: ALIGN 256 DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 K_XX_XX: DD 
0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     ; operands of the DD that follows the K_XX_XX label
        DD      0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     ; K_XX_XX+0:  SHA-1 K, rounds 20..39
        DD      0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     ; K_XX_XX+32: SHA-1 K, rounds 40..59
        DD      0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
        DD      0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     ; K_XX_XX+64: SHA-1 K, rounds 60..79
        DD      0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
        DD      0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     ; K_XX_XX+96: shuffle mask that byte-swaps each dword
        DD      0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
        ; 16-byte full-reversal shuffle mask (its consumer is outside this
        ; excerpt -- presumably the SHA-extension path; confirm there).
        DB      0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
        ; ASCII: "SHA1 multi-block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>",0
        DB      83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107
        DB      32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120
        DB      56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77
        DB      83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110
        DB      115,115,108,46,111,114,103,62,0

EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by the
; non-AVX2 code paths (see the .xdata records at the end of the file).
;
; ABI:  Microsoft x64 handler signature. Only two arguments are used:
;         r8 = CONTEXT *            (register snapshot being unwound)
;         r9 = DISPATCHER_CONTEXT *
;       Field names in the comments below come from the Windows x64
;       layouts of those structures; the numeric offsets are the code's.
; Out:  eax = 1 (ExceptionContinueSearch)
;
; HandlerData (at [56+r9]) holds two image-relative offsets: the end-of-
; prologue label and the epilogue label of the faulting function.
;   * Rip <  body label     : frame not built yet, caller's rsp is still
;                             in context->Rax.
;   * Rip >= epilogue label : frame already torn down, context->Rsp is
;                             the caller's rsp.
;   * otherwise             : the function stored its entry rsp at
;                             [272+rsp]; fetch it and restore rbx, rbp
;                             and xmm6..xmm15 from below that address.
; In every case rsi/rdi are reloaded from the home slots at [8+rsp] and
; [16+rsp], the fixed-up CONTEXT is copied into disp->ContextRecord and
; RtlVirtualUnwind is invoked to step to the caller's frame.
;-----------------------------------------------------------------------
ALIGN   16
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                          ; 32B shadow space + 4 stack args for the call below

        mov     rax,QWORD[120+r8]               ; context->Rax
        mov     rbx,QWORD[248+r8]               ; context->Rip

        mov     rsi,QWORD[8+r9]                 ; disp->ImageBase
        mov     r11,QWORD[56+r9]                ; disp->HandlerData

        mov     r10d,DWORD[r11]                 ; HandlerData[0]: RVA of body label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jb      NEAR $L$in_prologue             ; Rip < body: still in prologue

        mov     rax,QWORD[152+r8]               ; context->Rsp

        mov     r10d,DWORD[4+r11]               ; HandlerData[1]: RVA of epilogue label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jae     NEAR $L$in_prologue             ; Rip >= epilogue: frame already released

        mov     rax,QWORD[272+rax]              ; entry rsp saved by the function at [272+rsp]

        mov     rbx,QWORD[((-8))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     QWORD[144+r8],rbx               ; context->Rbx
        mov     QWORD[160+r8],rbp               ; context->Rbp

        lea     rsi,[((-24-160))+rax]           ; saved xmm6..xmm15 (10 * 16 bytes)
        lea     rdi,[512+r8]                    ; &context->Xmm6
        mov     ecx,20                          ; 20 qwords = 160 bytes
        DD      0xa548f3fc                      ; cld; rep movsq

$L$in_prologue:
        mov     rdi,QWORD[8+rax]                ; rdi/rsi as stored by the WIN64 prologue
        mov     rsi,QWORD[16+rax]
        mov     QWORD[152+r8],rax               ; context->Rsp
        mov     QWORD[168+r8],rsi               ; context->Rsi
        mov     QWORD[176+r8],rdi               ; context->Rdi

        mov     rdi,QWORD[40+r9]                ; disp->ContextRecord
        mov     rsi,r8                          ; source = fixed-up context
        mov     ecx,154                         ; 154 qwords = 1232 bytes
        DD      0xa548f3fc                      ; cld; rep movsq

        mov     rsi,r9
        xor     rcx,rcx                         ; arg1 = 0 (UNW_FLAG_NHANDLER)
        mov     rdx,QWORD[8+rsi]                ; arg2 = disp->ImageBase
        mov     r8,QWORD[rsi]                   ; arg3 = disp->ControlPc
        mov     r9,QWORD[16+rsi]                ; arg4 = disp->FunctionEntry
        mov     r10,QWORD[40+rsi]               ; disp->ContextRecord
        lea     r11,[56+rsi]                    ; &disp->HandlerData
        lea     r12,[24+rsi]                    ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10               ; arg5
        mov     QWORD[40+rsp],r11               ; arg6
        mov     QWORD[48+rsp],r12               ; arg7
        mov     QWORD[56+rsp],rcx               ; arg8 = NULL
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                           ; ExceptionContinueSearch
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h                       ;repret

;-----------------------------------------------------------------------
; avx2_handler -- same contract as se_handler, for the AVX2 code path.
;
; Differences from se_handler:
;   * the AVX2 routine keeps its entry rsp at [544+rsp] (its epilogue
;     reloads it with "mov rax,QWORD[544+rsp]");
;   * it additionally saves r12..r15, so those are restored too and the
;     xmm6..xmm15 save area sits 56 bytes (not 24) below the entry rsp.
; The common tail ($L$in_prologue in se_handler) finishes the job.
;
; Fix: the saved stack pointer is now read relative to context->Rsp
; (held in rax), i.e. [544+rax]. The previous [544+r8] indexed the
; CONTEXT record itself, which at +544 is inside the XMM save area
; (Xmm6 starts at +512), so every register restored below was loaded
; through a garbage pointer.
;-----------------------------------------------------------------------
ALIGN   16
avx2_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                          ; matches the frame the shared tail unwinds

        mov     rax,QWORD[120+r8]               ; context->Rax
        mov     rbx,QWORD[248+r8]               ; context->Rip

        mov     rsi,QWORD[8+r9]                 ; disp->ImageBase
        mov     r11,QWORD[56+r9]                ; disp->HandlerData

        mov     r10d,DWORD[r11]                 ; HandlerData[0]: RVA of body label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jb      NEAR $L$in_prologue             ; Rip < body: still in prologue

        mov     rax,QWORD[152+r8]               ; context->Rsp

        mov     r10d,DWORD[4+r11]               ; HandlerData[1]: RVA of epilogue label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jae     NEAR $L$in_prologue             ; Rip >= epilogue: frame already released

        mov     rax,QWORD[544+rax]              ; entry rsp saved by the function at [544+rsp]

        mov     rbx,QWORD[((-8))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r15,QWORD[((-48))+rax]
        mov     QWORD[144+r8],rbx               ; context->Rbx
        mov     QWORD[160+r8],rbp               ; context->Rbp
        mov     QWORD[216+r8],r12               ; context->R12
        mov     QWORD[224+r8],r13               ; context->R13
        mov     QWORD[232+r8],r14               ; context->R14
        mov     QWORD[240+r8],r15               ; context->R15

        lea     rsi,[((-56-160))+rax]           ; saved xmm6..xmm15 (10 * 16 bytes)
        lea     rdi,[512+r8]                    ; &context->Xmm6
        mov     ecx,20                          ; 20 qwords = 160 bytes
        DD      0xa548f3fc                      ; cld; rep movsq

        jmp     NEAR $L$in_prologue             ; shared tail in se_handler

; Function table: one (begin RVA, end RVA, unwind-info RVA) triple per
; code path of sha1_multi_block.
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_sha1_multi_block wrt ..imagebase
        DD      $L$SEH_end_sha1_multi_block wrt ..imagebase
        DD      $L$SEH_info_sha1_multi_block wrt ..imagebase
        DD      $L$SEH_begin_sha1_multi_block_shaext wrt ..imagebase
        DD      $L$SEH_end_sha1_multi_block_shaext wrt ..imagebase
        DD      $L$SEH_info_sha1_multi_block_shaext wrt ..imagebase
        DD      $L$SEH_begin_sha1_multi_block_avx wrt ..imagebase
        DD      $L$SEH_end_sha1_multi_block_avx wrt ..imagebase
        DD      $L$SEH_info_sha1_multi_block_avx wrt ..imagebase
        DD      $L$SEH_begin_sha1_multi_block_avx2 wrt ..imagebase
        DD      $L$SEH_end_sha1_multi_block_avx2 wrt ..imagebase
        DD      $L$SEH_info_sha1_multi_block_avx2 wrt ..imagebase

; Unwind info records. Each starts with the header bytes 9,0,0,0
; (version 1 with the exception-handler flag, no unwind codes), then the
; handler RVA, then the two HandlerData words the handler reads:
; body-label RVA and epilogue-label RVA.
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_sha1_multi_block:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$body wrt ..imagebase,$L$epilogue wrt ..imagebase
$L$SEH_info_sha1_multi_block_shaext:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase
$L$SEH_info_sha1_multi_block_avx:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
$L$SEH_info_sha1_multi_block_avx2:
        DB      9,0,0,0
        DD      avx2_handler wrt ..imagebase
        DD      $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase