diff options
Diffstat (limited to 'deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4')
-rw-r--r-- | deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4/rc4-md5-x86_64.asm | 1388 | ||||
-rw-r--r-- | deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4/rc4-x86_64.asm | 777 |
2 files changed, 2165 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4/rc4-md5-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4/rc4-md5-x86_64.asm new file mode 100644 index 0000000000..5e42fe63df --- /dev/null +++ b/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4/rc4-md5-x86_64.asm @@ -0,0 +1,1388 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +ALIGN 16 + +global rc4_md5_enc + +rc4_md5_enc: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rc4_md5_enc: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD[40+rsp] + mov r9,QWORD[48+rsp] + + + + cmp r9,0 + je NEAR $L$abort + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,40 + +$L$body: + mov r11,rcx + mov r12,r9 + mov r13,rsi + mov r14,rdx + mov r15,r8 + xor rbp,rbp + xor rcx,rcx + + lea rdi,[8+rdi] + mov bpl,BYTE[((-8))+rdi] + mov cl,BYTE[((-4))+rdi] + + inc bpl + sub r14,r13 + mov eax,DWORD[rbp*4+rdi] + add cl,al + lea rsi,[rbp*4+rdi] + shl r12,6 + add r12,r15 + mov QWORD[16+rsp],r12 + + mov QWORD[24+rsp],r11 + mov r8d,DWORD[r11] + mov r9d,DWORD[4+r11] + mov r10d,DWORD[8+r11] + mov r11d,DWORD[12+r11] + jmp NEAR $L$oop + +ALIGN 16 +$L$oop: + mov DWORD[rsp],r8d + mov DWORD[4+rsp],r9d + mov DWORD[8+rsp],r10d + mov r12d,r11d + mov DWORD[12+rsp],r11d + pxor xmm0,xmm0 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD[r15] + add al,dl + mov ebx,DWORD[4+rsi] + add r8d,3614090360 + xor r12d,r11d + movzx eax,al + mov DWORD[rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + movd xmm0,DWORD[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD[4+r15] + add bl,dl + mov eax,DWORD[8+rsi] + add r11d,3905402710 + xor r12d,r10d + movzx ebx,bl + mov DWORD[4+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + movd xmm1,DWORD[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD[8+r15] + add al,dl + mov ebx,DWORD[12+rsi] + add r10d,606105819 + xor r12d,r9d + movzx eax,al + mov DWORD[8+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD[12+r15] + add bl,dl + mov eax,DWORD[16+rsi] + add r9d,3250441966 + xor r12d,r8d + movzx ebx,bl + mov DWORD[12+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD[16+r15] + add al,dl + mov ebx,DWORD[20+rsi] + add r8d,4118548399 + xor r12d,r11d + movzx eax,al + mov DWORD[16+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD[20+r15] + add bl,dl + mov eax,DWORD[24+rsi] + add r11d,1200080426 + xor r12d,r10d + movzx ebx,bl + mov DWORD[20+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD[24+r15] + add al,dl + mov ebx,DWORD[28+rsi] + add r10d,2821735955 + xor r12d,r9d + movzx eax,al + mov DWORD[24+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD[28+r15] + add bl,dl + mov eax,DWORD[32+rsi] + add r9d,4249261313 + xor r12d,r8d + movzx ebx,bl + mov DWORD[28+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD[32+r15] + add al,dl + mov ebx,DWORD[36+rsi] + add r8d,1770035416 + xor r12d,r11d + movzx eax,al + mov DWORD[32+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD[36+r15] + add bl,dl + mov eax,DWORD[40+rsi] + add r11d,2336552879 + xor r12d,r10d + movzx ebx,bl + mov DWORD[36+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD[40+r15] + add al,dl + mov ebx,DWORD[44+rsi] + add r10d,4294925233 + xor r12d,r9d + movzx eax,al + mov DWORD[40+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD[44+r15] + add bl,dl + mov eax,DWORD[48+rsi] + add r9d,2304563134 + xor r12d,r8d + movzx ebx,bl + mov DWORD[44+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD[48+r15] + add al,dl + mov ebx,DWORD[52+rsi] + add r8d,1804603682 + xor r12d,r11d + movzx eax,al + mov DWORD[48+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD[52+r15] + add bl,dl + mov eax,DWORD[56+rsi] + add r11d,4254626195 + xor r12d,r10d + movzx ebx,bl + mov DWORD[52+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD[56+r15] + add al,dl + mov ebx,DWORD[60+rsi] + add r10d,2792965006 + xor r12d,r9d + movzx eax,al + mov DWORD[56+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm2,XMMWORD[r13] + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD[60+r15] + add bl,dl + mov eax,DWORD[64+rsi] + add r9d,1236535329 + xor r12d,r8d + movzx ebx,bl + mov DWORD[60+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r10d + pinsrw xmm1,WORD[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD[4+r15] + add al,dl + mov ebx,DWORD[68+rsi] + add r8d,4129170786 + xor r12d,r10d + movzx eax,al + mov DWORD[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + movd xmm0,DWORD[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD[24+r15] + add bl,dl + mov eax,DWORD[72+rsi] + add r11d,3225465664 + xor r12d,r9d + movzx ebx,bl + mov DWORD[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + movd xmm1,DWORD[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD[44+r15] + add al,dl + mov ebx,DWORD[76+rsi] + add r10d,643717713 + xor r12d,r8d + movzx eax,al + mov DWORD[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD[r15] + add bl,dl + mov eax,DWORD[80+rsi] + add r9d,3921069994 + xor r12d,r11d + movzx ebx,bl + mov DWORD[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD[20+r15] + add al,dl + mov ebx,DWORD[84+rsi] + add r8d,3593408605 + xor r12d,r10d + movzx eax,al + mov DWORD[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD[40+r15] + add bl,dl + mov eax,DWORD[88+rsi] + add r11d,38016083 + xor r12d,r9d + movzx ebx,bl + mov DWORD[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD[60+r15] + add al,dl + mov ebx,DWORD[92+rsi] + add r10d,3634488961 + xor r12d,r8d + movzx eax,al + mov DWORD[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD[16+r15] + add bl,dl + mov eax,DWORD[96+rsi] + add r9d,3889429448 + xor r12d,r11d + movzx ebx,bl + mov DWORD[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD[36+r15] + add al,dl + mov ebx,DWORD[100+rsi] + add r8d,568446438 + xor r12d,r10d + movzx eax,al + mov DWORD[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD[56+r15] + add bl,dl + mov eax,DWORD[104+rsi] + add r11d,3275163606 + xor r12d,r9d + movzx ebx,bl + mov DWORD[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD[12+r15] + add al,dl + mov ebx,DWORD[108+rsi] + add r10d,4107603335 + xor r12d,r8d + movzx eax,al + mov DWORD[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD[32+r15] + add bl,dl + mov eax,DWORD[112+rsi] + add r9d,1163531501 + xor r12d,r11d + movzx ebx,bl + mov DWORD[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD[52+r15] + add al,dl + mov ebx,DWORD[116+rsi] + add r8d,2850285829 + xor r12d,r10d + movzx eax,al + mov DWORD[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD[8+r15] + add bl,dl + mov eax,DWORD[120+rsi] + add r11d,4243563512 + xor r12d,r9d + movzx ebx,bl + mov DWORD[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD[28+r15] + add al,dl + mov ebx,DWORD[124+rsi] + add r10d,1735328473 + xor r12d,r8d + movzx eax,al + mov DWORD[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm3,XMMWORD[16+r13] + add bpl,32 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD[48+r15] + add bl,dl + mov eax,DWORD[rbp*4+rdi] + add r9d,2368359562 + xor r12d,r11d + movzx ebx,bl + mov DWORD[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,[rbp*4+rdi] + psllq xmm1,8 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD[20+r15] + add al,dl + mov ebx,DWORD[4+rsi] + add r8d,4294588738 + movzx eax,al + add r8d,r12d + mov DWORD[rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + movd xmm0,DWORD[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD[32+r15] + add bl,dl + mov eax,DWORD[8+rsi] + add r11d,2272392833 + movzx ebx,bl + add r11d,r12d + mov DWORD[4+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + movd xmm1,DWORD[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD[44+r15] + add al,dl + mov ebx,DWORD[12+rsi] + add r10d,1839030562 + movzx eax,al + add r10d,r12d + mov DWORD[8+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD[56+r15] + add bl,dl + mov eax,DWORD[16+rsi] + add r9d,4259657740 + movzx ebx,bl + add r9d,r12d + mov DWORD[12+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD[4+r15] + add al,dl + mov ebx,DWORD[20+rsi] + add r8d,2763975236 + movzx eax,al + add r8d,r12d + mov DWORD[16+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD[16+r15] + add bl,dl + mov eax,DWORD[24+rsi] + add r11d,1272893353 + movzx ebx,bl + add r11d,r12d + mov DWORD[20+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD[28+r15] + add al,dl + mov ebx,DWORD[28+rsi] + add r10d,4139469664 + movzx eax,al + add r10d,r12d + mov DWORD[24+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD[40+r15] + add bl,dl + mov eax,DWORD[32+rsi] + add r9d,3200236656 + movzx ebx,bl + add r9d,r12d + mov DWORD[28+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD[52+r15] + add al,dl + mov ebx,DWORD[36+rsi] + add r8d,681279174 + movzx eax,al + add r8d,r12d + mov DWORD[32+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD[r15] + add bl,dl + mov eax,DWORD[40+rsi] + add r11d,3936430074 + movzx ebx,bl + add r11d,r12d + mov DWORD[36+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD[12+r15] + add al,dl + mov ebx,DWORD[44+rsi] + add r10d,3572445317 + movzx eax,al + add r10d,r12d + mov DWORD[40+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD[24+r15] + add bl,dl + mov eax,DWORD[48+rsi] + add r9d,76029189 + movzx ebx,bl + add r9d,r12d + mov DWORD[44+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD[36+r15] + add al,dl + mov ebx,DWORD[52+rsi] + add r8d,3654602809 + movzx eax,al + add r8d,r12d + mov DWORD[48+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD[48+r15] + add bl,dl + mov eax,DWORD[56+rsi] + add r11d,3873151461 + movzx ebx,bl + add r11d,r12d + mov DWORD[52+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD[60+r15] + add al,dl + mov ebx,DWORD[60+rsi] + add r10d,530742520 + movzx eax,al + add r10d,r12d + mov DWORD[56+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm4,XMMWORD[32+r13] + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD[8+r15] + add bl,dl + mov eax,DWORD[64+rsi] + add r9d,3299628645 + movzx ebx,bl + add r9d,r12d + mov DWORD[60+rsi],edx + add cl,al + rol r9d,23 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm4,xmm0 + pxor xmm4,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD[r15] + add al,dl + mov ebx,DWORD[68+rsi] + add r8d,4096336452 + movzx eax,al + xor r12d,r10d + mov DWORD[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + movd xmm0,DWORD[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD[28+r15] + add bl,dl + mov eax,DWORD[72+rsi] + add r11d,1126891415 + movzx ebx,bl + xor r12d,r9d + mov DWORD[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + movd xmm1,DWORD[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD[56+r15] + add al,dl + mov ebx,DWORD[76+rsi] + add r10d,2878612391 + movzx eax,al + xor r12d,r8d + mov DWORD[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD[20+r15] + add bl,dl + mov eax,DWORD[80+rsi] + add r9d,4237533241 + movzx ebx,bl + xor r12d,r11d + mov DWORD[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD[48+r15] + add al,dl + mov ebx,DWORD[84+rsi] + add r8d,1700485571 + movzx eax,al + xor r12d,r10d + mov DWORD[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD[12+r15] + add bl,dl + mov eax,DWORD[88+rsi] + add r11d,2399980690 + movzx ebx,bl + xor r12d,r9d + mov DWORD[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD[40+r15] + add al,dl + mov ebx,DWORD[92+rsi] + add r10d,4293915773 + movzx eax,al + xor r12d,r8d + mov DWORD[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD[4+r15] + add bl,dl + mov eax,DWORD[96+rsi] + add r9d,2240044497 + movzx ebx,bl + xor r12d,r11d + mov DWORD[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD[32+r15] + add al,dl + mov ebx,DWORD[100+rsi] + add r8d,1873313359 + movzx eax,al + xor r12d,r10d + mov DWORD[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD[60+r15] + add bl,dl + mov eax,DWORD[104+rsi] + add r11d,4264355552 + movzx ebx,bl + xor r12d,r9d + mov DWORD[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD[24+r15] + add al,dl + mov ebx,DWORD[108+rsi] + add r10d,2734768916 + movzx eax,al + xor r12d,r8d + mov DWORD[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD[52+r15] + add bl,dl + mov eax,DWORD[112+rsi] + add r9d,1309151649 + movzx ebx,bl + xor r12d,r11d + mov DWORD[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r11d + mov DWORD[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD[16+r15] + add al,dl + mov ebx,DWORD[116+rsi] + add r8d,4149444226 + movzx eax,al + xor r12d,r10d + mov DWORD[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r10d + mov DWORD[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD[44+r15] + add bl,dl + mov eax,DWORD[120+rsi] + add r11d,3174756917 + movzx ebx,bl + xor r12d,r9d + mov DWORD[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD[rcx*4+rdi] + xor r12d,r9d + mov DWORD[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD[8+r15] + add al,dl + mov ebx,DWORD[124+rsi] + add r10d,718787259 + movzx eax,al + xor r12d,r8d + mov DWORD[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm5,XMMWORD[48+r13] + add bpl,32 + mov edx,DWORD[rcx*4+rdi] + xor r12d,r8d + mov DWORD[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD[36+r15] + add bl,dl + mov eax,DWORD[rbp*4+rdi] + add r9d,3951481745 + movzx ebx,bl + xor r12d,r11d + mov DWORD[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rbp + xor rbp,rbp + mov bpl,sil + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,[rbp*4+rdi] + psllq xmm1,8 + pxor xmm5,xmm0 + pxor xmm5,xmm1 + add r8d,DWORD[rsp] + add r9d,DWORD[4+rsp] + add r10d,DWORD[8+rsp] + add r11d,DWORD[12+rsp] + + movdqu XMMWORD[r13*1+r14],xmm2 + movdqu XMMWORD[16+r13*1+r14],xmm3 + movdqu XMMWORD[32+r13*1+r14],xmm4 + movdqu XMMWORD[48+r13*1+r14],xmm5 + lea r15,[64+r15] + lea r13,[64+r13] + cmp r15,QWORD[16+rsp] + jb NEAR $L$oop + + mov r12,QWORD[24+rsp] + sub cl,al + mov DWORD[r12],r8d + mov DWORD[4+r12],r9d + mov DWORD[8+r12],r10d + mov DWORD[12+r12],r11d + sub bpl,1 + mov DWORD[((-8))+rdi],ebp + mov DWORD[((-4))+rdi],ecx + + mov r15,QWORD[40+rsp] + + mov r14,QWORD[48+rsp] + + mov r13,QWORD[56+rsp] + + mov r12,QWORD[64+rsp] + + mov rbp,QWORD[72+rsp] + + mov rbx,QWORD[80+rsp] + + lea rsp,[88+rsp] + +$L$epilogue: +$L$abort: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_rc4_md5_enc: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$body] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$in_prologue + + mov r15,QWORD[40+rax] + mov r14,QWORD[48+rax] + mov r13,QWORD[56+rax] + mov r12,QWORD[64+rax] + mov rbp,QWORD[72+rax] + mov rbx,QWORD[80+rax] + lea rax,[88+rax] + + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_rc4_md5_enc wrt ..imagebase + DD $L$SEH_end_rc4_md5_enc wrt ..imagebase + DD $L$SEH_info_rc4_md5_enc wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_rc4_md5_enc: +DB 9,0,0,0 + DD se_handler wrt ..imagebase diff --git a/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4/rc4-x86_64.asm b/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4/rc4-x86_64.asm new file mode 100644 index 0000000000..5732b40ed6 --- /dev/null +++ b/deps/openssl/config/archs/VC-WIN64A/asm_avx2/crypto/rc4/rc4-x86_64.asm @@ -0,0 +1,777 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P + +global RC4 + +ALIGN 16 +RC4: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_RC4: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + or rsi,rsi + jne NEAR $L$entry + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$entry: + + push rbx + + push r12 + + push r13 + +$L$prologue: + mov r11,rsi + mov r12,rdx + mov r13,rcx + xor r10,r10 + xor rcx,rcx + + lea rdi,[8+rdi] + mov r10b,BYTE[((-8))+rdi] + mov cl,BYTE[((-4))+rdi] + cmp DWORD[256+rdi],-1 + je NEAR $L$RC4_CHAR + mov r8d,DWORD[OPENSSL_ia32cap_P] + xor rbx,rbx + inc r10b + sub rbx,r10 + sub r13,r12 + mov eax,DWORD[r10*4+rdi] + test r11,-16 + jz NEAR $L$loop1 + bt r8d,30 + jc NEAR $L$intel + and rbx,7 + lea rsi,[1+r10] + jz NEAR $L$oop8 + sub r11,rbx +$L$oop8_warmup: + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov DWORD[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD[rax*4+rdi] + mov eax,DWORD[r10*4+rdi] + xor dl,BYTE[r12] + mov BYTE[r13*1+r12],dl + lea r12,[1+r12] + dec rbx + jnz NEAR $L$oop8_warmup + + lea rsi,[1+r10] + jmp NEAR $L$oop8 +ALIGN 16 +$L$oop8: + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov ebx,DWORD[rsi*4+rdi] + ror r8,8 + mov DWORD[r10*4+rdi],edx + add dl,al + mov r8b,BYTE[rdx*4+rdi] + add cl,bl + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + mov eax,DWORD[4+rsi*4+rdi] + ror r8,8 + mov DWORD[4+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE[rdx*4+rdi] + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov ebx,DWORD[8+rsi*4+rdi] + ror r8,8 + mov DWORD[8+r10*4+rdi],edx + add dl,al + mov r8b,BYTE[rdx*4+rdi] + add cl,bl + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + mov eax,DWORD[12+rsi*4+rdi] + ror r8,8 + mov DWORD[12+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE[rdx*4+rdi] + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov ebx,DWORD[16+rsi*4+rdi] + ror r8,8 + mov DWORD[16+r10*4+rdi],edx + add dl,al + mov r8b,BYTE[rdx*4+rdi] + add cl,bl + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + mov eax,DWORD[20+rsi*4+rdi] + ror r8,8 + mov DWORD[20+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE[rdx*4+rdi] + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov ebx,DWORD[24+rsi*4+rdi] + ror r8,8 + mov DWORD[24+r10*4+rdi],edx + add dl,al + mov r8b,BYTE[rdx*4+rdi] + add sil,8 + add cl,bl + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + mov eax,DWORD[((-4))+rsi*4+rdi] + ror r8,8 + mov DWORD[28+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE[rdx*4+rdi] + add r10b,8 + ror r8,8 + sub r11,8 + + xor r8,QWORD[r12] + mov QWORD[r13*1+r12],r8 + lea r12,[8+r12] + + test r11,-8 + jnz NEAR $L$oop8 + cmp r11,0 + jne NEAR $L$loop1 + jmp NEAR $L$exit + +ALIGN 16 +$L$intel: + test r11,-32 + jz NEAR $L$loop1 + and rbx,15 + jz NEAR $L$oop16_is_hot + sub r11,rbx +$L$oop16_warmup: + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov DWORD[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD[rax*4+rdi] + mov eax,DWORD[r10*4+rdi] + xor dl,BYTE[r12] + mov BYTE[r13*1+r12],dl + lea r12,[1+r12] + dec rbx + jnz NEAR $L$oop16_warmup + + mov rbx,rcx + xor rcx,rcx + mov cl,bl + +$L$oop16_is_hot: + lea rsi,[r10*4+rdi] + add cl,al + mov edx,DWORD[rcx*4+rdi] + pxor xmm0,xmm0 + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[4+rsi] + movzx eax,al + mov DWORD[rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],0 + jmp NEAR $L$oop16_enter +ALIGN 16 +$L$oop16: + add cl,al + mov edx,DWORD[rcx*4+rdi] + pxor xmm2,xmm0 + psllq xmm1,8 + pxor xmm0,xmm0 + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[4+rsi] + movzx eax,al + mov DWORD[rsi],edx + pxor xmm2,xmm1 + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],0 + movdqu XMMWORD[r13*1+r12],xmm2 + lea r12,[16+r12] +$L$oop16_enter: + mov edx,DWORD[rcx*4+rdi] + pxor xmm1,xmm1 + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[8+rsi] + movzx ebx,bl + mov DWORD[4+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],0 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[12+rsi] + movzx eax,al + mov DWORD[8+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],1 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[16+rsi] + movzx ebx,bl + mov DWORD[12+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],1 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[20+rsi] + movzx eax,al + mov DWORD[16+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],2 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[24+rsi] + movzx ebx,bl + mov DWORD[20+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],2 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[28+rsi] + movzx eax,al + mov DWORD[24+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],3 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[32+rsi] + movzx ebx,bl + mov DWORD[28+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],3 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[36+rsi] + movzx eax,al + mov DWORD[32+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],4 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[40+rsi] + movzx ebx,bl + mov DWORD[36+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],4 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[44+rsi] + movzx eax,al + mov DWORD[40+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],5 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[48+rsi] + movzx ebx,bl + mov DWORD[44+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],5 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[52+rsi] + movzx eax,al + mov DWORD[48+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],6 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD[56+rsi] + movzx ebx,bl + mov DWORD[52+rsi],edx + add cl,al + pinsrw xmm1,WORD[rbx*4+rdi],6 + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD[60+rsi] + movzx eax,al + mov DWORD[56+rsi],edx + add cl,bl + pinsrw xmm0,WORD[rax*4+rdi],7 + add r10b,16 + movdqu xmm2,XMMWORD[r12] + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],ebx + add bl,dl + movzx ebx,bl + mov DWORD[60+rsi],edx + lea rsi,[r10*4+rdi] + pinsrw xmm1,WORD[rbx*4+rdi],7 + mov eax,DWORD[rsi] + mov rbx,rcx + xor rcx,rcx + sub r11,16 + mov cl,bl + test r11,-16 + jnz NEAR $L$oop16 + + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + movdqu XMMWORD[r13*1+r12],xmm2 + lea r12,[16+r12] + + cmp r11,0 + jne NEAR $L$loop1 + jmp NEAR $L$exit + +ALIGN 16 +$L$loop1: + add cl,al + mov edx,DWORD[rcx*4+rdi] + mov DWORD[rcx*4+rdi],eax + mov DWORD[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD[rax*4+rdi] + mov eax,DWORD[r10*4+rdi] + xor dl,BYTE[r12] + mov BYTE[r13*1+r12],dl + lea r12,[1+r12] + dec r11 + jnz NEAR $L$loop1 + jmp NEAR $L$exit + +ALIGN 16 +$L$RC4_CHAR: + add r10b,1 + movzx eax,BYTE[r10*1+rdi] + test r11,-8 + jz NEAR $L$cloop1 + jmp NEAR $L$cloop8 +ALIGN 16 +$L$cloop8: + mov r8d,DWORD[r12] + mov r9d,DWORD[4+r12] + add cl,al + lea rsi,[1+r10] + movzx edx,BYTE[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE[rsi*1+rdi] + mov BYTE[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE[r10*1+rdi],dl + jne NEAR $L$cmov0 + mov rbx,rax +$L$cmov0: + add dl,al + xor r8b,BYTE[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,[1+rsi] + movzx edx,BYTE[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE[r10*1+rdi] + mov BYTE[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE[rsi*1+rdi],dl + jne NEAR $L$cmov1 + mov rax,rbx +$L$cmov1: + add dl,bl + xor r8b,BYTE[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,[1+r10] + movzx edx,BYTE[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE[rsi*1+rdi] + mov BYTE[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE[r10*1+rdi],dl + jne NEAR $L$cmov2 + mov rbx,rax +$L$cmov2: + add dl,al + xor r8b,BYTE[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,[1+rsi] + movzx edx,BYTE[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE[r10*1+rdi] + mov BYTE[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE[rsi*1+rdi],dl + jne NEAR $L$cmov3 + mov rax,rbx +$L$cmov3: + add dl,bl + xor r8b,BYTE[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,[1+r10] + movzx edx,BYTE[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE[rsi*1+rdi] + mov BYTE[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE[r10*1+rdi],dl + jne NEAR $L$cmov4 + mov rbx,rax +$L$cmov4: + add dl,al + xor r9b,BYTE[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,[1+rsi] + movzx edx,BYTE[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE[r10*1+rdi] + mov BYTE[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE[rsi*1+rdi],dl + jne NEAR $L$cmov5 + mov rax,rbx +$L$cmov5: + add dl,bl + xor r9b,BYTE[rdx*1+rdi] + ror r9d,8 + add cl,al + lea rsi,[1+r10] + movzx edx,BYTE[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE[rsi*1+rdi] + mov BYTE[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE[r10*1+rdi],dl + jne NEAR $L$cmov6 + mov rbx,rax +$L$cmov6: + add dl,al + xor r9b,BYTE[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,[1+rsi] + movzx edx,BYTE[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE[r10*1+rdi] + mov BYTE[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE[rsi*1+rdi],dl + jne NEAR $L$cmov7 + mov rax,rbx +$L$cmov7: + add dl,bl + xor r9b,BYTE[rdx*1+rdi] + ror r9d,8 + lea r11,[((-8))+r11] + mov DWORD[r13],r8d + lea r12,[8+r12] + mov DWORD[4+r13],r9d + lea r13,[8+r13] + + test r11,-8 + jnz NEAR $L$cloop8 + cmp r11,0 + jne NEAR $L$cloop1 + jmp NEAR $L$exit +ALIGN 16 +$L$cloop1: + add cl,al + movzx ecx,cl + movzx edx,BYTE[rcx*1+rdi] + mov BYTE[rcx*1+rdi],al + mov BYTE[r10*1+rdi],dl + add dl,al + add r10b,1 + movzx edx,dl + movzx r10d,r10b + movzx edx,BYTE[rdx*1+rdi] + movzx eax,BYTE[r10*1+rdi] + xor dl,BYTE[r12] + lea r12,[1+r12] + mov BYTE[r13],dl + lea r13,[1+r13] + sub r11,1 + jnz NEAR $L$cloop1 + jmp NEAR $L$exit + +ALIGN 16 +$L$exit: + sub r10b,1 + mov DWORD[((-8))+rdi],r10d + mov DWORD[((-4))+rdi],ecx + + mov r13,QWORD[rsp] + + mov r12,QWORD[8+rsp] + + mov rbx,QWORD[16+rsp] + + add rsp,24 + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_RC4: +global RC4_set_key + +ALIGN 16 +RC4_set_key: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_RC4_set_key: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rdi,[8+rdi] + lea rdx,[rsi*1+rdx] + neg rsi + mov rcx,rsi + xor eax,eax + xor r9,r9 + xor r10,r10 + xor r11,r11 + + mov r8d,DWORD[OPENSSL_ia32cap_P] + bt r8d,20 + jc NEAR $L$c1stloop + jmp NEAR $L$w1stloop + +ALIGN 16 +$L$w1stloop: + mov DWORD[rax*4+rdi],eax + add al,1 + jnc NEAR $L$w1stloop + + xor r9,r9 + xor r8,r8 +ALIGN 16 +$L$w2ndloop: + mov r10d,DWORD[r9*4+rdi] + add r8b,BYTE[rsi*1+rdx] + add r8b,r10b + add rsi,1 + mov r11d,DWORD[r8*4+rdi] + cmovz rsi,rcx + mov DWORD[r8*4+rdi],r10d + mov DWORD[r9*4+rdi],r11d + add r9b,1 + jnc NEAR $L$w2ndloop + jmp NEAR $L$exit_key + +ALIGN 16 +$L$c1stloop: + mov BYTE[rax*1+rdi],al + add al,1 + jnc NEAR $L$c1stloop + + xor r9,r9 + xor r8,r8 +ALIGN 16 +$L$c2ndloop: + mov r10b,BYTE[r9*1+rdi] + add r8b,BYTE[rsi*1+rdx] + add r8b,r10b + add rsi,1 + mov r11b,BYTE[r8*1+rdi] + jnz NEAR $L$cnowrap + mov rsi,rcx +$L$cnowrap: + mov BYTE[r8*1+rdi],r10b + mov BYTE[r9*1+rdi],r11b + add r9b,1 + jnc NEAR $L$c2ndloop + mov DWORD[256+rdi],-1 + +ALIGN 16 +$L$exit_key: + xor eax,eax + mov DWORD[((-8))+rdi],eax + mov DWORD[((-4))+rdi],eax + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_RC4_set_key: + +global RC4_options + +ALIGN 16 +RC4_options: + lea rax,[$L$opts] + mov edx,DWORD[OPENSSL_ia32cap_P] + bt edx,20 + jc NEAR $L$8xchar + bt edx,30 + jnc NEAR $L$done + add rax,25 + DB 0F3h,0C3h ;repret +$L$8xchar: + add rax,12 +$L$done: + DB 0F3h,0C3h ;repret +ALIGN 64 +$L$opts: +DB 114,99,52,40,56,120,44,105,110,116,41,0 +DB 114,99,52,40,56,120,44,99,104,97,114,41,0 +DB 114,99,52,40,49,54,120,44,105,110,116,41,0 +DB 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +stream_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rax,[24+rax] + + mov rbx,QWORD[((-8))+rax] + mov r12,QWORD[((-16))+rax] + mov r13,QWORD[((-24))+rax] + mov QWORD[144+r8],rbx + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + jmp NEAR $L$common_seh_exit + + + +ALIGN 16 +key_se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[152+r8] + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + +$L$common_seh_exit: + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_RC4 wrt ..imagebase + DD $L$SEH_end_RC4 wrt ..imagebase + DD $L$SEH_info_RC4 wrt ..imagebase + + DD $L$SEH_begin_RC4_set_key wrt ..imagebase + DD $L$SEH_end_RC4_set_key wrt ..imagebase + DD $L$SEH_info_RC4_set_key wrt ..imagebase + +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_RC4: +DB 9,0,0,0 + DD stream_se_handler wrt ..imagebase +$L$SEH_info_RC4_set_key: +DB 9,0,0,0 + DD key_se_handler wrt ..imagebase |