aboutsummaryrefslogtreecommitdiff
path: root/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm
diff options
context:
space:
mode:
Diffstat (limited to 'deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm')
-rw-r--r--deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm744
1 files changed, 469 insertions, 275 deletions
diff --git a/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm b/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm
index f508fa6679..aea304fbad 100644
--- a/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm
+++ b/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm
@@ -1,5 +1,6 @@
OPTION DOTNAME
.text$ SEGMENT ALIGN(64) 'CODE'
+EXTERN OPENSSL_ia32cap_P:NEAR
PUBLIC RC4
@@ -24,316 +25,511 @@ $L$entry::
push r12
push r13
$L$prologue::
+ mov r11,rsi
+ mov r12,rdx
+ mov r13,rcx
+ xor r10,r10
+ xor rcx,rcx
- add rdi,8
- mov r8d,DWORD PTR[((-8))+rdi]
- mov r12d,DWORD PTR[((-4))+rdi]
+ lea rdi,QWORD PTR[8+rdi]
+ mov r10b,BYTE PTR[((-8))+rdi]
+ mov cl,BYTE PTR[((-4))+rdi]
cmp DWORD PTR[256+rdi],-1
je $L$RC4_CHAR
- inc r8b
- mov r9d,DWORD PTR[r8*4+rdi]
- test rsi,-8
- jz $L$loop1
- jmp $L$loop8
-ALIGN 16
-$L$loop8::
- add r12b,r9b
- mov r10,r8
- mov r13d,DWORD PTR[r12*4+rdi]
- ror rax,8
- inc r10b
- mov r11d,DWORD PTR[r10*4+rdi]
- cmp r12,r10
- mov DWORD PTR[r12*4+rdi],r9d
- cmove r11,r9
- mov DWORD PTR[r8*4+rdi],r13d
- add r13b,r9b
- mov al,BYTE PTR[r13*4+rdi]
- add r12b,r11b
- mov r8,r10
- mov r13d,DWORD PTR[r12*4+rdi]
- ror rax,8
- inc r8b
- mov r9d,DWORD PTR[r8*4+rdi]
- cmp r12,r8
- mov DWORD PTR[r12*4+rdi],r11d
- cmove r9,r11
- mov DWORD PTR[r10*4+rdi],r13d
- add r13b,r11b
- mov al,BYTE PTR[r13*4+rdi]
- add r12b,r9b
- mov r10,r8
- mov r13d,DWORD PTR[r12*4+rdi]
- ror rax,8
+ mov r8d,DWORD PTR[OPENSSL_ia32cap_P]
+ xor rbx,rbx
inc r10b
- mov r11d,DWORD PTR[r10*4+rdi]
- cmp r12,r10
- mov DWORD PTR[r12*4+rdi],r9d
- cmove r11,r9
- mov DWORD PTR[r8*4+rdi],r13d
- add r13b,r9b
- mov al,BYTE PTR[r13*4+rdi]
- add r12b,r11b
- mov r8,r10
- mov r13d,DWORD PTR[r12*4+rdi]
- ror rax,8
- inc r8b
- mov r9d,DWORD PTR[r8*4+rdi]
- cmp r12,r8
- mov DWORD PTR[r12*4+rdi],r11d
- cmove r9,r11
- mov DWORD PTR[r10*4+rdi],r13d
- add r13b,r11b
- mov al,BYTE PTR[r13*4+rdi]
- add r12b,r9b
- mov r10,r8
- mov r13d,DWORD PTR[r12*4+rdi]
- ror rax,8
+ sub rbx,r10
+ sub r13,r12
+ mov eax,DWORD PTR[r10*4+rdi]
+ test r11,-16
+ jz $L$loop1
+ bt r8d,30
+ jc $L$intel
+ and rbx,7
+ lea rsi,QWORD PTR[1+r10]
+ jz $L$oop8
+ sub r11,rbx
+$L$oop8_warmup::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov DWORD PTR[r10*4+rdi],edx
+ add al,dl
inc r10b
- mov r11d,DWORD PTR[r10*4+rdi]
- cmp r12,r10
- mov DWORD PTR[r12*4+rdi],r9d
- cmove r11,r9
- mov DWORD PTR[r8*4+rdi],r13d
- add r13b,r9b
- mov al,BYTE PTR[r13*4+rdi]
- add r12b,r11b
- mov r8,r10
- mov r13d,DWORD PTR[r12*4+rdi]
- ror rax,8
- inc r8b
- mov r9d,DWORD PTR[r8*4+rdi]
- cmp r12,r8
- mov DWORD PTR[r12*4+rdi],r11d
- cmove r9,r11
- mov DWORD PTR[r10*4+rdi],r13d
- add r13b,r11b
- mov al,BYTE PTR[r13*4+rdi]
- add r12b,r9b
- mov r10,r8
- mov r13d,DWORD PTR[r12*4+rdi]
- ror rax,8
+ mov edx,DWORD PTR[rax*4+rdi]
+ mov eax,DWORD PTR[r10*4+rdi]
+ xor dl,BYTE PTR[r12]
+ mov BYTE PTR[r12*1+r13],dl
+ lea r12,QWORD PTR[1+r12]
+ dec rbx
+ jnz $L$oop8_warmup
+
+ lea rsi,QWORD PTR[1+r10]
+ jmp $L$oop8
+ALIGN 16
+$L$oop8::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov ebx,DWORD PTR[rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[r10*4+rdi],edx
+ add dl,al
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,bl
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ mov eax,DWORD PTR[4+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[4+r10*4+rdi],edx
+ add dl,bl
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov ebx,DWORD PTR[8+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[8+r10*4+rdi],edx
+ add dl,al
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,bl
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ mov eax,DWORD PTR[12+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[12+r10*4+rdi],edx
+ add dl,bl
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov ebx,DWORD PTR[16+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[16+r10*4+rdi],edx
+ add dl,al
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,bl
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ mov eax,DWORD PTR[20+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[20+r10*4+rdi],edx
+ add dl,bl
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov ebx,DWORD PTR[24+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[24+r10*4+rdi],edx
+ add dl,al
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add sil,8
+ add cl,bl
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ mov eax,DWORD PTR[((-4))+rsi*4+rdi]
+ ror r8,8
+ mov DWORD PTR[28+r10*4+rdi],edx
+ add dl,bl
+ mov r8b,BYTE PTR[rdx*4+rdi]
+ add r10b,8
+ ror r8,8
+ sub r11,8
+
+ xor r8,QWORD PTR[r12]
+ mov QWORD PTR[r12*1+r13],r8
+ lea r12,QWORD PTR[8+r12]
+
+ test r11,-8
+ jnz $L$oop8
+ cmp r11,0
+ jne $L$loop1
+ jmp $L$exit
+
+ALIGN 16
+$L$intel::
+ test r11,-32
+ jz $L$loop1
+ and rbx,15
+ jz $L$oop16_is_hot
+ sub r11,rbx
+$L$oop16_warmup::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov DWORD PTR[r10*4+rdi],edx
+ add al,dl
inc r10b
- mov r11d,DWORD PTR[r10*4+rdi]
- cmp r12,r10
- mov DWORD PTR[r12*4+rdi],r9d
- cmove r11,r9
- mov DWORD PTR[r8*4+rdi],r13d
- add r13b,r9b
- mov al,BYTE PTR[r13*4+rdi]
- add r12b,r11b
- mov r8,r10
- mov r13d,DWORD PTR[r12*4+rdi]
- ror rax,8
- inc r8b
- mov r9d,DWORD PTR[r8*4+rdi]
- cmp r12,r8
- mov DWORD PTR[r12*4+rdi],r11d
- cmove r9,r11
- mov DWORD PTR[r10*4+rdi],r13d
- add r13b,r11b
- mov al,BYTE PTR[r13*4+rdi]
- ror rax,8
- sub rsi,8
-
- xor rax,QWORD PTR[rdx]
- add rdx,8
- mov QWORD PTR[rcx],rax
- add rcx,8
-
- test rsi,-8
- jnz $L$loop8
- cmp rsi,0
+ mov edx,DWORD PTR[rax*4+rdi]
+ mov eax,DWORD PTR[r10*4+rdi]
+ xor dl,BYTE PTR[r12]
+ mov BYTE PTR[r12*1+r13],dl
+ lea r12,QWORD PTR[1+r12]
+ dec rbx
+ jnz $L$oop16_warmup
+
+ mov rbx,rcx
+ xor rcx,rcx
+ mov cl,bl
+
+$L$oop16_is_hot::
+ lea rsi,QWORD PTR[r10*4+rdi]
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ pxor xmm0,xmm0
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[4+rsi]
+ movzx eax,al
+ mov DWORD PTR[rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],0
+ jmp $L$oop16_enter
+ALIGN 16
+$L$oop16::
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ pxor xmm2,xmm0
+ psllq xmm1,8
+ pxor xmm0,xmm0
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[4+rsi]
+ movzx eax,al
+ mov DWORD PTR[rsi],edx
+ pxor xmm2,xmm1
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],0
+ movdqu XMMWORD PTR[r12*1+r13],xmm2
+ lea r12,QWORD PTR[16+r12]
+$L$oop16_enter::
+ mov edx,DWORD PTR[rcx*4+rdi]
+ pxor xmm1,xmm1
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[8+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[4+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],0
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[12+rsi]
+ movzx eax,al
+ mov DWORD PTR[8+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],1
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[16+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[12+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],1
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[20+rsi]
+ movzx eax,al
+ mov DWORD PTR[16+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],2
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[24+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[20+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],2
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[28+rsi]
+ movzx eax,al
+ mov DWORD PTR[24+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],3
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[32+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[28+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],3
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[36+rsi]
+ movzx eax,al
+ mov DWORD PTR[32+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],4
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[40+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[36+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],4
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[44+rsi]
+ movzx eax,al
+ mov DWORD PTR[40+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],5
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[48+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[44+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],5
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[52+rsi]
+ movzx eax,al
+ mov DWORD PTR[48+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],6
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ mov eax,DWORD PTR[56+rsi]
+ movzx ebx,bl
+ mov DWORD PTR[52+rsi],edx
+ add cl,al
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],6
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ add al,dl
+ mov ebx,DWORD PTR[60+rsi]
+ movzx eax,al
+ mov DWORD PTR[56+rsi],edx
+ add cl,bl
+ pinsrw xmm0,WORD PTR[rax*4+rdi],7
+ add r10b,16
+ movdqu xmm2,XMMWORD PTR[r12]
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],ebx
+ add bl,dl
+ movzx ebx,bl
+ mov DWORD PTR[60+rsi],edx
+ lea rsi,QWORD PTR[r10*4+rdi]
+ pinsrw xmm1,WORD PTR[rbx*4+rdi],7
+ mov eax,DWORD PTR[rsi]
+ mov rbx,rcx
+ xor rcx,rcx
+ sub r11,16
+ mov cl,bl
+ test r11,-16
+ jnz $L$oop16
+
+ psllq xmm1,8
+ pxor xmm2,xmm0
+ pxor xmm2,xmm1
+ movdqu XMMWORD PTR[r12*1+r13],xmm2
+ lea r12,QWORD PTR[16+r12]
+
+ cmp r11,0
jne $L$loop1
jmp $L$exit
ALIGN 16
$L$loop1::
- add r12b,r9b
- mov r13d,DWORD PTR[r12*4+rdi]
- mov DWORD PTR[r12*4+rdi],r9d
- mov DWORD PTR[r8*4+rdi],r13d
- add r9b,r13b
- inc r8b
- mov r13d,DWORD PTR[r9*4+rdi]
- mov r9d,DWORD PTR[r8*4+rdi]
- xor r13b,BYTE PTR[rdx]
- inc rdx
- mov BYTE PTR[rcx],r13b
- inc rcx
- dec rsi
+ add cl,al
+ mov edx,DWORD PTR[rcx*4+rdi]
+ mov DWORD PTR[rcx*4+rdi],eax
+ mov DWORD PTR[r10*4+rdi],edx
+ add al,dl
+ inc r10b
+ mov edx,DWORD PTR[rax*4+rdi]
+ mov eax,DWORD PTR[r10*4+rdi]
+ xor dl,BYTE PTR[r12]
+ mov BYTE PTR[r12*1+r13],dl
+ lea r12,QWORD PTR[1+r12]
+ dec r11
jnz $L$loop1
jmp $L$exit
ALIGN 16
$L$RC4_CHAR::
- add r8b,1
- movzx r9d,BYTE PTR[r8*1+rdi]
- test rsi,-8
+ add r10b,1
+ movzx eax,BYTE PTR[r10*1+rdi]
+ test r11,-8
jz $L$cloop1
- cmp DWORD PTR[260+rdi],0
- jnz $L$cloop1
jmp $L$cloop8
ALIGN 16
$L$cloop8::
- mov eax,DWORD PTR[rdx]
- mov ebx,DWORD PTR[4+rdx]
- add r12b,r9b
- lea r10,QWORD PTR[1+r8]
- movzx r13d,BYTE PTR[r12*1+rdi]
- movzx r10d,r10b
- movzx r11d,BYTE PTR[r10*1+rdi]
- mov BYTE PTR[r12*1+rdi],r9b
- cmp r12,r10
- mov BYTE PTR[r8*1+rdi],r13b
+ mov r8d,DWORD PTR[r12]
+ mov r9d,DWORD PTR[4+r12]
+ add cl,al
+ lea rsi,QWORD PTR[1+r10]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx esi,sil
+ movzx ebx,BYTE PTR[rsi*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ cmp rcx,rsi
+ mov BYTE PTR[r10*1+rdi],dl
jne $L$cmov0
- mov r11,r9
+ mov rbx,rax
$L$cmov0::
- add r13b,r9b
- xor al,BYTE PTR[r13*1+rdi]
- ror eax,8
- add r12b,r11b
- lea r8,QWORD PTR[1+r10]
- movzx r13d,BYTE PTR[r12*1+rdi]
- movzx r8d,r8b
- movzx r9d,BYTE PTR[r8*1+rdi]
- mov BYTE PTR[r12*1+rdi],r11b
- cmp r12,r8
- mov BYTE PTR[r10*1+rdi],r13b
+ add dl,al
+ xor r8b,BYTE PTR[rdx*1+rdi]
+ ror r8d,8
+ add cl,bl
+ lea r10,QWORD PTR[1+rsi]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx r10d,r10b
+ movzx eax,BYTE PTR[r10*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],bl
+ cmp rcx,r10
+ mov BYTE PTR[rsi*1+rdi],dl
jne $L$cmov1
- mov r9,r11
+ mov rax,rbx
$L$cmov1::
- add r13b,r11b
- xor al,BYTE PTR[r13*1+rdi]
- ror eax,8
- add r12b,r9b
- lea r10,QWORD PTR[1+r8]
- movzx r13d,BYTE PTR[r12*1+rdi]
- movzx r10d,r10b
- movzx r11d,BYTE PTR[r10*1+rdi]
- mov BYTE PTR[r12*1+rdi],r9b
- cmp r12,r10
- mov BYTE PTR[r8*1+rdi],r13b
+ add dl,bl
+ xor r8b,BYTE PTR[rdx*1+rdi]
+ ror r8d,8
+ add cl,al
+ lea rsi,QWORD PTR[1+r10]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx esi,sil
+ movzx ebx,BYTE PTR[rsi*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ cmp rcx,rsi
+ mov BYTE PTR[r10*1+rdi],dl
jne $L$cmov2
- mov r11,r9
+ mov rbx,rax
$L$cmov2::
- add r13b,r9b
- xor al,BYTE PTR[r13*1+rdi]
- ror eax,8
- add r12b,r11b
- lea r8,QWORD PTR[1+r10]
- movzx r13d,BYTE PTR[r12*1+rdi]
- movzx r8d,r8b
- movzx r9d,BYTE PTR[r8*1+rdi]
- mov BYTE PTR[r12*1+rdi],r11b
- cmp r12,r8
- mov BYTE PTR[r10*1+rdi],r13b
+ add dl,al
+ xor r8b,BYTE PTR[rdx*1+rdi]
+ ror r8d,8
+ add cl,bl
+ lea r10,QWORD PTR[1+rsi]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx r10d,r10b
+ movzx eax,BYTE PTR[r10*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],bl
+ cmp rcx,r10
+ mov BYTE PTR[rsi*1+rdi],dl
jne $L$cmov3
- mov r9,r11
+ mov rax,rbx
$L$cmov3::
- add r13b,r11b
- xor al,BYTE PTR[r13*1+rdi]
- ror eax,8
- add r12b,r9b
- lea r10,QWORD PTR[1+r8]
- movzx r13d,BYTE PTR[r12*1+rdi]
- movzx r10d,r10b
- movzx r11d,BYTE PTR[r10*1+rdi]
- mov BYTE PTR[r12*1+rdi],r9b
- cmp r12,r10
- mov BYTE PTR[r8*1+rdi],r13b
+ add dl,bl
+ xor r8b,BYTE PTR[rdx*1+rdi]
+ ror r8d,8
+ add cl,al
+ lea rsi,QWORD PTR[1+r10]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx esi,sil
+ movzx ebx,BYTE PTR[rsi*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ cmp rcx,rsi
+ mov BYTE PTR[r10*1+rdi],dl
jne $L$cmov4
- mov r11,r9
+ mov rbx,rax
$L$cmov4::
- add r13b,r9b
- xor bl,BYTE PTR[r13*1+rdi]
- ror ebx,8
- add r12b,r11b
- lea r8,QWORD PTR[1+r10]
- movzx r13d,BYTE PTR[r12*1+rdi]
- movzx r8d,r8b
- movzx r9d,BYTE PTR[r8*1+rdi]
- mov BYTE PTR[r12*1+rdi],r11b
- cmp r12,r8
- mov BYTE PTR[r10*1+rdi],r13b
+ add dl,al
+ xor r9b,BYTE PTR[rdx*1+rdi]
+ ror r9d,8
+ add cl,bl
+ lea r10,QWORD PTR[1+rsi]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx r10d,r10b
+ movzx eax,BYTE PTR[r10*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],bl
+ cmp rcx,r10
+ mov BYTE PTR[rsi*1+rdi],dl
jne $L$cmov5
- mov r9,r11
+ mov rax,rbx
$L$cmov5::
- add r13b,r11b
- xor bl,BYTE PTR[r13*1+rdi]
- ror ebx,8
- add r12b,r9b
- lea r10,QWORD PTR[1+r8]
- movzx r13d,BYTE PTR[r12*1+rdi]
- movzx r10d,r10b
- movzx r11d,BYTE PTR[r10*1+rdi]
- mov BYTE PTR[r12*1+rdi],r9b
- cmp r12,r10
- mov BYTE PTR[r8*1+rdi],r13b
+ add dl,bl
+ xor r9b,BYTE PTR[rdx*1+rdi]
+ ror r9d,8
+ add cl,al
+ lea rsi,QWORD PTR[1+r10]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx esi,sil
+ movzx ebx,BYTE PTR[rsi*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ cmp rcx,rsi
+ mov BYTE PTR[r10*1+rdi],dl
jne $L$cmov6
- mov r11,r9
+ mov rbx,rax
$L$cmov6::
- add r13b,r9b
- xor bl,BYTE PTR[r13*1+rdi]
- ror ebx,8
- add r12b,r11b
- lea r8,QWORD PTR[1+r10]
- movzx r13d,BYTE PTR[r12*1+rdi]
- movzx r8d,r8b
- movzx r9d,BYTE PTR[r8*1+rdi]
- mov BYTE PTR[r12*1+rdi],r11b
- cmp r12,r8
- mov BYTE PTR[r10*1+rdi],r13b
+ add dl,al
+ xor r9b,BYTE PTR[rdx*1+rdi]
+ ror r9d,8
+ add cl,bl
+ lea r10,QWORD PTR[1+rsi]
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ movzx r10d,r10b
+ movzx eax,BYTE PTR[r10*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],bl
+ cmp rcx,r10
+ mov BYTE PTR[rsi*1+rdi],dl
jne $L$cmov7
- mov r9,r11
+ mov rax,rbx
$L$cmov7::
- add r13b,r11b
- xor bl,BYTE PTR[r13*1+rdi]
- ror ebx,8
- lea rsi,QWORD PTR[((-8))+rsi]
- mov DWORD PTR[rcx],eax
- lea rdx,QWORD PTR[8+rdx]
- mov DWORD PTR[4+rcx],ebx
- lea rcx,QWORD PTR[8+rcx]
-
- test rsi,-8
+ add dl,bl
+ xor r9b,BYTE PTR[rdx*1+rdi]
+ ror r9d,8
+ lea r11,QWORD PTR[((-8))+r11]
+ mov DWORD PTR[r13],r8d
+ lea r12,QWORD PTR[8+r12]
+ mov DWORD PTR[4+r13],r9d
+ lea r13,QWORD PTR[8+r13]
+
+ test r11,-8
jnz $L$cloop8
- cmp rsi,0
+ cmp r11,0
jne $L$cloop1
jmp $L$exit
ALIGN 16
$L$cloop1::
- add r12b,r9b
- movzx r13d,BYTE PTR[r12*1+rdi]
- mov BYTE PTR[r12*1+rdi],r9b
- mov BYTE PTR[r8*1+rdi],r13b
- add r13b,r9b
- add r8b,1
- movzx r13d,r13b
- movzx r8d,r8b
- movzx r13d,BYTE PTR[r13*1+rdi]
- movzx r9d,BYTE PTR[r8*1+rdi]
- xor r13b,BYTE PTR[rdx]
- lea rdx,QWORD PTR[1+rdx]
- mov BYTE PTR[rcx],r13b
- lea rcx,QWORD PTR[1+rcx]
- sub rsi,1
+ add cl,al
+ movzx ecx,cl
+ movzx edx,BYTE PTR[rcx*1+rdi]
+ mov BYTE PTR[rcx*1+rdi],al
+ mov BYTE PTR[r10*1+rdi],dl
+ add dl,al
+ add r10b,1
+ movzx edx,dl
+ movzx r10d,r10b
+ movzx edx,BYTE PTR[rdx*1+rdi]
+ movzx eax,BYTE PTR[r10*1+rdi]
+ xor dl,BYTE PTR[r12]
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[r13],dl
+ lea r13,QWORD PTR[1+r13]
+ sub r11,1
jnz $L$cloop1
jmp $L$exit
ALIGN 16
$L$exit::
- sub r8b,1
- mov DWORD PTR[((-8))+rdi],r8d
- mov DWORD PTR[((-4))+rdi],r12d
+ sub r10b,1
+ mov DWORD PTR[((-8))+rdi],r10d
+ mov DWORD PTR[((-4))+rdi],ecx
mov r13,QWORD PTR[rsp]
mov r12,QWORD PTR[8+rsp]
@@ -345,15 +541,14 @@ $L$epilogue::
DB 0F3h,0C3h ;repret
$L$SEH_end_RC4::
RC4 ENDP
-EXTERN OPENSSL_ia32cap_P:NEAR
-PUBLIC RC4_set_key
+PUBLIC private_RC4_set_key
ALIGN 16
-RC4_set_key PROC PUBLIC
+private_RC4_set_key PROC PUBLIC
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
-$L$SEH_begin_RC4_set_key::
+$L$SEH_begin_private_RC4_set_key::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
@@ -370,11 +565,8 @@ $L$SEH_begin_RC4_set_key::
mov r8d,DWORD PTR[OPENSSL_ia32cap_P]
bt r8d,20
- jnc $L$w1stloop
- bt r8d,30
- setc r9b
- mov DWORD PTR[260+rdi],r9d
- jmp $L$c1stloop
+ jc $L$c1stloop
+ jmp $L$w1stloop
ALIGN 16
$L$w1stloop::
@@ -430,8 +622,8 @@ $L$exit_key::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
-$L$SEH_end_RC4_set_key::
-RC4_set_key ENDP
+$L$SEH_end_private_RC4_set_key::
+private_RC4_set_key ENDP
PUBLIC RC4_options
@@ -440,18 +632,20 @@ RC4_options PROC PUBLIC
lea rax,QWORD PTR[$L$opts]
mov edx,DWORD PTR[OPENSSL_ia32cap_P]
bt edx,20
- jnc $L$done
- add rax,12
+ jc $L$8xchar
bt edx,30
jnc $L$done
- add rax,13
+ add rax,25
+ DB 0F3h,0C3h ;repret
+$L$8xchar::
+ add rax,12
$L$done::
DB 0F3h,0C3h ;repret
ALIGN 64
$L$opts::
DB 114,99,52,40,56,120,44,105,110,116,41,0
DB 114,99,52,40,56,120,44,99,104,97,114,41,0
-DB 114,99,52,40,49,120,44,99,104,97,114,41,0
+DB 114,99,52,40,49,54,120,44,105,110,116,41,0
DB 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32
DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
@@ -568,9 +762,9 @@ ALIGN 4
DD imagerel $L$SEH_end_RC4
DD imagerel $L$SEH_info_RC4
- DD imagerel $L$SEH_begin_RC4_set_key
- DD imagerel $L$SEH_end_RC4_set_key
- DD imagerel $L$SEH_info_RC4_set_key
+ DD imagerel $L$SEH_begin_private_RC4_set_key
+ DD imagerel $L$SEH_end_private_RC4_set_key
+ DD imagerel $L$SEH_info_private_RC4_set_key
.pdata ENDS
.xdata SEGMENT READONLY ALIGN(8)
@@ -578,7 +772,7 @@ ALIGN 8
$L$SEH_info_RC4::
DB 9,0,0,0
DD imagerel stream_se_handler
-$L$SEH_info_RC4_set_key::
+$L$SEH_info_private_RC4_set_key::
DB 9,0,0,0
DD imagerel key_se_handler