Diffstat (limited to 'deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s')
-rw-r--r--  deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s  731
1 file changed, 462 insertions, 269 deletions
diff --git a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s b/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s
index 41183cebec..8c4f29ecbb 100644
--- a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s
+++ b/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s
@@ -1,6 +1,7 @@
.text
+
.globl _RC4
.p2align 4
@@ -12,316 +13,511 @@ L$entry:
pushq %r12
pushq %r13
L$prologue:
+ movq %rsi,%r11
+ movq %rdx,%r12
+ movq %rcx,%r13
+ xorq %r10,%r10
+ xorq %rcx,%rcx
- addq $8,%rdi
- movl -8(%rdi),%r8d
- movl -4(%rdi),%r12d
+ leaq 8(%rdi),%rdi
+ movb -8(%rdi),%r10b
+ movb -4(%rdi),%cl
cmpl $-1,256(%rdi)
je L$RC4_CHAR
- incb %r8b
- movl (%rdi,%r8,4),%r9d
- testq $-8,%rsi
- jz L$loop1
- jmp L$loop8
-.p2align 4
-L$loop8:
- addb %r9b,%r12b
- movq %r8,%r10
- movl (%rdi,%r12,4),%r13d
- rorq $8,%rax
- incb %r10b
- movl (%rdi,%r10,4),%r11d
- cmpq %r10,%r12
- movl %r9d,(%rdi,%r12,4)
- cmoveq %r9,%r11
- movl %r13d,(%rdi,%r8,4)
- addb %r9b,%r13b
- movb (%rdi,%r13,4),%al
- addb %r11b,%r12b
- movq %r10,%r8
- movl (%rdi,%r12,4),%r13d
- rorq $8,%rax
- incb %r8b
- movl (%rdi,%r8,4),%r9d
- cmpq %r8,%r12
- movl %r11d,(%rdi,%r12,4)
- cmoveq %r11,%r9
- movl %r13d,(%rdi,%r10,4)
- addb %r11b,%r13b
- movb (%rdi,%r13,4),%al
- addb %r9b,%r12b
- movq %r8,%r10
- movl (%rdi,%r12,4),%r13d
- rorq $8,%rax
+ movl _OPENSSL_ia32cap_P(%rip),%r8d
+ xorq %rbx,%rbx
incb %r10b
- movl (%rdi,%r10,4),%r11d
- cmpq %r10,%r12
- movl %r9d,(%rdi,%r12,4)
- cmoveq %r9,%r11
- movl %r13d,(%rdi,%r8,4)
- addb %r9b,%r13b
- movb (%rdi,%r13,4),%al
- addb %r11b,%r12b
- movq %r10,%r8
- movl (%rdi,%r12,4),%r13d
- rorq $8,%rax
- incb %r8b
- movl (%rdi,%r8,4),%r9d
- cmpq %r8,%r12
- movl %r11d,(%rdi,%r12,4)
- cmoveq %r11,%r9
- movl %r13d,(%rdi,%r10,4)
- addb %r11b,%r13b
- movb (%rdi,%r13,4),%al
- addb %r9b,%r12b
- movq %r8,%r10
- movl (%rdi,%r12,4),%r13d
- rorq $8,%rax
+ subq %r10,%rbx
+ subq %r12,%r13
+ movl (%rdi,%r10,4),%eax
+ testq $-16,%r11
+ jz L$loop1
+ btl $30,%r8d
+ jc L$intel
+ andq $7,%rbx
+ leaq 1(%r10),%rsi
+ jz L$oop8
+ subq %rbx,%r11
+L$oop8_warmup:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
incb %r10b
- movl (%rdi,%r10,4),%r11d
- cmpq %r10,%r12
- movl %r9d,(%rdi,%r12,4)
- cmoveq %r9,%r11
- movl %r13d,(%rdi,%r8,4)
- addb %r9b,%r13b
- movb (%rdi,%r13,4),%al
- addb %r11b,%r12b
- movq %r10,%r8
- movl (%rdi,%r12,4),%r13d
- rorq $8,%rax
- incb %r8b
- movl (%rdi,%r8,4),%r9d
- cmpq %r8,%r12
- movl %r11d,(%rdi,%r12,4)
- cmoveq %r11,%r9
- movl %r13d,(%rdi,%r10,4)
- addb %r11b,%r13b
- movb (%rdi,%r13,4),%al
- addb %r9b,%r12b
- movq %r8,%r10
- movl (%rdi,%r12,4),%r13d
- rorq $8,%rax
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r13,%r12,1)
+ leaq 1(%r12),%r12
+ decq %rbx
+ jnz L$oop8_warmup
+
+ leaq 1(%r10),%rsi
+ jmp L$oop8
+.p2align 4
+L$oop8:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 0(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,0(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 4(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,4(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 8(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,8(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 12(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,12(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 16(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,16(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 20(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,20(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 24(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,24(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb $8,%sil
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl -4(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,28(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb $8,%r10b
+ rorq $8,%r8
+ subq $8,%r11
+
+ xorq (%r12),%r8
+ movq %r8,(%r13,%r12,1)
+ leaq 8(%r12),%r12
+
+ testq $-8,%r11
+ jnz L$oop8
+ cmpq $0,%r11
+ jne L$loop1
+ jmp L$exit
+
+.p2align 4
+L$intel:
+ testq $-32,%r11
+ jz L$loop1
+ andq $15,%rbx
+ jz L$oop16_is_hot
+ subq %rbx,%r11
+L$oop16_warmup:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
incb %r10b
- movl (%rdi,%r10,4),%r11d
- cmpq %r10,%r12
- movl %r9d,(%rdi,%r12,4)
- cmoveq %r9,%r11
- movl %r13d,(%rdi,%r8,4)
- addb %r9b,%r13b
- movb (%rdi,%r13,4),%al
- addb %r11b,%r12b
- movq %r10,%r8
- movl (%rdi,%r12,4),%r13d
- rorq $8,%rax
- incb %r8b
- movl (%rdi,%r8,4),%r9d
- cmpq %r8,%r12
- movl %r11d,(%rdi,%r12,4)
- cmoveq %r11,%r9
- movl %r13d,(%rdi,%r10,4)
- addb %r11b,%r13b
- movb (%rdi,%r13,4),%al
- rorq $8,%rax
- subq $8,%rsi
-
- xorq (%rdx),%rax
- addq $8,%rdx
- movq %rax,(%rcx)
- addq $8,%rcx
-
- testq $-8,%rsi
- jnz L$loop8
- cmpq $0,%rsi
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r13,%r12,1)
+ leaq 1(%r12),%r12
+ decq %rbx
+ jnz L$oop16_warmup
+
+ movq %rcx,%rbx
+ xorq %rcx,%rcx
+ movb %bl,%cl
+
+L$oop16_is_hot:
+ leaq (%rdi,%r10,4),%rsi
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm0,%xmm0
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,0(%rsi)
+ addb %bl,%cl
+ pinsrw $0,(%rdi,%rax,4),%xmm0
+ jmp L$oop16_enter
+.p2align 4
+L$oop16:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm0,%xmm2
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm0
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,0(%rsi)
+ pxor %xmm1,%xmm2
+ addb %bl,%cl
+ pinsrw $0,(%rdi,%rax,4),%xmm0
+ movdqu %xmm2,(%r13,%r12,1)
+ leaq 16(%r12),%r12
+L$oop16_enter:
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm1,%xmm1
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 8(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,4(%rsi)
+ addb %al,%cl
+ pinsrw $0,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 12(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,8(%rsi)
+ addb %bl,%cl
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 16(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,12(%rsi)
+ addb %al,%cl
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 20(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,16(%rsi)
+ addb %bl,%cl
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 24(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,20(%rsi)
+ addb %al,%cl
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 28(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,24(%rsi)
+ addb %bl,%cl
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 32(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,28(%rsi)
+ addb %al,%cl
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 36(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,32(%rsi)
+ addb %bl,%cl
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 40(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,36(%rsi)
+ addb %al,%cl
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 44(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,40(%rsi)
+ addb %bl,%cl
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 48(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,44(%rsi)
+ addb %al,%cl
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 52(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,48(%rsi)
+ addb %bl,%cl
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 56(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,52(%rsi)
+ addb %al,%cl
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 60(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,56(%rsi)
+ addb %bl,%cl
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+ addb $16,%r10b
+ movdqu (%r12),%xmm2
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movzbl %bl,%ebx
+ movl %edx,60(%rsi)
+ leaq (%rdi,%r10,4),%rsi
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+ movl (%rsi),%eax
+ movq %rcx,%rbx
+ xorq %rcx,%rcx
+ subq $16,%r11
+ movb %bl,%cl
+ testq $-16,%r11
+ jnz L$oop16
+
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm1,%xmm2
+ movdqu %xmm2,(%r13,%r12,1)
+ leaq 16(%r12),%r12
+
+ cmpq $0,%r11
jne L$loop1
jmp L$exit
.p2align 4
L$loop1:
- addb %r9b,%r12b
- movl (%rdi,%r12,4),%r13d
- movl %r9d,(%rdi,%r12,4)
- movl %r13d,(%rdi,%r8,4)
- addb %r13b,%r9b
- incb %r8b
- movl (%rdi,%r9,4),%r13d
- movl (%rdi,%r8,4),%r9d
- xorb (%rdx),%r13b
- incq %rdx
- movb %r13b,(%rcx)
- incq %rcx
- decq %rsi
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
+ incb %r10b
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r13,%r12,1)
+ leaq 1(%r12),%r12
+ decq %r11
jnz L$loop1
jmp L$exit
.p2align 4
L$RC4_CHAR:
- addb $1,%r8b
- movzbl (%rdi,%r8,1),%r9d
- testq $-8,%rsi
+ addb $1,%r10b
+ movzbl (%rdi,%r10,1),%eax
+ testq $-8,%r11
jz L$cloop1
- cmpl $0,260(%rdi)
- jnz L$cloop1
jmp L$cloop8
.p2align 4
L$cloop8:
- movl (%rdx),%eax
- movl 4(%rdx),%ebx
- addb %r9b,%r12b
- leaq 1(%r8),%r10
- movzbl (%rdi,%r12,1),%r13d
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%r11d
- movb %r9b,(%rdi,%r12,1)
- cmpq %r10,%r12
- movb %r13b,(%rdi,%r8,1)
+ movl (%r12),%r8d
+ movl 4(%r12),%r9d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
jne L$cmov0
- movq %r9,%r11
+ movq %rax,%rbx
L$cmov0:
- addb %r9b,%r13b
- xorb (%rdi,%r13,1),%al
- rorl $8,%eax
- addb %r11b,%r12b
- leaq 1(%r10),%r8
- movzbl (%rdi,%r12,1),%r13d
- movzbl %r8b,%r8d
- movzbl (%rdi,%r8,1),%r9d
- movb %r11b,(%rdi,%r12,1)
- cmpq %r8,%r12
- movb %r13b,(%rdi,%r10,1)
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
jne L$cmov1
- movq %r11,%r9
+ movq %rbx,%rax
L$cmov1:
- addb %r11b,%r13b
- xorb (%rdi,%r13,1),%al
- rorl $8,%eax
- addb %r9b,%r12b
- leaq 1(%r8),%r10
- movzbl (%rdi,%r12,1),%r13d
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%r11d
- movb %r9b,(%rdi,%r12,1)
- cmpq %r10,%r12
- movb %r13b,(%rdi,%r8,1)
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
jne L$cmov2
- movq %r9,%r11
+ movq %rax,%rbx
L$cmov2:
- addb %r9b,%r13b
- xorb (%rdi,%r13,1),%al
- rorl $8,%eax
- addb %r11b,%r12b
- leaq 1(%r10),%r8
- movzbl (%rdi,%r12,1),%r13d
- movzbl %r8b,%r8d
- movzbl (%rdi,%r8,1),%r9d
- movb %r11b,(%rdi,%r12,1)
- cmpq %r8,%r12
- movb %r13b,(%rdi,%r10,1)
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
jne L$cmov3
- movq %r11,%r9
+ movq %rbx,%rax
L$cmov3:
- addb %r11b,%r13b
- xorb (%rdi,%r13,1),%al
- rorl $8,%eax
- addb %r9b,%r12b
- leaq 1(%r8),%r10
- movzbl (%rdi,%r12,1),%r13d
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%r11d
- movb %r9b,(%rdi,%r12,1)
- cmpq %r10,%r12
- movb %r13b,(%rdi,%r8,1)
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
jne L$cmov4
- movq %r9,%r11
+ movq %rax,%rbx
L$cmov4:
- addb %r9b,%r13b
- xorb (%rdi,%r13,1),%bl
- rorl $8,%ebx
- addb %r11b,%r12b
- leaq 1(%r10),%r8
- movzbl (%rdi,%r12,1),%r13d
- movzbl %r8b,%r8d
- movzbl (%rdi,%r8,1),%r9d
- movb %r11b,(%rdi,%r12,1)
- cmpq %r8,%r12
- movb %r13b,(%rdi,%r10,1)
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
jne L$cmov5
- movq %r11,%r9
+ movq %rbx,%rax
L$cmov5:
- addb %r11b,%r13b
- xorb (%rdi,%r13,1),%bl
- rorl $8,%ebx
- addb %r9b,%r12b
- leaq 1(%r8),%r10
- movzbl (%rdi,%r12,1),%r13d
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%r11d
- movb %r9b,(%rdi,%r12,1)
- cmpq %r10,%r12
- movb %r13b,(%rdi,%r8,1)
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
jne L$cmov6
- movq %r9,%r11
+ movq %rax,%rbx
L$cmov6:
- addb %r9b,%r13b
- xorb (%rdi,%r13,1),%bl
- rorl $8,%ebx
- addb %r11b,%r12b
- leaq 1(%r10),%r8
- movzbl (%rdi,%r12,1),%r13d
- movzbl %r8b,%r8d
- movzbl (%rdi,%r8,1),%r9d
- movb %r11b,(%rdi,%r12,1)
- cmpq %r8,%r12
- movb %r13b,(%rdi,%r10,1)
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
jne L$cmov7
- movq %r11,%r9
+ movq %rbx,%rax
L$cmov7:
- addb %r11b,%r13b
- xorb (%rdi,%r13,1),%bl
- rorl $8,%ebx
- leaq -8(%rsi),%rsi
- movl %eax,(%rcx)
- leaq 8(%rdx),%rdx
- movl %ebx,4(%rcx)
- leaq 8(%rcx),%rcx
-
- testq $-8,%rsi
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ leaq -8(%r11),%r11
+ movl %r8d,(%r13)
+ leaq 8(%r12),%r12
+ movl %r9d,4(%r13)
+ leaq 8(%r13),%r13
+
+ testq $-8,%r11
jnz L$cloop8
- cmpq $0,%rsi
+ cmpq $0,%r11
jne L$cloop1
jmp L$exit
.p2align 4
L$cloop1:
- addb %r9b,%r12b
- movzbl (%rdi,%r12,1),%r13d
- movb %r9b,(%rdi,%r12,1)
- movb %r13b,(%rdi,%r8,1)
- addb %r9b,%r13b
- addb $1,%r8b
- movzbl %r13b,%r13d
- movzbl %r8b,%r8d
- movzbl (%rdi,%r13,1),%r13d
- movzbl (%rdi,%r8,1),%r9d
- xorb (%rdx),%r13b
- leaq 1(%rdx),%rdx
- movb %r13b,(%rcx)
- leaq 1(%rcx),%rcx
- subq $1,%rsi
+ addb %al,%cl
+ movzbl %cl,%ecx
+ movzbl (%rdi,%rcx,1),%edx
+ movb %al,(%rdi,%rcx,1)
+ movb %dl,(%rdi,%r10,1)
+ addb %al,%dl
+ addb $1,%r10b
+ movzbl %dl,%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%rdx,1),%edx
+ movzbl (%rdi,%r10,1),%eax
+ xorb (%r12),%dl
+ leaq 1(%r12),%r12
+ movb %dl,(%r13)
+ leaq 1(%r13),%r13
+ subq $1,%r11
jnz L$cloop1
jmp L$exit
.p2align 4
L$exit:
- subb $1,%r8b
- movl %r8d,-8(%rdi)
- movl %r12d,-4(%rdi)
+ subb $1,%r10b
+ movl %r10d,-8(%rdi)
+ movl %ecx,-4(%rdi)
movq (%rsp),%r13
movq 8(%rsp),%r12
@@ -330,11 +526,10 @@ L$exit:
L$epilogue:
.byte 0xf3,0xc3
-
-.globl _RC4_set_key
+.globl _private_RC4_set_key
.p2align 4
-_RC4_set_key:
+_private_RC4_set_key:
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
@@ -346,11 +541,8 @@ _RC4_set_key:
movl _OPENSSL_ia32cap_P(%rip),%r8d
btl $20,%r8d
- jnc L$w1stloop
- btl $30,%r8d
- setc %r9b
- movl %r9d,260(%rdi)
- jmp L$c1stloop
+ jc L$c1stloop
+ jmp L$w1stloop
.p2align 4
L$w1stloop:
@@ -413,18 +605,19 @@ _RC4_options:
leaq L$opts(%rip),%rax
movl _OPENSSL_ia32cap_P(%rip),%edx
btl $20,%edx
- jnc L$done
- addq $12,%rax
+ jc L$8xchar
btl $30,%edx
jnc L$done
- addq $13,%rax
+ addq $25,%rax
+ .byte 0xf3,0xc3
+L$8xchar:
+ addq $12,%rax
L$done:
.byte 0xf3,0xc3
.p2align 6
L$opts:
.byte 114,99,52,40,56,120,44,105,110,116,41,0
.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
-.byte 114,99,52,40,49,120,44,99,104,97,114,41,0
+.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
-
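
For reference, the loops touched by this diff are unrolled forms of RC4's pseudo-random generation algorithm (PRGA): the scalar L$loop1/L$cloop1 paths process one byte per iteration, L$oop8 handles eight, and the added L$oop16 path behind the L$intel label (selected by a bit test on OPENSSL_ia32cap_P) gathers sixteen key-stream bytes with SSE2 pinsrw/pxor. Below is a minimal C sketch of the byte-at-a-time PRGA under the assumption of a plain 256-byte S-box; the type and function names are illustrative, not OpenSSL's actual API. The "int" layout in the assembly instead stores each S[i] in a 4-byte slot (hence the (%rdi,%rcx,4) addressing) and keeps the x/y counters at -8(%rdi) and -4(%rdi).

#include <stddef.h>
#include <stdint.h>

/* Illustrative state layout; OpenSSL's RC4_KEY differs in storage width. */
typedef struct {
    uint8_t x, y;       /* the i and j counters, saved between calls */
    uint8_t S[256];     /* the RC4 permutation */
} rc4_sketch_state;

/* One-byte-at-a-time PRGA, the operation L$loop1 performs per iteration. */
static void rc4_sketch(rc4_sketch_state *key, size_t len,
                       const uint8_t *in, uint8_t *out)
{
    uint8_t x = key->x, y = key->y;

    while (len--) {
        x = (uint8_t)(x + 1);               /* incb %r10b                  */
        y = (uint8_t)(y + key->S[x]);       /* addb %al,%cl                */
        uint8_t tx = key->S[x];             /* swap S[x] and S[y]          */
        key->S[x] = key->S[y];
        key->S[y] = tx;
        uint8_t t = (uint8_t)(key->S[x] + key->S[y]);
        *out++ = (uint8_t)(*in++ ^ key->S[t]);  /* xorb (%r12),%dl         */
    }
    key->x = x;                             /* counters written back, as   */
    key->y = y;                             /* the L$exit block does       */
}

The unrolled paths perform exactly this update 8 or 16 times per pass, accumulating the key-stream bytes in a register (L$oop8, via rorq) or in %xmm0/%xmm1 (L$oop16) before a single 8- or 16-byte store to the output pointer.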