Diffstat (limited to 'deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s')
-rw-r--r--  deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s | 430
1 file changed, 430 insertions(+), 0 deletions(-)
diff --git a/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s b/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s
new file mode 100644
index 0000000000..1bafefeb02
--- /dev/null
+++ b/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s
@@ -0,0 +1,430 @@
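+# RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org> (see the .Lopts
+# strings below).  Machine-generated GNU-as output, presumably produced
+# from OpenSSL's crypto/rc4/asm/rc4-x86_64.pl; fixes belong in the perl
+# script rather than in this file.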
+.text
+
+
+.globl RC4
+.type RC4,@function
+.align 16
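+# void RC4(RC4_KEY *key, size_t len,
+#          const unsigned char *indata, unsigned char *outdata)
+# SysV AMD64 ABI: key=%rdi, len=%rsi, in=%rdx, out=%rcx.  A zero length
+# returns immediately; the 0xf3,0xc3 bytes encode "repz ret", the
+# two-byte return that avoids a branch-predictor penalty on older AMD
+# cores.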
+RC4: orq %rsi,%rsi
+ jne .Lentry
+ .byte 0xf3,0xc3
+.Lentry:
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+.Lprologue:
+
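+# RC4_KEY starts with the two indices x and y, followed by the 256-entry
+# S-box.  %rdi is advanced past that 8-byte header, x is loaded into
+# %r8d and y into %r12d.  RC4_set_key stores -1 at offset 256 when it
+# chose the byte-wide S-box, which is what the cmpl below tests to
+# dispatch between the int and char code paths.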
+ addq $8,%rdi
+ movl -8(%rdi),%r8d
+ movl -4(%rdi),%r12d
+ cmpl $-1,256(%rdi)
+ je .LRC4_CHAR
+ incb %r8b
+ movl (%rdi,%r8,4),%r9d
+ testq $-8,%rsi
+ jz .Lloop1
+ jmp .Lloop8
+.align 16
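+# Main loop for the 4-byte-per-element S-box: eight fully unrolled RC4
+# rounds.  Each round does y += S[x], swaps S[x] and S[y], and feeds the
+# keystream byte S[(S[x]+S[y]) & 0xff] into %al, with rorq $8 rotating
+# the accumulating 8 bytes through %rax before they are XORed against 8
+# input bytes at once.  S[x+1] is preloaded before the swap is stored,
+# so the cmoveq patches up the alias case y == x+1, where the preloaded
+# value has just been overwritten.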
+.Lloop8:
+ addb %r9b,%r12b
+ movq %r8,%r10
+ movl (%rdi,%r12,4),%r13d
+ rorq $8,%rax
+ incb %r10b
+ movl (%rdi,%r10,4),%r11d
+ cmpq %r10,%r12
+ movl %r9d,(%rdi,%r12,4)
+ cmoveq %r9,%r11
+ movl %r13d,(%rdi,%r8,4)
+ addb %r9b,%r13b
+ movb (%rdi,%r13,4),%al
+ addb %r11b,%r12b
+ movq %r10,%r8
+ movl (%rdi,%r12,4),%r13d
+ rorq $8,%rax
+ incb %r8b
+ movl (%rdi,%r8,4),%r9d
+ cmpq %r8,%r12
+ movl %r11d,(%rdi,%r12,4)
+ cmoveq %r11,%r9
+ movl %r13d,(%rdi,%r10,4)
+ addb %r11b,%r13b
+ movb (%rdi,%r13,4),%al
+ addb %r9b,%r12b
+ movq %r8,%r10
+ movl (%rdi,%r12,4),%r13d
+ rorq $8,%rax
+ incb %r10b
+ movl (%rdi,%r10,4),%r11d
+ cmpq %r10,%r12
+ movl %r9d,(%rdi,%r12,4)
+ cmoveq %r9,%r11
+ movl %r13d,(%rdi,%r8,4)
+ addb %r9b,%r13b
+ movb (%rdi,%r13,4),%al
+ addb %r11b,%r12b
+ movq %r10,%r8
+ movl (%rdi,%r12,4),%r13d
+ rorq $8,%rax
+ incb %r8b
+ movl (%rdi,%r8,4),%r9d
+ cmpq %r8,%r12
+ movl %r11d,(%rdi,%r12,4)
+ cmoveq %r11,%r9
+ movl %r13d,(%rdi,%r10,4)
+ addb %r11b,%r13b
+ movb (%rdi,%r13,4),%al
+ addb %r9b,%r12b
+ movq %r8,%r10
+ movl (%rdi,%r12,4),%r13d
+ rorq $8,%rax
+ incb %r10b
+ movl (%rdi,%r10,4),%r11d
+ cmpq %r10,%r12
+ movl %r9d,(%rdi,%r12,4)
+ cmoveq %r9,%r11
+ movl %r13d,(%rdi,%r8,4)
+ addb %r9b,%r13b
+ movb (%rdi,%r13,4),%al
+ addb %r11b,%r12b
+ movq %r10,%r8
+ movl (%rdi,%r12,4),%r13d
+ rorq $8,%rax
+ incb %r8b
+ movl (%rdi,%r8,4),%r9d
+ cmpq %r8,%r12
+ movl %r11d,(%rdi,%r12,4)
+ cmoveq %r11,%r9
+ movl %r13d,(%rdi,%r10,4)
+ addb %r11b,%r13b
+ movb (%rdi,%r13,4),%al
+ addb %r9b,%r12b
+ movq %r8,%r10
+ movl (%rdi,%r12,4),%r13d
+ rorq $8,%rax
+ incb %r10b
+ movl (%rdi,%r10,4),%r11d
+ cmpq %r10,%r12
+ movl %r9d,(%rdi,%r12,4)
+ cmoveq %r9,%r11
+ movl %r13d,(%rdi,%r8,4)
+ addb %r9b,%r13b
+ movb (%rdi,%r13,4),%al
+ addb %r11b,%r12b
+ movq %r10,%r8
+ movl (%rdi,%r12,4),%r13d
+ rorq $8,%rax
+ incb %r8b
+ movl (%rdi,%r8,4),%r9d
+ cmpq %r8,%r12
+ movl %r11d,(%rdi,%r12,4)
+ cmoveq %r11,%r9
+ movl %r13d,(%rdi,%r10,4)
+ addb %r11b,%r13b
+ movb (%rdi,%r13,4),%al
+ rorq $8,%rax
+ subq $8,%rsi
+
+ xorq (%rdx),%rax
+ addq $8,%rdx
+ movq %rax,(%rcx)
+ addq $8,%rcx
+
+ testq $-8,%rsi
+ jnz .Lloop8
+ cmpq $0,%rsi
+ jne .Lloop1
+ jmp .Lexit
+
+.align 16
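+# Tail loop for the int S-box: one RC4 round and one output byte per
+# iteration, used for short inputs and for the final len % 8 bytes.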
+.Lloop1:
+ addb %r9b,%r12b
+ movl (%rdi,%r12,4),%r13d
+ movl %r9d,(%rdi,%r12,4)
+ movl %r13d,(%rdi,%r8,4)
+ addb %r13b,%r9b
+ incb %r8b
+ movl (%rdi,%r9,4),%r13d
+ movl (%rdi,%r8,4),%r9d
+ xorb (%rdx),%r13b
+ incq %rdx
+ movb %r13b,(%rcx)
+ incq %rcx
+ decq %rsi
+ jnz .Lloop1
+ jmp .Lexit
+
+.align 16
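+# Byte-wide (char) S-box variant, selected when RC4_set_key marked the
+# key with -1 at offset 256.  A nonzero flag at 260(%rdi), also stored
+# by RC4_set_key, forces the one-byte-at-a-time loop.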
+.LRC4_CHAR:
+ addb $1,%r8b
+ movzbl (%rdi,%r8,1),%r9d
+ testq $-8,%rsi
+ jz .Lcloop1
+ cmpl $0,260(%rdi)
+ jnz .Lcloop1
+ jmp .Lcloop8
+.align 16
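+# 8x unrolled loop for the char S-box: 8 input bytes are loaded into
+# %eax/%ebx up front, each round XORs one keystream byte into %al or %bl
+# and rotates, and the jne/.LcmovN branch pairs stand in for cmov in the
+# y == x+1 alias case.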
+.Lcloop8:
+ movl (%rdx),%eax
+ movl 4(%rdx),%ebx
+ addb %r9b,%r12b
+ leaq 1(%r8),%r10
+ movzbl (%rdi,%r12,1),%r13d
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%r11d
+ movb %r9b,(%rdi,%r12,1)
+ cmpq %r10,%r12
+ movb %r13b,(%rdi,%r8,1)
+ jne .Lcmov0
+
+ movq %r9,%r11
+.Lcmov0:
+ addb %r9b,%r13b
+ xorb (%rdi,%r13,1),%al
+ rorl $8,%eax
+ addb %r11b,%r12b
+ leaq 1(%r10),%r8
+ movzbl (%rdi,%r12,1),%r13d
+ movzbl %r8b,%r8d
+ movzbl (%rdi,%r8,1),%r9d
+ movb %r11b,(%rdi,%r12,1)
+ cmpq %r8,%r12
+ movb %r13b,(%rdi,%r10,1)
+ jne .Lcmov1
+
+ movq %r11,%r9
+.Lcmov1:
+ addb %r11b,%r13b
+ xorb (%rdi,%r13,1),%al
+ rorl $8,%eax
+ addb %r9b,%r12b
+ leaq 1(%r8),%r10
+ movzbl (%rdi,%r12,1),%r13d
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%r11d
+ movb %r9b,(%rdi,%r12,1)
+ cmpq %r10,%r12
+ movb %r13b,(%rdi,%r8,1)
+ jne .Lcmov2
+
+ movq %r9,%r11
+.Lcmov2:
+ addb %r9b,%r13b
+ xorb (%rdi,%r13,1),%al
+ rorl $8,%eax
+ addb %r11b,%r12b
+ leaq 1(%r10),%r8
+ movzbl (%rdi,%r12,1),%r13d
+ movzbl %r8b,%r8d
+ movzbl (%rdi,%r8,1),%r9d
+ movb %r11b,(%rdi,%r12,1)
+ cmpq %r8,%r12
+ movb %r13b,(%rdi,%r10,1)
+ jne .Lcmov3
+
+ movq %r11,%r9
+.Lcmov3:
+ addb %r11b,%r13b
+ xorb (%rdi,%r13,1),%al
+ rorl $8,%eax
+ addb %r9b,%r12b
+ leaq 1(%r8),%r10
+ movzbl (%rdi,%r12,1),%r13d
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%r11d
+ movb %r9b,(%rdi,%r12,1)
+ cmpq %r10,%r12
+ movb %r13b,(%rdi,%r8,1)
+ jne .Lcmov4
+
+ movq %r9,%r11
+.Lcmov4:
+ addb %r9b,%r13b
+ xorb (%rdi,%r13,1),%bl
+ rorl $8,%ebx
+ addb %r11b,%r12b
+ leaq 1(%r10),%r8
+ movzbl (%rdi,%r12,1),%r13d
+ movzbl %r8b,%r8d
+ movzbl (%rdi,%r8,1),%r9d
+ movb %r11b,(%rdi,%r12,1)
+ cmpq %r8,%r12
+ movb %r13b,(%rdi,%r10,1)
+ jne .Lcmov5
+
+ movq %r11,%r9
+.Lcmov5:
+ addb %r11b,%r13b
+ xorb (%rdi,%r13,1),%bl
+ rorl $8,%ebx
+ addb %r9b,%r12b
+ leaq 1(%r8),%r10
+ movzbl (%rdi,%r12,1),%r13d
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%r11d
+ movb %r9b,(%rdi,%r12,1)
+ cmpq %r10,%r12
+ movb %r13b,(%rdi,%r8,1)
+ jne .Lcmov6
+
+ movq %r9,%r11
+.Lcmov6:
+ addb %r9b,%r13b
+ xorb (%rdi,%r13,1),%bl
+ rorl $8,%ebx
+ addb %r11b,%r12b
+ leaq 1(%r10),%r8
+ movzbl (%rdi,%r12,1),%r13d
+ movzbl %r8b,%r8d
+ movzbl (%rdi,%r8,1),%r9d
+ movb %r11b,(%rdi,%r12,1)
+ cmpq %r8,%r12
+ movb %r13b,(%rdi,%r10,1)
+ jne .Lcmov7
+
+ movq %r11,%r9
+.Lcmov7:
+ addb %r11b,%r13b
+ xorb (%rdi,%r13,1),%bl
+ rorl $8,%ebx
+ leaq -8(%rsi),%rsi
+ movl %eax,(%rcx)
+ leaq 8(%rdx),%rdx
+ movl %ebx,4(%rcx)
+ leaq 8(%rcx),%rcx
+
+ testq $-8,%rsi
+ jnz .Lcloop8
+ cmpq $0,%rsi
+ jne .Lcloop1
+ jmp .Lexit
+.align 16
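+# One-byte-at-a-time loop for the char S-box: short inputs, tail bytes,
+# and CPUs flagged at 260(%rdi).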
+.Lcloop1:
+ addb %r9b,%r12b
+ movzbl (%rdi,%r12,1),%r13d
+ movb %r9b,(%rdi,%r12,1)
+ movb %r13b,(%rdi,%r8,1)
+ addb %r9b,%r13b
+ addb $1,%r8b
+ movzbl %r13b,%r13d
+ movzbl %r8b,%r8d
+ movzbl (%rdi,%r13,1),%r13d
+ movzbl (%rdi,%r8,1),%r9d
+ xorb (%rdx),%r13b
+ leaq 1(%rdx),%rdx
+ movb %r13b,(%rcx)
+ leaq 1(%rcx),%rcx
+ subq $1,%rsi
+ jnz .Lcloop1
+ jmp .Lexit
+
+.align 16
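+# Common exit: x was kept pre-incremented inside the loops, so undo
+# that, store x and y back into the key header, and restore the three
+# callee-saved registers pushed in the prologue.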
+.Lexit:
+ subb $1,%r8b
+ movl %r8d,-8(%rdi)
+ movl %r12d,-4(%rdi)
+
+ movq (%rsp),%r13
+ movq 8(%rsp),%r12
+ movq 16(%rsp),%rbx
+ addq $24,%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size RC4,.-RC4
+
+.globl RC4_set_key
+.type RC4_set_key,@function
+.align 16
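+# void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
+# SysV AMD64 ABI: key=%rdi, len=%rsi, data=%rdx.  %rdx is pointed at the
+# end of the key and %rsi negated, so the key is walked by counting %rsi
+# up toward zero.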
+RC4_set_key:
+ leaq 8(%rdi),%rdi
+ leaq (%rdx,%rsi,1),%rdx
+ negq %rsi
+ movq %rsi,%rcx
+ xorl %eax,%eax
+ xorq %r9,%r9
+ xorq %r10,%r10
+ xorq %r11,%r11
+
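+# Probe OPENSSL_ia32cap_P: bit 20 selects the byte-wide (char) S-box
+# layout; when it does, bit 30 is latched at 260(%rdi), which forces the
+# 1x loop in RC4 above.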
+ movl OPENSSL_ia32cap_P(%rip),%r8d
+ btl $20,%r8d
+ jnc .Lw1stloop
+ btl $30,%r8d
+ setc %r9b
+ movl %r9d,260(%rdi)
+ jmp .Lc1stloop
+
+.align 16
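+# Identity-initialize the S-box, 4 bytes per element: S[i] = i.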
+.Lw1stloop:
+ movl %eax,(%rdi,%rax,4)
+ addb $1,%al
+ jnc .Lw1stloop
+
+ xorq %r9,%r9
+ xorq %r8,%r8
+.align 16
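+# Standard RC4 key schedule: j = (j + S[i] + key[i mod len]) & 0xff,
+# then swap S[i] and S[j].  %r9 is i, %r8 is j; %rsi counts up from -len
+# toward zero through the key bytes, and cmovzq resets it to %rcx
+# (= -len) when it wraps.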
+.Lw2ndloop:
+ movl (%rdi,%r9,4),%r10d
+ addb (%rdx,%rsi,1),%r8b
+ addb %r10b,%r8b
+ addq $1,%rsi
+ movl (%rdi,%r8,4),%r11d
+ cmovzq %rcx,%rsi
+ movl %r10d,(%rdi,%r8,4)
+ movl %r11d,(%rdi,%r9,4)
+ addb $1,%r9b
+ jnc .Lw2ndloop
+ jmp .Lexit_key
+
+.align 16
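+# Byte-wide variant of the identity initialization: S[i] = i.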
+.Lc1stloop:
+ movb %al,(%rdi,%rax,1)
+ addb $1,%al
+ jnc .Lc1stloop
+
+ xorq %r9,%r9
+ xorq %r8,%r8
+.align 16
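+# Byte-wide key schedule: same as .Lw2ndloop, except a jnz/mov pair
+# stands in for cmovzq, and -1 is stored at 256(%rdi) afterwards to mark
+# the char layout for RC4 above.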
+.Lc2ndloop:
+ movb (%rdi,%r9,1),%r10b
+ addb (%rdx,%rsi,1),%r8b
+ addb %r10b,%r8b
+ addq $1,%rsi
+ movb (%rdi,%r8,1),%r11b
+ jnz .Lcnowrap
+ movq %rcx,%rsi
+.Lcnowrap:
+ movb %r10b,(%rdi,%r8,1)
+ movb %r11b,(%rdi,%r9,1)
+ addb $1,%r9b
+ jnc .Lc2ndloop
+ movl $-1,256(%rdi)
+
+.align 16
+.Lexit_key:
+ xorl %eax,%eax
+ movl %eax,-8(%rdi)
+ movl %eax,-4(%rdi)
+ .byte 0xf3,0xc3
+.size RC4_set_key,.-RC4_set_key
+
+.globl RC4_options
+.type RC4_options,@function
+.align 16
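+# const char *RC4_options(void): returns a string naming the code path
+# the current CPU will take, picked from the .Lopts table below.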
+RC4_options:
+ leaq .Lopts(%rip),%rax
+ movl OPENSSL_ia32cap_P(%rip),%edx
+ btl $20,%edx
+ jnc .Ldone
+ addq $12,%rax
+ btl $30,%edx
+ jnc .Ldone
+ addq $13,%rax
+.Ldone:
+ .byte 0xf3,0xc3
+.align 64
+.Lopts:
+.byte 114,99,52,40,56,120,44,105,110,116,41,0 # "rc4(8x,int)"
+.byte 114,99,52,40,56,120,44,99,104,97,114,41,0 # "rc4(8x,char)"
+.byte 114,99,52,40,49,120,44,99,104,97,114,41,0 # "rc4(1x,char)"
+.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 # "RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+.align 64
+.size RC4_options,.-RC4_options