diff options
Diffstat (limited to 'deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s')
-rw-r--r-- | deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s | 218 |
1 files changed, 180 insertions, 38 deletions
diff --git a/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s b/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s index 9ba94e4b1a..513ce6a58b 100644 --- a/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s +++ b/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s @@ -29,11 +29,146 @@ RC4: movl (%edi,%eax,4),%ecx andl $-4,%edx jz .L002loop1 - leal -4(%esi,%edx,1),%edx - movl %edx,28(%esp) + testl $-8,%edx movl %ebp,32(%esp) + jz .L003go4loop4 + leal OPENSSL_ia32cap_P,%ebp + btl $26,(%ebp) + jnc .L003go4loop4 + movl 32(%esp),%ebp + andl $-8,%edx + leal -8(%esi,%edx,1),%edx + movl %edx,-4(%edi) + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + movq (%esi),%mm0 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 + jmp .L004loop_mmx_enter +.align 16 +.L005loop_mmx: + addb %cl,%bl + psllq $56,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movq (%esi),%mm0 + movq %mm2,-8(%ebp,%esi,1) + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 +.L004loop_mmx_enter: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm0,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $8,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $16,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $24,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $32,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $40,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $48,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + movl %ebx,%edx + xorl %ebx,%ebx + movb %dl,%bl + cmpl -4(%edi),%esi + leal 8(%esi),%esi + jb .L005loop_mmx + psllq $56,%mm1 + pxor %mm1,%mm2 + movq %mm2,-8(%ebp,%esi,1) + emms + cmpl 24(%esp),%esi + je .L006done + jmp .L002loop1 .align 16 -.L003loop4: +.L003go4loop4: + leal -4(%esi,%edx,1),%edx + movl %edx,28(%esp) +.L007loop4: addb %cl,%bl movl (%edi,%ebx,4),%edx movl %ecx,(%edi,%ebx,4) @@ -79,9 +214,9 @@ RC4: movl %ebp,(%ecx,%esi,1) leal 4(%esi),%esi movl (%edi,%eax,4),%ecx - jb .L003loop4 + jb .L007loop4 cmpl 24(%esp),%esi - je .L004done + je .L006done movl 32(%esp),%ebp .align 16 .L002loop1: @@ -99,11 +234,11 @@ RC4: cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) jb .L002loop1 - jmp .L004done + jmp .L006done .align 16 .L001RC4_CHAR: movzbl (%edi,%eax,1),%ecx -.L005cloop1: +.L008cloop1: addb %cl,%bl movzbl (%edi,%ebx,1),%edx movb %cl,(%edi,%ebx,1) @@ -116,10 +251,10 @@ RC4: movzbl (%edi,%eax,1),%ecx cmpl 24(%esp),%esi movb %dl,-1(%ebp,%esi,1) - jb .L005cloop1 -.L004done: + jb .L008cloop1 +.L006done: decb %al - movb %bl,-4(%edi) + movl %ebx,-4(%edi) movb %al,-8(%edi) .L000abort: popl %edi @@ -128,11 +263,11 @@ RC4: popl %ebp ret .size RC4,.-.L_RC4_begin -.globl RC4_set_key -.type RC4_set_key,@function +.globl private_RC4_set_key +.type private_RC4_set_key,@function .align 16 -RC4_set_key: -.L_RC4_set_key_begin: +private_RC4_set_key: +.L_private_RC4_set_key_begin: pushl %ebp pushl %ebx pushl %esi @@ -147,53 +282,53 @@ RC4_set_key: xorl %eax,%eax movl %ebp,-4(%edi) btl $20,(%edx) - jc .L006c1stloop + jc .L009c1stloop .align 16 -.L007w1stloop: +.L010w1stloop: movl %eax,(%edi,%eax,4) addb $1,%al - jnc .L007w1stloop + jnc .L010w1stloop xorl %ecx,%ecx xorl %edx,%edx .align 16 -.L008w2ndloop: +.L011w2ndloop: movl (%edi,%ecx,4),%eax addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movl (%edi,%edx,4),%ebx - jnz .L009wnowrap + jnz .L012wnowrap movl -4(%edi),%ebp -.L009wnowrap: +.L012wnowrap: movl %eax,(%edi,%edx,4) movl %ebx,(%edi,%ecx,4) addb $1,%cl - jnc .L008w2ndloop - jmp .L010exit + jnc .L011w2ndloop + jmp .L013exit .align 16 -.L006c1stloop: +.L009c1stloop: movb %al,(%edi,%eax,1) addb $1,%al - jnc .L006c1stloop + jnc .L009c1stloop xorl %ecx,%ecx xorl %edx,%edx xorl %ebx,%ebx .align 16 -.L011c2ndloop: +.L014c2ndloop: movb (%edi,%ecx,1),%al addb (%esi,%ebp,1),%dl addb %al,%dl addl $1,%ebp movb (%edi,%edx,1),%bl - jnz .L012cnowrap + jnz .L015cnowrap movl -4(%edi),%ebp -.L012cnowrap: +.L015cnowrap: movb %al,(%edi,%edx,1) movb %bl,(%edi,%ecx,1) addb $1,%cl - jnc .L011c2ndloop + jnc .L014c2ndloop movl $-1,256(%edi) -.L010exit: +.L013exit: xorl %eax,%eax movl %eax,-8(%edi) movl %eax,-4(%edi) @@ -202,29 +337,36 @@ RC4_set_key: popl %ebx popl %ebp ret -.size RC4_set_key,.-.L_RC4_set_key_begin +.size private_RC4_set_key,.-.L_private_RC4_set_key_begin .globl RC4_options .type RC4_options,@function .align 16 RC4_options: .L_RC4_options_begin: - call .L013pic_point -.L013pic_point: + call .L016pic_point +.L016pic_point: popl %eax - leal .L014opts-.L013pic_point(%eax),%eax + leal .L017opts-.L016pic_point(%eax),%eax leal OPENSSL_ia32cap_P,%edx - btl $20,(%edx) - jnc .L015skip + movl (%edx),%edx + btl $20,%edx + jc .L0181xchar + btl $26,%edx + jnc .L019ret + addl $25,%eax + ret +.L0181xchar: addl $12,%eax -.L015skip: +.L019ret: ret .align 64 -.L014opts: +.L017opts: .byte 114,99,52,40,52,120,44,105,110,116,41,0 .byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,56,120,44,109,109,120,41,0 .byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .size RC4_options,.-.L_RC4_options_begin -.comm OPENSSL_ia32cap_P,4,4 +.comm OPENSSL_ia32cap_P,8,4 |