diff options
Diffstat (limited to 'deps/openssl/openssl/crypto/rc4/asm')
-rw-r--r-- | deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl | 22 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/rc4/asm/rc4-c64xplus.pl | 192 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl | 14 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl | 17 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl | 13 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl | 19 | ||||
-rwxr-xr-x | deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl | 32 |
7 files changed, 282 insertions, 27 deletions
diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl index 1d55d551e9..7d6f97c59e 100644 --- a/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl +++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # [Re]written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL @@ -43,6 +50,9 @@ # Westmere 5.1/+94%(**) # Sandy Bridge 5.0/+8% # Atom 12.6/+6% +# VIA Nano 6.4/+9% +# Ivy Bridge 4.9/±0% +# Bulldozer 4.9/+15% # # (*) PIII can actually deliver 6.6 cycles per byte with MMX code, # but this specific code performs poorly on Core2. And vice @@ -60,6 +70,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; +$output=pop; +open STDOUT,">$output"; + &asm_init($ARGV[0],"rc4-586.pl",$x86only = $ARGV[$#ARGV] eq "386"); $xx="eax"; @@ -144,7 +157,7 @@ if ($alt=0) { &movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4)); # (*) This is the key to Core2 and Westmere performance. - # Whithout movz out-of-order execution logic confuses + # Without movz out-of-order execution logic confuses # itself and fails to reorder loads and stores. Problem # appears to be fixed in Sandy Bridge... } @@ -304,7 +317,7 @@ $ido="ecx"; $idx="edx"; # void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data); -&function_begin("private_RC4_set_key"); +&function_begin("RC4_set_key"); &mov ($out,&wparam(0)); # load key &mov ($idi,&wparam(1)); # load len &mov ($inp,&wparam(2)); # load data @@ -382,7 +395,7 @@ $idx="edx"; &xor ("eax","eax"); &mov (&DWP(-8,$out),"eax"); # key->x=0; &mov (&DWP(-4,$out),"eax"); # key->y=0; -&function_end("private_RC4_set_key"); +&function_end("RC4_set_key"); # const char *RC4_options(void); &function_begin_B("RC4_options"); @@ -412,3 +425,4 @@ $idx="edx"; &asm_finish(); +close STDOUT; diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-c64xplus.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-c64xplus.pl new file mode 100644 index 0000000000..1354d18214 --- /dev/null +++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-c64xplus.pl @@ -0,0 +1,192 @@ +#! /usr/bin/env perl +# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# RC4 for C64x+. +# +# April 2014 +# +# RC4 subroutine processes one byte in 7.0 cycles, which is 3x faster +# than TI CGT-generated code. Loop is scheduled in such way that +# there is only one reference to memory in each cycle. This is done +# to avoid L1D memory banking conflicts, see SPRU871 TI publication +# for further details. Otherwise it should be possible to schedule +# the loop for iteration interval of 6... + +($KEY,$LEN,$INP,$OUT)=("A4","B4","A6","B6"); + +($KEYA,$XX,$TY,$xx,$ONE,$ret)=map("A$_",(5,7,8,9,1,2)); +($KEYB,$YY,$TX,$tx,$SUM,$dat)=map("B$_",(5,7,8,9,1,2)); + +$code.=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .nocmp + .asg RC4,_RC4 + .asg RC4_set_key,_RC4_set_key + .asg RC4_options,_RC4_options + .endif + + .global _RC4 + .align 16 +_RC4: + .asmfunc + MV $LEN,B0 + [!B0] BNOP B3 ; if (len==0) return; +||[B0] ADD $KEY,2,$KEYA +||[B0] ADD $KEY,2,$KEYB + [B0] MVK 1,$ONE +||[B0] LDBU *${KEYA}[-2],$XX ; key->x + [B0] LDBU *${KEYB}[-1],$YY ; key->y +|| NOP 4 + + ADD4 $ONE,$XX,$XX + LDBU *${KEYA}[$XX],$TX +|| MVC $LEN,ILC + NOP 4 +;;================================================== + SPLOOP 7 +|| ADD4 $TX,$YY,$YY + + LDBU *${KEYB}[$YY],$TY +|| MVD $XX,$xx +|| ADD4 $ONE,$XX,$XX + LDBU *${KEYA}[$XX],$tx + CMPEQ $YY,$XX,B0 +|| NOP 3 + STB $TX,*${KEYB}[$YY] +||[B0] ADD4 $TX,$YY,$YY + STB $TY,*${KEYA}[$xx] +||[!B0] ADD4 $tx,$YY,$YY +||[!B0] MVD $tx,$TX + ADD4 $TY,$TX,$SUM ; [0,0] $TX is not replaced by $tx yet! +|| NOP 2 + LDBU *$INP++,$dat +|| NOP 2 + LDBU *${KEYB}[$SUM],$ret +|| NOP 5 + XOR.L $dat,$ret,$ret + SPKERNEL +|| STB $ret,*$OUT++ +;;================================================== + SUB4 $XX,$ONE,$XX +|| NOP 5 + STB $XX,*${KEYA}[-2] ; key->x +|| SUB4 $YY,$TX,$YY +|| BNOP B3 + STB $YY,*${KEYB}[-1] ; key->y +|| NOP 5 + .endasmfunc + + .global _RC4_set_key + .align 16 +_RC4_set_key: + .asmfunc + .if .BIG_ENDIAN + MVK 0x00000404,$ONE +|| MVK 0x00000203,B0 + MVKH 0x04040000,$ONE +|| MVKH 0x00010000,B0 + .else + MVK 0x00000404,$ONE +|| MVK 0x00000100,B0 + MVKH 0x04040000,$ONE +|| MVKH 0x03020000,B0 + .endif + ADD $KEY,2,$KEYA +|| ADD $KEY,2,$KEYB +|| ADD $INP,$LEN,$ret ; end of input + LDBU *${INP}++,$dat +|| MVK 0,$TX + STH $TX,*${KEY}++ ; key->x=key->y=0 +|| MV B0,A0 +|| MVK 64-4,B0 + +;;================================================== + SPLOOPD 1 +|| MVC B0,ILC + + STNW A0,*${KEY}++ +|| ADD4 $ONE,A0,A0 + SPKERNEL +;;================================================== + + MVK 0,$YY +|| MVK 0,$XX + MVK 1,$ONE +|| MVK 256-1,B0 + +;;================================================== + SPLOOPD 8 +|| MVC B0,ILC + + ADD4 $dat,$YY,$YY +|| CMPEQ $INP,$ret,A0 ; end of input? + LDBU *${KEYB}[$YY],$TY +|| MVD $XX,$xx +|| ADD4 $ONE,$XX,$XX + LDBU *${KEYA}[$XX],$tx +||[A0] SUB $INP,$LEN,$INP ; rewind + LDBU *${INP}++,$dat +|| CMPEQ $YY,$XX,B0 +|| NOP 3 + STB $TX,*${KEYB}[$YY] +||[B0] ADD4 $TX,$YY,$YY + STB $TY,*${KEYA}[$xx] +||[!B0] ADD4 $tx,$YY,$YY +||[!B0] MV $tx,$TX + SPKERNEL +;;================================================== + + BNOP B3,5 + .endasmfunc + + .global _RC4_options + .align 16 +_RC4_options: +_rc4_options: + .asmfunc + BNOP B3,1 + ADDKPC _rc4_options,B4 + .if __TI_EABI__ + MVKL \$PCR_OFFSET(rc4_options,_rc4_options),A4 + MVKH \$PCR_OFFSET(rc4_options,_rc4_options),A4 + .else + MVKL (rc4_options-_rc4_options),A4 + MVKH (rc4_options-_rc4_options),A4 + .endif + ADD B4,A4,A4 + .endasmfunc + + .if __TI_EABI__ + .sect ".text:rc4_options.const" + .else + .sect ".const:rc4_options" + .endif + .align 4 +rc4_options: + .cstring "rc4(sploop,char)" + .cstring "RC4 for C64+, CRYPTOGAMS by <appro\@openssl.org>" + .align 4 +___ + +$output=pop; +open STDOUT,">$output"; +print $code; +close STDOUT; diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl index 49cd5b5e69..5e8f5f55b2 100644 --- a/deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl +++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by David Mosberger <David.Mosberger@acm.org> based on the @@ -164,6 +171,9 @@ # random input data). # +$output = pop; +open STDOUT,">$output"; + $phases = 4; # number of stages/phases in the pipelined-loop $unroll_count = 6; # number of times we unrolled it $pComI = (1 << 0); @@ -753,3 +763,5 @@ $code.=<<___; ___ print $code; + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl index 272fa91e1a..890161bac5 100644 --- a/deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl +++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -29,10 +36,16 @@ # Core2 6.5 5.8 12.3 7.7 +60% # Westmere 4.3 5.2 9.5 7.0 +36% # Sandy Bridge 4.2 5.5 9.7 6.8 +43% +# Ivy Bridge 4.1 5.2 9.3 6.0 +54% +# Haswell 4.0 5.0 9.0 5.7 +60% +# Skylake 6.3(**) 5.0 11.3 5.3 +110% # Atom 9.3 6.5 15.8 11.1 +42% +# VIA Nano 6.3 5.4 11.7 8.6 +37% +# Bulldozer 4.5 5.4 9.9 7.7 +29% # # (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement # is +53%... +# (**) unidentified anomaly; my ($rc4,$md5)=(1,1); # what to generate? my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(), @@ -51,7 +64,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs); diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl index ad7e65651c..006b6b01af 100644 --- a/deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl +++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL @@ -243,9 +250,9 @@ ___ $code.=<<___; - .EXPORT private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR + .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR .ALIGN 8 -private_RC4_set_key +RC4_set_key .PROC .CALLINFO NO_CALLS .ENTRY diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl index 7528ece13c..5589503aa2 100644 --- a/deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl +++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL @@ -33,7 +40,7 @@ if ($flavour =~ /3[12]/) { $g="g"; } -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $rp="%r14"; @@ -171,10 +178,10 @@ $ikey="%r7"; $iinp="%r8"; $code.=<<___; -.globl private_RC4_set_key -.type private_RC4_set_key,\@function +.globl RC4_set_key +.type RC4_set_key,\@function .align 64 -private_RC4_set_key: +RC4_set_key: stm${g} %r6,%r8,6*$SIZE_T($sp) lhi $cnt,256 la $idx,0(%r0) @@ -210,7 +217,7 @@ private_RC4_set_key: .Ldone: lm${g} %r6,%r8,6*$SIZE_T($sp) br $rp -.size private_RC4_set_key,.-private_RC4_set_key +.size RC4_set_key,.-RC4_set_key ___ } diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl index 20722d3e72..aaed2b1e61 100755 --- a/deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl +++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL @@ -50,7 +57,7 @@ # As was shown by Zou Nanhai loop unrolling can improve Intel EM64T # performance by >30% [unlike P4 32-bit case that is]. But this is # provided that loads are reordered even more aggressively! Both code -# pathes, AMD64 and EM64T, reorder loads in essentially same manner +# paths, AMD64 and EM64T, reorder loads in essentially same manner # as my IA-64 implementation. On Opteron this resulted in modest 5% # improvement [I had to test it], while final Intel P4 performance # achieves respectful 432MBps on 2.8GHz processor now. For reference. @@ -92,6 +99,9 @@ # Westmere 4.2/+60% # Sandy Bridge 4.2/+120% # Atom 9.3/+80% +# VIA Nano 6.4/+4% +# Ivy Bridge 4.1/+30% +# Bulldozer 4.5/+30%(*) # # (*) But corresponding loop has less instructions, which should have # positive effect on upcoming Bulldozer, which has one less ALU. @@ -112,7 +122,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; $dat="%rdi"; # arg1 @@ -430,10 +440,10 @@ $idx="%r8"; $ido="%r9"; $code.=<<___; -.globl private_RC4_set_key -.type private_RC4_set_key,\@function,3 +.globl RC4_set_key +.type RC4_set_key,\@function,3 .align 16 -private_RC4_set_key: +RC4_set_key: lea 8($dat),$dat lea ($inp,$len),$inp neg $len @@ -500,7 +510,7 @@ private_RC4_set_key: mov %eax,-8($dat) mov %eax,-4($dat) ret -.size private_RC4_set_key,.-private_RC4_set_key +.size RC4_set_key,.-RC4_set_key .globl RC4_options .type RC4_options,\@abi-omnipotent @@ -645,16 +655,16 @@ key_se_handler: .rva .LSEH_end_RC4 .rva .LSEH_info_RC4 - .rva .LSEH_begin_private_RC4_set_key - .rva .LSEH_end_private_RC4_set_key - .rva .LSEH_info_private_RC4_set_key + .rva .LSEH_begin_RC4_set_key + .rva .LSEH_end_RC4_set_key + .rva .LSEH_info_RC4_set_key .section .xdata .align 8 .LSEH_info_RC4: .byte 9,0,0,0 .rva stream_se_handler -.LSEH_info_private_RC4_set_key: +.LSEH_info_RC4_set_key: .byte 9,0,0,0 .rva key_se_handler ___ |