summaryrefslogtreecommitdiff
path: root/deps/openssl/openssl/crypto/rc4/asm
diff options
context:
space:
mode:
Diffstat (limited to 'deps/openssl/openssl/crypto/rc4/asm')
-rw-r--r--deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl22
-rw-r--r--deps/openssl/openssl/crypto/rc4/asm/rc4-c64xplus.pl192
-rw-r--r--deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl14
-rw-r--r--deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl17
-rw-r--r--deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl13
-rw-r--r--deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl19
-rwxr-xr-xdeps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl32
7 files changed, 282 insertions, 27 deletions
diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl
index 1d55d551e9..7d6f97c59e 100644
--- a/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl
+++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# [Re]written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -43,6 +50,9 @@
# Westmere 5.1/+94%(**)
# Sandy Bridge 5.0/+8%
# Atom 12.6/+6%
+# VIA Nano 6.4/+9%
+# Ivy Bridge 4.9/±0%
+# Bulldozer 4.9/+15%
#
# (*) PIII can actually deliver 6.6 cycles per byte with MMX code,
# but this specific code performs poorly on Core2. And vice
@@ -60,6 +70,9 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
+$output=pop;
+open STDOUT,">$output";
+
&asm_init($ARGV[0],"rc4-586.pl",$x86only = $ARGV[$#ARGV] eq "386");
$xx="eax";
@@ -144,7 +157,7 @@ if ($alt=0) {
&movd ($i>0?"mm1":"mm2",&DWP(0,$dat,$ty,4));
# (*) This is the key to Core2 and Westmere performance.
- # Whithout movz out-of-order execution logic confuses
+ # Without movz out-of-order execution logic confuses
# itself and fails to reorder loads and stores. Problem
# appears to be fixed in Sandy Bridge...
}
@@ -304,7 +317,7 @@ $ido="ecx";
$idx="edx";
# void RC4_set_key(RC4_KEY *key,int len,const unsigned char *data);
-&function_begin("private_RC4_set_key");
+&function_begin("RC4_set_key");
&mov ($out,&wparam(0)); # load key
&mov ($idi,&wparam(1)); # load len
&mov ($inp,&wparam(2)); # load data
@@ -382,7 +395,7 @@ $idx="edx";
&xor ("eax","eax");
&mov (&DWP(-8,$out),"eax"); # key->x=0;
&mov (&DWP(-4,$out),"eax"); # key->y=0;
-&function_end("private_RC4_set_key");
+&function_end("RC4_set_key");
# const char *RC4_options(void);
&function_begin_B("RC4_options");
@@ -412,3 +425,4 @@ $idx="edx";
&asm_finish();
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-c64xplus.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-c64xplus.pl
new file mode 100644
index 0000000000..1354d18214
--- /dev/null
+++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-c64xplus.pl
@@ -0,0 +1,192 @@
+#! /usr/bin/env perl
+# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# RC4 for C64x+.
+#
+# April 2014
+#
+# RC4 subroutine processes one byte in 7.0 cycles, which is 3x faster
+# than TI CGT-generated code. Loop is scheduled in such way that
+# there is only one reference to memory in each cycle. This is done
+# to avoid L1D memory banking conflicts, see SPRU871 TI publication
+# for further details. Otherwise it should be possible to schedule
+# the loop for iteration interval of 6...
+
+($KEY,$LEN,$INP,$OUT)=("A4","B4","A6","B6");
+
+($KEYA,$XX,$TY,$xx,$ONE,$ret)=map("A$_",(5,7,8,9,1,2));
+($KEYB,$YY,$TX,$tx,$SUM,$dat)=map("B$_",(5,7,8,9,1,2));
+
+$code.=<<___;
+ .text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
+ .if __TI_EABI__
+ .nocmp
+ .asg RC4,_RC4
+ .asg RC4_set_key,_RC4_set_key
+ .asg RC4_options,_RC4_options
+ .endif
+
+ .global _RC4
+ .align 16
+_RC4:
+ .asmfunc
+ MV $LEN,B0
+ [!B0] BNOP B3 ; if (len==0) return;
+||[B0] ADD $KEY,2,$KEYA
+||[B0] ADD $KEY,2,$KEYB
+ [B0] MVK 1,$ONE
+||[B0] LDBU *${KEYA}[-2],$XX ; key->x
+ [B0] LDBU *${KEYB}[-1],$YY ; key->y
+|| NOP 4
+
+ ADD4 $ONE,$XX,$XX
+ LDBU *${KEYA}[$XX],$TX
+|| MVC $LEN,ILC
+ NOP 4
+;;==================================================
+ SPLOOP 7
+|| ADD4 $TX,$YY,$YY
+
+ LDBU *${KEYB}[$YY],$TY
+|| MVD $XX,$xx
+|| ADD4 $ONE,$XX,$XX
+ LDBU *${KEYA}[$XX],$tx
+ CMPEQ $YY,$XX,B0
+|| NOP 3
+ STB $TX,*${KEYB}[$YY]
+||[B0] ADD4 $TX,$YY,$YY
+ STB $TY,*${KEYA}[$xx]
+||[!B0] ADD4 $tx,$YY,$YY
+||[!B0] MVD $tx,$TX
+ ADD4 $TY,$TX,$SUM ; [0,0] $TX is not replaced by $tx yet!
+|| NOP 2
+ LDBU *$INP++,$dat
+|| NOP 2
+ LDBU *${KEYB}[$SUM],$ret
+|| NOP 5
+ XOR.L $dat,$ret,$ret
+ SPKERNEL
+|| STB $ret,*$OUT++
+;;==================================================
+ SUB4 $XX,$ONE,$XX
+|| NOP 5
+ STB $XX,*${KEYA}[-2] ; key->x
+|| SUB4 $YY,$TX,$YY
+|| BNOP B3
+ STB $YY,*${KEYB}[-1] ; key->y
+|| NOP 5
+ .endasmfunc
+
+ .global _RC4_set_key
+ .align 16
+_RC4_set_key:
+ .asmfunc
+ .if .BIG_ENDIAN
+ MVK 0x00000404,$ONE
+|| MVK 0x00000203,B0
+ MVKH 0x04040000,$ONE
+|| MVKH 0x00010000,B0
+ .else
+ MVK 0x00000404,$ONE
+|| MVK 0x00000100,B0
+ MVKH 0x04040000,$ONE
+|| MVKH 0x03020000,B0
+ .endif
+ ADD $KEY,2,$KEYA
+|| ADD $KEY,2,$KEYB
+|| ADD $INP,$LEN,$ret ; end of input
+ LDBU *${INP}++,$dat
+|| MVK 0,$TX
+ STH $TX,*${KEY}++ ; key->x=key->y=0
+|| MV B0,A0
+|| MVK 64-4,B0
+
+;;==================================================
+ SPLOOPD 1
+|| MVC B0,ILC
+
+ STNW A0,*${KEY}++
+|| ADD4 $ONE,A0,A0
+ SPKERNEL
+;;==================================================
+
+ MVK 0,$YY
+|| MVK 0,$XX
+ MVK 1,$ONE
+|| MVK 256-1,B0
+
+;;==================================================
+ SPLOOPD 8
+|| MVC B0,ILC
+
+ ADD4 $dat,$YY,$YY
+|| CMPEQ $INP,$ret,A0 ; end of input?
+ LDBU *${KEYB}[$YY],$TY
+|| MVD $XX,$xx
+|| ADD4 $ONE,$XX,$XX
+ LDBU *${KEYA}[$XX],$tx
+||[A0] SUB $INP,$LEN,$INP ; rewind
+ LDBU *${INP}++,$dat
+|| CMPEQ $YY,$XX,B0
+|| NOP 3
+ STB $TX,*${KEYB}[$YY]
+||[B0] ADD4 $TX,$YY,$YY
+ STB $TY,*${KEYA}[$xx]
+||[!B0] ADD4 $tx,$YY,$YY
+||[!B0] MV $tx,$TX
+ SPKERNEL
+;;==================================================
+
+ BNOP B3,5
+ .endasmfunc
+
+ .global _RC4_options
+ .align 16
+_RC4_options:
+_rc4_options:
+ .asmfunc
+ BNOP B3,1
+ ADDKPC _rc4_options,B4
+ .if __TI_EABI__
+ MVKL \$PCR_OFFSET(rc4_options,_rc4_options),A4
+ MVKH \$PCR_OFFSET(rc4_options,_rc4_options),A4
+ .else
+ MVKL (rc4_options-_rc4_options),A4
+ MVKH (rc4_options-_rc4_options),A4
+ .endif
+ ADD B4,A4,A4
+ .endasmfunc
+
+ .if __TI_EABI__
+ .sect ".text:rc4_options.const"
+ .else
+ .sect ".const:rc4_options"
+ .endif
+ .align 4
+rc4_options:
+ .cstring "rc4(sploop,char)"
+ .cstring "RC4 for C64+, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 4
+___
+
+$output=pop;
+open STDOUT,">$output";
+print $code;
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl
index 49cd5b5e69..5e8f5f55b2 100644
--- a/deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl
+++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-ia64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by David Mosberger <David.Mosberger@acm.org> based on the
@@ -164,6 +171,9 @@
# random input data).
#
+$output = pop;
+open STDOUT,">$output";
+
$phases = 4; # number of stages/phases in the pipelined-loop
$unroll_count = 6; # number of times we unrolled it
$pComI = (1 << 0);
@@ -753,3 +763,5 @@ $code.=<<___;
___
print $code;
+
+close STDOUT;
diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl
index 272fa91e1a..890161bac5 100644
--- a/deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl
+++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-md5-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -29,10 +36,16 @@
# Core2 6.5 5.8 12.3 7.7 +60%
# Westmere 4.3 5.2 9.5 7.0 +36%
# Sandy Bridge 4.2 5.5 9.7 6.8 +43%
+# Ivy Bridge 4.1 5.2 9.3 6.0 +54%
+# Haswell 4.0 5.0 9.0 5.7 +60%
+# Skylake 6.3(**) 5.0 11.3 5.3 +110%
# Atom 9.3 6.5 15.8 11.1 +42%
+# VIA Nano 6.3 5.4 11.7 8.6 +37%
+# Bulldozer 4.5 5.4 9.9 7.7 +29%
#
# (*) rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
# is +53%...
+# (**) unidentified anomaly;
my ($rc4,$md5)=(1,1); # what to generate?
my $D="#" if (!$md5); # if set to "#", MD5 is stitched into RC4(),
@@ -51,7 +64,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs);
diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl
index ad7e65651c..006b6b01af 100644
--- a/deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl
+++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-parisc.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -243,9 +250,9 @@ ___
$code.=<<___;
- .EXPORT private_RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
+ .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
.ALIGN 8
-private_RC4_set_key
+RC4_set_key
.PROC
.CALLINFO NO_CALLS
.ENTRY
diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl
index 7528ece13c..5589503aa2 100644
--- a/deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl
+++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-s390x.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -33,7 +40,7 @@ if ($flavour =~ /3[12]/) {
$g="g";
}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$rp="%r14";
@@ -171,10 +178,10 @@ $ikey="%r7";
$iinp="%r8";
$code.=<<___;
-.globl private_RC4_set_key
-.type private_RC4_set_key,\@function
+.globl RC4_set_key
+.type RC4_set_key,\@function
.align 64
-private_RC4_set_key:
+RC4_set_key:
stm${g} %r6,%r8,6*$SIZE_T($sp)
lhi $cnt,256
la $idx,0(%r0)
@@ -210,7 +217,7 @@ private_RC4_set_key:
.Ldone:
lm${g} %r6,%r8,6*$SIZE_T($sp)
br $rp
-.size private_RC4_set_key,.-private_RC4_set_key
+.size RC4_set_key,.-RC4_set_key
___
}
diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl
index 20722d3e72..aaed2b1e61 100755
--- a/deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl
+++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-x86_64.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@@ -50,7 +57,7 @@
# As was shown by Zou Nanhai loop unrolling can improve Intel EM64T
# performance by >30% [unlike P4 32-bit case that is]. But this is
# provided that loads are reordered even more aggressively! Both code
-# pathes, AMD64 and EM64T, reorder loads in essentially same manner
+# paths, AMD64 and EM64T, reorder loads in essentially same manner
# as my IA-64 implementation. On Opteron this resulted in modest 5%
# improvement [I had to test it], while final Intel P4 performance
# achieves respectful 432MBps on 2.8GHz processor now. For reference.
@@ -92,6 +99,9 @@
# Westmere 4.2/+60%
# Sandy Bridge 4.2/+120%
# Atom 9.3/+80%
+# VIA Nano 6.4/+4%
+# Ivy Bridge 4.1/+30%
+# Bulldozer 4.5/+30%(*)
#
# (*) But corresponding loop has less instructions, which should have
# positive effect on upcoming Bulldozer, which has one less ALU.
@@ -112,7 +122,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
$dat="%rdi"; # arg1
@@ -430,10 +440,10 @@ $idx="%r8";
$ido="%r9";
$code.=<<___;
-.globl private_RC4_set_key
-.type private_RC4_set_key,\@function,3
+.globl RC4_set_key
+.type RC4_set_key,\@function,3
.align 16
-private_RC4_set_key:
+RC4_set_key:
lea 8($dat),$dat
lea ($inp,$len),$inp
neg $len
@@ -500,7 +510,7 @@ private_RC4_set_key:
mov %eax,-8($dat)
mov %eax,-4($dat)
ret
-.size private_RC4_set_key,.-private_RC4_set_key
+.size RC4_set_key,.-RC4_set_key
.globl RC4_options
.type RC4_options,\@abi-omnipotent
@@ -645,16 +655,16 @@ key_se_handler:
.rva .LSEH_end_RC4
.rva .LSEH_info_RC4
- .rva .LSEH_begin_private_RC4_set_key
- .rva .LSEH_end_private_RC4_set_key
- .rva .LSEH_info_private_RC4_set_key
+ .rva .LSEH_begin_RC4_set_key
+ .rva .LSEH_end_RC4_set_key
+ .rva .LSEH_info_RC4_set_key
.section .xdata
.align 8
.LSEH_info_RC4:
.byte 9,0,0,0
.rva stream_se_handler
-.LSEH_info_private_RC4_set_key:
+.LSEH_info_RC4_set_key:
.byte 9,0,0,0
.rva key_se_handler
___