aboutsummaryrefslogtreecommitdiff
path: root/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
diff options
context:
space:
mode:
authorBen Noordhuis <info@bnoordhuis.nl>2013-04-29 14:15:07 +0200
committerBen Noordhuis <info@bnoordhuis.nl>2013-04-29 14:17:50 +0200
commit179784e31e72fcdd0a2b1a596f7aebb43dc87913 (patch)
tree2351f6f6bcd90b40d39e9630662a380e74a2c515 /deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
parentd3ddee61c2f83e2a3b7ebabfbefee060801d27f5 (diff)
downloadandroid-node-v8-179784e31e72fcdd0a2b1a596f7aebb43dc87913.tar.gz
android-node-v8-179784e31e72fcdd0a2b1a596f7aebb43dc87913.tar.bz2
android-node-v8-179784e31e72fcdd0a2b1a596f7aebb43dc87913.zip
Revert "deps: downgrade openssl to v1.0.0f"
This commit undoes the downgrade from OpenSSL v1.0.1e to v1.0.0f, effectively upgrading OpenSSL to v1.0.1e again. The reason for the downgrade was to work around compatibility issues with certain TLS servers in the stable branch. See the commit log of 4fdb8ac and the linked issue for details. We're going to revisit that in the master branch. This reverts commit 4fdb8acdaef4c3cb1d855e992ada0e63fee520a6.
Diffstat (limited to 'deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl')
-rw-r--r--deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl102
1 files changed, 77 insertions, 25 deletions
diff --git a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
index f61246f5b6..9fd64e81ee 100644
--- a/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
+++ b/deps/openssl/openssl/crypto/bn/asm/s390x-mont.pl
@@ -32,6 +32,33 @@
# Reschedule to minimize/avoid Address Generation Interlock hazard,
# make inner loops counter-based.
+# November 2010.
+#
+# Adapt for -m31 build. If kernel supports what's called "highgprs"
+# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
+# instructions and achieve "64-bit" performance even in 31-bit legacy
+# application context. The feature is not specific to any particular
+# processor, as long as it's "z-CPU". Latter implies that the code
+# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG
+# is achieved by swapping words after 64-bit loads, follow _dswap-s.
+# On z990 it was measured to perform 2.6-2.2 times better than
+# compiler-generated code, less for longer keys...
+
+$flavour = shift;
+
+if ($flavour =~ /3[12]/) {
+ $SIZE_T=4;
+ $g="";
+} else {
+ $SIZE_T=8;
+ $g="g";
+}
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$stdframe=16*$SIZE_T+4*8;
+
$mn0="%r0";
$num="%r1";
@@ -60,34 +87,44 @@ $code.=<<___;
.globl bn_mul_mont
.type bn_mul_mont,\@function
bn_mul_mont:
- lgf $num,164($sp) # pull $num
- sla $num,3 # $num to enumerate bytes
+ lgf $num,`$stdframe+$SIZE_T-4`($sp) # pull $num
+ sla $num,`log($SIZE_T)/log(2)` # $num to enumerate bytes
la $bp,0($num,$bp)
- stg %r2,16($sp)
+ st${g} %r2,2*$SIZE_T($sp)
cghi $num,16 #
lghi %r2,0 #
blr %r14 # if($num<16) return 0;
+___
+$code.=<<___ if ($flavour =~ /3[12]/);
+ tmll $num,4
+ bnzr %r14 # if ($num&1) return 0;
+___
+$code.=<<___ if ($flavour !~ /3[12]/);
cghi $num,96 #
bhr %r14 # if($num>96) return 0;
+___
+$code.=<<___;
+ stm${g} %r3,%r15,3*$SIZE_T($sp)
- stmg %r3,%r15,24($sp)
-
- lghi $rp,-160-8 # leave room for carry bit
+ lghi $rp,-$stdframe-8 # leave room for carry bit
lcgr $j,$num # -$num
lgr %r0,$sp
la $rp,0($rp,$sp)
la $sp,0($j,$rp) # alloca
- stg %r0,0($sp) # back chain
+ st${g} %r0,0($sp) # back chain
sra $num,3 # restore $num
la $bp,0($j,$bp) # restore $bp
ahi $num,-1 # adjust $num for inner loop
lg $n0,0($n0) # pull n0
+ _dswap $n0
lg $bi,0($bp)
+ _dswap $bi
lg $alo,0($ap)
+ _dswap $alo
mlgr $ahi,$bi # ap[0]*bp[0]
lgr $AHI,$ahi
@@ -95,6 +132,7 @@ bn_mul_mont:
msgr $mn0,$n0
lg $nlo,0($np) #
+ _dswap $nlo
mlgr $nhi,$mn0 # np[0]*m1
algr $nlo,$alo # +="tp[0]"
lghi $NHI,0
@@ -106,12 +144,14 @@ bn_mul_mont:
.align 16
.L1st:
lg $alo,0($j,$ap)
+ _dswap $alo
mlgr $ahi,$bi # ap[j]*bp[0]
algr $alo,$AHI
lghi $AHI,0
alcgr $AHI,$ahi
lg $nlo,0($j,$np)
+ _dswap $nlo
mlgr $nhi,$mn0 # np[j]*m1
algr $nlo,$NHI
lghi $NHI,0
@@ -119,22 +159,24 @@ bn_mul_mont:
algr $nlo,$alo
alcgr $NHI,$nhi
- stg $nlo,160-8($j,$sp) # tp[j-1]=
+ stg $nlo,$stdframe-8($j,$sp) # tp[j-1]=
la $j,8($j) # j++
brct $count,.L1st
algr $NHI,$AHI
lghi $AHI,0
alcgr $AHI,$AHI # upmost overflow bit
- stg $NHI,160-8($j,$sp)
- stg $AHI,160($j,$sp)
+ stg $NHI,$stdframe-8($j,$sp)
+ stg $AHI,$stdframe($j,$sp)
la $bp,8($bp) # bp++
.Louter:
lg $bi,0($bp) # bp[i]
+ _dswap $bi
lg $alo,0($ap)
+ _dswap $alo
mlgr $ahi,$bi # ap[0]*bp[i]
- alg $alo,160($sp) # +=tp[0]
+ alg $alo,$stdframe($sp) # +=tp[0]
lghi $AHI,0
alcgr $AHI,$ahi
@@ -142,6 +184,7 @@ bn_mul_mont:
msgr $mn0,$n0 # tp[0]*n0
lg $nlo,0($np) # np[0]
+ _dswap $nlo
mlgr $nhi,$mn0 # np[0]*m1
algr $nlo,$alo # +="tp[0]"
lghi $NHI,0
@@ -153,14 +196,16 @@ bn_mul_mont:
.align 16
.Linner:
lg $alo,0($j,$ap)
+ _dswap $alo
mlgr $ahi,$bi # ap[j]*bp[i]
algr $alo,$AHI
lghi $AHI,0
alcgr $ahi,$AHI
- alg $alo,160($j,$sp)# +=tp[j]
+ alg $alo,$stdframe($j,$sp)# +=tp[j]
alcgr $AHI,$ahi
lg $nlo,0($j,$np)
+ _dswap $nlo
mlgr $nhi,$mn0 # np[j]*m1
algr $nlo,$NHI
lghi $NHI,0
@@ -168,31 +213,33 @@ bn_mul_mont:
algr $nlo,$alo # +="tp[j]"
alcgr $NHI,$nhi
- stg $nlo,160-8($j,$sp) # tp[j-1]=
+ stg $nlo,$stdframe-8($j,$sp) # tp[j-1]=
la $j,8($j) # j++
brct $count,.Linner
algr $NHI,$AHI
lghi $AHI,0
alcgr $AHI,$AHI
- alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit
+ alg $NHI,$stdframe($j,$sp)# accumulate previous upmost overflow bit
lghi $ahi,0
alcgr $AHI,$ahi # new upmost overflow bit
- stg $NHI,160-8($j,$sp)
- stg $AHI,160($j,$sp)
+ stg $NHI,$stdframe-8($j,$sp)
+ stg $AHI,$stdframe($j,$sp)
la $bp,8($bp) # bp++
- clg $bp,160+8+32($j,$sp) # compare to &bp[num]
+ cl${g} $bp,`$stdframe+8+4*$SIZE_T`($j,$sp) # compare to &bp[num]
jne .Louter
- lg $rp,160+8+16($j,$sp) # reincarnate rp
- la $ap,160($sp)
+ l${g} $rp,`$stdframe+8+2*$SIZE_T`($j,$sp) # reincarnate rp
+ la $ap,$stdframe($sp)
ahi $num,1 # restore $num, incidentally clears "borrow"
la $j,0(%r0)
lr $count,$num
.Lsub: lg $alo,0($j,$ap)
- slbg $alo,0($j,$np)
+ lg $nlo,0($j,$np)
+ _dswap $nlo
+ slbgr $alo,$nlo
stg $alo,0($j,$rp)
la $j,8($j)
brct $count,.Lsub
@@ -207,19 +254,24 @@ bn_mul_mont:
la $j,0(%r0)
lgr $count,$num
-.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
- stg $j,160($j,$sp) # zap tp
+.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
+ _dswap $alo
+ stg $j,$stdframe($j,$sp) # zap tp
stg $alo,0($j,$rp)
la $j,8($j)
brct $count,.Lcopy
- la %r1,160+8+48($j,$sp)
- lmg %r6,%r15,0(%r1)
+ la %r1,`$stdframe+8+6*$SIZE_T`($j,$sp)
+ lm${g} %r6,%r15,0(%r1)
lghi %r2,1 # signal "processed"
br %r14
.size bn_mul_mont,.-bn_mul_mont
.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
-print $code;
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/ge;
+ s/_dswap\s+(%r[0-9]+)/sprintf("rllg\t%s,%s,32",$1,$1) if($SIZE_T==4)/e;
+ print $_,"\n";
+}
close STDOUT;