Diffstat (limited to 'deps/openssl/openssl/crypto/aes/asm/aes-s390x.pl')
-rw-r--r--  deps/openssl/openssl/crypto/aes/asm/aes-s390x.pl  100
1 file changed, 78 insertions(+), 22 deletions(-)
diff --git a/deps/openssl/openssl/crypto/aes/asm/aes-s390x.pl b/deps/openssl/openssl/crypto/aes/asm/aes-s390x.pl
index fd8a737166..0c40059066 100644
--- a/deps/openssl/openssl/crypto/aes/asm/aes-s390x.pl
+++ b/deps/openssl/openssl/crypto/aes/asm/aes-s390x.pl
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
-# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -8,7 +8,7 @@
# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -44,7 +44,7 @@
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
-# issue z10 makes it improssible to eliminate the interlock condition:
+# issue z10 makes it impossible to eliminate the interlock condition:
# critial path is not long enough. Yet it spends ~24 cycles per byte
# processed with 128-bit key.
#
@@ -129,6 +129,8 @@ sub _data_word()
}
$code=<<___;
+#include "s390x_arch.h"
+
.text
.type AES_Te,\@object
@@ -404,7 +406,7 @@ _s390x_AES_encrypt:
or $s1,$t1
or $t2,$i2
or $t3,$i3
-
+
srlg $i1,$s2,`8-3` # i0
srlg $i2,$s2,`16-3` # i1
nr $i1,$mask
@@ -457,7 +459,7 @@ _s390x_AES_encrypt:
x $s2,24($key)
x $s3,28($key)
- br $ra
+ br $ra
.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
___
@@ -779,7 +781,7 @@ _s390x_AES_decrypt:
x $s2,24($key)
x $s3,28($key)
- br $ra
+ br $ra
.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
___
@@ -823,8 +825,8 @@ $code.=<<___ if (!$softonly);
larl %r1,OPENSSL_s390xcap_P
llihh %r0,0x8000
srlg %r0,%r0,0(%r5)
- ng %r0,32(%r1) # check availability of both km...
- ng %r0,48(%r1) # ...and kmc support for given key length
+ ng %r0,S390X_KM(%r1) # check availability of both km...
+ ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length
jz .Lekey_internal
lmg %r0,%r1,0($inp) # just copy 128 bits...
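
These hunks replace magic byte offsets into OPENSSL_s390xcap_P (32 and 48 here, 64 further down) with named constants from the newly included s390x_arch.h, so the assembly no longer hard-codes the layout of the capability vector. A rough C sketch of the idea follows; field names and ordering are illustrative, not the verbatim header:

    /* Sketch only: OPENSSL_s390xcap_P is assumed to hold the STFLE
     * facility bits followed by the 16-byte QUERY results of the CPACF
     * instructions; S390X_KM and friends name those byte offsets. */
    #include <stdint.h>

    struct s390xcap {
        uint64_t stfle[4];  /* facility list from STFLE               */
        uint64_t km[2];     /* KM (cipher message) query bits         */
        uint64_t kmc[2];    /* KMC (cipher message with chaining)     */
        uint64_t kmctr[2];  /* KMCTR (counter mode)                   */
        uint64_t kma[2];    /* KMA (authenticated encryption, z14+)   */
    };

    /* Function code fc is available iff bit fc, numbered from the MSB,
     * is set in the instruction's query pair -- the same test the
     * llihh/srlg/ng sequence performs in registers. */
    static int s390x_has(const uint64_t q[2], unsigned int fc)
    {
        return (q[fc / 64] >> (63 - fc % 64)) & 1;
    }
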
@@ -1084,7 +1086,7 @@ $code.=<<___ if (!$softonly);
lhi $t1,16
cr $t0,$t1
jl .Lgo
- oill $t0,0x80 # set "decrypt" bit
+ oill $t0,S390X_DECRYPT # set "decrypt" bit
st $t0,240($key)
br $ra
___
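
S390X_DECRYPT names the CPACF "modifier" bit in the function code: one code selects the cipher and key length, and the modifier bit flips the direction. Assuming the constant keeps the value of the literal it replaces:

    /* Assumed definition, matching the old 0x80 literal: */
    #define S390X_DECRYPT 0x80

    /* KM-AES-128 carries function code 18 per the z/Architecture
     * Principles of Operation; OR-ing in the modifier bit requests
     * decryption with the same cipher. */
    unsigned int fc     = 18;                 /* encrypt */
    unsigned int fc_dec = 18 | S390X_DECRYPT; /* decrypt */
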
@@ -1223,7 +1225,7 @@ $code.=<<___ if (!$softonly);
.align 16
.Lkmc_truncated:
ahi $key,-1 # it's the way it's encoded in mvc
- tmll %r0,0x80
+ tmll %r0,S390X_DECRYPT
jnz .Lkmc_truncated_dec
lghi %r1,0
stg %r1,16*$SIZE_T($sp)
@@ -1294,7 +1296,7 @@ $code.=<<___;
.Lcbc_enc_done:
l${g} $ivp,6*$SIZE_T($sp)
st $s0,0($ivp)
- st $s1,4($ivp)
+ st $s1,4($ivp)
st $s2,8($ivp)
st $s3,12($ivp)
@@ -1403,7 +1405,61 @@ $code.=<<___ if (!$softonly);
clr %r0,%r1
jl .Lctr32_software
- stm${g} %r6,$s3,6*$SIZE_T($sp)
+ st${g} $s2,10*$SIZE_T($sp)
+ st${g} $s3,11*$SIZE_T($sp)
+
+ clr $len,%r1 # does work even in 64-bit mode
+ jle .Lctr32_nokma # kma is slower for <= 16 blocks
+
+ larl %r1,OPENSSL_s390xcap_P
+ lr $s2,%r0
+ llihh $s3,0x8000
+ srlg $s3,$s3,0($s2)
+ ng $s3,S390X_KMA(%r1) # check kma capability vector
+ jz .Lctr32_nokma
+
+ l${g}hi %r1,-$stdframe-112
+ l${g}r $s3,$sp
+ la $sp,0(%r1,$sp) # prepare parameter block
+
+ lhi %r1,0x0600
+ sllg $len,$len,4
+ or %r0,%r1 # set HS and LAAD flags
+
+ st${g} $s3,0($sp) # backchain
+ la %r1,$stdframe($sp)
+
+ lmg $s2,$s3,0($key) # copy key
+ stg $s2,$stdframe+80($sp)
+ stg $s3,$stdframe+88($sp)
+ lmg $s2,$s3,16($key)
+ stg $s2,$stdframe+96($sp)
+ stg $s3,$stdframe+104($sp)
+
+ lmg $s2,$s3,0($ivp) # copy iv
+ stg $s2,$stdframe+64($sp)
+ ahi $s3,-1 # kma requires counter-1
+ stg $s3,$stdframe+72($sp)
+ st $s3,$stdframe+12($sp) # copy counter
+
+ lghi $s2,0 # no AAD
+ lghi $s3,0
+
+ .long 0xb929a042 # kma $out,$s2,$inp
+ brc 1,.-4 # pay attention to "partial completion"
+
+ stg %r0,$stdframe+80($sp) # wipe key
+ stg %r0,$stdframe+88($sp)
+ stg %r0,$stdframe+96($sp)
+ stg %r0,$stdframe+104($sp)
+ la $sp,$stdframe+112($sp)
+
+ lm${g} $s2,$s3,10*$SIZE_T($sp)
+ br $ra
+
+.align 16
+.Lctr32_nokma:
+ stm${g} %r6,$s1,6*$SIZE_T($sp)
slgr $out,$inp
la %r1,0($key) # %r1 is permanent copy of $key
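
This is the heart of the patch: on machines with message-security-assist extension 8 (z14), 32-bit CTR mode is routed through the KMA instruction, which encrypts the whole buffer in one restartable operation; inputs of 16 blocks or fewer, or CPUs without the KMA capability bit, fall back to the existing code at .Lctr32_nokma. The stg/st stores build a KMA parameter block on the stack, addressed via %r1; a hedged C view of the offsets this code touches, with the field layout as I read the z/Architecture Principles of Operation:

    #include <stdint.h>

    /* Assumed KMA-GCM parameter block; offsets match the stores above
     * ($stdframe+12, +64/+72, +80..+104), and sizeof == 112 matches the
     * -$stdframe-112 frame adjustment. Fields not written here are left
     * as-is because only the keystream matters for CTR. */
    struct kma_param {
        uint8_t  reserved[12];
        uint32_t cv;        /* +12: 32-bit counter, stored as counter-1 */
        uint8_t  t[16];     /* +16: tag -- never read back              */
        uint8_t  h[16];     /* +32: hash subkey -- skipped, HS flag set */
        uint64_t taadl;     /* +48: total AAD length -- unused          */
        uint64_t tpcl;      /* +56: total plaintext length -- unused    */
        uint8_t  j0[16];    /* +64: IV, with counter-1 in the low word  */
        uint8_t  k[32];     /* +80: key copy, wiped with stg %r0 after  */
    };

The 0x0600 OR-ed into the function code sets the HS ("hash subkey supplied") and LAAD ("last AAD") flags, so KMA skips the GHASH setup and the empty AAD phase and goes straight to counter-mode encryption; sllg $len,$len,4 converts the block count into the byte length KMA expects, the zeroed $s2/$s3 pair is the (absent) AAD operand, brc 1,.-4 re-issues the instruction on partial completion, and the four stg %r0 stores scrub the stack copy of the key before the frame is released.
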
@@ -1442,7 +1498,7 @@ $code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
larl %r1,OPENSSL_s390xcap_P
llihh %r0,0x8000 # check if kmctr supports the function code
srlg %r0,%r0,0($s0)
- ng %r0,64(%r1) # check kmctr capability vector
+ ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector
lgr %r0,$s0
lgr %r1,$s1
jz .Lctr32_km_loop
@@ -1567,8 +1623,8 @@ ___
}
########################################################################
-# void AES_xts_encrypt(const char *inp,char *out,size_t len,
-# const AES_KEY *key1, const AES_KEY *key2,
+# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
+# size_t len, const AES_KEY *key1, const AES_KEY *key2,
# const unsigned char iv[16]);
#
{
@@ -1592,7 +1648,7 @@ $code.=<<___ if(1);
larl %r1,OPENSSL_s390xcap_P
llihh %r0,0x8000
srlg %r0,%r0,32($s1) # check for 32+function code
- ng %r0,32(%r1) # check km capability vector
+ ng %r0,S390X_KM(%r1) # check km capability vector
lgr %r0,$s0 # restore the function code
la %r1,0($key1) # restore $key1
jz .Lxts_km_vanilla
@@ -1627,7 +1683,7 @@ $code.=<<___ if(1);
llgc $len,2*$SIZE_T-1($sp)
nill $len,0x0f # $len%=16
br $ra
-
+
.align 16
.Lxts_km_vanilla:
___
@@ -1854,7 +1910,7 @@ $code.=<<___;
xgr $s1,%r1
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
- srlg $s0,$s1,32 # smash the tweak to 4x32-bits
+ srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
@@ -1905,7 +1961,7 @@ $code.=<<___;
xgr $s1,%r1
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
- srlg $s0,$s1,32 # smash the tweak to 4x32-bits
+ srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
@@ -1936,8 +1992,8 @@ $code.=<<___;
br $ra
.size AES_xts_encrypt,.-AES_xts_encrypt
___
-# void AES_xts_decrypt(const char *inp,char *out,size_t len,
-# const AES_KEY *key1, const AES_KEY *key2,
+# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
+# size_t len, const AES_KEY *key1, const AES_KEY *key2,
# const unsigned char iv[16]);
#
$code.=<<___;
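
With this, the prototype comments for both XTS entry points agree with how the routines are declared on the C side (in OpenSSL's crypto/evp/e_aes.c, if memory serves):

    #include <stddef.h>
    #include <openssl/aes.h>   /* AES_KEY */

    void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
                         size_t len, const AES_KEY *key1, const AES_KEY *key2,
                         const unsigned char iv[16]);
    void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
                         size_t len, const AES_KEY *key1, const AES_KEY *key2,
                         const unsigned char iv[16]);
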
@@ -2097,7 +2153,7 @@ $code.=<<___;
xgr $s1,%r1
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
- srlg $s0,$s1,32 # smash the tweak to 4x32-bits
+ srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32