summaryrefslogtreecommitdiff
path: root/deps/openssl/config/archs/linux32-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S
diff options
context:
space:
mode:
Diffstat (limited to 'deps/openssl/config/archs/linux32-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S')
-rw-r--r--deps/openssl/config/archs/linux32-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S464
1 files changed, 464 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/linux32-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S b/deps/openssl/config/archs/linux32-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S
new file mode 100644
index 0000000000..6c816e675d
--- /dev/null
+++ b/deps/openssl/config/archs/linux32-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S
@@ -0,0 +1,464 @@
+.text
+
+.type __KeccakF1600,@function
+.align 32
+__KeccakF1600:
+ st %r14,4*14(%r15)
+ lg %r0,160(%r2)
+ lg %r1,168(%r2)
+ lg %r5,176(%r2)
+ lg %r6,184(%r2)
+ lg %r7,192(%r2)
+ larl %r4,iotas
+ j .Loop
+
+.align 16
+.Loop:
+ lg %r8,0(%r2)
+ lg %r9,48(%r2)
+ lg %r10,96(%r2)
+ lg %r11,144(%r2)
+
+ xgr %r0,%r8
+ xg %r1,8(%r2)
+ xg %r5,16(%r2)
+ xg %r6,24(%r2)
+ lgr %r12,%r7
+ xg %r7,32(%r2)
+
+ xg %r0,40(%r2)
+ xgr %r1,%r9
+ xg %r5,56(%r2)
+ xg %r6,64(%r2)
+ xg %r7,72(%r2)
+
+ xg %r0,80(%r2)
+ xg %r1,88(%r2)
+ xgr %r5,%r10
+ xg %r6,104(%r2)
+ xg %r7,112(%r2)
+
+ xg %r0,120(%r2)
+ xg %r1,128(%r2)
+ xg %r5,136(%r2)
+ xgr %r6,%r11
+ xg %r7,152(%r2)
+
+ lgr %r13,%r5
+ rllg %r5,%r5,1
+ xgr %r5,%r0 # D[1] = ROL64(C[2], 1) ^ C[0]
+
+ rllg %r0,%r0,1
+ xgr %r0,%r6 # D[4] = ROL64(C[0], 1) ^ C[3]
+
+ rllg %r6,%r6,1
+ xgr %r6,%r1 # D[2] = ROL64(C[3], 1) ^ C[1]
+
+ rllg %r1,%r1,1
+ xgr %r1,%r7 # D[0] = ROL64(C[1], 1) ^ C[4]
+
+ rllg %r7,%r7,1
+ xgr %r7,%r13 # D[3] = ROL64(C[4], 1) ^ C[2]
+ xgr %r9,%r5
+ xgr %r10,%r6
+ xgr %r11,%r7
+ rllg %r9,%r9,44
+ xgr %r12,%r0
+ rllg %r10,%r10,43
+ xgr %r8,%r1
+
+ lgr %r13,%r9
+ ogr %r9,%r10
+ rllg %r11,%r11,21
+ xgr %r9,%r8 # C[0] ^ ( C[1] | C[2])
+ rllg %r12,%r12,14
+ xg %r9,0(%r4)
+ la %r4,8(%r4)
+ stg %r9,0(%r3) # R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i]
+
+ lgr %r14,%r12
+ ngr %r12,%r11
+ lghi %r9,-1 # no 'not' instruction :-(
+ xgr %r12,%r10 # C[2] ^ ( C[4] & C[3])
+ xgr %r10,%r9 # not %r10
+ stg %r12,16(%r3) # R[0][2] = C[2] ^ ( C[4] & C[3])
+ ogr %r10,%r11
+ xgr %r10,%r13 # C[1] ^ (~C[2] | C[3])
+
+ ngr %r13,%r8
+ stg %r10,8(%r3) # R[0][1] = C[1] ^ (~C[2] | C[3])
+ xgr %r13,%r14 # C[4] ^ ( C[1] & C[0])
+ ogr %r14,%r8
+ stg %r13,32(%r3) # R[0][4] = C[4] ^ ( C[1] & C[0])
+ xgr %r14,%r11 # C[3] ^ ( C[4] | C[0])
+ stg %r14,24(%r3) # R[0][3] = C[3] ^ ( C[4] | C[0])
+
+
+ lg %r8,24(%r2)
+ lg %r12,176(%r2)
+ lg %r11,128(%r2)
+ lg %r9,72(%r2)
+ lg %r10,80(%r2)
+
+ xgr %r8,%r7
+ xgr %r12,%r6
+ rllg %r8,%r8,28
+ xgr %r11,%r5
+ rllg %r12,%r12,61
+ xgr %r9,%r0
+ rllg %r11,%r11,45
+ xgr %r10,%r1
+
+ lgr %r13,%r8
+ ogr %r8,%r12
+ rllg %r9,%r9,20
+ xgr %r8,%r11 # C[3] ^ (C[0] | C[4])
+ rllg %r10,%r10,3
+ stg %r8,64(%r3) # R[1][3] = C[3] ^ (C[0] | C[4])
+
+ lgr %r14,%r9
+ ngr %r9,%r13
+ lghi %r8,-1 # no 'not' instruction :-(
+ xgr %r9,%r12 # C[4] ^ (C[1] & C[0])
+ xgr %r12,%r8 # not %r12
+ stg %r9,72(%r3) # R[1][4] = C[4] ^ (C[1] & C[0])
+
+ ogr %r12,%r11
+ xgr %r12,%r10 # C[2] ^ (~C[4] | C[3])
+
+ ngr %r11,%r10
+ stg %r12,56(%r3) # R[1][2] = C[2] ^ (~C[4] | C[3])
+ xgr %r11,%r14 # C[1] ^ (C[3] & C[2])
+ ogr %r14,%r10
+ stg %r11,48(%r3) # R[1][1] = C[1] ^ (C[3] & C[2])
+ xgr %r14,%r13 # C[0] ^ (C[1] | C[2])
+ stg %r14,40(%r3) # R[1][0] = C[0] ^ (C[1] | C[2])
+
+
+ lg %r10,104(%r2)
+ lg %r11,152(%r2)
+ lg %r9,56(%r2)
+ lg %r12,160(%r2)
+ lg %r8,8(%r2)
+
+ xgr %r10,%r7
+ xgr %r11,%r0
+ rllg %r10,%r10,25
+ xgr %r9,%r6
+ rllg %r11,%r11,8
+ xgr %r12,%r1
+ rllg %r9,%r9,6
+ xgr %r8,%r5
+
+ lgr %r13,%r10
+ ngr %r10,%r11
+ rllg %r12,%r12,18
+ xgr %r10,%r9 # C[1] ^ ( C[2] & C[3])
+ lghi %r14,-1 # no 'not' instruction :-(
+ stg %r10,88(%r3) # R[2][1] = C[1] ^ ( C[2] & C[3])
+
+ xgr %r11,%r14 # not %r11
+ lgr %r14,%r12
+ ngr %r12,%r11
+ rllg %r8,%r8,1
+ xgr %r12,%r13 # C[2] ^ ( C[4] & ~C[3])
+ ogr %r13,%r9
+ stg %r12,96(%r3) # R[2][2] = C[2] ^ ( C[4] & ~C[3])
+ xgr %r13,%r8 # C[0] ^ ( C[2] | C[1])
+
+ ngr %r9,%r8
+ stg %r13,80(%r3) # R[2][0] = C[0] ^ ( C[2] | C[1])
+ xgr %r9,%r14 # C[4] ^ ( C[1] & C[0])
+ ogr %r8,%r14
+ stg %r9,112(%r3) # R[2][4] = C[4] ^ ( C[1] & C[0])
+ xgr %r8,%r11 # ~C[3] ^ ( C[0] | C[4])
+ stg %r8,104(%r3) # R[2][3] = ~C[3] ^ ( C[0] | C[4])
+
+
+ lg %r10,88(%r2)
+ lg %r11,136(%r2)
+ lg %r9,40(%r2)
+ lg %r12,184(%r2)
+ lg %r8,32(%r2)
+
+ xgr %r10,%r5
+ xgr %r11,%r6
+ rllg %r10,%r10,10
+ xgr %r9,%r1
+ rllg %r11,%r11,15
+ xgr %r12,%r7
+ rllg %r9,%r9,36
+ xgr %r8,%r0
+ rllg %r12,%r12,56
+
+ lgr %r13,%r10
+ ogr %r10,%r11
+ lghi %r14,-1 # no 'not' instruction :-(
+ xgr %r10,%r9 # C[1] ^ ( C[2] | C[3])
+ xgr %r11,%r14 # not %r11
+ stg %r10,128(%r3) # R[3][1] = C[1] ^ ( C[2] | C[3])
+
+ lgr %r14,%r12
+ ogr %r12,%r11
+ rllg %r8,%r8,27
+ xgr %r12,%r13 # C[2] ^ ( C[4] | ~C[3])
+ ngr %r13,%r9
+ stg %r12,136(%r3) # R[3][2] = C[2] ^ ( C[4] | ~C[3])
+ xgr %r13,%r8 # C[0] ^ ( C[2] & C[1])
+
+ ogr %r9,%r8
+ stg %r13,120(%r3) # R[3][0] = C[0] ^ ( C[2] & C[1])
+ xgr %r9,%r14 # C[4] ^ ( C[1] | C[0])
+ ngr %r8,%r14
+ stg %r9,152(%r3) # R[3][4] = C[4] ^ ( C[1] | C[0])
+ xgr %r8,%r11 # ~C[3] ^ ( C[0] & C[4])
+ stg %r8,144(%r3) # R[3][3] = ~C[3] ^ ( C[0] & C[4])
+
+
+ xg %r6,16(%r2)
+ xg %r7,64(%r2)
+ xg %r5,168(%r2)
+ xg %r0,112(%r2)
+ xgr %r3,%r2 # xchg %r3,%r2
+ rllg %r6,%r6,62
+ xg %r1,120(%r2)
+ rllg %r7,%r7,55
+ xgr %r2,%r3
+ rllg %r5,%r5,2
+ xgr %r3,%r2
+ rllg %r0,%r0,39
+ lgr %r13,%r6
+ ngr %r6,%r7
+ lghi %r14,-1 # no 'not' instruction :-(
+ xgr %r6,%r5 # C[4] ^ ( C[0] & C[1])
+ xgr %r7,%r14 # not %r7
+ stg %r6,192(%r2) # R[4][4] = C[4] ^ ( C[0] & C[1])
+
+ lgr %r14,%r0
+ ngr %r0,%r7
+ rllg %r1,%r1,41
+ xgr %r0,%r13 # C[0] ^ ( C[2] & ~C[1])
+ ogr %r13,%r5
+ stg %r0,160(%r2) # R[4][0] = C[0] ^ ( C[2] & ~C[1])
+ xgr %r13,%r1 # C[3] ^ ( C[0] | C[4])
+
+ ngr %r5,%r1
+ stg %r13,184(%r2) # R[4][3] = C[3] ^ ( C[0] | C[4])
+ xgr %r5,%r14 # C[2] ^ ( C[4] & C[3])
+ ogr %r1,%r14
+ stg %r5,176(%r2) # R[4][2] = C[2] ^ ( C[4] & C[3])
+ xgr %r1,%r7 # ~C[1] ^ ( C[2] | C[3])
+
+ lgr %r7,%r6 # harmonize with the loop top
+ lgr %r6,%r13
+ stg %r1,168(%r2) # R[4][1] = ~C[1] ^ ( C[2] | C[3])
+
+ tmll %r4,255
+ jnz .Loop
+
+ l %r14,4*14(%r15)
+ br %r14
+.size __KeccakF1600,.-__KeccakF1600
+.type KeccakF1600,@function
+.align 32
+KeccakF1600:
+.LKeccakF1600:
+ lghi %r1,-296
+ stm %r6,%r15,4*6(%r15)
+ lgr %r0,%r15
+ la %r15,0(%r1,%r15)
+ st %r0,0(%r15)
+
+ lghi %r8,-1 # no 'not' instruction :-(
+ lghi %r9,-1
+ lghi %r10,-1
+ lghi %r11,-1
+ lghi %r12,-1
+ lghi %r13,-1
+ xg %r8,8(%r2)
+ xg %r9,16(%r2)
+ xg %r10,64(%r2)
+ xg %r11,96(%r2)
+ xg %r12,136(%r2)
+ xg %r13,160(%r2)
+ stmg %r8,%r9,8(%r2)
+ stg %r10,64(%r2)
+ stg %r11,96(%r2)
+ stg %r12,136(%r2)
+ stg %r13,160(%r2)
+
+ la %r3,96(%r15)
+
+ bras %r14,__KeccakF1600
+
+ lghi %r8,-1 # no 'not' instruction :-(
+ lghi %r9,-1
+ lghi %r10,-1
+ lghi %r11,-1
+ lghi %r12,-1
+ lghi %r13,-1
+ xg %r8,8(%r2)
+ xg %r9,16(%r2)
+ xg %r10,64(%r2)
+ xg %r11,96(%r2)
+ xg %r12,136(%r2)
+ xg %r13,160(%r2)
+ stmg %r8,%r9,8(%r2)
+ stg %r10,64(%r2)
+ stg %r11,96(%r2)
+ stg %r12,136(%r2)
+ stg %r13,160(%r2)
+
+ lm %r6,%r15,296+6*4(%r15)
+ br %r14
+.size KeccakF1600,.-KeccakF1600
+.globl SHA3_absorb
+.type SHA3_absorb,@function
+.align 32
+SHA3_absorb:
+ lghi %r1,-296
+ stm %r5,%r15,4*5(%r15)
+ lgr %r0,%r15
+ la %r15,0(%r1,%r15)
+ st %r0,0(%r15)
+
+ lghi %r8,-1 # no 'not' instruction :-(
+ lghi %r9,-1
+ lghi %r10,-1
+ lghi %r11,-1
+ lghi %r12,-1
+ lghi %r13,-1
+ xg %r8,8(%r2)
+ xg %r9,16(%r2)
+ xg %r10,64(%r2)
+ xg %r11,96(%r2)
+ xg %r12,136(%r2)
+ xg %r13,160(%r2)
+ stmg %r8,%r9,8(%r2)
+ stg %r10,64(%r2)
+ stg %r11,96(%r2)
+ stg %r12,136(%r2)
+ stg %r13,160(%r2)
+
+.Loop_absorb:
+ clr %r4,%r5
+ jl .Ldone_absorb
+
+ srl %r5,3
+ la %r1,0(%r2)
+
+.Lblock_absorb:
+ lrvg %r0,0(%r3)
+ la %r3,8(%r3)
+ xg %r0,0(%r1)
+ ahi %r4,-8
+ stg %r0,0(%r1)
+ la %r1,8(%r1)
+ brct %r5,.Lblock_absorb
+
+ stm %r3,%r4,296+3*4(%r15)
+ la %r3,96(%r15)
+ bras %r14,__KeccakF1600
+ lm %r3,%r5,296+3*4(%r15)
+ j .Loop_absorb
+
+.align 16
+.Ldone_absorb:
+ lghi %r8,-1 # no 'not' instruction :-(
+ lghi %r9,-1
+ lghi %r10,-1
+ lghi %r11,-1
+ lghi %r12,-1
+ lghi %r13,-1
+ xg %r8,8(%r2)
+ xg %r9,16(%r2)
+ xg %r10,64(%r2)
+ xg %r11,96(%r2)
+ xg %r12,136(%r2)
+ xg %r13,160(%r2)
+ stmg %r8,%r9,8(%r2)
+ stg %r10,64(%r2)
+ stg %r11,96(%r2)
+ stg %r12,136(%r2)
+ stg %r13,160(%r2)
+
+ lgr %r2,%r4 # return value
+
+ lm %r6,%r15,296+6*4(%r15)
+ br %r14
+.size SHA3_absorb,.-SHA3_absorb
+.globl SHA3_squeeze
+.type SHA3_squeeze,@function
+.align 32
+SHA3_squeeze:
+ srl %r5,3
+ st %r14,2*4(%r15)
+ lghi %r14,8
+ st %r5,5*4(%r15)
+ la %r1,0(%r2)
+
+ j .Loop_squeeze
+
+.align 16
+.Loop_squeeze:
+ clr %r4,%r14
+ jl .Ltail_squeeze
+
+ lrvg %r0,0(%r1)
+ la %r1,8(%r1)
+ stg %r0,0(%r3)
+ la %r3,8(%r3)
+ ahi %r4,-8 # len -= 8
+ jz .Ldone_squeeze
+
+ brct %r5,.Loop_squeeze # bsz--
+
+ stm %r3,%r4,3*4(%r15)
+ bras %r14,.LKeccakF1600
+ lm %r3,%r5,3*4(%r15)
+ lghi %r14,8
+ la %r1,0(%r2)
+ j .Loop_squeeze
+
+.Ltail_squeeze:
+ lg %r0,0(%r1)
+.Loop_tail_squeeze:
+ stc %r0,0(%r3)
+ la %r3,1(%r3)
+ srlg %r0,%r0,8
+ brct %r4,.Loop_tail_squeeze
+
+.Ldone_squeeze:
+ l %r14,2*4(%r15)
+ br %r14
+.size SHA3_squeeze,.-SHA3_squeeze
+.align 256
+ .quad 0,0,0,0,0,0,0,0
+.type iotas,@object
+iotas:
+ .quad 0x0000000000000001
+ .quad 0x0000000000008082
+ .quad 0x800000000000808a
+ .quad 0x8000000080008000
+ .quad 0x000000000000808b
+ .quad 0x0000000080000001
+ .quad 0x8000000080008081
+ .quad 0x8000000000008009
+ .quad 0x000000000000008a
+ .quad 0x0000000000000088
+ .quad 0x0000000080008009
+ .quad 0x000000008000000a
+ .quad 0x000000008000808b
+ .quad 0x800000000000008b
+ .quad 0x8000000000008089
+ .quad 0x8000000000008003
+ .quad 0x8000000000008002
+ .quad 0x8000000000000080
+ .quad 0x000000000000800a
+ .quad 0x800000008000000a
+ .quad 0x8000000080008081
+ .quad 0x8000000000008080
+ .quad 0x0000000080000001
+ .quad 0x8000000080008008
+.size iotas,.-iotas
+.asciz "Keccak-1600 absorb and squeeze for s390x, CRYPTOGAMS by <appro@openssl.org>"