diff options
author | Sam Roberts <vieuxtech@gmail.com> | 2018-11-22 11:47:07 -0800 |
---|---|---|
committer | Sam Roberts <vieuxtech@gmail.com> | 2019-01-22 13:33:54 -0800 |
commit | 807ed7883a12423270450776f015a7c2348c0913 (patch) | |
tree | 00ec21dd290b29c782680ffc2f97e6d59fd2ab2f /deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto | |
parent | 57119fbdb200702d6e2cf23428de4c458ae86bbc (diff) | |
download | android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.gz android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.bz2 android-node-v8-807ed7883a12423270450776f015a7c2348c0913.zip |
deps: update archs files for OpenSSL-1.1.1a
`cd deps/openssl/config; make` updates all archs dependant files.
PR-URL: https://github.com/nodejs/node/pull/25381
Reviewed-By: Daniel Bevenius <daniel.bevenius@gmail.com>
Reviewed-By: Shigeki Ohtsu <ohtsu@ohtsu.org>
Diffstat (limited to 'deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto')
15 files changed, 8170 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/aes/aes-s390x.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/aes/aes-s390x.S new file mode 100644 index 0000000000..e00f81ecd4 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/aes/aes-s390x.S @@ -0,0 +1,2365 @@ +#include "s390x_arch.h" + +.text + +.type AES_Te,@object +.align 256 +AES_Te: +.long 0xc66363a5,0xc66363a5 +.long 0xf87c7c84,0xf87c7c84 +.long 0xee777799,0xee777799 +.long 0xf67b7b8d,0xf67b7b8d +.long 0xfff2f20d,0xfff2f20d +.long 0xd66b6bbd,0xd66b6bbd +.long 0xde6f6fb1,0xde6f6fb1 +.long 0x91c5c554,0x91c5c554 +.long 0x60303050,0x60303050 +.long 0x02010103,0x02010103 +.long 0xce6767a9,0xce6767a9 +.long 0x562b2b7d,0x562b2b7d +.long 0xe7fefe19,0xe7fefe19 +.long 0xb5d7d762,0xb5d7d762 +.long 0x4dababe6,0x4dababe6 +.long 0xec76769a,0xec76769a +.long 0x8fcaca45,0x8fcaca45 +.long 0x1f82829d,0x1f82829d +.long 0x89c9c940,0x89c9c940 +.long 0xfa7d7d87,0xfa7d7d87 +.long 0xeffafa15,0xeffafa15 +.long 0xb25959eb,0xb25959eb +.long 0x8e4747c9,0x8e4747c9 +.long 0xfbf0f00b,0xfbf0f00b +.long 0x41adadec,0x41adadec +.long 0xb3d4d467,0xb3d4d467 +.long 0x5fa2a2fd,0x5fa2a2fd +.long 0x45afafea,0x45afafea +.long 0x239c9cbf,0x239c9cbf +.long 0x53a4a4f7,0x53a4a4f7 +.long 0xe4727296,0xe4727296 +.long 0x9bc0c05b,0x9bc0c05b +.long 0x75b7b7c2,0x75b7b7c2 +.long 0xe1fdfd1c,0xe1fdfd1c +.long 0x3d9393ae,0x3d9393ae +.long 0x4c26266a,0x4c26266a +.long 0x6c36365a,0x6c36365a +.long 0x7e3f3f41,0x7e3f3f41 +.long 0xf5f7f702,0xf5f7f702 +.long 0x83cccc4f,0x83cccc4f +.long 0x6834345c,0x6834345c +.long 0x51a5a5f4,0x51a5a5f4 +.long 0xd1e5e534,0xd1e5e534 +.long 0xf9f1f108,0xf9f1f108 +.long 0xe2717193,0xe2717193 +.long 0xabd8d873,0xabd8d873 +.long 0x62313153,0x62313153 +.long 0x2a15153f,0x2a15153f +.long 0x0804040c,0x0804040c +.long 0x95c7c752,0x95c7c752 +.long 0x46232365,0x46232365 +.long 0x9dc3c35e,0x9dc3c35e +.long 0x30181828,0x30181828 +.long 0x379696a1,0x379696a1 +.long 0x0a05050f,0x0a05050f 
+.long 0x2f9a9ab5,0x2f9a9ab5 +.long 0x0e070709,0x0e070709 +.long 0x24121236,0x24121236 +.long 0x1b80809b,0x1b80809b +.long 0xdfe2e23d,0xdfe2e23d +.long 0xcdebeb26,0xcdebeb26 +.long 0x4e272769,0x4e272769 +.long 0x7fb2b2cd,0x7fb2b2cd +.long 0xea75759f,0xea75759f +.long 0x1209091b,0x1209091b +.long 0x1d83839e,0x1d83839e +.long 0x582c2c74,0x582c2c74 +.long 0x341a1a2e,0x341a1a2e +.long 0x361b1b2d,0x361b1b2d +.long 0xdc6e6eb2,0xdc6e6eb2 +.long 0xb45a5aee,0xb45a5aee +.long 0x5ba0a0fb,0x5ba0a0fb +.long 0xa45252f6,0xa45252f6 +.long 0x763b3b4d,0x763b3b4d +.long 0xb7d6d661,0xb7d6d661 +.long 0x7db3b3ce,0x7db3b3ce +.long 0x5229297b,0x5229297b +.long 0xdde3e33e,0xdde3e33e +.long 0x5e2f2f71,0x5e2f2f71 +.long 0x13848497,0x13848497 +.long 0xa65353f5,0xa65353f5 +.long 0xb9d1d168,0xb9d1d168 +.long 0x00000000,0x00000000 +.long 0xc1eded2c,0xc1eded2c +.long 0x40202060,0x40202060 +.long 0xe3fcfc1f,0xe3fcfc1f +.long 0x79b1b1c8,0x79b1b1c8 +.long 0xb65b5bed,0xb65b5bed +.long 0xd46a6abe,0xd46a6abe +.long 0x8dcbcb46,0x8dcbcb46 +.long 0x67bebed9,0x67bebed9 +.long 0x7239394b,0x7239394b +.long 0x944a4ade,0x944a4ade +.long 0x984c4cd4,0x984c4cd4 +.long 0xb05858e8,0xb05858e8 +.long 0x85cfcf4a,0x85cfcf4a +.long 0xbbd0d06b,0xbbd0d06b +.long 0xc5efef2a,0xc5efef2a +.long 0x4faaaae5,0x4faaaae5 +.long 0xedfbfb16,0xedfbfb16 +.long 0x864343c5,0x864343c5 +.long 0x9a4d4dd7,0x9a4d4dd7 +.long 0x66333355,0x66333355 +.long 0x11858594,0x11858594 +.long 0x8a4545cf,0x8a4545cf +.long 0xe9f9f910,0xe9f9f910 +.long 0x04020206,0x04020206 +.long 0xfe7f7f81,0xfe7f7f81 +.long 0xa05050f0,0xa05050f0 +.long 0x783c3c44,0x783c3c44 +.long 0x259f9fba,0x259f9fba +.long 0x4ba8a8e3,0x4ba8a8e3 +.long 0xa25151f3,0xa25151f3 +.long 0x5da3a3fe,0x5da3a3fe +.long 0x804040c0,0x804040c0 +.long 0x058f8f8a,0x058f8f8a +.long 0x3f9292ad,0x3f9292ad +.long 0x219d9dbc,0x219d9dbc +.long 0x70383848,0x70383848 +.long 0xf1f5f504,0xf1f5f504 +.long 0x63bcbcdf,0x63bcbcdf +.long 0x77b6b6c1,0x77b6b6c1 +.long 0xafdada75,0xafdada75 +.long 
0x42212163,0x42212163 +.long 0x20101030,0x20101030 +.long 0xe5ffff1a,0xe5ffff1a +.long 0xfdf3f30e,0xfdf3f30e +.long 0xbfd2d26d,0xbfd2d26d +.long 0x81cdcd4c,0x81cdcd4c +.long 0x180c0c14,0x180c0c14 +.long 0x26131335,0x26131335 +.long 0xc3ecec2f,0xc3ecec2f +.long 0xbe5f5fe1,0xbe5f5fe1 +.long 0x359797a2,0x359797a2 +.long 0x884444cc,0x884444cc +.long 0x2e171739,0x2e171739 +.long 0x93c4c457,0x93c4c457 +.long 0x55a7a7f2,0x55a7a7f2 +.long 0xfc7e7e82,0xfc7e7e82 +.long 0x7a3d3d47,0x7a3d3d47 +.long 0xc86464ac,0xc86464ac +.long 0xba5d5de7,0xba5d5de7 +.long 0x3219192b,0x3219192b +.long 0xe6737395,0xe6737395 +.long 0xc06060a0,0xc06060a0 +.long 0x19818198,0x19818198 +.long 0x9e4f4fd1,0x9e4f4fd1 +.long 0xa3dcdc7f,0xa3dcdc7f +.long 0x44222266,0x44222266 +.long 0x542a2a7e,0x542a2a7e +.long 0x3b9090ab,0x3b9090ab +.long 0x0b888883,0x0b888883 +.long 0x8c4646ca,0x8c4646ca +.long 0xc7eeee29,0xc7eeee29 +.long 0x6bb8b8d3,0x6bb8b8d3 +.long 0x2814143c,0x2814143c +.long 0xa7dede79,0xa7dede79 +.long 0xbc5e5ee2,0xbc5e5ee2 +.long 0x160b0b1d,0x160b0b1d +.long 0xaddbdb76,0xaddbdb76 +.long 0xdbe0e03b,0xdbe0e03b +.long 0x64323256,0x64323256 +.long 0x743a3a4e,0x743a3a4e +.long 0x140a0a1e,0x140a0a1e +.long 0x924949db,0x924949db +.long 0x0c06060a,0x0c06060a +.long 0x4824246c,0x4824246c +.long 0xb85c5ce4,0xb85c5ce4 +.long 0x9fc2c25d,0x9fc2c25d +.long 0xbdd3d36e,0xbdd3d36e +.long 0x43acacef,0x43acacef +.long 0xc46262a6,0xc46262a6 +.long 0x399191a8,0x399191a8 +.long 0x319595a4,0x319595a4 +.long 0xd3e4e437,0xd3e4e437 +.long 0xf279798b,0xf279798b +.long 0xd5e7e732,0xd5e7e732 +.long 0x8bc8c843,0x8bc8c843 +.long 0x6e373759,0x6e373759 +.long 0xda6d6db7,0xda6d6db7 +.long 0x018d8d8c,0x018d8d8c +.long 0xb1d5d564,0xb1d5d564 +.long 0x9c4e4ed2,0x9c4e4ed2 +.long 0x49a9a9e0,0x49a9a9e0 +.long 0xd86c6cb4,0xd86c6cb4 +.long 0xac5656fa,0xac5656fa +.long 0xf3f4f407,0xf3f4f407 +.long 0xcfeaea25,0xcfeaea25 +.long 0xca6565af,0xca6565af +.long 0xf47a7a8e,0xf47a7a8e +.long 0x47aeaee9,0x47aeaee9 +.long 0x10080818,0x10080818 
+.long 0x6fbabad5,0x6fbabad5 +.long 0xf0787888,0xf0787888 +.long 0x4a25256f,0x4a25256f +.long 0x5c2e2e72,0x5c2e2e72 +.long 0x381c1c24,0x381c1c24 +.long 0x57a6a6f1,0x57a6a6f1 +.long 0x73b4b4c7,0x73b4b4c7 +.long 0x97c6c651,0x97c6c651 +.long 0xcbe8e823,0xcbe8e823 +.long 0xa1dddd7c,0xa1dddd7c +.long 0xe874749c,0xe874749c +.long 0x3e1f1f21,0x3e1f1f21 +.long 0x964b4bdd,0x964b4bdd +.long 0x61bdbddc,0x61bdbddc +.long 0x0d8b8b86,0x0d8b8b86 +.long 0x0f8a8a85,0x0f8a8a85 +.long 0xe0707090,0xe0707090 +.long 0x7c3e3e42,0x7c3e3e42 +.long 0x71b5b5c4,0x71b5b5c4 +.long 0xcc6666aa,0xcc6666aa +.long 0x904848d8,0x904848d8 +.long 0x06030305,0x06030305 +.long 0xf7f6f601,0xf7f6f601 +.long 0x1c0e0e12,0x1c0e0e12 +.long 0xc26161a3,0xc26161a3 +.long 0x6a35355f,0x6a35355f +.long 0xae5757f9,0xae5757f9 +.long 0x69b9b9d0,0x69b9b9d0 +.long 0x17868691,0x17868691 +.long 0x99c1c158,0x99c1c158 +.long 0x3a1d1d27,0x3a1d1d27 +.long 0x279e9eb9,0x279e9eb9 +.long 0xd9e1e138,0xd9e1e138 +.long 0xebf8f813,0xebf8f813 +.long 0x2b9898b3,0x2b9898b3 +.long 0x22111133,0x22111133 +.long 0xd26969bb,0xd26969bb +.long 0xa9d9d970,0xa9d9d970 +.long 0x078e8e89,0x078e8e89 +.long 0x339494a7,0x339494a7 +.long 0x2d9b9bb6,0x2d9b9bb6 +.long 0x3c1e1e22,0x3c1e1e22 +.long 0x15878792,0x15878792 +.long 0xc9e9e920,0xc9e9e920 +.long 0x87cece49,0x87cece49 +.long 0xaa5555ff,0xaa5555ff +.long 0x50282878,0x50282878 +.long 0xa5dfdf7a,0xa5dfdf7a +.long 0x038c8c8f,0x038c8c8f +.long 0x59a1a1f8,0x59a1a1f8 +.long 0x09898980,0x09898980 +.long 0x1a0d0d17,0x1a0d0d17 +.long 0x65bfbfda,0x65bfbfda +.long 0xd7e6e631,0xd7e6e631 +.long 0x844242c6,0x844242c6 +.long 0xd06868b8,0xd06868b8 +.long 0x824141c3,0x824141c3 +.long 0x299999b0,0x299999b0 +.long 0x5a2d2d77,0x5a2d2d77 +.long 0x1e0f0f11,0x1e0f0f11 +.long 0x7bb0b0cb,0x7bb0b0cb +.long 0xa85454fc,0xa85454fc +.long 0x6dbbbbd6,0x6dbbbbd6 +.long 0x2c16163a,0x2c16163a +# Te4[256] +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 
0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +# rcon[] +.long 0x01000000, 0x02000000, 0x04000000, 0x08000000 +.long 0x10000000, 0x20000000, 0x40000000, 0x80000000 +.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 +.align 256 +.size AES_Te,.-AES_Te + +# void AES_encrypt(const unsigned char *inp, unsigned char *out, +# const AES_KEY *key) { +.globl AES_encrypt +.type AES_encrypt,@function +AES_encrypt: + l %r0,240(%r4) + lhi %r1,16 + clr 
%r0,%r1 + jl .Lesoft + + la %r1,0(%r4) + #la %r2,0(%r2) + la %r4,0(%r3) + lghi %r3,16 # single block length + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # can this happen? + br %r14 +.align 64 +.Lesoft: + stmg %r3,%r14,3*8(%r15) + + llgf %r8,0(%r2) + llgf %r9,4(%r2) + llgf %r10,8(%r2) + llgf %r11,12(%r2) + + larl %r12,AES_Te + bras %r14,_s390x_AES_encrypt + + lg %r3,3*8(%r15) + st %r8,0(%r3) + st %r9,4(%r3) + st %r10,8(%r3) + st %r11,12(%r3) + + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_encrypt,.-AES_encrypt + +.type _s390x_AES_encrypt,@function +.align 16 +_s390x_AES_encrypt: + stg %r14,15*8(%r15) + x %r8,0(%r4) + x %r9,4(%r4) + x %r10,8(%r4) + x %r11,12(%r4) + l %r13,240(%r4) + llill %r0,2040 + aghi %r13,-1 + j .Lenc_loop +.align 16 +.Lenc_loop: + sllg %r1,%r8,3 + srlg %r2,%r8,5 + srlg %r3,%r8,13 + srl %r8,21 + nr %r8,%r0 + ngr %r1,%r0 + nr %r2,%r0 + nr %r3,%r0 + + srlg %r5,%r9,13 # i0 + sllg %r6,%r9,3 + srlg %r7,%r9,5 + srl %r9,21 + nr %r5,%r0 + nr %r9,%r0 + ngr %r6,%r0 + nr %r7,%r0 + + l %r8,0(%r8,%r12) # Te0[s0>>24] + l %r1,1(%r1,%r12) # Te3[s0>>0] + l %r2,2(%r2,%r12) # Te2[s0>>8] + l %r3,3(%r3,%r12) # Te1[s0>>16] + + x %r8,3(%r5,%r12) # Te1[s1>>16] + l %r9,0(%r9,%r12) # Te0[s1>>24] + x %r2,1(%r6,%r12) # Te3[s1>>0] + x %r3,2(%r7,%r12) # Te2[s1>>8] + + srlg %r5,%r10,5 # i0 + srlg %r6,%r10,13 # i1 + nr %r5,%r0 + nr %r6,%r0 + sllg %r7,%r10,3 + srl %r10,21 + nr %r10,%r0 + ngr %r7,%r0 + + xr %r9,%r1 + srlg %r14,%r11,5 # i1 + sllg %r1,%r11,3 # i0 + nr %r14,%r0 + la %r4,16(%r4) + ngr %r1,%r0 + + x %r8,2(%r5,%r12) # Te2[s2>>8] + x %r9,3(%r6,%r12) # Te1[s2>>16] + l %r10,0(%r10,%r12) # Te0[s2>>24] + x %r3,1(%r7,%r12) # Te3[s2>>0] + + srlg %r7,%r11,13 # i2 + xr %r10,%r2 + srl %r11,21 + nr %r7,%r0 + nr %r11,%r0 + + x %r8,0(%r4) + x %r9,4(%r4) + x %r10,8(%r4) + x %r3,12(%r4) + + x %r8,1(%r1,%r12) # Te3[s3>>0] + x %r9,2(%r14,%r12) # Te2[s3>>8] + x %r10,3(%r7,%r12) # Te1[s3>>16] + l %r11,0(%r11,%r12) # Te0[s3>>24] + xr %r11,%r3 + + brct %r13,.Lenc_loop + .align 16 + + sllg 
%r1,%r8,3 + srlg %r2,%r8,5 + ngr %r1,%r0 + srlg %r3,%r8,13 + srl %r8,21 + nr %r8,%r0 + nr %r2,%r0 + nr %r3,%r0 + + srlg %r5,%r9,13 # i0 + sllg %r6,%r9,3 + ngr %r6,%r0 + srlg %r7,%r9,5 + srl %r9,21 + nr %r5,%r0 + nr %r9,%r0 + nr %r7,%r0 + + llgc %r8,2(%r8,%r12) # Te4[s0>>24] + llgc %r1,2(%r1,%r12) # Te4[s0>>0] + sll %r8,24 + llgc %r2,2(%r2,%r12) # Te4[s0>>8] + llgc %r3,2(%r3,%r12) # Te4[s0>>16] + sll %r2,8 + sll %r3,16 + + llgc %r5,2(%r5,%r12) # Te4[s1>>16] + llgc %r9,2(%r9,%r12) # Te4[s1>>24] + llgc %r6,2(%r6,%r12) # Te4[s1>>0] + llgc %r7,2(%r7,%r12) # Te4[s1>>8] + sll %r5,16 + sll %r9,24 + sll %r7,8 + or %r8,%r5 + or %r9,%r1 + or %r2,%r6 + or %r3,%r7 + + srlg %r5,%r10,5 # i0 + srlg %r6,%r10,13 # i1 + nr %r5,%r0 + nr %r6,%r0 + sllg %r7,%r10,3 + srl %r10,21 + ngr %r7,%r0 + nr %r10,%r0 + + sllg %r1,%r11,3 # i0 + srlg %r14,%r11,5 # i1 + ngr %r1,%r0 + + llgc %r5,2(%r5,%r12) # Te4[s2>>8] + llgc %r6,2(%r6,%r12) # Te4[s2>>16] + sll %r5,8 + llgc %r10,2(%r10,%r12) # Te4[s2>>24] + llgc %r7,2(%r7,%r12) # Te4[s2>>0] + sll %r6,16 + nr %r14,%r0 + sll %r10,24 + or %r8,%r5 + or %r9,%r6 + or %r10,%r2 + or %r3,%r7 + + srlg %r7,%r11,13 # i2 + srl %r11,21 + nr %r7,%r0 + nr %r11,%r0 + + l %r0,16(%r4) + l %r2,20(%r4) + + llgc %r5,2(%r1,%r12) # Te4[s3>>0] + llgc %r6,2(%r14,%r12) # Te4[s3>>8] + llgc %r7,2(%r7,%r12) # Te4[s3>>16] + llgc %r11,2(%r11,%r12) # Te4[s3>>24] + sll %r6,8 + sll %r7,16 + sll %r11,24 + or %r8,%r5 + or %r9,%r6 + or %r10,%r7 + or %r11,%r3 + + lg %r14,15*8(%r15) + xr %r8,%r0 + xr %r9,%r2 + x %r10,24(%r4) + x %r11,28(%r4) + + br %r14 +.size _s390x_AES_encrypt,.-_s390x_AES_encrypt +.type AES_Td,@object +.align 256 +AES_Td: +.long 0x51f4a750,0x51f4a750 +.long 0x7e416553,0x7e416553 +.long 0x1a17a4c3,0x1a17a4c3 +.long 0x3a275e96,0x3a275e96 +.long 0x3bab6bcb,0x3bab6bcb +.long 0x1f9d45f1,0x1f9d45f1 +.long 0xacfa58ab,0xacfa58ab +.long 0x4be30393,0x4be30393 +.long 0x2030fa55,0x2030fa55 +.long 0xad766df6,0xad766df6 +.long 0x88cc7691,0x88cc7691 +.long 0xf5024c25,0xf5024c25 +.long 
0x4fe5d7fc,0x4fe5d7fc +.long 0xc52acbd7,0xc52acbd7 +.long 0x26354480,0x26354480 +.long 0xb562a38f,0xb562a38f +.long 0xdeb15a49,0xdeb15a49 +.long 0x25ba1b67,0x25ba1b67 +.long 0x45ea0e98,0x45ea0e98 +.long 0x5dfec0e1,0x5dfec0e1 +.long 0xc32f7502,0xc32f7502 +.long 0x814cf012,0x814cf012 +.long 0x8d4697a3,0x8d4697a3 +.long 0x6bd3f9c6,0x6bd3f9c6 +.long 0x038f5fe7,0x038f5fe7 +.long 0x15929c95,0x15929c95 +.long 0xbf6d7aeb,0xbf6d7aeb +.long 0x955259da,0x955259da +.long 0xd4be832d,0xd4be832d +.long 0x587421d3,0x587421d3 +.long 0x49e06929,0x49e06929 +.long 0x8ec9c844,0x8ec9c844 +.long 0x75c2896a,0x75c2896a +.long 0xf48e7978,0xf48e7978 +.long 0x99583e6b,0x99583e6b +.long 0x27b971dd,0x27b971dd +.long 0xbee14fb6,0xbee14fb6 +.long 0xf088ad17,0xf088ad17 +.long 0xc920ac66,0xc920ac66 +.long 0x7dce3ab4,0x7dce3ab4 +.long 0x63df4a18,0x63df4a18 +.long 0xe51a3182,0xe51a3182 +.long 0x97513360,0x97513360 +.long 0x62537f45,0x62537f45 +.long 0xb16477e0,0xb16477e0 +.long 0xbb6bae84,0xbb6bae84 +.long 0xfe81a01c,0xfe81a01c +.long 0xf9082b94,0xf9082b94 +.long 0x70486858,0x70486858 +.long 0x8f45fd19,0x8f45fd19 +.long 0x94de6c87,0x94de6c87 +.long 0x527bf8b7,0x527bf8b7 +.long 0xab73d323,0xab73d323 +.long 0x724b02e2,0x724b02e2 +.long 0xe31f8f57,0xe31f8f57 +.long 0x6655ab2a,0x6655ab2a +.long 0xb2eb2807,0xb2eb2807 +.long 0x2fb5c203,0x2fb5c203 +.long 0x86c57b9a,0x86c57b9a +.long 0xd33708a5,0xd33708a5 +.long 0x302887f2,0x302887f2 +.long 0x23bfa5b2,0x23bfa5b2 +.long 0x02036aba,0x02036aba +.long 0xed16825c,0xed16825c +.long 0x8acf1c2b,0x8acf1c2b +.long 0xa779b492,0xa779b492 +.long 0xf307f2f0,0xf307f2f0 +.long 0x4e69e2a1,0x4e69e2a1 +.long 0x65daf4cd,0x65daf4cd +.long 0x0605bed5,0x0605bed5 +.long 0xd134621f,0xd134621f +.long 0xc4a6fe8a,0xc4a6fe8a +.long 0x342e539d,0x342e539d +.long 0xa2f355a0,0xa2f355a0 +.long 0x058ae132,0x058ae132 +.long 0xa4f6eb75,0xa4f6eb75 +.long 0x0b83ec39,0x0b83ec39 +.long 0x4060efaa,0x4060efaa +.long 0x5e719f06,0x5e719f06 +.long 0xbd6e1051,0xbd6e1051 +.long 0x3e218af9,0x3e218af9 
+.long 0x96dd063d,0x96dd063d +.long 0xdd3e05ae,0xdd3e05ae +.long 0x4de6bd46,0x4de6bd46 +.long 0x91548db5,0x91548db5 +.long 0x71c45d05,0x71c45d05 +.long 0x0406d46f,0x0406d46f +.long 0x605015ff,0x605015ff +.long 0x1998fb24,0x1998fb24 +.long 0xd6bde997,0xd6bde997 +.long 0x894043cc,0x894043cc +.long 0x67d99e77,0x67d99e77 +.long 0xb0e842bd,0xb0e842bd +.long 0x07898b88,0x07898b88 +.long 0xe7195b38,0xe7195b38 +.long 0x79c8eedb,0x79c8eedb +.long 0xa17c0a47,0xa17c0a47 +.long 0x7c420fe9,0x7c420fe9 +.long 0xf8841ec9,0xf8841ec9 +.long 0x00000000,0x00000000 +.long 0x09808683,0x09808683 +.long 0x322bed48,0x322bed48 +.long 0x1e1170ac,0x1e1170ac +.long 0x6c5a724e,0x6c5a724e +.long 0xfd0efffb,0xfd0efffb +.long 0x0f853856,0x0f853856 +.long 0x3daed51e,0x3daed51e +.long 0x362d3927,0x362d3927 +.long 0x0a0fd964,0x0a0fd964 +.long 0x685ca621,0x685ca621 +.long 0x9b5b54d1,0x9b5b54d1 +.long 0x24362e3a,0x24362e3a +.long 0x0c0a67b1,0x0c0a67b1 +.long 0x9357e70f,0x9357e70f +.long 0xb4ee96d2,0xb4ee96d2 +.long 0x1b9b919e,0x1b9b919e +.long 0x80c0c54f,0x80c0c54f +.long 0x61dc20a2,0x61dc20a2 +.long 0x5a774b69,0x5a774b69 +.long 0x1c121a16,0x1c121a16 +.long 0xe293ba0a,0xe293ba0a +.long 0xc0a02ae5,0xc0a02ae5 +.long 0x3c22e043,0x3c22e043 +.long 0x121b171d,0x121b171d +.long 0x0e090d0b,0x0e090d0b +.long 0xf28bc7ad,0xf28bc7ad +.long 0x2db6a8b9,0x2db6a8b9 +.long 0x141ea9c8,0x141ea9c8 +.long 0x57f11985,0x57f11985 +.long 0xaf75074c,0xaf75074c +.long 0xee99ddbb,0xee99ddbb +.long 0xa37f60fd,0xa37f60fd +.long 0xf701269f,0xf701269f +.long 0x5c72f5bc,0x5c72f5bc +.long 0x44663bc5,0x44663bc5 +.long 0x5bfb7e34,0x5bfb7e34 +.long 0x8b432976,0x8b432976 +.long 0xcb23c6dc,0xcb23c6dc +.long 0xb6edfc68,0xb6edfc68 +.long 0xb8e4f163,0xb8e4f163 +.long 0xd731dcca,0xd731dcca +.long 0x42638510,0x42638510 +.long 0x13972240,0x13972240 +.long 0x84c61120,0x84c61120 +.long 0x854a247d,0x854a247d +.long 0xd2bb3df8,0xd2bb3df8 +.long 0xaef93211,0xaef93211 +.long 0xc729a16d,0xc729a16d +.long 0x1d9e2f4b,0x1d9e2f4b +.long 
0xdcb230f3,0xdcb230f3 +.long 0x0d8652ec,0x0d8652ec +.long 0x77c1e3d0,0x77c1e3d0 +.long 0x2bb3166c,0x2bb3166c +.long 0xa970b999,0xa970b999 +.long 0x119448fa,0x119448fa +.long 0x47e96422,0x47e96422 +.long 0xa8fc8cc4,0xa8fc8cc4 +.long 0xa0f03f1a,0xa0f03f1a +.long 0x567d2cd8,0x567d2cd8 +.long 0x223390ef,0x223390ef +.long 0x87494ec7,0x87494ec7 +.long 0xd938d1c1,0xd938d1c1 +.long 0x8ccaa2fe,0x8ccaa2fe +.long 0x98d40b36,0x98d40b36 +.long 0xa6f581cf,0xa6f581cf +.long 0xa57ade28,0xa57ade28 +.long 0xdab78e26,0xdab78e26 +.long 0x3fadbfa4,0x3fadbfa4 +.long 0x2c3a9de4,0x2c3a9de4 +.long 0x5078920d,0x5078920d +.long 0x6a5fcc9b,0x6a5fcc9b +.long 0x547e4662,0x547e4662 +.long 0xf68d13c2,0xf68d13c2 +.long 0x90d8b8e8,0x90d8b8e8 +.long 0x2e39f75e,0x2e39f75e +.long 0x82c3aff5,0x82c3aff5 +.long 0x9f5d80be,0x9f5d80be +.long 0x69d0937c,0x69d0937c +.long 0x6fd52da9,0x6fd52da9 +.long 0xcf2512b3,0xcf2512b3 +.long 0xc8ac993b,0xc8ac993b +.long 0x10187da7,0x10187da7 +.long 0xe89c636e,0xe89c636e +.long 0xdb3bbb7b,0xdb3bbb7b +.long 0xcd267809,0xcd267809 +.long 0x6e5918f4,0x6e5918f4 +.long 0xec9ab701,0xec9ab701 +.long 0x834f9aa8,0x834f9aa8 +.long 0xe6956e65,0xe6956e65 +.long 0xaaffe67e,0xaaffe67e +.long 0x21bccf08,0x21bccf08 +.long 0xef15e8e6,0xef15e8e6 +.long 0xbae79bd9,0xbae79bd9 +.long 0x4a6f36ce,0x4a6f36ce +.long 0xea9f09d4,0xea9f09d4 +.long 0x29b07cd6,0x29b07cd6 +.long 0x31a4b2af,0x31a4b2af +.long 0x2a3f2331,0x2a3f2331 +.long 0xc6a59430,0xc6a59430 +.long 0x35a266c0,0x35a266c0 +.long 0x744ebc37,0x744ebc37 +.long 0xfc82caa6,0xfc82caa6 +.long 0xe090d0b0,0xe090d0b0 +.long 0x33a7d815,0x33a7d815 +.long 0xf104984a,0xf104984a +.long 0x41ecdaf7,0x41ecdaf7 +.long 0x7fcd500e,0x7fcd500e +.long 0x1791f62f,0x1791f62f +.long 0x764dd68d,0x764dd68d +.long 0x43efb04d,0x43efb04d +.long 0xccaa4d54,0xccaa4d54 +.long 0xe49604df,0xe49604df +.long 0x9ed1b5e3,0x9ed1b5e3 +.long 0x4c6a881b,0x4c6a881b +.long 0xc12c1fb8,0xc12c1fb8 +.long 0x4665517f,0x4665517f +.long 0x9d5eea04,0x9d5eea04 +.long 0x018c355d,0x018c355d 
+.long 0xfa877473,0xfa877473 +.long 0xfb0b412e,0xfb0b412e +.long 0xb3671d5a,0xb3671d5a +.long 0x92dbd252,0x92dbd252 +.long 0xe9105633,0xe9105633 +.long 0x6dd64713,0x6dd64713 +.long 0x9ad7618c,0x9ad7618c +.long 0x37a10c7a,0x37a10c7a +.long 0x59f8148e,0x59f8148e +.long 0xeb133c89,0xeb133c89 +.long 0xcea927ee,0xcea927ee +.long 0xb761c935,0xb761c935 +.long 0xe11ce5ed,0xe11ce5ed +.long 0x7a47b13c,0x7a47b13c +.long 0x9cd2df59,0x9cd2df59 +.long 0x55f2733f,0x55f2733f +.long 0x1814ce79,0x1814ce79 +.long 0x73c737bf,0x73c737bf +.long 0x53f7cdea,0x53f7cdea +.long 0x5ffdaa5b,0x5ffdaa5b +.long 0xdf3d6f14,0xdf3d6f14 +.long 0x7844db86,0x7844db86 +.long 0xcaaff381,0xcaaff381 +.long 0xb968c43e,0xb968c43e +.long 0x3824342c,0x3824342c +.long 0xc2a3405f,0xc2a3405f +.long 0x161dc372,0x161dc372 +.long 0xbce2250c,0xbce2250c +.long 0x283c498b,0x283c498b +.long 0xff0d9541,0xff0d9541 +.long 0x39a80171,0x39a80171 +.long 0x080cb3de,0x080cb3de +.long 0xd8b4e49c,0xd8b4e49c +.long 0x6456c190,0x6456c190 +.long 0x7bcb8461,0x7bcb8461 +.long 0xd532b670,0xd532b670 +.long 0x486c5c74,0x486c5c74 +.long 0xd0b85742,0xd0b85742 +# Td4[256] +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 
0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +.size AES_Td,.-AES_Td + +# void AES_decrypt(const unsigned char *inp, unsigned char *out, +# const AES_KEY *key) { +.globl AES_decrypt +.type AES_decrypt,@function +AES_decrypt: + l %r0,240(%r4) + lhi %r1,16 + clr %r0,%r1 + jl .Ldsoft + + la %r1,0(%r4) + #la %r2,0(%r2) + la %r4,0(%r3) + lghi %r3,16 # single block length + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # can this happen? 
+ br %r14 +.align 64 +.Ldsoft: + stmg %r3,%r14,3*8(%r15) + + llgf %r8,0(%r2) + llgf %r9,4(%r2) + llgf %r10,8(%r2) + llgf %r11,12(%r2) + + larl %r12,AES_Td + bras %r14,_s390x_AES_decrypt + + lg %r3,3*8(%r15) + st %r8,0(%r3) + st %r9,4(%r3) + st %r10,8(%r3) + st %r11,12(%r3) + + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_decrypt,.-AES_decrypt + +.type _s390x_AES_decrypt,@function +.align 16 +_s390x_AES_decrypt: + stg %r14,15*8(%r15) + x %r8,0(%r4) + x %r9,4(%r4) + x %r10,8(%r4) + x %r11,12(%r4) + l %r13,240(%r4) + llill %r0,2040 + aghi %r13,-1 + j .Ldec_loop +.align 16 +.Ldec_loop: + srlg %r1,%r8,13 + srlg %r2,%r8,5 + sllg %r3,%r8,3 + srl %r8,21 + nr %r8,%r0 + nr %r1,%r0 + nr %r2,%r0 + ngr %r3,%r0 + + sllg %r5,%r9,3 # i0 + srlg %r6,%r9,13 + srlg %r7,%r9,5 + srl %r9,21 + ngr %r5,%r0 + nr %r9,%r0 + nr %r6,%r0 + nr %r7,%r0 + + l %r8,0(%r8,%r12) # Td0[s0>>24] + l %r1,3(%r1,%r12) # Td1[s0>>16] + l %r2,2(%r2,%r12) # Td2[s0>>8] + l %r3,1(%r3,%r12) # Td3[s0>>0] + + x %r8,1(%r5,%r12) # Td3[s1>>0] + l %r9,0(%r9,%r12) # Td0[s1>>24] + x %r2,3(%r6,%r12) # Td1[s1>>16] + x %r3,2(%r7,%r12) # Td2[s1>>8] + + srlg %r5,%r10,5 # i0 + sllg %r6,%r10,3 # i1 + srlg %r7,%r10,13 + srl %r10,21 + nr %r5,%r0 + ngr %r6,%r0 + nr %r10,%r0 + nr %r7,%r0 + + xr %r9,%r1 + srlg %r14,%r11,5 # i1 + srlg %r1,%r11,13 # i0 + nr %r14,%r0 + la %r4,16(%r4) + nr %r1,%r0 + + x %r8,2(%r5,%r12) # Td2[s2>>8] + x %r9,1(%r6,%r12) # Td3[s2>>0] + l %r10,0(%r10,%r12) # Td0[s2>>24] + x %r3,3(%r7,%r12) # Td1[s2>>16] + + sllg %r7,%r11,3 # i2 + srl %r11,21 + ngr %r7,%r0 + nr %r11,%r0 + + xr %r10,%r2 + x %r8,0(%r4) + x %r9,4(%r4) + x %r10,8(%r4) + x %r3,12(%r4) + + x %r8,3(%r1,%r12) # Td1[s3>>16] + x %r9,2(%r14,%r12) # Td2[s3>>8] + x %r10,1(%r7,%r12) # Td3[s3>>0] + l %r11,0(%r11,%r12) # Td0[s3>>24] + xr %r11,%r3 + + brct %r13,.Ldec_loop + .align 16 + + l %r1,2048(%r12) # prefetch Td4 + l %r2,2112(%r12) + l %r3,2176(%r12) + l %r5,2240(%r12) + llill %r0,0xff + + srlg %r7,%r8,24 # i0 + srlg %r1,%r8,16 + srlg %r2,%r8,8 + nr 
%r8,%r0 # i3 + nr %r1,%r0 + + srlg %r5,%r9,24 + nr %r2,%r0 + srlg %r6,%r9,16 + srlg %r14,%r9,8 + nr %r9,%r0 # i0 + nr %r6,%r0 + nr %r14,%r0 + + llgc %r7,2048(%r7,%r12) # Td4[s0>>24] + llgc %r1,2048(%r1,%r12) # Td4[s0>>16] + llgc %r2,2048(%r2,%r12) # Td4[s0>>8] + sll %r1,16 + llgc %r3,2048(%r8,%r12) # Td4[s0>>0] + sllg %r8,%r7,24 + sll %r2,8 + + llgc %r9,2048(%r9,%r12) # Td4[s1>>0] + llgc %r5,2048(%r5,%r12) # Td4[s1>>24] + llgc %r6,2048(%r6,%r12) # Td4[s1>>16] + sll %r5,24 + llgc %r7,2048(%r14,%r12) # Td4[s1>>8] + sll %r6,16 + sll %r7,8 + or %r8,%r9 + or %r1,%r5 + or %r2,%r6 + or %r3,%r7 + + srlg %r5,%r10,8 # i0 + srlg %r6,%r10,24 + srlg %r7,%r10,16 + nr %r10,%r0 # i1 + nr %r5,%r0 + nr %r7,%r0 + llgc %r5,2048(%r5,%r12) # Td4[s2>>8] + llgc %r9,2048(%r10,%r12) # Td4[s2>>0] + llgc %r6,2048(%r6,%r12) # Td4[s2>>24] + llgc %r7,2048(%r7,%r12) # Td4[s2>>16] + sll %r5,8 + sll %r6,24 + or %r8,%r5 + sll %r7,16 + or %r2,%r6 + or %r3,%r7 + + srlg %r5,%r11,16 # i0 + srlg %r6,%r11,8 # i1 + srlg %r7,%r11,24 + nr %r11,%r0 # i2 + nr %r5,%r0 + nr %r6,%r0 + + lg %r14,15*8(%r15) + or %r9,%r1 + l %r0,16(%r4) + l %r1,20(%r4) + + llgc %r5,2048(%r5,%r12) # Td4[s3>>16] + llgc %r6,2048(%r6,%r12) # Td4[s3>>8] + sll %r5,16 + llgc %r10,2048(%r11,%r12) # Td4[s3>>0] + llgc %r11,2048(%r7,%r12) # Td4[s3>>24] + sll %r6,8 + sll %r11,24 + or %r8,%r5 + or %r9,%r6 + or %r10,%r2 + or %r11,%r3 + + xr %r8,%r0 + xr %r9,%r1 + x %r10,24(%r4) + x %r11,28(%r4) + + br %r14 +.size _s390x_AES_decrypt,.-_s390x_AES_decrypt +# void AES_set_encrypt_key(const unsigned char *in, int bits, +# AES_KEY *key) { +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,@function +.align 16 +AES_set_encrypt_key: +_s390x_AES_set_encrypt_key: + lghi %r0,0 + clgr %r2,%r0 + je .Lminus1 + clgr %r4,%r0 + je .Lminus1 + + lghi %r0,128 + clr %r3,%r0 + je .Lproceed + lghi %r0,192 + clr %r3,%r0 + je .Lproceed + lghi %r0,256 + clr %r3,%r0 + je .Lproceed + lghi %r2,-2 + br %r14 + +.align 16 +.Lproceed: + # convert bits to km(c) code, 
[128,192,256]->[18,19,20] + lhi %r5,-128 + lhi %r0,18 + ar %r5,%r3 + srl %r5,6 + ar %r5,%r0 + + larl %r1,OPENSSL_s390xcap_P + llihh %r0,0x8000 + srlg %r0,%r0,0(%r5) + ng %r0,S390X_KM(%r1) # check availability of both km... + ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length + jz .Lekey_internal + + lmg %r0,%r1,0(%r2) # just copy 128 bits... + stmg %r0,%r1,0(%r4) + lhi %r0,192 + cr %r3,%r0 + jl 1f + lg %r1,16(%r2) + stg %r1,16(%r4) + je 1f + lg %r1,24(%r2) + stg %r1,24(%r4) +1: st %r3,236(%r4) # save bits [for debugging purposes] + lgr %r0,%r5 + st %r5,240(%r4) # save km(c) code + lghi %r2,0 + br %r14 +.align 16 +.Lekey_internal: + stmg %r4,%r13,4*8(%r15) # all non-volatile regs and %r4 + + larl %r12,AES_Te+2048 + + llgf %r8,0(%r2) + llgf %r9,4(%r2) + llgf %r10,8(%r2) + llgf %r11,12(%r2) + st %r8,0(%r4) + st %r9,4(%r4) + st %r10,8(%r4) + st %r11,12(%r4) + lghi %r0,128 + cr %r3,%r0 + jne .Lnot128 + + llill %r0,0xff + lghi %r3,0 # i=0 + lghi %r13,10 + st %r13,240(%r4) + + llgfr %r2,%r11 # temp=rk[3] + srlg %r5,%r11,8 + srlg %r6,%r11,16 + srlg %r7,%r11,24 + nr %r2,%r0 + nr %r5,%r0 + nr %r6,%r0 + +.align 16 +.L128_loop: + la %r2,0(%r2,%r12) + la %r5,0(%r5,%r12) + la %r6,0(%r6,%r12) + la %r7,0(%r7,%r12) + icm %r2,2,0(%r2) # Te4[rk[3]>>0]<<8 + icm %r2,4,0(%r5) # Te4[rk[3]>>8]<<16 + icm %r2,8,0(%r6) # Te4[rk[3]>>16]<<24 + icm %r2,1,0(%r7) # Te4[rk[3]>>24] + x %r2,256(%r3,%r12) # rcon[i] + xr %r8,%r2 # rk[4]=rk[0]^... 
+ xr %r9,%r8 # rk[5]=rk[1]^rk[4] + xr %r10,%r9 # rk[6]=rk[2]^rk[5] + xr %r11,%r10 # rk[7]=rk[3]^rk[6] + + llgfr %r2,%r11 # temp=rk[3] + srlg %r5,%r11,8 + srlg %r6,%r11,16 + nr %r2,%r0 + nr %r5,%r0 + srlg %r7,%r11,24 + nr %r6,%r0 + + st %r8,16(%r4) + st %r9,20(%r4) + st %r10,24(%r4) + st %r11,28(%r4) + la %r4,16(%r4) # key+=4 + la %r3,4(%r3) # i++ + brct %r13,.L128_loop + lghi %r0,10 + lghi %r2,0 + lmg %r4,%r13,4*8(%r15) + br %r14 + +.align 16 +.Lnot128: + llgf %r0,16(%r2) + llgf %r1,20(%r2) + st %r0,16(%r4) + st %r1,20(%r4) + lghi %r0,192 + cr %r3,%r0 + jne .Lnot192 + + llill %r0,0xff + lghi %r3,0 # i=0 + lghi %r13,12 + st %r13,240(%r4) + lghi %r13,8 + + srlg %r5,%r1,8 + srlg %r6,%r1,16 + srlg %r7,%r1,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + +.align 16 +.L192_loop: + la %r1,0(%r1,%r12) + la %r5,0(%r5,%r12) + la %r6,0(%r6,%r12) + la %r7,0(%r7,%r12) + icm %r1,2,0(%r1) # Te4[rk[5]>>0]<<8 + icm %r1,4,0(%r5) # Te4[rk[5]>>8]<<16 + icm %r1,8,0(%r6) # Te4[rk[5]>>16]<<24 + icm %r1,1,0(%r7) # Te4[rk[5]>>24] + x %r1,256(%r3,%r12) # rcon[i] + xr %r8,%r1 # rk[6]=rk[0]^... 
+ xr %r9,%r8 # rk[7]=rk[1]^rk[6] + xr %r10,%r9 # rk[8]=rk[2]^rk[7] + xr %r11,%r10 # rk[9]=rk[3]^rk[8] + + st %r8,24(%r4) + st %r9,28(%r4) + st %r10,32(%r4) + st %r11,36(%r4) + brct %r13,.L192_continue + lghi %r0,12 + lghi %r2,0 + lmg %r4,%r13,4*8(%r15) + br %r14 + +.align 16 +.L192_continue: + lgr %r1,%r11 + x %r1,16(%r4) # rk[10]=rk[4]^rk[9] + st %r1,40(%r4) + x %r1,20(%r4) # rk[11]=rk[5]^rk[10] + st %r1,44(%r4) + + srlg %r5,%r1,8 + srlg %r6,%r1,16 + srlg %r7,%r1,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + + la %r4,24(%r4) # key+=6 + la %r3,4(%r3) # i++ + j .L192_loop + +.align 16 +.Lnot192: + llgf %r0,24(%r2) + llgf %r1,28(%r2) + st %r0,24(%r4) + st %r1,28(%r4) + llill %r0,0xff + lghi %r3,0 # i=0 + lghi %r13,14 + st %r13,240(%r4) + lghi %r13,7 + + srlg %r5,%r1,8 + srlg %r6,%r1,16 + srlg %r7,%r1,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + +.align 16 +.L256_loop: + la %r1,0(%r1,%r12) + la %r5,0(%r5,%r12) + la %r6,0(%r6,%r12) + la %r7,0(%r7,%r12) + icm %r1,2,0(%r1) # Te4[rk[7]>>0]<<8 + icm %r1,4,0(%r5) # Te4[rk[7]>>8]<<16 + icm %r1,8,0(%r6) # Te4[rk[7]>>16]<<24 + icm %r1,1,0(%r7) # Te4[rk[7]>>24] + x %r1,256(%r3,%r12) # rcon[i] + xr %r8,%r1 # rk[8]=rk[0]^... + xr %r9,%r8 # rk[9]=rk[1]^rk[8] + xr %r10,%r9 # rk[10]=rk[2]^rk[9] + xr %r11,%r10 # rk[11]=rk[3]^rk[10] + st %r8,32(%r4) + st %r9,36(%r4) + st %r10,40(%r4) + st %r11,44(%r4) + brct %r13,.L256_continue + lghi %r0,14 + lghi %r2,0 + lmg %r4,%r13,4*8(%r15) + br %r14 + +.align 16 +.L256_continue: + lgr %r1,%r11 # temp=rk[11] + srlg %r5,%r11,8 + srlg %r6,%r11,16 + srlg %r7,%r11,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + la %r1,0(%r1,%r12) + la %r5,0(%r5,%r12) + la %r6,0(%r6,%r12) + la %r7,0(%r7,%r12) + llgc %r1,0(%r1) # Te4[rk[11]>>0] + icm %r1,2,0(%r5) # Te4[rk[11]>>8]<<8 + icm %r1,4,0(%r6) # Te4[rk[11]>>16]<<16 + icm %r1,8,0(%r7) # Te4[rk[11]>>24]<<24 + x %r1,16(%r4) # rk[12]=rk[4]^... 
+ st %r1,48(%r4) + x %r1,20(%r4) # rk[13]=rk[5]^rk[12] + st %r1,52(%r4) + x %r1,24(%r4) # rk[14]=rk[6]^rk[13] + st %r1,56(%r4) + x %r1,28(%r4) # rk[15]=rk[7]^rk[14] + st %r1,60(%r4) + + srlg %r5,%r1,8 + srlg %r6,%r1,16 + srlg %r7,%r1,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + + la %r4,32(%r4) # key+=8 + la %r3,4(%r3) # i++ + j .L256_loop + +.Lminus1: + lghi %r2,-1 + br %r14 +.size AES_set_encrypt_key,.-AES_set_encrypt_key + +# void AES_set_decrypt_key(const unsigned char *in, int bits, +# AES_KEY *key) { +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,@function +.align 16 +AES_set_decrypt_key: + #stg %r4,4*8(%r15) # I rely on AES_set_encrypt_key to + stg %r14,14*8(%r15) # save non-volatile registers and %r4! + bras %r14,_s390x_AES_set_encrypt_key + #lg %r4,4*8(%r15) + lg %r14,14*8(%r15) + ltgr %r2,%r2 + bnzr %r14 + #l %r0,240(%r4) + lhi %r1,16 + cr %r0,%r1 + jl .Lgo + oill %r0,S390X_DECRYPT # set "decrypt" bit + st %r0,240(%r4) + br %r14 +.align 16 +.Lgo: lgr %r13,%r0 #llgf %r13,240(%r4) + la %r5,0(%r4) + sllg %r6,%r13,4 + la %r6,0(%r6,%r4) + srl %r13,1 + lghi %r1,-16 + +.align 16 +.Linv: lmg %r8,%r9,0(%r5) + lmg %r10,%r11,0(%r6) + stmg %r8,%r9,0(%r6) + stmg %r10,%r11,0(%r5) + la %r5,16(%r5) + la %r6,0(%r1,%r6) + brct %r13,.Linv + llgf %r13,240(%r4) + aghi %r13,-1 + sll %r13,2 # (rounds-1)*4 + llilh %r5,0x8080 + llilh %r6,0x1b1b + llilh %r7,0xfefe + oill %r5,0x8080 + oill %r6,0x1b1b + oill %r7,0xfefe + +.align 16 +.Lmix: l %r8,16(%r4) # tp1 + lr %r9,%r8 + ngr %r9,%r5 + srlg %r1,%r9,7 + slr %r9,%r1 + nr %r9,%r6 + sllg %r1,%r8,1 + nr %r1,%r7 + xr %r9,%r1 # tp2 + + lr %r10,%r9 + ngr %r10,%r5 + srlg %r1,%r10,7 + slr %r10,%r1 + nr %r10,%r6 + sllg %r1,%r9,1 + nr %r1,%r7 + xr %r10,%r1 # tp4 + + lr %r11,%r10 + ngr %r11,%r5 + srlg %r1,%r11,7 + slr %r11,%r1 + nr %r11,%r6 + sllg %r1,%r10,1 + nr %r1,%r7 + xr %r11,%r1 # tp8 + + xr %r9,%r8 # tp2^tp1 + xr %r10,%r8 # tp4^tp1 + rll %r8,%r8,24 # = ROTATE(tp1,8) + xr %r10,%r11 # ^=tp8 + xr %r8,%r9 # ^=tp2^tp1 + xr %r9,%r11 # 
tp2^tp1^tp8 + xr %r8,%r10 # ^=tp4^tp1^tp8 + rll %r9,%r9,8 + rll %r10,%r10,16 + xr %r8,%r9 # ^= ROTATE(tp8^tp2^tp1,24) + rll %r11,%r11,24 + xr %r8,%r10 # ^= ROTATE(tp8^tp4^tp1,16) + xr %r8,%r11 # ^= ROTATE(tp8,8) + + st %r8,16(%r4) + la %r4,4(%r4) + brct %r13,.Lmix + + lmg %r6,%r13,6*8(%r15)# as was saved by AES_set_encrypt_key! + lghi %r2,0 + br %r14 +.size AES_set_decrypt_key,.-AES_set_decrypt_key +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 +AES_cbc_encrypt: + xgr %r3,%r4 # flip %r3 and %r4, out and len + xgr %r4,%r3 + xgr %r3,%r4 + lhi %r0,16 + cl %r0,240(%r5) + jh .Lcbc_software + + lg %r0,0(%r6) # copy ivec + lg %r1,8(%r6) + stmg %r0,%r1,16(%r15) + lmg %r0,%r1,0(%r5) # copy key, cover 256 bit + stmg %r0,%r1,32(%r15) + lmg %r0,%r1,16(%r5) + stmg %r0,%r1,48(%r15) + l %r0,240(%r5) # load kmc code + lghi %r5,15 # res=len%16, len-=res; + ngr %r5,%r3 + slgr %r3,%r5 + la %r1,16(%r15) # parameter block - ivec || key + jz .Lkmc_truncated + .long 0xb92f0042 # kmc %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + ltr %r5,%r5 + jnz .Lkmc_truncated +.Lkmc_done: + lmg %r0,%r1,16(%r15) # copy ivec to caller + stg %r0,0(%r6) + stg %r1,8(%r6) + br %r14 +.align 16 +.Lkmc_truncated: + ahi %r5,-1 # it's the way it's encoded in mvc + tmll %r0,S390X_DECRYPT + jnz .Lkmc_truncated_dec + lghi %r1,0 + stg %r1,16*8(%r15) + stg %r1,16*8+8(%r15) + bras %r1,1f + mvc 16*8(1,%r15),0(%r2) +1: ex %r5,0(%r1) + la %r1,16(%r15) # restore parameter block + la %r2,16*8(%r15) + lghi %r3,16 + .long 0xb92f0042 # kmc %r4,%r2 + j .Lkmc_done +.align 16 +.Lkmc_truncated_dec: + stg %r4,4*8(%r15) + la %r4,16*8(%r15) + lghi %r3,16 + .long 0xb92f0042 # kmc %r4,%r2 + lg %r4,4*8(%r15) + bras %r1,2f + mvc 0(1,%r4),16*8(%r15) +2: ex %r5,0(%r1) + j .Lkmc_done +.align 16 +.Lcbc_software: + stmg %r5,%r14,5*8(%r15) + lhi %r0,0 + cl %r0,164(%r15) + je .Lcbc_decrypt + + larl %r12,AES_Te + + llgf %r8,0(%r6) + llgf %r9,4(%r6) + llgf %r10,8(%r6) + llgf %r11,12(%r6) + + lghi %r0,16 + slgr 
%r3,%r0 + brc 4,.Lcbc_enc_tail # if borrow +.Lcbc_enc_loop: + stmg %r2,%r4,2*8(%r15) + x %r8,0(%r2) + x %r9,4(%r2) + x %r10,8(%r2) + x %r11,12(%r2) + lgr %r4,%r5 + + bras %r14,_s390x_AES_encrypt + + lmg %r2,%r5,2*8(%r15) + st %r8,0(%r4) + st %r9,4(%r4) + st %r10,8(%r4) + st %r11,12(%r4) + + la %r2,16(%r2) + la %r4,16(%r4) + lghi %r0,16 + ltgr %r3,%r3 + jz .Lcbc_enc_done + slgr %r3,%r0 + brc 4,.Lcbc_enc_tail # if borrow + j .Lcbc_enc_loop +.align 16 +.Lcbc_enc_done: + lg %r6,6*8(%r15) + st %r8,0(%r6) + st %r9,4(%r6) + st %r10,8(%r6) + st %r11,12(%r6) + + lmg %r7,%r14,7*8(%r15) + br %r14 + +.align 16 +.Lcbc_enc_tail: + aghi %r3,15 + lghi %r0,0 + stg %r0,16*8(%r15) + stg %r0,16*8+8(%r15) + bras %r1,3f + mvc 16*8(1,%r15),0(%r2) +3: ex %r3,0(%r1) + lghi %r3,0 + la %r2,16*8(%r15) + j .Lcbc_enc_loop + +.align 16 +.Lcbc_decrypt: + larl %r12,AES_Td + + lg %r0,0(%r6) + lg %r1,8(%r6) + stmg %r0,%r1,16*8(%r15) + +.Lcbc_dec_loop: + stmg %r2,%r4,2*8(%r15) + llgf %r8,0(%r2) + llgf %r9,4(%r2) + llgf %r10,8(%r2) + llgf %r11,12(%r2) + lgr %r4,%r5 + + bras %r14,_s390x_AES_decrypt + + lmg %r2,%r5,2*8(%r15) + sllg %r8,%r8,32 + sllg %r10,%r10,32 + lr %r8,%r9 + lr %r10,%r11 + + lg %r0,0(%r2) + lg %r1,8(%r2) + xg %r8,16*8(%r15) + xg %r10,16*8+8(%r15) + lghi %r9,16 + slgr %r3,%r9 + brc 4,.Lcbc_dec_tail # if borrow + brc 2,.Lcbc_dec_done # if zero + stg %r8,0(%r4) + stg %r10,8(%r4) + stmg %r0,%r1,16*8(%r15) + + la %r2,16(%r2) + la %r4,16(%r4) + j .Lcbc_dec_loop + +.Lcbc_dec_done: + stg %r8,0(%r4) + stg %r10,8(%r4) +.Lcbc_dec_exit: + lmg %r6,%r14,6*8(%r15) + stmg %r0,%r1,0(%r6) + + br %r14 + +.align 16 +.Lcbc_dec_tail: + aghi %r3,15 + stg %r8,16*8(%r15) + stg %r10,16*8+8(%r15) + bras %r9,4f + mvc 0(1,%r4),16*8(%r15) +4: ex %r3,0(%r9) + j .Lcbc_dec_exit +.size AES_cbc_encrypt,.-AES_cbc_encrypt +.globl AES_ctr32_encrypt +.type AES_ctr32_encrypt,@function +.align 16 +AES_ctr32_encrypt: + xgr %r3,%r4 # flip %r3 and %r4, %r4 and %r3 + xgr %r4,%r3 + xgr %r3,%r4 + llgfr %r3,%r3 # safe in ctr32 
subroutine even in 64-bit case + l %r0,240(%r5) + lhi %r1,16 + clr %r0,%r1 + jl .Lctr32_software + + stg %r10,10*8(%r15) + stg %r11,11*8(%r15) + + clr %r3,%r1 # does work even in 64-bit mode + jle .Lctr32_nokma # kma is slower for <= 16 blocks + + larl %r1,OPENSSL_s390xcap_P + lr %r10,%r0 + llihh %r11,0x8000 + srlg %r11,%r11,0(%r10) + ng %r11,S390X_KMA(%r1) # check kma capability vector + jz .Lctr32_nokma + + lghi %r1,-160-112 + lgr %r11,%r15 + la %r15,0(%r1,%r15) # prepare parameter block + + lhi %r1,0x0600 + sllg %r3,%r3,4 + or %r0,%r1 # set HS and LAAD flags + + stg %r11,0(%r15) # backchain + la %r1,160(%r15) + + lmg %r10,%r11,0(%r5) # copy key + stg %r10,160+80(%r15) + stg %r11,160+88(%r15) + lmg %r10,%r11,16(%r5) + stg %r10,160+96(%r15) + stg %r11,160+104(%r15) + + lmg %r10,%r11,0(%r6) # copy iv + stg %r10,160+64(%r15) + ahi %r11,-1 # kma requires counter-1 + stg %r11,160+72(%r15) + st %r11,160+12(%r15) # copy counter + + lghi %r10,0 # no AAD + lghi %r11,0 + + .long 0xb929a042 # kma %r4,%r10,%r2 + brc 1,.-4 # pay attention to "partial completion" + + stg %r0,160+80(%r15) # wipe key + stg %r0,160+88(%r15) + stg %r0,160+96(%r15) + stg %r0,160+104(%r15) + la %r15,160+112(%r15) + + lmg %r10,%r11,10*8(%r15) + br %r14 + +.align 16 +.Lctr32_nokma: + stmg %r6,%r9,6*8(%r15) + + slgr %r4,%r2 + la %r1,0(%r5) # %r1 is permanent copy of %r5 + lg %r5,0(%r6) # load ivec + lg %r6,8(%r6) + + # prepare and allocate stack frame at the top of 4K page + # with 1K reserved for eventual signal handling + lghi %r8,-1024-256-16# guarantee at least 256-bytes buffer + lghi %r9,-4096 + algr %r8,%r15 + lgr %r7,%r15 + ngr %r8,%r9 # align at page boundary + slgr %r7,%r8 # total buffer size + lgr %r10,%r15 + lghi %r9,1024+16 # sl[g]fi is extended-immediate facility + slgr %r7,%r9 # deduct reservation to get usable buffer size + # buffer size is at least 256 and at most 3072+256-16 + + la %r15,1024(%r8) # alloca + srlg %r7,%r7,4 # convert bytes to blocks, minimum 16 + stg %r10,0(%r15) #
back-chain + stg %r7,8(%r15) + + slgr %r3,%r7 + brc 1,.Lctr32_hw_switch # not zero, no borrow + algr %r7,%r3 # input is shorter than allocated buffer + lghi %r3,0 + stg %r7,8(%r15) + +.Lctr32_hw_switch: +.Lctr32_km_loop: + la %r10,16(%r15) + lgr %r11,%r7 +.Lctr32_km_prepare: + stg %r5,0(%r10) + stg %r6,8(%r10) + la %r10,16(%r10) + ahi %r6,1 # 32-bit increment, preserves upper half + brct %r11,.Lctr32_km_prepare + + la %r8,16(%r15) # inp + sllg %r9,%r7,4 # len + la %r10,16(%r15) # out + .long 0xb92e00a8 # km %r10,%r8 + brc 1,.-4 # pay attention to "partial completion" + + la %r10,16(%r15) + lgr %r11,%r7 + slgr %r10,%r2 +.Lctr32_km_xor: + lg %r8,0(%r2) + lg %r9,8(%r2) + xg %r8,0(%r10,%r2) + xg %r9,8(%r10,%r2) + stg %r8,0(%r4,%r2) + stg %r9,8(%r4,%r2) + la %r2,16(%r2) + brct %r11,.Lctr32_km_xor + + slgr %r3,%r7 + brc 1,.Lctr32_km_loop # not zero, no borrow + algr %r7,%r3 + lghi %r3,0 + brc 4+1,.Lctr32_km_loop # not zero + + lg %r8,0(%r15) + lg %r9,8(%r15) + la %r10,16(%r15) +.Lctr32_km_zap: + stg %r8,0(%r10) + stg %r8,8(%r10) + la %r10,16(%r10) + brct %r9,.Lctr32_km_zap + + la %r15,0(%r8) + lmg %r6,%r11,6*8(%r15) + br %r14 +.align 16 +.Lctr32_software: + stmg %r5,%r14,5*8(%r15) + slgr %r2,%r4 + larl %r12,AES_Te + llgf %r1,12(%r6) + +.Lctr32_loop: + stmg %r2,%r4,2*8(%r15) + llgf %r8,0(%r6) + llgf %r9,4(%r6) + llgf %r10,8(%r6) + lgr %r11,%r1 + st %r1,16*8(%r15) + lgr %r4,%r5 + + bras %r14,_s390x_AES_encrypt + + lmg %r2,%r6,2*8(%r15) + llgf %r1,16*8(%r15) + x %r8,0(%r2,%r4) + x %r9,4(%r2,%r4) + x %r10,8(%r2,%r4) + x %r11,12(%r2,%r4) + stm %r8,%r11,0(%r4) + + la %r4,16(%r4) + ahi %r1,1 # 32-bit increment + brct %r3,.Lctr32_loop + + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_ctr32_encrypt,.-AES_ctr32_encrypt +.type _s390x_xts_km,@function +.align 16 +_s390x_xts_km: + llgfr %r8,%r0 # put aside the function code + lghi %r9,0x7f + nr %r9,%r0 + larl %r1,OPENSSL_s390xcap_P + llihh %r0,0x8000 + srlg %r0,%r0,32(%r9) # check for 32+function code + ng %r0,S390X_KM(%r1) # check km 
capability vector + lgr %r0,%r8 # restore the function code + la %r1,0(%r5) # restore %r5 + jz .Lxts_km_vanilla + + lmg %r6,%r7,144(%r15) # put aside the tweak value + algr %r4,%r2 + + oill %r0,32 # switch to xts function code + aghi %r9,-18 # + sllg %r9,%r9,3 # (function code - 18)*8, 0 or 16 + la %r1,144-16(%r15) + slgr %r1,%r9 # parameter block position + lmg %r8,%r11,0(%r5) # load 256 bits of key material, + stmg %r8,%r11,0(%r1) # and copy it to parameter block. + # yes, it contains junk and overlaps + # with the tweak in 128-bit case. + # it's done to avoid conditional + # branch. + stmg %r6,%r7,144(%r15) # "re-seat" the tweak value + + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + lrvg %r8,144+0(%r15) # load the last tweak + lrvg %r9,144+8(%r15) + stmg %r0,%r3,144-32(%r15) # wipe copy of the key + + nill %r0,0xffdf # switch back to original function code + la %r1,0(%r5) # restore pointer to %r5 + slgr %r4,%r2 + + llgc %r3,2*8-1(%r15) + nill %r3,0x0f # %r3%=16 + br %r14 + +.align 16 +.Lxts_km_vanilla: + # prepare and allocate stack frame at the top of 4K page + # with 1K reserved for eventual signal handling + lghi %r8,-1024-256-16# guarantee at least 256-bytes buffer + lghi %r9,-4096 + algr %r8,%r15 + lgr %r7,%r15 + ngr %r8,%r9 # align at page boundary + slgr %r7,%r8 # total buffer size + lgr %r10,%r15 + lghi %r9,1024+16 # sl[g]fi is extended-immediate facility + slgr %r7,%r9 # deduct reservation to get usable buffer size + # buffer size is at least 256 and at most 3072+256-16 + + la %r15,1024(%r8) # alloca + nill %r7,0xfff0 # round to 16*n + stg %r10,0(%r15) # back-chain + nill %r3,0xfff0 # redundant + stg %r7,8(%r15) + + slgr %r3,%r7 + brc 1,.Lxts_km_go # not zero, no borrow + algr %r7,%r3 # input is shorter than allocated buffer + lghi %r3,0 + stg %r7,8(%r15) + +.Lxts_km_go: + lrvg %r8,144+0(%r10) # load the tweak value in little-endian + lrvg %r9,144+8(%r10) + + la %r10,16(%r15) # vector of ascending tweak values +
slgr %r10,%r2 + srlg %r11,%r7,4 + j .Lxts_km_start + +.Lxts_km_loop: + la %r10,16(%r15) + slgr %r10,%r2 + srlg %r11,%r7,4 +.Lxts_km_prepare: + lghi %r5,0x87 + srag %r6,%r9,63 # broadcast upper bit + ngr %r5,%r6 # rem + algr %r8,%r8 + alcgr %r9,%r9 + xgr %r8,%r5 +.Lxts_km_start: + lrvgr %r5,%r8 # flip byte order + lrvgr %r6,%r9 + stg %r5,0(%r10,%r2) + stg %r6,8(%r10,%r2) + xg %r5,0(%r2) + xg %r6,8(%r2) + stg %r5,0(%r4,%r2) + stg %r6,8(%r4,%r2) + la %r2,16(%r2) + brct %r11,.Lxts_km_prepare + + slgr %r2,%r7 # rewind %r2 + la %r10,0(%r4,%r2) + lgr %r11,%r7 + .long 0xb92e00aa # km %r10,%r10 + brc 1,.-4 # pay attention to "partial completion" + + la %r10,16(%r15) + slgr %r10,%r2 + srlg %r11,%r7,4 +.Lxts_km_xor: + lg %r5,0(%r4,%r2) + lg %r6,8(%r4,%r2) + xg %r5,0(%r10,%r2) + xg %r6,8(%r10,%r2) + stg %r5,0(%r4,%r2) + stg %r6,8(%r4,%r2) + la %r2,16(%r2) + brct %r11,.Lxts_km_xor + + slgr %r3,%r7 + brc 1,.Lxts_km_loop # not zero, no borrow + algr %r7,%r3 + lghi %r3,0 + brc 4+1,.Lxts_km_loop # not zero + + lg %r5,0(%r15) # back-chain + llgf %r7,12(%r15) # bytes used + la %r6,16(%r15) + srlg %r7,%r7,4 +.Lxts_km_zap: + stg %r5,0(%r6) + stg %r5,8(%r6) + la %r6,16(%r6) + brct %r7,.Lxts_km_zap + + la %r15,0(%r5) + llgc %r3,2*8-1(%r5) + nill %r3,0x0f # %r3%=16 + bzr %r14 + + # generate one more tweak... + lghi %r5,0x87 + srag %r6,%r9,63 # broadcast upper bit + ngr %r5,%r6 # rem + algr %r8,%r8 + alcgr %r9,%r9 + xgr %r8,%r5 + + ltr %r3,%r3 # clear zero flag + br %r14 +.size _s390x_xts_km,.-_s390x_xts_km + +.globl AES_xts_encrypt +.type AES_xts_encrypt,@function +.align 16 +AES_xts_encrypt: + xgr %r3,%r4 # flip %r3 and %r4, %r4 and %r3 + xgr %r4,%r3 + xgr %r3,%r4 + stg %r3,1*8(%r15) # save copy of %r3 + srag %r3,%r3,4 # formally wrong, because it expands + # sign byte, but who can afford asking + # to process more than 2^63-1 bytes? + # I use it, because it sets condition + # code... + bcr 8,%r14 # abort if zero (i.e. 
less than 16) + llgf %r0,240(%r6) + lhi %r1,16 + clr %r0,%r1 + jl .Lxts_enc_software + + stg %r14,5*8(%r15) + stmg %r6,%r11,6*8(%r15) + + sllg %r3,%r3,4 # %r3&=~15 + slgr %r4,%r2 + + # generate the tweak value + lg %r11,160(%r15) # pointer to iv + la %r10,144(%r15) + lmg %r8,%r9,0(%r11) + lghi %r11,16 + stmg %r8,%r9,0(%r10) + la %r1,0(%r6) # %r6 is not needed anymore + .long 0xb92e00aa # km %r10,%r10, generate the tweak + brc 1,.-4 # can this happen? + + l %r0,240(%r5) + la %r1,0(%r5) # %r5 is not needed anymore + bras %r14,_s390x_xts_km + jz .Lxts_enc_km_done + + aghi %r2,-16 # take one step back + la %r7,0(%r4,%r2) # put aside real %r4 +.Lxts_enc_km_steal: + llgc %r5,16(%r2) + llgc %r6,0(%r4,%r2) + stc %r5,0(%r4,%r2) + stc %r6,16(%r4,%r2) + la %r2,1(%r2) + brct %r3,.Lxts_enc_km_steal + + la %r10,0(%r7) + lghi %r11,16 + lrvgr %r5,%r8 # flip byte order + lrvgr %r6,%r9 + xg %r5,0(%r10) + xg %r6,8(%r10) + stg %r5,0(%r10) + stg %r6,8(%r10) + .long 0xb92e00aa # km %r10,%r10 + brc 1,.-4 # can this happen? 
+ lrvgr %r5,%r8 # flip byte order + lrvgr %r6,%r9 + xg %r5,0(%r7) + xg %r6,8(%r7) + stg %r5,0(%r7) + stg %r6,8(%r7) + +.Lxts_enc_km_done: + stg %r15,144+0(%r15) # wipe tweak + stg %r15,144+8(%r15) + lg %r14,5*8(%r15) + lmg %r6,%r11,6*8(%r15) + br %r14 +.align 16 +.Lxts_enc_software: + stmg %r6,%r14,6*8(%r15) + + slgr %r4,%r2 + + lg %r11,160(%r15) # ivp + llgf %r8,0(%r11) # load iv + llgf %r9,4(%r11) + llgf %r10,8(%r11) + llgf %r11,12(%r11) + stmg %r2,%r5,2*8(%r15) + la %r4,0(%r6) + larl %r12,AES_Te + bras %r14,_s390x_AES_encrypt # generate the tweak + lmg %r2,%r5,2*8(%r15) + stm %r8,%r11,144(%r15) # save the tweak + j .Lxts_enc_enter + +.align 16 +.Lxts_enc_loop: + lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r9,%r9 # flip byte order + lrvgr %r11,%r11 + srlg %r8,%r9,32 # smash the tweak to 4x32-bits + stg %r9,144+0(%r15) # save the tweak + llgfr %r9,%r9 + srlg %r10,%r11,32 + stg %r11,144+8(%r15) + llgfr %r11,%r11 + la %r2,16(%r2) # %r2+=16 +.Lxts_enc_enter: + x %r8,0(%r2) # ^=*(%r2) + x %r9,4(%r2) + x %r10,8(%r2) + x %r11,12(%r2) + stmg %r2,%r3,2*8(%r15) # only two registers are changing + la %r4,0(%r5) + bras %r14,_s390x_AES_encrypt + lmg %r2,%r5,2*8(%r15) + x %r8,144+0(%r15) # ^=tweak + x %r9,144+4(%r15) + x %r10,144+8(%r15) + x %r11,144+12(%r15) + st %r8,0(%r4,%r2) + st %r9,4(%r4,%r2) + st %r10,8(%r4,%r2) + st %r11,12(%r4,%r2) + brctg %r3,.Lxts_enc_loop + + llgc %r3,15(%r15) + nill %r3,0x0f # %r3%16 + jz .Lxts_enc_done + + la %r7,0(%r2,%r4) # put aside real %r4 +.Lxts_enc_steal: + llgc %r0,16(%r2) + llgc %r1,0(%r4,%r2) + stc %r0,0(%r4,%r2) + stc %r1,16(%r4,%r2) + la %r2,1(%r2) + brct %r3,.Lxts_enc_steal + la %r4,0(%r7) # restore real %r4 + + # generate last tweak... 
+ lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r9,%r9 # flip byte order + lrvgr %r11,%r11 + srlg %r8,%r9,32 # smash the tweak to 4x32-bits + stg %r9,144+0(%r15) # save the tweak + llgfr %r9,%r9 + srlg %r10,%r11,32 + stg %r11,144+8(%r15) + llgfr %r11,%r11 + + x %r8,0(%r4) # ^=*(inp)|stolen cipher-text + x %r9,4(%r4) + x %r10,8(%r4) + x %r11,12(%r4) + stg %r4,4*8(%r15) + la %r4,0(%r5) + bras %r14,_s390x_AES_encrypt + lg %r4,4*8(%r15) + x %r8,144(%r15) # ^=tweak + x %r9,148(%r15) + x %r10,152(%r15) + x %r11,156(%r15) + st %r8,0(%r4) + st %r9,4(%r4) + st %r10,8(%r4) + st %r11,12(%r4) + +.Lxts_enc_done: + stg %r15,144+0(%r15) # wipe tweak + stg %r15,144+8(%r15) # second 8 bytes of the tweak slot + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_xts_encrypt,.-AES_xts_encrypt +.globl AES_xts_decrypt +.type AES_xts_decrypt,@function +.align 16 +AES_xts_decrypt: + xgr %r3,%r4 # flip %r3 and %r4, %r4 and %r3 + xgr %r4,%r3 + xgr %r3,%r4 + stg %r3,1*8(%r15) # save copy of %r3 + aghi %r3,-16 + bcr 4,%r14 # abort if less than zero. formally + # wrong, because %r3 is unsigned, + # but who can afford asking to + # process more than 2^63-1 bytes? + tmll %r3,0x0f + jnz .Lxts_dec_proceed + aghi %r3,16 +.Lxts_dec_proceed: + llgf %r0,240(%r6) + lhi %r1,16 + clr %r0,%r1 + jl .Lxts_dec_software + + stg %r14,5*8(%r15) + stmg %r6,%r11,6*8(%r15) + + nill %r3,0xfff0 # %r3&=~15 + slgr %r4,%r2 + + # generate the tweak value + lg %r11,160(%r15) # pointer to iv + la %r10,144(%r15) + lmg %r8,%r9,0(%r11) + lghi %r11,16 + stmg %r8,%r9,0(%r10) + la %r1,0(%r6) # %r6 is not needed past this point + .long 0xb92e00aa # km %r10,%r10, generate the tweak + brc 1,.-4 # can this happen?
+ + l %r0,240(%r5) + la %r1,0(%r5) # %r5 is not needed anymore + + ltgr %r3,%r3 + jz .Lxts_dec_km_short + bras %r14,_s390x_xts_km + jz .Lxts_dec_km_done + + lrvgr %r10,%r8 # make copy in reverse byte order + lrvgr %r11,%r9 + j .Lxts_dec_km_2ndtweak + +.Lxts_dec_km_short: + llgc %r3,15(%r15) + nill %r3,0x0f # %r3%=16 + lrvg %r8,144+0(%r15) # load the tweak + lrvg %r9,144+8(%r15) + lrvgr %r10,%r8 # make copy in reverse byte order + lrvgr %r11,%r9 + +.Lxts_dec_km_2ndtweak: + lghi %r5,0x87 + srag %r6,%r9,63 # broadcast upper bit + ngr %r5,%r6 # rem + algr %r8,%r8 + alcgr %r9,%r9 + xgr %r8,%r5 + lrvgr %r5,%r8 # flip byte order + lrvgr %r6,%r9 + + xg %r5,0(%r2) + xg %r6,8(%r2) + stg %r5,0(%r4,%r2) + stg %r6,8(%r4,%r2) + la %r6,0(%r4,%r2) + lghi %r7,16 + .long 0xb92e0066 # km %r6,%r6 + brc 1,.-4 # can this happen? + lrvgr %r5,%r8 + lrvgr %r6,%r9 + xg %r5,0(%r4,%r2) + xg %r6,8(%r4,%r2) + stg %r5,0(%r4,%r2) + stg %r6,8(%r4,%r2) + + la %r7,0(%r4,%r2) # put aside real %r4 +.Lxts_dec_km_steal: + llgc %r5,16(%r2) + llgc %r6,0(%r4,%r2) + stc %r5,0(%r4,%r2) + stc %r6,16(%r4,%r2) + la %r2,1(%r2) + brct %r3,.Lxts_dec_km_steal + + lgr %r8,%r10 + lgr %r9,%r11 + xg %r8,0(%r7) + xg %r9,8(%r7) + stg %r8,0(%r7) + stg %r9,8(%r7) + la %r8,0(%r7) + lghi %r9,16 + .long 0xb92e0088 # km %r8,%r8 + brc 1,.-4 # can this happen? 
+ xg %r10,0(%r7) + xg %r11,8(%r7) + stg %r10,0(%r7) + stg %r11,8(%r7) +.Lxts_dec_km_done: + stg %r15,144+0(%r15) # wipe tweak + stg %r15,144+8(%r15) + lg %r14,5*8(%r15) + lmg %r6,%r11,6*8(%r15) + br %r14 +.align 16 +.Lxts_dec_software: + stmg %r6,%r14,6*8(%r15) + + srlg %r3,%r3,4 + slgr %r4,%r2 + + lg %r11,160(%r15) # ivp + llgf %r8,0(%r11) # load iv + llgf %r9,4(%r11) + llgf %r10,8(%r11) + llgf %r11,12(%r11) + stmg %r2,%r5,2*8(%r15) + la %r4,0(%r6) + larl %r12,AES_Te + bras %r14,_s390x_AES_encrypt # generate the tweak + lmg %r2,%r5,2*8(%r15) + larl %r12,AES_Td + ltgr %r3,%r3 + stm %r8,%r11,144(%r15) # save the tweak + jz .Lxts_dec_short + j .Lxts_dec_enter + +.align 16 +.Lxts_dec_loop: + lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r9,%r9 # flip byte order + lrvgr %r11,%r11 + srlg %r8,%r9,32 # smash the tweak to 4x32-bits + stg %r9,144+0(%r15) # save the tweak + llgfr %r9,%r9 + srlg %r10,%r11,32 + stg %r11,144+8(%r15) + llgfr %r11,%r11 +.Lxts_dec_enter: + x %r8,0(%r2) # tweak^=*(inp) + x %r9,4(%r2) + x %r10,8(%r2) + x %r11,12(%r2) + stmg %r2,%r3,2*8(%r15) # only two registers are changing + la %r4,0(%r5) + bras %r14,_s390x_AES_decrypt + lmg %r2,%r5,2*8(%r15) + x %r8,144+0(%r15) # ^=tweak + x %r9,144+4(%r15) + x %r10,144+8(%r15) + x %r11,144+12(%r15) + st %r8,0(%r4,%r2) + st %r9,4(%r4,%r2) + st %r10,8(%r4,%r2) + st %r11,12(%r4,%r2) + la %r2,16(%r2) + brctg %r3,.Lxts_dec_loop + + llgc %r3,15(%r15) + nill %r3,0x0f # %r3%16 + jz .Lxts_dec_done + + # generate pair of tweaks... 
+ lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r6,%r9 # flip byte order + lrvgr %r7,%r11 + stmg %r6,%r7,144(%r15) # save the 1st tweak + j .Lxts_dec_2ndtweak + +.align 16 +.Lxts_dec_short: + llgc %r3,15(%r15) + nill %r3,0x0f # %r3%16 + lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) +.Lxts_dec_2ndtweak: + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r9,%r9 # flip byte order + lrvgr %r11,%r11 + srlg %r8,%r9,32 # smash the tweak to 4x32-bits + stg %r9,144-16+0(%r15) # save the 2nd tweak + llgfr %r9,%r9 + srlg %r10,%r11,32 + stg %r11,144-16+8(%r15) + llgfr %r11,%r11 + + x %r8,0(%r2) # tweak_the_2nd^=*(inp) + x %r9,4(%r2) + x %r10,8(%r2) + x %r11,12(%r2) + stmg %r2,%r3,2*8(%r15) + la %r4,0(%r5) + bras %r14,_s390x_AES_decrypt + lmg %r2,%r5,2*8(%r15) + x %r8,144-16+0(%r15) # ^=tweak_the_2nd + x %r9,144-16+4(%r15) + x %r10,144-16+8(%r15) + x %r11,144-16+12(%r15) + st %r8,0(%r4,%r2) + st %r9,4(%r4,%r2) + st %r10,8(%r4,%r2) + st %r11,12(%r4,%r2) + + la %r7,0(%r4,%r2) # put aside real %r4 +.Lxts_dec_steal: + llgc %r0,16(%r2) + llgc %r1,0(%r4,%r2) + stc %r0,0(%r4,%r2) + stc %r1,16(%r4,%r2) + la %r2,1(%r2) + brct %r3,.Lxts_dec_steal + la %r4,0(%r7) # restore real %r4 + + lm %r8,%r11,144(%r15) # load the 1st tweak + x %r8,0(%r4) # tweak^=*(inp)|stolen cipher-text + x %r9,4(%r4) + x %r10,8(%r4) + x %r11,12(%r4) + stg %r4,4*8(%r15) + la %r4,0(%r5) + bras %r14,_s390x_AES_decrypt + lg %r4,4*8(%r15) + x %r8,144+0(%r15) # ^=tweak + x %r9,144+4(%r15) + x %r10,144+8(%r15) + x %r11,144+12(%r15) + st %r8,0(%r4) + st %r9,4(%r4) + st %r10,8(%r4) + st %r11,12(%r4) + stg %r15,144-16+0(%r15) # wipe 2nd tweak + stg %r15,144-16+8(%r15) +.Lxts_dec_done: + stg %r15,144+0(%r15) # wipe tweak + stg %r15,144+8(%r15) # second 8 bytes of the tweak slot + lmg
%r6,%r14,6*8(%r15) + br %r14 +.size AES_xts_decrypt,.-AES_xts_decrypt +.string "AES for s390x, CRYPTOGAMS by <appro@openssl.org>" diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/bn/s390x-gf2m.s b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/bn/s390x-gf2m.s new file mode 100644 index 0000000000..e0b0822cae --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/bn/s390x-gf2m.s @@ -0,0 +1,217 @@ +.text + +.type _mul_1x1,@function +.align 16 +_mul_1x1: + lgr %r6,%r3 + sllg %r7,%r3,1 + sllg %r8,%r3,2 + sllg %r9,%r3,3 + + srag %r3,%r6,63 # broadcast 63rd bit + nihh %r6,0x1fff + srag %r12,%r7,63 # broadcast 62nd bit + nihh %r7,0x3fff + srag %r13,%r8,63 # broadcast 61st bit + nihh %r8,0x7fff + ngr %r3,%r5 + ngr %r12,%r5 + ngr %r13,%r5 + + lghi %r0,0 + lgr %r10,%r6 + stg %r0,160(%r15) # tab[0]=0 + xgr %r10,%r7 + stg %r6,168(%r15) # tab[1]=a1 + lgr %r11,%r8 + stg %r7,176(%r15) # tab[2]=a2 + xgr %r11,%r9 + stg %r10,184(%r15) # tab[3]=a1^a2 + xgr %r6,%r8 + + stg %r8,192(%r15) # tab[4]=a4 + xgr %r7,%r8 + stg %r6,200(%r15) # tab[5]=a1^a4 + xgr %r10,%r8 + stg %r7,208(%r15) # tab[6]=a2^a4 + xgr %r6,%r11 + stg %r10,216(%r15) # tab[7]=a1^a2^a4 + xgr %r7,%r11 + + stg %r9,224(%r15) # tab[8]=a8 + xgr %r10,%r11 + stg %r6,232(%r15) # tab[9]=a1^a8 + xgr %r6,%r8 + stg %r7,240(%r15) # tab[10]=a2^a8 + xgr %r7,%r8 + stg %r10,248(%r15) # tab[11]=a1^a2^a8 + + xgr %r10,%r8 + stg %r11,256(%r15) # tab[12]=a4^a8 + srlg %r4,%r3,1 + stg %r6,264(%r15) # tab[13]=a1^a4^a8 + sllg %r3,%r3,63 + stg %r7,272(%r15) # tab[14]=a2^a4^a8 + srlg %r0,%r12,2 + stg %r10,280(%r15) # tab[15]=a1^a2^a4^a8 + + lghi %r9,120 + sllg %r6,%r12,62 + sllg %r12,%r5,3 + srlg %r1,%r13,3 + ngr %r12,%r9 + sllg %r7,%r13,61 + srlg %r13,%r5,4-3 + xgr %r4,%r0 + ngr %r13,%r9 + xgr %r3,%r6 + xgr %r4,%r1 + xgr %r3,%r7 + + xg %r3,160(%r12,%r15) + srlg %r12,%r5,8-3 + ngr %r12,%r9 + lg %r1,160(%r13,%r15) + srlg %r13,%r5,12-3 + sllg %r0,%r1,4 + ngr %r13,%r9 + srlg %r1,%r1,60 + xgr %r3,%r0 + 
xgr %r4,%r1 + lg %r0,160(%r12,%r15) + srlg %r12,%r5,16-3 + sllg %r1,%r0,8 + ngr %r12,%r9 + srlg %r0,%r0,56 + xgr %r3,%r1 + xgr %r4,%r0 + lg %r1,160(%r13,%r15) + srlg %r13,%r5,20-3 + sllg %r0,%r1,12 + ngr %r13,%r9 + srlg %r1,%r1,52 + xgr %r3,%r0 + xgr %r4,%r1 + lg %r0,160(%r12,%r15) + srlg %r12,%r5,24-3 + sllg %r1,%r0,16 + ngr %r12,%r9 + srlg %r0,%r0,48 + xgr %r3,%r1 + xgr %r4,%r0 + lg %r1,160(%r13,%r15) + srlg %r13,%r5,28-3 + sllg %r0,%r1,20 + ngr %r13,%r9 + srlg %r1,%r1,44 + xgr %r3,%r0 + xgr %r4,%r1 + lg %r0,160(%r12,%r15) + srlg %r12,%r5,32-3 + sllg %r1,%r0,24 + ngr %r12,%r9 + srlg %r0,%r0,40 + xgr %r3,%r1 + xgr %r4,%r0 + lg %r1,160(%r13,%r15) + srlg %r13,%r5,36-3 + sllg %r0,%r1,28 + ngr %r13,%r9 + srlg %r1,%r1,36 + xgr %r3,%r0 + xgr %r4,%r1 + lg %r0,160(%r12,%r15) + srlg %r12,%r5,40-3 + sllg %r1,%r0,32 + ngr %r12,%r9 + srlg %r0,%r0,32 + xgr %r3,%r1 + xgr %r4,%r0 + lg %r1,160(%r13,%r15) + srlg %r13,%r5,44-3 + sllg %r0,%r1,36 + ngr %r13,%r9 + srlg %r1,%r1,28 + xgr %r3,%r0 + xgr %r4,%r1 + lg %r0,160(%r12,%r15) + srlg %r12,%r5,48-3 + sllg %r1,%r0,40 + ngr %r12,%r9 + srlg %r0,%r0,24 + xgr %r3,%r1 + xgr %r4,%r0 + lg %r1,160(%r13,%r15) + srlg %r13,%r5,52-3 + sllg %r0,%r1,44 + ngr %r13,%r9 + srlg %r1,%r1,20 + xgr %r3,%r0 + xgr %r4,%r1 + lg %r0,160(%r12,%r15) + srlg %r12,%r5,56-3 + sllg %r1,%r0,48 + ngr %r12,%r9 + srlg %r0,%r0,16 + xgr %r3,%r1 + xgr %r4,%r0 + lg %r1,160(%r13,%r15) + srlg %r13,%r5,60-3 + sllg %r0,%r1,52 + ngr %r13,%r9 + srlg %r1,%r1,12 + xgr %r3,%r0 + xgr %r4,%r1 + lg %r0,160(%r12,%r15) + sllg %r1,%r0,56 + srlg %r0,%r0,8 + xgr %r3,%r1 + xgr %r4,%r0 + + lg %r1,160(%r13,%r15) + sllg %r0,%r1,60 + srlg %r1,%r1,4 + xgr %r3,%r0 + xgr %r4,%r1 + + br %r14 +.size _mul_1x1,.-_mul_1x1 + +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,@function +.align 16 +bn_GF2m_mul_2x2: + stmg %r3,%r15,3*8(%r15) + + lghi %r1,-160-128 + la %r0,0(%r15) + la %r15,0(%r1,%r15) # alloca + stg %r0,0(%r15) # back chain + bras %r14,_mul_1x1 # a1·b1 + stmg %r3,%r4,16(%r2) + + lg 
%r3,320(%r15) + lg %r5,336(%r15) + bras %r14,_mul_1x1 # a0·b0 + stmg %r3,%r4,0(%r2) + + lg %r3,312(%r15) + lg %r5,328(%r15) + xg %r3,320(%r15) + xg %r5,336(%r15) + bras %r14,_mul_1x1 # (a0+a1)·(b0+b1) + lmg %r6,%r9,0(%r2) + + xgr %r3,%r4 + xgr %r4,%r7 + xgr %r3,%r6 + xgr %r4,%r8 + xgr %r3,%r9 + xgr %r4,%r9 + xgr %r3,%r4 + stg %r4,16(%r2) + stg %r3,8(%r2) + lmg %r6,%r15,336(%r15) + br %r14 +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +.string "GF(2^m) Multiplication for s390x, CRYPTOGAMS by <appro@openssl.org>" diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/bn/s390x-mont.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/bn/s390x-mont.S new file mode 100644 index 0000000000..b8dea0a66f --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/bn/s390x-mont.S @@ -0,0 +1,176 @@ +.text +.globl bn_mul_mont +.type bn_mul_mont,@function +bn_mul_mont: + lgf %r1,164(%r15) # pull %r1 + sla %r1,3 # %r1 to enumerate bytes + la %r4,0(%r1,%r4) + + stg %r2,2*8(%r15) + + cghi %r1,16 # + lghi %r2,0 # + blr %r14 # if(%r1<16) return 0; + cghi %r1,96 # + bhr %r14 # if(%r1>96) return 0; + stmg %r3,%r15,3*8(%r15) + + lghi %r2,-160-8 # leave room for carry bit + lcgr %r7,%r1 # -%r1 + lgr %r0,%r15 + la %r2,0(%r2,%r15) + la %r15,0(%r7,%r2) # alloca + stg %r0,0(%r15) # back chain + + sra %r1,3 # restore %r1 + la %r4,0(%r7,%r4) # restore %r4 + ahi %r1,-1 # adjust %r1 for inner loop + lg %r6,0(%r6) # pull n0 + + + lg %r2,0(%r4) + + lg %r9,0(%r3) + + mlgr %r8,%r2 # ap[0]*bp[0] + lgr %r12,%r8 + + lgr %r0,%r9 # "tp[0]"*n0 + msgr %r0,%r6 + + lg %r11,0(%r5) # + + mlgr %r10,%r0 # np[0]*m1 + algr %r11,%r9 # +="tp[0]" + lghi %r13,0 + alcgr %r13,%r10 + + la %r7,8(%r0) # j=1 + lr %r14,%r1 + +.align 16 +.L1st: + lg %r9,0(%r7,%r3) + + mlgr %r8,%r2 # ap[j]*bp[0] + algr %r9,%r12 + lghi %r12,0 + alcgr %r12,%r8 + + lg %r11,0(%r7,%r5) + + mlgr %r10,%r0 # np[j]*m1 + algr %r11,%r13 + lghi %r13,0 + alcgr %r10,%r13 # +="tp[j]" + algr %r11,%r9 + alcgr %r13,%r10 + + stg 
%r11,160-8(%r7,%r15) # tp[j-1]= + la %r7,8(%r7) # j++ + brct %r14,.L1st + + algr %r13,%r12 + lghi %r12,0 + alcgr %r12,%r12 # upmost overflow bit + stg %r13,160-8(%r7,%r15) + stg %r12,160(%r7,%r15) + la %r4,8(%r4) # bp++ + +.Louter: + lg %r2,0(%r4) # bp[i] + + lg %r9,0(%r3) + + mlgr %r8,%r2 # ap[0]*bp[i] + alg %r9,160(%r15) # +=tp[0] + lghi %r12,0 + alcgr %r12,%r8 + + lgr %r0,%r9 + msgr %r0,%r6 # tp[0]*n0 + + lg %r11,0(%r5) # np[0] + + mlgr %r10,%r0 # np[0]*m1 + algr %r11,%r9 # +="tp[0]" + lghi %r13,0 + alcgr %r13,%r10 + + la %r7,8(%r0) # j=1 + lr %r14,%r1 + +.align 16 +.Linner: + lg %r9,0(%r7,%r3) + + mlgr %r8,%r2 # ap[j]*bp[i] + algr %r9,%r12 + lghi %r12,0 + alcgr %r8,%r12 + alg %r9,160(%r7,%r15)# +=tp[j] + alcgr %r12,%r8 + + lg %r11,0(%r7,%r5) + + mlgr %r10,%r0 # np[j]*m1 + algr %r11,%r13 + lghi %r13,0 + alcgr %r10,%r13 + algr %r11,%r9 # +="tp[j]" + alcgr %r13,%r10 + + stg %r11,160-8(%r7,%r15) # tp[j-1]= + la %r7,8(%r7) # j++ + brct %r14,.Linner + + algr %r13,%r12 + lghi %r12,0 + alcgr %r12,%r12 + alg %r13,160(%r7,%r15)# accumulate previous upmost overflow bit + lghi %r8,0 + alcgr %r12,%r8 # new upmost overflow bit + stg %r13,160-8(%r7,%r15) + stg %r12,160(%r7,%r15) + + la %r4,8(%r4) # bp++ + clg %r4,200(%r7,%r15) # compare to &bp[num] + jne .Louter + + lg %r2,184(%r7,%r15) # reincarnate rp + la %r3,160(%r15) + ahi %r1,1 # restore %r1, incidentally clears "borrow" + + la %r7,0(%r0) + lr %r14,%r1 +.Lsub: lg %r9,0(%r7,%r3) + lg %r11,0(%r7,%r5) + + slbgr %r9,%r11 + stg %r9,0(%r7,%r2) + la %r7,8(%r7) + brct %r14,.Lsub + lghi %r8,0 + slbgr %r12,%r8 # handle upmost carry + lghi %r13,-1 + xgr %r13,%r12 + + la %r7,0(%r0) + lgr %r14,%r1 +.Lcopy: lg %r8,160(%r7,%r15) # conditional copy + lg %r9,0(%r7,%r2) + ngr %r8,%r12 + ngr %r9,%r13 + ogr %r9,%r8 + + stg %r7,160(%r7,%r15) # zap tp + stg %r9,0(%r7,%r2) + la %r7,8(%r7) + brct %r14,.Lcopy + + la %r1,216(%r7,%r15) + lmg %r6,%r15,0(%r1) + lghi %r2,1 # signal "processed" + br %r14 +.size bn_mul_mont,.-bn_mul_mont +.string 
"Montgomery Multiplication for s390x, CRYPTOGAMS by <appro@openssl.org>" diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/buildinf.h b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/buildinf.h new file mode 100644 index 0000000000..59a48acd96 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/buildinf.h @@ -0,0 +1,43 @@ +/* + * WARNING: do not edit! + * Generated by util/mkbuildinf.pl + * + * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#define PLATFORM "platform: linux64-s390x" +#define DATE "built on: Thu Nov 22 19:35:23 2018 UTC" + +/* + * Generate compiler_flags as an array of individual characters. This is a + * workaround for the situation where CFLAGS gets too long for a C90 string + * literal + */ +static const char compiler_flags[] = { + 'c','o','m','p','i','l','e','r',':',' ','.','.','/','c','o','n', + 'f','i','g','/','f','a','k','e','_','g','c','c','.','p','l',' ', + '-','f','P','I','C',' ','-','p','t','h','r','e','a','d',' ','-', + 'm','6','4',' ','-','W','a',',','-','-','n','o','e','x','e','c', + 's','t','a','c','k',' ','-','W','a','l','l',' ','-','O','3',' ', + '-','D','O','P','E','N','S','S','L','_','U','S','E','_','N','O', + 'D','E','L','E','T','E',' ','-','D','B','_','E','N','D','I','A', + 'N',' ','-','D','O','P','E','N','S','S','L','_','P','I','C',' ', + '-','D','O','P','E','N','S','S','L','_','C','P','U','I','D','_', + 'O','B','J',' ','-','D','O','P','E','N','S','S','L','_','B','N', + '_','A','S','M','_','M','O','N','T',' ','-','D','O','P','E','N', + 'S','S','L','_','B','N','_','A','S','M','_','G','F','2','m',' ', + '-','D','S','H','A','1','_','A','S','M',' ','-','D','S','H','A', + 
'2','5','6','_','A','S','M',' ','-','D','S','H','A','5','1','2', + '_','A','S','M',' ','-','D','K','E','C','C','A','K','1','6','0', + '0','_','A','S','M',' ','-','D','R','C','4','_','A','S','M',' ', + '-','D','A','E','S','_','A','S','M',' ','-','D','A','E','S','_', + 'C','T','R','_','A','S','M',' ','-','D','A','E','S','_','X','T', + 'S','_','A','S','M',' ','-','D','G','H','A','S','H','_','A','S', + 'M',' ','-','D','P','O','L','Y','1','3','0','5','_','A','S','M', + ' ','-','D','N','D','E','B','U','G','\0' +}; diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/chacha/chacha-s390x.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/chacha/chacha-s390x.S new file mode 100644 index 0000000000..75cf92e487 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/chacha/chacha-s390x.S @@ -0,0 +1,271 @@ +.text + +.globl ChaCha20_ctr32 +.type ChaCha20_ctr32,@function +.align 32 +ChaCha20_ctr32: + ltgr %r4,%r4 # %r4==0? + bzr %r14 + aghi %r4,-64 + lghi %r1,-240 + stmg %r6,%r15,48(%r15) + slgr %r2,%r3 # difference + la %r4,0(%r3,%r4) # end of input minus 64 + larl %r7,.Lsigma + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + + lmg %r8,%r11,0(%r5) # load key + lmg %r12,%r13,0(%r6) # load counter + lmg %r6,%r7,0(%r7) # load sigma constant + + la %r14,0(%r3) + stg %r2,240+3*8(%r15) + stg %r4,240+4*8(%r15) + stmg %r6,%r13,160(%r15) # copy key schedule to stack + srlg %r10,%r12,32 # 32-bit counter value + j .Loop_outer + +.align 16 +.Loop_outer: + lm %r0,%r7,160+4*0(%r15) # load x[0]-x[7] + lm %r8,%r9,160+4*10(%r15) # load x[10]-x[11] + lm %r11,%r13,160+4*13(%r15) # load x[13]-x[15] + stm %r8,%r9,160+4*8+4*10(%r15) # offload x[10]-x[11] + lm %r8,%r9,160+4*8(%r15) # load x[8]-x[9] + st %r10,160+4*12(%r15) # save counter + stg %r14,240+2*8(%r15) # save input pointer + lhi %r14,10 + j .Loop + +.align 4 +.Loop: + alr %r0,%r4 + alr %r1,%r5 + xr %r10,%r0 + xr %r11,%r1 + rll %r10,%r10,16 + rll %r11,%r11,16 + alr %r8,%r10 + alr %r9,%r11 + 
xr %r4,%r8 + xr %r5,%r9 + rll %r4,%r4,12 + rll %r5,%r5,12 + alr %r0,%r4 + alr %r1,%r5 + xr %r10,%r0 + xr %r11,%r1 + rll %r10,%r10,8 + rll %r11,%r11,8 + alr %r8,%r10 + alr %r9,%r11 + xr %r4,%r8 + xr %r5,%r9 + rll %r4,%r4,7 + rll %r5,%r5,7 + stm %r8,%r9,160+4*8+4*8(%r15) + lm %r8,%r9,160+4*8+4*10(%r15) + alr %r2,%r6 + alr %r3,%r7 + xr %r12,%r2 + xr %r13,%r3 + rll %r12,%r12,16 + rll %r13,%r13,16 + alr %r8,%r12 + alr %r9,%r13 + xr %r6,%r8 + xr %r7,%r9 + rll %r6,%r6,12 + rll %r7,%r7,12 + alr %r2,%r6 + alr %r3,%r7 + xr %r12,%r2 + xr %r13,%r3 + rll %r12,%r12,8 + rll %r13,%r13,8 + alr %r8,%r12 + alr %r9,%r13 + xr %r6,%r8 + xr %r7,%r9 + rll %r6,%r6,7 + rll %r7,%r7,7 + alr %r0,%r5 + alr %r1,%r6 + xr %r13,%r0 + xr %r10,%r1 + rll %r13,%r13,16 + rll %r10,%r10,16 + alr %r8,%r13 + alr %r9,%r10 + xr %r5,%r8 + xr %r6,%r9 + rll %r5,%r5,12 + rll %r6,%r6,12 + alr %r0,%r5 + alr %r1,%r6 + xr %r13,%r0 + xr %r10,%r1 + rll %r13,%r13,8 + rll %r10,%r10,8 + alr %r8,%r13 + alr %r9,%r10 + xr %r5,%r8 + xr %r6,%r9 + rll %r5,%r5,7 + rll %r6,%r6,7 + stm %r8,%r9,160+4*8+4*10(%r15) + lm %r8,%r9,160+4*8+4*8(%r15) + alr %r2,%r7 + alr %r3,%r4 + xr %r11,%r2 + xr %r12,%r3 + rll %r11,%r11,16 + rll %r12,%r12,16 + alr %r8,%r11 + alr %r9,%r12 + xr %r7,%r8 + xr %r4,%r9 + rll %r7,%r7,12 + rll %r4,%r4,12 + alr %r2,%r7 + alr %r3,%r4 + xr %r11,%r2 + xr %r12,%r3 + rll %r11,%r11,8 + rll %r12,%r12,8 + alr %r8,%r11 + alr %r9,%r12 + xr %r7,%r8 + xr %r4,%r9 + rll %r7,%r7,7 + rll %r4,%r4,7 + brct %r14,.Loop + + lg %r14,240+2*8(%r15) # pull input pointer + stm %r8,%r9,160+4*8+4*8(%r15) # offload x[8]-x[9] + lmg %r8,%r9,240+3*8(%r15) + + al %r0,160+4*0(%r15) # accumulate key schedule + al %r1,160+4*1(%r15) + al %r2,160+4*2(%r15) + al %r3,160+4*3(%r15) + al %r4,160+4*4(%r15) + al %r5,160+4*5(%r15) + al %r6,160+4*6(%r15) + al %r7,160+4*7(%r15) + lrvr %r0,%r0 + lrvr %r1,%r1 + lrvr %r2,%r2 + lrvr %r3,%r3 + lrvr %r4,%r4 + lrvr %r5,%r5 + lrvr %r6,%r6 + lrvr %r7,%r7 + al %r10,160+4*12(%r15) + al %r11,160+4*13(%r15) + al 
%r12,160+4*14(%r15) + al %r13,160+4*15(%r15) + lrvr %r10,%r10 + lrvr %r11,%r11 + lrvr %r12,%r12 + lrvr %r13,%r13 + + la %r8,0(%r8,%r14) # reconstruct output pointer + clgr %r14,%r9 + jh .Ltail + + x %r0,4*0(%r14) # xor with input + x %r1,4*1(%r14) + st %r0,4*0(%r8) # store output + x %r2,4*2(%r14) + st %r1,4*1(%r8) + x %r3,4*3(%r14) + st %r2,4*2(%r8) + x %r4,4*4(%r14) + st %r3,4*3(%r8) + lm %r0,%r3,160+4*8+4*8(%r15) # load x[8]-x[11] + x %r5,4*5(%r14) + st %r4,4*4(%r8) + x %r6,4*6(%r14) + al %r0,160+4*8(%r15) + st %r5,4*5(%r8) + x %r7,4*7(%r14) + al %r1,160+4*9(%r15) + st %r6,4*6(%r8) + x %r10,4*12(%r14) + al %r2,160+4*10(%r15) + st %r7,4*7(%r8) + x %r11,4*13(%r14) + al %r3,160+4*11(%r15) + st %r10,4*12(%r8) + x %r12,4*14(%r14) + st %r11,4*13(%r8) + x %r13,4*15(%r14) + st %r12,4*14(%r8) + lrvr %r0,%r0 + st %r13,4*15(%r8) + lrvr %r1,%r1 + lrvr %r2,%r2 + lrvr %r3,%r3 + lhi %r10,1 + x %r0,4*8(%r14) + al %r10,160+4*12(%r15) # increment counter + x %r1,4*9(%r14) + st %r0,4*8(%r8) + x %r2,4*10(%r14) + st %r1,4*9(%r8) + x %r3,4*11(%r14) + st %r2,4*10(%r8) + st %r3,4*11(%r8) + + clgr %r14,%r9 # done yet? 
+ la %r14,64(%r14) + jl .Loop_outer + +.Ldone: + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + stmg %r0,%r3,160+4*4(%r15) # wipe key copy + stmg %r0,%r3,160+4*12(%r15) + + lmg %r6,%r15,288(%r15) + br %r14 + +.align 16 +.Ltail: + la %r9,64(%r9) + stm %r0,%r7,160+4*0(%r15) + slgr %r9,%r14 + lm %r0,%r3,160+4*8+4*8(%r15) + lghi %r6,0 + stm %r10,%r13,160+4*12(%r15) + al %r0,160+4*8(%r15) + al %r1,160+4*9(%r15) + al %r2,160+4*10(%r15) + al %r3,160+4*11(%r15) + lrvr %r0,%r0 + lrvr %r1,%r1 + lrvr %r2,%r2 + lrvr %r3,%r3 + stm %r0,%r3,160+4*8(%r15) + +.Loop_tail: + llgc %r4,0(%r6,%r14) + llgc %r5,160(%r6,%r15) + xr %r5,%r4 + stc %r5,0(%r6,%r8) + la %r6,1(%r6) + brct %r9,.Loop_tail + + j .Ldone +.size ChaCha20_ctr32,.-ChaCha20_ctr32 + +.align 32 +.Lsigma: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral +.asciz "ChaCha20 for s390x, CRYPTOGAMS by <appro@openssl.org>" +.align 4 diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/include/internal/bn_conf.h b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/include/internal/bn_conf.h new file mode 100644 index 0000000000..34bd8b78b4 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/include/internal/bn_conf.h @@ -0,0 +1,28 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from crypto/include/internal/bn_conf.h.in */ +/* + * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef HEADER_BN_CONF_H +# define HEADER_BN_CONF_H + +/* + * The contents of this file are not used in the UEFI build, as + * both 32-bit and 64-bit builds are supported from a single run + * of the Configure script. + */ + +/* Should we define BN_DIV2W here? 
*/ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT + +#endif diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/include/internal/dso_conf.h b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/include/internal/dso_conf.h new file mode 100644 index 0000000000..18f6ac3bff --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/include/internal/dso_conf.h @@ -0,0 +1,19 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from crypto/include/internal/dso_conf.h.in */ +/* + * Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef HEADER_DSO_CONF_H +# define HEADER_DSO_CONF_H + +# define DSO_DLFCN +# define HAVE_DLFCN_H +# define DSO_EXTENSION ".so" + +#endif diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/modes/ghash-s390x.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/modes/ghash-s390x.S new file mode 100644 index 0000000000..f6bd2d0857 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/modes/ghash-s390x.S @@ -0,0 +1,147 @@ +#include "s390x_arch.h" + +.text + +.globl gcm_gmult_4bit +.align 32 +gcm_gmult_4bit: + stmg %r6,%r14,6*8(%r15) + + aghi %r2,-1 + lghi %r5,1 + lghi %r13,120 + larl %r14,rem_4bit + + lg %r1,8+1(%r2) # Xi + j .Lgmult_shortcut +.type gcm_gmult_4bit,@function +.size gcm_gmult_4bit,(.-gcm_gmult_4bit) + +.globl gcm_ghash_4bit +.align 32 +gcm_ghash_4bit: + larl %r1,OPENSSL_s390xcap_P + lg %r0,S390X_KIMD+8(%r1) # load second word of kimd capabilities + # vector + tmhh %r0,0x4000 # check for function 65 + jz .Lsoft_ghash + lghi %r0,S390X_GHASH # function 65 + la %r1,0(%r2) # H lies right 
after Xi in gcm128_context + .long 0xb93e0004 # kimd %r0,%r4 + brc 1,.-4 # pay attention to "partial completion" + br %r14 +.align 32 +.Lsoft_ghash: + stmg %r6,%r14,6*8(%r15) + + aghi %r2,-1 + srlg %r5,%r5,4 + lghi %r13,120 + larl %r14,rem_4bit + + lg %r1,8+1(%r2) # Xi + lg %r0,0+1(%r2) + lghi %r12,0 +.Louter: + xg %r0,0(%r4) # Xi ^= inp + xg %r1,8(%r4) + xgr %r0,%r12 + stg %r1,8+1(%r2) + stg %r0,0+1(%r2) + +.Lgmult_shortcut: + lghi %r12,0xf0 + sllg %r8,%r1,4 + srlg %r10,%r1,8 # extract second byte + ngr %r8,%r12 + lgr %r9,%r1 + lghi %r11,14 + ngr %r9,%r12 + + lg %r1,8(%r8,%r3) + lg %r0,0(%r8,%r3) + + sllg %r8,%r10,4 + sllg %r6,%r1,3 + ngr %r8,%r12 + ngr %r6,%r13 + ngr %r10,%r12 + + sllg %r12,%r0,60 + srlg %r1,%r1,4 + srlg %r0,%r0,4 + xg %r1,8(%r9,%r3) + xg %r0,0(%r9,%r3) + lgr %r9,%r10 + sllg %r7,%r1,3 + xgr %r1,%r12 + ngr %r7,%r13 + sllg %r12,%r0,60 + j .Lghash_inner +.align 16 +.Lghash_inner: + srlg %r1,%r1,4 + srlg %r0,%r0,4 + xg %r1,8(%r8,%r3) + llgc %r10,0(%r11,%r2) + xg %r0,0(%r8,%r3) + sllg %r8,%r10,4 + xg %r0,0(%r6,%r14) + nill %r8,0xf0 + sllg %r6,%r1,3 + xgr %r1,%r12 + ngr %r6,%r13 + nill %r10,0xf0 + + sllg %r12,%r0,60 + srlg %r1,%r1,4 + srlg %r0,%r0,4 + xg %r1,8(%r9,%r3) + xg %r0,0(%r9,%r3) + lgr %r9,%r10 + xg %r0,0(%r7,%r14) + sllg %r7,%r1,3 + xgr %r1,%r12 + ngr %r7,%r13 + sllg %r12,%r0,60 + brct %r11,.Lghash_inner + + srlg %r1,%r1,4 + srlg %r0,%r0,4 + xg %r1,8(%r8,%r3) + xg %r0,0(%r8,%r3) + sllg %r10,%r1,3 + xg %r0,0(%r6,%r14) + xgr %r1,%r12 + ngr %r10,%r13 + + sllg %r12,%r0,60 + srlg %r1,%r1,4 + srlg %r0,%r0,4 + xg %r1,8(%r9,%r3) + xg %r0,0(%r9,%r3) + xgr %r1,%r12 + xg %r0,0(%r7,%r14) + + lg %r12,0(%r10,%r14) + la %r4,16(%r4) + sllg %r12,%r12,4 # correct last rem_4bit[rem] + brctg %r5,.Louter + + xgr %r0,%r12 + stg %r1,8+1(%r2) + stg %r0,0+1(%r2) + lmg %r6,%r14,6*8(%r15) + br %r14 +.type gcm_ghash_4bit,@function +.size gcm_ghash_4bit,(.-gcm_ghash_4bit) + +.align 64 +rem_4bit: + .long 0,0,29491200,0,58982400,0,38141952,0 + .long 
117964800,0,113901568,0,76283904,0,88997888,0 + .long 235929600,0,265420800,0,227803136,0,206962688,0 + .long 152567808,0,148504576,0,177995776,0,190709760,0 +.type rem_4bit,@object +.size rem_4bit,(.-rem_4bit) +.string "GHASH for s390x, CRYPTOGAMS by <appro@openssl.org>" diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/poly1305/poly1305-s390x.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/poly1305/poly1305-s390x.S new file mode 100644 index 0000000000..262ee2dca7 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/poly1305/poly1305-s390x.S @@ -0,0 +1,162 @@ +.text + +.globl poly1305_init +.type poly1305_init,@function +.align 16 +poly1305_init: + lghi %r0,0 + lghi %r1,-1 + stg %r0,0(%r2) # zero hash value + stg %r0,8(%r2) + stg %r0,16(%r2) + + clgr %r3,%r0 + je .Lno_key + + lrvg %r4,0(%r3) # load little-endian key + lrvg %r5,8(%r3) + + nihl %r1,0xffc0 # 0xffffffc0ffffffff + srlg %r0,%r1,4 # 0x0ffffffc0fffffff + srlg %r1,%r1,4 + nill %r1,0xfffc # 0x0ffffffc0ffffffc + + ngr %r4,%r0 + ngr %r5,%r1 + + stg %r4,32(%r2) + stg %r5,40(%r2) + +.Lno_key: + lghi %r2,0 + br %r14 +.size poly1305_init,.-poly1305_init +.globl poly1305_blocks +.type poly1305_blocks,@function +.align 16 +poly1305_blocks: + srlg %r4,%r4,4 # fixed-up in 64-bit build + lghi %r0,0 + clgr %r4,%r0 + je .Lno_data + + stmg %r6,%r14,48(%r15) + + llgfr %r5,%r5 # clear upper half, much needed with + # non-64-bit ABI + lg %r0,32(%r2) # load key + lg %r1,40(%r2) + + lg %r11,0(%r2) # load hash value + lg %r13,8(%r2) + lg %r14,16(%r2) + + stg %r2,16(%r15) # off-load %r2 + srlg %r2,%r1,2 + algr %r2,%r1 # s1 = r1 + r1>>2 + j .Loop + +.align 16 +.Loop: + lrvg %r7,0(%r3) # load little-endian input + lrvg %r9,8(%r3) + la %r3,16(%r3) + + algr %r7,%r11 # accumulate input + alcgr %r9,%r13 + + lgr %r11,%r7 + mlgr %r6,%r0 # h0*r0 -> %r6:%r7 + lgr %r13,%r9 + mlgr %r8,%r2 # h1*5*r1 -> %r8:%r9 + + mlgr %r10,%r1 # h0*r1 -> %r10:%r11 + mlgr %r12,%r0 # h1*r0 -> %r12:%r13 + 
alcgr %r14,%r5 + + algr %r7,%r9 + lgr %r9,%r14 + alcgr %r6,%r8 + lghi %r8,0 + + algr %r13,%r11 + alcgr %r12,%r10 + + msgr %r9,%r2 # h2*s1 + msgr %r14,%r0 # h2*r0 + + algr %r13,%r9 + alcgr %r12,%r8 # %r8 is zero + + algr %r13,%r6 + alcgr %r14,%r12 + + lghi %r11,-4 # final reduction step + ngr %r11,%r14 + srlg %r10,%r14,2 + algr %r11,%r10 + lghi %r12,3 + ngr %r14,%r12 + + algr %r11,%r7 + alcgr %r13,%r8 # %r8 is still zero + alcgr %r14,%r8 # %r8 is still zero + + brctg %r4,.Loop + + lg %r2,16(%r15) # restore %r2 + + stg %r11,0(%r2) # store hash value + stg %r13,8(%r2) + stg %r14,16(%r2) + + lmg %r6,%r14,48(%r15) +.Lno_data: + br %r14 +.size poly1305_blocks,.-poly1305_blocks +.globl poly1305_emit +.type poly1305_emit,@function +.align 16 +poly1305_emit: + stmg %r6,%r9,48(%r15) + + lg %r5,0(%r2) + lg %r6,8(%r2) + lg %r7,16(%r2) + + lghi %r0,5 + lghi %r1,0 + lgr %r8,%r5 + lgr %r9,%r6 + + algr %r5,%r0 # compare to modulus + alcgr %r6,%r1 + alcgr %r7,%r1 + + srlg %r7,%r7,2 # did it borrow/carry? 
+ slgr %r1,%r7 # 0-%r7>>2 + lg %r7,0(%r4) # load nonce + lghi %r0,-1 + lg %r2,8(%r4) + xgr %r0,%r1 # ~%r1 + + ngr %r5,%r1 + ngr %r8,%r0 + ngr %r6,%r1 + ngr %r9,%r0 + ogr %r5,%r8 + rllg %r8,%r7,32 # flip nonce words + ogr %r6,%r9 + rllg %r9,%r2,32 + + algr %r5,%r8 # accumulate nonce + alcgr %r6,%r9 + + strvg %r5,0(%r3) # write little-endian result + strvg %r6,8(%r3) + + lmg %r6,%r9,48(%r15) + br %r14 +.size poly1305_emit,.-poly1305_emit + +.string "Poly1305 for s390x, CRYPTOGAMS by <appro@openssl.org>" diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/rc4/rc4-s390x.s b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/rc4/rc4-s390x.s new file mode 100644 index 0000000000..e5883fea42 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/rc4/rc4-s390x.s @@ -0,0 +1,240 @@ +.text + +.globl RC4 +.type RC4,@function +.align 64 +RC4: + stmg %r6,%r11,6*8(%r15) + llgc %r6,0(%r2) + llgc %r10,1(%r2) + la %r6,1(%r6) + nill %r6,0xff + srlg %r1,%r3,3 + ltgr %r1,%r1 + llgc %r8,2(%r6,%r2) + jz .Lshort + j .Loop8 + +.align 64 +.Loop8: + la %r10,0(%r10,%r8) # 0 + nill %r10,255 + la %r7,1(%r6) + nill %r7,255 + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + llgc %r9,2(%r7,%r2) + stc %r11,2(%r6,%r2) + cr %r7,%r10 + jne .Lcmov0 + la %r9,0(%r8) +.Lcmov0: + la %r11,0(%r11,%r8) + nill %r11,255 + la %r10,0(%r10,%r9) # 1 + nill %r10,255 + la %r6,1(%r7) + nill %r6,255 + llgc %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r9,2(%r10,%r2) + llgc %r8,2(%r6,%r2) + stc %r11,2(%r7,%r2) + cr %r6,%r10 + jne .Lcmov1 + la %r8,0(%r9) +.Lcmov1: + la %r11,0(%r11,%r9) + nill %r11,255 + la %r10,0(%r10,%r8) # 2 + nill %r10,255 + la %r7,1(%r6) + nill %r7,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + llgc %r9,2(%r7,%r2) + stc %r11,2(%r6,%r2) + cr %r7,%r10 + jne .Lcmov2 + la %r9,0(%r8) +.Lcmov2: + la %r11,0(%r11,%r8) + nill %r11,255 + la %r10,0(%r10,%r9) # 3 + nill %r10,255 + la %r6,1(%r7) + nill %r6,255 + sllg %r0,%r0,8 + 
ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r9,2(%r10,%r2) + llgc %r8,2(%r6,%r2) + stc %r11,2(%r7,%r2) + cr %r6,%r10 + jne .Lcmov3 + la %r8,0(%r9) +.Lcmov3: + la %r11,0(%r11,%r9) + nill %r11,255 + la %r10,0(%r10,%r8) # 4 + nill %r10,255 + la %r7,1(%r6) + nill %r7,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + llgc %r9,2(%r7,%r2) + stc %r11,2(%r6,%r2) + cr %r7,%r10 + jne .Lcmov4 + la %r9,0(%r8) +.Lcmov4: + la %r11,0(%r11,%r8) + nill %r11,255 + la %r10,0(%r10,%r9) # 5 + nill %r10,255 + la %r6,1(%r7) + nill %r6,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r9,2(%r10,%r2) + llgc %r8,2(%r6,%r2) + stc %r11,2(%r7,%r2) + cr %r6,%r10 + jne .Lcmov5 + la %r8,0(%r9) +.Lcmov5: + la %r11,0(%r11,%r9) + nill %r11,255 + la %r10,0(%r10,%r8) # 6 + nill %r10,255 + la %r7,1(%r6) + nill %r7,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + llgc %r9,2(%r7,%r2) + stc %r11,2(%r6,%r2) + cr %r7,%r10 + jne .Lcmov6 + la %r9,0(%r8) +.Lcmov6: + la %r11,0(%r11,%r8) + nill %r11,255 + la %r10,0(%r10,%r9) # 7 + nill %r10,255 + la %r6,1(%r7) + nill %r6,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r9,2(%r10,%r2) + llgc %r8,2(%r6,%r2) + stc %r11,2(%r7,%r2) + cr %r6,%r10 + jne .Lcmov7 + la %r8,0(%r9) +.Lcmov7: + la %r11,0(%r11,%r9) + nill %r11,255 + lg %r9,0(%r4) + sllg %r0,%r0,8 + la %r4,8(%r4) + ic %r0,2(%r11,%r2) + xgr %r0,%r9 + stg %r0,0(%r5) + la %r5,8(%r5) + brctg %r1,.Loop8 + +.Lshort: + lghi %r0,7 + ngr %r3,%r0 + jz .Lexit + j .Loop1 + +.align 16 +.Loop1: + la %r10,0(%r10,%r8) + nill %r10,255 + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + stc %r11,2(%r6,%r2) + ar %r11,%r8 + ahi %r6,1 + nill %r11,255 + nill %r6,255 + llgc %r0,0(%r4) + la %r4,1(%r4) + llgc %r11,2(%r11,%r2) + llgc %r8,2(%r6,%r2) + xr %r0,%r11 + stc %r0,0(%r5) + la %r5,1(%r5) + brct %r3,.Loop1 + +.Lexit: + ahi %r6,-1 + stc %r6,0(%r2) + stc %r10,1(%r2) + lmg %r6,%r11,6*8(%r15) + br %r14 
+.size RC4,.-RC4 +.string "RC4 for s390x, CRYPTOGAMS by <appro@openssl.org>" + +.globl RC4_set_key +.type RC4_set_key,@function +.align 64 +RC4_set_key: + stmg %r6,%r8,6*8(%r15) + lhi %r0,256 + la %r1,0(%r0) + sth %r1,0(%r2) +.align 4 +.L1stloop: + stc %r1,2(%r1,%r2) + la %r1,1(%r1) + brct %r0,.L1stloop + + lghi %r7,-256 + lr %r0,%r3 + la %r8,0(%r0) + la %r1,0(%r0) +.align 16 +.L2ndloop: + llgc %r5,2+256(%r7,%r2) + llgc %r6,0(%r8,%r4) + la %r1,0(%r1,%r5) + la %r7,1(%r7) + la %r1,0(%r1,%r6) + nill %r1,255 + la %r8,1(%r8) + tml %r7,255 + llgc %r6,2(%r1,%r2) + stc %r6,2+256-1(%r7,%r2) + stc %r5,2(%r1,%r2) + jz .Ldone + brct %r0,.L2ndloop + lr %r0,%r3 + la %r8,0(%r0) + j .L2ndloop +.Ldone: + lmg %r6,%r8,6*8(%r15) + br %r14 +.size RC4_set_key,.-RC4_set_key + +.globl RC4_options +.type RC4_options,@function +.align 16 +RC4_options: + larl %r2,.Loptions + br %r14 +.size RC4_options,.-RC4_options +.section .rodata +.Loptions: +.align 8 +.string "rc4(8x,char)" diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/s390xcpuid.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/s390xcpuid.S new file mode 100644 index 0000000000..7c0a5a0720 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/s390xcpuid.S @@ -0,0 +1,323 @@ +#include "s390x_arch.h" + +.text + +.globl OPENSSL_s390x_facilities +.type OPENSSL_s390x_facilities,@function +.align 16 +OPENSSL_s390x_facilities: + lghi %r0,0 + larl %r4,OPENSSL_s390xcap_P + + stg %r0,S390X_STFLE+8(%r4) # wipe capability vectors + stg %r0,S390X_STFLE+16(%r4) + stg %r0,S390X_STFLE+24(%r4) + stg %r0,S390X_KIMD(%r4) + stg %r0,S390X_KIMD+8(%r4) + stg %r0,S390X_KLMD(%r4) + stg %r0,S390X_KLMD+8(%r4) + stg %r0,S390X_KM(%r4) + stg %r0,S390X_KM+8(%r4) + stg %r0,S390X_KMC(%r4) + stg %r0,S390X_KMC+8(%r4) + stg %r0,S390X_KMAC(%r4) + stg %r0,S390X_KMAC+8(%r4) + stg %r0,S390X_KMCTR(%r4) + stg %r0,S390X_KMCTR+8(%r4) + stg %r0,S390X_KMO(%r4) + stg %r0,S390X_KMO+8(%r4) + stg %r0,S390X_KMF(%r4) + stg 
%r0,S390X_KMF+8(%r4) + stg %r0,S390X_PRNO(%r4) + stg %r0,S390X_PRNO+8(%r4) + stg %r0,S390X_KMA(%r4) + stg %r0,S390X_KMA+8(%r4) + + .long 0xb2b04000 # stfle 0(%r4) + brc 8,.Ldone + lghi %r0,1 + .long 0xb2b04000 # stfle 0(%r4) + brc 8,.Ldone + lghi %r0,2 + .long 0xb2b04000 # stfle 0(%r4) +.Ldone: + lmg %r2,%r3,S390X_STFLE(%r4) + tmhl %r2,0x4000 # check for message-security-assist + jz .Lret + + lghi %r0,S390X_QUERY # query kimd capabilities + la %r1,S390X_KIMD(%r4) + .long 0xb93e0002 # kimd %r0,%r2 + + lghi %r0,S390X_QUERY # query klmd capabilities + la %r1,S390X_KLMD(%r4) + .long 0xb93f0002 # klmd %r0,%r2 + + lghi %r0,S390X_QUERY # query km capability vector + la %r1,S390X_KM(%r4) + .long 0xb92e0042 # km %r4,%r2 + + lghi %r0,S390X_QUERY # query kmc capability vector + la %r1,S390X_KMC(%r4) + .long 0xb92f0042 # kmc %r4,%r2 + + lghi %r0,S390X_QUERY # query kmac capability vector + la %r1,S390X_KMAC(%r4) + .long 0xb91e0042 # kmac %r4,%r2 + + tmhh %r3,0x0004 # check for message-security-assist-4 + jz .Lret + + lghi %r0,S390X_QUERY # query kmctr capability vector + la %r1,S390X_KMCTR(%r4) + .long 0xb92d2042 # kmctr %r4,%r2,%r2 + + lghi %r0,S390X_QUERY # query kmo capability vector + la %r1,S390X_KMO(%r4) + .long 0xb92b0042 # kmo %r4,%r2 + + lghi %r0,S390X_QUERY # query kmf capability vector + la %r1,S390X_KMF(%r4) + .long 0xb92a0042 # kmf %r4,%r2 + + tml %r2,0x40 # check for message-security-assist-5 + jz .Lret + + lghi %r0,S390X_QUERY # query prno capability vector + la %r1,S390X_PRNO(%r4) + .long 0xb93c0042 # prno %r4,%r2 + + lg %r2,S390X_STFLE+16(%r4) + tmhl %r2,0x2000 # check for message-security-assist-8 + jz .Lret + + lghi %r0,S390X_QUERY # query kma capability vector + la %r1,S390X_KMA(%r4) + .long 0xb9294022 # kma %r2,%r4,%r2 + +.Lret: + br %r14 +.size OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities + +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +.align 16 +OPENSSL_rdtsc: + larl %r4,OPENSSL_s390xcap_P + tm S390X_STFLE+3(%r4),0x40 # check for 
store-clock-fast facility + jz .Lstck + + .long 0xb27cf010 # stckf 16(%r15) + lg %r2,16(%r15) + br %r14 +.Lstck: + stck 16(%r15) + lg %r2,16(%r15) + br %r14 +.size OPENSSL_rdtsc,.-OPENSSL_rdtsc + +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 16 +OPENSSL_atomic_add: + l %r1,0(%r2) +.Lspin: lr %r0,%r1 + ar %r0,%r3 + cs %r1,%r0,0(%r2) + brc 4,.Lspin + lgfr %r2,%r0 # OpenSSL expects the new value + br %r14 +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 16 +OPENSSL_wipe_cpu: + xgr %r0,%r0 + xgr %r1,%r1 + lgr %r2,%r15 + xgr %r3,%r3 + xgr %r4,%r4 + lzdr %f0 + lzdr %f1 + lzdr %f2 + lzdr %f3 + lzdr %f4 + lzdr %f5 + lzdr %f6 + lzdr %f7 + br %r14 +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu + +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +.align 16 +OPENSSL_cleanse: +#if !defined(__s390x__) && !defined(__s390x) + llgfr %r3,%r3 +#endif + lghi %r4,15 + lghi %r0,0 + clgr %r3,%r4 + jh .Lot + clgr %r3,%r0 + bcr 8,%r14 +.Little: + stc %r0,0(%r2) + la %r2,1(%r2) + brctg %r3,.Little + br %r14 +.align 4 +.Lot: tmll %r2,7 + jz .Laligned + stc %r0,0(%r2) + la %r2,1(%r2) + brctg %r3,.Lot +.Laligned: + srlg %r4,%r3,3 +.Loop: stg %r0,0(%r2) + la %r2,8(%r2) + brctg %r4,.Loop + lghi %r4,7 + ngr %r3,%r4 + jnz .Little + br %r14 +.size OPENSSL_cleanse,.-OPENSSL_cleanse + +.globl CRYPTO_memcmp +.type CRYPTO_memcmp,@function +.align 16 +CRYPTO_memcmp: +#if !defined(__s390x__) && !defined(__s390x) + llgfr %r4,%r4 +#endif + lghi %r5,0 + clgr %r4,%r5 + je .Lno_data + +.Loop_cmp: + llgc %r0,0(%r2) + la %r2,1(%r2) + llgc %r1,0(%r3) + la %r3,1(%r3) + xr %r1,%r0 + or %r5,%r1 + brctg %r4,.Loop_cmp + + lnr %r5,%r5 + srl %r5,31 +.Lno_data: + lgr %r2,%r5 + br %r14 +.size CRYPTO_memcmp,.-CRYPTO_memcmp + +.globl OPENSSL_instrument_bus +.type OPENSSL_instrument_bus,@function +.align 16 +OPENSSL_instrument_bus: + lghi %r2,0 + br %r14 +.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus + +.globl 
OPENSSL_instrument_bus2 +.type OPENSSL_instrument_bus2,@function +.align 16 +OPENSSL_instrument_bus2: + lghi %r2,0 + br %r14 +.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 + +.globl OPENSSL_vx_probe +.type OPENSSL_vx_probe,@function +.align 16 +OPENSSL_vx_probe: + .word 0xe700,0x0000,0x0044 # vzero %v0 + br %r14 +.size OPENSSL_vx_probe,.-OPENSSL_vx_probe +.globl s390x_kimd +.type s390x_kimd,@function +.align 16 +s390x_kimd: + llgfr %r0,%r4 + lgr %r1,%r5 + + .long 0xb93e0002 # kimd %r0,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_kimd,.-s390x_kimd +.globl s390x_klmd +.type s390x_klmd,@function +.align 32 +s390x_klmd: + llgfr %r0,%r6 + lg %r1,160(%r15) + + .long 0xb93f0042 # klmd %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_klmd,.-s390x_klmd +.globl s390x_km +.type s390x_km,@function +.align 16 +s390x_km: + lr %r0,%r5 + lgr %r1,%r6 + + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_km,.-s390x_km +.globl s390x_kmac +.type s390x_kmac,@function +.align 16 +s390x_kmac: + lr %r0,%r4 + lgr %r1,%r5 + + .long 0xb91e0002 # kmac %r0,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_kmac,.-s390x_kmac +.globl s390x_kmo +.type s390x_kmo,@function +.align 16 +s390x_kmo: + lr %r0,%r5 + lgr %r1,%r6 + + .long 0xb92b0042 # kmo %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_kmo,.-s390x_kmo +.globl s390x_kmf +.type s390x_kmf,@function +.align 16 +s390x_kmf: + lr %r0,%r5 + lgr %r1,%r6 + + .long 0xb92a0042 # kmf %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_kmf,.-s390x_kmf +.globl s390x_kma +.type s390x_kma,@function +.align 16 +s390x_kma: + stg %r6,6*8(%r15) + lmg %r0,%r1,160(%r15) + + .long 0xb9292064 # kma %r6,%r2,%r4 + brc 1,.-4 # pay attention to "partial completion" + + lg %r6,6*8(%r15) + br %r14 +.size s390x_kma,.-s390x_kma 
+.section .init + brasl %r14,OPENSSL_cpuid_setup diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S new file mode 100644 index 0000000000..1a74fdb20e --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/keccak1600-s390x.S @@ -0,0 +1,464 @@ +.text + +.type __KeccakF1600,@function +.align 32 +__KeccakF1600: + stg %r14,8*14(%r15) + lg %r0,160(%r2) + lg %r1,168(%r2) + lg %r5,176(%r2) + lg %r6,184(%r2) + lg %r7,192(%r2) + larl %r4,iotas + j .Loop + +.align 16 +.Loop: + lg %r8,0(%r2) + lg %r9,48(%r2) + lg %r10,96(%r2) + lg %r11,144(%r2) + + xgr %r0,%r8 + xg %r1,8(%r2) + xg %r5,16(%r2) + xg %r6,24(%r2) + lgr %r12,%r7 + xg %r7,32(%r2) + + xg %r0,40(%r2) + xgr %r1,%r9 + xg %r5,56(%r2) + xg %r6,64(%r2) + xg %r7,72(%r2) + + xg %r0,80(%r2) + xg %r1,88(%r2) + xgr %r5,%r10 + xg %r6,104(%r2) + xg %r7,112(%r2) + + xg %r0,120(%r2) + xg %r1,128(%r2) + xg %r5,136(%r2) + xgr %r6,%r11 + xg %r7,152(%r2) + + lgr %r13,%r5 + rllg %r5,%r5,1 + xgr %r5,%r0 # D[1] = ROL64(C[2], 1) ^ C[0] + + rllg %r0,%r0,1 + xgr %r0,%r6 # D[4] = ROL64(C[0], 1) ^ C[3] + + rllg %r6,%r6,1 + xgr %r6,%r1 # D[2] = ROL64(C[3], 1) ^ C[1] + + rllg %r1,%r1,1 + xgr %r1,%r7 # D[0] = ROL64(C[1], 1) ^ C[4] + + rllg %r7,%r7,1 + xgr %r7,%r13 # D[3] = ROL64(C[4], 1) ^ C[2] + xgr %r9,%r5 + xgr %r10,%r6 + xgr %r11,%r7 + rllg %r9,%r9,44 + xgr %r12,%r0 + rllg %r10,%r10,43 + xgr %r8,%r1 + + lgr %r13,%r9 + ogr %r9,%r10 + rllg %r11,%r11,21 + xgr %r9,%r8 # C[0] ^ ( C[1] | C[2]) + rllg %r12,%r12,14 + xg %r9,0(%r4) + la %r4,8(%r4) + stg %r9,0(%r3) # R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i] + + lgr %r14,%r12 + ngr %r12,%r11 + lghi %r9,-1 # no 'not' instruction :-( + xgr %r12,%r10 # C[2] ^ ( C[4] & C[3]) + xgr %r10,%r9 # not %r10 + stg %r12,16(%r3) # R[0][2] = C[2] ^ ( C[4] & C[3]) + ogr %r10,%r11 + xgr %r10,%r13 # C[1] ^ (~C[2] | C[3]) + + ngr %r13,%r8 + stg %r10,8(%r3) # R[0][1] = C[1] ^ 
(~C[2] | C[3]) + xgr %r13,%r14 # C[4] ^ ( C[1] & C[0]) + ogr %r14,%r8 + stg %r13,32(%r3) # R[0][4] = C[4] ^ ( C[1] & C[0]) + xgr %r14,%r11 # C[3] ^ ( C[4] | C[0]) + stg %r14,24(%r3) # R[0][3] = C[3] ^ ( C[4] | C[0]) + + + lg %r8,24(%r2) + lg %r12,176(%r2) + lg %r11,128(%r2) + lg %r9,72(%r2) + lg %r10,80(%r2) + + xgr %r8,%r7 + xgr %r12,%r6 + rllg %r8,%r8,28 + xgr %r11,%r5 + rllg %r12,%r12,61 + xgr %r9,%r0 + rllg %r11,%r11,45 + xgr %r10,%r1 + + lgr %r13,%r8 + ogr %r8,%r12 + rllg %r9,%r9,20 + xgr %r8,%r11 # C[3] ^ (C[0] | C[4]) + rllg %r10,%r10,3 + stg %r8,64(%r3) # R[1][3] = C[3] ^ (C[0] | C[4]) + + lgr %r14,%r9 + ngr %r9,%r13 + lghi %r8,-1 # no 'not' instruction :-( + xgr %r9,%r12 # C[4] ^ (C[1] & C[0]) + xgr %r12,%r8 # not %r12 + stg %r9,72(%r3) # R[1][4] = C[4] ^ (C[1] & C[0]) + + ogr %r12,%r11 + xgr %r12,%r10 # C[2] ^ (~C[4] | C[3]) + + ngr %r11,%r10 + stg %r12,56(%r3) # R[1][2] = C[2] ^ (~C[4] | C[3]) + xgr %r11,%r14 # C[1] ^ (C[3] & C[2]) + ogr %r14,%r10 + stg %r11,48(%r3) # R[1][1] = C[1] ^ (C[3] & C[2]) + xgr %r14,%r13 # C[0] ^ (C[1] | C[2]) + stg %r14,40(%r3) # R[1][0] = C[0] ^ (C[1] | C[2]) + + + lg %r10,104(%r2) + lg %r11,152(%r2) + lg %r9,56(%r2) + lg %r12,160(%r2) + lg %r8,8(%r2) + + xgr %r10,%r7 + xgr %r11,%r0 + rllg %r10,%r10,25 + xgr %r9,%r6 + rllg %r11,%r11,8 + xgr %r12,%r1 + rllg %r9,%r9,6 + xgr %r8,%r5 + + lgr %r13,%r10 + ngr %r10,%r11 + rllg %r12,%r12,18 + xgr %r10,%r9 # C[1] ^ ( C[2] & C[3]) + lghi %r14,-1 # no 'not' instruction :-( + stg %r10,88(%r3) # R[2][1] = C[1] ^ ( C[2] & C[3]) + + xgr %r11,%r14 # not %r11 + lgr %r14,%r12 + ngr %r12,%r11 + rllg %r8,%r8,1 + xgr %r12,%r13 # C[2] ^ ( C[4] & ~C[3]) + ogr %r13,%r9 + stg %r12,96(%r3) # R[2][2] = C[2] ^ ( C[4] & ~C[3]) + xgr %r13,%r8 # C[0] ^ ( C[2] | C[1]) + + ngr %r9,%r8 + stg %r13,80(%r3) # R[2][0] = C[0] ^ ( C[2] | C[1]) + xgr %r9,%r14 # C[4] ^ ( C[1] & C[0]) + ogr %r8,%r14 + stg %r9,112(%r3) # R[2][4] = C[4] ^ ( C[1] & C[0]) + xgr %r8,%r11 # ~C[3] ^ ( C[0] | C[4]) + stg %r8,104(%r3) # 
R[2][3] = ~C[3] ^ ( C[0] | C[4]) + + + lg %r10,88(%r2) + lg %r11,136(%r2) + lg %r9,40(%r2) + lg %r12,184(%r2) + lg %r8,32(%r2) + + xgr %r10,%r5 + xgr %r11,%r6 + rllg %r10,%r10,10 + xgr %r9,%r1 + rllg %r11,%r11,15 + xgr %r12,%r7 + rllg %r9,%r9,36 + xgr %r8,%r0 + rllg %r12,%r12,56 + + lgr %r13,%r10 + ogr %r10,%r11 + lghi %r14,-1 # no 'not' instruction :-( + xgr %r10,%r9 # C[1] ^ ( C[2] | C[3]) + xgr %r11,%r14 # not %r11 + stg %r10,128(%r3) # R[3][1] = C[1] ^ ( C[2] | C[3]) + + lgr %r14,%r12 + ogr %r12,%r11 + rllg %r8,%r8,27 + xgr %r12,%r13 # C[2] ^ ( C[4] | ~C[3]) + ngr %r13,%r9 + stg %r12,136(%r3) # R[3][2] = C[2] ^ ( C[4] | ~C[3]) + xgr %r13,%r8 # C[0] ^ ( C[2] & C[1]) + + ogr %r9,%r8 + stg %r13,120(%r3) # R[3][0] = C[0] ^ ( C[2] & C[1]) + xgr %r9,%r14 # C[4] ^ ( C[1] | C[0]) + ngr %r8,%r14 + stg %r9,152(%r3) # R[3][4] = C[4] ^ ( C[1] | C[0]) + xgr %r8,%r11 # ~C[3] ^ ( C[0] & C[4]) + stg %r8,144(%r3) # R[3][3] = ~C[3] ^ ( C[0] & C[4]) + + + xg %r6,16(%r2) + xg %r7,64(%r2) + xg %r5,168(%r2) + xg %r0,112(%r2) + xgr %r3,%r2 # xchg %r3,%r2 + rllg %r6,%r6,62 + xg %r1,120(%r2) + rllg %r7,%r7,55 + xgr %r2,%r3 + rllg %r5,%r5,2 + xgr %r3,%r2 + rllg %r0,%r0,39 + lgr %r13,%r6 + ngr %r6,%r7 + lghi %r14,-1 # no 'not' instruction :-( + xgr %r6,%r5 # C[4] ^ ( C[0] & C[1]) + xgr %r7,%r14 # not %r7 + stg %r6,192(%r2) # R[4][4] = C[4] ^ ( C[0] & C[1]) + + lgr %r14,%r0 + ngr %r0,%r7 + rllg %r1,%r1,41 + xgr %r0,%r13 # C[0] ^ ( C[2] & ~C[1]) + ogr %r13,%r5 + stg %r0,160(%r2) # R[4][0] = C[0] ^ ( C[2] & ~C[1]) + xgr %r13,%r1 # C[3] ^ ( C[0] | C[4]) + + ngr %r5,%r1 + stg %r13,184(%r2) # R[4][3] = C[3] ^ ( C[0] | C[4]) + xgr %r5,%r14 # C[2] ^ ( C[4] & C[3]) + ogr %r1,%r14 + stg %r5,176(%r2) # R[4][2] = C[2] ^ ( C[4] & C[3]) + xgr %r1,%r7 # ~C[1] ^ ( C[2] | C[3]) + + lgr %r7,%r6 # harmonize with the loop top + lgr %r6,%r13 + stg %r1,168(%r2) # R[4][1] = ~C[1] ^ ( C[2] | C[3]) + + tmll %r4,255 + jnz .Loop + + lg %r14,8*14(%r15) + br %r14 +.size __KeccakF1600,.-__KeccakF1600 +.type 
KeccakF1600,@function
+/*
+ * KeccakF1600 - wrapper that runs __KeccakF1600 on the state at %r2.
+ *   presumably: void KeccakF1600(uint64_t A[5][5]) -- confirm against the
+ *   C prototype, which is not visible in this file.
+ *
+ * In:    %r2 = pointer to the 25-lane (200-byte) state
+ * Out:   state updated in place
+ * Saves: %r6-%r15 in the caller-provided register save area; %r0, %r1 and
+ *        %r3 are overwritten here, and __KeccakF1600 uses further volatile
+ *        registers.
+ * Stack: 360-byte frame; the area starting at 160(%r15) is handed to
+ *        __KeccakF1600 in %r3 as its second lane buffer.
+ *
+ * Six lanes (byte offsets 8, 16, 64, 96, 136, 160) are complemented before
+ * the call and complemented back afterwards, i.e. __KeccakF1600 works on a
+ * state in which those lanes are stored inverted (presumably the usual
+ * lane-complementing trick that saves NOT operations in the chi step).
+ * .LKeccakF1600 is a file-local alias so SHA3_squeeze can call this
+ * routine with a relative branch.
+ */
+.align 32
+KeccakF1600:
+.LKeccakF1600:
+       lghi    %r1,-360                # frame size (negative)
+       stmg    %r6,%r15,8*6(%r15)      # save %r6-%r15 in the caller frame
+       lgr     %r0,%r15                # old stack pointer = back chain
+       la      %r15,0(%r1,%r15)        # allocate the frame
+       stg     %r0,0(%r15)             # store back chain
+
+       lghi    %r8,-1                  # no 'not' instruction :-(
+       lghi    %r9,-1
+       lghi    %r10,-1
+       lghi    %r11,-1
+       lghi    %r12,-1
+       lghi    %r13,-1
+       xg      %r8,8(%r2)              # ~lane at offset 8
+       xg      %r9,16(%r2)             # ~lane at offset 16
+       xg      %r10,64(%r2)            # ~lane at offset 64
+       xg      %r11,96(%r2)            # ~lane at offset 96
+       xg      %r12,136(%r2)           # ~lane at offset 136
+       xg      %r13,160(%r2)           # ~lane at offset 160
+       stmg    %r8,%r9,8(%r2)          # write the complemented lanes back
+       stg     %r10,64(%r2)
+       stg     %r11,96(%r2)
+       stg     %r12,136(%r2)
+       stg     %r13,160(%r2)
+
+       la      %r3,160(%r15)           # %r3 = scratch buffer inside our frame
+
+       bras    %r14,__KeccakF1600
+
+       lghi    %r8,-1                  # no 'not' instruction :-(
+       lghi    %r9,-1
+       lghi    %r10,-1
+       lghi    %r11,-1
+       lghi    %r12,-1
+       lghi    %r13,-1
+       xg      %r8,8(%r2)              # undo the lane complement
+       xg      %r9,16(%r2)
+       xg      %r10,64(%r2)
+       xg      %r11,96(%r2)
+       xg      %r12,136(%r2)
+       xg      %r13,160(%r2)
+       stmg    %r8,%r9,8(%r2)
+       stg     %r10,64(%r2)
+       stg     %r11,96(%r2)
+       stg     %r12,136(%r2)
+       stg     %r13,160(%r2)
+
+       lmg     %r6,%r15,360+6*8(%r15)  # restore %r6-%r15 (pops the frame)
+       br      %r14
+.size  KeccakF1600,.-KeccakF1600
+/*
+ * SHA3_absorb - XOR whole blocks of input into the state, permuting after
+ * each block.
+ *   presumably: size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp,
+ *                                  size_t len, size_t bsz) -- confirm against
+ *   the C prototype, which is not visible in this file.
+ *
+ * In:    %r2 = state, %r3 = input pointer, %r4 = input length in bytes,
+ *        %r5 = block size in bytes (consumed 8 bytes at a time)
+ * Out:   %r2 = number of trailing input bytes that were NOT absorbed
+ *        (always less than the block size)
+ * Saves: %r5-%r15 in the caller-provided register save area; the slots of
+ *        %r3/%r4 in that area are reused to keep inp/len across the call to
+ *        __KeccakF1600, and the %r5 slot supplies the block size again.
+ * Stack: 360-byte frame, scratch buffer at 160(%r15) as in KeccakF1600.
+ *
+ * The six complemented lanes are set up once on entry and undone once on
+ * exit, so the state stays in complemented form for the whole loop.
+ * NOTE(review): a block size below 8 would make the brct word counter start
+ * at zero and wrap; callers are assumed to pass a multiple of 8 that is at
+ * least 8 -- confirm.
+ */
+.globl SHA3_absorb
+.type  SHA3_absorb,@function
+.align 32
+SHA3_absorb:
+       lghi    %r1,-360                # frame size (negative)
+       stmg    %r5,%r15,8*5(%r15)      # save %r5-%r15 (the %r5 slot keeps bsz)
+       lgr     %r0,%r15                # old stack pointer = back chain
+       la      %r15,0(%r1,%r15)        # allocate the frame
+       stg     %r0,0(%r15)             # store back chain
+
+       lghi    %r8,-1                  # no 'not' instruction :-(
+       lghi    %r9,-1
+       lghi    %r10,-1
+       lghi    %r11,-1
+       lghi    %r12,-1
+       lghi    %r13,-1
+       xg      %r8,8(%r2)              # complement the six lanes (see above)
+       xg      %r9,16(%r2)
+       xg      %r10,64(%r2)
+       xg      %r11,96(%r2)
+       xg      %r12,136(%r2)
+       xg      %r13,160(%r2)
+       stmg    %r8,%r9,8(%r2)
+       stg     %r10,64(%r2)
+       stg     %r11,96(%r2)
+       stg     %r12,136(%r2)
+       stg     %r13,160(%r2)
+
+.Loop_absorb:
+       clgr    %r4,%r5                 # unsigned: len < bsz ?
+       jl      .Ldone_absorb           # yes: leave the remainder to the caller
+
+       srlg    %r5,%r5,3               # bsz in 64-bit words = loop count
+       la      %r1,0(%r2)              # %r1 walks the state lanes
+
+.Lblock_absorb:
+       lrvg    %r0,0(%r3)              # byte-reversed load: input is little-endian
+       la      %r3,8(%r3)
+       xg      %r0,0(%r1)              # lane ^= input word
+       aghi    %r4,-8
+       stg     %r0,0(%r1)
+       la      %r1,8(%r1)
+       brct    %r5,.Lblock_absorb
+
+       stmg    %r3,%r4,360+3*8(%r15)   # park inp/len in the caller save area
+       la      %r3,160(%r15)           # %r3 = scratch buffer inside our frame
+       bras    %r14,__KeccakF1600
+       lmg     %r3,%r5,360+3*8(%r15)   # reload inp, len and the original bsz
+       j       .Loop_absorb
+
+.align 16
+.Ldone_absorb:
+       lghi    %r8,-1                  # no 'not' instruction :-(
+       lghi    %r9,-1
+       lghi    %r10,-1
+       lghi    %r11,-1
+       lghi    %r12,-1
+       lghi    %r13,-1
+       xg      %r8,8(%r2)              # undo the lane complement
+       xg      %r9,16(%r2)
+       xg      %r10,64(%r2)
+       xg      %r11,96(%r2)
+       xg      %r12,136(%r2)
+       xg      %r13,160(%r2)
+       stmg    %r8,%r9,8(%r2)
+       stg     %r10,64(%r2)
+       stg     %r11,96(%r2)
+       stg     %r12,136(%r2)
+       stg     %r13,160(%r2)
+
+       lgr     %r2,%r4                 # return value
+
+       lmg     %r6,%r15,360+6*8(%r15)  # restore %r6-%r15 (pops the frame)
+       br      %r14
+.size  SHA3_absorb,.-SHA3_absorb
+/*
+ * SHA3_squeeze - copy output bytes out of the state, permuting the state
+ * every time a full block has been emitted.
+ *   presumably: void SHA3_squeeze(uint64_t A[5][5], unsigned char *out,
+ *                                 size_t len, size_t bsz) -- confirm against
+ *   the C prototype, which is not visible in this file.
+ *
+ * In:    %r2 = state, %r3 = output pointer, %r4 = number of bytes wanted,
+ *        %r5 = block size in bytes
+ * Out:   len bytes written through %r3; state permuted as needed
+ * Stack: no frame of its own. The return address, out/len and the word
+ *        count (bsz >> 3) are parked in the 2*8, 3*8/4*8 and 5*8 slots of
+ *        the caller-provided save area; KeccakF1600 only stores from 6*8
+ *        upwards, so those slots survive the call.
+ * Uses:  %r0, %r1 and %r14 as scratch (%r14 holds the constant 8 inside the
+ *        loop and is reloaded before returning).
+ *
+ * Lanes are emitted little-endian: whole lanes with a byte-reversed load
+ * followed by a plain store, a trailing partial lane byte by byte from the
+ * least significant end.
+ *
+ * A zero len returns immediately without touching out or the state.
+ * Without that guard the tail loop would be entered with a count of 0, and
+ * because brct decrements its 32-bit count before testing it, the count
+ * would wrap to 0xffffffff and the loop would store about 4 GiB.
+ */
+.globl SHA3_squeeze
+.type  SHA3_squeeze,@function
+.align 32
+SHA3_squeeze:
+       srlg    %r5,%r5,3               # bsz in 64-bit words
+       stg     %r14,2*8(%r15)          # park the return address
+       lghi    %r14,8                  # constant 8 for the len comparison
+       stg     %r5,5*8(%r15)           # park the word count for later reloads
+       la      %r1,0(%r2)              # %r1 walks the state lanes
+
+       ltgr    %r4,%r4                 # len == 0 ?
+       jz      .Ldone_squeeze          # yes: nothing to emit, just return
+
+       j       .Loop_squeeze
+
+.align 16
+.Loop_squeeze:
+       clgr    %r4,%r14                # fewer than 8 bytes left?
+       jl      .Ltail_squeeze
+
+       lrvg    %r0,0(%r1)              # byte-reversed load: emit lane little-endian
+       la      %r1,8(%r1)
+       stg     %r0,0(%r3)
+       la      %r3,8(%r3)
+       aghi    %r4,-8                  # len -= 8
+       jz      .Ldone_squeeze
+
+       brct    %r5,.Loop_squeeze       # bsz--
+
+       stmg    %r3,%r4,3*8(%r15)       # block exhausted: park out/len ...
+       bras    %r14,.LKeccakF1600      # ... and permute the state
+       lmg     %r3,%r5,3*8(%r15)       # reload out, len and the word count
+       lghi    %r14,8
+       la      %r1,0(%r2)              # restart at the first lane
+       j       .Loop_squeeze
+
+.Ltail_squeeze:
+       lg      %r0,0(%r1)              # last, partial lane (1..7 bytes wanted)
+.Loop_tail_squeeze:
+       stc     %r0,0(%r3)              # emit the least significant byte
+       la      %r3,1(%r3)
+       srlg    %r0,%r0,8
+       brct    %r4,.Loop_tail_squeeze
+
+.Ldone_squeeze:
+       lg      %r14,2*8(%r15)          # recover the return address
+       br      %r14
+.size  SHA3_squeeze,.-SHA3_squeeze
+/*
+ * iotas - the 24 64-bit round constants, one per round.
+ * The table is preceded by eight zero quadwords (64 bytes) after a 256-byte
+ * alignment, so its end (64 + 24*8 = 256) falls on a 256-byte boundary;
+ * presumably this is what the "tmll %r4,255" loop-exit test in
+ * __KeccakF1600 relies on -- confirm there.
+ */
+.align 256
+       .quad   0,0,0,0,0,0,0,0
+.type  iotas,@object
+iotas:
+       .quad   0x0000000000000001
+       .quad   0x0000000000008082
+       .quad   0x800000000000808a
+       .quad   0x8000000080008000
+       .quad   0x000000000000808b
+       .quad   0x0000000080000001
+       .quad   0x8000000080008081
+       .quad   0x8000000000008009
+       .quad   0x000000000000008a
+       .quad   0x0000000000000088
+       .quad   0x0000000080008009
+       .quad   0x000000008000000a
+       .quad   0x000000008000808b
+       .quad   0x800000000000008b
+       .quad   0x8000000000008089
+       .quad   0x8000000000008003
+       .quad   0x8000000000008002
+       .quad   0x8000000000000080
+       .quad   0x000000000000800a
+       .quad   0x800000008000000a
+       .quad   0x8000000080008081
+       .quad   0x8000000000008080
+       .quad   0x0000000080000001
+       .quad   0x8000000080008008
+.size  iotas,.-iotas
+.asciz "Keccak-1600 absorb and squeeze for s390x, CRYPTOGAMS by <appro@openssl.org>"
diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha1-s390x.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha1-s390x.S
new file mode 100644
index
0000000000..37413bfb08 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha1-s390x.S @@ -0,0 +1,1207 @@ +#include "s390x_arch.h" + +.text +.align 64 +.type Ktable,@object +Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6 + .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0 +.size Ktable,.-Ktable +.globl sha1_block_data_order +.type sha1_block_data_order,@function +sha1_block_data_order: + larl %r1,OPENSSL_s390xcap_P + lg %r0,S390X_KIMD(%r1) # check kimd capabilities + tmhh %r0,16384 + jz .Lsoftware + lghi %r0,1 + lgr %r1,%r2 + lgr %r2,%r3 + sllg %r3,%r4,6 + .long 0xb93e0002 # kimd %r0,%r2 + brc 1,.-4 # pay attention to "partial completion" + br %r14 +.align 16 +.Lsoftware: + lghi %r1,-224 + stg %r2,16(%r15) + stmg %r6,%r15,48(%r15) + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + + larl %r10,Ktable + llgf %r5,0(%r2) + llgf %r6,4(%r2) + llgf %r7,8(%r2) + llgf %r8,12(%r2) + llgf %r9,16(%r2) + + lg %r0,0(%r10) + lg %r1,8(%r10) + +.Lloop: + rllg %r0,%r0,32 + lg %r12,0(%r3) ### Xload(0) + rllg %r13,%r12,32 + stg %r12,160(%r15) + alr %r9,%r0 ### 0 + rll %r11,%r5,5 + lr %r10,%r8 + xr %r10,%r7 + alr %r9,%r11 + nr %r10,%r6 + alr %r9,%r13 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + alr %r8,%r0 ### 1 + rll %r11,%r9,5 + lr %r10,%r7 + xr %r10,%r6 + alr %r8,%r11 + nr %r10,%r5 + alr %r8,%r12 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + lg %r14,8(%r3) ### Xload(2) + rllg %r12,%r14,32 + stg %r14,168(%r15) + alr %r7,%r0 ### 2 + rll %r11,%r8,5 + lr %r10,%r6 + xr %r10,%r5 + alr %r7,%r11 + nr %r10,%r9 + alr %r7,%r12 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + alr %r6,%r0 ### 3 + rll %r11,%r7,5 + lr %r10,%r5 + xr %r10,%r9 + alr %r6,%r11 + nr %r10,%r8 + alr %r6,%r14 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + lg %r13,16(%r3) ### Xload(4) + rllg %r14,%r13,32 + stg %r13,176(%r15) + alr %r5,%r0 ### 4 + rll %r11,%r6,5 + lr %r10,%r9 + xr %r10,%r8 + alr %r5,%r11 + nr %r10,%r7 + alr %r5,%r14 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + alr 
%r9,%r0 ### 5 + rll %r11,%r5,5 + lr %r10,%r8 + xr %r10,%r7 + alr %r9,%r11 + nr %r10,%r6 + alr %r9,%r13 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + lg %r12,24(%r3) ### Xload(6) + rllg %r13,%r12,32 + stg %r12,184(%r15) + alr %r8,%r0 ### 6 + rll %r11,%r9,5 + lr %r10,%r7 + xr %r10,%r6 + alr %r8,%r11 + nr %r10,%r5 + alr %r8,%r13 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + alr %r7,%r0 ### 7 + rll %r11,%r8,5 + lr %r10,%r6 + xr %r10,%r5 + alr %r7,%r11 + nr %r10,%r9 + alr %r7,%r12 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + lg %r14,32(%r3) ### Xload(8) + rllg %r12,%r14,32 + stg %r14,192(%r15) + alr %r6,%r0 ### 8 + rll %r11,%r7,5 + lr %r10,%r5 + xr %r10,%r9 + alr %r6,%r11 + nr %r10,%r8 + alr %r6,%r12 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + alr %r5,%r0 ### 9 + rll %r11,%r6,5 + lr %r10,%r9 + xr %r10,%r8 + alr %r5,%r11 + nr %r10,%r7 + alr %r5,%r14 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + lg %r13,40(%r3) ### Xload(10) + rllg %r14,%r13,32 + stg %r13,200(%r15) + alr %r9,%r0 ### 10 + rll %r11,%r5,5 + lr %r10,%r8 + xr %r10,%r7 + alr %r9,%r11 + nr %r10,%r6 + alr %r9,%r14 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + alr %r8,%r0 ### 11 + rll %r11,%r9,5 + lr %r10,%r7 + xr %r10,%r6 + alr %r8,%r11 + nr %r10,%r5 + alr %r8,%r13 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + lg %r12,48(%r3) ### Xload(12) + rllg %r13,%r12,32 + stg %r12,208(%r15) + alr %r7,%r0 ### 12 + rll %r11,%r8,5 + lr %r10,%r6 + xr %r10,%r5 + alr %r7,%r11 + nr %r10,%r9 + alr %r7,%r13 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + alr %r6,%r0 ### 13 + rll %r11,%r7,5 + lr %r10,%r5 + xr %r10,%r9 + alr %r6,%r11 + nr %r10,%r8 + alr %r6,%r12 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + lg %r14,56(%r3) ### Xload(14) + rllg %r12,%r14,32 + stg %r14,216(%r15) + alr %r5,%r0 ### 14 + rll %r11,%r6,5 + lr %r10,%r9 + xr %r10,%r8 + alr %r5,%r11 + nr %r10,%r7 + alr %r5,%r12 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + lg %r2,160(%r15) ### Xupdate(16) warm-up + lr %r13,%r12 + alr %r9,%r0 ### 15 + rll 
%r11,%r5,5 + lr %r10,%r8 + xr %r10,%r7 + alr %r9,%r11 + nr %r10,%r6 + alr %r9,%r14 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + xgr %r13,%r2 ### Xupdate(16) + lg %r2,168(%r15) + xg %r13,192(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,160(%r15) + alr %r8,%r0 ### 16 + rll %r11,%r9,5 + lr %r10,%r7 + xr %r10,%r6 + alr %r8,%r11 + nr %r10,%r5 + alr %r8,%r14 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + alr %r7,%r0 ### 17 + rll %r11,%r8,5 + lr %r10,%r6 + xr %r10,%r5 + alr %r7,%r11 + nr %r10,%r9 + alr %r7,%r13 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + xgr %r12,%r2 ### Xupdate(18) + lg %r2,176(%r15) + xg %r12,200(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,168(%r15) + alr %r6,%r0 ### 18 + rll %r11,%r7,5 + lr %r10,%r5 + xr %r10,%r9 + alr %r6,%r11 + nr %r10,%r8 + alr %r6,%r13 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + alr %r5,%r0 ### 19 + rll %r11,%r6,5 + lr %r10,%r9 + xr %r10,%r8 + alr %r5,%r11 + nr %r10,%r7 + alr %r5,%r12 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + rllg %r0,%r0,32 + xgr %r14,%r2 ### Xupdate(20) + lg %r2,184(%r15) + xg %r14,208(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,176(%r15) + alr %r9,%r0 ### 20 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + xr %r10,%r7 + alr %r9,%r12 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + alr %r8,%r0 ### 21 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + xr %r10,%r6 + alr %r8,%r14 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + xgr %r13,%r2 ### Xupdate(22) + lg %r2,192(%r15) + xg %r13,216(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,184(%r15) + alr %r7,%r0 ### 22 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + xr %r10,%r5 + alr %r7,%r14 + xr %r10,%r6 + 
rll %r9,%r9,30 + alr %r7,%r10 + alr %r6,%r0 ### 23 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + xr %r10,%r9 + alr %r6,%r13 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + xgr %r12,%r2 ### Xupdate(24) + lg %r2,200(%r15) + xg %r12,160(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,192(%r15) + alr %r5,%r0 ### 24 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + xr %r10,%r8 + alr %r5,%r13 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + alr %r9,%r0 ### 25 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + xr %r10,%r7 + alr %r9,%r12 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + xgr %r14,%r2 ### Xupdate(26) + lg %r2,208(%r15) + xg %r14,168(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,200(%r15) + alr %r8,%r0 ### 26 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + xr %r10,%r6 + alr %r8,%r12 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + alr %r7,%r0 ### 27 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + xr %r10,%r5 + alr %r7,%r14 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + xgr %r13,%r2 ### Xupdate(28) + lg %r2,216(%r15) + xg %r13,176(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,208(%r15) + alr %r6,%r0 ### 28 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + xr %r10,%r9 + alr %r6,%r14 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + alr %r5,%r0 ### 29 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + xr %r10,%r8 + alr %r5,%r13 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + xgr %r12,%r2 ### Xupdate(30) + lg %r2,160(%r15) + xg %r12,184(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,216(%r15) + alr %r9,%r0 ### 30 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + xr %r10,%r7 + alr %r9,%r13 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + 
alr %r8,%r0 ### 31 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + xr %r10,%r6 + alr %r8,%r12 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + xgr %r14,%r2 ### Xupdate(32) + lg %r2,168(%r15) + xg %r14,192(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,160(%r15) + alr %r7,%r0 ### 32 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + xr %r10,%r5 + alr %r7,%r12 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + alr %r6,%r0 ### 33 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + xr %r10,%r9 + alr %r6,%r14 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + xgr %r13,%r2 ### Xupdate(34) + lg %r2,176(%r15) + xg %r13,200(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,168(%r15) + alr %r5,%r0 ### 34 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + xr %r10,%r8 + alr %r5,%r14 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + alr %r9,%r0 ### 35 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + xr %r10,%r7 + alr %r9,%r13 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + xgr %r12,%r2 ### Xupdate(36) + lg %r2,184(%r15) + xg %r12,208(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,176(%r15) + alr %r8,%r0 ### 36 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + xr %r10,%r6 + alr %r8,%r13 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + alr %r7,%r0 ### 37 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + xr %r10,%r5 + alr %r7,%r12 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + xgr %r14,%r2 ### Xupdate(38) + lg %r2,192(%r15) + xg %r14,216(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,184(%r15) + alr %r6,%r0 ### 38 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + xr %r10,%r9 + alr %r6,%r12 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + alr %r5,%r0 ### 39 + rll 
%r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + xr %r10,%r8 + alr %r5,%r14 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + rllg %r1,%r1,32 + xgr %r13,%r2 ### Xupdate(40) + lg %r2,200(%r15) + xg %r13,160(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,192(%r15) + alr %r9,%r1 ### 40 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + or %r10,%r7 + lr %r11,%r6 + nr %r10,%r8 + nr %r11,%r7 + alr %r9,%r14 + or %r10,%r11 + rll %r6,%r6,30 + alr %r9,%r10 + alr %r8,%r1 ### 41 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + or %r10,%r6 + lr %r11,%r5 + nr %r10,%r7 + nr %r11,%r6 + alr %r8,%r13 + or %r10,%r11 + rll %r5,%r5,30 + alr %r8,%r10 + xgr %r12,%r2 ### Xupdate(42) + lg %r2,208(%r15) + xg %r12,168(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,200(%r15) + alr %r7,%r1 ### 42 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + or %r10,%r5 + lr %r11,%r9 + nr %r10,%r6 + nr %r11,%r5 + alr %r7,%r13 + or %r10,%r11 + rll %r9,%r9,30 + alr %r7,%r10 + alr %r6,%r1 ### 43 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + or %r10,%r9 + lr %r11,%r8 + nr %r10,%r5 + nr %r11,%r9 + alr %r6,%r12 + or %r10,%r11 + rll %r8,%r8,30 + alr %r6,%r10 + xgr %r14,%r2 ### Xupdate(44) + lg %r2,216(%r15) + xg %r14,176(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,208(%r15) + alr %r5,%r1 ### 44 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + or %r10,%r8 + lr %r11,%r7 + nr %r10,%r9 + nr %r11,%r8 + alr %r5,%r12 + or %r10,%r11 + rll %r7,%r7,30 + alr %r5,%r10 + alr %r9,%r1 ### 45 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + or %r10,%r7 + lr %r11,%r6 + nr %r10,%r8 + nr %r11,%r7 + alr %r9,%r14 + or %r10,%r11 + rll %r6,%r6,30 + alr %r9,%r10 + xgr %r13,%r2 ### Xupdate(46) + lg %r2,160(%r15) + xg %r13,184(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll 
%r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,216(%r15) + alr %r8,%r1 ### 46 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + or %r10,%r6 + lr %r11,%r5 + nr %r10,%r7 + nr %r11,%r6 + alr %r8,%r14 + or %r10,%r11 + rll %r5,%r5,30 + alr %r8,%r10 + alr %r7,%r1 ### 47 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + or %r10,%r5 + lr %r11,%r9 + nr %r10,%r6 + nr %r11,%r5 + alr %r7,%r13 + or %r10,%r11 + rll %r9,%r9,30 + alr %r7,%r10 + xgr %r12,%r2 ### Xupdate(48) + lg %r2,168(%r15) + xg %r12,192(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,160(%r15) + alr %r6,%r1 ### 48 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + or %r10,%r9 + lr %r11,%r8 + nr %r10,%r5 + nr %r11,%r9 + alr %r6,%r13 + or %r10,%r11 + rll %r8,%r8,30 + alr %r6,%r10 + alr %r5,%r1 ### 49 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + or %r10,%r8 + lr %r11,%r7 + nr %r10,%r9 + nr %r11,%r8 + alr %r5,%r12 + or %r10,%r11 + rll %r7,%r7,30 + alr %r5,%r10 + xgr %r14,%r2 ### Xupdate(50) + lg %r2,176(%r15) + xg %r14,200(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,168(%r15) + alr %r9,%r1 ### 50 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + or %r10,%r7 + lr %r11,%r6 + nr %r10,%r8 + nr %r11,%r7 + alr %r9,%r12 + or %r10,%r11 + rll %r6,%r6,30 + alr %r9,%r10 + alr %r8,%r1 ### 51 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + or %r10,%r6 + lr %r11,%r5 + nr %r10,%r7 + nr %r11,%r6 + alr %r8,%r14 + or %r10,%r11 + rll %r5,%r5,30 + alr %r8,%r10 + xgr %r13,%r2 ### Xupdate(52) + lg %r2,184(%r15) + xg %r13,208(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,176(%r15) + alr %r7,%r1 ### 52 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + or %r10,%r5 + lr %r11,%r9 + nr %r10,%r6 + nr %r11,%r5 + alr %r7,%r14 + or %r10,%r11 + rll %r9,%r9,30 + alr %r7,%r10 + 
alr %r6,%r1 ### 53 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + or %r10,%r9 + lr %r11,%r8 + nr %r10,%r5 + nr %r11,%r9 + alr %r6,%r13 + or %r10,%r11 + rll %r8,%r8,30 + alr %r6,%r10 + xgr %r12,%r2 ### Xupdate(54) + lg %r2,192(%r15) + xg %r12,216(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,184(%r15) + alr %r5,%r1 ### 54 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + or %r10,%r8 + lr %r11,%r7 + nr %r10,%r9 + nr %r11,%r8 + alr %r5,%r13 + or %r10,%r11 + rll %r7,%r7,30 + alr %r5,%r10 + alr %r9,%r1 ### 55 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + or %r10,%r7 + lr %r11,%r6 + nr %r10,%r8 + nr %r11,%r7 + alr %r9,%r12 + or %r10,%r11 + rll %r6,%r6,30 + alr %r9,%r10 + xgr %r14,%r2 ### Xupdate(56) + lg %r2,200(%r15) + xg %r14,160(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,192(%r15) + alr %r8,%r1 ### 56 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + or %r10,%r6 + lr %r11,%r5 + nr %r10,%r7 + nr %r11,%r6 + alr %r8,%r12 + or %r10,%r11 + rll %r5,%r5,30 + alr %r8,%r10 + alr %r7,%r1 ### 57 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + or %r10,%r5 + lr %r11,%r9 + nr %r10,%r6 + nr %r11,%r5 + alr %r7,%r14 + or %r10,%r11 + rll %r9,%r9,30 + alr %r7,%r10 + xgr %r13,%r2 ### Xupdate(58) + lg %r2,208(%r15) + xg %r13,168(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,200(%r15) + alr %r6,%r1 ### 58 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + or %r10,%r9 + lr %r11,%r8 + nr %r10,%r5 + nr %r11,%r9 + alr %r6,%r14 + or %r10,%r11 + rll %r8,%r8,30 + alr %r6,%r10 + alr %r5,%r1 ### 59 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + or %r10,%r8 + lr %r11,%r7 + nr %r10,%r9 + nr %r11,%r8 + alr %r5,%r13 + or %r10,%r11 + rll %r7,%r7,30 + alr %r5,%r10 + rllg %r1,%r1,32 + xgr %r12,%r2 ### Xupdate(60) + lg %r2,216(%r15) + xg 
%r12,176(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,208(%r15) + alr %r9,%r1 ### 60 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + xr %r10,%r7 + alr %r9,%r13 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + alr %r8,%r1 ### 61 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + xr %r10,%r6 + alr %r8,%r12 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + xgr %r14,%r2 ### Xupdate(62) + lg %r2,160(%r15) + xg %r14,184(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,216(%r15) + alr %r7,%r1 ### 62 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + xr %r10,%r5 + alr %r7,%r12 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + alr %r6,%r1 ### 63 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + xr %r10,%r9 + alr %r6,%r14 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + xgr %r13,%r2 ### Xupdate(64) + lg %r2,168(%r15) + xg %r13,192(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,160(%r15) + alr %r5,%r1 ### 64 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + xr %r10,%r8 + alr %r5,%r14 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + alr %r9,%r1 ### 65 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + xr %r10,%r7 + alr %r9,%r13 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + xgr %r12,%r2 ### Xupdate(66) + lg %r2,176(%r15) + xg %r12,200(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + stg %r12,168(%r15) + alr %r8,%r1 ### 66 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + xr %r10,%r6 + alr %r8,%r13 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + alr %r7,%r1 ### 67 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + xr %r10,%r5 + alr %r7,%r12 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + xgr %r14,%r2 ### Xupdate(68) + lg %r2,184(%r15) + xg %r14,208(%r15) + xgr %r14,%r2 + 
rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + stg %r14,176(%r15) + alr %r6,%r1 ### 68 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + xr %r10,%r9 + alr %r6,%r12 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + alr %r5,%r1 ### 69 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + xr %r10,%r8 + alr %r5,%r14 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + xgr %r13,%r2 ### Xupdate(70) + lg %r2,192(%r15) + xg %r13,216(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 + lr %r12,%r14 # feedback + stg %r13,184(%r15) + alr %r9,%r1 ### 70 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + xr %r10,%r7 + alr %r9,%r14 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + alr %r8,%r1 ### 71 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + xr %r10,%r6 + alr %r8,%r13 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + xgr %r12,%r2 ### Xupdate(72) + lg %r2,200(%r15) + xg %r12,160(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + alr %r7,%r1 ### 72 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + xr %r10,%r5 + alr %r7,%r13 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + alr %r6,%r1 ### 73 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + xr %r10,%r9 + alr %r6,%r12 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + xgr %r14,%r2 ### Xupdate(74) + lg %r2,208(%r15) + xg %r14,168(%r15) + xgr %r14,%r2 + rll %r14,%r14,1 + rllg %r12,%r14,32 + rll %r12,%r12,1 + rllg %r14,%r12,32 + lr %r13,%r12 # feedback + alr %r5,%r1 ### 74 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + xr %r10,%r8 + alr %r5,%r12 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + alr %r9,%r1 ### 75 + rll %r11,%r5,5 + lr %r10,%r6 + alr %r9,%r11 + xr %r10,%r7 + alr %r9,%r14 + xr %r10,%r8 + rll %r6,%r6,30 + alr %r9,%r10 + xgr %r13,%r2 ### Xupdate(76) + lg %r2,216(%r15) + xg %r13,176(%r15) + xgr %r13,%r2 + rll %r13,%r13,1 + rllg %r14,%r13,32 + rll %r14,%r14,1 + rllg %r13,%r14,32 
+ lr %r12,%r14 # feedback + alr %r8,%r1 ### 76 + rll %r11,%r9,5 + lr %r10,%r5 + alr %r8,%r11 + xr %r10,%r6 + alr %r8,%r14 + xr %r10,%r7 + rll %r5,%r5,30 + alr %r8,%r10 + alr %r7,%r1 ### 77 + rll %r11,%r8,5 + lr %r10,%r9 + alr %r7,%r11 + xr %r10,%r5 + alr %r7,%r13 + xr %r10,%r6 + rll %r9,%r9,30 + alr %r7,%r10 + xgr %r12,%r2 ### Xupdate(78) + lg %r2,160(%r15) + xg %r12,184(%r15) + xgr %r12,%r2 + rll %r12,%r12,1 + rllg %r13,%r12,32 + rll %r13,%r13,1 + rllg %r12,%r13,32 + lr %r14,%r13 # feedback + alr %r6,%r1 ### 78 + rll %r11,%r7,5 + lr %r10,%r8 + alr %r6,%r11 + xr %r10,%r9 + alr %r6,%r13 + xr %r10,%r5 + rll %r8,%r8,30 + alr %r6,%r10 + alr %r5,%r1 ### 79 + rll %r11,%r6,5 + lr %r10,%r7 + alr %r5,%r11 + xr %r10,%r8 + alr %r5,%r12 + xr %r10,%r9 + rll %r7,%r7,30 + alr %r5,%r10 + + lg %r2,240(%r15) + la %r3,64(%r3) + al %r5,0(%r2) + al %r6,4(%r2) + al %r7,8(%r2) + al %r8,12(%r2) + al %r9,16(%r2) + st %r5,0(%r2) + st %r6,4(%r2) + st %r7,8(%r2) + st %r8,12(%r2) + st %r9,16(%r2) + brctg %r4,.Lloop + + lmg %r6,%r15,272(%r15) + br %r14 +.size sha1_block_data_order,.-sha1_block_data_order +.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro@openssl.org>" diff --git a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha256-s390x.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha256-s390x.S new file mode 100644 index 0000000000..45d3601ac2 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha256-s390x.S @@ -0,0 +1,1242 @@ +#include "s390x_arch.h" + +.text +.align 64 +.type K256,@object +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.globl sha256_block_data_order +.type sha256_block_data_order,@function +sha256_block_data_order: + sllg %r4,%r4,6 + larl %r1,OPENSSL_s390xcap_P + lg %r0,S390X_KIMD(%r1) # check kimd capabilities + tmhh %r0,8192 + jz .Lsoftware + lghi %r0,2 + lgr %r1,%r2 + lgr %r2,%r3 + lgr %r3,%r4 + .long 0xb93e0002 # kimd %r0,%r2 + brc 1,.-4 # pay attention to "partial completion" + br %r14 +.align 16 +.Lsoftware: + lghi %r1,-224 + la %r4,0(%r4,%r3) + stmg %r2,%r15,16(%r15) + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + + larl %r13,K256 + llgf %r5,0(%r2) + llgf %r6,4(%r2) + llgf %r7,8(%r2) + llgf %r8,12(%r2) + llgf %r9,16(%r2) + llgf %r10,20(%r2) + llgf %r11,24(%r2) + llgf %r12,28(%r2) + +.Lloop: + lghi %r4,0 + llgf %r14,0(%r3) ### 0 + rll %r0,%r9,7 + rll %r1,%r9,21 + lgr %r2,%r10 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r11 + st %r14,160(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r12 # T1+=h + ngr %r2,%r9 + lgr %r1,%r5 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r12,%r5,10 + xgr %r2,%r11 # Ch(e,f,g) + al %r14,0(%r4,%r13) # T1+=K[i] + rll %r0,%r5,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r6 + xgr %r12,%r0 + lgr %r2,%r5 + ngr %r1,%r7 + rll %r0,%r0,11 + xgr %r12,%r0 # h=Sigma0(a) + ngr %r2,%r6 + algr %r12,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r8,%r14 # d+=T1 + algr %r12,%r2 # h+=Maj(a,b,c) + llgf %r14,4(%r3) ### 1 + rll %r0,%r8,7 + rll %r1,%r8,21 + lgr %r2,%r9 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r10 + st %r14,164(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r11 # T1+=h + ngr %r2,%r8 + lgr %r1,%r12 + algr %r14,%r0 # T1+=Sigma1(e) + rll 
%r11,%r12,10 + xgr %r2,%r10 # Ch(e,f,g) + al %r14,4(%r4,%r13) # T1+=K[i] + rll %r0,%r12,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r5 + xgr %r11,%r0 + lgr %r2,%r12 + ngr %r1,%r6 + rll %r0,%r0,11 + xgr %r11,%r0 # h=Sigma0(a) + ngr %r2,%r5 + algr %r11,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r7,%r14 # d+=T1 + algr %r11,%r2 # h+=Maj(a,b,c) + llgf %r14,8(%r3) ### 2 + rll %r0,%r7,7 + rll %r1,%r7,21 + lgr %r2,%r8 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r9 + st %r14,168(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r10 # T1+=h + ngr %r2,%r7 + lgr %r1,%r11 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r10,%r11,10 + xgr %r2,%r9 # Ch(e,f,g) + al %r14,8(%r4,%r13) # T1+=K[i] + rll %r0,%r11,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r12 + xgr %r10,%r0 + lgr %r2,%r11 + ngr %r1,%r5 + rll %r0,%r0,11 + xgr %r10,%r0 # h=Sigma0(a) + ngr %r2,%r12 + algr %r10,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r6,%r14 # d+=T1 + algr %r10,%r2 # h+=Maj(a,b,c) + llgf %r14,12(%r3) ### 3 + rll %r0,%r6,7 + rll %r1,%r6,21 + lgr %r2,%r7 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r8 + st %r14,172(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r9 # T1+=h + ngr %r2,%r6 + lgr %r1,%r10 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r9,%r10,10 + xgr %r2,%r8 # Ch(e,f,g) + al %r14,12(%r4,%r13) # T1+=K[i] + rll %r0,%r10,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r11 + xgr %r9,%r0 + lgr %r2,%r10 + ngr %r1,%r12 + rll %r0,%r0,11 + xgr %r9,%r0 # h=Sigma0(a) + ngr %r2,%r11 + algr %r9,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r5,%r14 # d+=T1 + algr %r9,%r2 # h+=Maj(a,b,c) + llgf %r14,16(%r3) ### 4 + rll %r0,%r5,7 + rll %r1,%r5,21 + lgr %r2,%r6 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r7 + st %r14,176(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r8 # T1+=h + ngr %r2,%r5 + lgr %r1,%r9 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r8,%r9,10 + xgr %r2,%r7 # Ch(e,f,g) + al %r14,16(%r4,%r13) # T1+=K[i] + rll %r0,%r9,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r10 + xgr %r8,%r0 + lgr %r2,%r9 + ngr %r1,%r11 + rll 
%r0,%r0,11 + xgr %r8,%r0 # h=Sigma0(a) + ngr %r2,%r10 + algr %r8,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r12,%r14 # d+=T1 + algr %r8,%r2 # h+=Maj(a,b,c) + llgf %r14,20(%r3) ### 5 + rll %r0,%r12,7 + rll %r1,%r12,21 + lgr %r2,%r5 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r6 + st %r14,180(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r7 # T1+=h + ngr %r2,%r12 + lgr %r1,%r8 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r7,%r8,10 + xgr %r2,%r6 # Ch(e,f,g) + al %r14,20(%r4,%r13) # T1+=K[i] + rll %r0,%r8,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r9 + xgr %r7,%r0 + lgr %r2,%r8 + ngr %r1,%r10 + rll %r0,%r0,11 + xgr %r7,%r0 # h=Sigma0(a) + ngr %r2,%r9 + algr %r7,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r11,%r14 # d+=T1 + algr %r7,%r2 # h+=Maj(a,b,c) + llgf %r14,24(%r3) ### 6 + rll %r0,%r11,7 + rll %r1,%r11,21 + lgr %r2,%r12 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r5 + st %r14,184(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r6 # T1+=h + ngr %r2,%r11 + lgr %r1,%r7 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r6,%r7,10 + xgr %r2,%r5 # Ch(e,f,g) + al %r14,24(%r4,%r13) # T1+=K[i] + rll %r0,%r7,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r8 + xgr %r6,%r0 + lgr %r2,%r7 + ngr %r1,%r9 + rll %r0,%r0,11 + xgr %r6,%r0 # h=Sigma0(a) + ngr %r2,%r8 + algr %r6,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r10,%r14 # d+=T1 + algr %r6,%r2 # h+=Maj(a,b,c) + llgf %r14,28(%r3) ### 7 + rll %r0,%r10,7 + rll %r1,%r10,21 + lgr %r2,%r11 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r12 + st %r14,188(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r5 # T1+=h + ngr %r2,%r10 + lgr %r1,%r6 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r5,%r6,10 + xgr %r2,%r12 # Ch(e,f,g) + al %r14,28(%r4,%r13) # T1+=K[i] + rll %r0,%r6,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r7 + xgr %r5,%r0 + lgr %r2,%r6 + ngr %r1,%r8 + rll %r0,%r0,11 + xgr %r5,%r0 # h=Sigma0(a) + ngr %r2,%r7 + algr %r5,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r9,%r14 # d+=T1 + algr %r5,%r2 # h+=Maj(a,b,c) + llgf %r14,32(%r3) ### 8 + rll 
%r0,%r9,7 + rll %r1,%r9,21 + lgr %r2,%r10 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r11 + st %r14,192(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r12 # T1+=h + ngr %r2,%r9 + lgr %r1,%r5 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r12,%r5,10 + xgr %r2,%r11 # Ch(e,f,g) + al %r14,32(%r4,%r13) # T1+=K[i] + rll %r0,%r5,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r6 + xgr %r12,%r0 + lgr %r2,%r5 + ngr %r1,%r7 + rll %r0,%r0,11 + xgr %r12,%r0 # h=Sigma0(a) + ngr %r2,%r6 + algr %r12,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r8,%r14 # d+=T1 + algr %r12,%r2 # h+=Maj(a,b,c) + llgf %r14,36(%r3) ### 9 + rll %r0,%r8,7 + rll %r1,%r8,21 + lgr %r2,%r9 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r10 + st %r14,196(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r11 # T1+=h + ngr %r2,%r8 + lgr %r1,%r12 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r11,%r12,10 + xgr %r2,%r10 # Ch(e,f,g) + al %r14,36(%r4,%r13) # T1+=K[i] + rll %r0,%r12,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r5 + xgr %r11,%r0 + lgr %r2,%r12 + ngr %r1,%r6 + rll %r0,%r0,11 + xgr %r11,%r0 # h=Sigma0(a) + ngr %r2,%r5 + algr %r11,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r7,%r14 # d+=T1 + algr %r11,%r2 # h+=Maj(a,b,c) + llgf %r14,40(%r3) ### 10 + rll %r0,%r7,7 + rll %r1,%r7,21 + lgr %r2,%r8 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r9 + st %r14,200(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r10 # T1+=h + ngr %r2,%r7 + lgr %r1,%r11 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r10,%r11,10 + xgr %r2,%r9 # Ch(e,f,g) + al %r14,40(%r4,%r13) # T1+=K[i] + rll %r0,%r11,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r12 + xgr %r10,%r0 + lgr %r2,%r11 + ngr %r1,%r5 + rll %r0,%r0,11 + xgr %r10,%r0 # h=Sigma0(a) + ngr %r2,%r12 + algr %r10,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r6,%r14 # d+=T1 + algr %r10,%r2 # h+=Maj(a,b,c) + llgf %r14,44(%r3) ### 11 + rll %r0,%r6,7 + rll %r1,%r6,21 + lgr %r2,%r7 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r8 + st %r14,204(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r9 # T1+=h + ngr %r2,%r6 + lgr 
%r1,%r10 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r9,%r10,10 + xgr %r2,%r8 # Ch(e,f,g) + al %r14,44(%r4,%r13) # T1+=K[i] + rll %r0,%r10,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r11 + xgr %r9,%r0 + lgr %r2,%r10 + ngr %r1,%r12 + rll %r0,%r0,11 + xgr %r9,%r0 # h=Sigma0(a) + ngr %r2,%r11 + algr %r9,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r5,%r14 # d+=T1 + algr %r9,%r2 # h+=Maj(a,b,c) + llgf %r14,48(%r3) ### 12 + rll %r0,%r5,7 + rll %r1,%r5,21 + lgr %r2,%r6 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r7 + st %r14,208(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r8 # T1+=h + ngr %r2,%r5 + lgr %r1,%r9 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r8,%r9,10 + xgr %r2,%r7 # Ch(e,f,g) + al %r14,48(%r4,%r13) # T1+=K[i] + rll %r0,%r9,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r10 + xgr %r8,%r0 + lgr %r2,%r9 + ngr %r1,%r11 + rll %r0,%r0,11 + xgr %r8,%r0 # h=Sigma0(a) + ngr %r2,%r10 + algr %r8,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r12,%r14 # d+=T1 + algr %r8,%r2 # h+=Maj(a,b,c) + llgf %r14,52(%r3) ### 13 + rll %r0,%r12,7 + rll %r1,%r12,21 + lgr %r2,%r5 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r6 + st %r14,212(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r7 # T1+=h + ngr %r2,%r12 + lgr %r1,%r8 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r7,%r8,10 + xgr %r2,%r6 # Ch(e,f,g) + al %r14,52(%r4,%r13) # T1+=K[i] + rll %r0,%r8,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r9 + xgr %r7,%r0 + lgr %r2,%r8 + ngr %r1,%r10 + rll %r0,%r0,11 + xgr %r7,%r0 # h=Sigma0(a) + ngr %r2,%r9 + algr %r7,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r11,%r14 # d+=T1 + algr %r7,%r2 # h+=Maj(a,b,c) + llgf %r14,56(%r3) ### 14 + rll %r0,%r11,7 + rll %r1,%r11,21 + lgr %r2,%r12 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r5 + st %r14,216(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r6 # T1+=h + ngr %r2,%r11 + lgr %r1,%r7 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r6,%r7,10 + xgr %r2,%r5 # Ch(e,f,g) + al %r14,56(%r4,%r13) # T1+=K[i] + rll %r0,%r7,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r8 + xgr %r6,%r0 
+ lgr %r2,%r7 + ngr %r1,%r9 + rll %r0,%r0,11 + xgr %r6,%r0 # h=Sigma0(a) + ngr %r2,%r8 + algr %r6,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r10,%r14 # d+=T1 + algr %r6,%r2 # h+=Maj(a,b,c) + llgf %r14,60(%r3) ### 15 + rll %r0,%r10,7 + rll %r1,%r10,21 + lgr %r2,%r11 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r12 + st %r14,220(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r5 # T1+=h + ngr %r2,%r10 + lgr %r1,%r6 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r5,%r6,10 + xgr %r2,%r12 # Ch(e,f,g) + al %r14,60(%r4,%r13) # T1+=K[i] + rll %r0,%r6,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r7 + xgr %r5,%r0 + lgr %r2,%r6 + ngr %r1,%r8 + rll %r0,%r0,11 + xgr %r5,%r0 # h=Sigma0(a) + ngr %r2,%r7 + algr %r5,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r9,%r14 # d+=T1 + algr %r5,%r2 # h+=Maj(a,b,c) +.Lrounds_16_xx: + llgf %r14,164(%r15) ### 16 + llgf %r1,216(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,160(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,196(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r9,7 + rll %r1,%r9,21 + lgr %r2,%r10 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r11 + st %r14,160(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r12 # T1+=h + ngr %r2,%r9 + lgr %r1,%r5 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r12,%r5,10 + xgr %r2,%r11 # Ch(e,f,g) + al %r14,64(%r4,%r13) # T1+=K[i] + rll %r0,%r5,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r6 + xgr %r12,%r0 + lgr %r2,%r5 + ngr %r1,%r7 + rll %r0,%r0,11 + xgr %r12,%r0 # h=Sigma0(a) + ngr %r2,%r6 + algr %r12,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r8,%r14 # d+=T1 + algr %r12,%r2 # h+=Maj(a,b,c) + llgf %r14,168(%r15) ### 17 + llgf %r1,220(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,164(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,200(%r15) # +=X[i+9] + xgr 
%r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r8,7 + rll %r1,%r8,21 + lgr %r2,%r9 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r10 + st %r14,164(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r11 # T1+=h + ngr %r2,%r8 + lgr %r1,%r12 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r11,%r12,10 + xgr %r2,%r10 # Ch(e,f,g) + al %r14,68(%r4,%r13) # T1+=K[i] + rll %r0,%r12,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r5 + xgr %r11,%r0 + lgr %r2,%r12 + ngr %r1,%r6 + rll %r0,%r0,11 + xgr %r11,%r0 # h=Sigma0(a) + ngr %r2,%r5 + algr %r11,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r7,%r14 # d+=T1 + algr %r11,%r2 # h+=Maj(a,b,c) + llgf %r14,172(%r15) ### 18 + llgf %r1,160(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,168(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,204(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r7,7 + rll %r1,%r7,21 + lgr %r2,%r8 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r9 + st %r14,168(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r10 # T1+=h + ngr %r2,%r7 + lgr %r1,%r11 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r10,%r11,10 + xgr %r2,%r9 # Ch(e,f,g) + al %r14,72(%r4,%r13) # T1+=K[i] + rll %r0,%r11,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r12 + xgr %r10,%r0 + lgr %r2,%r11 + ngr %r1,%r5 + rll %r0,%r0,11 + xgr %r10,%r0 # h=Sigma0(a) + ngr %r2,%r12 + algr %r10,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r6,%r14 # d+=T1 + algr %r10,%r2 # h+=Maj(a,b,c) + llgf %r14,176(%r15) ### 19 + llgf %r1,164(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,172(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,208(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r6,7 + rll %r1,%r6,21 + lgr %r2,%r7 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r8 + st %r14,172(%r15) 
+ xgr %r0,%r1 # Sigma1(e) + algr %r14,%r9 # T1+=h + ngr %r2,%r6 + lgr %r1,%r10 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r9,%r10,10 + xgr %r2,%r8 # Ch(e,f,g) + al %r14,76(%r4,%r13) # T1+=K[i] + rll %r0,%r10,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r11 + xgr %r9,%r0 + lgr %r2,%r10 + ngr %r1,%r12 + rll %r0,%r0,11 + xgr %r9,%r0 # h=Sigma0(a) + ngr %r2,%r11 + algr %r9,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r5,%r14 # d+=T1 + algr %r9,%r2 # h+=Maj(a,b,c) + llgf %r14,180(%r15) ### 20 + llgf %r1,168(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,176(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,212(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r5,7 + rll %r1,%r5,21 + lgr %r2,%r6 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r7 + st %r14,176(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r8 # T1+=h + ngr %r2,%r5 + lgr %r1,%r9 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r8,%r9,10 + xgr %r2,%r7 # Ch(e,f,g) + al %r14,80(%r4,%r13) # T1+=K[i] + rll %r0,%r9,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r10 + xgr %r8,%r0 + lgr %r2,%r9 + ngr %r1,%r11 + rll %r0,%r0,11 + xgr %r8,%r0 # h=Sigma0(a) + ngr %r2,%r10 + algr %r8,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r12,%r14 # d+=T1 + algr %r8,%r2 # h+=Maj(a,b,c) + llgf %r14,184(%r15) ### 21 + llgf %r1,172(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,180(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,216(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r12,7 + rll %r1,%r12,21 + lgr %r2,%r5 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r6 + st %r14,180(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r7 # T1+=h + ngr %r2,%r12 + lgr %r1,%r8 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r7,%r8,10 + xgr %r2,%r6 # Ch(e,f,g) + al %r14,84(%r4,%r13) # 
T1+=K[i] + rll %r0,%r8,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r9 + xgr %r7,%r0 + lgr %r2,%r8 + ngr %r1,%r10 + rll %r0,%r0,11 + xgr %r7,%r0 # h=Sigma0(a) + ngr %r2,%r9 + algr %r7,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r11,%r14 # d+=T1 + algr %r7,%r2 # h+=Maj(a,b,c) + llgf %r14,188(%r15) ### 22 + llgf %r1,176(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,184(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,220(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r11,7 + rll %r1,%r11,21 + lgr %r2,%r12 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r5 + st %r14,184(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r6 # T1+=h + ngr %r2,%r11 + lgr %r1,%r7 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r6,%r7,10 + xgr %r2,%r5 # Ch(e,f,g) + al %r14,88(%r4,%r13) # T1+=K[i] + rll %r0,%r7,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r8 + xgr %r6,%r0 + lgr %r2,%r7 + ngr %r1,%r9 + rll %r0,%r0,11 + xgr %r6,%r0 # h=Sigma0(a) + ngr %r2,%r8 + algr %r6,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r10,%r14 # d+=T1 + algr %r6,%r2 # h+=Maj(a,b,c) + llgf %r14,192(%r15) ### 23 + llgf %r1,180(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,188(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,160(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r10,7 + rll %r1,%r10,21 + lgr %r2,%r11 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r12 + st %r14,188(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r5 # T1+=h + ngr %r2,%r10 + lgr %r1,%r6 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r5,%r6,10 + xgr %r2,%r12 # Ch(e,f,g) + al %r14,92(%r4,%r13) # T1+=K[i] + rll %r0,%r6,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r7 + xgr %r5,%r0 + lgr %r2,%r6 + ngr %r1,%r8 + rll %r0,%r0,11 + xgr %r5,%r0 # h=Sigma0(a) + ngr %r2,%r7 + algr 
%r5,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r9,%r14 # d+=T1 + algr %r5,%r2 # h+=Maj(a,b,c) + llgf %r14,196(%r15) ### 24 + llgf %r1,184(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,192(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,164(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r9,7 + rll %r1,%r9,21 + lgr %r2,%r10 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r11 + st %r14,192(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r12 # T1+=h + ngr %r2,%r9 + lgr %r1,%r5 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r12,%r5,10 + xgr %r2,%r11 # Ch(e,f,g) + al %r14,96(%r4,%r13) # T1+=K[i] + rll %r0,%r5,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r6 + xgr %r12,%r0 + lgr %r2,%r5 + ngr %r1,%r7 + rll %r0,%r0,11 + xgr %r12,%r0 # h=Sigma0(a) + ngr %r2,%r6 + algr %r12,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r8,%r14 # d+=T1 + algr %r12,%r2 # h+=Maj(a,b,c) + llgf %r14,200(%r15) ### 25 + llgf %r1,188(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,196(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,168(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r8,7 + rll %r1,%r8,21 + lgr %r2,%r9 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r10 + st %r14,196(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r11 # T1+=h + ngr %r2,%r8 + lgr %r1,%r12 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r11,%r12,10 + xgr %r2,%r10 # Ch(e,f,g) + al %r14,100(%r4,%r13) # T1+=K[i] + rll %r0,%r12,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r5 + xgr %r11,%r0 + lgr %r2,%r12 + ngr %r1,%r6 + rll %r0,%r0,11 + xgr %r11,%r0 # h=Sigma0(a) + ngr %r2,%r5 + algr %r11,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r7,%r14 # d+=T1 + algr %r11,%r2 # h+=Maj(a,b,c) + llgf %r14,204(%r15) ### 26 + llgf %r1,192(%r15) + rll %r0,%r14,14 + srl 
%r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,200(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,172(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r7,7 + rll %r1,%r7,21 + lgr %r2,%r8 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r9 + st %r14,200(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r10 # T1+=h + ngr %r2,%r7 + lgr %r1,%r11 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r10,%r11,10 + xgr %r2,%r9 # Ch(e,f,g) + al %r14,104(%r4,%r13) # T1+=K[i] + rll %r0,%r11,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r12 + xgr %r10,%r0 + lgr %r2,%r11 + ngr %r1,%r5 + rll %r0,%r0,11 + xgr %r10,%r0 # h=Sigma0(a) + ngr %r2,%r12 + algr %r10,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r6,%r14 # d+=T1 + algr %r10,%r2 # h+=Maj(a,b,c) + llgf %r14,208(%r15) ### 27 + llgf %r1,196(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,204(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,176(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r6,7 + rll %r1,%r6,21 + lgr %r2,%r7 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r8 + st %r14,204(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r9 # T1+=h + ngr %r2,%r6 + lgr %r1,%r10 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r9,%r10,10 + xgr %r2,%r8 # Ch(e,f,g) + al %r14,108(%r4,%r13) # T1+=K[i] + rll %r0,%r10,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r11 + xgr %r9,%r0 + lgr %r2,%r10 + ngr %r1,%r12 + rll %r0,%r0,11 + xgr %r9,%r0 # h=Sigma0(a) + ngr %r2,%r11 + algr %r9,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r5,%r14 # d+=T1 + algr %r9,%r2 # h+=Maj(a,b,c) + llgf %r14,212(%r15) ### 28 + llgf %r1,200(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,208(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al 
%r14,180(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r5,7 + rll %r1,%r5,21 + lgr %r2,%r6 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r7 + st %r14,208(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r8 # T1+=h + ngr %r2,%r5 + lgr %r1,%r9 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r8,%r9,10 + xgr %r2,%r7 # Ch(e,f,g) + al %r14,112(%r4,%r13) # T1+=K[i] + rll %r0,%r9,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r10 + xgr %r8,%r0 + lgr %r2,%r9 + ngr %r1,%r11 + rll %r0,%r0,11 + xgr %r8,%r0 # h=Sigma0(a) + ngr %r2,%r10 + algr %r8,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r12,%r14 # d+=T1 + algr %r8,%r2 # h+=Maj(a,b,c) + llgf %r14,216(%r15) ### 29 + llgf %r1,204(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,212(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,184(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r12,7 + rll %r1,%r12,21 + lgr %r2,%r5 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r6 + st %r14,212(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r7 # T1+=h + ngr %r2,%r12 + lgr %r1,%r8 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r7,%r8,10 + xgr %r2,%r6 # Ch(e,f,g) + al %r14,116(%r4,%r13) # T1+=K[i] + rll %r0,%r8,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r9 + xgr %r7,%r0 + lgr %r2,%r8 + ngr %r1,%r10 + rll %r0,%r0,11 + xgr %r7,%r0 # h=Sigma0(a) + ngr %r2,%r9 + algr %r7,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r11,%r14 # d+=T1 + algr %r7,%r2 # h+=Maj(a,b,c) + llgf %r14,220(%r15) ### 30 + llgf %r1,208(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,216(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,188(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r11,7 + rll %r1,%r11,21 + lgr %r2,%r12 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr 
%r2,%r5 + st %r14,216(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r6 # T1+=h + ngr %r2,%r11 + lgr %r1,%r7 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r6,%r7,10 + xgr %r2,%r5 # Ch(e,f,g) + al %r14,120(%r4,%r13) # T1+=K[i] + rll %r0,%r7,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r8 + xgr %r6,%r0 + lgr %r2,%r7 + ngr %r1,%r9 + rll %r0,%r0,11 + xgr %r6,%r0 # h=Sigma0(a) + ngr %r2,%r8 + algr %r6,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r10,%r14 # d+=T1 + algr %r6,%r2 # h+=Maj(a,b,c) + llgf %r14,160(%r15) ### 31 + llgf %r1,212(%r15) + rll %r0,%r14,14 + srl %r14,3 + rll %r2,%r0,11 + xgr %r14,%r0 + rll %r0,%r1,13 + xgr %r14,%r2 # sigma0(X[i+1]) + srl %r1,10 + al %r14,220(%r15) # +=X[i] + xgr %r1,%r0 + rll %r0,%r0,2 + al %r14,192(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rll %r0,%r10,7 + rll %r1,%r10,21 + lgr %r2,%r11 + xgr %r0,%r1 + rll %r1,%r1,5 + xgr %r2,%r12 + st %r14,220(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r5 # T1+=h + ngr %r2,%r10 + lgr %r1,%r6 + algr %r14,%r0 # T1+=Sigma1(e) + rll %r5,%r6,10 + xgr %r2,%r12 # Ch(e,f,g) + al %r14,124(%r4,%r13) # T1+=K[i] + rll %r0,%r6,19 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r7 + xgr %r5,%r0 + lgr %r2,%r6 + ngr %r1,%r8 + rll %r0,%r0,11 + xgr %r5,%r0 # h=Sigma0(a) + ngr %r2,%r7 + algr %r5,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r9,%r14 # d+=T1 + algr %r5,%r2 # h+=Maj(a,b,c) + aghi %r4,64 + lghi %r0,192 + clgr %r4,%r0 + jne .Lrounds_16_xx + + lg %r2,240(%r15) + la %r3,64(%r3) + al %r5,0(%r2) + al %r6,4(%r2) + al %r7,8(%r2) + al %r8,12(%r2) + al %r9,16(%r2) + al %r10,20(%r2) + al %r11,24(%r2) + al %r12,28(%r2) + st %r5,0(%r2) + st %r6,4(%r2) + st %r7,8(%r2) + st %r8,12(%r2) + st %r9,16(%r2) + st %r10,20(%r2) + st %r11,24(%r2) + st %r12,28(%r2) + clg %r3,256(%r15) + jne .Lloop + + lmg %r6,%r15,272(%r15) + br %r14 +.size sha256_block_data_order,.-sha256_block_data_order +.string "SHA256 block transform for s390x, CRYPTOGAMS by <appro@openssl.org>" diff --git 
a/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha512-s390x.S b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha512-s390x.S new file mode 100644 index 0000000000..79e4277351 --- /dev/null +++ b/deps/openssl/config/archs/linux64-s390x/asm_avx2/crypto/sha/sha512-s390x.S @@ -0,0 +1,1266 @@ +#include "s390x_arch.h" + +.text +.align 64 +.type K512,@object +K512: /* K512: table of 80 64-bit constants (40 .quad pairs, 640 bytes), 64-byte aligned and placed in .text; sha512_block_data_order loads its address into %r13 with larl and adds one entry per round in the steps marked T1+=K[i] */ + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 
+ .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.size K512,.-K512 +.globl sha512_block_data_order +.type sha512_block_data_order,@function +sha512_block_data_order: + sllg %r4,%r4,7 + larl %r1,OPENSSL_s390xcap_P + lg %r0,S390X_KIMD(%r1) # check kimd capabilities + tmhh %r0,4096 + jz .Lsoftware + lghi %r0,3 + lgr %r1,%r2 + lgr %r2,%r3 + lgr %r3,%r4 + .long 0xb93e0002 # kimd %r0,%r2 + brc 1,.-4 # pay attention to "partial completion" + br %r14 +.align 16 +.Lsoftware: + lghi %r1,-288 + la %r4,0(%r4,%r3) + stmg %r2,%r15,16(%r15) + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + + larl %r13,K512 + lg %r5,0(%r2) + lg %r6,8(%r2) + lg %r7,16(%r2) + lg %r8,24(%r2) + lg %r9,32(%r2) + lg %r10,40(%r2) + lg %r11,48(%r2) + lg %r12,56(%r2) + +.Lloop: + lghi %r4,0 + lg %r14,0(%r3) ### 0 + rllg %r0,%r9,23 + rllg %r1,%r9,46 + lgr %r2,%r10 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r11 + stg %r14,160(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r12 # T1+=h + ngr %r2,%r9 + lgr %r1,%r5 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r12,%r5,25 + xgr %r2,%r11 # Ch(e,f,g) + alg %r14,0(%r4,%r13) # T1+=K[i] + rllg %r0,%r5,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r6 + xgr %r12,%r0 + lgr %r2,%r5 + ngr %r1,%r7 + rllg %r0,%r0,6 + xgr %r12,%r0 # h=Sigma0(a) + ngr %r2,%r6 + algr %r12,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r8,%r14 # d+=T1 + algr %r12,%r2 # h+=Maj(a,b,c) + lg %r14,8(%r3) ### 1 + rllg %r0,%r8,23 + rllg %r1,%r8,46 + lgr %r2,%r9 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r10 + stg %r14,168(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r11 # T1+=h + ngr %r2,%r8 + lgr %r1,%r12 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r11,%r12,25 + xgr %r2,%r10 # Ch(e,f,g) + alg %r14,8(%r4,%r13) # T1+=K[i] + rllg %r0,%r12,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r5 + xgr %r11,%r0 + lgr %r2,%r12 + ngr %r1,%r6 + 
rllg %r0,%r0,6 + xgr %r11,%r0 # h=Sigma0(a) + ngr %r2,%r5 + algr %r11,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r7,%r14 # d+=T1 + algr %r11,%r2 # h+=Maj(a,b,c) + lg %r14,16(%r3) ### 2 + rllg %r0,%r7,23 + rllg %r1,%r7,46 + lgr %r2,%r8 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r9 + stg %r14,176(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r10 # T1+=h + ngr %r2,%r7 + lgr %r1,%r11 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r10,%r11,25 + xgr %r2,%r9 # Ch(e,f,g) + alg %r14,16(%r4,%r13) # T1+=K[i] + rllg %r0,%r11,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r12 + xgr %r10,%r0 + lgr %r2,%r11 + ngr %r1,%r5 + rllg %r0,%r0,6 + xgr %r10,%r0 # h=Sigma0(a) + ngr %r2,%r12 + algr %r10,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r6,%r14 # d+=T1 + algr %r10,%r2 # h+=Maj(a,b,c) + lg %r14,24(%r3) ### 3 + rllg %r0,%r6,23 + rllg %r1,%r6,46 + lgr %r2,%r7 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r8 + stg %r14,184(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r9 # T1+=h + ngr %r2,%r6 + lgr %r1,%r10 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r9,%r10,25 + xgr %r2,%r8 # Ch(e,f,g) + alg %r14,24(%r4,%r13) # T1+=K[i] + rllg %r0,%r10,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r11 + xgr %r9,%r0 + lgr %r2,%r10 + ngr %r1,%r12 + rllg %r0,%r0,6 + xgr %r9,%r0 # h=Sigma0(a) + ngr %r2,%r11 + algr %r9,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r5,%r14 # d+=T1 + algr %r9,%r2 # h+=Maj(a,b,c) + lg %r14,32(%r3) ### 4 + rllg %r0,%r5,23 + rllg %r1,%r5,46 + lgr %r2,%r6 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r7 + stg %r14,192(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r8 # T1+=h + ngr %r2,%r5 + lgr %r1,%r9 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r8,%r9,25 + xgr %r2,%r7 # Ch(e,f,g) + alg %r14,32(%r4,%r13) # T1+=K[i] + rllg %r0,%r9,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r10 + xgr %r8,%r0 + lgr %r2,%r9 + ngr %r1,%r11 + rllg %r0,%r0,6 + xgr %r8,%r0 # h=Sigma0(a) + ngr %r2,%r10 + algr %r8,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r12,%r14 # d+=T1 + algr %r8,%r2 # h+=Maj(a,b,c) + lg 
%r14,40(%r3) ### 5 + rllg %r0,%r12,23 + rllg %r1,%r12,46 + lgr %r2,%r5 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r6 + stg %r14,200(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r7 # T1+=h + ngr %r2,%r12 + lgr %r1,%r8 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r7,%r8,25 + xgr %r2,%r6 # Ch(e,f,g) + alg %r14,40(%r4,%r13) # T1+=K[i] + rllg %r0,%r8,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r9 + xgr %r7,%r0 + lgr %r2,%r8 + ngr %r1,%r10 + rllg %r0,%r0,6 + xgr %r7,%r0 # h=Sigma0(a) + ngr %r2,%r9 + algr %r7,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r11,%r14 # d+=T1 + algr %r7,%r2 # h+=Maj(a,b,c) + lg %r14,48(%r3) ### 6 + rllg %r0,%r11,23 + rllg %r1,%r11,46 + lgr %r2,%r12 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r5 + stg %r14,208(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r6 # T1+=h + ngr %r2,%r11 + lgr %r1,%r7 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r6,%r7,25 + xgr %r2,%r5 # Ch(e,f,g) + alg %r14,48(%r4,%r13) # T1+=K[i] + rllg %r0,%r7,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r8 + xgr %r6,%r0 + lgr %r2,%r7 + ngr %r1,%r9 + rllg %r0,%r0,6 + xgr %r6,%r0 # h=Sigma0(a) + ngr %r2,%r8 + algr %r6,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r10,%r14 # d+=T1 + algr %r6,%r2 # h+=Maj(a,b,c) + lg %r14,56(%r3) ### 7 + rllg %r0,%r10,23 + rllg %r1,%r10,46 + lgr %r2,%r11 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r12 + stg %r14,216(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r5 # T1+=h + ngr %r2,%r10 + lgr %r1,%r6 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r5,%r6,25 + xgr %r2,%r12 # Ch(e,f,g) + alg %r14,56(%r4,%r13) # T1+=K[i] + rllg %r0,%r6,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r7 + xgr %r5,%r0 + lgr %r2,%r6 + ngr %r1,%r8 + rllg %r0,%r0,6 + xgr %r5,%r0 # h=Sigma0(a) + ngr %r2,%r7 + algr %r5,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r9,%r14 # d+=T1 + algr %r5,%r2 # h+=Maj(a,b,c) + lg %r14,64(%r3) ### 8 + rllg %r0,%r9,23 + rllg %r1,%r9,46 + lgr %r2,%r10 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r11 + stg %r14,224(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r12 # 
T1+=h + ngr %r2,%r9 + lgr %r1,%r5 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r12,%r5,25 + xgr %r2,%r11 # Ch(e,f,g) + alg %r14,64(%r4,%r13) # T1+=K[i] + rllg %r0,%r5,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r6 + xgr %r12,%r0 + lgr %r2,%r5 + ngr %r1,%r7 + rllg %r0,%r0,6 + xgr %r12,%r0 # h=Sigma0(a) + ngr %r2,%r6 + algr %r12,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r8,%r14 # d+=T1 + algr %r12,%r2 # h+=Maj(a,b,c) + lg %r14,72(%r3) ### 9 + rllg %r0,%r8,23 + rllg %r1,%r8,46 + lgr %r2,%r9 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r10 + stg %r14,232(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r11 # T1+=h + ngr %r2,%r8 + lgr %r1,%r12 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r11,%r12,25 + xgr %r2,%r10 # Ch(e,f,g) + alg %r14,72(%r4,%r13) # T1+=K[i] + rllg %r0,%r12,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r5 + xgr %r11,%r0 + lgr %r2,%r12 + ngr %r1,%r6 + rllg %r0,%r0,6 + xgr %r11,%r0 # h=Sigma0(a) + ngr %r2,%r5 + algr %r11,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r7,%r14 # d+=T1 + algr %r11,%r2 # h+=Maj(a,b,c) + lg %r14,80(%r3) ### 10 + rllg %r0,%r7,23 + rllg %r1,%r7,46 + lgr %r2,%r8 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r9 + stg %r14,240(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r10 # T1+=h + ngr %r2,%r7 + lgr %r1,%r11 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r10,%r11,25 + xgr %r2,%r9 # Ch(e,f,g) + alg %r14,80(%r4,%r13) # T1+=K[i] + rllg %r0,%r11,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r12 + xgr %r10,%r0 + lgr %r2,%r11 + ngr %r1,%r5 + rllg %r0,%r0,6 + xgr %r10,%r0 # h=Sigma0(a) + ngr %r2,%r12 + algr %r10,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r6,%r14 # d+=T1 + algr %r10,%r2 # h+=Maj(a,b,c) + lg %r14,88(%r3) ### 11 + rllg %r0,%r6,23 + rllg %r1,%r6,46 + lgr %r2,%r7 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r8 + stg %r14,248(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r9 # T1+=h + ngr %r2,%r6 + lgr %r1,%r10 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r9,%r10,25 + xgr %r2,%r8 # Ch(e,f,g) + alg %r14,88(%r4,%r13) # T1+=K[i] + rllg %r0,%r10,30 + 
algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r11 + xgr %r9,%r0 + lgr %r2,%r10 + ngr %r1,%r12 + rllg %r0,%r0,6 + xgr %r9,%r0 # h=Sigma0(a) + ngr %r2,%r11 + algr %r9,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r5,%r14 # d+=T1 + algr %r9,%r2 # h+=Maj(a,b,c) + lg %r14,96(%r3) ### 12 + rllg %r0,%r5,23 + rllg %r1,%r5,46 + lgr %r2,%r6 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r7 + stg %r14,256(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r8 # T1+=h + ngr %r2,%r5 + lgr %r1,%r9 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r8,%r9,25 + xgr %r2,%r7 # Ch(e,f,g) + alg %r14,96(%r4,%r13) # T1+=K[i] + rllg %r0,%r9,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r10 + xgr %r8,%r0 + lgr %r2,%r9 + ngr %r1,%r11 + rllg %r0,%r0,6 + xgr %r8,%r0 # h=Sigma0(a) + ngr %r2,%r10 + algr %r8,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r12,%r14 # d+=T1 + algr %r8,%r2 # h+=Maj(a,b,c) + lg %r14,104(%r3) ### 13 + rllg %r0,%r12,23 + rllg %r1,%r12,46 + lgr %r2,%r5 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r6 + stg %r14,264(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r7 # T1+=h + ngr %r2,%r12 + lgr %r1,%r8 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r7,%r8,25 + xgr %r2,%r6 # Ch(e,f,g) + alg %r14,104(%r4,%r13) # T1+=K[i] + rllg %r0,%r8,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r9 + xgr %r7,%r0 + lgr %r2,%r8 + ngr %r1,%r10 + rllg %r0,%r0,6 + xgr %r7,%r0 # h=Sigma0(a) + ngr %r2,%r9 + algr %r7,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r11,%r14 # d+=T1 + algr %r7,%r2 # h+=Maj(a,b,c) + lg %r14,112(%r3) ### 14 + rllg %r0,%r11,23 + rllg %r1,%r11,46 + lgr %r2,%r12 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r5 + stg %r14,272(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r6 # T1+=h + ngr %r2,%r11 + lgr %r1,%r7 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r6,%r7,25 + xgr %r2,%r5 # Ch(e,f,g) + alg %r14,112(%r4,%r13) # T1+=K[i] + rllg %r0,%r7,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r8 + xgr %r6,%r0 + lgr %r2,%r7 + ngr %r1,%r9 + rllg %r0,%r0,6 + xgr %r6,%r0 # h=Sigma0(a) + ngr %r2,%r8 + algr %r6,%r14 # h+=T1 + ogr 
%r2,%r1 # Maj(a,b,c) + algr %r10,%r14 # d+=T1 + algr %r6,%r2 # h+=Maj(a,b,c) + lg %r14,120(%r3) ### 15 + rllg %r0,%r10,23 + rllg %r1,%r10,46 + lgr %r2,%r11 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r12 + stg %r14,280(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r5 # T1+=h + ngr %r2,%r10 + lgr %r1,%r6 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r5,%r6,25 + xgr %r2,%r12 # Ch(e,f,g) + alg %r14,120(%r4,%r13) # T1+=K[i] + rllg %r0,%r6,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r7 + xgr %r5,%r0 + lgr %r2,%r6 + ngr %r1,%r8 + rllg %r0,%r0,6 + xgr %r5,%r0 # h=Sigma0(a) + ngr %r2,%r7 + algr %r5,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r9,%r14 # d+=T1 + algr %r5,%r2 # h+=Maj(a,b,c) +.Lrounds_16_xx: + lg %r14,168(%r15) ### 16 + lg %r1,272(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,160(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,232(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r9,23 + rllg %r1,%r9,46 + lgr %r2,%r10 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r11 + stg %r14,160(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r12 # T1+=h + ngr %r2,%r9 + lgr %r1,%r5 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r12,%r5,25 + xgr %r2,%r11 # Ch(e,f,g) + alg %r14,128(%r4,%r13) # T1+=K[i] + rllg %r0,%r5,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r6 + xgr %r12,%r0 + lgr %r2,%r5 + ngr %r1,%r7 + rllg %r0,%r0,6 + xgr %r12,%r0 # h=Sigma0(a) + ngr %r2,%r6 + algr %r12,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r8,%r14 # d+=T1 + algr %r12,%r2 # h+=Maj(a,b,c) + lg %r14,176(%r15) ### 17 + lg %r1,280(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,168(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,240(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r8,23 + 
rllg %r1,%r8,46 + lgr %r2,%r9 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r10 + stg %r14,168(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r11 # T1+=h + ngr %r2,%r8 + lgr %r1,%r12 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r11,%r12,25 + xgr %r2,%r10 # Ch(e,f,g) + alg %r14,136(%r4,%r13) # T1+=K[i] + rllg %r0,%r12,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r5 + xgr %r11,%r0 + lgr %r2,%r12 + ngr %r1,%r6 + rllg %r0,%r0,6 + xgr %r11,%r0 # h=Sigma0(a) + ngr %r2,%r5 + algr %r11,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r7,%r14 # d+=T1 + algr %r11,%r2 # h+=Maj(a,b,c) + lg %r14,184(%r15) ### 18 + lg %r1,160(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,176(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,248(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r7,23 + rllg %r1,%r7,46 + lgr %r2,%r8 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r9 + stg %r14,176(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r10 # T1+=h + ngr %r2,%r7 + lgr %r1,%r11 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r10,%r11,25 + xgr %r2,%r9 # Ch(e,f,g) + alg %r14,144(%r4,%r13) # T1+=K[i] + rllg %r0,%r11,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r12 + xgr %r10,%r0 + lgr %r2,%r11 + ngr %r1,%r5 + rllg %r0,%r0,6 + xgr %r10,%r0 # h=Sigma0(a) + ngr %r2,%r12 + algr %r10,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r6,%r14 # d+=T1 + algr %r10,%r2 # h+=Maj(a,b,c) + lg %r14,192(%r15) ### 19 + lg %r1,168(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,184(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,256(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r6,23 + rllg %r1,%r6,46 + lgr %r2,%r7 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r8 + stg %r14,184(%r15) + xgr %r0,%r1 # Sigma1(e) + algr 
%r14,%r9 # T1+=h + ngr %r2,%r6 + lgr %r1,%r10 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r9,%r10,25 + xgr %r2,%r8 # Ch(e,f,g) + alg %r14,152(%r4,%r13) # T1+=K[i] + rllg %r0,%r10,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r11 + xgr %r9,%r0 + lgr %r2,%r10 + ngr %r1,%r12 + rllg %r0,%r0,6 + xgr %r9,%r0 # h=Sigma0(a) + ngr %r2,%r11 + algr %r9,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r5,%r14 # d+=T1 + algr %r9,%r2 # h+=Maj(a,b,c) + lg %r14,200(%r15) ### 20 + lg %r1,176(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,192(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,264(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r5,23 + rllg %r1,%r5,46 + lgr %r2,%r6 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r7 + stg %r14,192(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r8 # T1+=h + ngr %r2,%r5 + lgr %r1,%r9 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r8,%r9,25 + xgr %r2,%r7 # Ch(e,f,g) + alg %r14,160(%r4,%r13) # T1+=K[i] + rllg %r0,%r9,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r10 + xgr %r8,%r0 + lgr %r2,%r9 + ngr %r1,%r11 + rllg %r0,%r0,6 + xgr %r8,%r0 # h=Sigma0(a) + ngr %r2,%r10 + algr %r8,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r12,%r14 # d+=T1 + algr %r8,%r2 # h+=Maj(a,b,c) + lg %r14,208(%r15) ### 21 + lg %r1,184(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,200(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,272(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r12,23 + rllg %r1,%r12,46 + lgr %r2,%r5 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r6 + stg %r14,200(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r7 # T1+=h + ngr %r2,%r12 + lgr %r1,%r8 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r7,%r8,25 + xgr %r2,%r6 # Ch(e,f,g) + alg 
%r14,168(%r4,%r13) # T1+=K[i] + rllg %r0,%r8,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r9 + xgr %r7,%r0 + lgr %r2,%r8 + ngr %r1,%r10 + rllg %r0,%r0,6 + xgr %r7,%r0 # h=Sigma0(a) + ngr %r2,%r9 + algr %r7,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r11,%r14 # d+=T1 + algr %r7,%r2 # h+=Maj(a,b,c) + lg %r14,216(%r15) ### 22 + lg %r1,192(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,208(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,280(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r11,23 + rllg %r1,%r11,46 + lgr %r2,%r12 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r5 + stg %r14,208(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r6 # T1+=h + ngr %r2,%r11 + lgr %r1,%r7 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r6,%r7,25 + xgr %r2,%r5 # Ch(e,f,g) + alg %r14,176(%r4,%r13) # T1+=K[i] + rllg %r0,%r7,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r8 + xgr %r6,%r0 + lgr %r2,%r7 + ngr %r1,%r9 + rllg %r0,%r0,6 + xgr %r6,%r0 # h=Sigma0(a) + ngr %r2,%r8 + algr %r6,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r10,%r14 # d+=T1 + algr %r6,%r2 # h+=Maj(a,b,c) + lg %r14,224(%r15) ### 23 + lg %r1,200(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,216(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,160(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r10,23 + rllg %r1,%r10,46 + lgr %r2,%r11 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r12 + stg %r14,216(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r5 # T1+=h + ngr %r2,%r10 + lgr %r1,%r6 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r5,%r6,25 + xgr %r2,%r12 # Ch(e,f,g) + alg %r14,184(%r4,%r13) # T1+=K[i] + rllg %r0,%r6,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r7 + xgr %r5,%r0 + lgr %r2,%r6 + ngr %r1,%r8 + rllg 
%r0,%r0,6 + xgr %r5,%r0 # h=Sigma0(a) + ngr %r2,%r7 + algr %r5,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r9,%r14 # d+=T1 + algr %r5,%r2 # h+=Maj(a,b,c) + lg %r14,232(%r15) ### 24 + lg %r1,208(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,224(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,168(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r9,23 + rllg %r1,%r9,46 + lgr %r2,%r10 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r11 + stg %r14,224(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r12 # T1+=h + ngr %r2,%r9 + lgr %r1,%r5 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r12,%r5,25 + xgr %r2,%r11 # Ch(e,f,g) + alg %r14,192(%r4,%r13) # T1+=K[i] + rllg %r0,%r5,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r6 + xgr %r12,%r0 + lgr %r2,%r5 + ngr %r1,%r7 + rllg %r0,%r0,6 + xgr %r12,%r0 # h=Sigma0(a) + ngr %r2,%r6 + algr %r12,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r8,%r14 # d+=T1 + algr %r12,%r2 # h+=Maj(a,b,c) + lg %r14,240(%r15) ### 25 + lg %r1,216(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,232(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,176(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r8,23 + rllg %r1,%r8,46 + lgr %r2,%r9 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r10 + stg %r14,232(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r11 # T1+=h + ngr %r2,%r8 + lgr %r1,%r12 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r11,%r12,25 + xgr %r2,%r10 # Ch(e,f,g) + alg %r14,200(%r4,%r13) # T1+=K[i] + rllg %r0,%r12,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r5 + xgr %r11,%r0 + lgr %r2,%r12 + ngr %r1,%r6 + rllg %r0,%r0,6 + xgr %r11,%r0 # h=Sigma0(a) + ngr %r2,%r5 + algr %r11,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r7,%r14 # d+=T1 + algr 
%r11,%r2 # h+=Maj(a,b,c) + lg %r14,248(%r15) ### 26 + lg %r1,224(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,240(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,184(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r7,23 + rllg %r1,%r7,46 + lgr %r2,%r8 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r9 + stg %r14,240(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r10 # T1+=h + ngr %r2,%r7 + lgr %r1,%r11 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r10,%r11,25 + xgr %r2,%r9 # Ch(e,f,g) + alg %r14,208(%r4,%r13) # T1+=K[i] + rllg %r0,%r11,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r12 + xgr %r10,%r0 + lgr %r2,%r11 + ngr %r1,%r5 + rllg %r0,%r0,6 + xgr %r10,%r0 # h=Sigma0(a) + ngr %r2,%r12 + algr %r10,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r6,%r14 # d+=T1 + algr %r10,%r2 # h+=Maj(a,b,c) + lg %r14,256(%r15) ### 27 + lg %r1,232(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,248(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,192(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r6,23 + rllg %r1,%r6,46 + lgr %r2,%r7 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r8 + stg %r14,248(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r9 # T1+=h + ngr %r2,%r6 + lgr %r1,%r10 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r9,%r10,25 + xgr %r2,%r8 # Ch(e,f,g) + alg %r14,216(%r4,%r13) # T1+=K[i] + rllg %r0,%r10,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r11 + xgr %r9,%r0 + lgr %r2,%r10 + ngr %r1,%r12 + rllg %r0,%r0,6 + xgr %r9,%r0 # h=Sigma0(a) + ngr %r2,%r11 + algr %r9,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r5,%r14 # d+=T1 + algr %r9,%r2 # h+=Maj(a,b,c) + lg %r14,264(%r15) ### 28 + lg %r1,240(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr 
%r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,256(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,200(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r5,23 + rllg %r1,%r5,46 + lgr %r2,%r6 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r7 + stg %r14,256(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r8 # T1+=h + ngr %r2,%r5 + lgr %r1,%r9 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r8,%r9,25 + xgr %r2,%r7 # Ch(e,f,g) + alg %r14,224(%r4,%r13) # T1+=K[i] + rllg %r0,%r9,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r10 + xgr %r8,%r0 + lgr %r2,%r9 + ngr %r1,%r11 + rllg %r0,%r0,6 + xgr %r8,%r0 # h=Sigma0(a) + ngr %r2,%r10 + algr %r8,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r12,%r14 # d+=T1 + algr %r8,%r2 # h+=Maj(a,b,c) + lg %r14,272(%r15) ### 29 + lg %r1,248(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,264(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,208(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r12,23 + rllg %r1,%r12,46 + lgr %r2,%r5 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r6 + stg %r14,264(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r7 # T1+=h + ngr %r2,%r12 + lgr %r1,%r8 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r7,%r8,25 + xgr %r2,%r6 # Ch(e,f,g) + alg %r14,232(%r4,%r13) # T1+=K[i] + rllg %r0,%r8,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r9 + xgr %r7,%r0 + lgr %r2,%r8 + ngr %r1,%r10 + rllg %r0,%r0,6 + xgr %r7,%r0 # h=Sigma0(a) + ngr %r2,%r9 + algr %r7,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r11,%r14 # d+=T1 + algr %r7,%r2 # h+=Maj(a,b,c) + lg %r14,280(%r15) ### 30 + lg %r1,256(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,272(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg 
%r14,216(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r11,23 + rllg %r1,%r11,46 + lgr %r2,%r12 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r5 + stg %r14,272(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r6 # T1+=h + ngr %r2,%r11 + lgr %r1,%r7 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r6,%r7,25 + xgr %r2,%r5 # Ch(e,f,g) + alg %r14,240(%r4,%r13) # T1+=K[i] + rllg %r0,%r7,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r8 + xgr %r6,%r0 + lgr %r2,%r7 + ngr %r1,%r9 + rllg %r0,%r0,6 + xgr %r6,%r0 # h=Sigma0(a) + ngr %r2,%r8 + algr %r6,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r10,%r14 # d+=T1 + algr %r6,%r2 # h+=Maj(a,b,c) + lg %r14,160(%r15) ### 31 + lg %r1,264(%r15) + rllg %r0,%r14,56 + srlg %r14,%r14,7 + rllg %r2,%r0,7 + xgr %r14,%r0 + rllg %r0,%r1,3 + xgr %r14,%r2 # sigma0(X[i+1]) + srlg %r1,%r1,6 + alg %r14,280(%r15) # +=X[i] + xgr %r1,%r0 + rllg %r0,%r0,42 + alg %r14,224(%r15) # +=X[i+9] + xgr %r1,%r0 # sigma1(X[i+14]) + algr %r14,%r1 # +=sigma1(X[i+14]) + rllg %r0,%r10,23 + rllg %r1,%r10,46 + lgr %r2,%r11 + xgr %r0,%r1 + rllg %r1,%r1,4 + xgr %r2,%r12 + stg %r14,280(%r15) + xgr %r0,%r1 # Sigma1(e) + algr %r14,%r5 # T1+=h + ngr %r2,%r10 + lgr %r1,%r6 + algr %r14,%r0 # T1+=Sigma1(e) + rllg %r5,%r6,25 + xgr %r2,%r12 # Ch(e,f,g) + alg %r14,248(%r4,%r13) # T1+=K[i] + rllg %r0,%r6,30 + algr %r14,%r2 # T1+=Ch(e,f,g) + ogr %r1,%r7 + xgr %r5,%r0 + lgr %r2,%r6 + ngr %r1,%r8 + rllg %r0,%r0,6 + xgr %r5,%r0 # h=Sigma0(a) + ngr %r2,%r7 + algr %r5,%r14 # h+=T1 + ogr %r2,%r1 # Maj(a,b,c) + algr %r9,%r14 # d+=T1 + algr %r5,%r2 # h+=Maj(a,b,c) + aghi %r4,128 + lghi %r0,512 + clgr %r4,%r0 + jne .Lrounds_16_xx + + lg %r2,304(%r15) + la %r3,128(%r3) + alg %r5,0(%r2) + alg %r6,8(%r2) + alg %r7,16(%r2) + alg %r8,24(%r2) + alg %r9,32(%r2) + alg %r10,40(%r2) + alg %r11,48(%r2) + alg %r12,56(%r2) + stg %r5,0(%r2) + stg %r6,8(%r2) + stg %r7,16(%r2) + stg %r8,24(%r2) + stg %r9,32(%r2) + stg %r10,40(%r2) + stg %r11,48(%r2) + stg %r12,56(%r2) + 
clg %r3,320(%r15) + jne .Lloop + + lmg %r6,%r15,336(%r15) + br %r14 +.size sha512_block_data_order,.-sha512_block_data_order +.string "SHA512 block transform for s390x, CRYPTOGAMS by <appro@openssl.org>" |