diff options
author | Sam Roberts <vieuxtech@gmail.com> | 2018-11-22 11:47:07 -0800 |
---|---|---|
committer | Sam Roberts <vieuxtech@gmail.com> | 2019-01-22 13:33:54 -0800 |
commit | 807ed7883a12423270450776f015a7c2348c0913 (patch) | |
tree | 00ec21dd290b29c782680ffc2f97e6d59fd2ab2f /deps/openssl/config/archs/linux-ppc/asm/crypto/chacha/chacha-ppc.s | |
parent | 57119fbdb200702d6e2cf23428de4c458ae86bbc (diff) | |
download | android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.gz android-node-v8-807ed7883a12423270450776f015a7c2348c0913.tar.bz2 android-node-v8-807ed7883a12423270450776f015a7c2348c0913.zip |
deps: update archs files for OpenSSL-1.1.1a
`cd deps/openssl/config; make` updates all archs dependent files.
PR-URL: https://github.com/nodejs/node/pull/25381
Reviewed-By: Daniel Bevenius <daniel.bevenius@gmail.com>
Reviewed-By: Shigeki Ohtsu <ohtsu@ohtsu.org>
Diffstat (limited to 'deps/openssl/config/archs/linux-ppc/asm/crypto/chacha/chacha-ppc.s')
-rw-r--r-- | deps/openssl/config/archs/linux-ppc/asm/crypto/chacha/chacha-ppc.s | 971 |
1 files changed, 666 insertions, 305 deletions
diff --git a/deps/openssl/config/archs/linux-ppc/asm/crypto/chacha/chacha-ppc.s b/deps/openssl/config/archs/linux-ppc/asm/crypto/chacha/chacha-ppc.s index 02f53619e5..1c76710d70 100644 --- a/deps/openssl/config/archs/linux-ppc/asm/crypto/chacha/chacha-ppc.s +++ b/deps/openssl/config/archs/linux-ppc/asm/crypto/chacha/chacha-ppc.s @@ -408,54 +408,48 @@ ChaCha20_ctr32_vmx: cmplwi 5,256 blt __ChaCha20_ctr32_int - stwu 1,-368(1) + stwu 1,-320(1) mflr 0 li 10,103 li 11,119 mfspr 12,256 - stvx 20,10,1 + stvx 23,10,1 addi 10,10,32 - stvx 21,11,1 + stvx 24,11,1 addi 11,11,32 - stvx 22,10,1 + stvx 25,10,1 addi 10,10,32 - stvx 23,11,1 + stvx 26,11,1 addi 11,11,32 - stvx 24,10,1 + stvx 27,10,1 addi 10,10,32 - stvx 25,11,1 + stvx 28,11,1 addi 11,11,32 - stvx 26,10,1 + stvx 29,10,1 addi 10,10,32 - stvx 27,11,1 - addi 11,11,32 - stvx 28,10,1 - addi 10,10,32 - stvx 29,11,1 - addi 11,11,32 - stvx 30,10,1 - stvx 31,11,1 - stw 12,292(1) - stw 14,296(1) - stw 15,300(1) - stw 16,304(1) - stw 17,308(1) - stw 18,312(1) - stw 19,316(1) - stw 20,320(1) - stw 21,324(1) - stw 22,328(1) - stw 23,332(1) - stw 24,336(1) - stw 25,340(1) - stw 26,344(1) - stw 27,348(1) - stw 28,352(1) - stw 29,356(1) - stw 30,360(1) - stw 31,364(1) - li 12,-1 - stw 0, 372(1) + stvx 30,11,1 + stvx 31,10,1 + stw 12,244(1) + stw 14,248(1) + stw 15,252(1) + stw 16,256(1) + stw 17,260(1) + stw 18,264(1) + stw 19,268(1) + stw 20,272(1) + stw 21,276(1) + stw 22,280(1) + stw 23,284(1) + stw 24,288(1) + stw 25,292(1) + stw 26,296(1) + stw 27,300(1) + stw 28,304(1) + stw 29,308(1) + stw 30,312(1) + stw 31,316(1) + li 12,-4096+511 + stw 0, 324(1) mtspr 256,12 bl .Lconsts @@ -466,308 +460,281 @@ ChaCha20_ctr32_vmx: li 20,31 li 21,15 - lvx 16,0,6 - lvsl 12,0,6 - lvx 17,16,6 - lvx 31,20,6 + lvx 13,0,6 + lvsl 29,0,6 + lvx 14,16,6 + lvx 27,20,6 - lvx 18,0,7 - lvsl 13,0,7 - lvx 27,21,7 + lvx 15,0,7 + lvsl 30,0,7 + lvx 28,21,7 - lvx 15,0,12 - lvx 20,16,12 - lvx 21,17,12 - lvx 22,18,12 + lvx 12,0,12 + lvx 17,16,12 + lvx 18,17,12 + 
lvx 19,18,12 lvx 23,19,12 - vperm 16,16,17,12 - vperm 17,17,31,12 - vperm 18,18,27,13 + vperm 13,13,14,29 + vperm 14,14,27,29 + vperm 15,15,28,30 lwz 11,0(7) lwz 12,4(7) - vadduwm 18,18,20 + vadduwm 15,15,17 lwz 14,8(7) - vadduwm 19,18,20 + vadduwm 16,15,17 lwz 15,12(7) - vadduwm 20,19,20 - - vspltisw 24,-12 - vspltisw 25,12 - vspltisw 26,-7 - + vadduwm 17,16,17 - vxor 12,12,12 - vspltisw 30,-1 - lvsl 28,0,4 - lvsr 29,0,3 - vperm 30,12,30,29 + vxor 29,29,29 + vspltisw 26,-1 + lvsl 24,0,4 + lvsr 25,0,3 + vperm 26,29,26,25 - lvsl 12,0,16 - vspltisb 13,3 - vxor 12,12,13 - vxor 29,29,13 - vperm 28,28,28,12 + lvsl 29,0,16 + vspltisb 30,3 + vxor 29,29,30 + vxor 25,25,30 + vperm 24,24,24,29 + li 0,10 b .Loop_outer_vmx .align 4 .Loop_outer_vmx: lis 16,0x6170 lis 17,0x3320 - vor 0,15,15 + vor 0,12,12 lis 18,0x7962 lis 19,0x6b20 - vor 4,15,15 + vor 4,12,12 ori 16,16,0x7865 ori 17,17,0x646e - vor 8,15,15 + vor 8,12,12 ori 18,18,0x2d32 ori 19,19,0x6574 - vor 1,16,16 + vor 1,13,13 - li 0,10 lwz 20,0(6) - vor 5,16,16 + vor 5,13,13 lwz 21,4(6) - vor 9,16,16 + vor 9,13,13 lwz 22,8(6) - vor 2,17,17 + vor 2,14,14 lwz 23,12(6) - vor 6,17,17 + vor 6,14,14 lwz 24,16(6) - vor 10,17,17 + vor 10,14,14 mr 28,11 lwz 25,20(6) - vor 3,18,18 + vor 3,15,15 mr 29,12 lwz 26,24(6) - vor 7,19,19 + vor 7,16,16 mr 30,14 lwz 27,28(6) - vor 11,20,20 + vor 11,17,17 mr 31,15 mr 7,20 mr 8,21 mr 9,22 mr 10,23 - vspltisw 27,7 + + vspltisw 27,12 + vspltisw 28,7 mtctr 0 nop .Loop_vmx: vadduwm 0,0,1 - add 16,16,20 vadduwm 4,4,5 - add 17,17,21 vadduwm 8,8,9 + add 16,16,20 + add 17,17,21 add 18,18,22 vxor 3,3,0 - add 19,19,23 vxor 7,7,4 - xor 28,28,16 vxor 11,11,8 + add 19,19,23 + xor 28,28,16 xor 29,29,17 - vperm 3,3,3,22 + vperm 3,3,3,19 + vperm 7,7,7,19 + vperm 11,11,11,19 xor 30,30,18 - vperm 7,7,7,22 xor 31,31,19 - vperm 11,11,11,22 rotlwi 28,28,16 vadduwm 2,2,3 - rotlwi 29,29,16 vadduwm 6,6,7 - rotlwi 30,30,16 vadduwm 10,10,11 + rotlwi 29,29,16 + rotlwi 30,30,16 rotlwi 31,31,16 - vxor 12,1,2 + vxor 1,1,2 + 
vxor 5,5,6 + vxor 9,9,10 add 24,24,28 - vxor 13,5,6 add 25,25,29 - vxor 14,9,10 add 26,26,30 - vsrw 1,12,24 + vrlw 1,1,27 + vrlw 5,5,27 + vrlw 9,9,27 add 27,27,31 - vsrw 5,13,24 xor 20,20,24 - vsrw 9,14,24 xor 21,21,25 - vslw 12,12,25 + vadduwm 0,0,1 + vadduwm 4,4,5 + vadduwm 8,8,9 xor 22,22,26 - vslw 13,13,25 xor 23,23,27 - vslw 14,14,25 rotlwi 20,20,12 - vor 1,1,12 + vxor 3,3,0 + vxor 7,7,4 + vxor 11,11,8 rotlwi 21,21,12 - vor 5,5,13 rotlwi 22,22,12 - vor 9,9,14 rotlwi 23,23,12 - vadduwm 0,0,1 + vperm 3,3,3,23 + vperm 7,7,7,23 + vperm 11,11,11,23 add 16,16,20 - vadduwm 4,4,5 add 17,17,21 - vadduwm 8,8,9 add 18,18,22 - vxor 3,3,0 + vadduwm 2,2,3 + vadduwm 6,6,7 + vadduwm 10,10,11 add 19,19,23 - vxor 7,7,4 xor 28,28,16 - vxor 11,11,8 xor 29,29,17 - vperm 3,3,3,23 + vxor 1,1,2 + vxor 5,5,6 + vxor 9,9,10 xor 30,30,18 - vperm 7,7,7,23 xor 31,31,19 - vperm 11,11,11,23 rotlwi 28,28,8 - vadduwm 2,2,3 + vrlw 1,1,28 + vrlw 5,5,28 + vrlw 9,9,28 rotlwi 29,29,8 - vadduwm 6,6,7 rotlwi 30,30,8 - vadduwm 10,10,11 rotlwi 31,31,8 - vxor 12,1,2 + vsldoi 2,2,2, 16-8 + vsldoi 6,6,6, 16-8 + vsldoi 10,10,10, 16-8 add 24,24,28 - vxor 13,5,6 add 25,25,29 - vxor 14,9,10 add 26,26,30 - vsrw 1,12,26 + vsldoi 1,1,1, 16-12 + vsldoi 5,5,5, 16-12 + vsldoi 9,9,9, 16-12 add 27,27,31 - vsrw 5,13,26 xor 20,20,24 - vsrw 9,14,26 xor 21,21,25 - vslw 12,12,27 + vsldoi 3,3,3, 16-4 + vsldoi 7,7,7, 16-4 + vsldoi 11,11,11, 16-4 xor 22,22,26 - vslw 13,13,27 xor 23,23,27 - vslw 14,14,27 rotlwi 20,20,7 - vor 1,1,12 rotlwi 21,21,7 - vor 5,5,13 rotlwi 22,22,7 - vor 9,9,14 rotlwi 23,23,7 - vsldoi 2,2,2, 16-8 - vsldoi 6,6,6, 16-8 - vsldoi 10,10,10, 16-8 - vsldoi 1,1,1, 16-12 - vsldoi 5,5,5, 16-12 - vsldoi 9,9,9, 16-12 - vsldoi 3,3,3, 16-4 - vsldoi 7,7,7, 16-4 - vsldoi 11,11,11, 16-4 vadduwm 0,0,1 - add 16,16,21 vadduwm 4,4,5 - add 17,17,22 vadduwm 8,8,9 + add 16,16,21 + add 17,17,22 add 18,18,23 vxor 3,3,0 - add 19,19,20 vxor 7,7,4 - xor 31,31,16 vxor 11,11,8 + add 19,19,20 + xor 31,31,16 xor 28,28,17 - vperm 
3,3,3,22 + vperm 3,3,3,19 + vperm 7,7,7,19 + vperm 11,11,11,19 xor 29,29,18 - vperm 7,7,7,22 xor 30,30,19 - vperm 11,11,11,22 rotlwi 31,31,16 vadduwm 2,2,3 - rotlwi 28,28,16 vadduwm 6,6,7 - rotlwi 29,29,16 vadduwm 10,10,11 + rotlwi 28,28,16 + rotlwi 29,29,16 rotlwi 30,30,16 - vxor 12,1,2 + vxor 1,1,2 + vxor 5,5,6 + vxor 9,9,10 add 26,26,31 - vxor 13,5,6 add 27,27,28 - vxor 14,9,10 add 24,24,29 - vsrw 1,12,24 + vrlw 1,1,27 + vrlw 5,5,27 + vrlw 9,9,27 add 25,25,30 - vsrw 5,13,24 xor 21,21,26 - vsrw 9,14,24 xor 22,22,27 - vslw 12,12,25 + vadduwm 0,0,1 + vadduwm 4,4,5 + vadduwm 8,8,9 xor 23,23,24 - vslw 13,13,25 xor 20,20,25 - vslw 14,14,25 rotlwi 21,21,12 - vor 1,1,12 + vxor 3,3,0 + vxor 7,7,4 + vxor 11,11,8 rotlwi 22,22,12 - vor 5,5,13 rotlwi 23,23,12 - vor 9,9,14 rotlwi 20,20,12 - vadduwm 0,0,1 + vperm 3,3,3,23 + vperm 7,7,7,23 + vperm 11,11,11,23 add 16,16,21 - vadduwm 4,4,5 add 17,17,22 - vadduwm 8,8,9 add 18,18,23 - vxor 3,3,0 + vadduwm 2,2,3 + vadduwm 6,6,7 + vadduwm 10,10,11 add 19,19,20 - vxor 7,7,4 xor 31,31,16 - vxor 11,11,8 xor 28,28,17 - vperm 3,3,3,23 + vxor 1,1,2 + vxor 5,5,6 + vxor 9,9,10 xor 29,29,18 - vperm 7,7,7,23 xor 30,30,19 - vperm 11,11,11,23 rotlwi 31,31,8 - vadduwm 2,2,3 + vrlw 1,1,28 + vrlw 5,5,28 + vrlw 9,9,28 rotlwi 28,28,8 - vadduwm 6,6,7 rotlwi 29,29,8 - vadduwm 10,10,11 rotlwi 30,30,8 - vxor 12,1,2 + vsldoi 2,2,2, 16-8 + vsldoi 6,6,6, 16-8 + vsldoi 10,10,10, 16-8 add 26,26,31 - vxor 13,5,6 add 27,27,28 - vxor 14,9,10 add 24,24,29 - vsrw 1,12,26 + vsldoi 1,1,1, 16-4 + vsldoi 5,5,5, 16-4 + vsldoi 9,9,9, 16-4 add 25,25,30 - vsrw 5,13,26 xor 21,21,26 - vsrw 9,14,26 xor 22,22,27 - vslw 12,12,27 + vsldoi 3,3,3, 16-12 + vsldoi 7,7,7, 16-12 + vsldoi 11,11,11, 16-12 xor 23,23,24 - vslw 13,13,27 xor 20,20,25 - vslw 14,14,27 rotlwi 21,21,7 - vor 1,1,12 rotlwi 22,22,7 - vor 5,5,13 rotlwi 23,23,7 - vor 9,9,14 rotlwi 20,20,7 - vsldoi 2,2,2, 16-8 - vsldoi 6,6,6, 16-8 - vsldoi 10,10,10, 16-8 - vsldoi 1,1,1, 16-4 - vsldoi 5,5,5, 16-4 - vsldoi 9,9,9, 16-4 
- vsldoi 3,3,3, 16-12 - vsldoi 7,7,7, 16-12 - vsldoi 11,11,11, 16-12 bdnz .Loop_vmx subi 5,5,256 @@ -796,23 +763,23 @@ ChaCha20_ctr32_vmx: add 30,30,14 add 31,31,15 - vadduwm 0,0,15 - vadduwm 4,4,15 - vadduwm 8,8,15 - vadduwm 1,1,16 - vadduwm 5,5,16 - vadduwm 9,9,16 - vadduwm 2,2,17 - vadduwm 6,6,17 - vadduwm 10,10,17 - vadduwm 3,3,18 - vadduwm 7,7,19 - vadduwm 11,11,20 + vadduwm 0,0,12 + vadduwm 4,4,12 + vadduwm 8,8,12 + vadduwm 1,1,13 + vadduwm 5,5,13 + vadduwm 9,9,13 + vadduwm 2,2,14 + vadduwm 6,6,14 + vadduwm 10,10,14 + vadduwm 3,3,15 + vadduwm 7,7,16 + vadduwm 11,11,17 addi 11,11,4 - vadduwm 18,18,21 - vadduwm 19,19,21 - vadduwm 20,20,21 + vadduwm 15,15,18 + vadduwm 16,16,18 + vadduwm 17,17,18 mr 7,16 rotlwi 16,16,8 @@ -933,84 +900,84 @@ ChaCha20_ctr32_vmx: stw 31,60(3) addi 3,3,64 - lvx 31,0,4 - lvx 27,7,4 - lvx 12,8,4 - lvx 13,9,4 - lvx 14,10,4 + lvx 27,0,4 + lvx 28,7,4 + lvx 29,8,4 + lvx 30,9,4 + lvx 31,10,4 addi 4,4,64 - vperm 31,31,27,28 - vperm 27,27,12,28 - vperm 12,12,13,28 - vperm 13,13,14,28 - vxor 0,0,31 - vxor 1,1,27 - lvx 27,7,4 - vxor 2,2,12 - lvx 12,8,4 - vxor 3,3,13 - lvx 13,9,4 - lvx 31,10,4 + vperm 27,27,28,24 + vperm 28,28,29,24 + vperm 29,29,30,24 + vperm 30,30,31,24 + vxor 0,0,27 + vxor 1,1,28 + lvx 28,7,4 + vxor 2,2,29 + lvx 29,8,4 + vxor 3,3,30 + lvx 30,9,4 + lvx 27,10,4 addi 4,4,64 li 10,63 - vperm 0,0,0,29 - vperm 1,1,1,29 - vperm 2,2,2,29 - vperm 3,3,3,29 - - vperm 14,14,27,28 - vperm 27,27,12,28 - vperm 12,12,13,28 - vperm 13,13,31,28 - vxor 4,4,14 - vxor 5,5,27 - lvx 27,7,4 - vxor 6,6,12 - lvx 12,8,4 - vxor 7,7,13 - lvx 13,9,4 - lvx 14,10,4 + vperm 0,0,0,25 + vperm 1,1,1,25 + vperm 2,2,2,25 + vperm 3,3,3,25 + + vperm 31,31,28,24 + vperm 28,28,29,24 + vperm 29,29,30,24 + vperm 30,30,27,24 + vxor 4,4,31 + vxor 5,5,28 + lvx 28,7,4 + vxor 6,6,29 + lvx 29,8,4 + vxor 7,7,30 + lvx 30,9,4 + lvx 31,10,4 addi 4,4,64 - vperm 4,4,4,29 - vperm 5,5,5,29 - vperm 6,6,6,29 - vperm 7,7,7,29 - - vperm 31,31,27,28 - vperm 27,27,12,28 - vperm 
12,12,13,28 - vperm 13,13,14,28 - vxor 8,8,31 - vxor 9,9,27 - vxor 10,10,12 - vxor 11,11,13 - vperm 8,8,8,29 - vperm 9,9,9,29 - vperm 10,10,10,29 - vperm 11,11,11,29 + vperm 4,4,4,25 + vperm 5,5,5,25 + vperm 6,6,6,25 + vperm 7,7,7,25 + + vperm 27,27,28,24 + vperm 28,28,29,24 + vperm 29,29,30,24 + vperm 30,30,31,24 + vxor 8,8,27 + vxor 9,9,28 + vxor 10,10,29 + vxor 11,11,30 + vperm 8,8,8,25 + vperm 9,9,9,25 + vperm 10,10,10,25 + vperm 11,11,11,25 andi. 17,3,15 mr 16,3 - vsel 31,0,1,30 - vsel 27,1,2,30 - vsel 12,2,3,30 - vsel 13,3,4,30 - vsel 1,4,5,30 - vsel 2,5,6,30 - vsel 3,6,7,30 - vsel 4,7,8,30 - vsel 5,8,9,30 - vsel 6,9,10,30 - vsel 7,10,11,30 - - - stvx 31,7,3 - stvx 27,8,3 - stvx 12,9,3 + vsel 27,0,1,26 + vsel 28,1,2,26 + vsel 29,2,3,26 + vsel 30,3,4,26 + vsel 1,4,5,26 + vsel 2,5,6,26 + vsel 3,6,7,26 + vsel 4,7,8,26 + vsel 5,8,9,26 + vsel 6,9,10,26 + vsel 7,10,11,26 + + + stvx 27,7,3 + stvx 28,8,3 + stvx 29,9,3 addi 3,3,64 - stvx 13,0,3 + stvx 30,0,3 stvx 1,7,3 stvx 2,8,3 stvx 3,9,3 @@ -1055,22 +1022,405 @@ ChaCha20_ctr32_vmx: cmplwi 5,0 bnel __ChaCha20_1x - lwz 12,292(1) + lwz 12,244(1) li 10,103 li 11,119 mtspr 256,12 - lvx 20,10,1 + lvx 23,10,1 addi 10,10,32 - lvx 21,11,1 + lvx 24,11,1 addi 11,11,32 - lvx 22,10,1 + lvx 25,10,1 addi 10,10,32 - lvx 23,11,1 + lvx 26,11,1 addi 11,11,32 - lvx 24,10,1 + lvx 27,10,1 addi 10,10,32 - lvx 25,11,1 + lvx 28,11,1 addi 11,11,32 + lvx 29,10,1 + addi 10,10,32 + lvx 30,11,1 + lvx 31,10,1 + lwz 0, 324(1) + lwz 14,248(1) + lwz 15,252(1) + lwz 16,256(1) + lwz 17,260(1) + lwz 18,264(1) + lwz 19,268(1) + lwz 20,272(1) + lwz 21,276(1) + lwz 22,280(1) + lwz 23,284(1) + lwz 24,288(1) + lwz 25,292(1) + lwz 26,296(1) + lwz 27,300(1) + lwz 28,304(1) + lwz 29,308(1) + lwz 30,312(1) + lwz 31,316(1) + mtlr 0 + addi 1,1,320 + blr +.long 0 +.byte 0,12,0x04,1,0x80,18,5,0 +.long 0 +.size ChaCha20_ctr32_vmx,.-ChaCha20_ctr32_vmx + +.globl ChaCha20_ctr32_vsx +.type ChaCha20_ctr32_vsx,@function +.align 5 +ChaCha20_ctr32_vsx: + stwu 1,-200(1) + 
mflr 0 + li 10,103 + li 11,119 + mfspr 12,256 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + stw 12,196(1) + li 12,-4096+63 + stw 0, 204(1) + mtspr 256,12 + + bl .Lconsts + .long 0x7E006619 + addi 12,12,0x50 + li 8,16 + li 9,32 + li 10,48 + li 11,64 + + .long 0x7E203619 + .long 0x7E483619 + .long 0x7E603E19 + + vxor 27,27,27 + .long 0x7F8B6619 + vspltw 26,19,0 + vsldoi 19,19,27,4 + vsldoi 19,27,19,12 + vadduwm 26,26,28 + + lvsl 31,0,8 + vspltisb 27,3 + vxor 31,31,27 + + li 0,10 + mtctr 0 + b .Loop_outer_vsx + +.align 5 +.Loop_outer_vsx: + lvx 0,0,12 + lvx 1,8,12 + lvx 2,9,12 + lvx 3,10,12 + + vspltw 4,17,0 + vspltw 5,17,1 + vspltw 6,17,2 + vspltw 7,17,3 + + vspltw 8,18,0 + vspltw 9,18,1 + vspltw 10,18,2 + vspltw 11,18,3 + + vor 12,26,26 + vspltw 13,19,1 + vspltw 14,19,2 + vspltw 15,19,3 + + vspltisw 27,-16 + vspltisw 28,12 + vspltisw 29,8 + vspltisw 30,7 + +.Loop_vsx: + vadduwm 0,0,4 + vadduwm 1,1,5 + vadduwm 2,2,6 + vadduwm 3,3,7 + vxor 12,12,0 + vxor 13,13,1 + vxor 14,14,2 + vxor 15,15,3 + vrlw 12,12,27 + vrlw 13,13,27 + vrlw 14,14,27 + vrlw 15,15,27 + vadduwm 8,8,12 + vadduwm 9,9,13 + vadduwm 10,10,14 + vadduwm 11,11,15 + vxor 4,4,8 + vxor 5,5,9 + vxor 6,6,10 + vxor 7,7,11 + vrlw 4,4,28 + vrlw 5,5,28 + vrlw 6,6,28 + vrlw 7,7,28 + vadduwm 0,0,4 + vadduwm 1,1,5 + vadduwm 2,2,6 + vadduwm 3,3,7 + vxor 12,12,0 + vxor 13,13,1 + vxor 14,14,2 + vxor 15,15,3 + vrlw 12,12,29 + vrlw 13,13,29 + vrlw 14,14,29 + vrlw 15,15,29 + vadduwm 8,8,12 + vadduwm 9,9,13 + vadduwm 10,10,14 + vadduwm 11,11,15 + vxor 4,4,8 + vxor 5,5,9 + vxor 6,6,10 + vxor 7,7,11 + vrlw 4,4,30 + vrlw 5,5,30 + vrlw 6,6,30 + vrlw 7,7,30 + vadduwm 0,0,5 + vadduwm 1,1,6 + vadduwm 2,2,7 + vadduwm 3,3,4 + vxor 15,15,0 + vxor 12,12,1 + vxor 13,13,2 + vxor 14,14,3 + vrlw 15,15,27 + vrlw 12,12,27 + vrlw 13,13,27 + vrlw 14,14,27 + vadduwm 10,10,15 + vadduwm 11,11,12 + vadduwm 8,8,13 + vadduwm 9,9,14 + vxor 
5,5,10 + vxor 6,6,11 + vxor 7,7,8 + vxor 4,4,9 + vrlw 5,5,28 + vrlw 6,6,28 + vrlw 7,7,28 + vrlw 4,4,28 + vadduwm 0,0,5 + vadduwm 1,1,6 + vadduwm 2,2,7 + vadduwm 3,3,4 + vxor 15,15,0 + vxor 12,12,1 + vxor 13,13,2 + vxor 14,14,3 + vrlw 15,15,29 + vrlw 12,12,29 + vrlw 13,13,29 + vrlw 14,14,29 + vadduwm 10,10,15 + vadduwm 11,11,12 + vadduwm 8,8,13 + vadduwm 9,9,14 + vxor 5,5,10 + vxor 6,6,11 + vxor 7,7,8 + vxor 4,4,9 + vrlw 5,5,30 + vrlw 6,6,30 + vrlw 7,7,30 + vrlw 4,4,30 + bdnz .Loop_vsx + + vadduwm 12,12,26 + + .long 0x13600F8C + .long 0x13821F8C + .long 0x10000E8C + .long 0x10421E8C + .long 0x13A42F8C + .long 0x13C63F8C + .long 0xF0201057 + .long 0xF0601357 + .long 0xF01BE057 + .long 0xF05BE357 + + .long 0x10842E8C + .long 0x10C63E8C + .long 0x13684F8C + .long 0x138A5F8C + .long 0xF0A43057 + .long 0xF0E43357 + .long 0xF09DF057 + .long 0xF0DDF357 + + .long 0x11084E8C + .long 0x114A5E8C + .long 0x13AC6F8C + .long 0x13CE7F8C + .long 0xF1285057 + .long 0xF1685357 + .long 0xF11BE057 + .long 0xF15BE357 + + .long 0x118C6E8C + .long 0x11CE7E8C + vspltisw 27,4 + vadduwm 26,26,27 + .long 0xF1AC7057 + .long 0xF1EC7357 + .long 0xF19DF057 + .long 0xF1DDF357 + + vadduwm 0,0,16 + vadduwm 4,4,17 + vadduwm 8,8,18 + vadduwm 12,12,19 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmplwi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,1,16 + vadduwm 4,5,17 + vadduwm 8,9,18 + vadduwm 12,13,19 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmplwi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 
4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,2,16 + vadduwm 4,6,17 + vadduwm 8,10,18 + vadduwm 12,14,19 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmplwi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,3,16 + vadduwm 4,7,17 + vadduwm 8,11,18 + vadduwm 12,15,19 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmplwi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + mtctr 0 + bne .Loop_outer_vsx + +.Ldone_vsx: + lwz 12,196(1) + li 10,103 + li 11,119 + lwz 0, 204(1) + mtspr 256,12 lvx 26,10,1 addi 10,10,32 lvx 27,11,1 @@ -1081,33 +1431,39 @@ ChaCha20_ctr32_vmx: addi 11,11,32 lvx 30,10,1 lvx 31,11,1 - lwz 0, 372(1) - lwz 14,296(1) - lwz 15,300(1) - lwz 16,304(1) - lwz 17,308(1) - lwz 18,312(1) - lwz 19,316(1) - lwz 20,320(1) - lwz 21,324(1) - lwz 22,328(1) - lwz 23,332(1) - lwz 24,336(1) - lwz 25,340(1) - lwz 26,344(1) - lwz 27,348(1) - lwz 28,352(1) - lwz 29,356(1) - lwz 30,360(1) - lwz 31,364(1) mtlr 0 - addi 1,1,368 + addi 1,1,200 blr + +.align 4 +.Ltail_vsx: + addi 11,1,24 + mtctr 5 + .long 0x7C005F19 + .long 0x7C885F19 + .long 0x7D095F19 + .long 0x7D8A5F19 + subi 12,11,1 + subi 4,4,1 + subi 3,3,1 + +.Loop_tail_vsx: + lbzu 6,1(12) + lbzu 7,1(4) + xor 6,6,7 + stbu 6,1(3) + bdnz .Loop_tail_vsx + + .long 0x7E005F19 + .long 0x7E085F19 + .long 0x7E095F19 + .long 0x7E0A5F19 + + b .Ldone_vsx .long 0 -.byte 0,12,0x04,1,0x80,18,5,0 +.byte 
0,12,0x04,1,0x80,0,5,0 .long 0 -.size ChaCha20_ctr32_vmx,.-ChaCha20_ctr32_vmx - +.size ChaCha20_ctr32_vsx,.-ChaCha20_ctr32_vsx .align 5 .Lconsts: mflr 0 @@ -1125,6 +1481,11 @@ ChaCha20_ctr32_vmx: .long 4,0,0,0 .long 0x02030001,0x06070405,0x0a0b0809,0x0e0f0c0d .long 0x01020300,0x05060704,0x090a0b08,0x0d0e0f0c -.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,80,111,119,101,114,80,67,47,65,108,116,105,86,101,99,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.long 0x61707865,0x61707865,0x61707865,0x61707865 +.long 0x3320646e,0x3320646e,0x3320646e,0x3320646e +.long 0x79622d32,0x79622d32,0x79622d32,0x79622d32 +.long 0x6b206574,0x6b206574,0x6b206574,0x6b206574 +.long 0,1,2,3 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,80,111,119,101,114,80,67,47,65,108,116,105,86,101,99,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 2 .align 2 |