diff options
Diffstat (limited to 'deps/openssl/config/archs/linux-ppc64le/asm_avx2/crypto/chacha/chacha-ppc.s')
-rw-r--r-- | deps/openssl/config/archs/linux-ppc64le/asm_avx2/crypto/chacha/chacha-ppc.s | 1370 |
1 files changed, 1370 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/linux-ppc64le/asm_avx2/crypto/chacha/chacha-ppc.s b/deps/openssl/config/archs/linux-ppc64le/asm_avx2/crypto/chacha/chacha-ppc.s new file mode 100644 index 0000000000..ea614d4ce4 --- /dev/null +++ b/deps/openssl/config/archs/linux-ppc64le/asm_avx2/crypto/chacha/chacha-ppc.s @@ -0,0 +1,1370 @@ +.machine "any" +.abiversion 2 +.text + +.globl ChaCha20_ctr32_int +.type ChaCha20_ctr32_int,@function +.align 5 +ChaCha20_ctr32_int: +.localentry ChaCha20_ctr32_int,0 + +__ChaCha20_ctr32_int: + cmpldi 5,0 + .long 0x4DC20020 + + stdu 1,-256(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + std 22,176(1) + std 23,184(1) + std 24,192(1) + std 25,200(1) + std 26,208(1) + std 27,216(1) + std 28,224(1) + std 29,232(1) + std 30,240(1) + std 31,248(1) + std 0,272(1) + + lwz 11,0(7) + lwz 12,4(7) + lwz 14,8(7) + lwz 15,12(7) + + bl __ChaCha20_1x + + ld 0,272(1) + ld 14,112(1) + ld 15,120(1) + ld 16,128(1) + ld 17,136(1) + ld 18,144(1) + ld 19,152(1) + ld 20,160(1) + ld 21,168(1) + ld 22,176(1) + ld 23,184(1) + ld 24,192(1) + ld 25,200(1) + ld 26,208(1) + ld 27,216(1) + ld 28,224(1) + ld 29,232(1) + ld 30,240(1) + ld 31,248(1) + mtlr 0 + addi 1,1,256 + blr +.long 0 +.byte 0,12,4,1,0x80,18,5,0 +.long 0 +.size ChaCha20_ctr32_int,.-ChaCha20_ctr32_int + +.align 5 +__ChaCha20_1x: +.Loop_outer: + lis 16,0x6170 + lis 17,0x3320 + lis 18,0x7962 + lis 19,0x6b20 + ori 16,16,0x7865 + ori 17,17,0x646e + ori 18,18,0x2d32 + ori 19,19,0x6574 + + li 0,10 + lwz 20,0(6) + lwz 21,4(6) + lwz 22,8(6) + lwz 23,12(6) + lwz 24,16(6) + mr 28,11 + lwz 25,20(6) + mr 29,12 + lwz 26,24(6) + mr 30,14 + lwz 27,28(6) + mr 31,15 + + mr 7,20 + mr 8,21 + mr 9,22 + mr 10,23 + + mtctr 0 +.Loop: + add 16,16,20 + add 17,17,21 + add 18,18,22 + add 19,19,23 + xor 28,28,16 + xor 29,29,17 + xor 30,30,18 + xor 31,31,19 + rotlwi 28,28,16 + rotlwi 29,29,16 + rotlwi 30,30,16 + rotlwi 31,31,16 + add 24,24,28 + add 25,25,29 + add 26,26,30 + add 27,27,31 + xor 20,20,24 + xor 21,21,25 + xor 22,22,26 + xor 23,23,27 + rotlwi 20,20,12 + rotlwi 21,21,12 + rotlwi 22,22,12 + rotlwi 23,23,12 + add 16,16,20 + add 17,17,21 + add 18,18,22 + add 19,19,23 + xor 28,28,16 + xor 29,29,17 + xor 30,30,18 + xor 31,31,19 + rotlwi 28,28,8 + rotlwi 29,29,8 + rotlwi 30,30,8 + rotlwi 31,31,8 + add 24,24,28 + add 25,25,29 + add 26,26,30 + add 27,27,31 + xor 20,20,24 + xor 21,21,25 + xor 22,22,26 + xor 23,23,27 + rotlwi 20,20,7 + rotlwi 21,21,7 + rotlwi 22,22,7 + rotlwi 23,23,7 + add 16,16,21 + add 17,17,22 + add 18,18,23 + add 19,19,20 + xor 31,31,16 + xor 28,28,17 + xor 29,29,18 + xor 30,30,19 + rotlwi 31,31,16 + rotlwi 28,28,16 + rotlwi 29,29,16 + rotlwi 30,30,16 + add 26,26,31 + add 27,27,28 + add 24,24,29 + add 25,25,30 + xor 21,21,26 + xor 22,22,27 + xor 23,23,24 + xor 20,20,25 + rotlwi 21,21,12 + rotlwi 22,22,12 + rotlwi 23,23,12 + rotlwi 20,20,12 + add 16,16,21 + add 17,17,22 + add 18,18,23 + add 19,19,20 + xor 31,31,16 + xor 28,28,17 + xor 29,29,18 + xor 30,30,19 + rotlwi 31,31,8 + rotlwi 28,28,8 + rotlwi 29,29,8 + rotlwi 30,30,8 + add 26,26,31 + add 27,27,28 + add 24,24,29 + add 25,25,30 + xor 21,21,26 + xor 22,22,27 + xor 23,23,24 + xor 20,20,25 + rotlwi 21,21,7 + rotlwi 22,22,7 + rotlwi 23,23,7 + rotlwi 20,20,7 + bdnz .Loop + + subic 5,5,64 + addi 16,16,0x7865 + addi 17,17,0x646e + addi 18,18,0x2d32 + addi 19,19,0x6574 + addis 16,16,0x6170 + addis 17,17,0x3320 + addis 18,18,0x7962 + addis 19,19,0x6b20 + + subfe. 0,0,0 + add 20,20,7 + lwz 7,16(6) + add 21,21,8 + lwz 8,20(6) + add 22,22,9 + lwz 9,24(6) + add 23,23,10 + lwz 10,28(6) + add 24,24,7 + add 25,25,8 + add 26,26,9 + add 27,27,10 + + add 28,28,11 + add 29,29,12 + add 30,30,14 + add 31,31,15 + addi 11,11,1 + bne .Ltail + + lwz 7,0(4) + lwz 8,4(4) + cmpldi 5,0 + lwz 9,8(4) + lwz 10,12(4) + xor 16,16,7 + lwz 7,16(4) + xor 17,17,8 + lwz 8,20(4) + xor 18,18,9 + lwz 9,24(4) + xor 19,19,10 + lwz 10,28(4) + xor 20,20,7 + lwz 7,32(4) + xor 21,21,8 + lwz 8,36(4) + xor 22,22,9 + lwz 9,40(4) + xor 23,23,10 + lwz 10,44(4) + xor 24,24,7 + lwz 7,48(4) + xor 25,25,8 + lwz 8,52(4) + xor 26,26,9 + lwz 9,56(4) + xor 27,27,10 + lwz 10,60(4) + xor 28,28,7 + stw 16,0(3) + xor 29,29,8 + stw 17,4(3) + xor 30,30,9 + stw 18,8(3) + xor 31,31,10 + stw 19,12(3) + stw 20,16(3) + stw 21,20(3) + stw 22,24(3) + stw 23,28(3) + stw 24,32(3) + stw 25,36(3) + stw 26,40(3) + stw 27,44(3) + stw 28,48(3) + stw 29,52(3) + stw 30,56(3) + addi 4,4,64 + stw 31,60(3) + addi 3,3,64 + + bne .Loop_outer + + blr + +.align 4 +.Ltail: + addi 5,5,64 + subi 4,4,1 + subi 3,3,1 + addi 7,1,48-1 + mtctr 5 + + stw 16,48(1) + stw 17,52(1) + stw 18,56(1) + stw 19,60(1) + stw 20,64(1) + stw 21,68(1) + stw 22,72(1) + stw 23,76(1) + stw 24,80(1) + stw 25,84(1) + stw 26,88(1) + stw 27,92(1) + stw 28,96(1) + stw 29,100(1) + stw 30,104(1) + stw 31,108(1) + +.Loop_tail: + lbzu 11,1(4) + lbzu 16,1(7) + xor 12,11,16 + stbu 12,1(3) + bdnz .Loop_tail + + stw 1,48(1) + stw 1,52(1) + stw 1,56(1) + stw 1,60(1) + stw 1,64(1) + stw 1,68(1) + stw 1,72(1) + stw 1,76(1) + stw 1,80(1) + stw 1,84(1) + stw 1,88(1) + stw 1,92(1) + stw 1,96(1) + stw 1,100(1) + stw 1,104(1) + stw 1,108(1) + + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + +.globl ChaCha20_ctr32_vmx +.type ChaCha20_ctr32_vmx,@function +.align 5 +ChaCha20_ctr32_vmx: +.localentry ChaCha20_ctr32_vmx,0 + + cmpldi 5,256 + blt __ChaCha20_ctr32_int + + stdu 1,-416(1) + mflr 0 + li 10,127 + li 11,143 + li 12,-1 + stvx 23,10,1 + addi 10,10,32 + stvx 24,11,1 + addi 11,11,32 + stvx 25,10,1 + addi 10,10,32 + stvx 26,11,1 + addi 11,11,32 + stvx 27,10,1 + addi 10,10,32 + stvx 28,11,1 + addi 11,11,32 + stvx 29,10,1 + addi 10,10,32 + stvx 30,11,1 + stvx 31,10,1 + stw 12,268(1) + std 14,272(1) + std 15,280(1) + std 16,288(1) + std 17,296(1) + std 18,304(1) + std 19,312(1) + std 20,320(1) + std 21,328(1) + std 22,336(1) + std 23,344(1) + std 24,352(1) + std 25,360(1) + std 26,368(1) + std 27,376(1) + std 28,384(1) + std 29,392(1) + std 30,400(1) + std 31,408(1) + li 12,-4096+511 + std 0, 432(1) + or 12,12,12 + + bl .Lconsts + li 16,16 + li 17,32 + li 18,48 + li 19,64 + li 20,31 + li 21,15 + + lvx 13,0,6 + lvsr 29,0,6 + lvx 14,16,6 + lvx 27,20,6 + + lvx 15,0,7 + lvsr 30,0,7 + lvx 28,21,7 + + lvx 12,0,12 + lvx 17,16,12 + lvx 18,17,12 + lvx 19,18,12 + lvx 23,19,12 + + vperm 13,14,13,29 + vperm 14,27,14,29 + vperm 15,28,15,30 + + lwz 11,0(7) + lwz 12,4(7) + vadduwm 15,15,17 + lwz 14,8(7) + vadduwm 16,15,17 + lwz 15,12(7) + vadduwm 17,16,17 + + vxor 29,29,29 + vspltisw 26,-1 + lvsr 24,0,4 + lvsl 25,0,3 + vperm 26,26,29,25 + + + + + + + + li 0,10 + b .Loop_outer_vmx + +.align 4 +.Loop_outer_vmx: + lis 16,0x6170 + lis 17,0x3320 + vor 0,12,12 + lis 18,0x7962 + lis 19,0x6b20 + vor 4,12,12 + ori 16,16,0x7865 + ori 17,17,0x646e + vor 8,12,12 + ori 18,18,0x2d32 + ori 19,19,0x6574 + vor 1,13,13 + + lwz 20,0(6) + vor 5,13,13 + lwz 21,4(6) + vor 9,13,13 + lwz 22,8(6) + vor 2,14,14 + lwz 23,12(6) + vor 6,14,14 + lwz 24,16(6) + vor 10,14,14 + mr 28,11 + lwz 25,20(6) + vor 3,15,15 + mr 29,12 + lwz 26,24(6) + vor 7,16,16 + mr 30,14 + lwz 27,28(6) + vor 11,17,17 + mr 31,15 + + mr 7,20 + mr 8,21 + mr 9,22 + mr 10,23 + + vspltisw 27,12 + vspltisw 28,7 + + mtctr 0 + nop +.Loop_vmx: + vadduwm 0,0,1 + vadduwm 4,4,5 + vadduwm 8,8,9 + add 16,16,20 + add 17,17,21 + add 18,18,22 + vxor 3,3,0 + vxor 7,7,4 + vxor 11,11,8 + add 19,19,23 + xor 28,28,16 + xor 29,29,17 + vperm 3,3,3,19 + vperm 7,7,7,19 + vperm 11,11,11,19 + xor 30,30,18 + xor 31,31,19 + rotlwi 28,28,16 + vadduwm 2,2,3 + vadduwm 6,6,7 + vadduwm 10,10,11 + rotlwi 29,29,16 + rotlwi 30,30,16 + rotlwi 31,31,16 + vxor 1,1,2 + vxor 5,5,6 + vxor 9,9,10 + add 24,24,28 + add 25,25,29 + add 26,26,30 + vrlw 1,1,27 + vrlw 5,5,27 + vrlw 9,9,27 + add 27,27,31 + xor 20,20,24 + xor 21,21,25 + vadduwm 0,0,1 + vadduwm 4,4,5 + vadduwm 8,8,9 + xor 22,22,26 + xor 23,23,27 + rotlwi 20,20,12 + vxor 3,3,0 + vxor 7,7,4 + vxor 11,11,8 + rotlwi 21,21,12 + rotlwi 22,22,12 + rotlwi 23,23,12 + vperm 3,3,3,23 + vperm 7,7,7,23 + vperm 11,11,11,23 + add 16,16,20 + add 17,17,21 + add 18,18,22 + vadduwm 2,2,3 + vadduwm 6,6,7 + vadduwm 10,10,11 + add 19,19,23 + xor 28,28,16 + xor 29,29,17 + vxor 1,1,2 + vxor 5,5,6 + vxor 9,9,10 + xor 30,30,18 + xor 31,31,19 + rotlwi 28,28,8 + vrlw 1,1,28 + vrlw 5,5,28 + vrlw 9,9,28 + rotlwi 29,29,8 + rotlwi 30,30,8 + rotlwi 31,31,8 + vsldoi 2,2,2, 8 + vsldoi 6,6,6, 8 + vsldoi 10,10,10, 8 + add 24,24,28 + add 25,25,29 + add 26,26,30 + vsldoi 1,1,1, 12 + vsldoi 5,5,5, 12 + vsldoi 9,9,9, 12 + add 27,27,31 + xor 20,20,24 + xor 21,21,25 + vsldoi 3,3,3, 4 + vsldoi 7,7,7, 4 + vsldoi 11,11,11, 4 + xor 22,22,26 + xor 23,23,27 + rotlwi 20,20,7 + rotlwi 21,21,7 + rotlwi 22,22,7 + rotlwi 23,23,7 + vadduwm 0,0,1 + vadduwm 4,4,5 + vadduwm 8,8,9 + add 16,16,21 + add 17,17,22 + add 18,18,23 + vxor 3,3,0 + vxor 7,7,4 + vxor 11,11,8 + add 19,19,20 + xor 31,31,16 + xor 28,28,17 + vperm 3,3,3,19 + vperm 7,7,7,19 + vperm 11,11,11,19 + xor 29,29,18 + xor 30,30,19 + rotlwi 31,31,16 + vadduwm 2,2,3 + vadduwm 6,6,7 + vadduwm 10,10,11 + rotlwi 28,28,16 + rotlwi 29,29,16 + rotlwi 30,30,16 + vxor 1,1,2 + vxor 5,5,6 + vxor 9,9,10 + add 26,26,31 + add 27,27,28 + add 24,24,29 + vrlw 1,1,27 + vrlw 5,5,27 + vrlw 9,9,27 + add 25,25,30 + xor 21,21,26 + xor 22,22,27 + vadduwm 0,0,1 + vadduwm 4,4,5 + vadduwm 8,8,9 + xor 23,23,24 + xor 20,20,25 + rotlwi 21,21,12 + vxor 3,3,0 + vxor 7,7,4 + vxor 11,11,8 + rotlwi 22,22,12 + rotlwi 23,23,12 + rotlwi 20,20,12 + vperm 3,3,3,23 + vperm 7,7,7,23 + vperm 11,11,11,23 + add 16,16,21 + add 17,17,22 + add 18,18,23 + vadduwm 2,2,3 + vadduwm 6,6,7 + vadduwm 10,10,11 + add 19,19,20 + xor 31,31,16 + xor 28,28,17 + vxor 1,1,2 + vxor 5,5,6 + vxor 9,9,10 + xor 29,29,18 + xor 30,30,19 + rotlwi 31,31,8 + vrlw 1,1,28 + vrlw 5,5,28 + vrlw 9,9,28 + rotlwi 28,28,8 + rotlwi 29,29,8 + rotlwi 30,30,8 + vsldoi 2,2,2, 8 + vsldoi 6,6,6, 8 + vsldoi 10,10,10, 8 + add 26,26,31 + add 27,27,28 + add 24,24,29 + vsldoi 1,1,1, 4 + vsldoi 5,5,5, 4 + vsldoi 9,9,9, 4 + add 25,25,30 + xor 21,21,26 + xor 22,22,27 + vsldoi 3,3,3, 12 + vsldoi 7,7,7, 12 + vsldoi 11,11,11, 12 + xor 23,23,24 + xor 20,20,25 + rotlwi 21,21,7 + rotlwi 22,22,7 + rotlwi 23,23,7 + rotlwi 20,20,7 + bdnz .Loop_vmx + + subi 5,5,256 + addi 16,16,0x7865 + addi 17,17,0x646e + addi 18,18,0x2d32 + addi 19,19,0x6574 + addis 16,16,0x6170 + addis 17,17,0x3320 + addis 18,18,0x7962 + addis 19,19,0x6b20 + add 20,20,7 + lwz 7,16(6) + add 21,21,8 + lwz 8,20(6) + add 22,22,9 + lwz 9,24(6) + add 23,23,10 + lwz 10,28(6) + add 24,24,7 + add 25,25,8 + add 26,26,9 + add 27,27,10 + add 28,28,11 + add 29,29,12 + add 30,30,14 + add 31,31,15 + + vadduwm 0,0,12 + vadduwm 4,4,12 + vadduwm 8,8,12 + vadduwm 1,1,13 + vadduwm 5,5,13 + vadduwm 9,9,13 + vadduwm 2,2,14 + vadduwm 6,6,14 + vadduwm 10,10,14 + vadduwm 3,3,15 + vadduwm 7,7,16 + vadduwm 11,11,17 + + addi 11,11,4 + vadduwm 15,15,18 + vadduwm 16,16,18 + vadduwm 17,17,18 + + lwz 7,0(4) + lwz 8,4(4) + lwz 9,8(4) + lwz 10,12(4) + xor 16,16,7 + lwz 7,16(4) + xor 17,17,8 + lwz 8,20(4) + xor 18,18,9 + lwz 9,24(4) + xor 19,19,10 + lwz 10,28(4) + xor 20,20,7 + lwz 7,32(4) + xor 21,21,8 + lwz 8,36(4) + xor 22,22,9 + lwz 9,40(4) + xor 23,23,10 + lwz 10,44(4) + xor 24,24,7 + lwz 7,48(4) + xor 25,25,8 + lwz 8,52(4) + xor 26,26,9 + lwz 9,56(4) + xor 27,27,10 + lwz 10,60(4) + xor 28,28,7 + stw 16,0(3) + xor 29,29,8 + stw 17,4(3) + xor 30,30,9 + stw 18,8(3) + xor 31,31,10 + stw 19,12(3) + addi 4,4,64 + stw 20,16(3) + li 7,16 + stw 21,20(3) + li 8,32 + stw 22,24(3) + li 9,48 + stw 23,28(3) + li 10,64 + stw 24,32(3) + stw 25,36(3) + stw 26,40(3) + stw 27,44(3) + stw 28,48(3) + stw 29,52(3) + stw 30,56(3) + stw 31,60(3) + addi 3,3,64 + + lvx 27,0,4 + lvx 28,7,4 + lvx 29,8,4 + lvx 30,9,4 + lvx 31,10,4 + addi 4,4,64 + + vperm 27,28,27,24 + vperm 28,29,28,24 + vperm 29,30,29,24 + vperm 30,31,30,24 + vxor 0,0,27 + vxor 1,1,28 + lvx 28,7,4 + vxor 2,2,29 + lvx 29,8,4 + vxor 3,3,30 + lvx 30,9,4 + lvx 27,10,4 + addi 4,4,64 + li 10,63 + vperm 0,0,0,25 + vperm 1,1,1,25 + vperm 2,2,2,25 + vperm 3,3,3,25 + + vperm 31,28,31,24 + vperm 28,29,28,24 + vperm 29,30,29,24 + vperm 30,27,30,24 + vxor 4,4,31 + vxor 5,5,28 + lvx 28,7,4 + vxor 6,6,29 + lvx 29,8,4 + vxor 7,7,30 + lvx 30,9,4 + lvx 31,10,4 + addi 4,4,64 + vperm 4,4,4,25 + vperm 5,5,5,25 + vperm 6,6,6,25 + vperm 7,7,7,25 + + vperm 27,28,27,24 + vperm 28,29,28,24 + vperm 29,30,29,24 + vperm 30,31,30,24 + vxor 8,8,27 + vxor 9,9,28 + vxor 10,10,29 + vxor 11,11,30 + vperm 8,8,8,25 + vperm 9,9,9,25 + vperm 10,10,10,25 + vperm 11,11,11,25 + + andi. 17,3,15 + mr 16,3 + + vsel 27,0,1,26 + vsel 28,1,2,26 + vsel 29,2,3,26 + vsel 30,3,4,26 + vsel 1,4,5,26 + vsel 2,5,6,26 + vsel 3,6,7,26 + vsel 4,7,8,26 + vsel 5,8,9,26 + vsel 6,9,10,26 + vsel 7,10,11,26 + + + stvx 27,7,3 + stvx 28,8,3 + stvx 29,9,3 + addi 3,3,64 + stvx 30,0,3 + stvx 1,7,3 + stvx 2,8,3 + stvx 3,9,3 + addi 3,3,64 + stvx 4,0,3 + stvx 5,7,3 + stvx 6,8,3 + stvx 7,9,3 + addi 3,3,64 + + beq .Laligned_vmx + + sub 18,3,17 + li 19,0 +.Lunaligned_tail_vmx: + stvebx 11,19,18 + addi 19,19,1 + cmpw 19,17 + bne .Lunaligned_tail_vmx + + sub 18,16,17 +.Lunaligned_head_vmx: + stvebx 0,17,18 + cmpwi 17,15 + addi 17,17,1 + bne .Lunaligned_head_vmx + + cmpldi 5,255 + bgt .Loop_outer_vmx + + b .Ldone_vmx + +.align 4 +.Laligned_vmx: + stvx 0,0,16 + + cmpldi 5,255 + bgt .Loop_outer_vmx + nop + +.Ldone_vmx: + cmpldi 5,0 + bnel __ChaCha20_1x + + lwz 12,268(1) + li 10,127 + li 11,143 + or 12,12,12 + lvx 23,10,1 + addi 10,10,32 + lvx 24,11,1 + addi 11,11,32 + lvx 25,10,1 + addi 10,10,32 + lvx 26,11,1 + addi 11,11,32 + lvx 27,10,1 + addi 10,10,32 + lvx 28,11,1 + addi 11,11,32 + lvx 29,10,1 + addi 10,10,32 + lvx 30,11,1 + lvx 31,10,1 + ld 0, 432(1) + ld 14,272(1) + ld 15,280(1) + ld 16,288(1) + ld 17,296(1) + ld 18,304(1) + ld 19,312(1) + ld 20,320(1) + ld 21,328(1) + ld 22,336(1) + ld 23,344(1) + ld 24,352(1) + ld 25,360(1) + ld 26,368(1) + ld 27,376(1) + ld 28,384(1) + ld 29,392(1) + ld 30,400(1) + ld 31,408(1) + mtlr 0 + addi 1,1,416 + blr +.long 0 +.byte 0,12,0x04,1,0x80,18,5,0 +.long 0 +.size ChaCha20_ctr32_vmx,.-ChaCha20_ctr32_vmx + +.globl ChaCha20_ctr32_vsx +.type ChaCha20_ctr32_vsx,@function +.align 5 +ChaCha20_ctr32_vsx: +.localentry ChaCha20_ctr32_vsx,0 + + stdu 1,-224(1) + mflr 0 + li 10,127 + li 11,143 + li 12,-1 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + stw 12,220(1) + li 12,-4096+63 + std 0, 240(1) + or 12,12,12 + + bl .Lconsts + .long 0x7E006619 + addi 12,12,0x50 + li 8,16 + li 9,32 + li 10,48 + li 11,64 + + .long 0x7E203619 + .long 0x7E483619 + .long 0x7E603E19 + + vxor 27,27,27 + .long 0x7F8B6619 + vspltw 26,19,0 + vsldoi 19,19,27,4 + vsldoi 19,27,19,12 + vadduwm 26,26,28 + + + + + + li 0,10 + mtctr 0 + b .Loop_outer_vsx + +.align 5 +.Loop_outer_vsx: + lvx 0,0,12 + lvx 1,8,12 + lvx 2,9,12 + lvx 3,10,12 + + vspltw 4,17,0 + vspltw 5,17,1 + vspltw 6,17,2 + vspltw 7,17,3 + + vspltw 8,18,0 + vspltw 9,18,1 + vspltw 10,18,2 + vspltw 11,18,3 + + vor 12,26,26 + vspltw 13,19,1 + vspltw 14,19,2 + vspltw 15,19,3 + + vspltisw 27,-16 + vspltisw 28,12 + vspltisw 29,8 + vspltisw 30,7 + +.Loop_vsx: + vadduwm 0,0,4 + vadduwm 1,1,5 + vadduwm 2,2,6 + vadduwm 3,3,7 + vxor 12,12,0 + vxor 13,13,1 + vxor 14,14,2 + vxor 15,15,3 + vrlw 12,12,27 + vrlw 13,13,27 + vrlw 14,14,27 + vrlw 15,15,27 + vadduwm 8,8,12 + vadduwm 9,9,13 + vadduwm 10,10,14 + vadduwm 11,11,15 + vxor 4,4,8 + vxor 5,5,9 + vxor 6,6,10 + vxor 7,7,11 + vrlw 4,4,28 + vrlw 5,5,28 + vrlw 6,6,28 + vrlw 7,7,28 + vadduwm 0,0,4 + vadduwm 1,1,5 + vadduwm 2,2,6 + vadduwm 3,3,7 + vxor 12,12,0 + vxor 13,13,1 + vxor 14,14,2 + vxor 15,15,3 + vrlw 12,12,29 + vrlw 13,13,29 + vrlw 14,14,29 + vrlw 15,15,29 + vadduwm 8,8,12 + vadduwm 9,9,13 + vadduwm 10,10,14 + vadduwm 11,11,15 + vxor 4,4,8 + vxor 5,5,9 + vxor 6,6,10 + vxor 7,7,11 + vrlw 4,4,30 + vrlw 5,5,30 + vrlw 6,6,30 + vrlw 7,7,30 + vadduwm 0,0,5 + vadduwm 1,1,6 + vadduwm 2,2,7 + vadduwm 3,3,4 + vxor 15,15,0 + vxor 12,12,1 + vxor 13,13,2 + vxor 14,14,3 + vrlw 15,15,27 + vrlw 12,12,27 + vrlw 13,13,27 + vrlw 14,14,27 + vadduwm 10,10,15 + vadduwm 11,11,12 + vadduwm 8,8,13 + vadduwm 9,9,14 + vxor 5,5,10 + vxor 6,6,11 + vxor 7,7,8 + vxor 4,4,9 + vrlw 5,5,28 + vrlw 6,6,28 + vrlw 7,7,28 + vrlw 4,4,28 + vadduwm 0,0,5 + vadduwm 1,1,6 + vadduwm 2,2,7 + vadduwm 3,3,4 + vxor 15,15,0 + vxor 12,12,1 + vxor 13,13,2 + vxor 14,14,3 + vrlw 15,15,29 + vrlw 12,12,29 + vrlw 13,13,29 + vrlw 14,14,29 + vadduwm 10,10,15 + vadduwm 11,11,12 + vadduwm 8,8,13 + vadduwm 9,9,14 + vxor 5,5,10 + vxor 6,6,11 + vxor 7,7,8 + vxor 4,4,9 + vrlw 5,5,30 + vrlw 6,6,30 + vrlw 7,7,30 + vrlw 4,4,30 + bdnz .Loop_vsx + + vadduwm 12,12,26 + + .long 0x13600F8C + .long 0x13821F8C + .long 0x10000E8C + .long 0x10421E8C + .long 0x13A42F8C + .long 0x13C63F8C + .long 0xF0201057 + .long 0xF0601357 + .long 0xF01BE057 + .long 0xF05BE357 + + .long 0x10842E8C + .long 0x10C63E8C + .long 0x13684F8C + .long 0x138A5F8C + .long 0xF0A43057 + .long 0xF0E43357 + .long 0xF09DF057 + .long 0xF0DDF357 + + .long 0x11084E8C + .long 0x114A5E8C + .long 0x13AC6F8C + .long 0x13CE7F8C + .long 0xF1285057 + .long 0xF1685357 + .long 0xF11BE057 + .long 0xF15BE357 + + .long 0x118C6E8C + .long 0x11CE7E8C + vspltisw 27,4 + vadduwm 26,26,27 + .long 0xF1AC7057 + .long 0xF1EC7357 + .long 0xF19DF057 + .long 0xF1DDF357 + + vadduwm 0,0,16 + vadduwm 4,4,17 + vadduwm 8,8,18 + vadduwm 12,12,19 + + + + + + + cmpldi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,1,16 + vadduwm 4,5,17 + vadduwm 8,9,18 + vadduwm 12,13,19 + + + + + + + cmpldi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,2,16 + vadduwm 4,6,17 + vadduwm 8,10,18 + vadduwm 12,14,19 + + + + + + + cmpldi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,3,16 + vadduwm 4,7,17 + vadduwm 8,11,18 + vadduwm 12,15,19 + + + + + + + cmpldi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + mtctr 0 + bne .Loop_outer_vsx + +.Ldone_vsx: + lwz 12,220(1) + li 10,127 + li 11,143 + ld 0, 240(1) + or 12,12,12 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + mtlr 0 + addi 1,1,224 + blr + +.align 4 +.Ltail_vsx: + addi 11,1,48 + mtctr 5 + .long 0x7C005F19 + .long 0x7C885F19 + .long 0x7D095F19 + .long 0x7D8A5F19 + subi 12,11,1 + subi 4,4,1 + subi 3,3,1 + +.Loop_tail_vsx: + lbzu 6,1(12) + lbzu 7,1(4) + xor 6,6,7 + stbu 6,1(3) + bdnz .Loop_tail_vsx + + .long 0x7E005F19 + .long 0x7E085F19 + .long 0x7E095F19 + .long 0x7E0A5F19 + + b .Ldone_vsx +.long 0 +.byte 0,12,0x04,1,0x80,0,5,0 +.long 0 +.size ChaCha20_ctr32_vsx,.-ChaCha20_ctr32_vsx +.align 5 +.Lconsts: + mflr 0 + bcl 20,31,$+4 + mflr 12 + addi 12,12,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.Lsigma: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 +.long 1,0,0,0 +.long 4,0,0,0 +.long 0x0e0f0c0d,0x0a0b0809,0x06070405,0x02030001 +.long 0x0d0e0f0c,0x090a0b08,0x05060704,0x01020300 +.long 0x61707865,0x61707865,0x61707865,0x61707865 +.long 0x3320646e,0x3320646e,0x3320646e,0x3320646e +.long 0x79622d32,0x79622d32,0x79622d32,0x79622d32 +.long 0x6b206574,0x6b206574,0x6b206574,0x6b206574 +.long 0,1,2,3 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,80,111,119,101,114,80,67,47,65,108,116,105,86,101,99,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 |