summaryrefslogtreecommitdiff
path: root/deps/openssl/config/archs/linux-ppc/asm_avx2/crypto/bn/ppc-mont.s
diff options
context:
space:
mode:
Diffstat (limited to 'deps/openssl/config/archs/linux-ppc/asm_avx2/crypto/bn/ppc-mont.s')
-rw-r--r--deps/openssl/config/archs/linux-ppc/asm_avx2/crypto/bn/ppc-mont.s1786
1 files changed, 1786 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/linux-ppc/asm_avx2/crypto/bn/ppc-mont.s b/deps/openssl/config/archs/linux-ppc/asm_avx2/crypto/bn/ppc-mont.s
new file mode 100644
index 0000000000..040ce625a6
--- /dev/null
+++ b/deps/openssl/config/archs/linux-ppc/asm_avx2/crypto/bn/ppc-mont.s
@@ -0,0 +1,1786 @@
+.machine "any"
+.text
+
+.globl bn_mul_mont_int
+.type bn_mul_mont_int,@function
+.align 5
+bn_mul_mont_int:
+ mr 9,3
+ li 3,0
+ cmpwi 8,32
+ bgelr
+ slwi 8,8,2
+ li 12,-4096
+ addi 3,8,256
+ subf 3,3,1
+ and 3,3,12
+ subf 3,1,3
+ mr 12,1
+ srwi 8,8,2
+ stwux 1,1,3
+
+ stw 20,-48(12)
+ stw 21,-44(12)
+ stw 22,-40(12)
+ stw 23,-36(12)
+ stw 24,-32(12)
+ stw 25,-28(12)
+ stw 26,-24(12)
+ stw 27,-20(12)
+ stw 28,-16(12)
+ stw 29,-12(12)
+ stw 30,-8(12)
+ stw 31,-4(12)
+
+ lwz 7,0(7)
+ addi 8,8,-2
+
+ lwz 23,0(5)
+ lwz 10,0(4)
+ addi 22,1,32
+ mullw 25,10,23
+ mulhwu 26,10,23
+
+ lwz 10,4(4)
+ lwz 11,0(6)
+
+ mullw 24,25,7
+
+ mullw 29,10,23
+ mulhwu 30,10,23
+
+ mullw 27,11,24
+ mulhwu 28,11,24
+ lwz 11,4(6)
+ addc 27,27,25
+ addze 28,28
+
+ mullw 31,11,24
+ mulhwu 0,11,24
+
+ mtctr 8
+ li 21,8
+.align 4
+.L1st:
+ lwzx 10,4,21
+ addc 25,29,26
+ lwzx 11,6,21
+ addze 26,30
+ mullw 29,10,23
+ addc 27,31,28
+ mulhwu 30,10,23
+ addze 28,0
+ mullw 31,11,24
+ addc 27,27,25
+ mulhwu 0,11,24
+ addze 28,28
+ stw 27,0(22)
+
+ addi 21,21,4
+ addi 22,22,4
+ bdnz .L1st
+
+ addc 25,29,26
+ addze 26,30
+
+ addc 27,31,28
+ addze 28,0
+ addc 27,27,25
+ addze 28,28
+ stw 27,0(22)
+
+ li 3,0
+ addc 28,28,26
+ addze 3,3
+ stw 28,4(22)
+
+ li 20,4
+.align 4
+.Louter:
+ lwzx 23,5,20
+ lwz 10,0(4)
+ addi 22,1,32
+ lwz 12,32(1)
+ mullw 25,10,23
+ mulhwu 26,10,23
+ lwz 10,4(4)
+ lwz 11,0(6)
+ addc 25,25,12
+ mullw 29,10,23
+ addze 26,26
+ mullw 24,25,7
+ mulhwu 30,10,23
+ mullw 27,11,24
+ mulhwu 28,11,24
+ lwz 11,4(6)
+ addc 27,27,25
+ mullw 31,11,24
+ addze 28,28
+ mulhwu 0,11,24
+
+ mtctr 8
+ li 21,8
+.align 4
+.Linner:
+ lwzx 10,4,21
+ addc 25,29,26
+ lwz 12,4(22)
+ addze 26,30
+ lwzx 11,6,21
+ addc 27,31,28
+ mullw 29,10,23
+ addze 28,0
+ mulhwu 30,10,23
+ addc 25,25,12
+ mullw 31,11,24
+ addze 26,26
+ mulhwu 0,11,24
+ addc 27,27,25
+ addi 21,21,4
+ addze 28,28
+ stw 27,0(22)
+ addi 22,22,4
+ bdnz .Linner
+
+ lwz 12,4(22)
+ addc 25,29,26
+ addze 26,30
+ addc 25,25,12
+ addze 26,26
+
+ addc 27,31,28
+ addze 28,0
+ addc 27,27,25
+ addze 28,28
+ stw 27,0(22)
+
+ addic 3,3,-1
+ li 3,0
+ adde 28,28,26
+ addze 3,3
+ stw 28,4(22)
+
+ slwi 12,8,2
+ .long 0x7c146040
+ addi 20,20,4
+ ble .Louter
+
+ addi 8,8,2
+ subfc 21,21,21
+ addi 22,1,32
+ mtctr 8
+
+.align 4
+.Lsub: lwzx 12,22,21
+ lwzx 11,6,21
+ subfe 10,11,12
+ stwx 10,9,21
+ addi 21,21,4
+ bdnz .Lsub
+
+ li 21,0
+ mtctr 8
+ subfe 3,21,3
+
+.align 4
+.Lcopy:
+ lwzx 12,22,21
+ lwzx 10,9,21
+ and 12,12,3
+ andc 10,10,3
+ stwx 21,22,21
+ or 10,10,12
+ stwx 10,9,21
+ addi 21,21,4
+ bdnz .Lcopy
+
+ lwz 12,0(1)
+ li 3,1
+ lwz 20,-48(12)
+ lwz 21,-44(12)
+ lwz 22,-40(12)
+ lwz 23,-36(12)
+ lwz 24,-32(12)
+ lwz 25,-28(12)
+ lwz 26,-24(12)
+ lwz 27,-20(12)
+ lwz 28,-16(12)
+ lwz 29,-12(12)
+ lwz 30,-8(12)
+ lwz 31,-4(12)
+ mr 1,12
+ blr
+.long 0
+.byte 0,12,4,0,0x80,12,6,0
+.long 0
+.size bn_mul_mont_int,.-bn_mul_mont_int
+.globl bn_mul4x_mont_int
+.type bn_mul4x_mont_int,@function
+.align 5
+bn_mul4x_mont_int:
+ andi. 0,8,7
+ bne .Lmul4x_do
+ .long 0x7c042840
+ bne .Lmul4x_do
+ b .Lsqr8x_do
+.Lmul4x_do:
+ slwi 8,8,2
+ mr 9,1
+ li 10,-32*4
+ sub 10,10,8
+ stwux 1,1,10
+
+ stw 14,-4*18(9)
+ stw 15,-4*17(9)
+ stw 16,-4*16(9)
+ stw 17,-4*15(9)
+ stw 18,-4*14(9)
+ stw 19,-4*13(9)
+ stw 20,-4*12(9)
+ stw 21,-4*11(9)
+ stw 22,-4*10(9)
+ stw 23,-4*9(9)
+ stw 24,-4*8(9)
+ stw 25,-4*7(9)
+ stw 26,-4*6(9)
+ stw 27,-4*5(9)
+ stw 28,-4*4(9)
+ stw 29,-4*3(9)
+ stw 30,-4*2(9)
+ stw 31,-4*1(9)
+
+ subi 4,4,4
+ subi 6,6,4
+ subi 3,3,4
+ lwz 7,0(7)
+
+ add 14,5,8
+ add 30,4,8
+ subi 14,14,4*4
+
+ lwz 27,4*0(5)
+ li 22,0
+ lwz 9,4*1(4)
+ li 23,0
+ lwz 10,4*2(4)
+ li 24,0
+ lwz 11,4*3(4)
+ li 25,0
+ lwzu 12,4*4(4)
+ lwz 18,4*1(6)
+ lwz 19,4*2(6)
+ lwz 20,4*3(6)
+ lwzu 21,4*4(6)
+
+ stw 3,4*6(1)
+ stw 14,4*7(1)
+ li 3,0
+ addic 29,1,4*7
+ li 31,0
+ li 0,0
+ b .Loop_mul4x_1st_reduction
+
+.align 5
+.Loop_mul4x_1st_reduction:
+ mullw 14,9,27
+ addze 3,3
+ mullw 15,10,27
+ addi 31,31,4
+ mullw 16,11,27
+ andi. 31,31,4*4-1
+ mullw 17,12,27
+ addc 22,22,14
+ mulhwu 14,9,27
+ adde 23,23,15
+ mulhwu 15,10,27
+ adde 24,24,16
+ mullw 28,22,7
+ adde 25,25,17
+ mulhwu 16,11,27
+ addze 26,0
+ mulhwu 17,12,27
+ lwzx 27,5,31
+ addc 23,23,14
+
+ stwu 28,4(29)
+ adde 24,24,15
+ mullw 15,19,28
+ adde 25,25,16
+ mullw 16,20,28
+ adde 26,26,17
+ mullw 17,21,28
+
+
+
+
+
+
+
+
+
+
+ addic 22,22,-1
+ mulhwu 14,18,28
+ adde 22,23,15
+ mulhwu 15,19,28
+ adde 23,24,16
+ mulhwu 16,20,28
+ adde 24,25,17
+ mulhwu 17,21,28
+ adde 25,26,3
+ addze 3,0
+ addc 22,22,14
+ adde 23,23,15
+ adde 24,24,16
+ adde 25,25,17
+
+ bne .Loop_mul4x_1st_reduction
+
+ .long 0x7c1e2040
+ beq .Lmul4x4_post_condition
+
+ lwz 9,4*1(4)
+ lwz 10,4*2(4)
+ lwz 11,4*3(4)
+ lwzu 12,4*4(4)
+ lwz 28,4*8(1)
+ lwz 18,4*1(6)
+ lwz 19,4*2(6)
+ lwz 20,4*3(6)
+ lwzu 21,4*4(6)
+ b .Loop_mul4x_1st_tail
+
+.align 5
+.Loop_mul4x_1st_tail:
+ mullw 14,9,27
+ addze 3,3
+ mullw 15,10,27
+ addi 31,31,4
+ mullw 16,11,27
+ andi. 31,31,4*4-1
+ mullw 17,12,27
+ addc 22,22,14
+ mulhwu 14,9,27
+ adde 23,23,15
+ mulhwu 15,10,27
+ adde 24,24,16
+ mulhwu 16,11,27
+ adde 25,25,17
+ mulhwu 17,12,27
+ addze 26,0
+ lwzx 27,5,31
+ addc 23,23,14
+ mullw 14,18,28
+ adde 24,24,15
+ mullw 15,19,28
+ adde 25,25,16
+ mullw 16,20,28
+ adde 26,26,17
+ mullw 17,21,28
+ addc 22,22,14
+ mulhwu 14,18,28
+ adde 23,23,15
+ mulhwu 15,19,28
+ adde 24,24,16
+ mulhwu 16,20,28
+ adde 25,25,17
+ adde 26,26,3
+ mulhwu 17,21,28
+ addze 3,0
+ addi 28,1,4*8
+ lwzx 28,28,31
+ stwu 22,4(29)
+ addc 22,23,14
+ adde 23,24,15
+ adde 24,25,16
+ adde 25,26,17
+
+ bne .Loop_mul4x_1st_tail
+
+ sub 15,30,8
+ .long 0x7c1e2040
+ beq .Lmul4x_proceed
+
+ lwz 9,4*1(4)
+ lwz 10,4*2(4)
+ lwz 11,4*3(4)
+ lwzu 12,4*4(4)
+ lwz 18,4*1(6)
+ lwz 19,4*2(6)
+ lwz 20,4*3(6)
+ lwzu 21,4*4(6)
+ b .Loop_mul4x_1st_tail
+
+.align 5
+.Lmul4x_proceed:
+ lwzu 27,4*4(5)
+ addze 3,3
+ lwz 9,4*1(15)
+ lwz 10,4*2(15)
+ lwz 11,4*3(15)
+ lwz 12,4*4(15)
+ addi 4,15,4*4
+ sub 6,6,8
+
+ stw 22,4*1(29)
+ stw 23,4*2(29)
+ stw 24,4*3(29)
+ stw 25,4*4(29)
+ stw 3,4*5(29)
+ lwz 22,4*12(1)
+ lwz 23,4*13(1)
+ lwz 24,4*14(1)
+ lwz 25,4*15(1)
+
+ lwz 18,4*1(6)
+ lwz 19,4*2(6)
+ lwz 20,4*3(6)
+ lwzu 21,4*4(6)
+ addic 29,1,4*7
+ li 3,0
+ b .Loop_mul4x_reduction
+
+.align 5
+.Loop_mul4x_reduction:
+ mullw 14,9,27
+ addze 3,3
+ mullw 15,10,27
+ addi 31,31,4
+ mullw 16,11,27
+ andi. 31,31,4*4-1
+ mullw 17,12,27
+ addc 22,22,14
+ mulhwu 14,9,27
+ adde 23,23,15
+ mulhwu 15,10,27
+ adde 24,24,16
+ mullw 28,22,7
+ adde 25,25,17
+ mulhwu 16,11,27
+ addze 26,0
+ mulhwu 17,12,27
+ lwzx 27,5,31
+ addc 23,23,14
+
+ stwu 28,4(29)
+ adde 24,24,15
+ mullw 15,19,28
+ adde 25,25,16
+ mullw 16,20,28
+ adde 26,26,17
+ mullw 17,21,28
+
+ addic 22,22,-1
+ mulhwu 14,18,28
+ adde 22,23,15
+ mulhwu 15,19,28
+ adde 23,24,16
+ mulhwu 16,20,28
+ adde 24,25,17
+ mulhwu 17,21,28
+ adde 25,26,3
+ addze 3,0
+ addc 22,22,14
+ adde 23,23,15
+ adde 24,24,16
+ adde 25,25,17
+
+ bne .Loop_mul4x_reduction
+
+ lwz 14,4*5(29)
+ addze 3,3
+ lwz 15,4*6(29)
+ lwz 16,4*7(29)
+ lwz 17,4*8(29)
+ lwz 9,4*1(4)
+ lwz 10,4*2(4)
+ lwz 11,4*3(4)
+ lwzu 12,4*4(4)
+ addc 22,22,14
+ adde 23,23,15
+ adde 24,24,16
+ adde 25,25,17
+
+
+ lwz 28,4*8(1)
+ lwz 18,4*1(6)
+ lwz 19,4*2(6)
+ lwz 20,4*3(6)
+ lwzu 21,4*4(6)
+ b .Loop_mul4x_tail
+
+.align 5
+.Loop_mul4x_tail:
+ mullw 14,9,27
+ addze 3,3
+ mullw 15,10,27
+ addi 31,31,4
+ mullw 16,11,27
+ andi. 31,31,4*4-1
+ mullw 17,12,27
+ addc 22,22,14
+ mulhwu 14,9,27
+ adde 23,23,15
+ mulhwu 15,10,27
+ adde 24,24,16
+ mulhwu 16,11,27
+ adde 25,25,17
+ mulhwu 17,12,27
+ addze 26,0
+ lwzx 27,5,31
+ addc 23,23,14
+ mullw 14,18,28
+ adde 24,24,15
+ mullw 15,19,28
+ adde 25,25,16
+ mullw 16,20,28
+ adde 26,26,17
+ mullw 17,21,28
+ addc 22,22,14
+ mulhwu 14,18,28
+ adde 23,23,15
+ mulhwu 15,19,28
+ adde 24,24,16
+ mulhwu 16,20,28
+ adde 25,25,17
+ mulhwu 17,21,28
+ adde 26,26,3
+ addi 28,1,4*8
+ lwzx 28,28,31
+ addze 3,0
+ stwu 22,4(29)
+ addc 22,23,14
+ adde 23,24,15
+ adde 24,25,16
+ adde 25,26,17
+
+ bne .Loop_mul4x_tail
+
+ lwz 14,4*5(29)
+ sub 15,6,8
+ addze 3,3
+ .long 0x7c1e2040
+ beq .Loop_mul4x_break
+
+ lwz 15,4*6(29)
+ lwz 16,4*7(29)
+ lwz 17,4*8(29)
+ lwz 9,4*1(4)
+ lwz 10,4*2(4)
+ lwz 11,4*3(4)
+ lwzu 12,4*4(4)
+ addc 22,22,14
+ adde 23,23,15
+ adde 24,24,16
+ adde 25,25,17
+
+
+ lwz 18,4*1(6)
+ lwz 19,4*2(6)
+ lwz 20,4*3(6)
+ lwzu 21,4*4(6)
+ b .Loop_mul4x_tail
+
+.align 5
+.Loop_mul4x_break:
+ lwz 16,4*6(1)
+ lwz 17,4*7(1)
+ addc 9,22,14
+ lwz 22,4*12(1)
+ addze 10,23
+ lwz 23,4*13(1)
+ addze 11,24
+ lwz 24,4*14(1)
+ addze 12,25
+ lwz 25,4*15(1)
+ addze 3,3
+ stw 9,4*1(29)
+ sub 4,30,8
+ stw 10,4*2(29)
+ stw 11,4*3(29)
+ stw 12,4*4(29)
+ stw 3,4*5(29)
+
+ lwz 18,4*1(15)
+ lwz 19,4*2(15)
+ lwz 20,4*3(15)
+ lwz 21,4*4(15)
+ addi 6,15,4*4
+ .long 0x7c058840
+ beq .Lmul4x_post
+
+ lwzu 27,4*4(5)
+ lwz 9,4*1(4)
+ lwz 10,4*2(4)
+ lwz 11,4*3(4)
+ lwzu 12,4*4(4)
+ li 3,0
+ addic 29,1,4*7
+ b .Loop_mul4x_reduction
+
+.align 5
+.Lmul4x_post:
+
+
+
+
+ srwi 31,8,4
+ mr 5,16
+ subi 31,31,1
+ mr 30,16
+ subfc 14,18,22
+ addi 29,1,4*15
+ subfe 15,19,23
+
+ mtctr 31
+.Lmul4x_sub:
+ lwz 18,4*1(6)
+ lwz 22,4*1(29)
+ subfe 16,20,24
+ lwz 19,4*2(6)
+ lwz 23,4*2(29)
+ subfe 17,21,25
+ lwz 20,4*3(6)
+ lwz 24,4*3(29)
+ lwzu 21,4*4(6)
+ lwzu 25,4*4(29)
+ stw 14,4*1(5)
+ stw 15,4*2(5)
+ subfe 14,18,22
+ stw 16,4*3(5)
+ stwu 17,4*4(5)
+ subfe 15,19,23
+ bdnz .Lmul4x_sub
+
+ lwz 9,4*1(30)
+ stw 14,4*1(5)
+ lwz 14,4*12(1)
+ subfe 16,20,24
+ lwz 10,4*2(30)
+ stw 15,4*2(5)
+ lwz 15,4*13(1)
+ subfe 17,21,25
+ subfe 3,0,3
+ addi 29,1,4*12
+ lwz 11,4*3(30)
+ stw 16,4*3(5)
+ lwz 16,4*14(1)
+ lwz 12,4*4(30)
+ stw 17,4*4(5)
+ lwz 17,4*15(1)
+
+ mtctr 31
+.Lmul4x_cond_copy:
+ and 14,14,3
+ andc 9,9,3
+ stw 0,4*0(29)
+ and 15,15,3
+ andc 10,10,3
+ stw 0,4*1(29)
+ and 16,16,3
+ andc 11,11,3
+ stw 0,4*2(29)
+ and 17,17,3
+ andc 12,12,3
+ stw 0,4*3(29)
+ or 22,14,9
+ lwz 9,4*5(30)
+ lwz 14,4*4(29)
+ or 23,15,10
+ lwz 10,4*6(30)
+ lwz 15,4*5(29)
+ or 24,16,11
+ lwz 11,4*7(30)
+ lwz 16,4*6(29)
+ or 25,17,12
+ lwz 12,4*8(30)
+ lwz 17,4*7(29)
+ addi 29,29,4*4
+ stw 22,4*1(30)
+ stw 23,4*2(30)
+ stw 24,4*3(30)
+ stwu 25,4*4(30)
+ bdnz .Lmul4x_cond_copy
+
+ lwz 5,0(1)
+ and 14,14,3
+ andc 9,9,3
+ stw 0,4*0(29)
+ and 15,15,3
+ andc 10,10,3
+ stw 0,4*1(29)
+ and 16,16,3
+ andc 11,11,3
+ stw 0,4*2(29)
+ and 17,17,3
+ andc 12,12,3
+ stw 0,4*3(29)
+ or 22,14,9
+ or 23,15,10
+ stw 0,4*4(29)
+ or 24,16,11
+ or 25,17,12
+ stw 22,4*1(30)
+ stw 23,4*2(30)
+ stw 24,4*3(30)
+ stw 25,4*4(30)
+
+ b .Lmul4x_done
+
+.align 4
+.Lmul4x4_post_condition:
+ lwz 4,4*6(1)
+ lwz 5,0(1)
+ addze 3,3
+
+ subfc 9,18,22
+ subfe 10,19,23
+ subfe 11,20,24
+ subfe 12,21,25
+ subfe 3,0,3
+
+ and 18,18,3
+ and 19,19,3
+ addc 9,9,18
+ and 20,20,3
+ adde 10,10,19
+ and 21,21,3
+ adde 11,11,20
+ adde 12,12,21
+
+ stw 9,4*1(4)
+ stw 10,4*2(4)
+ stw 11,4*3(4)
+ stw 12,4*4(4)
+
+.Lmul4x_done:
+ stw 0,4*8(1)
+ stw 0,4*9(1)
+ stw 0,4*10(1)
+ stw 0,4*11(1)
+ li 3,1
+ lwz 14,-4*18(5)
+ lwz 15,-4*17(5)
+ lwz 16,-4*16(5)
+ lwz 17,-4*15(5)
+ lwz 18,-4*14(5)
+ lwz 19,-4*13(5)
+ lwz 20,-4*12(5)
+ lwz 21,-4*11(5)
+ lwz 22,-4*10(5)
+ lwz 23,-4*9(5)
+ lwz 24,-4*8(5)
+ lwz 25,-4*7(5)
+ lwz 26,-4*6(5)
+ lwz 27,-4*5(5)
+ lwz 28,-4*4(5)
+ lwz 29,-4*3(5)
+ lwz 30,-4*2(5)
+ lwz 31,-4*1(5)
+ mr 1,5
+ blr
+.long 0
+.byte 0,12,4,0x20,0x80,18,6,0
+.long 0
+.size bn_mul4x_mont_int,.-bn_mul4x_mont_int
+.align 5
+__bn_sqr8x_mont:
+.Lsqr8x_do:
+ mr 9,1
+ slwi 10,8,3
+ li 11,-32*4
+ sub 10,11,10
+ slwi 8,8,2
+ stwux 1,1,10
+
+ stw 14,-4*18(9)
+ stw 15,-4*17(9)
+ stw 16,-4*16(9)
+ stw 17,-4*15(9)
+ stw 18,-4*14(9)
+ stw 19,-4*13(9)
+ stw 20,-4*12(9)
+ stw 21,-4*11(9)
+ stw 22,-4*10(9)
+ stw 23,-4*9(9)
+ stw 24,-4*8(9)
+ stw 25,-4*7(9)
+ stw 26,-4*6(9)
+ stw 27,-4*5(9)
+ stw 28,-4*4(9)
+ stw 29,-4*3(9)
+ stw 30,-4*2(9)
+ stw 31,-4*1(9)
+
+ subi 4,4,4
+ subi 18,6,4
+ subi 3,3,4
+ lwz 7,0(7)
+ li 0,0
+
+ add 6,4,8
+ lwz 9,4*1(4)
+
+ lwz 10,4*2(4)
+ li 23,0
+ lwz 11,4*3(4)
+ li 24,0
+ lwz 12,4*4(4)
+ li 25,0
+ lwz 14,4*5(4)
+ li 26,0
+ lwz 15,4*6(4)
+ li 27,0
+ lwz 16,4*7(4)
+ li 28,0
+ lwzu 17,4*8(4)
+ li 29,0
+
+ addi 5,1,4*11
+ subic. 30,8,4*8
+ b .Lsqr8x_zero_start
+
+.align 5
+.Lsqr8x_zero:
+ subic. 30,30,4*8
+ stw 0,4*1(5)
+ stw 0,4*2(5)
+ stw 0,4*3(5)
+ stw 0,4*4(5)
+ stw 0,4*5(5)
+ stw 0,4*6(5)
+ stw 0,4*7(5)
+ stw 0,4*8(5)
+.Lsqr8x_zero_start:
+ stw 0,4*9(5)
+ stw 0,4*10(5)
+ stw 0,4*11(5)
+ stw 0,4*12(5)
+ stw 0,4*13(5)
+ stw 0,4*14(5)
+ stw 0,4*15(5)
+ stwu 0,4*16(5)
+ bne .Lsqr8x_zero
+
+ stw 3,4*6(1)
+ stw 18,4*7(1)
+ stw 7,4*8(1)
+ stw 5,4*9(1)
+ stw 0,4*10(1)
+ addi 5,1,4*11
+
+
+.align 5
+.Lsqr8x_outer_loop:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mullw 18,10,9
+ mullw 19,11,9
+ mullw 20,12,9
+ mullw 21,14,9
+ addc 23,23,18
+ mullw 18,15,9
+ adde 24,24,19
+ mullw 19,16,9
+ adde 25,25,20
+ mullw 20,17,9
+ adde 26,26,21
+ mulhwu 21,10,9
+ adde 27,27,18
+ mulhwu 18,11,9
+ adde 28,28,19
+ mulhwu 19,12,9
+ adde 29,29,20
+ mulhwu 20,14,9
+ stw 22,4*1(5)
+ addze 22,0
+ stw 23,4*2(5)
+ addc 24,24,21
+ mulhwu 21,15,9
+ adde 25,25,18
+ mulhwu 18,16,9
+ adde 26,26,19
+ mulhwu 19,17,9
+ adde 27,27,20
+ mullw 20,11,10
+ adde 28,28,21
+ mullw 21,12,10
+ adde 29,29,18
+ mullw 18,14,10
+ adde 22,22,19
+
+ mullw 19,15,10
+ addc 25,25,20
+ mullw 20,16,10
+ adde 26,26,21
+ mullw 21,17,10
+ adde 27,27,18
+ mulhwu 18,11,10
+ adde 28,28,19
+ mulhwu 19,12,10
+ adde 29,29,20
+ mulhwu 20,14,10
+ adde 22,22,21
+ mulhwu 21,15,10
+ stw 24,4*3(5)
+ addze 23,0
+ stw 25,4*4(5)
+ addc 26,26,18
+ mulhwu 18,16,10
+ adde 27,27,19
+ mulhwu 19,17,10
+ adde 28,28,20
+ mullw 20,12,11
+ adde 29,29,21
+ mullw 21,14,11
+ adde 22,22,18
+ mullw 18,15,11
+ adde 23,23,19
+
+ mullw 19,16,11
+ addc 27,27,20
+ mullw 20,17,11
+ adde 28,28,21
+ mulhwu 21,12,11
+ adde 29,29,18
+ mulhwu 18,14,11
+ adde 22,22,19
+ mulhwu 19,15,11
+ adde 23,23,20
+ mulhwu 20,16,11
+ stw 26,4*5(5)
+ addze 24,0
+ stw 27,4*6(5)
+ addc 28,28,21
+ mulhwu 21,17,11
+ adde 29,29,18
+ mullw 18,14,12
+ adde 22,22,19
+ mullw 19,15,12
+ adde 23,23,20
+ mullw 20,16,12
+ adde 24,24,21
+
+ mullw 21,17,12
+ addc 29,29,18
+ mulhwu 18,14,12
+ adde 22,22,19
+ mulhwu 19,15,12
+ adde 23,23,20
+ mulhwu 20,16,12
+ adde 24,24,21
+ mulhwu 21,17,12
+ stw 28,4*7(5)
+ addze 25,0
+ stwu 29,4*8(5)
+ addc 22,22,18
+ mullw 18,15,14
+ adde 23,23,19
+ mullw 19,16,14
+ adde 24,24,20
+ mullw 20,17,14
+ adde 25,25,21
+
+ mulhwu 21,15,14
+ addc 23,23,18
+ mulhwu 18,16,14
+ adde 24,24,19
+ mulhwu 19,17,14
+ adde 25,25,20
+ mullw 20,16,15
+ addze 26,0
+ addc 24,24,21
+ mullw 21,17,15
+ adde 25,25,18
+ mulhwu 18,16,15
+ adde 26,26,19
+
+ mulhwu 19,17,15
+ addc 25,25,20
+ mullw 20,17,16
+ adde 26,26,21
+ mulhwu 21,17,16
+ addze 27,0
+ addc 26,26,18
+ .long 0x7c062040
+ adde 27,27,19
+
+ addc 27,27,20
+ sub 18,6,8
+ addze 28,0
+ add 28,28,21
+
+ beq .Lsqr8x_outer_break
+
+ mr 7,9
+ lwz 9,4*1(5)
+ lwz 10,4*2(5)
+ lwz 11,4*3(5)
+ lwz 12,4*4(5)
+ lwz 14,4*5(5)
+ lwz 15,4*6(5)
+ lwz 16,4*7(5)
+ lwz 17,4*8(5)
+ addc 22,22,9
+ lwz 9,4*1(4)
+ adde 23,23,10
+ lwz 10,4*2(4)
+ adde 24,24,11
+ lwz 11,4*3(4)
+ adde 25,25,12
+ lwz 12,4*4(4)
+ adde 26,26,14
+ lwz 14,4*5(4)
+ adde 27,27,15
+ lwz 15,4*6(4)
+ adde 28,28,16
+ lwz 16,4*7(4)
+ subi 3,4,4*7
+ addze 29,17
+ lwzu 17,4*8(4)
+
+ li 30,0
+ b .Lsqr8x_mul
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.align 5
+.Lsqr8x_mul:
+ mullw 18,9,7
+ addze 31,0
+ mullw 19,10,7
+ addi 30,30,4
+ mullw 20,11,7
+ andi. 30,30,4*8-1
+ mullw 21,12,7
+ addc 22,22,18
+ mullw 18,14,7
+ adde 23,23,19
+ mullw 19,15,7
+ adde 24,24,20
+ mullw 20,16,7
+ adde 25,25,21
+ mullw 21,17,7
+ adde 26,26,18
+ mulhwu 18,9,7
+ adde 27,27,19
+ mulhwu 19,10,7
+ adde 28,28,20
+ mulhwu 20,11,7
+ adde 29,29,21
+ mulhwu 21,12,7
+ addze 31,31
+ stwu 22,4(5)
+ addc 22,23,18
+ mulhwu 18,14,7
+ adde 23,24,19
+ mulhwu 19,15,7
+ adde 24,25,20
+ mulhwu 20,16,7
+ adde 25,26,21
+ mulhwu 21,17,7
+ lwzx 7,3,30
+ adde 26,27,18
+ adde 27,28,19
+ adde 28,29,20
+ adde 29,31,21
+
+ bne .Lsqr8x_mul
+
+
+ .long 0x7c043040
+ beq .Lsqr8x_break
+
+ lwz 9,4*1(5)
+ lwz 10,4*2(5)
+ lwz 11,4*3(5)
+ lwz 12,4*4(5)
+ lwz 14,4*5(5)
+ lwz 15,4*6(5)
+ lwz 16,4*7(5)
+ lwz 17,4*8(5)
+ addc 22,22,9
+ lwz 9,4*1(4)
+ adde 23,23,10
+ lwz 10,4*2(4)
+ adde 24,24,11
+ lwz 11,4*3(4)
+ adde 25,25,12
+ lwz 12,4*4(4)
+ adde 26,26,14
+ lwz 14,4*5(4)
+ adde 27,27,15
+ lwz 15,4*6(4)
+ adde 28,28,16
+ lwz 16,4*7(4)
+ adde 29,29,17
+ lwzu 17,4*8(4)
+
+ b .Lsqr8x_mul
+
+.align 5
+.Lsqr8x_break:
+ lwz 9,4*8(3)
+ addi 4,3,4*15
+ lwz 10,4*9(3)
+ sub. 18,6,4
+ lwz 11,4*10(3)
+ sub 19,5,18
+ lwz 12,4*11(3)
+ lwz 14,4*12(3)
+ lwz 15,4*13(3)
+ lwz 16,4*14(3)
+ lwz 17,4*15(3)
+ beq .Lsqr8x_outer_loop
+
+ stw 22,4*1(5)
+ lwz 22,4*1(19)
+ stw 23,4*2(5)
+ lwz 23,4*2(19)
+ stw 24,4*3(5)
+ lwz 24,4*3(19)
+ stw 25,4*4(5)
+ lwz 25,4*4(19)
+ stw 26,4*5(5)
+ lwz 26,4*5(19)
+ stw 27,4*6(5)
+ lwz 27,4*6(19)
+ stw 28,4*7(5)
+ lwz 28,4*7(19)
+ stw 29,4*8(5)
+ lwz 29,4*8(19)
+ mr 5,19
+ b .Lsqr8x_outer_loop
+
+.align 5
+.Lsqr8x_outer_break:
+
+
+ lwz 10,4*1(18)
+ lwz 12,4*2(18)
+ lwz 15,4*3(18)
+ lwz 17,4*4(18)
+ addi 4,18,4*4
+
+ lwz 19,4*13(1)
+ lwz 20,4*14(1)
+ lwz 21,4*15(1)
+ lwz 18,4*16(1)
+
+ stw 22,4*1(5)
+ srwi 30,8,4
+ stw 23,4*2(5)
+ subi 30,30,1
+ stw 24,4*3(5)
+ stw 25,4*4(5)
+ stw 26,4*5(5)
+ stw 27,4*6(5)
+ stw 28,4*7(5)
+
+ addi 5,1,4*11
+ mullw 22,10,10
+ mulhwu 10,10,10
+ add 23,19,19
+ srwi 19,19,32-1
+ mullw 11,12,12
+ mulhwu 12,12,12
+ addc 23,23,10
+ add 24,20,20
+ srwi 20,20,32-1
+ add 25,21,21
+ srwi 21,21,32-1
+ or 24,24,19
+
+ mtctr 30
+.Lsqr4x_shift_n_add:
+ mullw 14,15,15
+ mulhwu 15,15,15
+ lwz 19,4*6(5)
+ lwz 10,4*1(4)
+ adde 24,24,11
+ add 26,18,18
+ srwi 18,18,32-1
+ or 25,25,20
+ lwz 20,4*7(5)
+ adde 25,25,12
+ lwz 12,4*2(4)
+ add 27,19,19
+ srwi 19,19,32-1
+ or 26,26,21
+ lwz 21,4*8(5)
+ mullw 16,17,17
+ mulhwu 17,17,17
+ adde 26,26,14
+ add 28,20,20
+ srwi 20,20,32-1
+ or 27,27,18
+ lwz 18,4*9(5)
+ adde 27,27,15
+ lwz 15,4*3(4)
+ add 29,21,21
+ srwi 21,21,32-1
+ or 28,28,19
+ lwz 19,4*10(5)
+ mullw 9,10,10
+ mulhwu 10,10,10
+ adde 28,28,16
+ stw 22,4*1(5)
+ add 22,18,18
+ srwi 18,18,32-1
+ or 29,29,20
+ lwz 20,4*11(5)
+ adde 29,29,17
+ lwzu 17,4*4(4)
+ stw 23,4*2(5)
+ add 23,19,19
+ srwi 19,19,32-1
+ or 22,22,21
+ lwz 21,4*12(5)
+ mullw 11,12,12
+ mulhwu 12,12,12
+ adde 22,22,9
+ stw 24,4*3(5)
+ add 24,20,20
+ srwi 20,20,32-1
+ or 23,23,18
+ lwz 18,4*13(5)
+ adde 23,23,10
+ stw 25,4*4(5)
+ stw 26,4*5(5)
+ stw 27,4*6(5)
+ stw 28,4*7(5)
+ stwu 29,4*8(5)
+ add 25,21,21
+ srwi 21,21,32-1
+ or 24,24,19
+ bdnz .Lsqr4x_shift_n_add
+ lwz 4,4*7(1)
+ lwz 7,4*8(1)
+
+ mullw 14,15,15
+ mulhwu 15,15,15
+ stw 22,4*1(5)
+ lwz 22,4*12(1)
+ lwz 19,4*6(5)
+ adde 24,24,11
+ add 26,18,18
+ srwi 18,18,32-1
+ or 25,25,20
+ lwz 20,4*7(5)
+ adde 25,25,12
+ add 27,19,19
+ srwi 19,19,32-1
+ or 26,26,21
+ mullw 16,17,17
+ mulhwu 17,17,17
+ adde 26,26,14
+ add 28,20,20
+ srwi 20,20,32-1
+ or 27,27,18
+ stw 23,4*2(5)
+ lwz 23,4*13(1)
+ adde 27,27,15
+ or 28,28,19
+ lwz 9,4*1(4)
+ lwz 10,4*2(4)
+ adde 28,28,16
+ lwz 11,4*3(4)
+ lwz 12,4*4(4)
+ adde 29,17,20
+ lwz 14,4*5(4)
+ lwz 15,4*6(4)
+
+
+
+ mullw 31,7,22
+ li 30,8
+ lwz 16,4*7(4)
+ add 6,4,8
+ lwzu 17,4*8(4)
+ stw 24,4*3(5)
+ lwz 24,4*14(1)
+ stw 25,4*4(5)
+ lwz 25,4*15(1)
+ stw 26,4*5(5)
+ lwz 26,4*16(1)
+ stw 27,4*6(5)
+ lwz 27,4*17(1)
+ stw 28,4*7(5)
+ lwz 28,4*18(1)
+ stw 29,4*8(5)
+ lwz 29,4*19(1)
+ addi 5,1,4*11
+ mtctr 30
+ b .Lsqr8x_reduction
+
+.align 5
+.Lsqr8x_reduction:
+
+ mullw 19,10,31
+ mullw 20,11,31
+ stwu 31,4(5)
+ mullw 21,12,31
+
+ addic 22,22,-1
+ mullw 18,14,31
+ adde 22,23,19
+ mullw 19,15,31
+ adde 23,24,20
+ mullw 20,16,31
+ adde 24,25,21
+ mullw 21,17,31
+ adde 25,26,18
+ mulhwu 18,9,31
+ adde 26,27,19
+ mulhwu 19,10,31
+ adde 27,28,20
+ mulhwu 20,11,31
+ adde 28,29,21
+ mulhwu 21,12,31
+ addze 29,0
+ addc 22,22,18
+ mulhwu 18,14,31
+ adde 23,23,19
+ mulhwu 19,15,31
+ adde 24,24,20
+ mulhwu 20,16,31
+ adde 25,25,21
+ mulhwu 21,17,31
+ mullw 31,7,22
+ adde 26,26,18
+ adde 27,27,19
+ adde 28,28,20
+ adde 29,29,21
+ bdnz .Lsqr8x_reduction
+
+ lwz 18,4*1(5)
+ lwz 19,4*2(5)
+ lwz 20,4*3(5)
+ lwz 21,4*4(5)
+ subi 3,5,4*7
+ .long 0x7c062040
+ addc 22,22,18
+ lwz 18,4*5(5)
+ adde 23,23,19
+ lwz 19,4*6(5)
+ adde 24,24,20
+ lwz 20,4*7(5)
+ adde 25,25,21
+ lwz 21,4*8(5)
+ adde 26,26,18
+ adde 27,27,19
+ adde 28,28,20
+ adde 29,29,21
+
+ beq .Lsqr8x8_post_condition
+
+ lwz 7,4*0(3)
+ lwz 9,4*1(4)
+ lwz 10,4*2(4)
+ lwz 11,4*3(4)
+ lwz 12,4*4(4)
+ lwz 14,4*5(4)
+ lwz 15,4*6(4)
+ lwz 16,4*7(4)
+ lwzu 17,4*8(4)
+ li 30,0
+
+.align 5
+.Lsqr8x_tail:
+ mullw 18,9,7
+ addze 31,0
+ mullw 19,10,7
+ addi 30,30,4
+ mullw 20,11,7
+ andi. 30,30,4*8-1
+ mullw 21,12,7
+ addc 22,22,18
+ mullw 18,14,7
+ adde 23,23,19
+ mullw 19,15,7
+ adde 24,24,20
+ mullw 20,16,7
+ adde 25,25,21
+ mullw 21,17,7
+ adde 26,26,18
+ mulhwu 18,9,7
+ adde 27,27,19
+ mulhwu 19,10,7
+ adde 28,28,20
+ mulhwu 20,11,7
+ adde 29,29,21
+ mulhwu 21,12,7
+ addze 31,31
+ stwu 22,4(5)
+ addc 22,23,18
+ mulhwu 18,14,7
+ adde 23,24,19
+ mulhwu 19,15,7
+ adde 24,25,20
+ mulhwu 20,16,7
+ adde 25,26,21
+ mulhwu 21,17,7
+ lwzx 7,3,30
+ adde 26,27,18
+ adde 27,28,19
+ adde 28,29,20
+ adde 29,31,21
+
+ bne .Lsqr8x_tail
+
+
+ lwz 9,4*1(5)
+ lwz 31,4*10(1)
+ .long 0x7c062040
+ lwz 10,4*2(5)
+ sub 20,6,8
+ lwz 11,4*3(5)
+ lwz 12,4*4(5)
+ lwz 14,4*5(5)
+ lwz 15,4*6(5)
+ lwz 16,4*7(5)
+ lwz 17,4*8(5)
+ beq .Lsqr8x_tail_break
+
+ addc 22,22,9
+ lwz 9,4*1(4)
+ adde 23,23,10
+ lwz 10,4*2(4)
+ adde 24,24,11
+ lwz 11,4*3(4)
+ adde 25,25,12
+ lwz 12,4*4(4)
+ adde 26,26,14
+ lwz 14,4*5(4)
+ adde 27,27,15
+ lwz 15,4*6(4)
+ adde 28,28,16
+ lwz 16,4*7(4)
+ adde 29,29,17
+ lwzu 17,4*8(4)
+
+ b .Lsqr8x_tail
+
+.align 5
+.Lsqr8x_tail_break:
+ lwz 7,4*8(1)
+ lwz 21,4*9(1)
+ addi 30,5,4*8
+
+ addic 31,31,-1
+ adde 18,22,9
+ lwz 22,4*8(3)
+ lwz 9,4*1(20)
+ adde 19,23,10
+ lwz 23,4*9(3)
+ lwz 10,4*2(20)
+ adde 24,24,11
+ lwz 11,4*3(20)
+ adde 25,25,12
+ lwz 12,4*4(20)
+ adde 26,26,14
+ lwz 14,4*5(20)
+ adde 27,27,15
+ lwz 15,4*6(20)
+ adde 28,28,16
+ lwz 16,4*7(20)
+ adde 29,29,17
+ lwz 17,4*8(20)
+ addi 4,20,4*8
+ addze 20,0
+ mullw 31,7,22
+ stw 18,4*1(5)
+ .long 0x7c1ea840
+ stw 19,4*2(5)
+ li 30,8
+ stw 24,4*3(5)
+ lwz 24,4*10(3)
+ stw 25,4*4(5)
+ lwz 25,4*11(3)
+ stw 26,4*5(5)
+ lwz 26,4*12(3)
+ stw 27,4*6(5)
+ lwz 27,4*13(3)
+ stw 28,4*7(5)
+ lwz 28,4*14(3)
+ stw 29,4*8(5)
+ lwz 29,4*15(3)
+ stw 20,4*10(1)
+ addi 5,3,4*7
+ mtctr 30
+ bne .Lsqr8x_reduction
+
+
+
+
+
+
+ lwz 3,4*6(1)
+ srwi 30,8,5
+ mr 7,5
+ addi 5,5,4*8
+ subi 30,30,1
+ subfc 18,9,22
+ subfe 19,10,23
+ mr 31,20
+ mr 6,3
+
+ mtctr 30
+ b .Lsqr8x_sub
+
+.align 5
+.Lsqr8x_sub:
+ lwz 9,4*1(4)
+ lwz 22,4*1(5)
+ lwz 10,4*2(4)
+ lwz 23,4*2(5)
+ subfe 20,11,24
+ lwz 11,4*3(4)
+ lwz 24,4*3(5)
+ subfe 21,12,25
+ lwz 12,4*4(4)
+ lwz 25,4*4(5)
+ stw 18,4*1(3)
+ subfe 18,14,26
+ lwz 14,4*5(4)
+ lwz 26,4*5(5)
+ stw 19,4*2(3)
+ subfe 19,15,27
+ lwz 15,4*6(4)
+ lwz 27,4*6(5)
+ stw 20,4*3(3)
+ subfe 20,16,28
+ lwz 16,4*7(4)
+ lwz 28,4*7(5)
+ stw 21,4*4(3)
+ subfe 21,17,29
+ lwzu 17,4*8(4)
+ lwzu 29,4*8(5)
+ stw 18,4*5(3)
+ subfe 18,9,22
+ stw 19,4*6(3)
+ subfe 19,10,23
+ stw 20,4*7(3)
+ stwu 21,4*8(3)
+ bdnz .Lsqr8x_sub
+
+ srwi 30,8,4
+ lwz 9,4*1(6)
+ lwz 22,4*1(7)
+ subi 30,30,1
+ lwz 10,4*2(6)
+ lwz 23,4*2(7)
+ subfe 20,11,24
+ lwz 11,4*3(6)
+ lwz 24,4*3(7)
+ subfe 21,12,25
+ lwz 12,4*4(6)
+ lwzu 25,4*4(7)
+ stw 18,4*1(3)
+ subfe 18,14,26
+ stw 19,4*2(3)
+ subfe 19,15,27
+ stw 20,4*3(3)
+ subfe 20,16,28
+ stw 21,4*4(3)
+ subfe 21,17,29
+ stw 18,4*5(3)
+ subfe 31,0,31
+ stw 19,4*6(3)
+ stw 20,4*7(3)
+ stw 21,4*8(3)
+
+ addi 5,1,4*11
+ mtctr 30
+
+.Lsqr4x_cond_copy:
+ andc 9,9,31
+ stw 0,-4*3(7)
+ and 22,22,31
+ stw 0,-4*2(7)
+ andc 10,10,31
+ stw 0,-4*1(7)
+ and 23,23,31
+ stw 0,-4*0(7)
+ andc 11,11,31
+ stw 0,4*1(5)
+ and 24,24,31
+ stw 0,4*2(5)
+ andc 12,12,31
+ stw 0,4*3(5)
+ and 25,25,31
+ stwu 0,4*4(5)
+ or 18,9,22
+ lwz 9,4*5(6)
+ lwz 22,4*1(7)
+ or 19,10,23
+ lwz 10,4*6(6)
+ lwz 23,4*2(7)
+ or 20,11,24
+ lwz 11,4*7(6)
+ lwz 24,4*3(7)
+ or 21,12,25
+ lwz 12,4*8(6)
+ lwzu 25,4*4(7)
+ stw 18,4*1(6)
+ stw 19,4*2(6)
+ stw 20,4*3(6)
+ stwu 21,4*4(6)
+ bdnz .Lsqr4x_cond_copy
+
+ lwz 4,0(1)
+ andc 9,9,31
+ and 22,22,31
+ andc 10,10,31
+ and 23,23,31
+ andc 11,11,31
+ and 24,24,31
+ andc 12,12,31
+ and 25,25,31
+ or 18,9,22
+ or 19,10,23
+ or 20,11,24
+ or 21,12,25
+ stw 18,4*1(6)
+ stw 19,4*2(6)
+ stw 20,4*3(6)
+ stw 21,4*4(6)
+
+ b .Lsqr8x_done
+
+.align 5
+.Lsqr8x8_post_condition:
+ lwz 3,4*6(1)
+ lwz 4,0(1)
+ addze 31,0
+
+
+ subfc 22,9,22
+ subfe 23,10,23
+ stw 0,4*12(1)
+ stw 0,4*13(1)
+ subfe 24,11,24
+ stw 0,4*14(1)
+ stw 0,4*15(1)
+ subfe 25,12,25
+ stw 0,4*16(1)
+ stw 0,4*17(1)
+ subfe 26,14,26
+ stw 0,4*18(1)
+ stw 0,4*19(1)
+ subfe 27,15,27
+ stw 0,4*20(1)
+ stw 0,4*21(1)
+ subfe 28,16,28
+ stw 0,4*22(1)
+ stw 0,4*23(1)
+ subfe 29,17,29
+ stw 0,4*24(1)
+ stw 0,4*25(1)
+ subfe 31,0,31
+ stw 0,4*26(1)
+ stw 0,4*27(1)
+
+ and 9,9,31
+ and 10,10,31
+ addc 22,22,9
+ and 11,11,31
+ adde 23,23,10
+ and 12,12,31
+ adde 24,24,11
+ and 14,14,31
+ adde 25,25,12
+ and 15,15,31
+ adde 26,26,14
+ and 16,16,31
+ adde 27,27,15
+ and 17,17,31
+ adde 28,28,16
+ adde 29,29,17
+ stw 22,4*1(3)
+ stw 23,4*2(3)
+ stw 24,4*3(3)
+ stw 25,4*4(3)
+ stw 26,4*5(3)
+ stw 27,4*6(3)
+ stw 28,4*7(3)
+ stw 29,4*8(3)
+
+.Lsqr8x_done:
+ stw 0,4*8(1)
+ stw 0,4*10(1)
+
+ lwz 14,-4*18(4)
+ li 3,1
+ lwz 15,-4*17(4)
+ lwz 16,-4*16(4)
+ lwz 17,-4*15(4)
+ lwz 18,-4*14(4)
+ lwz 19,-4*13(4)
+ lwz 20,-4*12(4)
+ lwz 21,-4*11(4)
+ lwz 22,-4*10(4)
+ lwz 23,-4*9(4)
+ lwz 24,-4*8(4)
+ lwz 25,-4*7(4)
+ lwz 26,-4*6(4)
+ lwz 27,-4*5(4)
+ lwz 28,-4*4(4)
+ lwz 29,-4*3(4)
+ lwz 30,-4*2(4)
+ lwz 31,-4*1(4)
+ mr 1,4
+ blr
+.long 0
+.byte 0,12,4,0x20,0x80,18,6,0
+.long 0
+.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
+.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2