diff options
Diffstat (limited to 'deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn')
-rw-r--r-- | deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/bn-mips.S | 2180 | ||||
-rw-r--r-- | deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/mips-mont.S | 282 |
2 files changed, 2462 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/bn-mips.S b/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/bn-mips.S new file mode 100644 index 0000000000..bd5cf8340f --- /dev/null +++ b/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/bn-mips.S @@ -0,0 +1,2180 @@ +#include "mips_arch.h" + +#if defined(_MIPS_ARCH_MIPS64R6) +# define ddivu(rs,rt) +# define mfqt(rd,rs,rt) ddivu rd,rs,rt +# define mfrm(rd,rs,rt) dmodu rd,rs,rt +#elif defined(_MIPS_ARCH_MIPS32R6) +# define divu(rs,rt) +# define mfqt(rd,rs,rt) divu rd,rs,rt +# define mfrm(rd,rs,rt) modu rd,rs,rt +#else +# define ddivu(rs,rt) ddivu $0,rs,rt +# define mfqt(rd,rs,rt) mflo rd +# define mfrm(rd,rs,rt) mfhi rd +#endif + +.rdata +.asciiz "mips3.s, Version 1.2" +.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" + +.text +.set noat + +.align 5 +.globl bn_mul_add_words +.ent bn_mul_add_words +bn_mul_add_words: + .set noreorder + bgtz $6,bn_mul_add_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_mul_add_words + +.align 5 +.ent bn_mul_add_words_internal +bn_mul_add_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_mul_add_words_tail + +.L_bn_mul_add_words_loop: + ld $12,0($5) + dmultu ($12,$7) + ld $13,0($4) + ld $14,8($5) + ld $15,8($4) + ld $8,2*8($5) + ld $9,2*8($4) + daddu $13,$2 + sltu $2,$13,$2 # All manuals say it "compares 32-bit + # values", but it seems to work fine + # even on 64-bit registers. + mflo ($1,$12,$7) + mfhi ($12,$12,$7) + daddu $13,$1 + daddu $2,$12 + dmultu ($14,$7) + sltu $1,$13,$1 + sd $13,0($4) + daddu $2,$1 + + ld $10,3*8($5) + ld $11,3*8($4) + daddu $15,$2 + sltu $2,$15,$2 + mflo ($1,$14,$7) + mfhi ($14,$14,$7) + daddu $15,$1 + daddu $2,$14 + dmultu ($8,$7) + sltu $1,$15,$1 + sd $15,8($4) + daddu $2,$1 + + subu $6,4 + daddu $4,4*8 + daddu $5,4*8 + daddu $9,$2 + sltu $2,$9,$2 + mflo ($1,$8,$7) + mfhi ($8,$8,$7) + daddu $9,$1 + daddu $2,$8 + dmultu ($10,$7) + sltu $1,$9,$1 + sd $9,-2*8($4) + daddu $2,$1 + + + and $8,$6,$3 + daddu $11,$2 + sltu $2,$11,$2 + mflo ($1,$10,$7) + mfhi ($10,$10,$7) + daddu $11,$1 + daddu $2,$10 + sltu $1,$11,$1 + sd $11,-8($4) + .set noreorder + bgtz $8,.L_bn_mul_add_words_loop + daddu $2,$1 + + beqz $6,.L_bn_mul_add_words_return + nop + +.L_bn_mul_add_words_tail: + .set reorder + ld $12,0($5) + dmultu ($12,$7) + ld $13,0($4) + subu $6,1 + daddu $13,$2 + sltu $2,$13,$2 + mflo ($1,$12,$7) + mfhi ($12,$12,$7) + daddu $13,$1 + daddu $2,$12 + sltu $1,$13,$1 + sd $13,0($4) + daddu $2,$1 + beqz $6,.L_bn_mul_add_words_return + + ld $12,8($5) + dmultu ($12,$7) + ld $13,8($4) + subu $6,1 + daddu $13,$2 + sltu $2,$13,$2 + mflo ($1,$12,$7) + mfhi ($12,$12,$7) + daddu $13,$1 + daddu $2,$12 + sltu $1,$13,$1 + sd $13,8($4) + daddu $2,$1 + beqz $6,.L_bn_mul_add_words_return + + ld $12,2*8($5) + dmultu ($12,$7) + ld $13,2*8($4) + daddu $13,$2 + sltu $2,$13,$2 + mflo ($1,$12,$7) + mfhi ($12,$12,$7) + daddu $13,$1 + daddu $2,$12 + sltu $1,$13,$1 + sd $13,2*8($4) + daddu $2,$1 + +.L_bn_mul_add_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_mul_add_words_internal + +.align 5 +.globl bn_mul_words +.ent bn_mul_words +bn_mul_words: + .set noreorder + bgtz $6,bn_mul_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_mul_words + +.align 5 +.ent bn_mul_words_internal +bn_mul_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_mul_words_tail + +.L_bn_mul_words_loop: + ld $12,0($5) + dmultu ($12,$7) + ld $14,8($5) + ld $8,2*8($5) + ld $10,3*8($5) + mflo ($1,$12,$7) + mfhi ($12,$12,$7) + daddu $2,$1 + sltu $13,$2,$1 + dmultu ($14,$7) + sd $2,0($4) + daddu $2,$13,$12 + + subu $6,4 + daddu $4,4*8 + daddu $5,4*8 + mflo ($1,$14,$7) + mfhi ($14,$14,$7) + daddu $2,$1 + sltu $15,$2,$1 + dmultu ($8,$7) + sd $2,-3*8($4) + daddu $2,$15,$14 + + mflo ($1,$8,$7) + mfhi ($8,$8,$7) + daddu $2,$1 + sltu $9,$2,$1 + dmultu ($10,$7) + sd $2,-2*8($4) + daddu $2,$9,$8 + + and $8,$6,$3 + mflo ($1,$10,$7) + mfhi ($10,$10,$7) + daddu $2,$1 + sltu $11,$2,$1 + sd $2,-8($4) + .set noreorder + bgtz $8,.L_bn_mul_words_loop + daddu $2,$11,$10 + + beqz $6,.L_bn_mul_words_return + nop + +.L_bn_mul_words_tail: + .set reorder + ld $12,0($5) + dmultu ($12,$7) + subu $6,1 + mflo ($1,$12,$7) + mfhi ($12,$12,$7) + daddu $2,$1 + sltu $13,$2,$1 + sd $2,0($4) + daddu $2,$13,$12 + beqz $6,.L_bn_mul_words_return + + ld $12,8($5) + dmultu ($12,$7) + subu $6,1 + mflo ($1,$12,$7) + mfhi ($12,$12,$7) + daddu $2,$1 + sltu $13,$2,$1 + sd $2,8($4) + daddu $2,$13,$12 + beqz $6,.L_bn_mul_words_return + + ld $12,2*8($5) + dmultu ($12,$7) + mflo ($1,$12,$7) + mfhi ($12,$12,$7) + daddu $2,$1 + sltu $13,$2,$1 + sd $2,2*8($4) + daddu $2,$13,$12 + +.L_bn_mul_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_mul_words_internal + +.align 5 +.globl bn_sqr_words +.ent bn_sqr_words +bn_sqr_words: + .set noreorder + bgtz $6,bn_sqr_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_sqr_words + +.align 5 +.ent bn_sqr_words_internal +bn_sqr_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_sqr_words_tail + +.L_bn_sqr_words_loop: + ld $12,0($5) + dmultu ($12,$12) + ld $14,8($5) + ld $8,2*8($5) + ld $10,3*8($5) + mflo ($13,$12,$12) + mfhi ($12,$12,$12) + sd $13,0($4) + sd $12,8($4) + + dmultu ($14,$14) + subu $6,4 + daddu $4,8*8 + daddu $5,4*8 + mflo ($15,$14,$14) + mfhi ($14,$14,$14) + sd $15,-6*8($4) + sd $14,-5*8($4) + + dmultu ($8,$8) + mflo ($9,$8,$8) + mfhi ($8,$8,$8) + sd $9,-4*8($4) + sd $8,-3*8($4) + + + dmultu ($10,$10) + and $8,$6,$3 + mflo ($11,$10,$10) + mfhi ($10,$10,$10) + sd $11,-2*8($4) + + .set noreorder + bgtz $8,.L_bn_sqr_words_loop + sd $10,-8($4) + + beqz $6,.L_bn_sqr_words_return + nop + +.L_bn_sqr_words_tail: + .set reorder + ld $12,0($5) + dmultu ($12,$12) + subu $6,1 + mflo ($13,$12,$12) + mfhi ($12,$12,$12) + sd $13,0($4) + sd $12,8($4) + beqz $6,.L_bn_sqr_words_return + + ld $12,8($5) + dmultu ($12,$12) + subu $6,1 + mflo ($13,$12,$12) + mfhi ($12,$12,$12) + sd $13,2*8($4) + sd $12,3*8($4) + beqz $6,.L_bn_sqr_words_return + + ld $12,2*8($5) + dmultu ($12,$12) + mflo ($13,$12,$12) + mfhi ($12,$12,$12) + sd $13,4*8($4) + sd $12,5*8($4) + +.L_bn_sqr_words_return: + .set noreorder + jr $31 + move $4,$2 + +.end bn_sqr_words_internal + +.align 5 +.globl bn_add_words +.ent bn_add_words +bn_add_words: + .set noreorder + bgtz $7,bn_add_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_add_words + +.align 5 +.ent bn_add_words_internal +bn_add_words_internal: + .set reorder + li $3,-4 + and $1,$7,$3 + beqz $1,.L_bn_add_words_tail + +.L_bn_add_words_loop: + ld $12,0($5) + ld $8,0($6) + subu $7,4 + ld $13,8($5) + and $1,$7,$3 + ld $14,2*8($5) + daddu $6,4*8 + ld $15,3*8($5) + daddu $4,4*8 + ld $9,-3*8($6) + daddu $5,4*8 + ld $10,-2*8($6) + ld $11,-8($6) + daddu $8,$12 + sltu $24,$8,$12 + daddu $12,$8,$2 + sltu $2,$12,$8 + sd $12,-4*8($4) + daddu $2,$24 + + daddu $9,$13 + sltu $25,$9,$13 + daddu $13,$9,$2 + sltu $2,$13,$9 + sd $13,-3*8($4) + daddu $2,$25 + + daddu $10,$14 + sltu $24,$10,$14 + daddu $14,$10,$2 + sltu $2,$14,$10 + sd $14,-2*8($4) + daddu $2,$24 + + daddu $11,$15 + sltu $25,$11,$15 + daddu $15,$11,$2 + sltu $2,$15,$11 + sd $15,-8($4) + + .set noreorder + bgtz $1,.L_bn_add_words_loop + daddu $2,$25 + + beqz $7,.L_bn_add_words_return + nop + +.L_bn_add_words_tail: + .set reorder + ld $12,0($5) + ld $8,0($6) + daddu $8,$12 + subu $7,1 + sltu $24,$8,$12 + daddu $12,$8,$2 + sltu $2,$12,$8 + sd $12,0($4) + daddu $2,$24 + beqz $7,.L_bn_add_words_return + + ld $13,8($5) + ld $9,8($6) + daddu $9,$13 + subu $7,1 + sltu $25,$9,$13 + daddu $13,$9,$2 + sltu $2,$13,$9 + sd $13,8($4) + daddu $2,$25 + beqz $7,.L_bn_add_words_return + + ld $14,2*8($5) + ld $10,2*8($6) + daddu $10,$14 + sltu $24,$10,$14 + daddu $14,$10,$2 + sltu $2,$14,$10 + sd $14,2*8($4) + daddu $2,$24 + +.L_bn_add_words_return: + .set noreorder + jr $31 + move $4,$2 + +.end bn_add_words_internal + +.align 5 +.globl bn_sub_words +.ent bn_sub_words +bn_sub_words: + .set noreorder + bgtz $7,bn_sub_words_internal + move $2,$0 + jr $31 + move $4,$0 +.end bn_sub_words + +.align 5 +.ent bn_sub_words_internal +bn_sub_words_internal: + .set reorder + li $3,-4 + and $1,$7,$3 + beqz $1,.L_bn_sub_words_tail + +.L_bn_sub_words_loop: + ld $12,0($5) + ld $8,0($6) + subu $7,4 + ld $13,8($5) + and $1,$7,$3 + ld $14,2*8($5) + daddu $6,4*8 + ld $15,3*8($5) + daddu $4,4*8 + ld $9,-3*8($6) + daddu $5,4*8 + ld $10,-2*8($6) + ld $11,-8($6) + sltu $24,$12,$8 + dsubu $8,$12,$8 + dsubu $12,$8,$2 + sgtu $2,$12,$8 + sd $12,-4*8($4) + daddu $2,$24 + + sltu $25,$13,$9 + dsubu $9,$13,$9 + dsubu $13,$9,$2 + sgtu $2,$13,$9 + sd $13,-3*8($4) + daddu $2,$25 + + + sltu $24,$14,$10 + dsubu $10,$14,$10 + dsubu $14,$10,$2 + sgtu $2,$14,$10 + sd $14,-2*8($4) + daddu $2,$24 + + sltu $25,$15,$11 + dsubu $11,$15,$11 + dsubu $15,$11,$2 + sgtu $2,$15,$11 + sd $15,-8($4) + + .set noreorder + bgtz $1,.L_bn_sub_words_loop + daddu $2,$25 + + beqz $7,.L_bn_sub_words_return + nop + +.L_bn_sub_words_tail: + .set reorder + ld $12,0($5) + ld $8,0($6) + subu $7,1 + sltu $24,$12,$8 + dsubu $8,$12,$8 + dsubu $12,$8,$2 + sgtu $2,$12,$8 + sd $12,0($4) + daddu $2,$24 + beqz $7,.L_bn_sub_words_return + + ld $13,8($5) + subu $7,1 + ld $9,8($6) + sltu $25,$13,$9 + dsubu $9,$13,$9 + dsubu $13,$9,$2 + sgtu $2,$13,$9 + sd $13,8($4) + daddu $2,$25 + beqz $7,.L_bn_sub_words_return + + ld $14,2*8($5) + ld $10,2*8($6) + sltu $24,$14,$10 + dsubu $10,$14,$10 + dsubu $14,$10,$2 + sgtu $2,$14,$10 + sd $14,2*8($4) + daddu $2,$24 + +.L_bn_sub_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_sub_words_internal + +#if 0 +/* + * The bn_div_3_words entry point is re-used for constant-time interface. + * Implementation is retained as historical reference. + */ +.align 5 +.globl bn_div_3_words +.ent bn_div_3_words +bn_div_3_words: + .set noreorder + move $7,$4 # we know that bn_div_words does not + # touch $7, $10, $11 and preserves $6 + # so that we can save two arguments + # and return address in registers + # instead of stack:-) + + ld $4,($7) + move $10,$5 + bne $4,$6,bn_div_3_words_internal + ld $5,-8($7) + li $2,-1 + jr $31 + move $4,$2 +.end bn_div_3_words + +.align 5 +.ent bn_div_3_words_internal +bn_div_3_words_internal: + .set reorder + move $11,$31 + bal bn_div_words_internal + move $31,$11 + dmultu ($10,$2) + ld $14,-2*8($7) + move $8,$0 + mfhi ($13,$10,$2) + mflo ($12,$10,$2) + sltu $24,$13,$5 +.L_bn_div_3_words_inner_loop: + bnez $24,.L_bn_div_3_words_inner_loop_done + sgeu $1,$14,$12 + seq $25,$13,$5 + and $1,$25 + sltu $15,$12,$10 + daddu $5,$6 + dsubu $13,$15 + dsubu $12,$10 + sltu $24,$13,$5 + sltu $8,$5,$6 + or $24,$8 + .set noreorder + beqz $1,.L_bn_div_3_words_inner_loop + dsubu $2,1 + daddu $2,1 + .set reorder +.L_bn_div_3_words_inner_loop_done: + .set noreorder + jr $31 + move $4,$2 +.end bn_div_3_words_internal +#endif + +.align 5 +.globl bn_div_words +.ent bn_div_words +bn_div_words: + .set noreorder + bnez $6,bn_div_words_internal + li $2,-1 # I would rather signal div-by-zero + # which can be done with 'break 7' + jr $31 + move $4,$2 +.end bn_div_words + +.align 5 +.ent bn_div_words_internal +bn_div_words_internal: + move $3,$0 + bltz $6,.L_bn_div_words_body + move $25,$3 + dsll $6,1 + bgtz $6,.-4 + addu $25,1 + + .set reorder + negu $13,$25 + li $14,-1 + dsll $14,$13 + and $14,$4 + dsrl $1,$5,$13 + .set noreorder + beqz $14,.+12 + nop + break 6 # signal overflow + .set reorder + dsll $4,$25 + dsll $5,$25 + or $4,$1 +.L_bn_div_words_body: + dsrl $3,$6,4*8 # bits + sgeu $1,$4,$6 + .set noreorder + beqz $1,.+12 + nop + dsubu $4,$6 + .set reorder + + li $8,-1 + dsrl $9,$4,4*8 # bits + dsrl $8,4*8 # q=0xffffffff + beq $3,$9,.L_bn_div_words_skip_div1 + ddivu ($4,$3) + mfqt ($8,$4,$3) +.L_bn_div_words_skip_div1: + dmultu ($6,$8) + dsll $15,$4,4*8 # bits + dsrl $1,$5,4*8 # bits + or $15,$1 + mflo ($12,$6,$8) + mfhi ($13,$6,$8) +.L_bn_div_words_inner_loop1: + sltu $14,$15,$12 + seq $24,$9,$13 + sltu $1,$9,$13 + and $14,$24 + sltu $2,$12,$6 + or $1,$14 + .set noreorder + beqz $1,.L_bn_div_words_inner_loop1_done + dsubu $13,$2 + dsubu $12,$6 + b .L_bn_div_words_inner_loop1 + dsubu $8,1 + .set reorder +.L_bn_div_words_inner_loop1_done: + + dsll $5,4*8 # bits + dsubu $4,$15,$12 + dsll $2,$8,4*8 # bits + + li $8,-1 + dsrl $9,$4,4*8 # bits + dsrl $8,4*8 # q=0xffffffff + beq $3,$9,.L_bn_div_words_skip_div2 + ddivu ($4,$3) + mfqt ($8,$4,$3) +.L_bn_div_words_skip_div2: + dmultu ($6,$8) + dsll $15,$4,4*8 # bits + dsrl $1,$5,4*8 # bits + or $15,$1 + mflo ($12,$6,$8) + mfhi ($13,$6,$8) +.L_bn_div_words_inner_loop2: + sltu $14,$15,$12 + seq $24,$9,$13 + sltu $1,$9,$13 + and $14,$24 + sltu $3,$12,$6 + or $1,$14 + .set noreorder + beqz $1,.L_bn_div_words_inner_loop2_done + dsubu $13,$3 + dsubu $12,$6 + b .L_bn_div_words_inner_loop2 + dsubu $8,1 + .set reorder +.L_bn_div_words_inner_loop2_done: + + dsubu $4,$15,$12 + or $2,$8 + dsrl $3,$4,$25 # $3 contains remainder if anybody wants it + dsrl $6,$25 # restore $6 + + .set noreorder + move $5,$3 + jr $31 + move $4,$2 +.end bn_div_words_internal + +.align 5 +.globl bn_mul_comba8 +.ent bn_mul_comba8 +bn_mul_comba8: + .set noreorder + .frame $29,6*8,$31 + .mask 0x003f0000,-8 + dsubu $29,6*8 + sd $21,5*8($29) + sd $20,4*8($29) + sd $19,3*8($29) + sd $18,2*8($29) + sd $17,1*8($29) + sd $16,0*8($29) + + .set reorder + ld $12,0($5) # If compiled with -mips3 option on + # R5000 box assembler barks on this + # 1ine with "should not have mult/div + # as last instruction in bb (R10K + # bug)" warning. If anybody out there + # has a clue about how to circumvent + # this do send me a note. + # <appro@fy.chalmers.se> + + ld $8,0($6) + ld $13,8($5) + ld $14,2*8($5) + dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); + ld $15,3*8($5) + ld $9,8($6) + ld $10,2*8($6) + ld $11,3*8($6) + mflo ($2,$12,$8) + mfhi ($3,$12,$8) + + ld $16,4*8($5) + ld $18,5*8($5) + dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); + ld $20,6*8($5) + ld $5,7*8($5) + ld $17,4*8($6) + ld $19,5*8($6) + mflo ($24,$12,$9) + mfhi ($25,$12,$9) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); + daddu $7,$25,$1 + ld $21,6*8($6) + ld $6,7*8($6) + sd $2,0($4) # r[0]=c1; + mflo ($24,$13,$8) + mfhi ($25,$13,$8) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $2,$7,$25 + sd $3,8($4) # r[1]=c2; + + mflo ($24,$14,$8) + mfhi ($25,$14,$8) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + mflo ($24,$13,$9) + mfhi ($25,$13,$9) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $3,$2,$25 + mflo ($24,$12,$10) + mfhi ($25,$12,$10) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,2*8($4) # r[2]=c3; + + mflo ($24,$12,$11) + mfhi ($25,$12,$11) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $7,$3,$25 + mflo ($24,$13,$10) + mfhi ($25,$13,$10) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$14,$9) + mfhi ($25,$14,$9) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$15,$8) + mfhi ($25,$15,$8) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($16,$8) # mul_add_c(a[4],b[0],c2,c3,c1); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + sd $2,3*8($4) # r[3]=c1; + + mflo ($24,$16,$8) + mfhi ($25,$16,$8) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $2,$7,$25 + mflo ($24,$15,$9) + mfhi ($25,$15,$9) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$14,$10) + mfhi ($25,$14,$10) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$13,$11) + mfhi ($25,$13,$11) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($12,$17) # mul_add_c(a[0],b[4],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$12,$17) + mfhi ($25,$12,$17) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($12,$19) # mul_add_c(a[0],b[5],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + sd $3,4*8($4) # r[4]=c2; + + mflo ($24,$12,$19) + mfhi ($25,$12,$19) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($13,$17) # mul_add_c(a[1],b[4],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $3,$2,$25 + mflo ($24,$13,$17) + mfhi ($25,$13,$17) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$14,$11) + mfhi ($25,$14,$11) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$15,$10) + mfhi ($25,$15,$10) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($16,$9) # mul_add_c(a[4],b[1],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$16,$9) + mfhi ($25,$16,$9) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($18,$8) # mul_add_c(a[5],b[0],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$18,$8) + mfhi ($25,$18,$8) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($20,$8) # mul_add_c(a[6],b[0],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,5*8($4) # r[5]=c3; + + mflo ($24,$20,$8) + mfhi ($25,$20,$8) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($18,$9) # mul_add_c(a[5],b[1],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $7,$3,$25 + mflo ($24,$18,$9) + mfhi ($25,$18,$9) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($16,$10) # mul_add_c(a[4],b[2],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$16,$10) + mfhi ($25,$16,$10) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$15,$11) + mfhi ($25,$15,$11) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($14,$17) # mul_add_c(a[2],b[4],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$14,$17) + mfhi ($25,$14,$17) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($13,$19) # mul_add_c(a[1],b[5],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$13,$19) + mfhi ($25,$13,$19) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($12,$21) # mul_add_c(a[0],b[6],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$12,$21) + mfhi ($25,$12,$21) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($12,$6) # mul_add_c(a[0],b[7],c2,c3,c1); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + sd $2,6*8($4) # r[6]=c1; + + mflo ($24,$12,$6) + mfhi ($25,$12,$6) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($13,$21) # mul_add_c(a[1],b[6],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $2,$7,$25 + mflo ($24,$13,$21) + mfhi ($25,$13,$21) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$19) # mul_add_c(a[2],b[5],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$14,$19) + mfhi ($25,$14,$19) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($15,$17) # mul_add_c(a[3],b[4],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$15,$17) + mfhi ($25,$15,$17) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($16,$11) # mul_add_c(a[4],b[3],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$16,$11) + mfhi ($25,$16,$11) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($18,$10) # mul_add_c(a[5],b[2],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$18,$10) + mfhi ($25,$18,$10) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($20,$9) # mul_add_c(a[6],b[1],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$20,$9) + mfhi ($25,$20,$9) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($5,$8) # mul_add_c(a[7],b[0],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$5,$8) + mfhi ($25,$5,$8) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($5,$9) # mul_add_c(a[7],b[1],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + sd $3,7*8($4) # r[7]=c2; + + mflo ($24,$5,$9) + mfhi ($25,$5,$9) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($20,$10) # mul_add_c(a[6],b[2],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $3,$2,$25 + mflo ($24,$20,$10) + mfhi ($25,$20,$10) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($18,$11) # mul_add_c(a[5],b[3],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$18,$11) + mfhi ($25,$18,$11) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($16,$17) # mul_add_c(a[4],b[4],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$16,$17) + mfhi ($25,$16,$17) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($15,$19) # mul_add_c(a[3],b[5],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$15,$19) + mfhi ($25,$15,$19) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($14,$21) # mul_add_c(a[2],b[6],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$14,$21) + mfhi ($25,$14,$21) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($13,$6) # mul_add_c(a[1],b[7],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$13,$6) + mfhi ($25,$13,$6) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($14,$6) # mul_add_c(a[2],b[7],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,8*8($4) # r[8]=c3; + + mflo ($24,$14,$6) + mfhi ($25,$14,$6) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($15,$21) # mul_add_c(a[3],b[6],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $7,$3,$25 + mflo ($24,$15,$21) + mfhi ($25,$15,$21) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($16,$19) # mul_add_c(a[4],b[5],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$16,$19) + mfhi ($25,$16,$19) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($18,$17) # mul_add_c(a[5],b[4],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$18,$17) + mfhi ($25,$18,$17) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($20,$11) # mul_add_c(a[6],b[3],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$20,$11) + mfhi ($25,$20,$11) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($5,$10) # mul_add_c(a[7],b[2],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$5,$10) + mfhi ($25,$5,$10) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($5,$11) # mul_add_c(a[7],b[3],c2,c3,c1); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + sd $2,9*8($4) # r[9]=c1; + + mflo ($24,$5,$11) + mfhi ($25,$5,$11) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($20,$17) # mul_add_c(a[6],b[4],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $2,$7,$25 + mflo ($24,$20,$17) + mfhi ($25,$20,$17) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($18,$19) # mul_add_c(a[5],b[5],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$18,$19) + mfhi ($25,$18,$19) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($16,$21) # mul_add_c(a[4],b[6],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$16,$21) + mfhi ($25,$16,$21) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($15,$6) # mul_add_c(a[3],b[7],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$15,$6) + mfhi ($25,$15,$6) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($16,$6) # mul_add_c(a[4],b[7],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + sd $3,10*8($4) # r[10]=c2; + + mflo ($24,$16,$6) + mfhi ($25,$16,$6) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($18,$21) # mul_add_c(a[5],b[6],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $3,$2,$25 + mflo ($24,$18,$21) + mfhi ($25,$18,$21) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($20,$19) # mul_add_c(a[6],b[5],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$20,$19) + mfhi ($25,$20,$19) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($5,$17) # mul_add_c(a[7],b[4],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + mflo ($24,$5,$17) + mfhi ($25,$5,$17) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($5,$19) # mul_add_c(a[7],b[5],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,11*8($4) # r[11]=c3; + + mflo ($24,$5,$19) + mfhi ($25,$5,$19) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($20,$21) # mul_add_c(a[6],b[6],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $7,$3,$25 + mflo ($24,$20,$21) + mfhi ($25,$20,$21) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($18,$6) # mul_add_c(a[5],b[7],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$18,$6) + mfhi ($25,$18,$6) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($20,$6) # mul_add_c(a[6],b[7],c2,c3,c1); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + sd $2,12*8($4) # r[12]=c1; + + mflo ($24,$20,$6) + mfhi ($25,$20,$6) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($5,$21) # mul_add_c(a[7],b[6],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $2,$7,$25 + mflo ($24,$5,$21) + mfhi ($25,$5,$21) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($5,$6) # mul_add_c(a[7],b[7],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + sd $3,13*8($4) # r[13]=c2; + + mflo ($24,$5,$6) + mfhi ($25,$5,$6) + daddu $7,$24 + sltu $1,$7,$24 + daddu $25,$1 + daddu $2,$25 + sd $7,14*8($4) # r[14]=c3; + sd $2,15*8($4) # r[15]=c1; + + .set noreorder + ld $21,5*8($29) + ld $20,4*8($29) + ld $19,3*8($29) + ld $18,2*8($29) + ld $17,1*8($29) + ld $16,0*8($29) + jr $31 + daddu $29,6*8 +.end bn_mul_comba8 + +.align 5 +.globl bn_mul_comba4 +.ent bn_mul_comba4 +bn_mul_comba4: + .set reorder + ld $12,0($5) + ld $8,0($6) + ld $13,8($5) + ld $14,2*8($5) + dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3); + ld $15,3*8($5) + ld $9,8($6) + ld $10,2*8($6) + ld $11,3*8($6) + mflo ($2,$12,$8) + mfhi ($3,$12,$8) + sd $2,0($4) + + dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1); + mflo ($24,$12,$9) + mfhi ($25,$12,$9) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1); + daddu $7,$25,$1 + mflo ($24,$13,$8) + mfhi ($25,$13,$8) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $2,$7,$25 + sd $3,8($4) + + mflo ($24,$14,$8) + mfhi ($25,$14,$8) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + mflo ($24,$13,$9) + mfhi ($25,$13,$9) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $3,$2,$25 + mflo ($24,$12,$10) + mfhi ($25,$12,$10) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,2*8($4) + + mflo ($24,$12,$11) + mfhi ($25,$12,$11) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $7,$3,$25 + mflo ($24,$13,$10) + mfhi ($25,$13,$10) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$14,$9) + mfhi ($25,$14,$9) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + mflo ($24,$15,$8) + mfhi ($25,$15,$8) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + sd $2,3*8($4) + + mflo ($24,$15,$9) + mfhi ($25,$15,$9) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $2,$7,$25 + mflo ($24,$14,$10) + mfhi ($25,$14,$10) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + mflo ($24,$13,$11) + mfhi ($25,$13,$11) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + sd $3,4*8($4) + + mflo ($24,$14,$11) + mfhi ($25,$14,$11) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2); + daddu $25,$1 + daddu $2,$25 + sltu $3,$2,$25 + mflo ($24,$15,$10) + mfhi ($25,$15,$10) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,5*8($4) + + mflo ($24,$15,$11) + mfhi ($25,$15,$11) + daddu $2,$24 + sltu $1,$2,$24 + daddu $25,$1 + daddu $3,$25 + sd $2,6*8($4) + sd $3,7*8($4) + + .set noreorder + jr $31 + nop +.end bn_mul_comba4 + +.align 5 +.globl bn_sqr_comba8 +.ent bn_sqr_comba8 +bn_sqr_comba8: + .set reorder + ld $12,0($5) + ld $13,8($5) + ld $14,2*8($5) + ld $15,3*8($5) + + dmultu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); + ld $8,4*8($5) + ld $9,5*8($5) + ld $10,6*8($5) + ld $11,7*8($5) + mflo ($2,$12,$12) + mfhi ($3,$12,$12) + sd $2,0($4) + + dmultu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo ($24,$12,$13) + mfhi ($25,$12,$13) + slt $2,$25,$0 + dsll $25,1 + dmultu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); + slt $6,$24,$0 + daddu $25,$6 + dsll $24,1 + daddu $3,$24 + sltu $1,$3,$24 + daddu $7,$25,$1 + sd $3,8($4) + mflo ($24,$14,$12) + mfhi ($25,$14,$12) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($13,$13) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $3,$2,$1 + daddu $2,$25 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$13,$13) + mfhi ($25,$13,$13) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,2*8($4) + mflo ($24,$12,$15) + mfhi ($25,$12,$15) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($13,$14) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $7,$3,$1 + daddu $3,$25 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$13,$14) + mfhi ($25,$13,$14) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($8,$12) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $1,$3,$1 + daddu $3,$25 + daddu $7,$1 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$8,$12) + mfhi ($25,$8,$12) + sd $2,3*8($4) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($15,$13) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $2,$7,$1 + daddu $7,$25 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$15,$13) + mfhi ($25,$15,$13) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$14) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $1,$7,$1 + daddu $7,$25 + daddu $2,$1 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$14,$14) + mfhi ($25,$14,$14) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + sd $3,4*8($4) + mflo ($24,$12,$9) + mfhi ($25,$12,$9) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($13,$8) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $3,$2,$1 + daddu $2,$25 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$13,$8) + mfhi ($25,$13,$8) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($14,$15) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $1,$2,$1 + daddu $2,$25 + daddu $3,$1 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$14,$15) + mfhi ($25,$14,$15) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($10,$12) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $1,$2,$1 + daddu $2,$25 + daddu $3,$1 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$10,$12) + mfhi ($25,$10,$12) + sd $7,5*8($4) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($9,$13) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $7,$3,$1 + daddu $3,$25 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$9,$13) + mfhi ($25,$9,$13) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($8,$14) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $1,$3,$1 + daddu $3,$25 + daddu $7,$1 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$8,$14) + mfhi ($25,$8,$14) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($15,$15) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $1,$3,$1 + daddu $3,$25 + daddu $7,$1 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$15,$15) + mfhi ($25,$15,$15) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + sd $2,6*8($4) + mflo ($24,$12,$11) + mfhi ($25,$12,$11) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($13,$10) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $2,$7,$1 + daddu $7,$25 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$13,$10) + mfhi ($25,$13,$10) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$9) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $1,$7,$1 + daddu $7,$25 + daddu $2,$1 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$14,$9) + mfhi ($25,$14,$9) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($15,$8) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $1,$7,$1 + daddu $7,$25 + daddu $2,$1 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$15,$8) + mfhi ($25,$15,$8) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($11,$13) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $1,$7,$1 + daddu $7,$25 + daddu $2,$1 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$11,$13) + mfhi ($25,$11,$13) + sd $3,7*8($4) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($10,$14) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $3,$2,$1 + daddu $2,$25 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$10,$14) + mfhi ($25,$10,$14) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($9,$15) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $1,$2,$1 + daddu $2,$25 + daddu $3,$1 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$9,$15) + mfhi ($25,$9,$15) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($8,$8) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $1,$2,$1 + daddu $2,$25 + daddu $3,$1 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$8,$8) + mfhi ($25,$8,$8) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,8*8($4) + mflo ($24,$14,$11) + mfhi ($25,$14,$11) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($15,$10) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $7,$3,$1 + daddu $3,$25 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$15,$10) + mfhi ($25,$15,$10) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($8,$9) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $1,$3,$1 + daddu $3,$25 + daddu $7,$1 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$8,$9) + mfhi ($25,$8,$9) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($11,$15) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $1,$3,$1 + daddu $3,$25 + daddu $7,$1 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$11,$15) + mfhi ($25,$11,$15) + sd $2,9*8($4) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($10,$8) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $2,$7,$1 + daddu $7,$25 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$10,$8) + mfhi ($25,$10,$8) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($9,$9) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $1,$7,$1 + daddu $7,$25 + daddu $2,$1 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$9,$9) + mfhi ($25,$9,$9) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($8,$11) # mul_add_c2(a[4],b[7],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + sd $3,10*8($4) + mflo ($24,$8,$11) + mfhi ($25,$8,$11) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($9,$10) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $3,$2,$1 + daddu $2,$25 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$9,$10) + mfhi ($25,$9,$10) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($11,$9) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $1,$2,$1 + daddu $2,$25 + daddu $3,$1 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$11,$9) + mfhi ($25,$11,$9) + sd $7,11*8($4) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($10,$10) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $7,$3,$1 + daddu $3,$25 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$10,$10) + mfhi ($25,$10,$10) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1); + daddu $25,$1 + daddu $3,$25 + sltu $1,$3,$25 + daddu $7,$1 + sd $2,12*8($4) + mflo ($24,$10,$11) + mfhi ($25,$10,$11) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($11,$11) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $2,$7,$1 + daddu $7,$25 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$11,$11) + mfhi ($25,$11,$11) + sd $3,13*8($4) + + daddu $7,$24 + sltu $1,$7,$24 + daddu $25,$1 + daddu $2,$25 + sd $7,14*8($4) + sd $2,15*8($4) + + .set noreorder + jr $31 + nop +.end bn_sqr_comba8 + +.align 5 +.globl bn_sqr_comba4 +.ent bn_sqr_comba4 +bn_sqr_comba4: + .set reorder + ld $12,0($5) + ld $13,8($5) + dmultu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3); + ld $14,2*8($5) + ld $15,3*8($5) + mflo ($2,$12,$12) + mfhi ($3,$12,$12) + sd $2,0($4) + + dmultu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo ($24,$12,$13) + mfhi ($25,$12,$13) + slt $2,$25,$0 + dsll $25,1 + dmultu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2); + slt $6,$24,$0 + daddu $25,$6 + dsll $24,1 + daddu $3,$24 + sltu $1,$3,$24 + daddu $7,$25,$1 + sd $3,8($4) + mflo ($24,$14,$12) + mfhi ($25,$14,$12) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($13,$13) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $3,$2,$1 + daddu $2,$25 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$13,$13) + mfhi ($25,$13,$13) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3); + daddu $25,$1 + daddu $2,$25 + sltu $1,$2,$25 + daddu $3,$1 + sd $7,2*8($4) + mflo ($24,$12,$15) + mfhi ($25,$12,$15) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($13,$14) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $7,$3,$1 + daddu $3,$25 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$13,$14) + mfhi ($25,$13,$14) + daddu $2,$24 + sltu $1,$2,$24 + dmultu ($15,$13) # forward multiplication + daddu $2,$24 + daddu $1,$25 + sltu $24,$2,$24 + daddu $3,$1 + daddu $25,$24 + sltu $1,$3,$1 + daddu $3,$25 + daddu $7,$1 + sltu $25,$3,$25 + daddu $7,$25 + mflo ($24,$15,$13) + mfhi ($25,$15,$13) + sd $2,3*8($4) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$14) # forward multiplication + daddu $3,$24 + daddu $1,$25 + sltu $24,$3,$24 + daddu $7,$1 + daddu $25,$24 + sltu $2,$7,$1 + daddu $7,$25 + sltu $25,$7,$25 + daddu $2,$25 + mflo ($24,$14,$14) + mfhi ($25,$14,$14) + daddu $3,$24 + sltu $1,$3,$24 + dmultu ($14,$15) # mul_add_c2(a[2],b[3],c3,c1,c2); + daddu $25,$1 + daddu $7,$25 + sltu $1,$7,$25 + daddu $2,$1 + sd $3,4*8($4) + mflo ($24,$14,$15) + mfhi ($25,$14,$15) + daddu $7,$24 + sltu $1,$7,$24 + dmultu ($15,$15) # forward multiplication + daddu $7,$24 + daddu $1,$25 + sltu $24,$7,$24 + daddu $2,$1 + daddu $25,$24 + sltu $3,$2,$1 + daddu $2,$25 + sltu $25,$2,$25 + daddu $3,$25 + mflo ($24,$15,$15) + mfhi ($25,$15,$15) + sd $7,5*8($4) + + daddu $2,$24 + sltu $1,$2,$24 + daddu $25,$1 + daddu $3,$25 + sd $2,6*8($4) + sd $3,7*8($4) + + .set noreorder + jr $31 + nop +.end bn_sqr_comba4 diff --git a/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/mips-mont.S b/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/mips-mont.S new file mode 100644 index 0000000000..f107837fa1 --- /dev/null +++ b/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/mips-mont.S @@ -0,0 +1,282 @@ +#include "mips_arch.h" + +.text + +.set noat +.set noreorder + +.align 5 +.globl bn_mul_mont +.ent bn_mul_mont +bn_mul_mont: + slt $1,$9,4 + bnez $1,1f + li $2,0 + slt $1,$9,17 # on in-order CPU + bnez $1,bn_mul_mont_internal + nop +1: jr $31 + li $4,0 +.end bn_mul_mont + +.align 5 +.ent bn_mul_mont_internal +bn_mul_mont_internal: + .frame $30,14*8,$31 + .mask 0x40000000|16711680,-8 + dsubu $29,14*8 + sd $30,(14-1)*8($29) + sd $23,(14-2)*8($29) + sd $22,(14-3)*8($29) + sd $21,(14-4)*8($29) + sd $20,(14-5)*8($29) + sd $19,(14-6)*8($29) + sd $18,(14-7)*8($29) + sd $17,(14-8)*8($29) + sd $16,(14-9)*8($29) + move $30,$29 + + .set reorder + ld $8,0($8) + ld $13,0($6) # bp[0] + ld $12,0($5) # ap[0] + ld $14,0($7) # np[0] + + dsubu $29,2*8 # place for two extra words + sll $9,3 + li $1,-4096 + dsubu $29,$9 + and $29,$1 + + dmultu ($12,$13) + ld $17,8($5) + ld $19,8($7) + mflo ($10,$12,$13) + mfhi ($11,$12,$13) + dmultu ($10,$8) + mflo ($23,$10,$8) + + dmultu ($17,$13) + mflo ($16,$17,$13) + mfhi ($17,$17,$13) + + dmultu ($14,$23) + mflo ($24,$14,$23) + mfhi ($25,$14,$23) + dmultu ($19,$23) + daddu $24,$10 + sltu $1,$24,$10 + daddu $25,$1 + mflo ($18,$19,$23) + mfhi ($19,$19,$23) + + move $15,$29 + li $22,2*8 +.align 4 +.L1st: + .set noreorder + daddu $12,$5,$22 + daddu $14,$7,$22 + ld $12,($12) + ld $14,($14) + + dmultu ($12,$13) + daddu $10,$16,$11 + daddu $24,$18,$25 + sltu $1,$10,$11 + sltu $2,$24,$25 + daddu $11,$17,$1 + daddu $25,$19,$2 + mflo ($16,$12,$13) + mfhi ($17,$12,$13) + + daddu $24,$10 + sltu $1,$24,$10 + dmultu ($14,$23) + daddu $25,$1 + addu $22,8 + sd $24,($15) + sltu $2,$22,$9 + mflo ($18,$14,$23) + mfhi ($19,$14,$23) + + bnez $2,.L1st + daddu $15,8 + .set reorder + + daddu $10,$16,$11 + sltu $1,$10,$11 + daddu $11,$17,$1 + + daddu $24,$18,$25 + sltu $2,$24,$25 + daddu $25,$19,$2 + daddu $24,$10 + sltu $1,$24,$10 + daddu $25,$1 + + sd $24,($15) + + daddu $25,$11 + sltu $1,$25,$11 + sd $25,8($15) + sd $1,2*8($15) + + li $21,8 +.align 4 +.Louter: + daddu $13,$6,$21 + ld $13,($13) + ld $12,($5) + ld $17,8($5) + ld $20,($29) + + dmultu ($12,$13) + ld $14,($7) + ld $19,8($7) + mflo ($10,$12,$13) + mfhi ($11,$12,$13) + daddu $10,$20 + dmultu ($10,$8) + sltu $1,$10,$20 + daddu $11,$1 + mflo ($23,$10,$8) + + dmultu ($17,$13) + mflo ($16,$17,$13) + mfhi ($17,$17,$13) + + dmultu ($14,$23) + mflo ($24,$14,$23) + mfhi ($25,$14,$23) + + dmultu ($19,$23) + daddu $24,$10 + sltu $1,$24,$10 + daddu $25,$1 + mflo ($18,$19,$23) + mfhi ($19,$19,$23) + + move $15,$29 + li $22,2*8 + ld $20,8($15) +.align 4 +.Linner: + .set noreorder + daddu $12,$5,$22 + daddu $14,$7,$22 + ld $12,($12) + ld $14,($14) + + dmultu ($12,$13) + daddu $10,$16,$11 + daddu $24,$18,$25 + sltu $1,$10,$11 + sltu $2,$24,$25 + daddu $11,$17,$1 + daddu $25,$19,$2 + mflo ($16,$12,$13) + mfhi ($17,$12,$13) + + daddu $10,$20 + addu $22,8 + dmultu ($14,$23) + sltu $1,$10,$20 + daddu $24,$10 + daddu $11,$1 + sltu $2,$24,$10 + ld $20,2*8($15) + daddu $25,$2 + sltu $1,$22,$9 + mflo ($18,$14,$23) + mfhi ($19,$14,$23) + sd $24,($15) + bnez $1,.Linner + daddu $15,8 + .set reorder + + daddu $10,$16,$11 + sltu $1,$10,$11 + daddu $11,$17,$1 + daddu $10,$20 + sltu $2,$10,$20 + daddu $11,$2 + + ld $20,2*8($15) + daddu $24,$18,$25 + sltu $1,$24,$25 + daddu $25,$19,$1 + daddu $24,$10 + sltu $2,$24,$10 + daddu $25,$2 + sd $24,($15) + + daddu $24,$25,$11 + sltu $25,$24,$11 + daddu $24,$20 + sltu $1,$24,$20 + daddu $25,$1 + sd $24,8($15) + sd $25,2*8($15) + + addu $21,8 + sltu $2,$21,$9 + bnez $2,.Louter + + .set noreorder + daddu $20,$29,$9 # &tp[num] + move $15,$29 + move $5,$29 + li $11,0 # clear borrow bit + +.align 4 +.Lsub: ld $10,($15) + ld $24,($7) + daddu $15,8 + daddu $7,8 + dsubu $24,$10,$24 # tp[i]-np[i] + sgtu $1,$24,$10 + dsubu $10,$24,$11 + sgtu $11,$10,$24 + sd $10,($4) + or $11,$1 + sltu $1,$15,$20 + bnez $1,.Lsub + daddu $4,8 + + dsubu $11,$25,$11 # handle upmost overflow bit + move $15,$29 + dsubu $4,$9 # restore rp + not $25,$11 + +.Lcopy: ld $14,($15) # conditional move + ld $12,($4) + sd $0,($15) + daddu $15,8 + and $14,$11 + and $12,$25 + or $12,$14 + sltu $1,$15,$20 + sd $12,($4) + bnez $1,.Lcopy + daddu $4,8 + + li $4,1 + li $2,1 + + .set noreorder + move $29,$30 + ld $30,(14-1)*8($29) + ld $23,(14-2)*8($29) + ld $22,(14-3)*8($29) + ld $21,(14-4)*8($29) + ld $20,(14-5)*8($29) + ld $19,(14-6)*8($29) + ld $18,(14-7)*8($29) + ld $17,(14-8)*8($29) + ld $16,(14-9)*8($29) + jr $31 + daddu $29,14*8 +.end bn_mul_mont_internal +.rdata +.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>" |