Diffstat (limited to 'deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn')
-rw-r--r--  deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/bn-mips.S    2180
-rw-r--r--  deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/mips-mont.S    282
2 files changed, 2462 insertions, 0 deletions
diff --git a/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/bn-mips.S b/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/bn-mips.S
new file mode 100644
index 0000000000..bd5cf8340f
--- /dev/null
+++ b/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/bn-mips.S
@@ -0,0 +1,2180 @@
+#include "mips_arch.h"
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+# define ddivu(rs,rt)
+# define mfqt(rd,rs,rt) ddivu rd,rs,rt
+# define mfrm(rd,rs,rt) dmodu rd,rs,rt
+#elif defined(_MIPS_ARCH_MIPS32R6)
+# define divu(rs,rt)
+# define mfqt(rd,rs,rt) divu rd,rs,rt
+# define mfrm(rd,rs,rt) modu rd,rs,rt
+#else
+# define ddivu(rs,rt) ddivu $0,rs,rt
+# define mfqt(rd,rs,rt) mflo rd
+# define mfrm(rd,rs,rt) mfhi rd
+#endif
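
These macros bridge the MIPS R6 ISA break: pre-R6 cores leave divide results in the HI/LO register pair (fetched with mflo/mfhi), while R6 removed HI/LO and made ddivu/dmodu three-operand instructions. A plain-C sketch of the semantics the macro trio delivers on either revision (the helper name is illustrative, not part of this file):

    #include <stdint.h>

    /* What ddivu + mfqt + mfrm jointly compute on any ISA revision:
     * the unsigned 64-bit quotient and remainder of rs / rt. */
    static void div_qr(uint64_t rs, uint64_t rt, uint64_t *q, uint64_t *r)
    {
        *q = rs / rt;   /* pre-R6: ddivu $0,rs,rt then mflo; R6: ddivu rd,rs,rt */
        *r = rs % rt;   /* pre-R6: mfhi;                     R6: dmodu rd,rs,rt */
    }
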
+
+.rdata
+.asciiz "mips3.s, Version 1.2"
+.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+
+.text
+.set noat
+
+.align 5
+.globl bn_mul_add_words
+.ent bn_mul_add_words
+bn_mul_add_words:
+ .set noreorder
+ bgtz $6,bn_mul_add_words_internal
+ move $2,$0
+ jr $31
+ move $4,$2
+.end bn_mul_add_words
+
+.align 5
+.ent bn_mul_add_words_internal
+bn_mul_add_words_internal:
+ .set reorder
+ li $3,-4
+ and $8,$6,$3
+ beqz $8,.L_bn_mul_add_words_tail
+
+.L_bn_mul_add_words_loop:
+ ld $12,0($5)
+ dmultu ($12,$7)
+ ld $13,0($4)
+ ld $14,8($5)
+ ld $15,8($4)
+ ld $8,2*8($5)
+ ld $9,2*8($4)
+ daddu $13,$2
+ sltu $2,$13,$2 # All manuals say it "compares 32-bit
+ # values", but it seems to work fine
+ # even on 64-bit registers.
+ mflo ($1,$12,$7)
+ mfhi ($12,$12,$7)
+ daddu $13,$1
+ daddu $2,$12
+ dmultu ($14,$7)
+ sltu $1,$13,$1
+ sd $13,0($4)
+ daddu $2,$1
+
+ ld $10,3*8($5)
+ ld $11,3*8($4)
+ daddu $15,$2
+ sltu $2,$15,$2
+ mflo ($1,$14,$7)
+ mfhi ($14,$14,$7)
+ daddu $15,$1
+ daddu $2,$14
+ dmultu ($8,$7)
+ sltu $1,$15,$1
+ sd $15,8($4)
+ daddu $2,$1
+
+ subu $6,4
+ daddu $4,4*8
+ daddu $5,4*8
+ daddu $9,$2
+ sltu $2,$9,$2
+ mflo ($1,$8,$7)
+ mfhi ($8,$8,$7)
+ daddu $9,$1
+ daddu $2,$8
+ dmultu ($10,$7)
+ sltu $1,$9,$1
+ sd $9,-2*8($4)
+ daddu $2,$1
+
+
+ and $8,$6,$3
+ daddu $11,$2
+ sltu $2,$11,$2
+ mflo ($1,$10,$7)
+ mfhi ($10,$10,$7)
+ daddu $11,$1
+ daddu $2,$10
+ sltu $1,$11,$1
+ sd $11,-8($4)
+ .set noreorder
+ bgtz $8,.L_bn_mul_add_words_loop
+ daddu $2,$1
+
+ beqz $6,.L_bn_mul_add_words_return
+ nop
+
+.L_bn_mul_add_words_tail:
+ .set reorder
+ ld $12,0($5)
+ dmultu ($12,$7)
+ ld $13,0($4)
+ subu $6,1
+ daddu $13,$2
+ sltu $2,$13,$2
+ mflo ($1,$12,$7)
+ mfhi ($12,$12,$7)
+ daddu $13,$1
+ daddu $2,$12
+ sltu $1,$13,$1
+ sd $13,0($4)
+ daddu $2,$1
+ beqz $6,.L_bn_mul_add_words_return
+
+ ld $12,8($5)
+ dmultu ($12,$7)
+ ld $13,8($4)
+ subu $6,1
+ daddu $13,$2
+ sltu $2,$13,$2
+ mflo ($1,$12,$7)
+ mfhi ($12,$12,$7)
+ daddu $13,$1
+ daddu $2,$12
+ sltu $1,$13,$1
+ sd $13,8($4)
+ daddu $2,$1
+ beqz $6,.L_bn_mul_add_words_return
+
+ ld $12,2*8($5)
+ dmultu ($12,$7)
+ ld $13,2*8($4)
+ daddu $13,$2
+ sltu $2,$13,$2
+ mflo ($1,$12,$7)
+ mfhi ($12,$12,$7)
+ daddu $13,$1
+ daddu $2,$12
+ sltu $1,$13,$1
+ sd $13,2*8($4)
+ daddu $2,$1
+
+.L_bn_mul_add_words_return:
+ .set noreorder
+ jr $31
+ move $4,$2
+.end bn_mul_add_words_internal
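
The carry bookkeeping above leans on one idiom throughout, the one the comment in the loop header alludes to: after a daddu, a sltu of the 64-bit sum against one addend recovers the carry-out bit. As a hedged plain-C reference for the routine's contract, rp[i] += ap[i]*w with carry propagation, assuming a compiler that provides unsigned __int128 (the _ref suffix marks it as an illustration, not OpenSSL's own code):

    #include <stdint.h>

    /* Reference sketch: rp[] += ap[] * w over num 64-bit words;
     * returns the final carry word. */
    static uint64_t bn_mul_add_words_ref(uint64_t *rp, const uint64_t *ap,
                                         int num, uint64_t w)
    {
        uint64_t c = 0;
        while (num--) {
            unsigned __int128 t = (unsigned __int128)*ap++ * w + *rp + c;
            *rp++ = (uint64_t)t;       /* low word, what sd stores */
            c = (uint64_t)(t >> 64);   /* high word: mfhi plus collected carries */
        }
        return c;
    }
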
+
+.align 5
+.globl bn_mul_words
+.ent bn_mul_words
+bn_mul_words:
+ .set noreorder
+ bgtz $6,bn_mul_words_internal
+ move $2,$0
+ jr $31
+ move $4,$2
+.end bn_mul_words
+
+.align 5
+.ent bn_mul_words_internal
+bn_mul_words_internal:
+ .set reorder
+ li $3,-4
+ and $8,$6,$3
+ beqz $8,.L_bn_mul_words_tail
+
+.L_bn_mul_words_loop:
+ ld $12,0($5)
+ dmultu ($12,$7)
+ ld $14,8($5)
+ ld $8,2*8($5)
+ ld $10,3*8($5)
+ mflo ($1,$12,$7)
+ mfhi ($12,$12,$7)
+ daddu $2,$1
+ sltu $13,$2,$1
+ dmultu ($14,$7)
+ sd $2,0($4)
+ daddu $2,$13,$12
+
+ subu $6,4
+ daddu $4,4*8
+ daddu $5,4*8
+ mflo ($1,$14,$7)
+ mfhi ($14,$14,$7)
+ daddu $2,$1
+ sltu $15,$2,$1
+ dmultu ($8,$7)
+ sd $2,-3*8($4)
+ daddu $2,$15,$14
+
+ mflo ($1,$8,$7)
+ mfhi ($8,$8,$7)
+ daddu $2,$1
+ sltu $9,$2,$1
+ dmultu ($10,$7)
+ sd $2,-2*8($4)
+ daddu $2,$9,$8
+
+ and $8,$6,$3
+ mflo ($1,$10,$7)
+ mfhi ($10,$10,$7)
+ daddu $2,$1
+ sltu $11,$2,$1
+ sd $2,-8($4)
+ .set noreorder
+ bgtz $8,.L_bn_mul_words_loop
+ daddu $2,$11,$10
+
+ beqz $6,.L_bn_mul_words_return
+ nop
+
+.L_bn_mul_words_tail:
+ .set reorder
+ ld $12,0($5)
+ dmultu ($12,$7)
+ subu $6,1
+ mflo ($1,$12,$7)
+ mfhi ($12,$12,$7)
+ daddu $2,$1
+ sltu $13,$2,$1
+ sd $2,0($4)
+ daddu $2,$13,$12
+ beqz $6,.L_bn_mul_words_return
+
+ ld $12,8($5)
+ dmultu ($12,$7)
+ subu $6,1
+ mflo ($1,$12,$7)
+ mfhi ($12,$12,$7)
+ daddu $2,$1
+ sltu $13,$2,$1
+ sd $2,8($4)
+ daddu $2,$13,$12
+ beqz $6,.L_bn_mul_words_return
+
+ ld $12,2*8($5)
+ dmultu ($12,$7)
+ mflo ($1,$12,$7)
+ mfhi ($12,$12,$7)
+ daddu $2,$1
+ sltu $13,$2,$1
+ sd $2,2*8($4)
+ daddu $2,$13,$12
+
+.L_bn_mul_words_return:
+ .set noreorder
+ jr $31
+ move $4,$2
+.end bn_mul_words_internal
+
+.align 5
+.globl bn_sqr_words
+.ent bn_sqr_words
+bn_sqr_words:
+ .set noreorder
+ bgtz $6,bn_sqr_words_internal
+ move $2,$0
+ jr $31
+ move $4,$2
+.end bn_sqr_words
+
+.align 5
+.ent bn_sqr_words_internal
+bn_sqr_words_internal:
+ .set reorder
+ li $3,-4
+ and $8,$6,$3
+ beqz $8,.L_bn_sqr_words_tail
+
+.L_bn_sqr_words_loop:
+ ld $12,0($5)
+ dmultu ($12,$12)
+ ld $14,8($5)
+ ld $8,2*8($5)
+ ld $10,3*8($5)
+ mflo ($13,$12,$12)
+ mfhi ($12,$12,$12)
+ sd $13,0($4)
+ sd $12,8($4)
+
+ dmultu ($14,$14)
+ subu $6,4
+ daddu $4,8*8
+ daddu $5,4*8
+ mflo ($15,$14,$14)
+ mfhi ($14,$14,$14)
+ sd $15,-6*8($4)
+ sd $14,-5*8($4)
+
+ dmultu ($8,$8)
+ mflo ($9,$8,$8)
+ mfhi ($8,$8,$8)
+ sd $9,-4*8($4)
+ sd $8,-3*8($4)
+
+
+ dmultu ($10,$10)
+ and $8,$6,$3
+ mflo ($11,$10,$10)
+ mfhi ($10,$10,$10)
+ sd $11,-2*8($4)
+
+ .set noreorder
+ bgtz $8,.L_bn_sqr_words_loop
+ sd $10,-8($4)
+
+ beqz $6,.L_bn_sqr_words_return
+ nop
+
+.L_bn_sqr_words_tail:
+ .set reorder
+ ld $12,0($5)
+ dmultu ($12,$12)
+ subu $6,1
+ mflo ($13,$12,$12)
+ mfhi ($12,$12,$12)
+ sd $13,0($4)
+ sd $12,8($4)
+ beqz $6,.L_bn_sqr_words_return
+
+ ld $12,8($5)
+ dmultu ($12,$12)
+ subu $6,1
+ mflo ($13,$12,$12)
+ mfhi ($12,$12,$12)
+ sd $13,2*8($4)
+ sd $12,3*8($4)
+ beqz $6,.L_bn_sqr_words_return
+
+ ld $12,2*8($5)
+ dmultu ($12,$12)
+ mflo ($13,$12,$12)
+ mfhi ($12,$12,$12)
+ sd $13,4*8($4)
+ sd $12,5*8($4)
+
+.L_bn_sqr_words_return:
+ .set noreorder
+ jr $31
+ move $4,$2
+
+.end bn_sqr_words_internal
+
+.align 5
+.globl bn_add_words
+.ent bn_add_words
+bn_add_words:
+ .set noreorder
+ bgtz $7,bn_add_words_internal
+ move $2,$0
+ jr $31
+ move $4,$2
+.end bn_add_words
+
+.align 5
+.ent bn_add_words_internal
+bn_add_words_internal:
+ .set reorder
+ li $3,-4
+ and $1,$7,$3
+ beqz $1,.L_bn_add_words_tail
+
+.L_bn_add_words_loop:
+ ld $12,0($5)
+ ld $8,0($6)
+ subu $7,4
+ ld $13,8($5)
+ and $1,$7,$3
+ ld $14,2*8($5)
+ daddu $6,4*8
+ ld $15,3*8($5)
+ daddu $4,4*8
+ ld $9,-3*8($6)
+ daddu $5,4*8
+ ld $10,-2*8($6)
+ ld $11,-8($6)
+ daddu $8,$12
+ sltu $24,$8,$12
+ daddu $12,$8,$2
+ sltu $2,$12,$8
+ sd $12,-4*8($4)
+ daddu $2,$24
+
+ daddu $9,$13
+ sltu $25,$9,$13
+ daddu $13,$9,$2
+ sltu $2,$13,$9
+ sd $13,-3*8($4)
+ daddu $2,$25
+
+ daddu $10,$14
+ sltu $24,$10,$14
+ daddu $14,$10,$2
+ sltu $2,$14,$10
+ sd $14,-2*8($4)
+ daddu $2,$24
+
+ daddu $11,$15
+ sltu $25,$11,$15
+ daddu $15,$11,$2
+ sltu $2,$15,$11
+ sd $15,-8($4)
+
+ .set noreorder
+ bgtz $1,.L_bn_add_words_loop
+ daddu $2,$25
+
+ beqz $7,.L_bn_add_words_return
+ nop
+
+.L_bn_add_words_tail:
+ .set reorder
+ ld $12,0($5)
+ ld $8,0($6)
+ daddu $8,$12
+ subu $7,1
+ sltu $24,$8,$12
+ daddu $12,$8,$2
+ sltu $2,$12,$8
+ sd $12,0($4)
+ daddu $2,$24
+ beqz $7,.L_bn_add_words_return
+
+ ld $13,8($5)
+ ld $9,8($6)
+ daddu $9,$13
+ subu $7,1
+ sltu $25,$9,$13
+ daddu $13,$9,$2
+ sltu $2,$13,$9
+ sd $13,8($4)
+ daddu $2,$25
+ beqz $7,.L_bn_add_words_return
+
+ ld $14,2*8($5)
+ ld $10,2*8($6)
+ daddu $10,$14
+ sltu $24,$10,$14
+ daddu $14,$10,$2
+ sltu $2,$14,$10
+ sd $14,2*8($4)
+ daddu $2,$24
+
+.L_bn_add_words_return:
+ .set noreorder
+ jr $31
+ move $4,$2
+
+.end bn_add_words_internal
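
Each word in the loop above needs two carry checks because a carry can pop out of either addition: ap[i]+bp[i] may wrap, and adding the incoming carry may wrap again; at most one of the two fires per word, so the running carry stays 0 or 1. A hedged C rendering of that chain (illustrative name):

    #include <stdint.h>

    /* Reference sketch: rp[] = ap[] + bp[] over num words; returns carry. */
    static uint64_t bn_add_words_ref(uint64_t *rp, const uint64_t *ap,
                                     const uint64_t *bp, int num)
    {
        uint64_t c = 0;
        for (int i = 0; i < num; i++) {
            uint64_t t  = ap[i] + bp[i];
            uint64_t c1 = t < ap[i];       /* first sltu: did a+b wrap? */
            rp[i] = t + c;
            c = c1 + (rp[i] < t);          /* second sltu: did adding the carry wrap? */
        }
        return c;
    }
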
+
+.align 5
+.globl bn_sub_words
+.ent bn_sub_words
+bn_sub_words:
+ .set noreorder
+ bgtz $7,bn_sub_words_internal
+ move $2,$0
+ jr $31
+ move $4,$0
+.end bn_sub_words
+
+.align 5
+.ent bn_sub_words_internal
+bn_sub_words_internal:
+ .set reorder
+ li $3,-4
+ and $1,$7,$3
+ beqz $1,.L_bn_sub_words_tail
+
+.L_bn_sub_words_loop:
+ ld $12,0($5)
+ ld $8,0($6)
+ subu $7,4
+ ld $13,8($5)
+ and $1,$7,$3
+ ld $14,2*8($5)
+ daddu $6,4*8
+ ld $15,3*8($5)
+ daddu $4,4*8
+ ld $9,-3*8($6)
+ daddu $5,4*8
+ ld $10,-2*8($6)
+ ld $11,-8($6)
+ sltu $24,$12,$8
+ dsubu $8,$12,$8
+ dsubu $12,$8,$2
+ sgtu $2,$12,$8
+ sd $12,-4*8($4)
+ daddu $2,$24
+
+ sltu $25,$13,$9
+ dsubu $9,$13,$9
+ dsubu $13,$9,$2
+ sgtu $2,$13,$9
+ sd $13,-3*8($4)
+ daddu $2,$25
+
+
+ sltu $24,$14,$10
+ dsubu $10,$14,$10
+ dsubu $14,$10,$2
+ sgtu $2,$14,$10
+ sd $14,-2*8($4)
+ daddu $2,$24
+
+ sltu $25,$15,$11
+ dsubu $11,$15,$11
+ dsubu $15,$11,$2
+ sgtu $2,$15,$11
+ sd $15,-8($4)
+
+ .set noreorder
+ bgtz $1,.L_bn_sub_words_loop
+ daddu $2,$25
+
+ beqz $7,.L_bn_sub_words_return
+ nop
+
+.L_bn_sub_words_tail:
+ .set reorder
+ ld $12,0($5)
+ ld $8,0($6)
+ subu $7,1
+ sltu $24,$12,$8
+ dsubu $8,$12,$8
+ dsubu $12,$8,$2
+ sgtu $2,$12,$8
+ sd $12,0($4)
+ daddu $2,$24
+ beqz $7,.L_bn_sub_words_return
+
+ ld $13,8($5)
+ subu $7,1
+ ld $9,8($6)
+ sltu $25,$13,$9
+ dsubu $9,$13,$9
+ dsubu $13,$9,$2
+ sgtu $2,$13,$9
+ sd $13,8($4)
+ daddu $2,$25
+ beqz $7,.L_bn_sub_words_return
+
+ ld $14,2*8($5)
+ ld $10,2*8($6)
+ sltu $24,$14,$10
+ dsubu $10,$14,$10
+ dsubu $14,$10,$2
+ sgtu $2,$14,$10
+ sd $14,2*8($4)
+ daddu $2,$24
+
+.L_bn_sub_words_return:
+ .set noreorder
+ jr $31
+ move $4,$2
+.end bn_sub_words_internal
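
The subtraction loop mirrors the addition, but with a borrow: sltu before the dsubu asks whether the minuend started smaller, and sgtu after folding in the incoming borrow asks whether that second subtraction wrapped (the result growing past the intermediate difference is the tell). A hedged C rendering (illustrative name):

    #include <stdint.h>

    /* Reference sketch: rp[] = ap[] - bp[] over num words; returns borrow. */
    static uint64_t bn_sub_words_ref(uint64_t *rp, const uint64_t *ap,
                                     const uint64_t *bp, int num)
    {
        uint64_t b = 0;
        for (int i = 0; i < num; i++) {
            uint64_t t  = ap[i] - bp[i];
            uint64_t b1 = ap[i] < bp[i];   /* the sltu before dsubu */
            rp[i] = t - b;
            b = b1 | (rp[i] > t);          /* the sgtu after dsubu */
        }
        return b;
    }
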
+
+#if 0
+/*
+ * The bn_div_3_words entry point is re-used for the constant-time
+ * interface. The implementation below is retained as a historical
+ * reference.
+ */
+.align 5
+.globl bn_div_3_words
+.ent bn_div_3_words
+bn_div_3_words:
+ .set noreorder
+	move	$7,$4 # we know that bn_div_words does not
+	# touch $7, $10, $11 and preserves $6,
+	# so we can pass two arguments and the
+	# return address in registers instead
+	# of on the stack :-)
+
+ ld $4,($7)
+ move $10,$5
+ bne $4,$6,bn_div_3_words_internal
+ ld $5,-8($7)
+ li $2,-1
+ jr $31
+ move $4,$2
+.end bn_div_3_words
+
+.align 5
+.ent bn_div_3_words_internal
+bn_div_3_words_internal:
+ .set reorder
+ move $11,$31
+ bal bn_div_words_internal
+ move $31,$11
+ dmultu ($10,$2)
+ ld $14,-2*8($7)
+ move $8,$0
+ mfhi ($13,$10,$2)
+ mflo ($12,$10,$2)
+ sltu $24,$13,$5
+.L_bn_div_3_words_inner_loop:
+ bnez $24,.L_bn_div_3_words_inner_loop_done
+ sgeu $1,$14,$12
+ seq $25,$13,$5
+ and $1,$25
+ sltu $15,$12,$10
+ daddu $5,$6
+ dsubu $13,$15
+ dsubu $12,$10
+ sltu $24,$13,$5
+ sltu $8,$5,$6
+ or $24,$8
+ .set noreorder
+ beqz $1,.L_bn_div_3_words_inner_loop
+ dsubu $2,1
+ daddu $2,1
+ .set reorder
+.L_bn_div_3_words_inner_loop_done:
+ .set noreorder
+ jr $31
+ move $4,$2
+.end bn_div_3_words_internal
+#endif
+
+.align 5
+.globl bn_div_words
+.ent bn_div_words
+bn_div_words:
+ .set noreorder
+ bnez $6,bn_div_words_internal
+ li $2,-1 # I would rather signal div-by-zero
+ # which can be done with 'break 7'
+ jr $31
+ move $4,$2
+.end bn_div_words
+
+.align 5
+.ent bn_div_words_internal
+bn_div_words_internal:
+ move $3,$0
+ bltz $6,.L_bn_div_words_body
+ move $25,$3
+ dsll $6,1
+ bgtz $6,.-4
+ addu $25,1
+
+ .set reorder
+ negu $13,$25
+ li $14,-1
+ dsll $14,$13
+ and $14,$4
+ dsrl $1,$5,$13
+ .set noreorder
+ beqz $14,.+12
+ nop
+ break 6 # signal overflow
+ .set reorder
+ dsll $4,$25
+ dsll $5,$25
+ or $4,$1
+.L_bn_div_words_body:
+ dsrl $3,$6,4*8 # bits
+ sgeu $1,$4,$6
+ .set noreorder
+ beqz $1,.+12
+ nop
+ dsubu $4,$6
+ .set reorder
+
+ li $8,-1
+ dsrl $9,$4,4*8 # bits
+ dsrl $8,4*8 # q=0xffffffff
+ beq $3,$9,.L_bn_div_words_skip_div1
+ ddivu ($4,$3)
+ mfqt ($8,$4,$3)
+.L_bn_div_words_skip_div1:
+ dmultu ($6,$8)
+ dsll $15,$4,4*8 # bits
+ dsrl $1,$5,4*8 # bits
+ or $15,$1
+ mflo ($12,$6,$8)
+ mfhi ($13,$6,$8)
+.L_bn_div_words_inner_loop1:
+ sltu $14,$15,$12
+ seq $24,$9,$13
+ sltu $1,$9,$13
+ and $14,$24
+ sltu $2,$12,$6
+ or $1,$14
+ .set noreorder
+ beqz $1,.L_bn_div_words_inner_loop1_done
+ dsubu $13,$2
+ dsubu $12,$6
+ b .L_bn_div_words_inner_loop1
+ dsubu $8,1
+ .set reorder
+.L_bn_div_words_inner_loop1_done:
+
+ dsll $5,4*8 # bits
+ dsubu $4,$15,$12
+ dsll $2,$8,4*8 # bits
+
+ li $8,-1
+ dsrl $9,$4,4*8 # bits
+ dsrl $8,4*8 # q=0xffffffff
+ beq $3,$9,.L_bn_div_words_skip_div2
+ ddivu ($4,$3)
+ mfqt ($8,$4,$3)
+.L_bn_div_words_skip_div2:
+ dmultu ($6,$8)
+ dsll $15,$4,4*8 # bits
+ dsrl $1,$5,4*8 # bits
+ or $15,$1
+ mflo ($12,$6,$8)
+ mfhi ($13,$6,$8)
+.L_bn_div_words_inner_loop2:
+ sltu $14,$15,$12
+ seq $24,$9,$13
+ sltu $1,$9,$13
+ and $14,$24
+ sltu $3,$12,$6
+ or $1,$14
+ .set noreorder
+ beqz $1,.L_bn_div_words_inner_loop2_done
+ dsubu $13,$3
+ dsubu $12,$6
+ b .L_bn_div_words_inner_loop2
+ dsubu $8,1
+ .set reorder
+.L_bn_div_words_inner_loop2_done:
+
+ dsubu $4,$15,$12
+ or $2,$8
+ dsrl $3,$4,$25 # $3 contains remainder if anybody wants it
+ dsrl $6,$25 # restore $6
+
+ .set noreorder
+ move $5,$3
+ jr $31
+ move $4,$2
+.end bn_div_words_internal
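
Normalization (the shift-until-the-top-bit-is-set prologue) and the two estimate-and-correct inner loops above implement a 128-by-64-bit unsigned division, one 32-bit quotient half at a time. A compiler-assisted C sketch of the contract, assuming unsigned __int128 and, like the assembly, a caller that keeps h < d so the quotient fits one word (illustrative name; the remainder the closing comment mentions is simply n % d):

    #include <stdint.h>

    /* Reference sketch: quotient of the double word (h:l) by d.
     * Precondition: d != 0 and h < d. */
    static uint64_t bn_div_words_ref(uint64_t h, uint64_t l, uint64_t d)
    {
        unsigned __int128 n = ((unsigned __int128)h << 64) | l;
        return (uint64_t)(n / d);
    }
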
+
+.align 5
+.globl bn_mul_comba8
+.ent bn_mul_comba8
+bn_mul_comba8:
+ .set noreorder
+ .frame $29,6*8,$31
+ .mask 0x003f0000,-8
+ dsubu $29,6*8
+ sd $21,5*8($29)
+ sd $20,4*8($29)
+ sd $19,3*8($29)
+ sd $18,2*8($29)
+ sd $17,1*8($29)
+ sd $16,0*8($29)
+
+ .set reorder
+	ld	$12,0($5) # If compiled with the -mips3 option
+	# on an R5000 box, the assembler barks
+	# on this line with a "should not have
+	# mult/div as last instruction in bb
+	# (R10K bug)" warning. If anybody out
+	# there has a clue about how to
+	# circumvent this, do send me a note.
+	# <appro@fy.chalmers.se>
+
+ ld $8,0($6)
+ ld $13,8($5)
+ ld $14,2*8($5)
+ dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3);
+ ld $15,3*8($5)
+ ld $9,8($6)
+ ld $10,2*8($6)
+ ld $11,3*8($6)
+ mflo ($2,$12,$8)
+ mfhi ($3,$12,$8)
+
+ ld $16,4*8($5)
+ ld $18,5*8($5)
+ dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1);
+ ld $20,6*8($5)
+ ld $5,7*8($5)
+ ld $17,4*8($6)
+ ld $19,5*8($6)
+ mflo ($24,$12,$9)
+ mfhi ($25,$12,$9)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1);
+ daddu $7,$25,$1
+ ld $21,6*8($6)
+ ld $6,7*8($6)
+ sd $2,0($4) # r[0]=c1;
+ mflo ($24,$13,$8)
+ mfhi ($25,$13,$8)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $2,$7,$25
+ sd $3,8($4) # r[1]=c2;
+
+ mflo ($24,$14,$8)
+ mfhi ($25,$14,$8)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ mflo ($24,$13,$9)
+ mfhi ($25,$13,$9)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $3,$2,$25
+ mflo ($24,$12,$10)
+ mfhi ($25,$12,$10)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,2*8($4) # r[2]=c3;
+
+ mflo ($24,$12,$11)
+ mfhi ($25,$12,$11)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $7,$3,$25
+ mflo ($24,$13,$10)
+ mfhi ($25,$13,$10)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$14,$9)
+ mfhi ($25,$14,$9)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$15,$8)
+ mfhi ($25,$15,$8)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($16,$8) # mul_add_c(a[4],b[0],c2,c3,c1);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ sd $2,3*8($4) # r[3]=c1;
+
+ mflo ($24,$16,$8)
+ mfhi ($25,$16,$8)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $2,$7,$25
+ mflo ($24,$15,$9)
+ mfhi ($25,$15,$9)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$14,$10)
+ mfhi ($25,$14,$10)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$13,$11)
+ mfhi ($25,$13,$11)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($12,$17) # mul_add_c(a[0],b[4],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$12,$17)
+ mfhi ($25,$12,$17)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($12,$19) # mul_add_c(a[0],b[5],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ sd $3,4*8($4) # r[4]=c2;
+
+ mflo ($24,$12,$19)
+ mfhi ($25,$12,$19)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($13,$17) # mul_add_c(a[1],b[4],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $3,$2,$25
+ mflo ($24,$13,$17)
+ mfhi ($25,$13,$17)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$14,$11)
+ mfhi ($25,$14,$11)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$15,$10)
+ mfhi ($25,$15,$10)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($16,$9) # mul_add_c(a[4],b[1],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$16,$9)
+ mfhi ($25,$16,$9)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($18,$8) # mul_add_c(a[5],b[0],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$18,$8)
+ mfhi ($25,$18,$8)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($20,$8) # mul_add_c(a[6],b[0],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,5*8($4) # r[5]=c3;
+
+ mflo ($24,$20,$8)
+ mfhi ($25,$20,$8)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($18,$9) # mul_add_c(a[5],b[1],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $7,$3,$25
+ mflo ($24,$18,$9)
+ mfhi ($25,$18,$9)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($16,$10) # mul_add_c(a[4],b[2],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$16,$10)
+ mfhi ($25,$16,$10)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$15,$11)
+ mfhi ($25,$15,$11)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($14,$17) # mul_add_c(a[2],b[4],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$14,$17)
+ mfhi ($25,$14,$17)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($13,$19) # mul_add_c(a[1],b[5],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$13,$19)
+ mfhi ($25,$13,$19)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($12,$21) # mul_add_c(a[0],b[6],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$12,$21)
+ mfhi ($25,$12,$21)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($12,$6) # mul_add_c(a[0],b[7],c2,c3,c1);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ sd $2,6*8($4) # r[6]=c1;
+
+ mflo ($24,$12,$6)
+ mfhi ($25,$12,$6)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($13,$21) # mul_add_c(a[1],b[6],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $2,$7,$25
+ mflo ($24,$13,$21)
+ mfhi ($25,$13,$21)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$19) # mul_add_c(a[2],b[5],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$14,$19)
+ mfhi ($25,$14,$19)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($15,$17) # mul_add_c(a[3],b[4],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$15,$17)
+ mfhi ($25,$15,$17)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($16,$11) # mul_add_c(a[4],b[3],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$16,$11)
+ mfhi ($25,$16,$11)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($18,$10) # mul_add_c(a[5],b[2],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$18,$10)
+ mfhi ($25,$18,$10)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($20,$9) # mul_add_c(a[6],b[1],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$20,$9)
+ mfhi ($25,$20,$9)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($5,$8) # mul_add_c(a[7],b[0],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$5,$8)
+ mfhi ($25,$5,$8)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($5,$9) # mul_add_c(a[7],b[1],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ sd $3,7*8($4) # r[7]=c2;
+
+ mflo ($24,$5,$9)
+ mfhi ($25,$5,$9)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($20,$10) # mul_add_c(a[6],b[2],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $3,$2,$25
+ mflo ($24,$20,$10)
+ mfhi ($25,$20,$10)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($18,$11) # mul_add_c(a[5],b[3],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$18,$11)
+ mfhi ($25,$18,$11)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($16,$17) # mul_add_c(a[4],b[4],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$16,$17)
+ mfhi ($25,$16,$17)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($15,$19) # mul_add_c(a[3],b[5],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$15,$19)
+ mfhi ($25,$15,$19)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($14,$21) # mul_add_c(a[2],b[6],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$14,$21)
+ mfhi ($25,$14,$21)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($13,$6) # mul_add_c(a[1],b[7],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$13,$6)
+ mfhi ($25,$13,$6)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($14,$6) # mul_add_c(a[2],b[7],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,8*8($4) # r[8]=c3;
+
+ mflo ($24,$14,$6)
+ mfhi ($25,$14,$6)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($15,$21) # mul_add_c(a[3],b[6],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $7,$3,$25
+ mflo ($24,$15,$21)
+ mfhi ($25,$15,$21)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($16,$19) # mul_add_c(a[4],b[5],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$16,$19)
+ mfhi ($25,$16,$19)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($18,$17) # mul_add_c(a[5],b[4],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$18,$17)
+ mfhi ($25,$18,$17)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($20,$11) # mul_add_c(a[6],b[3],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$20,$11)
+ mfhi ($25,$20,$11)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($5,$10) # mul_add_c(a[7],b[2],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$5,$10)
+ mfhi ($25,$5,$10)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($5,$11) # mul_add_c(a[7],b[3],c2,c3,c1);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ sd $2,9*8($4) # r[9]=c1;
+
+ mflo ($24,$5,$11)
+ mfhi ($25,$5,$11)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($20,$17) # mul_add_c(a[6],b[4],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $2,$7,$25
+ mflo ($24,$20,$17)
+ mfhi ($25,$20,$17)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($18,$19) # mul_add_c(a[5],b[5],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$18,$19)
+ mfhi ($25,$18,$19)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($16,$21) # mul_add_c(a[4],b[6],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$16,$21)
+ mfhi ($25,$16,$21)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($15,$6) # mul_add_c(a[3],b[7],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$15,$6)
+ mfhi ($25,$15,$6)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($16,$6) # mul_add_c(a[4],b[7],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ sd $3,10*8($4) # r[10]=c2;
+
+ mflo ($24,$16,$6)
+ mfhi ($25,$16,$6)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($18,$21) # mul_add_c(a[5],b[6],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $3,$2,$25
+ mflo ($24,$18,$21)
+ mfhi ($25,$18,$21)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($20,$19) # mul_add_c(a[6],b[5],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$20,$19)
+ mfhi ($25,$20,$19)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($5,$17) # mul_add_c(a[7],b[4],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ mflo ($24,$5,$17)
+ mfhi ($25,$5,$17)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($5,$19) # mul_add_c(a[7],b[5],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,11*8($4) # r[11]=c3;
+
+ mflo ($24,$5,$19)
+ mfhi ($25,$5,$19)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($20,$21) # mul_add_c(a[6],b[6],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $7,$3,$25
+ mflo ($24,$20,$21)
+ mfhi ($25,$20,$21)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($18,$6) # mul_add_c(a[5],b[7],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$18,$6)
+ mfhi ($25,$18,$6)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($20,$6) # mul_add_c(a[6],b[7],c2,c3,c1);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ sd $2,12*8($4) # r[12]=c1;
+
+ mflo ($24,$20,$6)
+ mfhi ($25,$20,$6)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($5,$21) # mul_add_c(a[7],b[6],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $2,$7,$25
+ mflo ($24,$5,$21)
+ mfhi ($25,$5,$21)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($5,$6) # mul_add_c(a[7],b[7],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ sd $3,13*8($4) # r[13]=c2;
+
+ mflo ($24,$5,$6)
+ mfhi ($25,$5,$6)
+ daddu $7,$24
+ sltu $1,$7,$24
+ daddu $25,$1
+ daddu $2,$25
+ sd $7,14*8($4) # r[14]=c3;
+ sd $2,15*8($4) # r[15]=c1;
+
+ .set noreorder
+ ld $21,5*8($29)
+ ld $20,4*8($29)
+ ld $19,3*8($29)
+ ld $18,2*8($29)
+ ld $17,1*8($29)
+ ld $16,0*8($29)
+ jr $31
+ daddu $29,6*8
+.end bn_mul_comba8
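
The mul_add_c(a[i],b[j],c1,c2,c3) comments running through both comba routines all name the same column step: fold the 128-bit product a[i]*b[j] into a rotating three-word accumulator, each sltu-after-daddu pair harvesting one carry. A hedged C sketch of a single step, assuming unsigned __int128 (it mirrors, but is not copied from, the C macro of the same name in OpenSSL's generic code):

    #include <stdint.h>

    /* One comba column step: (c3:c2:c1) += a * b. */
    static void mul_add_c(uint64_t a, uint64_t b,
                          uint64_t *c1, uint64_t *c2, uint64_t *c3)
    {
        unsigned __int128 t = (unsigned __int128)a * b;
        uint64_t lo = (uint64_t)t, hi = (uint64_t)(t >> 64);

        *c1 += lo;              /* daddu into the low accumulator word */
        hi  += *c1 < lo;        /* sltu carry; hi <= 2^64-2 here, so no wrap */
        *c2 += hi;
        *c3 += *c2 < hi;        /* carry into the third word */
    }
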
+
+.align 5
+.globl bn_mul_comba4
+.ent bn_mul_comba4
+bn_mul_comba4:
+ .set reorder
+ ld $12,0($5)
+ ld $8,0($6)
+ ld $13,8($5)
+ ld $14,2*8($5)
+ dmultu ($12,$8) # mul_add_c(a[0],b[0],c1,c2,c3);
+ ld $15,3*8($5)
+ ld $9,8($6)
+ ld $10,2*8($6)
+ ld $11,3*8($6)
+ mflo ($2,$12,$8)
+ mfhi ($3,$12,$8)
+ sd $2,0($4)
+
+ dmultu ($12,$9) # mul_add_c(a[0],b[1],c2,c3,c1);
+ mflo ($24,$12,$9)
+ mfhi ($25,$12,$9)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($13,$8) # mul_add_c(a[1],b[0],c2,c3,c1);
+ daddu $7,$25,$1
+ mflo ($24,$13,$8)
+ mfhi ($25,$13,$8)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$8) # mul_add_c(a[2],b[0],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $2,$7,$25
+ sd $3,8($4)
+
+ mflo ($24,$14,$8)
+ mfhi ($25,$14,$8)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($13,$9) # mul_add_c(a[1],b[1],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ mflo ($24,$13,$9)
+ mfhi ($25,$13,$9)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($12,$10) # mul_add_c(a[0],b[2],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $3,$2,$25
+ mflo ($24,$12,$10)
+ mfhi ($25,$12,$10)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($12,$11) # mul_add_c(a[0],b[3],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,2*8($4)
+
+ mflo ($24,$12,$11)
+ mfhi ($25,$12,$11)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($13,$10) # mul_add_c(a[1],b[2],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $7,$3,$25
+ mflo ($24,$13,$10)
+ mfhi ($25,$13,$10)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($14,$9) # mul_add_c(a[2],b[1],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$14,$9)
+ mfhi ($25,$14,$9)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($15,$8) # mul_add_c(a[3],b[0],c1,c2,c3);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ mflo ($24,$15,$8)
+ mfhi ($25,$15,$8)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($15,$9) # mul_add_c(a[3],b[1],c2,c3,c1);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ sd $2,3*8($4)
+
+ mflo ($24,$15,$9)
+ mfhi ($25,$15,$9)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$10) # mul_add_c(a[2],b[2],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $2,$7,$25
+ mflo ($24,$14,$10)
+ mfhi ($25,$14,$10)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($13,$11) # mul_add_c(a[1],b[3],c2,c3,c1);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ mflo ($24,$13,$11)
+ mfhi ($25,$13,$11)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$11) # mul_add_c(a[2],b[3],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ sd $3,4*8($4)
+
+ mflo ($24,$14,$11)
+ mfhi ($25,$14,$11)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($15,$10) # mul_add_c(a[3],b[2],c3,c1,c2);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $3,$2,$25
+ mflo ($24,$15,$10)
+ mfhi ($25,$15,$10)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($15,$11) # mul_add_c(a[3],b[3],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,5*8($4)
+
+ mflo ($24,$15,$11)
+ mfhi ($25,$15,$11)
+ daddu $2,$24
+ sltu $1,$2,$24
+ daddu $25,$1
+ daddu $3,$25
+ sd $2,6*8($4)
+ sd $3,7*8($4)
+
+ .set noreorder
+ jr $31
+ nop
+.end bn_mul_comba4
+
+.align 5
+.globl bn_sqr_comba8
+.ent bn_sqr_comba8
+bn_sqr_comba8:
+ .set reorder
+ ld $12,0($5)
+ ld $13,8($5)
+ ld $14,2*8($5)
+ ld $15,3*8($5)
+
+ dmultu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3);
+ ld $8,4*8($5)
+ ld $9,5*8($5)
+ ld $10,6*8($5)
+ ld $11,7*8($5)
+ mflo ($2,$12,$12)
+ mfhi ($3,$12,$12)
+ sd $2,0($4)
+
+ dmultu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1);
+ mflo ($24,$12,$13)
+ mfhi ($25,$12,$13)
+ slt $2,$25,$0
+ dsll $25,1
+ dmultu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2);
+ slt $6,$24,$0
+ daddu $25,$6
+ dsll $24,1
+ daddu $3,$24
+ sltu $1,$3,$24
+ daddu $7,$25,$1
+ sd $3,8($4)
+ mflo ($24,$14,$12)
+ mfhi ($25,$14,$12)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($13,$13) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $3,$2,$1
+ daddu $2,$25
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$13,$13)
+ mfhi ($25,$13,$13)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,2*8($4)
+ mflo ($24,$12,$15)
+ mfhi ($25,$12,$15)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($13,$14) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $7,$3,$1
+ daddu $3,$25
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$13,$14)
+ mfhi ($25,$13,$14)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($8,$12) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $1,$3,$1
+ daddu $3,$25
+ daddu $7,$1
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$8,$12)
+ mfhi ($25,$8,$12)
+ sd $2,3*8($4)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($15,$13) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $2,$7,$1
+ daddu $7,$25
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$15,$13)
+ mfhi ($25,$15,$13)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$14) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $1,$7,$1
+ daddu $7,$25
+ daddu $2,$1
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$14,$14)
+ mfhi ($25,$14,$14)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($12,$9) # mul_add_c2(a[0],b[5],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ sd $3,4*8($4)
+ mflo ($24,$12,$9)
+ mfhi ($25,$12,$9)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($13,$8) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $3,$2,$1
+ daddu $2,$25
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$13,$8)
+ mfhi ($25,$13,$8)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($14,$15) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $1,$2,$1
+ daddu $2,$25
+ daddu $3,$1
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$14,$15)
+ mfhi ($25,$14,$15)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($10,$12) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $1,$2,$1
+ daddu $2,$25
+ daddu $3,$1
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$10,$12)
+ mfhi ($25,$10,$12)
+ sd $7,5*8($4)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($9,$13) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $7,$3,$1
+ daddu $3,$25
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$9,$13)
+ mfhi ($25,$9,$13)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($8,$14) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $1,$3,$1
+ daddu $3,$25
+ daddu $7,$1
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$8,$14)
+ mfhi ($25,$8,$14)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($15,$15) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $1,$3,$1
+ daddu $3,$25
+ daddu $7,$1
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$15,$15)
+ mfhi ($25,$15,$15)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($12,$11) # mul_add_c2(a[0],b[7],c2,c3,c1);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ sd $2,6*8($4)
+ mflo ($24,$12,$11)
+ mfhi ($25,$12,$11)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($13,$10) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $2,$7,$1
+ daddu $7,$25
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$13,$10)
+ mfhi ($25,$13,$10)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$9) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $1,$7,$1
+ daddu $7,$25
+ daddu $2,$1
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$14,$9)
+ mfhi ($25,$14,$9)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($15,$8) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $1,$7,$1
+ daddu $7,$25
+ daddu $2,$1
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$15,$8)
+ mfhi ($25,$15,$8)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($11,$13) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $1,$7,$1
+ daddu $7,$25
+ daddu $2,$1
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$11,$13)
+ mfhi ($25,$11,$13)
+ sd $3,7*8($4)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($10,$14) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $3,$2,$1
+ daddu $2,$25
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$10,$14)
+ mfhi ($25,$10,$14)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($9,$15) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $1,$2,$1
+ daddu $2,$25
+ daddu $3,$1
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$9,$15)
+ mfhi ($25,$9,$15)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($8,$8) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $1,$2,$1
+ daddu $2,$25
+ daddu $3,$1
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$8,$8)
+ mfhi ($25,$8,$8)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($14,$11) # mul_add_c2(a[2],b[7],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,8*8($4)
+ mflo ($24,$14,$11)
+ mfhi ($25,$14,$11)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($15,$10) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $7,$3,$1
+ daddu $3,$25
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$15,$10)
+ mfhi ($25,$15,$10)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($8,$9) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $1,$3,$1
+ daddu $3,$25
+ daddu $7,$1
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$8,$9)
+ mfhi ($25,$8,$9)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($11,$15) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $1,$3,$1
+ daddu $3,$25
+ daddu $7,$1
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$11,$15)
+ mfhi ($25,$11,$15)
+ sd $2,9*8($4)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($10,$8) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $2,$7,$1
+ daddu $7,$25
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$10,$8)
+ mfhi ($25,$10,$8)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($9,$9) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $1,$7,$1
+ daddu $7,$25
+ daddu $2,$1
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$9,$9)
+ mfhi ($25,$9,$9)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($8,$11) # mul_add_c2(a[4],b[7],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ sd $3,10*8($4)
+ mflo ($24,$8,$11)
+ mfhi ($25,$8,$11)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($9,$10) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $3,$2,$1
+ daddu $2,$25
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$9,$10)
+ mfhi ($25,$9,$10)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($11,$9) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $1,$2,$1
+ daddu $2,$25
+ daddu $3,$1
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$11,$9)
+ mfhi ($25,$11,$9)
+ sd $7,11*8($4)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($10,$10) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $7,$3,$1
+ daddu $3,$25
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$10,$10)
+ mfhi ($25,$10,$10)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($10,$11) # mul_add_c2(a[6],b[7],c2,c3,c1);
+ daddu $25,$1
+ daddu $3,$25
+ sltu $1,$3,$25
+ daddu $7,$1
+ sd $2,12*8($4)
+ mflo ($24,$10,$11)
+ mfhi ($25,$10,$11)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($11,$11) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $2,$7,$1
+ daddu $7,$25
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$11,$11)
+ mfhi ($25,$11,$11)
+ sd $3,13*8($4)
+
+ daddu $7,$24
+ sltu $1,$7,$24
+ daddu $25,$1
+ daddu $2,$25
+ sd $7,14*8($4)
+ sd $2,15*8($4)
+
+ .set noreorder
+ jr $31
+ nop
+.end bn_sqr_comba8
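
Squaring folds every cross product in twice, which is what the dsll-by-1 pairs near each mul_add_c2 comment do: double hi:lo, using slt against $0 to catch the bit shifted off the top. Unlike the plain column step, the doubled high word genuinely can wrap while absorbing the low-word carry, so an extra top counter is needed. A hedged C sketch, assuming unsigned __int128:

    #include <stdint.h>

    /* One squaring column step: (c3:c2:c1) += 2 * a * b. */
    static void mul_add_c2(uint64_t a, uint64_t b,
                           uint64_t *c1, uint64_t *c2, uint64_t *c3)
    {
        unsigned __int128 t = (unsigned __int128)a * b;
        uint64_t lo = (uint64_t)t, hi = (uint64_t)(t >> 64);

        uint64_t top = hi >> 63;        /* bit the doubling shifts out (the slt) */
        hi = (hi << 1) | (lo >> 63);    /* dsll by 1 across the hi:lo pair */
        lo <<= 1;

        *c1 += lo;
        uint64_t cy = *c1 < lo;
        hi  += cy;  top += hi < cy;     /* doubled hi may wrap here; catch it */
        *c2 += hi;  top += *c2 < hi;
        *c3 += top;
    }
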
+
+.align 5
+.globl bn_sqr_comba4
+.ent bn_sqr_comba4
+bn_sqr_comba4:
+ .set reorder
+ ld $12,0($5)
+ ld $13,8($5)
+ dmultu ($12,$12) # mul_add_c(a[0],b[0],c1,c2,c3);
+ ld $14,2*8($5)
+ ld $15,3*8($5)
+ mflo ($2,$12,$12)
+ mfhi ($3,$12,$12)
+ sd $2,0($4)
+
+ dmultu ($12,$13) # mul_add_c2(a[0],b[1],c2,c3,c1);
+ mflo ($24,$12,$13)
+ mfhi ($25,$12,$13)
+ slt $2,$25,$0
+ dsll $25,1
+ dmultu ($14,$12) # mul_add_c2(a[2],b[0],c3,c1,c2);
+ slt $6,$24,$0
+ daddu $25,$6
+ dsll $24,1
+ daddu $3,$24
+ sltu $1,$3,$24
+ daddu $7,$25,$1
+ sd $3,8($4)
+ mflo ($24,$14,$12)
+ mfhi ($25,$14,$12)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($13,$13) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $3,$2,$1
+ daddu $2,$25
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$13,$13)
+ mfhi ($25,$13,$13)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($12,$15) # mul_add_c2(a[0],b[3],c1,c2,c3);
+ daddu $25,$1
+ daddu $2,$25
+ sltu $1,$2,$25
+ daddu $3,$1
+ sd $7,2*8($4)
+ mflo ($24,$12,$15)
+ mfhi ($25,$12,$15)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($13,$14) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $7,$3,$1
+ daddu $3,$25
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$13,$14)
+ mfhi ($25,$13,$14)
+ daddu $2,$24
+ sltu $1,$2,$24
+ dmultu ($15,$13) # forward multiplication
+ daddu $2,$24
+ daddu $1,$25
+ sltu $24,$2,$24
+ daddu $3,$1
+ daddu $25,$24
+ sltu $1,$3,$1
+ daddu $3,$25
+ daddu $7,$1
+ sltu $25,$3,$25
+ daddu $7,$25
+ mflo ($24,$15,$13)
+ mfhi ($25,$15,$13)
+ sd $2,3*8($4)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$14) # forward multiplication
+ daddu $3,$24
+ daddu $1,$25
+ sltu $24,$3,$24
+ daddu $7,$1
+ daddu $25,$24
+ sltu $2,$7,$1
+ daddu $7,$25
+ sltu $25,$7,$25
+ daddu $2,$25
+ mflo ($24,$14,$14)
+ mfhi ($25,$14,$14)
+ daddu $3,$24
+ sltu $1,$3,$24
+ dmultu ($14,$15) # mul_add_c2(a[2],b[3],c3,c1,c2);
+ daddu $25,$1
+ daddu $7,$25
+ sltu $1,$7,$25
+ daddu $2,$1
+ sd $3,4*8($4)
+ mflo ($24,$14,$15)
+ mfhi ($25,$14,$15)
+ daddu $7,$24
+ sltu $1,$7,$24
+ dmultu ($15,$15) # forward multiplication
+ daddu $7,$24
+ daddu $1,$25
+ sltu $24,$7,$24
+ daddu $2,$1
+ daddu $25,$24
+ sltu $3,$2,$1
+ daddu $2,$25
+ sltu $25,$2,$25
+ daddu $3,$25
+ mflo ($24,$15,$15)
+ mfhi ($25,$15,$15)
+ sd $7,5*8($4)
+
+ daddu $2,$24
+ sltu $1,$2,$24
+ daddu $25,$1
+ daddu $3,$25
+ sd $2,6*8($4)
+ sd $3,7*8($4)
+
+ .set noreorder
+ jr $31
+ nop
+.end bn_sqr_comba4
diff --git a/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/mips-mont.S b/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/mips-mont.S
new file mode 100644
index 0000000000..f107837fa1
--- /dev/null
+++ b/deps/openssl/config/archs/linux64-mips64/asm_avx2/crypto/bn/mips-mont.S
@@ -0,0 +1,282 @@
+#include "mips_arch.h"
+
+.text
+
+.set noat
+.set noreorder
+
+.align 5
+.globl bn_mul_mont
+.ent bn_mul_mont
+bn_mul_mont:
+ slt $1,$9,4
+ bnez $1,1f
+ li $2,0
+ slt $1,$9,17 # on in-order CPU
+ bnez $1,bn_mul_mont_internal
+ nop
+1: jr $31
+ li $4,0
+.end bn_mul_mont
+
+.align 5
+.ent bn_mul_mont_internal
+bn_mul_mont_internal:
+ .frame $30,14*8,$31
+ .mask 0x40000000|16711680,-8
+ dsubu $29,14*8
+ sd $30,(14-1)*8($29)
+ sd $23,(14-2)*8($29)
+ sd $22,(14-3)*8($29)
+ sd $21,(14-4)*8($29)
+ sd $20,(14-5)*8($29)
+ sd $19,(14-6)*8($29)
+ sd $18,(14-7)*8($29)
+ sd $17,(14-8)*8($29)
+ sd $16,(14-9)*8($29)
+ move $30,$29
+
+ .set reorder
+ ld $8,0($8)
+ ld $13,0($6) # bp[0]
+ ld $12,0($5) # ap[0]
+ ld $14,0($7) # np[0]
+
+ dsubu $29,2*8 # place for two extra words
+ sll $9,3
+ li $1,-4096
+ dsubu $29,$9
+ and $29,$1
+
+ dmultu ($12,$13)
+ ld $17,8($5)
+ ld $19,8($7)
+ mflo ($10,$12,$13)
+ mfhi ($11,$12,$13)
+ dmultu ($10,$8)
+ mflo ($23,$10,$8)
+
+ dmultu ($17,$13)
+ mflo ($16,$17,$13)
+ mfhi ($17,$17,$13)
+
+ dmultu ($14,$23)
+ mflo ($24,$14,$23)
+ mfhi ($25,$14,$23)
+ dmultu ($19,$23)
+ daddu $24,$10
+ sltu $1,$24,$10
+ daddu $25,$1
+ mflo ($18,$19,$23)
+ mfhi ($19,$19,$23)
+
+ move $15,$29
+ li $22,2*8
+.align 4
+.L1st:
+ .set noreorder
+ daddu $12,$5,$22
+ daddu $14,$7,$22
+ ld $12,($12)
+ ld $14,($14)
+
+ dmultu ($12,$13)
+ daddu $10,$16,$11
+ daddu $24,$18,$25
+ sltu $1,$10,$11
+ sltu $2,$24,$25
+ daddu $11,$17,$1
+ daddu $25,$19,$2
+ mflo ($16,$12,$13)
+ mfhi ($17,$12,$13)
+
+ daddu $24,$10
+ sltu $1,$24,$10
+ dmultu ($14,$23)
+ daddu $25,$1
+ addu $22,8
+ sd $24,($15)
+ sltu $2,$22,$9
+ mflo ($18,$14,$23)
+ mfhi ($19,$14,$23)
+
+ bnez $2,.L1st
+ daddu $15,8
+ .set reorder
+
+ daddu $10,$16,$11
+ sltu $1,$10,$11
+ daddu $11,$17,$1
+
+ daddu $24,$18,$25
+ sltu $2,$24,$25
+ daddu $25,$19,$2
+ daddu $24,$10
+ sltu $1,$24,$10
+ daddu $25,$1
+
+ sd $24,($15)
+
+ daddu $25,$11
+ sltu $1,$25,$11
+ sd $25,8($15)
+ sd $1,2*8($15)
+
+ li $21,8
+.align 4
+.Louter:
+ daddu $13,$6,$21
+ ld $13,($13)
+ ld $12,($5)
+ ld $17,8($5)
+ ld $20,($29)
+
+ dmultu ($12,$13)
+ ld $14,($7)
+ ld $19,8($7)
+ mflo ($10,$12,$13)
+ mfhi ($11,$12,$13)
+ daddu $10,$20
+ dmultu ($10,$8)
+ sltu $1,$10,$20
+ daddu $11,$1
+ mflo ($23,$10,$8)
+
+ dmultu ($17,$13)
+ mflo ($16,$17,$13)
+ mfhi ($17,$17,$13)
+
+ dmultu ($14,$23)
+ mflo ($24,$14,$23)
+ mfhi ($25,$14,$23)
+
+ dmultu ($19,$23)
+ daddu $24,$10
+ sltu $1,$24,$10
+ daddu $25,$1
+ mflo ($18,$19,$23)
+ mfhi ($19,$19,$23)
+
+ move $15,$29
+ li $22,2*8
+ ld $20,8($15)
+.align 4
+.Linner:
+ .set noreorder
+ daddu $12,$5,$22
+ daddu $14,$7,$22
+ ld $12,($12)
+ ld $14,($14)
+
+ dmultu ($12,$13)
+ daddu $10,$16,$11
+ daddu $24,$18,$25
+ sltu $1,$10,$11
+ sltu $2,$24,$25
+ daddu $11,$17,$1
+ daddu $25,$19,$2
+ mflo ($16,$12,$13)
+ mfhi ($17,$12,$13)
+
+ daddu $10,$20
+ addu $22,8
+ dmultu ($14,$23)
+ sltu $1,$10,$20
+ daddu $24,$10
+ daddu $11,$1
+ sltu $2,$24,$10
+ ld $20,2*8($15)
+ daddu $25,$2
+ sltu $1,$22,$9
+ mflo ($18,$14,$23)
+ mfhi ($19,$14,$23)
+ sd $24,($15)
+ bnez $1,.Linner
+ daddu $15,8
+ .set reorder
+
+ daddu $10,$16,$11
+ sltu $1,$10,$11
+ daddu $11,$17,$1
+ daddu $10,$20
+ sltu $2,$10,$20
+ daddu $11,$2
+
+ ld $20,2*8($15)
+ daddu $24,$18,$25
+ sltu $1,$24,$25
+ daddu $25,$19,$1
+ daddu $24,$10
+ sltu $2,$24,$10
+ daddu $25,$2
+ sd $24,($15)
+
+ daddu $24,$25,$11
+ sltu $25,$24,$11
+ daddu $24,$20
+ sltu $1,$24,$20
+ daddu $25,$1
+ sd $24,8($15)
+ sd $25,2*8($15)
+
+ addu $21,8
+ sltu $2,$21,$9
+ bnez $2,.Louter
+
+ .set noreorder
+ daddu $20,$29,$9 # &tp[num]
+ move $15,$29
+ move $5,$29
+ li $11,0 # clear borrow bit
+
+.align 4
+.Lsub: ld $10,($15)
+ ld $24,($7)
+ daddu $15,8
+ daddu $7,8
+ dsubu $24,$10,$24 # tp[i]-np[i]
+ sgtu $1,$24,$10
+ dsubu $10,$24,$11
+ sgtu $11,$10,$24
+ sd $10,($4)
+ or $11,$1
+ sltu $1,$15,$20
+ bnez $1,.Lsub
+ daddu $4,8
+
+ dsubu $11,$25,$11 # handle upmost overflow bit
+ move $15,$29
+ dsubu $4,$9 # restore rp
+ not $25,$11
+
+.Lcopy: ld $14,($15) # conditional move
+ ld $12,($4)
+ sd $0,($15)
+ daddu $15,8
+ and $14,$11
+ and $12,$25
+ or $12,$14
+ sltu $1,$15,$20
+ sd $12,($4)
+ bnez $1,.Lcopy
+ daddu $4,8
+
+ li $4,1
+ li $2,1
+
+ .set noreorder
+ move $29,$30
+ ld $30,(14-1)*8($29)
+ ld $23,(14-2)*8($29)
+ ld $22,(14-3)*8($29)
+ ld $21,(14-4)*8($29)
+ ld $20,(14-5)*8($29)
+ ld $19,(14-6)*8($29)
+ ld $18,(14-7)*8($29)
+ ld $17,(14-8)*8($29)
+ ld $16,(14-9)*8($29)
+ jr $31
+ daddu $29,14*8
+.end bn_mul_mont_internal
+.rdata
+.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"
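
The .Lsub/.Lcopy tail of bn_mul_mont_internal above is the standard Montgomery finish: tentatively store tp - np into rp word by word, fold the final borrow against the extra top word (the "upmost overflow bit"), then merge the two candidates with masks so the choice never becomes a data-dependent branch. A hedged C sketch of that masked merge (names illustrative; in the assembly the keep/discard masks end up in $11 and $25):

    #include <stdint.h>

    /* Constant-time select after the trial subtraction:
     * rp[i] = keep_tp ? tp[i] : diff[i], with diff[] = tp - np. */
    static void mont_select(uint64_t *rp, const uint64_t *tp,
                            const uint64_t *diff, int num, uint64_t keep_tp)
    {
        uint64_t mask = 0 - (keep_tp & 1);    /* all-ones or all-zeros */
        for (int i = 0; i < num; i++)
            rp[i] = (tp[i] & mask) | (diff[i] & ~mask);
    }
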